@botpress/zai 1.0.1-beta.1 → 1.0.1-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/index.js +4071 -0
- package/dist/{csj → node}/adapters/botpress-table.js +1 -2
- package/dist/{csj → node}/operations/check.js +1 -2
- package/dist/{csj → node}/operations/extract.js +8 -9
- package/dist/{csj → node}/operations/filter.js +4 -5
- package/dist/{csj → node}/operations/label.js +7 -8
- package/dist/{csj → node}/operations/rewrite.js +1 -2
- package/dist/{csj → node}/operations/summarize.js +4 -5
- package/dist/{csj → node}/operations/text.js +3 -4
- package/dist/{csj → node}/utils.js +1 -2
- package/dist/{csj → node}/zai.js +1 -2
- package/package.json +17 -12
- package/scripts/update-models.mts +3 -3
- package/scripts/update-types.mts +5 -15
- package/src/adapters/botpress-table.ts +3 -4
- package/src/operations/__tests/index.ts +1 -0
- package/src/operations/check.ts +2 -3
- package/src/operations/extract.ts +13 -14
- package/src/operations/filter.ts +5 -6
- package/src/operations/label.ts +7 -8
- package/src/operations/rewrite.ts +3 -4
- package/src/operations/summarize.ts +4 -5
- package/src/operations/text.ts +4 -5
- package/src/utils.ts +1 -2
- package/src/zai.ts +4 -4
- package/dist/esm/adapters/adapter.js +0 -5
- package/dist/esm/adapters/botpress-table.js +0 -194
- package/dist/esm/adapters/memory.js +0 -15
- package/dist/esm/index.js +0 -11
- package/dist/esm/models.js +0 -390
- package/dist/esm/operations/check.js +0 -149
- package/dist/esm/operations/constants.js +0 -6
- package/dist/esm/operations/errors.js +0 -18
- package/dist/esm/operations/extract.js +0 -217
- package/dist/esm/operations/filter.js +0 -189
- package/dist/esm/operations/label.js +0 -246
- package/dist/esm/operations/rewrite.js +0 -113
- package/dist/esm/operations/summarize.js +0 -134
- package/dist/esm/operations/text.js +0 -48
- package/dist/esm/utils.js +0 -51
- package/dist/esm/zai.js +0 -161
- /package/dist/{csj → node}/adapters/adapter.js +0 -0
- /package/dist/{csj → node}/adapters/memory.js +0 -0
- /package/dist/{csj → node}/index.js +0 -0
- /package/dist/{csj → node}/models.js +0 -0
- /package/dist/{csj → node}/operations/constants.js +0 -0
- /package/dist/{csj → node}/operations/errors.js +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk;
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
3
2
|
import JSON5 from "json5";
|
|
4
3
|
import { jsonrepair } from "jsonrepair";
|
|
5
|
-
import
|
|
4
|
+
import { chunk, isArray } from "lodash-es";
|
|
6
5
|
import { fastHash, stringify, takeUntilTokens } from "../utils";
|
|
7
6
|
import { Zai } from "../zai";
|
|
8
7
|
import { PROMPT_INPUT_BUFFER } from "./constants";
|
|
@@ -23,9 +22,9 @@ Zai.prototype.extract = async function(input, schema, _options) {
|
|
|
23
22
|
const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
|
|
24
23
|
let isArrayOfObjects = false;
|
|
25
24
|
const originalSchema = schema;
|
|
26
|
-
if (schema instanceof
|
|
27
|
-
} else if (schema instanceof
|
|
28
|
-
if (schema._def.type instanceof
|
|
25
|
+
if (schema instanceof z.ZodObject) {
|
|
26
|
+
} else if (schema instanceof z.ZodArray) {
|
|
27
|
+
if (schema._def.type instanceof z.ZodObject) {
|
|
29
28
|
isArrayOfObjects = true;
|
|
30
29
|
schema = schema._def.type;
|
|
31
30
|
} else {
|
|
@@ -42,8 +41,8 @@ Zai.prototype.extract = async function(input, schema, _options) {
|
|
|
42
41
|
if (tokenizer.count(inputAsString) > options.chunkLength) {
|
|
43
42
|
if (isArrayOfObjects) {
|
|
44
43
|
const tokens = tokenizer.split(inputAsString);
|
|
45
|
-
const chunks =
|
|
46
|
-
const all = await Promise.all(chunks.map((
|
|
44
|
+
const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(""));
|
|
45
|
+
const all = await Promise.all(chunks.map((chunk2) => this.extract(chunk2, originalSchema)));
|
|
47
46
|
return all.flat();
|
|
48
47
|
} else {
|
|
49
48
|
inputAsString = tokenizer.truncate(stringify(input), options.chunkLength);
|
|
@@ -137,7 +136,7 @@ ${input2.trim()}
|
|
|
137
136
|
`.trim();
|
|
138
137
|
};
|
|
139
138
|
const formatOutput = (extracted) => {
|
|
140
|
-
extracted =
|
|
139
|
+
extracted = isArray(extracted) ? extracted : [extracted];
|
|
141
140
|
return extracted.map(
|
|
142
141
|
(x) => `
|
|
143
142
|
${START}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
import _ from "lodash";
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
|
+
import { clamp } from "lodash-es";
|
|
4
3
|
import { fastHash, stringify, takeUntilTokens } from "../utils";
|
|
5
4
|
import { Zai } from "../zai";
|
|
6
5
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
|
|
@@ -22,7 +21,7 @@ Zai.prototype.filter = async function(input, condition, _options) {
|
|
|
22
21
|
const MAX_ITEMS_PER_CHUNK = 50;
|
|
23
22
|
const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
|
|
24
23
|
const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5));
|
|
25
|
-
const TOKENS_CONDITION_MAX =
|
|
24
|
+
const TOKENS_CONDITION_MAX = clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition));
|
|
26
25
|
const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX;
|
|
27
26
|
condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX);
|
|
28
27
|
let chunks = [];
|
|
@@ -152,7 +151,7 @@ The condition is: "${condition}"
|
|
|
152
151
|
const [idx, filter] = x.split(":");
|
|
153
152
|
return { idx: parseInt((idx == null ? void 0 : idx.trim()) ?? ""), filter: (filter == null ? void 0 : filter.toLowerCase().trim()) === "true" };
|
|
154
153
|
});
|
|
155
|
-
const partial = chunk.filter((
|
|
154
|
+
const partial = chunk.filter((_, idx) => {
|
|
156
155
|
var _a2;
|
|
157
156
|
return ((_a2 = indices.find((x) => x.idx === idx)) == null ? void 0 : _a2.filter) ?? false;
|
|
158
157
|
});
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
import _ from "lodash";
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
|
+
import { clamp, chunk } from "lodash-es";
|
|
4
3
|
import { fastHash, stringify, takeUntilTokens } from "../utils";
|
|
5
4
|
import { Zai } from "../zai";
|
|
6
5
|
import { PROMPT_INPUT_BUFFER } from "./constants";
|
|
@@ -62,9 +61,9 @@ Zai.prototype.label = async function(input, _labels, _options) {
|
|
|
62
61
|
const tokenizer = await this.getTokenizer();
|
|
63
62
|
const taskId = this.taskId;
|
|
64
63
|
const taskType = "zai.label";
|
|
65
|
-
const TOTAL_MAX_TOKENS =
|
|
66
|
-
const CHUNK_EXAMPLES_MAX_TOKENS =
|
|
67
|
-
const CHUNK_INPUT_MAX_TOKENS =
|
|
64
|
+
const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1e3, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
|
|
65
|
+
const CHUNK_EXAMPLES_MAX_TOKENS = clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 1e4);
|
|
66
|
+
const CHUNK_INPUT_MAX_TOKENS = clamp(
|
|
68
67
|
TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
|
|
69
68
|
TOTAL_MAX_TOKENS * 0.5,
|
|
70
69
|
TOTAL_MAX_TOKENS
|
|
@@ -72,8 +71,8 @@ Zai.prototype.label = async function(input, _labels, _options) {
|
|
|
72
71
|
const inputAsString = stringify(input);
|
|
73
72
|
if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
|
|
74
73
|
const tokens = tokenizer.split(inputAsString);
|
|
75
|
-
const chunks =
|
|
76
|
-
const allLabels = await Promise.all(chunks.map((
|
|
74
|
+
const chunks = chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(""));
|
|
75
|
+
const allLabels = await Promise.all(chunks.map((chunk2) => this.label(chunk2, _labels)));
|
|
77
76
|
return allLabels.reduce((acc, x) => {
|
|
78
77
|
Object.keys(x).forEach((key) => {
|
|
79
78
|
if (acc[key] === true) {
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
import _ from "lodash";
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
|
+
import { chunk } from "lodash-es";
|
|
4
3
|
import { Zai } from "../zai";
|
|
5
4
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
|
|
6
5
|
const Options = z.object({
|
|
@@ -52,8 +51,8 @@ ${newText}
|
|
|
52
51
|
const useMergeSort = parts >= Math.pow(2, N);
|
|
53
52
|
const chunkSize = Math.ceil(tokens.length / (parts * N));
|
|
54
53
|
if (useMergeSort) {
|
|
55
|
-
const chunks =
|
|
56
|
-
const allSummaries = await Promise.all(chunks.map((
|
|
54
|
+
const chunks = chunk(tokens, chunkSize).map((x) => x.join(""));
|
|
55
|
+
const allSummaries = await Promise.all(chunks.map((chunk2) => this.summarize(chunk2, options)));
|
|
57
56
|
return this.summarize(allSummaries.join("\n\n============\n\n"), options);
|
|
58
57
|
}
|
|
59
58
|
const summaries = [];
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
import _ from "lodash";
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
|
+
import { clamp } from "lodash-es";
|
|
4
3
|
import { Zai } from "../zai";
|
|
5
4
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
|
|
6
5
|
const Options = z.object({
|
|
@@ -17,7 +16,7 @@ Zai.prototype.text = async function(prompt, _options) {
|
|
|
17
16
|
const instructions = [];
|
|
18
17
|
let chart = "";
|
|
19
18
|
if (options.length) {
|
|
20
|
-
const length =
|
|
19
|
+
const length = clamp(options.length * 0.75, 5, options.length);
|
|
21
20
|
instructions.push(`IMPORTANT: Length constraint: ${length} tokens/words`);
|
|
22
21
|
instructions.push(`The text must be standalone and complete in less than ${length} tokens/words`);
|
|
23
22
|
}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk;
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
3
2
|
export const stringify = (input, beautify = true) => {
|
|
4
3
|
return typeof input === "string" && !!input.length ? input : input ? JSON.stringify(input, beautify ? null : void 0, beautify ? 2 : void 0) : "<input is null, false, undefined or empty>";
|
|
5
4
|
};
|
package/dist/{csj → node}/zai.js
RENAMED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
2
|
import { getWasmTokenizer } from "@botpress/wasm";
|
|
3
3
|
import { TableAdapter } from "./adapters/botpress-table";
|
|
4
4
|
import { MemoryAdapter } from "./adapters/memory";
|
|
5
5
|
import { Models } from "./models";
|
|
6
6
|
import { BotpressClient } from "./utils";
|
|
7
|
-
const { z } = sdk;
|
|
8
7
|
const ActiveLearning = z.object({
|
|
9
8
|
enable: z.boolean().describe("Whether to enable active learning").default(false),
|
|
10
9
|
tableName: z.string().regex(
|
package/package.json
CHANGED
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@botpress/zai",
|
|
3
3
|
"description": "Zui AI (zai) – An LLM utility library written on top of Zui and the Botpress API",
|
|
4
|
-
"version": "1.0.1-beta.
|
|
4
|
+
"version": "1.0.1-beta.3",
|
|
5
5
|
"exports": {
|
|
6
6
|
".": {
|
|
7
7
|
"types": "./src/index.ts",
|
|
8
|
-
"
|
|
9
|
-
"
|
|
8
|
+
"node": "./dist/node/index.js",
|
|
9
|
+
"default": "./dist/node/index.js"
|
|
10
|
+
},
|
|
11
|
+
"./browser": {
|
|
12
|
+
"types": "./src/index.ts",
|
|
13
|
+
"import": "./dist/browser/index.js"
|
|
10
14
|
}
|
|
11
15
|
},
|
|
12
|
-
"main": "./dist/
|
|
13
|
-
"browser": "./dist/
|
|
16
|
+
"main": "./dist/node/index.js",
|
|
17
|
+
"browser": "./dist/browser/index.js",
|
|
14
18
|
"types": "./src/index.ts",
|
|
15
19
|
"scripts": {
|
|
16
20
|
"build": "npm run build:nodejs && npm run build:browser",
|
|
17
|
-
"build:nodejs": "esbuild src/**/*.ts src/*.ts --platform=node --target=node16 --outdir=dist/
|
|
18
|
-
"build:browser": "esbuild src
|
|
21
|
+
"build:nodejs": "esbuild src/**/*.ts src/*.ts --platform=node --target=node16 --outdir=dist/node",
|
|
22
|
+
"build:browser": "esbuild src/index.ts --bundle --external:@bpinternal/zui --external:@botpress/client --external:@botpress/wasm --platform=browser --target=chrome58,firefox57,safari11 --outdir=dist/browser",
|
|
19
23
|
"watch": "tsup --watch",
|
|
20
24
|
"test": "vitest run --config vitest.config.ts",
|
|
21
25
|
"test:update": "vitest -u run --config vitest.config.ts",
|
|
@@ -29,22 +33,23 @@
|
|
|
29
33
|
"license": "ISC",
|
|
30
34
|
"dependencies": {
|
|
31
35
|
"json5": "^2.2.1",
|
|
32
|
-
"jsonrepair": "^3.2.0"
|
|
36
|
+
"jsonrepair": "^3.2.0",
|
|
37
|
+
"lodash-es": "^4.17.21"
|
|
33
38
|
},
|
|
34
39
|
"devDependencies": {
|
|
35
40
|
"@botpress/vai": "0.0.1-beta.7",
|
|
36
|
-
"@types/lodash": "^4.17.0",
|
|
41
|
+
"@types/lodash-es": "^4.17.0",
|
|
37
42
|
"dotenv": "^16.3.1",
|
|
38
43
|
"esbuild": "^0.24.2",
|
|
44
|
+
"lodash": "^4.17.21",
|
|
39
45
|
"ts-node": "^10.9.2",
|
|
40
46
|
"tsup": "^8.3.5",
|
|
41
47
|
"typescript": "^5.7.2",
|
|
42
48
|
"vitest": "^2.0.5"
|
|
43
49
|
},
|
|
44
50
|
"peerDependencies": {
|
|
45
|
-
"@botpress/client": "^0.
|
|
46
|
-
"@botpress/sdk": "^1.6.1",
|
|
51
|
+
"@botpress/client": "^0.40.0",
|
|
47
52
|
"@botpress/wasm": "^1.0.0",
|
|
48
|
-
"
|
|
53
|
+
"@bpinternal/zui": "0.13.3-beta.1"
|
|
49
54
|
}
|
|
50
55
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Client } from '@botpress/client'
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
import { orderBy, isArray } from 'lodash-es'
|
|
4
4
|
import fs from 'node:fs'
|
|
5
5
|
|
|
6
6
|
const LLM_LIST_MODELS = 'listLanguageModels'
|
|
@@ -43,7 +43,7 @@ for (const integrationId in bot.integrations) {
|
|
|
43
43
|
input: {}
|
|
44
44
|
})
|
|
45
45
|
|
|
46
|
-
if (
|
|
46
|
+
if (isArray(output?.models)) {
|
|
47
47
|
for (const model of output.models) {
|
|
48
48
|
models.push({
|
|
49
49
|
id: `${integration.name}__${model.id}`,
|
|
@@ -60,7 +60,7 @@ for (const integrationId in bot.integrations) {
|
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
-
const content = JSON.stringify(
|
|
63
|
+
const content = JSON.stringify(orderBy(models, ['integration', 'name']), null, 2)
|
|
64
64
|
|
|
65
65
|
fs.writeFileSync(
|
|
66
66
|
'./src/models.ts',
|
package/scripts/update-types.mts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
import { Client } from '@botpress/client'
|
|
2
|
-
import
|
|
2
|
+
import { z } from '@bpinternal/zui'
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
import { maxBy } from 'lodash-es'
|
|
5
5
|
import fs from 'node:fs'
|
|
6
6
|
import path from 'node:path'
|
|
7
7
|
|
|
8
|
-
const { z } = sdk
|
|
9
|
-
|
|
10
8
|
const Interfaces = ['llm'] as const
|
|
11
9
|
|
|
12
10
|
const client = new Client({
|
|
@@ -21,7 +19,7 @@ for (const name of Interfaces) {
|
|
|
21
19
|
})
|
|
22
20
|
|
|
23
21
|
const { interface: latest } = await client.getInterface({
|
|
24
|
-
id:
|
|
22
|
+
id: maxBy(interfaces, 'version')!.id
|
|
25
23
|
})
|
|
26
24
|
|
|
27
25
|
for (const action of Object.keys(latest.actions)) {
|
|
@@ -40,16 +38,8 @@ for (const name of Interfaces) {
|
|
|
40
38
|
|
|
41
39
|
export namespace ${name} {
|
|
42
40
|
export namespace ${action} {
|
|
43
|
-
export ${
|
|
44
|
-
|
|
45
|
-
.title('Input')
|
|
46
|
-
.dereference(references)
|
|
47
|
-
.toTypescript({ declaration: 'type' })};
|
|
48
|
-
export ${sdk.z
|
|
49
|
-
.fromJsonSchema(output)
|
|
50
|
-
.title('Output')
|
|
51
|
-
.dereference(references)
|
|
52
|
-
.toTypescript({ declaration: 'type' })};
|
|
41
|
+
export ${z.fromJsonSchema(input).title('Input').dereference(references).toTypescript({ declaration: 'type' })};
|
|
42
|
+
export ${z.fromJsonSchema(output).title('Output').dereference(references).toTypescript({ declaration: 'type' })};
|
|
53
43
|
}
|
|
54
44
|
}`
|
|
55
45
|
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { type Client } from '@botpress/client'
|
|
2
|
-
import
|
|
3
|
-
const { z } = sdk
|
|
2
|
+
import { z } from '@bpinternal/zui'
|
|
4
3
|
|
|
5
4
|
import { BotpressClient, GenerationMetadata } from '../utils'
|
|
6
5
|
import { Adapter, GetExamplesProps, SaveExampleProps } from './adapter'
|
|
@@ -31,7 +30,7 @@ const Props = z.object({
|
|
|
31
30
|
)
|
|
32
31
|
})
|
|
33
32
|
|
|
34
|
-
export type TableSchema =
|
|
33
|
+
export type TableSchema = z.input<typeof TableSchema>
|
|
35
34
|
const TableSchema = z.object({
|
|
36
35
|
taskType: z.string().describe('The type of the task (filter, extract, etc.)'),
|
|
37
36
|
taskId: z.string(),
|
|
@@ -67,7 +66,7 @@ export class TableAdapter extends Adapter {
|
|
|
67
66
|
private status: 'initialized' | 'ready' | 'error'
|
|
68
67
|
private errors = [] as string[]
|
|
69
68
|
|
|
70
|
-
constructor(props:
|
|
69
|
+
constructor(props: z.input<typeof Props>) {
|
|
71
70
|
super()
|
|
72
71
|
props = Props.parse(props)
|
|
73
72
|
this.client = props.client
|
package/src/operations/check.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
3
|
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
5
4
|
import { Zai } from '../zai'
|
|
@@ -11,7 +10,7 @@ const Example = z.object({
|
|
|
11
10
|
reason: z.string().optional()
|
|
12
11
|
})
|
|
13
12
|
|
|
14
|
-
export type Options =
|
|
13
|
+
export type Options = z.input<typeof Options>
|
|
15
14
|
const Options = z.object({
|
|
16
15
|
examples: z.array(Example).describe('Examples to check the condition against').default([])
|
|
17
16
|
})
|
|
@@ -1,16 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
3
|
import JSON5 from 'json5'
|
|
5
4
|
import { jsonrepair } from 'jsonrepair'
|
|
6
5
|
|
|
7
|
-
import
|
|
6
|
+
import { chunk, isArray } from 'lodash-es'
|
|
8
7
|
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
9
8
|
import { Zai } from '../zai'
|
|
10
9
|
import { PROMPT_INPUT_BUFFER } from './constants'
|
|
11
10
|
import { JsonParsingError } from './errors'
|
|
12
11
|
|
|
13
|
-
export type Options =
|
|
12
|
+
export type Options = z.input<typeof Options>
|
|
14
13
|
const Options = z.object({
|
|
15
14
|
instructions: z.string().optional().describe('Instructions to guide the user on how to extract the data'),
|
|
16
15
|
chunkLength: z
|
|
@@ -25,12 +24,12 @@ const Options = z.object({
|
|
|
25
24
|
declare module '@botpress/zai' {
|
|
26
25
|
interface Zai {
|
|
27
26
|
/** Extracts one or many elements from an arbitrary input */
|
|
28
|
-
extract<S extends
|
|
29
|
-
extract<S extends
|
|
27
|
+
extract<S extends z.AnyZodObject>(input: unknown, schema: S, options?: Options): Promise<z.infer<S>>
|
|
28
|
+
extract<S extends z.AnyZodObject>(
|
|
30
29
|
input: unknown,
|
|
31
|
-
schema:
|
|
30
|
+
schema: z.ZodArray<S>,
|
|
32
31
|
options?: Options
|
|
33
|
-
): Promise<Array<
|
|
32
|
+
): Promise<Array<z.infer<S>>>
|
|
34
33
|
}
|
|
35
34
|
}
|
|
36
35
|
|
|
@@ -50,10 +49,10 @@ Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
|
50
49
|
let isArrayOfObjects = false
|
|
51
50
|
const originalSchema = schema
|
|
52
51
|
|
|
53
|
-
if (schema instanceof
|
|
52
|
+
if (schema instanceof z.ZodObject) {
|
|
54
53
|
// Do nothing
|
|
55
|
-
} else if (schema instanceof
|
|
56
|
-
if (schema._def.type instanceof
|
|
54
|
+
} else if (schema instanceof z.ZodArray) {
|
|
55
|
+
if (schema._def.type instanceof z.ZodObject) {
|
|
57
56
|
isArrayOfObjects = true
|
|
58
57
|
schema = schema._def.type
|
|
59
58
|
} else {
|
|
@@ -76,8 +75,8 @@ Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
|
76
75
|
// If we want to extract an array of objects, we will run this function recursively
|
|
77
76
|
if (isArrayOfObjects) {
|
|
78
77
|
const tokens = tokenizer.split(inputAsString)
|
|
79
|
-
const chunks =
|
|
80
|
-
const all = await Promise.all(chunks.map((chunk) => this.extract(chunk, originalSchema as
|
|
78
|
+
const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(''))
|
|
79
|
+
const all = await Promise.all(chunks.map((chunk) => this.extract(chunk, originalSchema as z.AnyZodObject)))
|
|
81
80
|
|
|
82
81
|
return all.flat()
|
|
83
82
|
} else {
|
|
@@ -194,7 +193,7 @@ ${input.trim()}
|
|
|
194
193
|
}
|
|
195
194
|
|
|
196
195
|
const formatOutput = (extracted: any) => {
|
|
197
|
-
extracted =
|
|
196
|
+
extracted = isArray(extracted) ? extracted : [extracted]
|
|
198
197
|
|
|
199
198
|
return (
|
|
200
199
|
extracted
|
package/src/operations/filter.ts
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
|
-
import
|
|
3
|
+
import { clamp } from 'lodash-es'
|
|
5
4
|
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
6
5
|
import { Zai } from '../zai'
|
|
7
6
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
|
|
8
7
|
|
|
9
|
-
type Example =
|
|
8
|
+
type Example = z.input<typeof Example>
|
|
10
9
|
const Example = z.object({
|
|
11
10
|
input: z.any(),
|
|
12
11
|
filter: z.boolean(),
|
|
13
12
|
reason: z.string().optional()
|
|
14
13
|
})
|
|
15
14
|
|
|
16
|
-
export type Options =
|
|
15
|
+
export type Options = z.input<typeof Options>
|
|
17
16
|
const Options = z.object({
|
|
18
17
|
tokensPerItem: z
|
|
19
18
|
.number()
|
|
@@ -44,7 +43,7 @@ Zai.prototype.filter = async function (this: Zai, input, condition, _options) {
|
|
|
44
43
|
const MAX_ITEMS_PER_CHUNK = 50
|
|
45
44
|
const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER
|
|
46
45
|
const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5))
|
|
47
|
-
const TOKENS_CONDITION_MAX =
|
|
46
|
+
const TOKENS_CONDITION_MAX = clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition))
|
|
48
47
|
const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX
|
|
49
48
|
|
|
50
49
|
condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX)
|
package/src/operations/label.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
|
-
import
|
|
3
|
+
import { clamp, chunk } from 'lodash-es'
|
|
5
4
|
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
6
5
|
import { Zai } from '../zai'
|
|
7
6
|
import { PROMPT_INPUT_BUFFER } from './constants'
|
|
@@ -21,7 +20,7 @@ type Example<T extends string> = {
|
|
|
21
20
|
labels: Partial<Record<T, { label: Label; explanation?: string }>>
|
|
22
21
|
}
|
|
23
22
|
|
|
24
|
-
export type Options<T extends string> = Omit<
|
|
23
|
+
export type Options<T extends string> = Omit<z.input<typeof Options>, 'examples'> & {
|
|
25
24
|
examples?: Array<Partial<Example<T>>>
|
|
26
25
|
}
|
|
27
26
|
|
|
@@ -108,9 +107,9 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
108
107
|
const taskId = this.taskId
|
|
109
108
|
const taskType = 'zai.label'
|
|
110
109
|
|
|
111
|
-
const TOTAL_MAX_TOKENS =
|
|
112
|
-
const CHUNK_EXAMPLES_MAX_TOKENS =
|
|
113
|
-
const CHUNK_INPUT_MAX_TOKENS =
|
|
110
|
+
const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1000, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER)
|
|
111
|
+
const CHUNK_EXAMPLES_MAX_TOKENS = clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 10_000)
|
|
112
|
+
const CHUNK_INPUT_MAX_TOKENS = clamp(
|
|
114
113
|
TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
|
|
115
114
|
TOTAL_MAX_TOKENS * 0.5,
|
|
116
115
|
TOTAL_MAX_TOKENS
|
|
@@ -120,7 +119,7 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
120
119
|
|
|
121
120
|
if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
|
|
122
121
|
const tokens = tokenizer.split(inputAsString)
|
|
123
|
-
const chunks =
|
|
122
|
+
const chunks = chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(''))
|
|
124
123
|
const allLabels = await Promise.all(chunks.map((chunk) => this.label(chunk, _labels)))
|
|
125
124
|
|
|
126
125
|
// Merge all the labels together (those who are true will remain true)
|
|
@@ -1,17 +1,16 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
3
|
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
5
4
|
import { Zai } from '../zai'
|
|
6
5
|
import { PROMPT_INPUT_BUFFER } from './constants'
|
|
7
6
|
|
|
8
|
-
type Example =
|
|
7
|
+
type Example = z.input<typeof Example> & { instructions?: string }
|
|
9
8
|
const Example = z.object({
|
|
10
9
|
input: z.string(),
|
|
11
10
|
output: z.string()
|
|
12
11
|
})
|
|
13
12
|
|
|
14
|
-
export type Options =
|
|
13
|
+
export type Options = z.input<typeof Options>
|
|
15
14
|
const Options = z.object({
|
|
16
15
|
examples: z.array(Example).default([]),
|
|
17
16
|
length: z.number().min(10).max(16_000).optional().describe('The maximum number of tokens to generate')
|
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
|
-
import
|
|
3
|
+
import { chunk } from 'lodash-es'
|
|
5
4
|
import { Zai } from '../zai'
|
|
6
5
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
|
|
7
6
|
|
|
8
|
-
export type Options =
|
|
7
|
+
export type Options = z.input<typeof Options>
|
|
9
8
|
const Options = z.object({
|
|
10
9
|
prompt: z
|
|
11
10
|
.string()
|
|
@@ -93,7 +92,7 @@ ${newText}
|
|
|
93
92
|
const chunkSize = Math.ceil(tokens.length / (parts * N))
|
|
94
93
|
|
|
95
94
|
if (useMergeSort) {
|
|
96
|
-
const chunks =
|
|
95
|
+
const chunks = chunk(tokens, chunkSize).map((x) => x.join(''))
|
|
97
96
|
const allSummaries = await Promise.all(chunks.map((chunk) => this.summarize(chunk, options)))
|
|
98
97
|
return this.summarize(allSummaries.join('\n\n============\n\n'), options)
|
|
99
98
|
}
|
package/src/operations/text.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
const { z } = sdk
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
3
2
|
|
|
4
|
-
import
|
|
3
|
+
import { clamp } from 'lodash-es'
|
|
5
4
|
import { Zai } from '../zai'
|
|
6
5
|
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
|
|
7
6
|
|
|
8
|
-
export type Options =
|
|
7
|
+
export type Options = z.input<typeof Options>
|
|
9
8
|
const Options = z.object({
|
|
10
9
|
length: z.number().min(1).max(100_000).optional().describe('The maximum number of tokens to generate')
|
|
11
10
|
})
|
|
@@ -31,7 +30,7 @@ Zai.prototype.text = async function (this: Zai, prompt, _options) {
|
|
|
31
30
|
let chart = ''
|
|
32
31
|
|
|
33
32
|
if (options.length) {
|
|
34
|
-
const length =
|
|
33
|
+
const length = clamp(options.length * 0.75, 5, options.length)
|
|
35
34
|
instructions.push(`IMPORTANT: Length constraint: ${length} tokens/words`)
|
|
36
35
|
instructions.push(`The text must be standalone and complete in less than ${length} tokens/words`)
|
|
37
36
|
}
|
package/src/utils.ts
CHANGED
package/src/zai.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Client } from '@botpress/client'
|
|
2
|
-
import
|
|
2
|
+
import { z } from '@bpinternal/zui'
|
|
3
3
|
import { type TextTokenizer, getWasmTokenizer } from '@botpress/wasm'
|
|
4
4
|
|
|
5
5
|
import { Adapter } from './adapters/adapter'
|
|
@@ -7,12 +7,12 @@ import { TableAdapter } from './adapters/botpress-table'
|
|
|
7
7
|
import { MemoryAdapter } from './adapters/memory'
|
|
8
8
|
import { Models } from './models'
|
|
9
9
|
import { llm } from './sdk-interfaces/llm/generateContent'
|
|
10
|
+
|
|
10
11
|
import { BotpressClient, GenerationMetadata } from './utils'
|
|
11
12
|
|
|
12
|
-
const { z } = sdk
|
|
13
13
|
type ModelId = (typeof Models)[number]['id']
|
|
14
14
|
|
|
15
|
-
type ActiveLearning =
|
|
15
|
+
type ActiveLearning = z.input<typeof ActiveLearning>
|
|
16
16
|
const ActiveLearning = z.object({
|
|
17
17
|
enable: z.boolean().describe('Whether to enable active learning').default(false),
|
|
18
18
|
tableName: z
|
|
@@ -33,7 +33,7 @@ const ActiveLearning = z.object({
|
|
|
33
33
|
.default('default')
|
|
34
34
|
})
|
|
35
35
|
|
|
36
|
-
type ZaiConfig =
|
|
36
|
+
type ZaiConfig = z.input<typeof ZaiConfig>
|
|
37
37
|
const ZaiConfig = z.object({
|
|
38
38
|
client: BotpressClient,
|
|
39
39
|
userId: z.string().describe('The ID of the user consuming the API').optional(),
|