@botpress/zai 2.1.19 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +696 -0
- package/README.md +28 -2
- package/dist/index.d.ts +39 -18
- package/dist/index.js +1 -0
- package/dist/operations/errors.js +112 -8
- package/dist/operations/extract.js +20 -12
- package/dist/operations/filter.js +3 -1
- package/dist/operations/group.js +278 -0
- package/dist/operations/label.js +3 -1
- package/dist/operations/summarize.js +3 -1
- package/e2e/data/cache.jsonl +219 -0
- package/package.json +4 -3
- package/src/index.ts +1 -0
- package/src/operations/errors.ts +96 -1
- package/src/operations/extract.ts +21 -11
- package/src/operations/filter.ts +3 -1
- package/src/operations/group.ts +421 -0
- package/src/operations/label.ts +3 -1
- package/src/operations/summarize.ts +3 -2
- package/src/zai.ts +7 -9
package/README.md
CHANGED
|
@@ -104,7 +104,32 @@ const techCompanies = await zai.filter(companies, 'are technology companies')
|
|
|
104
104
|
const recentPosts = await zai.filter(posts, 'were published this week')
|
|
105
105
|
```
|
|
106
106
|
|
|
107
|
-
### 6. Text - Generate content
|
|
107
|
+
### 6. Group - Organize items into categories
|
|
108
|
+
|
|
109
|
+
```typescript
|
|
110
|
+
// Group items automatically
|
|
111
|
+
const grouped = await zai.group(tasks, {
|
|
112
|
+
instructions: 'Group by priority level',
|
|
113
|
+
})
|
|
114
|
+
// Result: { 'High Priority': [...], 'Medium Priority': [...], 'Low Priority': [...] }
|
|
115
|
+
|
|
116
|
+
// Group with initial categories
|
|
117
|
+
const categorized = await zai.group(emails, {
|
|
118
|
+
instructions: 'Group by topic',
|
|
119
|
+
initialGroups: [
|
|
120
|
+
{ id: 'work', label: 'Work' },
|
|
121
|
+
{ id: 'personal', label: 'Personal' },
|
|
122
|
+
],
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
// Group large datasets efficiently
|
|
126
|
+
const organized = await zai.group(largeArray, {
|
|
127
|
+
instructions: 'Group by date',
|
|
128
|
+
chunkLength: 8000, // Process in chunks for better performance
|
|
129
|
+
})
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### 7. Text - Generate content
|
|
108
133
|
|
|
109
134
|
```typescript
|
|
110
135
|
const blogPost = await zai.text('Write about the future of AI', {
|
|
@@ -113,7 +138,7 @@ const blogPost = await zai.text('Write about the future of AI', {
|
|
|
113
138
|
})
|
|
114
139
|
```
|
|
115
140
|
|
|
116
|
-
### 7. Summarize - Create summaries
|
|
141
|
+
### 8. Summarize - Create summaries
|
|
117
142
|
|
|
118
143
|
```typescript
|
|
119
144
|
// Simple summary
|
|
@@ -237,6 +262,7 @@ setTimeout(() => controller.abort(), 5000)
|
|
|
237
262
|
- `.label(content, criteria, options?)` - Apply multiple labels
|
|
238
263
|
- `.rewrite(content, instruction, options?)` - Transform text
|
|
239
264
|
- `.filter(items, condition, options?)` - Filter array items
|
|
265
|
+
- `.group(items, options?)` - Organize items into categories
|
|
240
266
|
- `.text(prompt, options?)` - Generate text
|
|
241
267
|
- `.summarize(content, options?)` - Create summary
|
|
242
268
|
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Cognitive, Model, BotpressClientLike, GenerateContentInput, GenerateContentOutput } from '@botpress/cognitive';
|
|
1
|
+
import { Cognitive, Models, Model, BotpressClientLike, GenerateContentInput, GenerateContentOutput } from '@botpress/cognitive';
|
|
2
2
|
import { TextTokenizer } from '@bpinternal/thicktoken';
|
|
3
3
|
|
|
4
4
|
type GenerationMetadata = {
|
|
@@ -41,7 +41,6 @@ declare abstract class Adapter {
|
|
|
41
41
|
abstract saveExample<TInput, TOutput>(props: SaveExampleProps<TInput, TOutput>): Promise<void>;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
type ModelId = Required<Parameters<Cognitive['generateContent']>[0]['model']>;
|
|
45
44
|
type ActiveLearning = {
|
|
46
45
|
enable: boolean;
|
|
47
46
|
tableName: string;
|
|
@@ -50,7 +49,7 @@ type ActiveLearning = {
|
|
|
50
49
|
type ZaiConfig = {
|
|
51
50
|
client: BotpressClientLike | Cognitive;
|
|
52
51
|
userId?: string;
|
|
53
|
-
modelId?: ModelId;
|
|
52
|
+
modelId?: Models;
|
|
54
53
|
activeLearning?: ActiveLearning;
|
|
55
54
|
namespace?: string;
|
|
56
55
|
};
|
|
@@ -59,7 +58,7 @@ declare class Zai {
|
|
|
59
58
|
protected client: Cognitive;
|
|
60
59
|
private _originalConfig;
|
|
61
60
|
private _userId;
|
|
62
|
-
protected Model: ModelId;
|
|
61
|
+
protected Model: Models;
|
|
63
62
|
protected ModelDetails: Model;
|
|
64
63
|
protected namespace: string;
|
|
65
64
|
protected adapter: Adapter;
|
|
@@ -167,14 +166,14 @@ declare class Response<T = any, S = T> implements PromiseLike<S> {
|
|
|
167
166
|
}>;
|
|
168
167
|
}
|
|
169
168
|
|
|
170
|
-
type Options$6 = {
|
|
169
|
+
type Options$7 = {
|
|
171
170
|
/** The maximum number of tokens to generate */
|
|
172
171
|
length?: number;
|
|
173
172
|
};
|
|
174
173
|
declare module '@botpress/zai' {
|
|
175
174
|
interface Zai {
|
|
176
175
|
/** Generates a text of the desired length according to the prompt */
|
|
177
|
-
text(prompt: string, options?: Options$6): Response<string>;
|
|
176
|
+
text(prompt: string, options?: Options$7): Response<string>;
|
|
178
177
|
}
|
|
179
178
|
}
|
|
180
179
|
|
|
@@ -183,7 +182,7 @@ type Example$3 = {
|
|
|
183
182
|
output: string;
|
|
184
183
|
instructions?: string;
|
|
185
184
|
};
|
|
186
|
-
type Options$5 = {
|
|
185
|
+
type Options$6 = {
|
|
187
186
|
/** Examples to guide the rewriting */
|
|
188
187
|
examples?: Array<Example$3>;
|
|
189
188
|
/** The maximum number of tokens to generate */
|
|
@@ -192,11 +191,11 @@ type Options$5 = {
|
|
|
192
191
|
declare module '@botpress/zai' {
|
|
193
192
|
interface Zai {
|
|
194
193
|
/** Rewrites a string according to match the prompt */
|
|
195
|
-
rewrite(original: string, prompt: string, options?: Options$5): Response<string>;
|
|
194
|
+
rewrite(original: string, prompt: string, options?: Options$6): Response<string>;
|
|
196
195
|
}
|
|
197
196
|
}
|
|
198
197
|
|
|
199
|
-
type Options$4 = {
|
|
198
|
+
type Options$5 = {
|
|
200
199
|
/** What should the text be summarized to? */
|
|
201
200
|
prompt?: string;
|
|
202
201
|
/** How to format the example text */
|
|
@@ -216,7 +215,7 @@ type Options$4 = {
|
|
|
216
215
|
declare module '@botpress/zai' {
|
|
217
216
|
interface Zai {
|
|
218
217
|
/** Summarizes a text of any length to a summary of the desired length */
|
|
219
|
-
summarize(original: string, options?: Options$4): Response<string>;
|
|
218
|
+
summarize(original: string, options?: Options$5): Response<string>;
|
|
220
219
|
}
|
|
221
220
|
}
|
|
222
221
|
|
|
@@ -226,14 +225,14 @@ type Example$2 = {
|
|
|
226
225
|
reason?: string;
|
|
227
226
|
condition?: string;
|
|
228
227
|
};
|
|
229
|
-
type Options$3 = {
|
|
228
|
+
type Options$4 = {
|
|
230
229
|
/** Examples to check the condition against */
|
|
231
230
|
examples?: Array<Example$2>;
|
|
232
231
|
};
|
|
233
232
|
declare module '@botpress/zai' {
|
|
234
233
|
interface Zai {
|
|
235
234
|
/** Checks wether a condition is true or not */
|
|
236
|
-
check(input: unknown, condition: string, options?: Options$3): Response<{
|
|
235
|
+
check(input: unknown, condition: string, options?: Options$4): Response<{
|
|
237
236
|
/** Whether the condition is true or not */
|
|
238
237
|
value: boolean;
|
|
239
238
|
/** The explanation of the decision */
|
|
@@ -247,7 +246,7 @@ type Example$1 = {
|
|
|
247
246
|
filter: boolean;
|
|
248
247
|
reason?: string;
|
|
249
248
|
};
|
|
250
|
-
type Options$2 = {
|
|
249
|
+
type Options$3 = {
|
|
251
250
|
/** The maximum number of tokens per item */
|
|
252
251
|
tokensPerItem?: number;
|
|
253
252
|
/** Examples to filter the condition against */
|
|
@@ -256,11 +255,11 @@ type Options$2 = {
|
|
|
256
255
|
declare module '@botpress/zai' {
|
|
257
256
|
interface Zai {
|
|
258
257
|
/** Filters elements of an array against a condition */
|
|
259
|
-
filter<T>(input: Array<T>, condition: string, options?: Options$2): Response<Array<T>>;
|
|
258
|
+
filter<T>(input: Array<T>, condition: string, options?: Options$3): Response<Array<T>>;
|
|
260
259
|
}
|
|
261
260
|
}
|
|
262
261
|
|
|
263
|
-
type Options$1 = {
|
|
262
|
+
type Options$2 = {
|
|
264
263
|
/** Instructions to guide the user on how to extract the data */
|
|
265
264
|
instructions?: string;
|
|
266
265
|
/** The maximum number of tokens per chunk */
|
|
@@ -275,7 +274,7 @@ type OfType<O, T extends __Z = __Z<O>> = T extends __Z<O> ? T : never;
|
|
|
275
274
|
declare module '@botpress/zai' {
|
|
276
275
|
interface Zai {
|
|
277
276
|
/** Extracts one or many elements from an arbitrary input */
|
|
278
|
-
extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$1): Response<S['_output']>;
|
|
277
|
+
extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$2): Response<S['_output']>;
|
|
279
278
|
}
|
|
280
279
|
}
|
|
281
280
|
|
|
@@ -294,7 +293,7 @@ type Example<T extends string> = {
|
|
|
294
293
|
explanation?: string;
|
|
295
294
|
}>>;
|
|
296
295
|
};
|
|
297
|
-
type Options<T extends string> = {
|
|
296
|
+
type Options$1<T extends string> = {
|
|
298
297
|
/** Examples to help the user make a decision */
|
|
299
298
|
examples?: Array<Example<T>>;
|
|
300
299
|
/** Instructions to guide the user on how to extract the data */
|
|
@@ -306,7 +305,7 @@ type Labels<T extends string> = Record<T, string>;
|
|
|
306
305
|
declare module '@botpress/zai' {
|
|
307
306
|
interface Zai {
|
|
308
307
|
/** Tags the provided input with a list of predefined labels */
|
|
309
|
-
label<T extends string>(input: unknown, labels: Labels<T>, options?: Options<T>): Response<{
|
|
308
|
+
label<T extends string>(input: unknown, labels: Labels<T>, options?: Options$1<T>): Response<{
|
|
310
309
|
[K in T]: {
|
|
311
310
|
explanation: string;
|
|
312
311
|
value: boolean;
|
|
@@ -318,4 +317,26 @@ declare module '@botpress/zai' {
|
|
|
318
317
|
}
|
|
319
318
|
}
|
|
320
319
|
|
|
320
|
+
type Group<T> = {
|
|
321
|
+
id: string;
|
|
322
|
+
label: string;
|
|
323
|
+
elements: T[];
|
|
324
|
+
};
|
|
325
|
+
type InitialGroup = {
|
|
326
|
+
id: string;
|
|
327
|
+
label: string;
|
|
328
|
+
elements?: unknown[];
|
|
329
|
+
};
|
|
330
|
+
type Options = {
|
|
331
|
+
instructions?: string;
|
|
332
|
+
tokensPerElement?: number;
|
|
333
|
+
chunkLength?: number;
|
|
334
|
+
initialGroups?: Array<InitialGroup>;
|
|
335
|
+
};
|
|
336
|
+
declare module '@botpress/zai' {
|
|
337
|
+
interface Zai {
|
|
338
|
+
group<T>(input: Array<T>, options?: Options): Response<Array<Group<T>>, Record<string, T[]>>;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
321
342
|
export { Zai };
|
package/dist/index.js
CHANGED
|
package/dist/operations/errors.js
CHANGED
|
@@ -1,15 +1,119 @@
|
|
|
1
|
+
import { ZodError } from "@bpinternal/zui";
|
|
1
2
|
export class JsonParsingError extends Error {
|
|
2
3
|
constructor(json, error) {
|
|
3
|
-
const message = `Error parsing JSON:
|
|
4
|
-
|
|
5
|
-
---JSON---
|
|
6
|
-
${json}
|
|
7
|
-
|
|
8
|
-
---Error---
|
|
9
|
-
|
|
10
|
-
${error}`;
|
|
4
|
+
const message = JsonParsingError._formatError(json, error);
|
|
11
5
|
super(message);
|
|
12
6
|
this.json = json;
|
|
13
7
|
this.error = error;
|
|
14
8
|
}
|
|
9
|
+
static _formatError(json, error) {
|
|
10
|
+
let errorMessage = "Error parsing JSON:\n\n";
|
|
11
|
+
errorMessage += `---JSON---
|
|
12
|
+
${json}
|
|
13
|
+
|
|
14
|
+
`;
|
|
15
|
+
if (error instanceof ZodError) {
|
|
16
|
+
errorMessage += "---Validation Errors---\n\n";
|
|
17
|
+
errorMessage += JsonParsingError._formatZodError(error);
|
|
18
|
+
} else {
|
|
19
|
+
errorMessage += "---Error---\n\n";
|
|
20
|
+
errorMessage += "The JSON provided is not valid JSON.\n";
|
|
21
|
+
errorMessage += `Details: ${error.message}
|
|
22
|
+
`;
|
|
23
|
+
}
|
|
24
|
+
return errorMessage;
|
|
25
|
+
}
|
|
26
|
+
static _formatZodError(zodError) {
|
|
27
|
+
const issues = zodError.issues;
|
|
28
|
+
if (issues.length === 0) {
|
|
29
|
+
return "Unknown validation error\n";
|
|
30
|
+
}
|
|
31
|
+
let message = "";
|
|
32
|
+
for (let i = 0; i < issues.length; i++) {
|
|
33
|
+
const issue = issues[i];
|
|
34
|
+
const path = issue.path.length > 0 ? issue.path.join(".") : "root";
|
|
35
|
+
message += `${i + 1}. Field: "${path}"
|
|
36
|
+
`;
|
|
37
|
+
switch (issue.code) {
|
|
38
|
+
case "invalid_type":
|
|
39
|
+
message += ` Problem: Expected ${issue.expected}, but received ${issue.received}
|
|
40
|
+
`;
|
|
41
|
+
message += ` Message: ${issue.message}
|
|
42
|
+
`;
|
|
43
|
+
break;
|
|
44
|
+
case "invalid_string":
|
|
45
|
+
if ("validation" in issue) {
|
|
46
|
+
message += ` Problem: Invalid ${issue.validation} format
|
|
47
|
+
`;
|
|
48
|
+
}
|
|
49
|
+
message += ` Message: ${issue.message}
|
|
50
|
+
`;
|
|
51
|
+
break;
|
|
52
|
+
case "too_small":
|
|
53
|
+
if (issue.type === "string") {
|
|
54
|
+
if (issue.exact) {
|
|
55
|
+
message += ` Problem: String must be exactly ${issue.minimum} characters
|
|
56
|
+
`;
|
|
57
|
+
} else {
|
|
58
|
+
message += ` Problem: String must be at least ${issue.minimum} characters
|
|
59
|
+
`;
|
|
60
|
+
}
|
|
61
|
+
} else if (issue.type === "number") {
|
|
62
|
+
message += ` Problem: Number must be ${issue.inclusive ? "at least" : "greater than"} ${issue.minimum}
|
|
63
|
+
`;
|
|
64
|
+
} else if (issue.type === "array") {
|
|
65
|
+
message += ` Problem: Array must contain ${issue.inclusive ? "at least" : "more than"} ${issue.minimum} items
|
|
66
|
+
`;
|
|
67
|
+
}
|
|
68
|
+
message += ` Message: ${issue.message}
|
|
69
|
+
`;
|
|
70
|
+
break;
|
|
71
|
+
case "too_big":
|
|
72
|
+
if (issue.type === "string") {
|
|
73
|
+
if (issue.exact) {
|
|
74
|
+
message += ` Problem: String must be exactly ${issue.maximum} characters
|
|
75
|
+
`;
|
|
76
|
+
} else {
|
|
77
|
+
message += ` Problem: String must be at most ${issue.maximum} characters
|
|
78
|
+
`;
|
|
79
|
+
}
|
|
80
|
+
} else if (issue.type === "number") {
|
|
81
|
+
message += ` Problem: Number must be ${issue.inclusive ? "at most" : "less than"} ${issue.maximum}
|
|
82
|
+
`;
|
|
83
|
+
} else if (issue.type === "array") {
|
|
84
|
+
message += ` Problem: Array must contain ${issue.inclusive ? "at most" : "fewer than"} ${issue.maximum} items
|
|
85
|
+
`;
|
|
86
|
+
}
|
|
87
|
+
message += ` Message: ${issue.message}
|
|
88
|
+
`;
|
|
89
|
+
break;
|
|
90
|
+
case "invalid_enum_value":
|
|
91
|
+
message += ` Problem: Invalid value "${issue.received}"
|
|
92
|
+
`;
|
|
93
|
+
message += ` Allowed values: ${issue.options.map((o) => `"${o}"`).join(", ")}
|
|
94
|
+
`;
|
|
95
|
+
message += ` Message: ${issue.message}
|
|
96
|
+
`;
|
|
97
|
+
break;
|
|
98
|
+
case "invalid_literal":
|
|
99
|
+
message += ` Problem: Expected the literal value "${issue.expected}", but received "${issue.received}"
|
|
100
|
+
`;
|
|
101
|
+
message += ` Message: ${issue.message}
|
|
102
|
+
`;
|
|
103
|
+
break;
|
|
104
|
+
case "invalid_union":
|
|
105
|
+
message += " Problem: Value doesn't match any of the expected formats\n";
|
|
106
|
+
message += ` Message: ${issue.message}
|
|
107
|
+
`;
|
|
108
|
+
break;
|
|
109
|
+
default:
|
|
110
|
+
message += ` Problem: ${issue.message}
|
|
111
|
+
`;
|
|
112
|
+
}
|
|
113
|
+
if (i < issues.length - 1) {
|
|
114
|
+
message += "\n";
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
return message;
|
|
118
|
+
}
|
|
15
119
|
}
|
|
package/dist/operations/extract.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { z } from "@bpinternal/zui";
|
|
1
|
+
import { z, transforms } from "@bpinternal/zui";
|
|
2
2
|
import JSON5 from "json5";
|
|
3
3
|
import { jsonrepair } from "jsonrepair";
|
|
4
4
|
import { chunk, isArray } from "lodash-es";
|
|
5
|
+
import pLimit from "p-limit";
|
|
5
6
|
import { ZaiContext } from "../context";
|
|
6
7
|
import { Response } from "../response";
|
|
7
8
|
import { getTokenizer } from "../tokenizer";
|
|
@@ -17,9 +18,10 @@ const Options = z.object({
|
|
|
17
18
|
const START = "\u25A0json_start\u25A0";
|
|
18
19
|
const END = "\u25A0json_end\u25A0";
|
|
19
20
|
const NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";
|
|
21
|
+
const ZERO_ELEMENTS = "\u25A0ZERO_ELEMENTS\u25A0";
|
|
20
22
|
const extract = async (input, _schema, _options, ctx) => {
|
|
21
23
|
ctx.controller.signal.throwIfAborted();
|
|
22
|
-
let schema = _schema;
|
|
24
|
+
let schema = transforms.fromJSONSchema(transforms.toJSONSchema(_schema));
|
|
23
25
|
const options = Options.parse(_options ?? {});
|
|
24
26
|
const tokenizer = await getTokenizer();
|
|
25
27
|
const model = await ctx.getModel();
|
|
@@ -62,19 +64,22 @@ const extract = async (input, _schema, _options, ctx) => {
|
|
|
62
64
|
const keys = Object.keys(schema.shape);
|
|
63
65
|
const inputAsString = stringify(input);
|
|
64
66
|
if (tokenizer.count(inputAsString) > options.chunkLength) {
|
|
67
|
+
const limit = pLimit(10);
|
|
65
68
|
const tokens = tokenizer.split(inputAsString);
|
|
66
69
|
const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(""));
|
|
67
70
|
const all = await Promise.allSettled(
|
|
68
71
|
chunks.map(
|
|
69
|
-
(chunk2) =>
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
72
|
+
(chunk2) => limit(
|
|
73
|
+
() => extract(
|
|
74
|
+
chunk2,
|
|
75
|
+
originalSchema,
|
|
76
|
+
{
|
|
77
|
+
...options,
|
|
78
|
+
strict: false
|
|
79
|
+
// We don't want to fail on strict mode for sub-chunks
|
|
80
|
+
},
|
|
81
|
+
ctx
|
|
82
|
+
)
|
|
78
83
|
)
|
|
79
84
|
)
|
|
80
85
|
).then(
|
|
@@ -110,8 +115,11 @@ Merge it back into a final result.`.trim(),
|
|
|
110
115
|
instructions.push("You may have multiple elements, or zero elements in the input.");
|
|
111
116
|
instructions.push("You must extract each element separately.");
|
|
112
117
|
instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`);
|
|
118
|
+
instructions.push(`If there are no elements to extract, respond with ${ZERO_ELEMENTS}.`);
|
|
113
119
|
instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`);
|
|
114
|
-
instructions.push(
|
|
120
|
+
instructions.push(
|
|
121
|
+
`For example, if you have zero elements, the output should look like this: ${ZERO_ELEMENTS}${NO_MORE}`
|
|
122
|
+
);
|
|
115
123
|
instructions.push(
|
|
116
124
|
`For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
|
|
117
125
|
);
|
|
package/dist/operations/filter.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { z } from "@bpinternal/zui";
|
|
2
2
|
import { clamp } from "lodash-es";
|
|
3
|
+
import pLimit from "p-limit";
|
|
3
4
|
import { ZaiContext } from "../context";
|
|
4
5
|
import { Response } from "../response";
|
|
5
6
|
import { getTokenizer } from "../tokenizer";
|
|
@@ -191,7 +192,8 @@ The condition is: "${condition}"
|
|
|
191
192
|
}
|
|
192
193
|
return partial;
|
|
193
194
|
};
|
|
194
|
-
const
|
|
195
|
+
const limit = pLimit(10);
|
|
196
|
+
const filteredChunks = await Promise.all(chunks.map((chunk) => limit(() => filterChunk(chunk))));
|
|
195
197
|
return filteredChunks.flat();
|
|
196
198
|
};
|
|
197
199
|
Zai.prototype.filter = function(input, condition, _options) {
|