@botpress/zai 2.1.20 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -104,7 +104,32 @@ const techCompanies = await zai.filter(companies, 'are technology companies')
104
104
  const recentPosts = await zai.filter(posts, 'were published this week')
105
105
  ```
106
106
 
107
- ### 6. Text - Generate content
107
+ ### 6. Group - Organize items into categories
108
+
109
+ ```typescript
110
+ // Group items automatically
111
+ const grouped = await zai.group(tasks, {
112
+ instructions: 'Group by priority level',
113
+ })
114
+ // Result: { 'High Priority': [...], 'Medium Priority': [...], 'Low Priority': [...] }
115
+
116
+ // Group with initial categories
117
+ const categorized = await zai.group(emails, {
118
+ instructions: 'Group by topic',
119
+ initialGroups: [
120
+ { id: 'work', label: 'Work' },
121
+ { id: 'personal', label: 'Personal' },
122
+ ],
123
+ })
124
+
125
+ // Group large datasets efficiently
126
+ const organized = await zai.group(largeArray, {
127
+ instructions: 'Group by date',
128
+ chunkLength: 8000, // Process in chunks for better performance
129
+ })
130
+ ```
131
+
132
+ ### 7. Text - Generate content
108
133
 
109
134
  ```typescript
110
135
  const blogPost = await zai.text('Write about the future of AI', {
@@ -113,7 +138,7 @@ const blogPost = await zai.text('Write about the future of AI', {
113
138
  })
114
139
  ```
115
140
 
116
- ### 7. Summarize - Create summaries
141
+ ### 8. Summarize - Create summaries
117
142
 
118
143
  ```typescript
119
144
  // Simple summary
@@ -237,6 +262,7 @@ setTimeout(() => controller.abort(), 5000)
237
262
  - `.label(content, criteria, options?)` - Apply multiple labels
238
263
  - `.rewrite(content, instruction, options?)` - Transform text
239
264
  - `.filter(items, condition, options?)` - Filter array items
265
+ - `.group(items, options?)` - Organize items into categories
240
266
  - `.text(prompt, options?)` - Generate text
241
267
  - `.summarize(content, options?)` - Create summary
242
268
 
package/dist/index.d.ts CHANGED
@@ -166,14 +166,14 @@ declare class Response<T = any, S = T> implements PromiseLike<S> {
166
166
  }>;
167
167
  }
168
168
 
169
- type Options$6 = {
169
+ type Options$7 = {
170
170
  /** The maximum number of tokens to generate */
171
171
  length?: number;
172
172
  };
173
173
  declare module '@botpress/zai' {
174
174
  interface Zai {
175
175
  /** Generates a text of the desired length according to the prompt */
176
- text(prompt: string, options?: Options$6): Response<string>;
176
+ text(prompt: string, options?: Options$7): Response<string>;
177
177
  }
178
178
  }
179
179
 
@@ -182,7 +182,7 @@ type Example$3 = {
182
182
  output: string;
183
183
  instructions?: string;
184
184
  };
185
- type Options$5 = {
185
+ type Options$6 = {
186
186
  /** Examples to guide the rewriting */
187
187
  examples?: Array<Example$3>;
188
188
  /** The maximum number of tokens to generate */
@@ -191,11 +191,11 @@ type Options$5 = {
191
191
  declare module '@botpress/zai' {
192
192
  interface Zai {
193
193
  /** Rewrites a string to match the prompt */
194
- rewrite(original: string, prompt: string, options?: Options$5): Response<string>;
194
+ rewrite(original: string, prompt: string, options?: Options$6): Response<string>;
195
195
  }
196
196
  }
197
197
 
198
- type Options$4 = {
198
+ type Options$5 = {
199
199
  /** What should the text be summarized to? */
200
200
  prompt?: string;
201
201
  /** How to format the example text */
@@ -215,7 +215,7 @@ type Options$4 = {
215
215
  declare module '@botpress/zai' {
216
216
  interface Zai {
217
217
  /** Summarizes a text of any length to a summary of the desired length */
218
- summarize(original: string, options?: Options$4): Response<string>;
218
+ summarize(original: string, options?: Options$5): Response<string>;
219
219
  }
220
220
  }
221
221
 
@@ -225,14 +225,14 @@ type Example$2 = {
225
225
  reason?: string;
226
226
  condition?: string;
227
227
  };
228
- type Options$3 = {
228
+ type Options$4 = {
229
229
  /** Examples to check the condition against */
230
230
  examples?: Array<Example$2>;
231
231
  };
232
232
  declare module '@botpress/zai' {
233
233
  interface Zai {
234
234
  /** Checks whether a condition is true or not */
235
- check(input: unknown, condition: string, options?: Options$3): Response<{
235
+ check(input: unknown, condition: string, options?: Options$4): Response<{
236
236
  /** Whether the condition is true or not */
237
237
  value: boolean;
238
238
  /** The explanation of the decision */
@@ -246,7 +246,7 @@ type Example$1 = {
246
246
  filter: boolean;
247
247
  reason?: string;
248
248
  };
249
- type Options$2 = {
249
+ type Options$3 = {
250
250
  /** The maximum number of tokens per item */
251
251
  tokensPerItem?: number;
252
252
  /** Examples to filter the condition against */
@@ -255,11 +255,11 @@ type Options$2 = {
255
255
  declare module '@botpress/zai' {
256
256
  interface Zai {
257
257
  /** Filters elements of an array against a condition */
258
- filter<T>(input: Array<T>, condition: string, options?: Options$2): Response<Array<T>>;
258
+ filter<T>(input: Array<T>, condition: string, options?: Options$3): Response<Array<T>>;
259
259
  }
260
260
  }
261
261
 
262
- type Options$1 = {
262
+ type Options$2 = {
263
263
  /** Instructions to guide the user on how to extract the data */
264
264
  instructions?: string;
265
265
  /** The maximum number of tokens per chunk */
@@ -274,7 +274,7 @@ type OfType<O, T extends __Z = __Z<O>> = T extends __Z<O> ? T : never;
274
274
  declare module '@botpress/zai' {
275
275
  interface Zai {
276
276
  /** Extracts one or many elements from an arbitrary input */
277
- extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$1): Response<S['_output']>;
277
+ extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$2): Response<S['_output']>;
278
278
  }
279
279
  }
280
280
 
@@ -293,7 +293,7 @@ type Example<T extends string> = {
293
293
  explanation?: string;
294
294
  }>>;
295
295
  };
296
- type Options<T extends string> = {
296
+ type Options$1<T extends string> = {
297
297
  /** Examples to help the user make a decision */
298
298
  examples?: Array<Example<T>>;
299
299
  /** Instructions to guide the user on how to extract the data */
@@ -305,7 +305,7 @@ type Labels<T extends string> = Record<T, string>;
305
305
  declare module '@botpress/zai' {
306
306
  interface Zai {
307
307
  /** Tags the provided input with a list of predefined labels */
308
- label<T extends string>(input: unknown, labels: Labels<T>, options?: Options<T>): Response<{
308
+ label<T extends string>(input: unknown, labels: Labels<T>, options?: Options$1<T>): Response<{
309
309
  [K in T]: {
310
310
  explanation: string;
311
311
  value: boolean;
@@ -317,4 +317,26 @@ declare module '@botpress/zai' {
317
317
  }
318
318
  }
319
319
 
320
/** A group of elements produced by `zai.group` */
type Group<T> = {
    /** The id given in `initialGroups`, or a generated `group_<n>` id for a group created while grouping */
    id: string;
    /** Human-readable label of the group */
    label: string;
    /** The input elements assigned to this group */
    elements: T[];
};
/** A group that exists before grouping starts */
type InitialGroup = {
    /** Unique id of the group (1 to 100 characters) */
    id: string;
    /** Label shown to the model as an existing group (1 to 250 characters) */
    label: string;
    /** Elements already in the group. NOTE(review): accepted by validation, but the bundled implementation does not appear to read them — confirm */
    elements?: unknown[];
};
type Options = {
    /** Instructions describing how the elements should be grouped */
    instructions?: string;
    /** The maximum number of tokens kept per element, longer elements are truncated (1 to 100000, default: 250) */
    tokensPerElement?: number;
    /** The intended maximum number of tokens per chunk of elements (100 to 100000, default: 16000) */
    chunkLength?: number;
    /** Existing groups that the model is asked to prefer reusing */
    initialGroups?: Array<InitialGroup>;
};
declare module '@botpress/zai' {
    interface Zai {
        /** Groups the elements of an array into labelled groups; awaiting the response yields a record of elements keyed by group label */
        group<T>(input: Array<T>, options?: Options): Response<Array<Group<T>>, Record<string, T[]>>;
    }
}
341
+
320
342
  export { Zai };
package/dist/index.js CHANGED
@@ -6,4 +6,5 @@ import "./operations/check";
6
6
  import "./operations/filter";
7
7
  import "./operations/extract";
8
8
  import "./operations/label";
9
+ import "./operations/group";
9
10
  export { Zai };
@@ -0,0 +1,278 @@
1
+ import { z } from "@bpinternal/zui";
2
+ import { clamp } from "lodash-es";
3
+ import pLimit from "p-limit";
4
+ import { ZaiContext } from "../context";
5
+ import { Response } from "../response";
6
+ import { getTokenizer } from "../tokenizer";
7
+ import { stringify } from "../utils";
8
+ import { Zai } from "../zai";
9
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
10
// Sentinel appended to both prompts and passed to the model as the stop sequence.
const END = "\u25A0END\u25A0";

// Optional number constrained to [min, max] that falls back to `fallback` when omitted.
const _boundedNumber = (min, max, fallback) => z.number().min(min).max(max).optional().default(fallback);

// Shape of a caller-supplied group that exists before grouping starts.
const _InitialGroup = z.object({
  id: z.string().min(1).max(100),
  label: z.string().min(1).max(250),
  elements: z.array(z.any()).optional().default([])
});

// Shape of the options accepted by `group`; omitted fields receive their defaults.
const _Options = z.object({
  instructions: z.string().optional(),
  tokensPerElement: _boundedNumber(1, 100000, 250),
  chunkLength: _boundedNumber(100, 100000, 16000),
  initialGroups: z.array(_InitialGroup).optional().default([])
});
22
/**
 * Builds the key used to decide whether two labels refer to the same group.
 *
 * The label is trimmed and lower-cased, then a leading "group", "new group" or
 * "new" is removed twice in a row: first when it is followed by a "-" or ":"
 * separator, then when it is followed by whitespace. The two removals are
 * sequential, so "New: Group Alpha" becomes "alpha".
 *
 * @param label - Label text to normalize.
 * @returns The normalized comparison key.
 */
const normalizeLabel = (label) => {
  const prefixWithSeparator = /^(group|new group|new)\s*[-:]\s*/i;
  const prefixWithSpace = /^(group|new group|new)\s+/i;
  const lowered = label.trim().toLowerCase();
  const withoutSeparatedPrefix = lowered.replace(prefixWithSeparator, "");
  const withoutSpacedPrefix = withoutSeparatedPrefix.replace(prefixWithSpace, "");
  return withoutSpacedPrefix.trim();
};
25
/**
 * Assigns the elements of `input` to labelled groups using the model behind `ctx`.
 *
 * The work happens in four steps:
 *   1. Elements are stringified, truncated to `tokensPerElement` tokens and split
 *      into chunks bounded by a token budget and by 50 elements per chunk.
 *   2. Every element chunk is sent to the model together with each chunk of the
 *      known group labels; the returned labels are matched to groups through
 *      `normalizeLabel`, creating new groups for unknown labels.
 *   3. Elements that are not recorded in every known group are sent again now
 *      that all groups are known; this pass never creates groups.
 *   4. An element recorded in several groups is kept only in the first group
 *      that was recorded for it.
 *
 * @param input - Array of items to group.
 * @param _options - Raw options; validated with `_Options`, which throws when they are invalid.
 * @param ctx - Context providing `controller`, `getModel()` and `generateContent()`.
 * @returns Array of `{ id, label, elements }` holding only the groups that received
 *   at least one element. An element for which no label could be parsed is in no group.
 */
const group = async (input, _options, ctx) => {
  ctx.controller.signal.throwIfAborted();
  const options = _Options.parse(_options ?? {});
  // Nothing to group: return before loading the tokenizer or the model.
  if (input.length === 0) {
    return [];
  }
  const tokenizer = await getTokenizer();
  const model = await ctx.getModel();

  const groups = new Map(); // group id -> { id, label, normalizedLabel }
  const groupElements = new Map(); // group id -> Set of element indices
  const elementGroups = new Map(); // element index -> Set of group ids, in the order they were recorded
  const labelToGroupId = new Map(); // normalized label -> group id
  let groupIdCounter = 0;

  for (const initialGroup of options.initialGroups) {
    const normalized = normalizeLabel(initialGroup.label);
    groups.set(initialGroup.id, { id: initialGroup.id, label: initialGroup.label, normalizedLabel: normalized });
    groupElements.set(initialGroup.id, new Set());
    labelToGroupId.set(normalized, initialGroup.id);
  }

  // Generated ids must not reuse an id supplied through `initialGroups` (for
  // example "group_0"): doing so would replace that group and drop its elements.
  const nextGroupId = () => {
    let candidate;
    do {
      candidate = `group_${groupIdCounter++}`;
    } while (groups.has(candidate));
    return candidate;
  };

  // Truncate and count each element once; both passes and every prompt reuse the result.
  const elements = input.map((element, idx) => {
    const truncated = tokenizer.truncate(stringify(element, false), options.tokensPerElement);
    return { element, index: idx, truncated, tokens: tokenizer.count(truncated) };
  });

  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_INSTRUCTIONS_MAX = options.instructions ? clamp(tokenizer.count(options.instructions), 100, TOKENS_TOTAL_MAX * 0.2) : 0;
  const TOKENS_AVAILABLE = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX;
  const TOKENS_FOR_GROUPS_MAX = Math.floor(TOKENS_AVAILABLE * 0.4);
  // `chunkLength` caps the element share of the prompt; it can only lower the
  // 60% share derived from the model's input size, never raise it.
  const TOKENS_FOR_ELEMENTS_MAX = Math.min(options.chunkLength, Math.floor(TOKENS_AVAILABLE * 0.6));
  const MAX_ELEMENTS_PER_CHUNK = 50;

  // Splits element indices into chunks that respect the token budget and the
  // per-chunk element cap. An element larger than the budget gets its own chunk.
  const chunkElements = (indices) => {
    const chunks = [];
    let current = [];
    let currentTokens = 0;
    for (const idx of indices) {
      const { tokens } = elements[idx];
      const isFull = currentTokens + tokens > TOKENS_FOR_ELEMENTS_MAX || current.length >= MAX_ELEMENTS_PER_CHUNK;
      if (isFull && current.length > 0) {
        chunks.push(current);
        current = [];
        currentTokens = 0;
      }
      current.push(idx);
      currentTokens += tokens;
    }
    if (current.length > 0) {
      chunks.push(current);
    }
    return chunks;
  };

  // Splits the currently known group ids into chunks whose labels fit the group
  // budget. Returns a single empty chunk when there are no groups, so that each
  // element chunk is still sent to the model once.
  const getGroupChunks = () => {
    const chunks = [];
    let current = [];
    let currentTokens = 0;
    for (const [groupId, info] of groups) {
      // 10 extra tokens per group allow for the list formatting around the label.
      const groupTokens = tokenizer.count(`${info.label}`) + 10;
      if (currentTokens + groupTokens > TOKENS_FOR_GROUPS_MAX && current.length > 0) {
        chunks.push(current);
        current = [];
        currentTokens = 0;
      }
      current.push(groupId);
      currentTokens += groupTokens;
    }
    if (current.length > 0) {
      chunks.push(current);
    }
    return chunks.length > 0 ? chunks : [[]];
  };

  // Asks the model to label one chunk of elements while showing it one chunk of
  // existing groups. Resolves to a list of { elementIndex, label }.
  const processChunk = async (elementIndices, groupIds) => {
    // Elements are numbered by their position inside the chunk, not by input index.
    const elementsText = elementIndices.map((idx, i) => `\u25A0${i}: ${elements[idx].truncated}\u25A0`).join("\n");
    const groupsList = groupIds.map((gid) => groups.get(gid).label);
    const groupsText = groupsList.length > 0 ? `**Existing Groups (prefer reusing these):**\n${groupsList.map((l) => `- ${l}`).join("\n")}\n\n` : "";
    const instructionsText = options.instructions ? `**Instructions:** ${options.instructions}\n` : "**Instructions:** Group similar elements together.";
    const systemPrompt = [
      "You are grouping elements into cohesive groups.",
      "",
      instructionsText,
      "",
      "**Important:**",
      "- Each element gets exactly ONE group label",
      "- Use EXACT SAME label for similar items (case-sensitive)",
      "- Create new descriptive labels when needed",
      "",
      "**Output Format:**",
      "One line per element:",
      "\u25A00:Group Label\u25A0",
      "\u25A01:Group Label\u25A0",
      END
    ].join("\n").trim();
    const userPrompt = [
      `${groupsText}**Elements (\u25A00 to \u25A0${elementIndices.length - 1}):**`,
      elementsText,
      "",
      "**Task:** For each element, output one line with its group label.",
      END
    ].join("\n").trim();
    const { extracted } = await ctx.generateContent({
      systemPrompt,
      stopSequences: [END],
      messages: [{ type: "text", role: "user", content: userPrompt }],
      transform: (text) => {
        const assignments = [];
        const regex = /■(\d+):([^■]+)■/g;
        let match;
        while ((match = regex.exec(text)) !== null) {
          const idx = parseInt(match[1] ?? "", 10);
          // Ignore positions that do not refer to an element of this chunk.
          if (isNaN(idx) || idx < 0 || idx >= elementIndices.length) continue;
          const label = (match[2] ?? "").trim();
          if (!label) continue;
          assignments.push({
            elementIndex: elementIndices[idx],
            label: label.slice(0, 250)
          });
        }
        return assignments;
      }
    });
    return extracted;
  };

  const elementLimit = pLimit(10);
  const groupLimit = pLimit(10);

  // Labels every element chunk against every chunk of the groups known when the
  // pass starts. Groups are only added between passes, so the group chunks are
  // computed once per pass. Resolves to a flat list of assignments.
  const labelChunks = async (chunks) => {
    const groupChunks = getGroupChunks();
    const perChunk = await Promise.all(
      chunks.map(
        (chunk) => elementLimit(async () => {
          const perGroupChunk = await Promise.all(
            groupChunks.map((groupChunk) => groupLimit(() => processChunk(chunk, groupChunk)))
          );
          return perGroupChunk.flat();
        })
      )
    );
    return perChunk.flat();
  };

  // Records that `elementIndex` was labelled with the group `groupId`.
  const recordAssignment = (elementIndex, groupId) => {
    groupElements.get(groupId).add(elementIndex);
    let recorded = elementGroups.get(elementIndex);
    if (!recorded) {
      recorded = new Set();
      elementGroups.set(elementIndex, recorded);
    }
    recorded.add(groupId);
  };

  // First pass: label everything, creating a group for each label not seen before.
  const firstPass = await labelChunks(chunkElements(elements.map((elem) => elem.index)));
  for (const { elementIndex, label } of firstPass) {
    const normalized = normalizeLabel(label);
    let groupId = labelToGroupId.get(normalized);
    if (groupId === undefined) {
      groupId = nextGroupId();
      groups.set(groupId, { id: groupId, label, normalizedLabel: normalized });
      groupElements.set(groupId, new Set());
      labelToGroupId.set(normalized, groupId);
    }
    recordAssignment(elementIndex, groupId);
  }

  // Review pass: elements not recorded in every known group are labelled again
  // with the complete group list.
  // NOTE(review): the conflict resolution below keeps the first group recorded
  // for an element, so a label returned here only takes effect for elements
  // that received no label in the first pass — confirm this is intended.
  const allGroupIds = Array.from(groups.keys());
  if (allGroupIds.length > 0) {
    const elementsNeedingReview = elements.filter((elem) => {
      const recorded = elementGroups.get(elem.index);
      return !recorded || allGroupIds.some((gid) => !recorded.has(gid));
    }).map((elem) => elem.index);
    if (elementsNeedingReview.length > 0) {
      const reviewPass = await labelChunks(chunkElements(elementsNeedingReview));
      for (const { elementIndex, label } of reviewPass) {
        const groupId = labelToGroupId.get(normalizeLabel(label));
        // Labels that match no known group are ignored: this pass never creates groups.
        if (groupId === undefined) continue;
        recordAssignment(elementIndex, groupId);
      }
    }
  }

  // Keep each element in a single group: the first one recorded for it.
  for (const [elementIndex, groupSet] of elementGroups.entries()) {
    if (groupSet.size > 1) {
      const groupIds = Array.from(groupSet);
      for (const gid of groupIds) {
        groupElements.get(gid)?.delete(elementIndex);
      }
      groupElements.get(groupIds[0]).add(elementIndex);
    }
  }

  const result = [];
  for (const [groupId, elementIndices] of groupElements.entries()) {
    // Groups that ended up empty are left out of the result.
    if (elementIndices.size > 0) {
      const groupInfo = groups.get(groupId);
      result.push({
        id: groupInfo.id,
        label: groupInfo.label,
        elements: Array.from(elementIndices).map((idx) => elements[idx].element)
      });
    }
  }
  return result;
};
260
/**
 * Adds the `group` operation to `Zai`.
 *
 * Creates a context for a "zai.group" task from this instance's client, model,
 * task id and adapter, starts the grouping, and wraps the pending result in a
 * `Response`. The third `Response` argument converts the list of groups into a
 * plain object that maps each group label to the elements of every group
 * carrying that label.
 *
 * @param input - Array of items to group.
 * @param _options - Optional grouping options, validated inside `group`.
 * @returns A `Response` wrapping the pending list of `{ id, label, elements }` groups.
 */
Zai.prototype.group = function(input, _options) {
  const context = new ZaiContext({
    client: this.client,
    modelId: this.Model,
    taskId: this.taskId,
    taskType: "zai.group",
    adapter: this.adapter
  });
  return new Response(context, group(input, _options, context), (result) => {
    // Labels come from the model, so they are collected in a Map: a label such
    // as "constructor" or "__proto__" must not be confused with a member
    // inherited from Object.prototype.
    const elementsByLabel = new Map();
    for (const { label, elements } of result) {
      let bucket = elementsByLabel.get(label);
      if (!bucket) {
        bucket = [];
        elementsByLabel.set(label, bucket);
      }
      // Push one by one: spreading a very large group into push() can exceed
      // the engine's argument limit.
      for (const element of elements) {
        bucket.push(element);
      }
    }
    // Object.fromEntries defines every key as an own property of the result.
    return Object.fromEntries(elementsByLabel);
  });
};