@botpress/zai 2.1.20 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +696 -0
- package/README.md +79 -2
- package/dist/index.d.ts +85 -14
- package/dist/index.js +3 -0
- package/dist/operations/group.js +369 -0
- package/dist/operations/rate.js +350 -0
- package/dist/operations/sort.js +450 -0
- package/e2e/data/cache.jsonl +289 -0
- package/package.json +1 -1
- package/src/index.ts +3 -0
- package/src/operations/group.ts +543 -0
- package/src/operations/rate.ts +518 -0
- package/src/operations/sort.ts +618 -0
package/README.md
CHANGED
|
@@ -104,7 +104,81 @@ const techCompanies = await zai.filter(companies, 'are technology companies')
|
|
|
104
104
|
const recentPosts = await zai.filter(posts, 'were published this week')
|
|
105
105
|
```
|
|
106
106
|
|
|
107
|
-
### 6.
|
|
107
|
+
### 6. Group - Organize items into categories
|
|
108
|
+
|
|
109
|
+
```typescript
|
|
110
|
+
// Group items automatically
|
|
111
|
+
const grouped = await zai.group(tasks, {
|
|
112
|
+
instructions: 'Group by priority level',
|
|
113
|
+
})
|
|
114
|
+
// Result: { 'High Priority': [...], 'Medium Priority': [...], 'Low Priority': [...] }
|
|
115
|
+
|
|
116
|
+
// Group with initial categories
|
|
117
|
+
const categorized = await zai.group(emails, {
|
|
118
|
+
instructions: 'Group by topic',
|
|
119
|
+
initialGroups: [
|
|
120
|
+
{ id: 'work', label: 'Work' },
|
|
121
|
+
{ id: 'personal', label: 'Personal' },
|
|
122
|
+
],
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
// Group large datasets efficiently
|
|
126
|
+
const organized = await zai.group(largeArray, {
|
|
127
|
+
instructions: 'Group by date',
|
|
128
|
+
chunkLength: 8000, // Process in chunks for better performance
|
|
129
|
+
})
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### 7. Rate - Score items on a 1-5 scale
|
|
133
|
+
|
|
134
|
+
```typescript
|
|
135
|
+
// Auto-generate criteria (returns total score)
|
|
136
|
+
const scores = await zai.rate(products, 'is it a good value product?')
|
|
137
|
+
// Result: [12, 8, 15] (total scores for each item)
|
|
138
|
+
|
|
139
|
+
// Get detailed ratings
|
|
140
|
+
const { output } = await zai.rate(products, 'is it a good value product?').result()
|
|
141
|
+
// Result: [
|
|
142
|
+
// { affordability: 4, quality: 5, features: 3, total: 12 },
|
|
143
|
+
// { affordability: 3, quality: 2, features: 3, total: 8 },
|
|
144
|
+
// ...
|
|
145
|
+
// ]
|
|
146
|
+
|
|
147
|
+
// Use fixed criteria
|
|
148
|
+
const ratings = await zai.rate(passwords, {
|
|
149
|
+
length: 'password length (12+ chars = very_good, 8-11 = good, 6-7 = average, 4-5 = bad, <4 = very_bad)',
|
|
150
|
+
complexity: 'character variety (all types = very_good, 3 types = good, 2 types = average, 1 type = bad)',
|
|
151
|
+
strength: 'overall password strength',
|
|
152
|
+
})
|
|
153
|
+
// Result: [
|
|
154
|
+
// { length: 5, complexity: 5, strength: 5, total: 15 },
|
|
155
|
+
// { length: 1, complexity: 1, strength: 1, total: 3 },
|
|
156
|
+
// ]
|
|
157
|
+
|
|
158
|
+
// Rate large datasets efficiently (parallelized)
|
|
159
|
+
const allRatings = await zai.rate(Array(500).fill(item), 'how complete is this?')
|
|
160
|
+
// Processes ~500 items in ~120ms with automatic chunking
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 8. Sort - Order items with natural language
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
// Sort by natural criteria
|
|
167
|
+
const sorted = await zai.sort(emails, 'sort by urgency')
|
|
168
|
+
// LLM determines criteria and orders items accordingly
|
|
169
|
+
|
|
170
|
+
// Sort with detailed results
|
|
171
|
+
const { output } = await zai.sort(tasks, 'sort by priority').result()
|
|
172
|
+
// output includes scoring breakdown for each item
|
|
173
|
+
|
|
174
|
+
// Complex multi-criteria sorting
|
|
175
|
+
const prioritized = await zai.sort(tickets, 'sort by customer importance and issue severity')
|
|
176
|
+
|
|
177
|
+
// Sort large datasets efficiently (parallelized with chunking)
|
|
178
|
+
const orderedItems = await zai.sort(Array(500).fill(item), 'sort by relevance')
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### 9. Text - Generate content
|
|
108
182
|
|
|
109
183
|
```typescript
|
|
110
184
|
const blogPost = await zai.text('Write about the future of AI', {
|
|
@@ -113,7 +187,7 @@ const blogPost = await zai.text('Write about the future of AI', {
|
|
|
113
187
|
})
|
|
114
188
|
```
|
|
115
189
|
|
|
116
|
-
###
|
|
190
|
+
### 10. Summarize - Create summaries
|
|
117
191
|
|
|
118
192
|
```typescript
|
|
119
193
|
// Simple summary
|
|
@@ -237,6 +311,9 @@ setTimeout(() => controller.abort(), 5000)
|
|
|
237
311
|
- `.label(content, criteria, options?)` - Apply multiple labels
|
|
238
312
|
- `.rewrite(content, instruction, options?)` - Transform text
|
|
239
313
|
- `.filter(items, condition, options?)` - Filter array items
|
|
314
|
+
- `.group(items, options?)` - Organize items into categories
|
|
315
|
+
- `.rate(items, instructions, options?)` - Rate items on 1-5 scale
|
|
316
|
+
- `.sort(items, instructions, options?)` - Order items with natural language
|
|
240
317
|
- `.text(prompt, options?)` - Generate text
|
|
241
318
|
- `.summarize(content, options?)` - Create summary
|
|
242
319
|
|
package/dist/index.d.ts
CHANGED
|
@@ -166,14 +166,14 @@ declare class Response<T = any, S = T> implements PromiseLike<S> {
|
|
|
166
166
|
}>;
|
|
167
167
|
}
|
|
168
168
|
|
|
169
|
-
type Options$
|
|
169
|
+
type Options$9 = {
|
|
170
170
|
/** The maximum number of tokens to generate */
|
|
171
171
|
length?: number;
|
|
172
172
|
};
|
|
173
173
|
declare module '@botpress/zai' {
|
|
174
174
|
interface Zai {
|
|
175
175
|
/** Generates a text of the desired length according to the prompt */
|
|
176
|
-
text(prompt: string, options?: Options$
|
|
176
|
+
text(prompt: string, options?: Options$9): Response<string>;
|
|
177
177
|
}
|
|
178
178
|
}
|
|
179
179
|
|
|
@@ -182,7 +182,7 @@ type Example$3 = {
|
|
|
182
182
|
output: string;
|
|
183
183
|
instructions?: string;
|
|
184
184
|
};
|
|
185
|
-
type Options$
|
|
185
|
+
type Options$8 = {
|
|
186
186
|
/** Examples to guide the rewriting */
|
|
187
187
|
examples?: Array<Example$3>;
|
|
188
188
|
/** The maximum number of tokens to generate */
|
|
@@ -191,11 +191,11 @@ type Options$5 = {
|
|
|
191
191
|
declare module '@botpress/zai' {
|
|
192
192
|
interface Zai {
|
|
193
193
|
/** Rewrites a string to match the prompt */
|
|
194
|
-
rewrite(original: string, prompt: string, options?: Options$
|
|
194
|
+
rewrite(original: string, prompt: string, options?: Options$8): Response<string>;
|
|
195
195
|
}
|
|
196
196
|
}
|
|
197
197
|
|
|
198
|
-
type Options$
|
|
198
|
+
type Options$7 = {
|
|
199
199
|
/** What should the text be summarized to? */
|
|
200
200
|
prompt?: string;
|
|
201
201
|
/** How to format the example text */
|
|
@@ -215,7 +215,7 @@ type Options$4 = {
|
|
|
215
215
|
declare module '@botpress/zai' {
|
|
216
216
|
interface Zai {
|
|
217
217
|
/** Summarizes a text of any length to a summary of the desired length */
|
|
218
|
-
summarize(original: string, options?: Options$
|
|
218
|
+
summarize(original: string, options?: Options$7): Response<string>;
|
|
219
219
|
}
|
|
220
220
|
}
|
|
221
221
|
|
|
@@ -225,14 +225,14 @@ type Example$2 = {
|
|
|
225
225
|
reason?: string;
|
|
226
226
|
condition?: string;
|
|
227
227
|
};
|
|
228
|
-
type Options$
|
|
228
|
+
type Options$6 = {
|
|
229
229
|
/** Examples to check the condition against */
|
|
230
230
|
examples?: Array<Example$2>;
|
|
231
231
|
};
|
|
232
232
|
declare module '@botpress/zai' {
|
|
233
233
|
interface Zai {
|
|
234
234
|
/** Checks whether a condition is true or not */
|
|
235
|
-
check(input: unknown, condition: string, options?: Options$
|
|
235
|
+
check(input: unknown, condition: string, options?: Options$6): Response<{
|
|
236
236
|
/** Whether the condition is true or not */
|
|
237
237
|
value: boolean;
|
|
238
238
|
/** The explanation of the decision */
|
|
@@ -246,7 +246,7 @@ type Example$1 = {
|
|
|
246
246
|
filter: boolean;
|
|
247
247
|
reason?: string;
|
|
248
248
|
};
|
|
249
|
-
type Options$
|
|
249
|
+
type Options$5 = {
|
|
250
250
|
/** The maximum number of tokens per item */
|
|
251
251
|
tokensPerItem?: number;
|
|
252
252
|
/** Examples to filter the condition against */
|
|
@@ -255,11 +255,11 @@ type Options$2 = {
|
|
|
255
255
|
declare module '@botpress/zai' {
|
|
256
256
|
interface Zai {
|
|
257
257
|
/** Filters elements of an array against a condition */
|
|
258
|
-
filter<T>(input: Array<T>, condition: string, options?: Options$
|
|
258
|
+
filter<T>(input: Array<T>, condition: string, options?: Options$5): Response<Array<T>>;
|
|
259
259
|
}
|
|
260
260
|
}
|
|
261
261
|
|
|
262
|
-
type Options$
|
|
262
|
+
type Options$4 = {
|
|
263
263
|
/** Instructions to guide the user on how to extract the data */
|
|
264
264
|
instructions?: string;
|
|
265
265
|
/** The maximum number of tokens per chunk */
|
|
@@ -274,7 +274,7 @@ type OfType<O, T extends __Z = __Z<O>> = T extends __Z<O> ? T : never;
|
|
|
274
274
|
declare module '@botpress/zai' {
|
|
275
275
|
interface Zai {
|
|
276
276
|
/** Extracts one or many elements from an arbitrary input */
|
|
277
|
-
extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$
|
|
277
|
+
extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$4): Response<S['_output']>;
|
|
278
278
|
}
|
|
279
279
|
}
|
|
280
280
|
|
|
@@ -293,7 +293,7 @@ type Example<T extends string> = {
|
|
|
293
293
|
explanation?: string;
|
|
294
294
|
}>>;
|
|
295
295
|
};
|
|
296
|
-
type Options<T extends string> = {
|
|
296
|
+
type Options$3<T extends string> = {
|
|
297
297
|
/** Examples to help the user make a decision */
|
|
298
298
|
examples?: Array<Example<T>>;
|
|
299
299
|
/** Instructions to guide the user on how to extract the data */
|
|
@@ -305,7 +305,7 @@ type Labels<T extends string> = Record<T, string>;
|
|
|
305
305
|
declare module '@botpress/zai' {
|
|
306
306
|
interface Zai {
|
|
307
307
|
/** Tags the provided input with a list of predefined labels */
|
|
308
|
-
label<T extends string>(input: unknown, labels: Labels<T>, options?: Options<T>): Response<{
|
|
308
|
+
label<T extends string>(input: unknown, labels: Labels<T>, options?: Options$3<T>): Response<{
|
|
309
309
|
[K in T]: {
|
|
310
310
|
explanation: string;
|
|
311
311
|
value: boolean;
|
|
@@ -317,4 +317,75 @@ declare module '@botpress/zai' {
|
|
|
317
317
|
}
|
|
318
318
|
}
|
|
319
319
|
|
|
320
|
+
type Group<T> = {
|
|
321
|
+
id: string;
|
|
322
|
+
label: string;
|
|
323
|
+
elements: T[];
|
|
324
|
+
};
|
|
325
|
+
type InitialGroup = {
|
|
326
|
+
id: string;
|
|
327
|
+
label: string;
|
|
328
|
+
elements?: unknown[];
|
|
329
|
+
};
|
|
330
|
+
type Options$2 = {
|
|
331
|
+
instructions?: string;
|
|
332
|
+
tokensPerElement?: number;
|
|
333
|
+
chunkLength?: number;
|
|
334
|
+
initialGroups?: Array<InitialGroup>;
|
|
335
|
+
};
|
|
336
|
+
declare module '@botpress/zai' {
|
|
337
|
+
interface Zai {
|
|
338
|
+
group<T>(input: Array<T>, options?: Options$2): Response<Array<Group<T>>, Record<string, T[]>>;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
type RatingInstructions = string | Record<string, string>;
|
|
343
|
+
type Options$1 = {
|
|
344
|
+
/** The maximum number of tokens per item */
|
|
345
|
+
tokensPerItem?: number;
|
|
346
|
+
/** The maximum number of items to rate per chunk */
|
|
347
|
+
maxItemsPerChunk?: number;
|
|
348
|
+
};
|
|
349
|
+
type RatingResult<T extends RatingInstructions> = T extends string ? {
|
|
350
|
+
[key: string]: number;
|
|
351
|
+
total: number;
|
|
352
|
+
} : T extends Record<string, string> ? {
|
|
353
|
+
[K in keyof T]: number;
|
|
354
|
+
} & {
|
|
355
|
+
total: number;
|
|
356
|
+
} : never;
|
|
357
|
+
type SimplifiedRatingResult<T extends RatingInstructions> = T extends string ? number : RatingResult<T>;
|
|
358
|
+
declare module '@botpress/zai' {
|
|
359
|
+
interface Zai {
|
|
360
|
+
/**
|
|
361
|
+
* Rates an array of items based on provided instructions.
|
|
362
|
+
* When awaited, resolves to one number per item if instructions is a string, or to one object of per-criterion scores plus a `total` per item if instructions is a Record.
|
|
363
|
+
*/
|
|
364
|
+
rate<T, I extends RatingInstructions>(input: Array<T>, instructions: I, options?: Options$1): Response<Array<RatingResult<I>>, Array<SimplifiedRatingResult<I>>>;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
type Options = {
|
|
369
|
+
/** The maximum number of tokens per item */
|
|
370
|
+
tokensPerItem?: number;
|
|
371
|
+
};
|
|
372
|
+
declare module '@botpress/zai' {
|
|
373
|
+
interface Zai {
|
|
374
|
+
/**
|
|
375
|
+
* Sorts an array of items based on provided instructions.
|
|
376
|
+
* Returns the sorted array directly when awaited.
|
|
377
|
+
* Use .result() to get detailed scoring information including why each item got its position.
|
|
378
|
+
*
|
|
379
|
+
* @example
|
|
380
|
+
* // Simple usage
|
|
381
|
+
* const sorted = await zai.sort(items, 'from least expensive to most expensive')
|
|
382
|
+
*
|
|
383
|
+
* @example
|
|
384
|
+
* // Get detailed results
|
|
385
|
+
* const { output: sorted, usage } = await zai.sort(items, 'by priority').result()
|
|
386
|
+
*/
|
|
387
|
+
sort<T>(input: Array<T>, instructions: string, options?: Options): Response<Array<T>, Array<T>>;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
|
|
320
391
|
export { Zai };
|
package/dist/index.js
CHANGED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
import { z } from "@bpinternal/zui";
|
|
2
|
+
import { clamp } from "lodash-es";
|
|
3
|
+
import pLimit from "p-limit";
|
|
4
|
+
import { ZaiContext } from "../context";
|
|
5
|
+
import { Response } from "../response";
|
|
6
|
+
import { getTokenizer } from "../tokenizer";
|
|
7
|
+
import { fastHash, stringify } from "../utils";
|
|
8
|
+
import { Zai } from "../zai";
|
|
9
|
+
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
|
|
10
|
+
// Sentinel the model is asked to emit after its last assignment line; also used as a stop sequence.
const END = "\u25A0END\u25A0";

// A caller-provided group that elements may be assigned to.
const _InitialGroup = z.object({
  // Stable identifier, 1 to 100 characters.
  id: z.string().min(1).max(100),
  // Human-readable name shown to the model, 1 to 250 characters.
  label: z.string().min(1).max(250),
  // Optional list of elements; defaults to an empty array.
  elements: z.array(z.any()).optional().default([]),
});

// Options accepted by `zai.group`, with defaults applied on parse.
const _Options = z.object({
  // Free-form grouping guidance passed to the model.
  instructions: z.string().optional(),
  // Token budget used when truncating a single element (default 250).
  tokensPerElement: z.number().min(1).max(100000).optional().default(250),
  // Chunk length setting (default 16000).
  chunkLength: z.number().min(100).max(100000).optional().default(16000),
  // Groups that exist before any element has been processed.
  initialGroups: z.array(_InitialGroup).optional().default([]),
});
|
|
22
|
+
/**
 * Canonicalises a group label so that near-identical labels map to the same group.
 *
 * The label is trimmed and lower-cased, then a leading "group", "new group" or
 * "new" prefix is removed: first when followed by a "-" or ":" separator, and
 * then (on the result of that step) when followed by whitespace.
 *
 * @param {string} label - Raw label text.
 * @returns {string} The normalised label.
 */
const normalizeLabel = (label) => {
  const lowered = label.trim().toLowerCase();
  const withoutSeparatedPrefix = lowered.replace(/^(group|new group|new)\s*[-:]\s*/i, "");
  const withoutSpacedPrefix = withoutSeparatedPrefix.replace(/^(group|new group|new)\s+/i, "");
  return withoutSpacedPrefix.trim();
};
|
|
25
|
+
/**
 * Assigns every element of `input` to exactly one labelled group by asking the model.
 *
 * Steps:
 *  1. Elements are stringified, truncated to `tokensPerElement` and packed into chunks
 *     bounded by a token budget and by a fixed maximum element count.
 *  2. Every element chunk is sent to the model once per chunk of known group labels.
 *  3. Returned labels are normalised; unknown labels create new groups.
 *  4. Elements that have not been assigned to every known group are sent again so they
 *     can be matched against groups discovered in step 3 (no new groups are created here).
 *  5. An element that ended up in several groups keeps only the first one recorded.
 *  6. When a task id and an adapter are present, the outcome is saved as an example.
 *
 * @param {Array<unknown>} input - Elements to group.
 * @param {object | undefined} _options - Raw options, validated with `_Options`.
 * @param {object} ctx - Execution context providing `controller`, `getModel`, `taskId`,
 *   `adapter`, `modelId` and `generateContent`.
 * @returns {Promise<Array<{ id: string, label: string, elements: unknown[] }>>}
 *   Non-empty groups in creation order.
 * @throws If the context signal is already aborted or `_options` fails validation.
 */
const group = async (input, _options, ctx) => {
  ctx.controller.signal.throwIfAborted();
  const options = _Options.parse(_options ?? {});
  const tokenizer = await getTokenizer();
  const model = await ctx.getModel();
  const taskId = ctx.taskId;
  const taskType = "zai.group";
  if (input.length === 0) {
    return [];
  }

  // NOTE(review): `options.chunkLength` and `initialGroups[].elements` are validated by the
  // schema but never read in this function — confirm whether they are meant to have an effect.

  const groups = new Map(); // groupId -> { id, label, normalizedLabel }
  const groupElements = new Map(); // groupId -> Set of element indices
  const elementGroups = new Map(); // element index -> Set of groupIds
  const labelToGroupId = new Map(); // normalised label -> groupId
  let groupIdCounter = 0;

  // Generated ids skip any id already in use, so a caller-supplied initial group
  // named e.g. "group_0" is never overwritten.
  const nextGroupId = () => {
    let candidate = `group_${groupIdCounter++}`;
    while (groups.has(candidate)) {
      candidate = `group_${groupIdCounter++}`;
    }
    return candidate;
  };

  const registerGroup = (id, label, normalizedLabel) => {
    groups.set(id, { id, label, normalizedLabel });
    groupElements.set(id, new Set());
    labelToGroupId.set(normalizedLabel, id);
  };

  const recordAssignment = (groupId, elementIndex) => {
    groupElements.get(groupId).add(elementIndex);
    let assigned = elementGroups.get(elementIndex);
    if (!assigned) {
      assigned = new Set();
      elementGroups.set(elementIndex, assigned);
    }
    assigned.add(groupId);
  };

  for (const ig of options.initialGroups) {
    registerGroup(ig.id, ig.label, normalizeLabel(ig.label));
  }

  // Truncation and token counting are done once per element and reused by every pass.
  const elements = input.map((element, index) => {
    const stringified = stringify(element, false);
    const truncated = tokenizer.truncate(stringified, options.tokensPerElement);
    return { element, index, stringified, truncated, tokens: tokenizer.count(truncated) };
  });

  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_INSTRUCTIONS_MAX = options.instructions
    ? clamp(tokenizer.count(options.instructions), 100, TOKENS_TOTAL_MAX * 0.2)
    : 0;
  const TOKENS_AVAILABLE = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX;
  const TOKENS_FOR_GROUPS_MAX = Math.floor(TOKENS_AVAILABLE * 0.4);
  const TOKENS_FOR_ELEMENTS_MAX = Math.floor(TOKENS_AVAILABLE * 0.6);
  const MAX_ELEMENTS_PER_CHUNK = 50;

  // Packs element indices into chunks limited by token budget and element count.
  // A chunk always receives at least one element, even if that element alone is over budget.
  const chunkElementIndices = (indices) => {
    const chunks = [];
    let current = [];
    let currentTokens = 0;
    for (const idx of indices) {
      const { tokens } = elements[idx];
      const isFull = currentTokens + tokens > TOKENS_FOR_ELEMENTS_MAX || current.length >= MAX_ELEMENTS_PER_CHUNK;
      if (isFull && current.length > 0) {
        chunks.push(current);
        current = [];
        currentTokens = 0;
      }
      current.push(idx);
      currentTokens += tokens;
    }
    if (current.length > 0) {
      chunks.push(current);
    }
    return chunks;
  };

  // Packs the currently known group ids into chunks limited by the group token budget.
  // Returns a single empty chunk when no group exists yet so the elements are still processed.
  const getGroupChunks = () => {
    const chunks = [];
    let current = [];
    let currentTokens = 0;
    for (const [groupId, info] of groups) {
      const groupTokens = tokenizer.count(`${info.label}`) + 10;
      if (currentTokens + groupTokens > TOKENS_FOR_GROUPS_MAX && current.length > 0) {
        chunks.push(current);
        current = [];
        currentTokens = 0;
      }
      current.push(groupId);
      currentTokens += groupTokens;
    }
    if (current.length > 0) {
      chunks.push(current);
    }
    return chunks.length > 0 ? chunks : [[]];
  };

  // Asks the model to label one chunk of elements against one chunk of known groups.
  // Resolves to a list of { elementIndex, label } where elementIndex indexes `input`.
  const processChunk = async (elementIndices, groupIds) => {
    const chunkElements = elementIndices.map((idx) => elements[idx].element);
    const chunkInputStr = JSON.stringify(chunkElements);
    const examples =
      taskId && ctx.adapter
        ? await ctx.adapter.getExamples({
            // Limit search string length
            input: chunkInputStr.slice(0, 1e3),
            taskType,
            taskId,
          })
        : [];
    const key = fastHash(
      stringify({
        taskId,
        taskType,
        input: chunkInputStr,
        instructions: options.instructions ?? "",
        groupIds: groupIds.join(","),
      })
    );
    const exactMatch = examples.find((x) => x.key === key);
    if (exactMatch && exactMatch.output) {
      return exactMatch.output;
    }

    const elementsText = elementIndices.map((idx, i) => `\u25A0${i}: ${elements[idx].truncated}\u25A0`).join("\n");
    const groupsList = groupIds.map((gid) => groups.get(gid).label);
    const groupsText =
      groupsList.length > 0
        ? `**Existing Groups (prefer reusing these):**\n${groupsList.map((l) => `- ${l}`).join("\n")}\n\n`
        : "";

    const exampleMessages = [];
    for (const example of examples.slice(0, 5)) {
      try {
        const exampleInput = JSON.parse(example.input);
        const exampleElements = Array.isArray(exampleInput) ? exampleInput : [exampleInput];
        const exampleElementsText = exampleElements
          .map((el, i) => `\u25A0${i}: ${stringify(el, false).slice(0, 200)}\u25A0`)
          .join("\n");
        exampleMessages.push({
          type: "text",
          role: "user",
          content: `Expert Example - Elements to group:\n${exampleElementsText}\n\nGroup each element.`,
        });
        const exampleOutput = example.output;
        if (Array.isArray(exampleOutput) && exampleOutput.length > 0) {
          const formattedAssignments = exampleOutput
            .map((assignment) => `\u25A0${assignment.elementIndex}:${assignment.label}\u25A0`)
            .join("\n");
          exampleMessages.push({
            type: "text",
            role: "assistant",
            content: `${formattedAssignments}\n${END}`,
          });
          if (example.explanation) {
            exampleMessages.push({
              type: "text",
              role: "assistant",
              content: `Reasoning: ${example.explanation}`,
            });
          }
        }
      } catch {
        // Deliberate best-effort: an example whose stored input is not valid JSON (or
        // cannot be formatted) is skipped; examples are optional few-shot guidance only.
      }
    }

    const instructionsLine = options.instructions
      ? `**Instructions:** ${options.instructions}\n`
      : "**Instructions:** Group similar elements together.";
    const systemPrompt = [
      "You are grouping elements into cohesive groups.",
      "",
      instructionsLine,
      "",
      "**Important:**",
      "- Each element gets exactly ONE group label",
      "- Use EXACT SAME label for similar items (case-sensitive)",
      "- Create new descriptive labels when needed",
      "",
      "**Output Format:**",
      "One line per element:",
      "\u25A00:Group Label\u25A0",
      "\u25A01:Group Label\u25A0",
      END,
    ]
      .join("\n")
      .trim();

    const userPrompt = [
      `${groupsText}**Elements (\u25A00 to \u25A0${elementIndices.length - 1}):**`,
      elementsText,
      "",
      "**Task:** For each element, output one line with its group label.",
      END,
    ]
      .join("\n")
      .trim();

    const { extracted } = await ctx.generateContent({
      systemPrompt,
      stopSequences: [END],
      messages: [...exampleMessages, { type: "text", role: "user", content: userPrompt }],
      transform: (text) => {
        const assignments = [];
        // matchAll works on a fresh iterator, so no regex `lastIndex` state is shared.
        for (const match of text.matchAll(/■(\d+):([^■]+)■/g)) {
          const idx = Number.parseInt(match[1] ?? "", 10);
          // Ignore positions that do not refer to an element of this chunk.
          if (Number.isNaN(idx) || idx < 0 || idx >= elementIndices.length) continue;
          const label = (match[2] ?? "").trim();
          if (!label) continue;
          assignments.push({
            // Translate the chunk-local position back to the index in `input`.
            elementIndex: elementIndices[idx],
            label: label.slice(0, 250),
          });
        }
        return assignments;
      },
    });
    return extracted;
  };

  const elementLimit = pLimit(10);
  const groupLimit = pLimit(10);

  // Runs every element chunk against every group chunk, with bounded concurrency,
  // and resolves to one flat assignment list per element chunk.
  const assignChunks = (chunks) =>
    Promise.all(
      chunks.map((chunk) =>
        elementLimit(async () => {
          const groupChunks = getGroupChunks();
          const perGroupChunk = await Promise.all(
            groupChunks.map((groupChunk) => groupLimit(() => processChunk(chunk, groupChunk)))
          );
          return perGroupChunk.flat();
        })
      )
    );

  // First pass: label everything; unknown labels become new groups.
  const elementChunks = chunkElementIndices(elements.map((elem) => elem.index));
  const allChunkResults = await assignChunks(elementChunks);
  for (const assignments of allChunkResults) {
    for (const { elementIndex, label } of assignments) {
      const normalized = normalizeLabel(label);
      let groupId = labelToGroupId.get(normalized);
      if (!groupId) {
        groupId = nextGroupId();
        registerGroup(groupId, label, normalized);
      }
      recordAssignment(groupId, elementIndex);
    }
  }

  // Second pass: re-submit elements that are not yet assigned to every known group.
  const allGroupIds = Array.from(groups.keys());
  if (allGroupIds.length > 0) {
    const elementsNeedingReview = elements
      .filter(({ index }) => {
        const seenGroups = elementGroups.get(index);
        return !seenGroups || allGroupIds.some((gid) => !seenGroups.has(gid));
      })
      .map(({ index }) => index);

    if (elementsNeedingReview.length > 0) {
      const reviewResults = await assignChunks(chunkElementIndices(elementsNeedingReview));
      for (const assignments of reviewResults) {
        for (const { elementIndex, label } of assignments) {
          // Labels that do not match a known group are ignored during review.
          const groupId = labelToGroupId.get(normalizeLabel(label));
          if (!groupId) continue;
          recordAssignment(groupId, elementIndex);
        }
      }
    }
  }

  // Resolve multi-group elements: keep only the first group that was recorded.
  for (const [elementIndex, groupSet] of elementGroups.entries()) {
    if (groupSet.size > 1) {
      const groupIds = Array.from(groupSet);
      for (const gid of groupIds) {
        groupElements.get(gid)?.delete(elementIndex);
      }
      groupElements.get(groupIds[0]).add(elementIndex);
    }
  }

  const result = [];
  for (const [groupId, elementIndices] of groupElements.entries()) {
    if (elementIndices.size > 0) {
      const groupInfo = groups.get(groupId);
      result.push({
        id: groupInfo.id,
        label: groupInfo.label,
        elements: Array.from(elementIndices).map((idx) => elements[idx].element),
      });
    }
  }

  if (taskId && ctx.adapter && !ctx.controller.signal.aborted) {
    const serializedInput = JSON.stringify(input);
    const key = fastHash(
      stringify({
        taskId,
        taskType,
        input: serializedInput,
        instructions: options.instructions ?? "",
      })
    );
    // Examples are replayed by processChunk as a list of { elementIndex, label }, so the
    // saved output uses that shape (indices refer to positions in the saved input array).
    const outputAssignments = [];
    for (const [groupId, elementIndices] of groupElements.entries()) {
      const groupInfo = groups.get(groupId);
      for (const idx of elementIndices) {
        outputAssignments.push({
          elementIndex: idx,
          label: groupInfo.label,
        });
      }
    }
    await ctx.adapter.saveExample({
      key,
      taskType,
      taskId,
      input: serializedInput,
      output: outputAssignments,
      instructions: options.instructions ?? "",
      metadata: {
        cost: { input: 0, output: 0 },
        latency: 0,
        model: ctx.modelId,
        tokens: { input: 0, output: 0 },
      },
    });
  }
  return result;
};
|
|
351
|
+
/**
 * Groups `input` into labelled categories.
 *
 * Builds a `ZaiContext` from this instance's client, model, task id and adapter and
 * wraps the grouping in a `Response`. The `Response` receives a simplifier that turns
 * the detailed group list into an object mapping each label to its elements; groups
 * sharing the same label are concatenated in order.
 *
 * @param {Array<unknown>} input - Elements to group.
 * @param {object} [_options] - Grouping options (validated inside `group`).
 * @returns {Response} Response wrapping the grouping operation.
 */
Zai.prototype.group = function(input, _options) {
  const context = new ZaiContext({
    client: this.client,
    modelId: this.Model,
    taskId: this.taskId,
    taskType: "zai.group",
    adapter: this.adapter
  });
  return new Response(context, group(input, _options, context), (result) => {
    // Labels are model-produced text. Collecting them in a Map (instead of probing a
    // plain object) keeps labels such as "constructor" or "__proto__" from resolving to
    // inherited members, which previously made `.push` throw.
    const byLabel = new Map();
    for (const { label, elements } of result) {
      let bucket = byLabel.get(label);
      if (!bucket) {
        bucket = [];
        byLabel.set(label, bucket);
      }
      // Append one by one: spreading a very large array into push() can exceed the
      // engine's argument limit.
      for (const element of elements) {
        bucket.push(element);
      }
    }
    // Object.fromEntries defines own data properties, so every label becomes a real key.
    return Object.fromEntries(byLabel);
  });
};
|