@botpress/zai 1.0.1-beta.4 → 1.0.1-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/browser/index.js +1 -4
  2. package/dist/index.d.ts +917 -0
  3. package/dist/index.js +1743 -0
  4. package/dist/node/adapters/adapter.d.js +0 -0
  5. package/dist/node/adapters/botpress-table.d.js +1 -0
  6. package/dist/node/adapters/botpress-table.js +1 -4
  7. package/dist/node/adapters/memory.d.js +0 -0
  8. package/dist/node/index.d.js +9 -0
  9. package/dist/node/models.d.js +0 -0
  10. package/dist/node/operations/check.d.js +1 -0
  11. package/dist/node/operations/check.test.d.js +1 -0
  12. package/dist/node/operations/constants.d.js +0 -0
  13. package/dist/node/operations/errors.d.js +0 -0
  14. package/dist/node/operations/extract.d.js +1 -0
  15. package/dist/node/operations/extract.test.d.js +1 -0
  16. package/dist/node/operations/filter.d.js +1 -0
  17. package/dist/node/operations/filter.test.d.js +1 -0
  18. package/dist/node/operations/label.d.js +1 -0
  19. package/dist/node/operations/label.test.d.js +1 -0
  20. package/dist/node/operations/rewrite.d.js +1 -0
  21. package/dist/node/operations/rewrite.test.d.js +1 -0
  22. package/dist/node/operations/summarize.d.js +1 -0
  23. package/dist/node/operations/summarize.test.d.js +1 -0
  24. package/dist/node/operations/text.d.js +1 -0
  25. package/dist/node/operations/text.test.d.js +1 -0
  26. package/dist/node/operations/zai-learn.test.d.js +1 -0
  27. package/dist/node/operations/zai-retry.test.d.js +1 -0
  28. package/dist/node/utils.d.js +0 -0
  29. package/dist/node/zai.d.js +1 -0
  30. package/package.json +5 -14
  31. package/src/adapters/adapter.d.ts +27 -0
  32. package/src/adapters/botpress-table.d.ts +153 -0
  33. package/src/adapters/botpress-table.ts +1 -4
  34. package/src/adapters/memory.d.ts +7 -0
  35. package/src/index.d.ts +9 -0
  36. package/src/models.d.ts +351 -0
  37. package/src/operations/__tests/index.d.ts +20 -0
  38. package/src/operations/check.d.ts +36 -0
  39. package/src/operations/check.test.d.ts +1 -0
  40. package/src/operations/constants.d.ts +2 -0
  41. package/src/operations/errors.d.ts +5 -0
  42. package/src/operations/extract.d.ts +20 -0
  43. package/src/operations/extract.test.d.ts +1 -0
  44. package/src/operations/filter.d.ts +39 -0
  45. package/src/operations/filter.test.d.ts +1 -0
  46. package/src/operations/label.d.ts +79 -0
  47. package/src/operations/label.test.d.ts +1 -0
  48. package/src/operations/rewrite.d.ts +34 -0
  49. package/src/operations/rewrite.test.d.ts +1 -0
  50. package/src/operations/summarize.d.ts +46 -0
  51. package/src/operations/summarize.test.d.ts +1 -0
  52. package/src/operations/text.d.ts +16 -0
  53. package/src/operations/text.test.d.ts +1 -0
  54. package/src/operations/zai-learn.test.d.ts +1 -0
  55. package/src/operations/zai-retry.test.d.ts +1 -0
  56. package/src/sdk-interfaces/llm/generateContent.d.ts +128 -0
  57. package/src/sdk-interfaces/llm/listLanguageModels.d.ts +25 -0
  58. package/src/utils.d.ts +52 -0
  59. package/src/zai.d.ts +99 -0
  60. package/tsconfig.json +2 -2
  61. package/scripts/update-models.mts +0 -76
  62. package/scripts/update-types.mts +0 -49
  63. package/tsup.config.ts +0 -16
  64. package/vitest.config.ts +0 -9
  65. package/vitest.setup.ts +0 -24
package/dist/index.js ADDED
@@ -0,0 +1,1743 @@
1
+ var __create = Object.create;
2
+ var __defProp = Object.defineProperty;
3
+ var __defProps = Object.defineProperties;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
6
+ var __getOwnPropNames = Object.getOwnPropertyNames;
7
+ var __getOwnPropSymbols = Object.getOwnPropertySymbols;
8
+ var __getProtoOf = Object.getPrototypeOf;
9
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
10
+ var __propIsEnum = Object.prototype.propertyIsEnumerable;
11
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
12
+ var __spreadValues = (a, b) => {
13
+ for (var prop in b || (b = {}))
14
+ if (__hasOwnProp.call(b, prop))
15
+ __defNormalProp(a, prop, b[prop]);
16
+ if (__getOwnPropSymbols)
17
+ for (var prop of __getOwnPropSymbols(b)) {
18
+ if (__propIsEnum.call(b, prop))
19
+ __defNormalProp(a, prop, b[prop]);
20
+ }
21
+ return a;
22
+ };
23
+ var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));
24
+ var __export = (target, all) => {
25
+ for (var name in all)
26
+ __defProp(target, name, { get: all[name], enumerable: true });
27
+ };
28
+ var __copyProps = (to, from, except, desc) => {
29
+ if (from && typeof from === "object" || typeof from === "function") {
30
+ for (let key of __getOwnPropNames(from))
31
+ if (!__hasOwnProp.call(to, key) && key !== except)
32
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
33
+ }
34
+ return to;
35
+ };
36
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
37
+ // If the importer is in node compatibility mode or this is not an ESM
38
+ // file that has been converted to a CommonJS file using a Babel-
39
+ // compatible transform (i.e. "__esModule" has not been set), then set
40
+ // "default" to the CommonJS "module.exports" for node compatibility.
41
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
42
+ mod
43
+ ));
44
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
45
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
46
+
47
+ // src/index.ts
48
+ var index_exports = {};
49
+ __export(index_exports, {
50
+ Zai: () => Zai
51
+ });
52
+ module.exports = __toCommonJS(index_exports);
53
+
54
+ // src/zai.ts
55
+ var import_zui3 = require("@bpinternal/zui");
56
+ var import_wasm = require("@botpress/wasm");
57
+
58
+ // src/adapters/botpress-table.ts
59
+ var import_zui2 = require("@bpinternal/zui");
60
+
61
+ // src/utils.ts
62
+ var import_zui = require("@bpinternal/zui");
63
+ var stringify = (input, beautify = true) => {
64
+ return typeof input === "string" && !!input.length ? input : input ? JSON.stringify(input, beautify ? null : void 0, beautify ? 2 : void 0) : "<input is null, false, undefined or empty>";
65
+ };
66
+ var BotpressClient = import_zui.z.custom(
67
+ (value) => typeof value === "object" && value !== null && "callAction" in value && typeof value.callAction === "function",
68
+ {
69
+ message: "Invalid Botpress Client. Make sure to pass an instance of @botpress/client"
70
+ }
71
+ );
72
+ function fastHash(str) {
73
+ let hash = 0;
74
+ for (let i = 0; i < str.length; i++) {
75
+ hash = (hash << 5) - hash + str.charCodeAt(i);
76
+ hash |= 0;
77
+ }
78
+ return (hash >>> 0).toString(16);
79
+ }
80
+ var takeUntilTokens = (arr, tokens, count) => {
81
+ const result = [];
82
+ let total = 0;
83
+ for (const value of arr) {
84
+ const valueTokens = count(value);
85
+ if (total + valueTokens > tokens) {
86
+ break;
87
+ }
88
+ total += valueTokens;
89
+ result.push(value);
90
+ }
91
+ return result;
92
+ };
93
+ var GenerationMetadata = import_zui.z.object({
94
+ model: import_zui.z.string(),
95
+ cost: import_zui.z.object({
96
+ input: import_zui.z.number(),
97
+ output: import_zui.z.number()
98
+ }).describe("Cost in $USD"),
99
+ latency: import_zui.z.number().describe("Latency in milliseconds"),
100
+ tokens: import_zui.z.object({
101
+ input: import_zui.z.number(),
102
+ output: import_zui.z.number()
103
+ }).describe("Number of tokens used")
104
+ });
105
+
106
+ // src/adapters/adapter.ts
107
+ var Adapter = class {
108
+ };
109
+
110
+ // src/adapters/botpress-table.ts
111
+ var CRITICAL_TAGS = {
112
+ system: "true",
113
+ "schema-purpose": "active-learning",
114
+ "schema-version": "Oct-2024"
115
+ };
116
+ var OPTIONAL_TAGS = {
117
+ "x-studio-title": "Active Learning",
118
+ "x-studio-description": "Table for storing active learning tasks and examples",
119
+ "x-studio-readonly": "true",
120
+ "x-studio-icon": "lucide://atom",
121
+ "x-studio-color": "green"
122
+ };
123
+ var FACTOR = 30;
124
+ var Props = import_zui2.z.object({
125
+ client: BotpressClient,
126
+ tableName: import_zui2.z.string().regex(
127
+ /^[a-zA-Z0-9_]{1,45}Table$/,
128
+ "Table name must be lowercase and contain only letters, numbers and underscores"
129
+ )
130
+ });
131
+ var TableSchema = import_zui2.z.object({
132
+ taskType: import_zui2.z.string().describe("The type of the task (filter, extract, etc.)"),
133
+ taskId: import_zui2.z.string(),
134
+ key: import_zui2.z.string().describe("A unique key for the task (e.g. a hash of the input, taskId, taskType and instructions)"),
135
+ instructions: import_zui2.z.string(),
136
+ input: import_zui2.z.object({}).passthrough().describe("The input to the task"),
137
+ output: import_zui2.z.object({}).passthrough().describe("The expected output"),
138
+ explanation: import_zui2.z.string().nullable(),
139
+ metadata: GenerationMetadata,
140
+ status: import_zui2.z.enum(["pending", "rejected", "approved"]),
141
+ feedback: import_zui2.z.object({
142
+ rating: import_zui2.z.enum(["very-bad", "bad", "good", "very-good"]),
143
+ comment: import_zui2.z.string().nullable()
144
+ }).nullable().default(null)
145
+ });
146
+ var searchableColumns = ["input"];
147
+ var TableJsonSchema = Object.entries(TableSchema.shape).reduce((acc, [key, value]) => {
148
+ var _a, _b;
149
+ acc[key] = value.toJsonSchema();
150
+ (_b = (_a = acc[key])["x-zui"]) != null ? _b : _a["x-zui"] = {};
151
+ acc[key]["x-zui"].searchable = searchableColumns.includes(key);
152
+ return acc;
153
+ }, {});
154
+ var TableAdapter = class extends Adapter {
155
+ constructor(props) {
156
+ super();
157
+ __publicField(this, "client");
158
+ __publicField(this, "tableName");
159
+ __publicField(this, "status");
160
+ props = Props.parse(props);
161
+ this.client = props.client;
162
+ this.tableName = props.tableName;
163
+ this.status = "ready";
164
+ }
165
+ async getExamples({ taskType, taskId, input }) {
166
+ await this.assertTableExists();
167
+ const { rows } = await this.client.findTableRows({
168
+ table: this.tableName,
169
+ search: JSON.stringify({ value: input }).substring(0, 1023),
170
+ // Search is limited to 1024 characters
171
+ limit: 10,
172
+ // TODO
173
+ filter: {
174
+ // Proximity match of approved examples
175
+ taskType,
176
+ taskId,
177
+ status: "approved"
178
+ }
179
+ }).catch((err) => {
180
+ console.error("Error fetching examples: ".concat(err.message));
181
+ return { rows: [] };
182
+ });
183
+ return rows.map((row) => {
184
+ var _a;
185
+ return {
186
+ key: row.key,
187
+ input: row.input.value,
188
+ output: row.output.value,
189
+ explanation: row.explanation,
190
+ similarity: (_a = row.similarity) != null ? _a : 0
191
+ };
192
+ });
193
+ }
194
+ async saveExample({
195
+ key,
196
+ taskType,
197
+ taskId,
198
+ instructions,
199
+ input,
200
+ output,
201
+ explanation,
202
+ metadata,
203
+ status = "pending"
204
+ }) {
205
+ await this.assertTableExists();
206
+ await this.client.upsertTableRows({
207
+ table: this.tableName,
208
+ keyColumn: "key",
209
+ rows: [
210
+ {
211
+ key,
212
+ taskType,
213
+ taskId,
214
+ instructions,
215
+ input: { value: input },
216
+ output: { value: output },
217
+ explanation: explanation != null ? explanation : null,
218
+ status,
219
+ metadata
220
+ }
221
+ ]
222
+ }).catch(() => {
223
+ });
224
+ }
225
+ async assertTableExists() {
226
+ var _a, _b, _c;
227
+ if (this.status !== "ready") {
228
+ return;
229
+ }
230
+ const { table, created } = await this.client.getOrCreateTable({
231
+ table: this.tableName,
232
+ factor: FACTOR,
233
+ frozen: true,
234
+ isComputeEnabled: false,
235
+ tags: __spreadValues(__spreadValues({}, CRITICAL_TAGS), OPTIONAL_TAGS),
236
+ schema: TableJsonSchema
237
+ }).catch(() => {
238
+ this.status = "error";
239
+ return { table: null, created: false };
240
+ });
241
+ if (!table) {
242
+ return;
243
+ }
244
+ if (!created) {
245
+ const issues = [];
246
+ if (table.factor !== FACTOR) {
247
+ issues.push("Factor is ".concat(table.factor, " instead of ").concat(FACTOR));
248
+ }
249
+ if (table.frozen !== true) {
250
+ issues.push("Table is not frozen");
251
+ }
252
+ for (const [key, value] of Object.entries(CRITICAL_TAGS)) {
253
+ if (((_a = table.tags) == null ? void 0 : _a[key]) !== value) {
254
+ issues.push("Tag ".concat(key, " is ").concat((_b = table.tags) == null ? void 0 : _b[key], " instead of ").concat(value));
255
+ }
256
+ }
257
+ for (const key of Object.keys(TableJsonSchema)) {
258
+ const column = (_c = table.schema) == null ? void 0 : _c.properties[key];
259
+ const expected = TableJsonSchema[key];
260
+ if (!column) {
261
+ issues.push("Column ".concat(key, " is missing"));
262
+ continue;
263
+ }
264
+ if (column.type !== expected.type) {
265
+ issues.push("Column ".concat(key, " has type ").concat(column.type, " instead of ").concat(expected.type));
266
+ }
267
+ if (expected["x-zui"].searchable && !column["x-zui"].searchable) {
268
+ issues.push("Column ".concat(key, " is not searchable but should be"));
269
+ }
270
+ }
271
+ if (issues.length) {
272
+ this.status = "error";
273
+ }
274
+ }
275
+ this.status = "initialized";
276
+ }
277
+ };
278
+
279
+ // src/adapters/memory.ts
280
+ var MemoryAdapter = class extends Adapter {
281
+ constructor(examples) {
282
+ super();
283
+ this.examples = examples;
284
+ }
285
+ async getExamples() {
286
+ return this.examples;
287
+ }
288
+ async saveExample() {
289
+ }
290
+ };
291
+
292
+ // src/models.ts
293
+ var Models = [
294
+ {
295
+ "id": "anthropic__claude-3-haiku-20240307",
296
+ "name": "Claude 3 Haiku",
297
+ "integration": "anthropic",
298
+ "input": {
299
+ "maxTokens": 2e5
300
+ },
301
+ "output": {
302
+ "maxTokens": 4096
303
+ }
304
+ },
305
+ {
306
+ "id": "anthropic__claude-3-5-sonnet-20240620",
307
+ "name": "Claude 3.5 Sonnet",
308
+ "integration": "anthropic",
309
+ "input": {
310
+ "maxTokens": 2e5
311
+ },
312
+ "output": {
313
+ "maxTokens": 4096
314
+ }
315
+ },
316
+ {
317
+ "id": "cerebras__llama3.1-70b",
318
+ "name": "Llama 3.1 70B",
319
+ "integration": "cerebras",
320
+ "input": {
321
+ "maxTokens": 8192
322
+ },
323
+ "output": {
324
+ "maxTokens": 8192
325
+ }
326
+ },
327
+ {
328
+ "id": "cerebras__llama3.1-8b",
329
+ "name": "Llama 3.1 8B",
330
+ "integration": "cerebras",
331
+ "input": {
332
+ "maxTokens": 8192
333
+ },
334
+ "output": {
335
+ "maxTokens": 8192
336
+ }
337
+ },
338
+ {
339
+ "id": "fireworks-ai__accounts/fireworks/models/deepseek-coder-v2-instruct",
340
+ "name": "DeepSeek Coder V2 Instruct",
341
+ "integration": "fireworks-ai",
342
+ "input": {
343
+ "maxTokens": 131072
344
+ },
345
+ "output": {
346
+ "maxTokens": 131072
347
+ }
348
+ },
349
+ {
350
+ "id": "fireworks-ai__accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
351
+ "name": "DeepSeek Coder V2 Lite",
352
+ "integration": "fireworks-ai",
353
+ "input": {
354
+ "maxTokens": 163840
355
+ },
356
+ "output": {
357
+ "maxTokens": 163840
358
+ }
359
+ },
360
+ {
361
+ "id": "fireworks-ai__accounts/fireworks/models/firellava-13b",
362
+ "name": "FireLLaVA-13B",
363
+ "integration": "fireworks-ai",
364
+ "input": {
365
+ "maxTokens": 4096
366
+ },
367
+ "output": {
368
+ "maxTokens": 4096
369
+ }
370
+ },
371
+ {
372
+ "id": "fireworks-ai__accounts/fireworks/models/firefunction-v2",
373
+ "name": "Firefunction V2",
374
+ "integration": "fireworks-ai",
375
+ "input": {
376
+ "maxTokens": 8192
377
+ },
378
+ "output": {
379
+ "maxTokens": 8192
380
+ }
381
+ },
382
+ {
383
+ "id": "fireworks-ai__accounts/fireworks/models/gemma2-9b-it",
384
+ "name": "Gemma 2 9B Instruct",
385
+ "integration": "fireworks-ai",
386
+ "input": {
387
+ "maxTokens": 8192
388
+ },
389
+ "output": {
390
+ "maxTokens": 8192
391
+ }
392
+ },
393
+ {
394
+ "id": "fireworks-ai__accounts/fireworks/models/llama-v3p1-405b-instruct",
395
+ "name": "Llama 3.1 405B Instruct",
396
+ "integration": "fireworks-ai",
397
+ "input": {
398
+ "maxTokens": 131072
399
+ },
400
+ "output": {
401
+ "maxTokens": 131072
402
+ }
403
+ },
404
+ {
405
+ "id": "fireworks-ai__accounts/fireworks/models/llama-v3p1-70b-instruct",
406
+ "name": "Llama 3.1 70B Instruct",
407
+ "integration": "fireworks-ai",
408
+ "input": {
409
+ "maxTokens": 131072
410
+ },
411
+ "output": {
412
+ "maxTokens": 131072
413
+ }
414
+ },
415
+ {
416
+ "id": "fireworks-ai__accounts/fireworks/models/llama-v3p1-8b-instruct",
417
+ "name": "Llama 3.1 8B Instruct",
418
+ "integration": "fireworks-ai",
419
+ "input": {
420
+ "maxTokens": 131072
421
+ },
422
+ "output": {
423
+ "maxTokens": 131072
424
+ }
425
+ },
426
+ {
427
+ "id": "fireworks-ai__accounts/fireworks/models/mixtral-8x22b-instruct",
428
+ "name": "Mixtral MoE 8x22B Instruct",
429
+ "integration": "fireworks-ai",
430
+ "input": {
431
+ "maxTokens": 65536
432
+ },
433
+ "output": {
434
+ "maxTokens": 65536
435
+ }
436
+ },
437
+ {
438
+ "id": "fireworks-ai__accounts/fireworks/models/mixtral-8x7b-instruct",
439
+ "name": "Mixtral MoE 8x7B Instruct",
440
+ "integration": "fireworks-ai",
441
+ "input": {
442
+ "maxTokens": 32768
443
+ },
444
+ "output": {
445
+ "maxTokens": 32768
446
+ }
447
+ },
448
+ {
449
+ "id": "fireworks-ai__accounts/fireworks/models/mythomax-l2-13b",
450
+ "name": "MythoMax L2 13b",
451
+ "integration": "fireworks-ai",
452
+ "input": {
453
+ "maxTokens": 4096
454
+ },
455
+ "output": {
456
+ "maxTokens": 4096
457
+ }
458
+ },
459
+ {
460
+ "id": "fireworks-ai__accounts/fireworks/models/qwen2-72b-instruct",
461
+ "name": "Qwen2 72b Instruct",
462
+ "integration": "fireworks-ai",
463
+ "input": {
464
+ "maxTokens": 32768
465
+ },
466
+ "output": {
467
+ "maxTokens": 32768
468
+ }
469
+ },
470
+ {
471
+ "id": "groq__gemma2-9b-it",
472
+ "name": "Gemma2 9B",
473
+ "integration": "groq",
474
+ "input": {
475
+ "maxTokens": 8192
476
+ },
477
+ "output": {
478
+ "maxTokens": 8192
479
+ }
480
+ },
481
+ {
482
+ "id": "groq__llama3-70b-8192",
483
+ "name": "LLaMA 3 70B",
484
+ "integration": "groq",
485
+ "input": {
486
+ "maxTokens": 8192
487
+ },
488
+ "output": {
489
+ "maxTokens": 8192
490
+ }
491
+ },
492
+ {
493
+ "id": "groq__llama3-8b-8192",
494
+ "name": "LLaMA 3 8B",
495
+ "integration": "groq",
496
+ "input": {
497
+ "maxTokens": 8192
498
+ },
499
+ "output": {
500
+ "maxTokens": 8192
501
+ }
502
+ },
503
+ {
504
+ "id": "groq__llama-3.1-70b-versatile",
505
+ "name": "LLaMA 3.1 70B",
506
+ "integration": "groq",
507
+ "input": {
508
+ "maxTokens": 128e3
509
+ },
510
+ "output": {
511
+ "maxTokens": 8192
512
+ }
513
+ },
514
+ {
515
+ "id": "groq__llama-3.1-8b-instant",
516
+ "name": "LLaMA 3.1 8B",
517
+ "integration": "groq",
518
+ "input": {
519
+ "maxTokens": 128e3
520
+ },
521
+ "output": {
522
+ "maxTokens": 8192
523
+ }
524
+ },
525
+ {
526
+ "id": "groq__llama-3.2-11b-vision-preview",
527
+ "name": "LLaMA 3.2 11B Vision",
528
+ "integration": "groq",
529
+ "input": {
530
+ "maxTokens": 128e3
531
+ },
532
+ "output": {
533
+ "maxTokens": 8192
534
+ }
535
+ },
536
+ {
537
+ "id": "groq__llama-3.2-1b-preview",
538
+ "name": "LLaMA 3.2 1B",
539
+ "integration": "groq",
540
+ "input": {
541
+ "maxTokens": 128e3
542
+ },
543
+ "output": {
544
+ "maxTokens": 8192
545
+ }
546
+ },
547
+ {
548
+ "id": "groq__llama-3.2-3b-preview",
549
+ "name": "LLaMA 3.2 3B",
550
+ "integration": "groq",
551
+ "input": {
552
+ "maxTokens": 128e3
553
+ },
554
+ "output": {
555
+ "maxTokens": 8192
556
+ }
557
+ },
558
+ {
559
+ "id": "groq__llama-3.2-90b-vision-preview",
560
+ "name": "LLaMA 3.2 90B Vision",
561
+ "integration": "groq",
562
+ "input": {
563
+ "maxTokens": 128e3
564
+ },
565
+ "output": {
566
+ "maxTokens": 8192
567
+ }
568
+ },
569
+ {
570
+ "id": "groq__llama-3.3-70b-versatile",
571
+ "name": "LLaMA 3.3 70B",
572
+ "integration": "groq",
573
+ "input": {
574
+ "maxTokens": 128e3
575
+ },
576
+ "output": {
577
+ "maxTokens": 32768
578
+ }
579
+ },
580
+ {
581
+ "id": "groq__mixtral-8x7b-32768",
582
+ "name": "Mixtral 8x7B",
583
+ "integration": "groq",
584
+ "input": {
585
+ "maxTokens": 32768
586
+ },
587
+ "output": {
588
+ "maxTokens": 32768
589
+ }
590
+ },
591
+ {
592
+ "id": "openai__o1-2024-12-17",
593
+ "name": "GPT o1",
594
+ "integration": "openai",
595
+ "input": {
596
+ "maxTokens": 2e5
597
+ },
598
+ "output": {
599
+ "maxTokens": 1e5
600
+ }
601
+ },
602
+ {
603
+ "id": "openai__o1-mini-2024-09-12",
604
+ "name": "GPT o1-mini",
605
+ "integration": "openai",
606
+ "input": {
607
+ "maxTokens": 128e3
608
+ },
609
+ "output": {
610
+ "maxTokens": 65536
611
+ }
612
+ },
613
+ {
614
+ "id": "openai__gpt-3.5-turbo-0125",
615
+ "name": "GPT-3.5 Turbo",
616
+ "integration": "openai",
617
+ "input": {
618
+ "maxTokens": 128e3
619
+ },
620
+ "output": {
621
+ "maxTokens": 4096
622
+ }
623
+ },
624
+ {
625
+ "id": "openai__gpt-4-turbo-2024-04-09",
626
+ "name": "GPT-4 Turbo",
627
+ "integration": "openai",
628
+ "input": {
629
+ "maxTokens": 128e3
630
+ },
631
+ "output": {
632
+ "maxTokens": 4096
633
+ }
634
+ },
635
+ {
636
+ "id": "openai__gpt-4o-2024-08-06",
637
+ "name": "GPT-4o (August 2024)",
638
+ "integration": "openai",
639
+ "input": {
640
+ "maxTokens": 128e3
641
+ },
642
+ "output": {
643
+ "maxTokens": 16384
644
+ }
645
+ },
646
+ {
647
+ "id": "openai__gpt-4o-2024-05-13",
648
+ "name": "GPT-4o (May 2024)",
649
+ "integration": "openai",
650
+ "input": {
651
+ "maxTokens": 128e3
652
+ },
653
+ "output": {
654
+ "maxTokens": 4096
655
+ }
656
+ },
657
+ {
658
+ "id": "openai__gpt-4o-2024-11-20",
659
+ "name": "GPT-4o (November 2024)",
660
+ "integration": "openai",
661
+ "input": {
662
+ "maxTokens": 128e3
663
+ },
664
+ "output": {
665
+ "maxTokens": 16384
666
+ }
667
+ },
668
+ {
669
+ "id": "openai__gpt-4o-mini-2024-07-18",
670
+ "name": "GPT-4o Mini",
671
+ "integration": "openai",
672
+ "input": {
673
+ "maxTokens": 128e3
674
+ },
675
+ "output": {
676
+ "maxTokens": 16384
677
+ }
678
+ }
679
+ ];
680
+
681
+ // src/zai.ts
682
+ var ActiveLearning = import_zui3.z.object({
683
+ enable: import_zui3.z.boolean().describe("Whether to enable active learning").default(false),
684
+ tableName: import_zui3.z.string().regex(
685
+ /^[A-Za-z0-9_/-]{1,100}Table$/,
686
+ "Namespace must be alphanumeric and contain only letters, numbers, underscores, hyphens and slashes"
687
+ ).describe("The name of the table to store active learning tasks").default("ActiveLearningTable"),
688
+ taskId: import_zui3.z.string().regex(
689
+ /^[A-Za-z0-9_/-]{1,100}$/,
690
+ "Namespace must be alphanumeric and contain only letters, numbers, underscores, hyphens and slashes"
691
+ ).describe("The ID of the task").default("default")
692
+ });
693
+ var ZaiConfig = import_zui3.z.object({
694
+ client: BotpressClient,
695
+ userId: import_zui3.z.string().describe("The ID of the user consuming the API").optional(),
696
+ retry: import_zui3.z.object({ maxRetries: import_zui3.z.number().min(0).max(100) }).default({ maxRetries: 3 }),
697
+ modelId: import_zui3.z.custom(
698
+ (value) => {
699
+ if (typeof value !== "string" || !value.includes("__")) {
700
+ return false;
701
+ }
702
+ return true;
703
+ },
704
+ {
705
+ message: "Invalid model ID"
706
+ }
707
+ ).describe("The ID of the model you want to use").default("openai__gpt-4o-mini-2024-07-18"),
708
+ activeLearning: ActiveLearning.default({ enable: false }),
709
+ namespace: import_zui3.z.string().regex(
710
+ /^[A-Za-z0-9_/-]{1,100}$/,
711
+ "Namespace must be alphanumeric and contain only letters, numbers, underscores, hyphens and slashes"
712
+ ).default("zai")
713
+ });
714
+ var _Zai = class _Zai {
715
+ constructor(config) {
716
+ __publicField(this, "client");
717
+ __publicField(this, "originalConfig");
718
+ __publicField(this, "userId");
719
+ __publicField(this, "integration");
720
+ __publicField(this, "model");
721
+ __publicField(this, "retry");
722
+ __publicField(this, "Model");
723
+ __publicField(this, "namespace");
724
+ __publicField(this, "adapter");
725
+ __publicField(this, "activeLearning");
726
+ var _a;
727
+ this.originalConfig = config;
728
+ const parsed = ZaiConfig.parse(config);
729
+ this.client = parsed.client;
730
+ const [integration, modelId] = parsed.modelId.split("__");
731
+ if (!(integration == null ? void 0 : integration.length) || !(modelId == null ? void 0 : modelId.length)) {
732
+ throw new Error("Invalid model ID: ".concat(parsed.modelId, ". Expected format: <integration>__<modelId>"));
733
+ }
734
+ this.integration = integration;
735
+ this.model = modelId;
736
+ this.namespace = parsed.namespace;
737
+ this.userId = parsed.userId;
738
+ this.retry = parsed.retry;
739
+ this.Model = Models.find((m) => m.id === parsed.modelId);
740
+ this.activeLearning = parsed.activeLearning;
741
+ this.adapter = ((_a = parsed.activeLearning) == null ? void 0 : _a.enable) ? new TableAdapter({ client: this.client, tableName: parsed.activeLearning.tableName }) : new MemoryAdapter([]);
742
+ }
743
+ /** @internal */
744
+ async callModel(props) {
745
+ let retries = this.retry.maxRetries;
746
+ while (retries-- >= 0) {
747
+ try {
748
+ return await this._callModel(props);
749
+ } catch (e) {
750
+ if (retries >= 0) {
751
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
752
+ } else {
753
+ throw new Error("Failed to call model after multiple retries");
754
+ }
755
+ }
756
+ }
757
+ throw new Error("Failed to call model after multiple retries");
758
+ }
759
+ /** @internal */
760
+ async _callModel(props) {
761
+ let retries = this.retry.maxRetries;
762
+ do {
763
+ const start = Date.now();
764
+ const input = __spreadValues({
765
+ messages: [],
766
+ temperature: 0,
767
+ topP: 1,
768
+ model: { id: this.model },
769
+ userId: this.userId
770
+ }, props);
771
+ const { output } = await this.client.callAction({
772
+ type: "".concat(this.integration, ":generateContent"),
773
+ input
774
+ });
775
+ const latency = Date.now() - start;
776
+ return __spreadProps(__spreadValues({}, output), {
777
+ metadata: {
778
+ model: this.model,
779
+ latency,
780
+ cost: { input: output.usage.inputCost, output: output.usage.outputCost },
781
+ tokens: { input: output.usage.inputTokens, output: output.usage.outputTokens }
782
+ }
783
+ });
784
+ } while (--retries > 0);
785
+ }
786
+ async getTokenizer() {
787
+ var _a;
788
+ (_a = _Zai.tokenizer) != null ? _a : _Zai.tokenizer = await (async () => {
789
+ while (!import_wasm.getWasmTokenizer) {
790
+ await new Promise((resolve) => setTimeout(resolve, 25));
791
+ }
792
+ return (0, import_wasm.getWasmTokenizer)();
793
+ })();
794
+ return _Zai.tokenizer;
795
+ }
796
+ get taskId() {
797
+ if (!this.activeLearning.enable) {
798
+ return void 0;
799
+ }
800
+ return "".concat(this.namespace, "/").concat(this.activeLearning.taskId).replace(/\/+/g, "/");
801
+ }
802
+ with(options) {
803
+ return new _Zai(__spreadValues(__spreadValues({}, this.originalConfig), options));
804
+ }
805
+ learn(taskId) {
806
+ return new _Zai(__spreadProps(__spreadValues({}, this.originalConfig), {
807
+ activeLearning: __spreadProps(__spreadValues({}, this.activeLearning), { taskId, enable: true })
808
+ }));
809
+ }
810
+ };
811
+ __publicField(_Zai, "tokenizer", null);
812
+ var Zai = _Zai;
813
+
814
+ // src/operations/text.ts
815
+ var import_zui4 = require("@bpinternal/zui");
816
+ var import_lodash_es = require("lodash-es");
817
+
818
+ // src/operations/constants.ts
819
+ var PROMPT_INPUT_BUFFER = 1048;
820
+ var PROMPT_OUTPUT_BUFFER = 512;
821
+
822
+ // src/operations/text.ts
823
+ var Options = import_zui4.z.object({
824
+ length: import_zui4.z.number().min(1).max(1e5).optional().describe("The maximum number of tokens to generate")
825
+ });
826
+ Zai.prototype.text = async function(prompt, _options) {
827
+ var _a, _b;
828
+ const options = Options.parse(_options != null ? _options : {});
829
+ const tokenizer = await this.getTokenizer();
830
+ prompt = tokenizer.truncate(prompt, Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100));
831
+ if (options.length) {
832
+ options.length = Math.min(this.Model.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length);
833
+ }
834
+ const instructions = [];
835
+ let chart = "";
836
+ if (options.length) {
837
+ const length = (0, import_lodash_es.clamp)(options.length * 0.75, 5, options.length);
838
+ instructions.push("IMPORTANT: Length constraint: ".concat(length, " tokens/words"));
839
+ instructions.push("The text must be standalone and complete in less than ".concat(length, " tokens/words"));
840
+ }
841
+ if (options.length && options.length <= 500) {
842
+ chart = "\n| Tokens | Text Length (approximate) |\n|-------------|--------------------------------------|\n| < 5 tokens | 1-3 words |\n| 5-10 tokens | 3-6 words |\n| 10-20 tokens| 6-15 words |\n| 20-50 tokens| A short sentence (15-30 words) |\n| 50-100 tokens| A medium sentence (30-70 words) |\n| 100-200 tokens| A short paragraph (70-150 words) |\n| 200-300 tokens| A medium paragraph (150-200 words) |\n| 300-500 tokens| A long paragraph (200-300 words) |".trim();
843
+ }
844
+ const output = await this.callModel({
845
+ systemPrompt: "\nGenerate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.\n".concat(instructions.map((x) => "- ".concat(x)).join("\n"), "\n").concat(chart, "\n").trim(),
846
+ temperature: 0.7,
847
+ messages: [{ type: "text", content: prompt, role: "user" }],
848
+ maxTokens: options.length
849
+ });
850
+ return (_b = (_a = output == null ? void 0 : output.choices) == null ? void 0 : _a[0]) == null ? void 0 : _b.content;
851
+ };
852
+
853
// src/operations/rewrite.ts
var import_zui5 = require("@bpinternal/zui");

// Schema for one few-shot rewrite example (input text -> rewritten output).
var Example = import_zui5.z.object({
  input: import_zui5.z.string(),
  output: import_zui5.z.string()
});

// Options accepted by `Zai.rewrite`.
var Options2 = import_zui5.z.object({
  examples: import_zui5.z.array(Example).default([]),
  length: import_zui5.z.number().min(10).max(16e3).optional().describe("The maximum number of tokens to generate")
});

// Sentinel tags wrapping the text to rewrite; unlikely to occur in user text.
var START = "\u25A0START\u25A0";
var END = "\u25A0END\u25A0";

/**
 * Rewrites `original` according to `prompt`, optionally bounded by
 * `_options.length` (max output tokens) and guided by few-shot examples.
 * When a taskId is active, previously saved examples are used as shots and
 * an exact-match cache short-circuits the model call entirely.
 *
 * @param {string} original - text to rewrite (must fit the model window)
 * @param {string} prompt - rewriting instructions
 * @param {object} [_options] - see `Options2`
 * @returns {Promise<string>} the rewritten text
 * @throws {Error} when the input exceeds the model window or the model
 *   returns no content
 */
Zai.prototype.rewrite = async function(original, prompt, _options) {
  var _a;
  const options = Options2.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.rewrite";
  const INPUT_COMPONENT_SIZE = Math.max(100, (this.Model.input.maxTokens - PROMPT_INPUT_BUFFER) / 2);
  prompt = tokenizer.truncate(prompt, INPUT_COMPONENT_SIZE);
  // FIX: the size check previously added `tokenizer.count(prompt)` to
  // `inputSize` while `maxInputSize` also subtracts it, counting the prompt
  // twice. Only the original text competes for the remaining budget.
  const inputSize = tokenizer.count(original);
  const maxInputSize = this.Model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
  if (inputSize > maxInputSize) {
    throw new Error(
      "The input size is ".concat(inputSize, " tokens long, which is more than the maximum of ").concat(maxInputSize, " tokens for this model (").concat(this.Model.name, " = ").concat(this.Model.input.maxTokens, " tokens)")
    );
  }
  const instructions = [];
  const originalSize = inputSize;
  if (options.length && originalSize > options.length) {
    instructions.push("The original text is ".concat(originalSize, " tokens long \u2013 it should be less than ").concat(options.length));
    instructions.push(
      "The text must be standalone and complete in less than ".concat(options.length, " tokens, so it has to be shortened to fit the length as well")
    );
  }
  // Wraps a text + prompt pair in the sentinel-tag envelope the model expects.
  const format = (before, prompt2) => {
    return "\nPrompt: ".concat(prompt2, "\n\n").concat(START, "\n").concat(before, "\n").concat(END, "\n").trim();
  };
  // Cache key for the exact-match lookup / save below.
  const Key = fastHash(
    stringify({
      taskId,
      taskType,
      input: original,
      prompt
    })
  );
  const formatExample = ({ input, output: output2, instructions: instructions2 }) => {
    return [
      { type: "text", role: "user", content: format(input, instructions2 || prompt) },
      { type: "text", role: "assistant", content: "".concat(START).concat(output2).concat(END) }
    ];
  };
  const defaultExamples = [
    { input: "Hello, how are you?", output: "Bonjour, comment \xE7a va?", instructions: "translate to French" },
    { input: "1\n2\n3", output: "3\n2\n1", instructions: "reverse the order" }
  ];
  const tableExamples = taskId ? await this.adapter.getExamples({
    input: original,
    taskId,
    taskType
  }) : [];
  // Exact same request seen before: return the cached output without a model call.
  const exactMatch = tableExamples.find((x) => x.key === Key);
  if (exactMatch) {
    return exactMatch.output;
  }
  const savedExamples = [
    ...tableExamples.map((x) => ({ input: x.input, output: x.output })),
    ...options.examples
  ];
  const REMAINING_TOKENS = this.Model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
  const examples = takeUntilTokens(
    savedExamples.length ? savedExamples : defaultExamples,
    REMAINING_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output))
  ).map(formatExample).flat();
  const output = await this.callModel({
    systemPrompt: "\nRewrite the text between the ".concat(START, " and ").concat(END, " tags to match the user prompt.\n").concat(instructions.map((x) => "\u2022 ".concat(x)).join("\n"), "\n").trim(),
    messages: [...examples, { type: "text", content: format(original, prompt), role: "user" }],
    maxTokens: options.length,
    stopSequences: [END]
  });
  // FIX: guard against an empty `choices` array — previously this crashed
  // with a TypeError on `result.includes(...)`.
  const rawContent = (_a = output.choices[0]) == null ? void 0 : _a.content;
  if (typeof rawContent !== "string") {
    throw new Error("The model did not return any content for ".concat(taskType));
  }
  let result = rawContent;
  // Strip the sentinel tags if the model echoed them back.
  if (result.includes(START)) {
    result = result.slice(result.indexOf(START) + START.length);
  }
  if (result.includes(END)) {
    result = result.slice(0, result.indexOf(END));
  }
  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      metadata: output.metadata,
      instructions: prompt,
      input: original,
      output: result,
      taskType,
      taskId
    });
  }
  return result;
};
954
+
955
// src/operations/summarize.ts
var import_zui6 = require("@bpinternal/zui");
var import_lodash_es2 = require("lodash-es");

// Options accepted by `Zai.summarize`.
var Options3 = import_zui6.z.object({
  prompt: import_zui6.z.string().describe("What should the text be summarized to?").default("New information, concepts and ideas that are deemed important"),
  format: import_zui6.z.string().describe("How to format the example text").default(
    "A normal text with multiple sentences and paragraphs. Use markdown to format the text into sections. Use headings, lists, and other markdown features to make the text more readable. Do not include links, images, or other non-text elements."
  ),
  length: import_zui6.z.number().min(10).max(1e5).describe("The length of the summary in tokens").default(250),
  intermediateFactor: import_zui6.z.number().min(1).max(10).describe("How many times longer (than final length) are the intermediate summaries generated").default(4),
  maxIterations: import_zui6.z.number().min(1).default(100),
  sliding: import_zui6.z.object({
    window: import_zui6.z.number().min(10).max(1e5),
    overlap: import_zui6.z.number().min(0).max(1e5)
  }).describe("Sliding window options").default({ window: 5e4, overlap: 250 })
});

// Sentinel tags wrapping the running summary in prompts.
var START2 = "\u25A0START\u25A0";
var END2 = "\u25A0END\u25A0";

/**
 * Summarizes `original` down to roughly `options.length` tokens.
 * Very long inputs are split into chunks, summarized in parallel and then
 * re-summarized (map/reduce); shorter inputs use an incremental sliding
 * window that amends a running summary.
 *
 * @param {string} original - text to summarize
 * @param {object} [_options] - see `Options3`
 * @returns {Promise<string>} the final summary
 * @throws {Error} when `options.length` exceeds the model output window or
 *   the model returns no content
 */
Zai.prototype.summarize = async function(original, _options) {
  var _a;
  const options = Options3.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const INPUT_COMPONENT_SIZE = Math.max(100, (this.Model.input.maxTokens - PROMPT_INPUT_BUFFER) / 4);
  options.prompt = tokenizer.truncate(options.prompt, INPUT_COMPONENT_SIZE);
  options.format = tokenizer.truncate(options.format, INPUT_COMPONENT_SIZE);
  const maxOutputSize = this.Model.output.maxTokens - PROMPT_OUTPUT_BUFFER;
  if (options.length > maxOutputSize) {
    throw new Error(
      "The desired output length is ".concat(maxOutputSize, " tokens long, which is more than the maximum of ").concat(this.Model.output.maxTokens, " tokens for this model (").concat(this.Model.name, ")")
    );
  }
  // Clamp the window/overlap so a single window always fits the model input.
  options.sliding.window = Math.min(options.sliding.window, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
  options.sliding.overlap = Math.min(options.sliding.overlap, options.sliding.window - 3 * options.sliding.overlap);
  const format = (summary, newText) => {
    return "\n".concat(START2, "\n").concat(summary.length ? summary : "<summary still empty>", "\n").concat(END2, "\n\nPlease amend the summary between the ").concat(START2, " and ").concat(END2, " tags to accurately reflect the prompt and the additional text below.\n\n<|start_new_information|>\n").concat(newText, "\n<|new_information|>").trim();
  };
  const tokens = tokenizer.split(original);
  const parts = Math.ceil(tokens.length / (options.sliding.window - options.sliding.overlap));
  let iteration = 0;
  const N = 2;
  // With 2^N or more windows, recurse map/reduce-style instead of sliding.
  const useMergeSort = parts >= Math.pow(2, N);
  const chunkSize = Math.ceil(tokens.length / (parts * N));
  if (useMergeSort) {
    const chunks = (0, import_lodash_es2.chunk)(tokens, chunkSize).map((x) => x.join(""));
    const allSummaries = await Promise.all(chunks.map((chunk4) => this.summarize(chunk4, options)));
    return this.summarize(allSummaries.join("\n\n============\n\n"), options);
  }
  let currentSummary = "";
  for (let i = 0; i < tokens.length; i += options.sliding.window) {
    const from = Math.max(0, i - options.sliding.overlap);
    const to = Math.min(tokens.length, i + options.sliding.window + options.sliding.overlap);
    const isFirst = i === 0;
    const isLast = to >= tokens.length;
    const slice = tokens.slice(from, to).join("");
    if (iteration++ >= options.maxIterations) {
      break;
    }
    // FIX: the summary size told to the model was `currentSummary.length`
    // (characters) while every budget here is expressed in tokens.
    const currentSummaryTokens = tokenizer.count(currentSummary);
    const instructions = [
      "At each step, you will receive a part of the text to summarize. Make sure to reply with the new summary in the tags ".concat(START2, " and ").concat(END2, "."),
      "Summarize the text and make sure that the main points are included.",
      "Ignore any unnecessary details and focus on the main points.",
      "Use short and concise sentences to increase readability and information density.",
      "When looking at the new information, focus on: " + options.prompt
    ];
    if (isFirst) {
      instructions.push(
        "The current summary is empty. You need to generate a summary that covers the main points of the text."
      );
    }
    let generationLength = options.length;
    if (!isLast) {
      // Intermediate summaries may grow beyond the final length to keep detail.
      generationLength = Math.min(
        currentSummaryTokens + options.length * options.intermediateFactor,
        maxOutputSize
      );
      instructions.push(
        "You need to amend the summary to include the new information. Make sure the summary is complete and covers all the main points."
      );
      instructions.push("The current summary is ".concat(currentSummaryTokens, " tokens long."));
      instructions.push("You can amend the summary to be up to ".concat(generationLength, " tokens long."));
    }
    if (isLast) {
      instructions.push(
        "This is the last part you will have to summarize. Make sure the summary is complete and covers all the main points."
      );
      instructions.push(
        "The current summary is ".concat(currentSummaryTokens, " tokens long. You need to make sure it is ").concat(options.length, " tokens or less.")
      );
      // FIX: compare tokens to the token budget (was comparing characters).
      if (currentSummaryTokens > options.length) {
        instructions.push(
          "The current summary is already too long, so you need to shorten it to ".concat(options.length, " tokens while also including the new information.")
        );
      }
    }
    const output = await this.callModel({
      systemPrompt: "\nYou are summarizing a text. The text is split into ".concat(parts, " parts, and you are currently working on part ").concat(iteration, ".\nAt every step, you will receive the current summary and a new part of the text. You need to amend the summary to include the new information (if needed).\nThe summary needs to cover the main points of the text and must be concise.\n\nIMPORTANT INSTRUCTIONS:\n").concat(instructions.map((x) => "- ".concat(x.trim())).join("\n"), "\n\nFORMAT OF THE SUMMARY:\n").concat(options.format, "\n").trim(),
      messages: [{ type: "text", content: format(currentSummary, slice), role: "user" }],
      maxTokens: generationLength,
      stopSequences: [END2]
    });
    // FIX: guard against an empty `choices` array — previously crashed with a
    // TypeError on `result.includes(...)`.
    const rawContent = (_a = output == null ? void 0 : output.choices[0]) == null ? void 0 : _a.content;
    if (typeof rawContent !== "string") {
      throw new Error("The model did not return any content while summarizing.");
    }
    let result = rawContent;
    if (result.includes(START2)) {
      result = result.slice(result.indexOf(START2) + START2.length);
    }
    if (result.includes("\u25A0")) {
      result = result.slice(0, result.indexOf("\u25A0"));
    }
    currentSummary = result;
  }
  return currentSummary.trim();
};
1068
+
1069
// src/operations/check.ts
var import_zui7 = require("@bpinternal/zui");

// Schema for one few-shot check example.
var Example2 = import_zui7.z.object({
  input: import_zui7.z.any(),
  check: import_zui7.z.boolean(),
  reason: import_zui7.z.string().optional()
});

var Options4 = import_zui7.z.object({
  examples: import_zui7.z.array(Example2).describe("Examples to check the condition against").default([])
});

// Sentinel answer markers; unlikely to appear in ordinary model output.
var TRUE = "\u25A0TRUE\u25A0";
var FALSE = "\u25A0FALSE\u25A0";
var END3 = "\u25A0END\u25A0";

/**
 * Asks the model whether `condition` holds for `input` and returns a boolean.
 * Uses task-scoped saved examples as few-shot context (with an exact-match
 * cache) and persists the new decision when a taskId is active.
 *
 * @param {*} input - value to evaluate (stringified before prompting)
 * @param {string} condition - the condition to verify
 * @param {object} [_options] - see `Options4`
 * @returns {Promise<boolean>}
 * @throws {Error} when the model response contains neither marker
 */
Zai.prototype.check = async function(input, condition, _options) {
  var _a;
  const options = Options4.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
  const taskId = this.taskId;
  const taskType = "zai.check";
  // Token budget split: half for the input, a fifth for the condition,
  // the remainder for examples.
  const PROMPT_TOKENS = {
    INPUT: Math.floor(0.5 * PROMPT_COMPONENT),
    CONDITION: Math.floor(0.2 * PROMPT_COMPONENT)
  };
  const inputAsString = tokenizer.truncate(stringify(input), PROMPT_TOKENS.INPUT);
  condition = tokenizer.truncate(condition, PROMPT_TOKENS.CONDITION);
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(condition);
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      condition
    })
  );
  const examples = taskId ? await this.adapter.getExamples({
    input: inputAsString,
    taskType,
    taskId
  }) : [];
  // Exact same request seen before: return the cached verdict.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return exactMatch.output;
  }
  const defaultExamples = [
    { input: "50 Cent", check: true, reason: "50 Cent is widely recognized as a public personality." },
    {
      input: ["apple", "banana", "carrot", "house"],
      check: false,
      reason: "The list contains a house, which is not a fruit. Also, the list contains a carrot, which is a vegetable."
    }
  ];
  const userExamples = [
    ...examples.map((e) => ({ input: e.input, check: e.output, reason: e.explanation })),
    ...options.examples
  ];
  let exampleId = 1;
  const formatInput = (input2, condition2) => {
    const header = userExamples.length ? "Expert Example #".concat(exampleId++) : 'Example of condition: "'.concat(condition2, '"');
    return "\n".concat(header, "\n<|start_input|>\n").concat(input2.trim(), "\n<|end_input|>\n").trim();
  };
  const formatOutput = (answer2, justification) => {
    return "\nAnalysis: ".concat(justification, "\nFinal Answer: ").concat(answer2 ? TRUE : FALSE, "\n").concat(END3, "\n").trim();
  };
  const formatExample = (example) => {
    var _a2, _b;
    return [
      { type: "text", content: formatInput(stringify((_a2 = example.input) != null ? _a2 : null), condition), role: "user" },
      {
        type: "text",
        content: formatOutput(example.check, (_b = example.reason) != null ? _b : ""),
        role: "assistant"
      }
    ];
  };
  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : defaultExamples,
    EXAMPLES_TOKENS,
    (el) => {
      var _a2;
      return tokenizer.count(stringify(el.input)) + tokenizer.count((_a2 = el.reason) != null ? _a2 : "");
    }
  ).map(formatExample).flat();
  const specialInstructions = userExamples.length ? "\n- You have been provided with examples from previous experts. Make sure to read them carefully before making your decision.\n- Make sure to refer to the examples provided by the experts to justify your decision (when applicable).\n- When in doubt, ground your decision on the examples provided by the experts instead of your own intuition.\n- When no example is similar to the input, make sure to provide a clear justification for your decision while inferring the decision-making process from the examples provided by the experts.\n".trim() : "";
  const output = await this.callModel({
    systemPrompt: "\nCheck if the following condition is true or false for the given input. Before answering, make sure to read the input and the condition carefully.\nJustify your answer, then answer with either ".concat(TRUE, " or ").concat(FALSE, " at the very end, then add ").concat(END3, " to finish the response.\nIMPORTANT: Make sure to answer with either ").concat(TRUE, " or ").concat(FALSE, " at the end of your response, but NOT both.\n---\nExpert Examples (#1 to #").concat(exampleId - 1, "):\n").concat(specialInstructions, "\n").trim(),
    stopSequences: [END3],
    messages: [
      ...allExamples,
      {
        type: "text",
        content: "\nConsidering the below input and above examples, is the following condition true or false?\n".concat(formatInput(inputAsString, condition), '\nIn your "Analysis", please refer to the Expert Examples # to justify your decision.').trim(),
        role: "user"
      }
    ]
  });
  // FIX: guard against an empty `choices` array — previously this crashed
  // with a TypeError on `answer.includes(...)` instead of reaching the
  // explicit error below.
  const rawAnswer = (_a = output.choices[0]) == null ? void 0 : _a.content;
  const answer = rawAnswer != null ? rawAnswer : "";
  const hasTrue = answer.includes(TRUE);
  const hasFalse = answer.includes(FALSE);
  if (!hasTrue && !hasFalse) {
    throw new Error("The model did not return a valid answer. The response was: ".concat(answer));
  }
  let finalAnswer;
  if (hasTrue && hasFalse) {
    // Both markers present: trust whichever appears last (the "Final Answer" line).
    finalAnswer = answer.lastIndexOf(TRUE) > answer.lastIndexOf(FALSE);
  } else {
    finalAnswer = hasTrue;
  }
  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      input: inputAsString,
      instructions: condition,
      metadata: output.metadata,
      output: finalAnswer,
      explanation: answer.replace(TRUE, "").replace(FALSE, "").replace(END3, "").replace("Final Answer:", "").trim()
    });
  }
  return finalAnswer;
};
1191
+
1192
// src/operations/filter.ts
var import_zui8 = require("@bpinternal/zui");
var import_lodash_es3 = require("lodash-es");

// Schema for one few-shot filter example.
var Example3 = import_zui8.z.object({
  input: import_zui8.z.any(),
  filter: import_zui8.z.boolean(),
  reason: import_zui8.z.string().optional()
});

var Options5 = import_zui8.z.object({
  tokensPerItem: import_zui8.z.number().min(1).max(1e5).optional().describe("The maximum number of tokens per item").default(250),
  examples: import_zui8.z.array(Example3).describe("Examples to filter the condition against").default([])
});

var END4 = "\u25A0END\u25A0";

/**
 * Filters the `input` array, keeping elements the model judges to satisfy
 * `condition`. Elements are grouped into token-bounded chunks (max 50 items
 * each) which are evaluated in parallel; per-chunk verdicts are saved as
 * examples when a taskId is active.
 *
 * @param {Array} input - elements to filter
 * @param {string} condition - the condition each element is tested against
 * @param {object} [_options] - see `Options5`
 * @returns {Promise<Array>} the elements for which the model answered true
 */
Zai.prototype.filter = async function(input, condition, _options) {
  const options = Options5.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.filter";
  const MAX_ITEMS_PER_CHUNK = 50;
  const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5));
  const TOKENS_CONDITION_MAX = (0, import_lodash_es3.clamp)(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition));
  const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX;
  condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX);
  // Greedily pack elements into chunks bounded by tokens and item count.
  let chunks = [];
  let currentChunk = [];
  let currentChunkTokens = 0;
  for (const element of input) {
    const elementAsString = tokenizer.truncate(stringify(element, false), options.tokensPerItem);
    const elementTokens = tokenizer.count(elementAsString);
    if (currentChunkTokens + elementTokens > TOKENS_INPUT_ARRAY_MAX || currentChunk.length >= MAX_ITEMS_PER_CHUNK) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentChunkTokens = 0;
    }
    currentChunk.push(element);
    currentChunkTokens += elementTokens;
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }
  chunks = chunks.filter((x) => x.length > 0);
  const formatInput = (input2, condition2) => {
    return "\nCondition to check:\n".concat(condition2, "\n\nItems (from \u25A00 to \u25A0").concat(input2.length - 1, ")\n==============================\n").concat(input2.map((x, idx) => {
      var _a;
      return "\u25A0".concat(idx, " = ").concat(stringify((_a = x.input) != null ? _a : null, false));
    }).join("\n"), "\n").trim();
  };
  const formatExamples = (examples) => {
    return "\n".concat(examples.map((x, idx) => "\u25A0".concat(idx, ":").concat(!!x.filter ? "true" : "false")).join(""), "\n").concat(END4, "\n====\nHere's the reasoning behind each example:\n").concat(examples.map((x, idx) => {
      var _a;
      return "\u25A0".concat(idx, ":").concat(!!x.filter ? "true" : "false", ":").concat((_a = x.reason) != null ? _a : "No reason provided");
    }).join("\n"), "\n").trim();
  };
  // Built-in fallback shots used when no task/user examples are available.
  const genericExamples = [
    {
      input: "apple",
      filter: true,
      reason: "Apples are fruits"
    },
    {
      input: "Apple Inc.",
      filter: false,
      reason: "Apple Inc. is a company, not a fruit"
    },
    {
      input: "banana",
      filter: true,
      reason: "Bananas are fruits"
    },
    {
      input: "potato",
      filter: false,
      reason: "Potatoes are vegetables"
    }
  ];
  const genericExamplesMessages = [
    {
      type: "text",
      content: formatInput(genericExamples, "is a fruit"),
      role: "user"
    },
    {
      type: "text",
      content: formatExamples(genericExamples),
      role: "assistant"
    }
  ];
  const filterChunk = async (chunk4) => {
    var _a, _b;
    const examples = taskId ? await this.adapter.getExamples({
      // The Table API can't search for a huge input string
      input: JSON.stringify(chunk4).slice(0, 1e3),
      taskType,
      taskId
    }).then(
      (x) => x.map((y) => ({ filter: y.output, input: y.input, reason: y.explanation }))
    ) : [];
    const allExamples = takeUntilTokens(
      [...examples, ...(_a = options.examples) != null ? _a : []],
      TOKENS_EXAMPLES_MAX,
      (el) => tokenizer.count(stringify(el.input))
    );
    const exampleMessages = [
      {
        type: "text",
        content: formatInput(allExamples, condition),
        role: "user"
      },
      {
        type: "text",
        content: formatExamples(allExamples),
        role: "assistant"
      }
    ];
    const output = await this.callModel({
      systemPrompt: '\nYou are given a list of items. Your task is to filter out the items that meet the condition below.\nYou need to return the full list of items with the format:\n\u25A0x:true\u25A0y:false\u25A0z:true (where x, y, z are the indices of the items in the list)\nYou need to start with "\u25A00" and go up to the last index "\u25A0'.concat(chunk4.length - 1, '".\nIf an item meets the condition, you should return ":true", otherwise ":false".\n\nIMPORTANT: Make sure to read the condition and the examples carefully before making your decision.\nThe condition is: "').concat(condition, '"\n').trim(),
      stopSequences: [END4],
      messages: [
        // FIX: the fallback was gated on `exampleMessages.length`, which is
        // always 2 (the array is always built above), so the generic examples
        // were dead code. Gate on whether any real examples exist.
        ...allExamples.length ? exampleMessages : genericExamplesMessages,
        {
          type: "text",
          content: formatInput(
            chunk4.map((x) => ({ input: x })),
            condition
          ),
          role: "user"
        }
      ]
    });
    // FIX: tolerate an empty `choices` array instead of crashing on `.trim()`.
    const rawAnswer = (_b = output.choices[0]) == null ? void 0 : _b.content;
    const answer = rawAnswer != null ? rawAnswer : "";
    // Parse "■idx:true■idx:false…" verdicts back into {idx, filter} pairs.
    const indices = answer.trim().split("\u25A0").filter((x) => x.length > 0).map((x) => {
      var _a2;
      const [idx, filter] = x.split(":");
      return { idx: parseInt((_a2 = idx == null ? void 0 : idx.trim()) != null ? _a2 : ""), filter: (filter == null ? void 0 : filter.toLowerCase().trim()) === "true" };
    });
    // Missing verdicts default to false (element dropped).
    const partial = chunk4.filter((_, idx) => {
      var _a2, _b2;
      return (_b2 = (_a2 = indices.find((x) => x.idx === idx)) == null ? void 0 : _a2.filter) != null ? _b2 : false;
    });
    if (taskId) {
      const key = fastHash(
        stringify({
          taskId,
          taskType,
          input: JSON.stringify(chunk4),
          condition
        })
      );
      await this.adapter.saveExample({
        key,
        taskType,
        taskId,
        input: JSON.stringify(chunk4),
        output: partial,
        instructions: condition,
        metadata: output.metadata
      });
    }
    return partial;
  };
  const filteredChunks = await Promise.all(chunks.map(filterChunk));
  return filteredChunks.flat();
};
1356
+
1357
+ // src/operations/extract.ts
1358
+ var import_zui9 = require("@bpinternal/zui");
1359
+ var import_json5 = __toESM(require("json5"));
1360
+ var import_jsonrepair = require("jsonrepair");
1361
+ var import_lodash_es4 = require("lodash-es");
1362
+
1363
+ // src/operations/errors.ts
1364
// src/operations/errors.ts
/**
 * Raised when model output cannot be parsed/validated as JSON.
 * Carries both the offending JSON text (`json`) and the underlying
 * parse/validation error (`error`) for diagnostics.
 */
var JsonParsingError = class extends Error {
  constructor(json, error) {
    super("Error parsing JSON:\n\n---JSON---\n".concat(json, "\n\n---Error---\n\n ").concat(error));
    this.json = json;
    this.error = error;
  }
};
1372
+
1373
// src/operations/extract.ts
var Options6 = import_zui9.z.object({
  instructions: import_zui9.z.string().optional().describe("Instructions to guide the user on how to extract the data"),
  chunkLength: import_zui9.z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3)
});

// Sentinel markers delimiting each extracted JSON object in model output.
var START3 = "\u25A0json_start\u25A0";
var END5 = "\u25A0json_end\u25A0";
var NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";

/**
 * Extracts structured data matching `schema` from `input`.
 * `schema` must be a ZodObject (single extraction) or ZodArray<ZodObject>
 * (multi extraction; long inputs are chunked and processed in parallel).
 * Results are validated with the schema; saved examples serve as shots and
 * an exact-match cache short-circuits the model call.
 *
 * @param {*} input - source data (stringified before prompting)
 * @param {import("@bpinternal/zui").ZodType} schema - target shape
 * @param {object} [_options] - see `Options6`
 * @returns {Promise<object|Array>} extracted object or array of objects
 * @throws {Error} on unsupported schema types or missing model content
 * @throws {JsonParsingError} when a JSON element cannot be repaired/parsed
 */
Zai.prototype.extract = async function(input, schema, _options) {
  var _a, _b, _c;
  const options = Options6.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.extract";
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
  let isArrayOfObjects = false;
  const originalSchema = schema;
  if (schema instanceof import_zui9.z.ZodObject) {
  } else if (schema instanceof import_zui9.z.ZodArray) {
    if (schema._def.type instanceof import_zui9.z.ZodObject) {
      isArrayOfObjects = true;
      schema = schema._def.type;
    } else {
      throw new Error("Schema must be a ZodObject or a ZodArray<ZodObject>");
    }
  } else {
    throw new Error("Schema must be either a ZuiObject or a ZuiArray<ZuiObject>");
  }
  const schemaTypescript = schema.toTypescript({ declaration: false });
  const schemaLength = tokenizer.count(schemaTypescript);
  options.chunkLength = Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength);
  const keys = Object.keys(schema.shape);
  let inputAsString = stringify(input);
  if (tokenizer.count(inputAsString) > options.chunkLength) {
    if (isArrayOfObjects) {
      // Too long for one window: split and extract each chunk in parallel.
      const tokens = tokenizer.split(inputAsString);
      const chunks = (0, import_lodash_es4.chunk)(tokens, options.chunkLength).map((x) => x.join(""));
      const all = await Promise.all(chunks.map((chunk4) => this.extract(chunk4, originalSchema)));
      return all.flat();
    } else {
      inputAsString = tokenizer.truncate(stringify(input), options.chunkLength);
    }
  }
  const instructions = [];
  if (options.instructions) {
    instructions.push(options.instructions);
  }
  const shape = "{ ".concat(keys.map((key) => '"'.concat(key, '": ...')).join(", "), " }");
  const abbv = "{ ... }";
  if (isArrayOfObjects) {
    instructions.push("You may have multiple elements, or zero elements in the input.");
    instructions.push("You must extract each element separately.");
    instructions.push("Each element must be a JSON object with exactly the format: ".concat(START3).concat(shape).concat(END5));
    instructions.push('When you are done extracting all elements, type "'.concat(NO_MORE, '" to finish.'));
    instructions.push("For example, if you have zero elements, the output should look like this: ".concat(NO_MORE));
    instructions.push(
      "For example, if you have two elements, the output should look like this: ".concat(START3).concat(abbv).concat(END5).concat(START3).concat(abbv).concat(END5).concat(NO_MORE)
    );
  } else {
    instructions.push("You may have exactly one element in the input.");
    instructions.push("The element must be a JSON object with exactly the format: ".concat(START3).concat(shape).concat(END5));
  }
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join("\n"));
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions
    })
  );
  const examples = taskId ? await this.adapter.getExamples({
    input: inputAsString,
    taskType,
    taskId
  }) : [];
  // Exact same request seen before: return the cached extraction.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return exactMatch.output;
  }
  const defaultExample = isArrayOfObjects ? {
    input: "The story goes as follow.\nOnce upon a time, there was a person named Alice who was 30 years old.\nThen, there was a person named Bob who was 25 years old.\nThe end.",
    schema: "Array<{ name: string, age: number }>",
    instructions: "Extract all people",
    extracted: [
      {
        name: "Alice",
        age: 30
      },
      {
        name: "Bob",
        age: 25
      }
    ]
  } : {
    input: "The story goes as follow.\nOnce upon a time, there was a person named Alice who was 30 years old.\nThe end.",
    schema: "{ name: string, age: number }",
    instructions: "Extract the person",
    extracted: { name: "Alice", age: 30 }
  };
  const userExamples = examples.map((e) => ({
    input: e.input,
    extracted: e.output,
    schema: schemaTypescript,
    instructions: options.instructions
  }));
  let exampleId = 1;
  const formatInput = (input2, schema2, instructions2) => {
    const header = userExamples.length ? "Expert Example #".concat(exampleId++) : "Here's an example to help you understand the format:";
    return "\n".concat(header, "\n\n<|start_schema|>\n").concat(schema2, "\n<|end_schema|>\n\n<|start_instructions|>\n").concat(instructions2 != null ? instructions2 : "No specific instructions, just follow the schema above.", "\n<|end_instructions|>\n\n<|start_input|>\n").concat(input2.trim(), "\n<|end_input|>\n ").trim();
  };
  const formatOutput = (extracted) => {
    extracted = (0, import_lodash_es4.isArray)(extracted) ? extracted : [extracted];
    return extracted.map(
      (x) => "\n".concat(START3, "\n").concat(JSON.stringify(x, null, 2), "\n").concat(END5).trim()
    ).join("\n") + NO_MORE;
  };
  const formatExample = (example) => {
    var _a2;
    return [
      {
        type: "text",
        content: formatInput(stringify((_a2 = example.input) != null ? _a2 : null), example.schema, example.instructions),
        role: "user"
      },
      {
        type: "text",
        content: formatOutput(example.extracted),
        role: "assistant"
      }
    ];
  };
  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : [defaultExample],
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
  ).map(formatExample).flat();
  const output = await this.callModel({
    systemPrompt: "\nExtract the following information from the input:\n".concat(schemaTypescript, "\n====\n\n").concat(instructions.map((x) => "\u2022 ".concat(x)).join("\n"), "\n").trim(),
    stopSequences: [isArrayOfObjects ? NO_MORE : END5],
    messages: [
      ...allExamples,
      {
        role: "user",
        type: "text",
        content: formatInput(inputAsString, schemaTypescript, (_a = options.instructions) != null ? _a : "")
      }
    ]
  });
  // FIX: guard against an empty `choices` array — previously crashed with a
  // TypeError on `answer.split(...)`.
  const answer = (_b = output.choices[0]) == null ? void 0 : _b.content;
  if (typeof answer !== "string") {
    throw new Error("The model did not return any content for ".concat(taskType));
  }
  const elements = answer.split(START3).filter((x) => x.trim().length > 0).map((x) => {
    try {
      const json = x.slice(0, x.indexOf(END5)).trim();
      const repairedJson = (0, import_jsonrepair.jsonrepair)(json);
      const parsedJson = import_json5.default.parse(repairedJson);
      return schema.parse(parsedJson);
    } catch (error) {
      throw new JsonParsingError(x, error instanceof Error ? error : new Error("Unknown error"));
    }
  }).filter((x) => x !== null);
  let final;
  if (isArrayOfObjects) {
    final = elements;
  } else if (elements.length === 0) {
    final = schema.parse({});
  } else {
    final = elements[0];
  }
  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      // FIX: was saved as `"zai/" + taskId`, but `getExamples` above (and every
      // sibling operation) uses the plain `taskId` — saved extract examples
      // could therefore never be retrieved.
      taskId,
      taskType,
      instructions: (_c = options.instructions) != null ? _c : "No specific instructions",
      input: inputAsString,
      output: final,
      metadata: output.metadata
    });
  }
  return final;
};
1554
+
1555
+ // src/operations/label.ts
1556
+ var import_zui10 = require("@bpinternal/zui");
1557
+ var import_lodash_es5 = require("lodash-es");
1558
+ var LABELS = {
1559
+ ABSOLUTELY_NOT: "ABSOLUTELY_NOT",
1560
+ PROBABLY_NOT: "PROBABLY_NOT",
1561
+ AMBIGUOUS: "AMBIGUOUS",
1562
+ PROBABLY_YES: "PROBABLY_YES",
1563
+ ABSOLUTELY_YES: "ABSOLUTELY_YES"
1564
+ };
1565
+ var ALL_LABELS = Object.values(LABELS).join(" | ");
1566
// Shape of a single few-shot example for zai.label(): an arbitrary input
// plus a per-key verdict (label + optional explanation).
// NOTE(review): z.enum usually takes an array of literals; here it receives
// ALL_LABELS, a " | "-joined string — confirm zui accepts this form.
var ZaiLabelExampleSchema = import_zui10.z.object({
  input: import_zui10.z.any(),
  labels: import_zui10.z.record(
    import_zui10.z.object({
      label: import_zui10.z.enum(ALL_LABELS),
      explanation: import_zui10.z.string().optional()
    })
  )
});
// Options accepted by zai.label().
var Options7 = import_zui10.z.object({
  // Few-shot examples fed to the model alongside any stored ones.
  examples: import_zui10.z.array(ZaiLabelExampleSchema).default([]).describe("Examples to help the user make a decision"),
  // Free-form guidance appended to the system prompt.
  instructions: import_zui10.z.string().optional().describe("Instructions to guide the user on how to extract the data"),
  // Per-chunk token budget; oversized inputs are split and labeled recursively.
  chunkLength: import_zui10.z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3)
});
1576
// Schema for the labels argument of zai.label(): a { key -> question } map.
// Keys must be 1-250 characters of [a-zA-Z0-9_]; each violation is reported
// as a custom issue rather than aborting on the first problem.
var Labels = import_zui10.z
  .record(import_zui10.z.string().min(1).max(250), import_zui10.z.string())
  .superRefine((labels, ctx) => {
    const keys = Object.keys(labels);
    for (const key of keys) {
      const tooShort = key.length < 1;
      const tooLong = key.length > 250;
      if (tooShort || tooLong) {
        ctx.addIssue({ message: `The label key "${key}" must be between 1 and 250 characters long`, code: "custom" });
      }
      // NOTE(review): plain-object keys are always unique, so this duplicate
      // check cannot fire for a parsed record; kept for behavioral parity.
      if (keys.indexOf(key) !== keys.lastIndexOf(key)) {
        ctx.addIssue({ message: `Duplicate label: ${labels[key]}`, code: "custom" });
      }
      const hasInvalidChars = /[^a-zA-Z0-9_]/.test(key);
      if (hasInvalidChars) {
        ctx.addIssue({
          message: `The label key "${key}" must only contain alphanumeric characters and underscores`,
          code: "custom"
        });
      }
    }
    return true;
  });
1594
// Normalizes a raw model-emitted verdict string into one of the LABELS
// values. Matching is keyword-based and tolerant of casing/whitespace;
// anything unrecognized falls back to AMBIGUOUS.
var parseLabel = (label) => {
  label = label.toUpperCase().replace(/\s+/g, "_").replace(/_{2,}/g, "_").trim();
  if (label.includes("ABSOLUTELY") && label.includes("NOT")) {
    return LABELS.ABSOLUTELY_NOT;
  } else if (label.includes("NOT")) {
    return LABELS.PROBABLY_NOT;
  } else if (label.includes("AMBIGUOUS")) {
    return LABELS.AMBIGUOUS;
  }
  // Bug fix: test the stronger "ABSOLUTELY ... YES" combination BEFORE the
  // generic "YES" test. In the original order the generic includes("YES")
  // matched first, making the ABSOLUTELY_YES branch unreachable and
  // downgrading every "ABSOLUTELY_YES" verdict to PROBABLY_YES.
  if (label.includes("ABSOLUTELY") && label.includes("YES")) {
    return LABELS.ABSOLUTELY_YES;
  } else if (label.includes("YES")) {
    return LABELS.PROBABLY_YES;
  }
  return LABELS.AMBIGUOUS;
};
1610
// Labels `input` against each yes/no question in `_labels`, returning a
// { key: boolean } map. Flow: validate args -> recursively chunk oversized
// input -> gather few-shot examples -> prompt the model for a per-label
// verdict -> parse verdicts -> optionally persist the result as an example.
Zai.prototype.label = async function(input, _labels, _options) {
  var _a, _b;
  const options = Options7.parse(_options != null ? _options : {});
  const labels = Labels.parse(_labels);
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.label";
  // Token budget: clamp the requested chunk length to the model's input
  // window (minus a prompt buffer), then split it roughly 50/50 between
  // few-shot examples and the actual input.
  const TOTAL_MAX_TOKENS = (0, import_lodash_es5.clamp)(options.chunkLength, 1e3, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
  const CHUNK_EXAMPLES_MAX_TOKENS = (0, import_lodash_es5.clamp)(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 1e4);
  const CHUNK_INPUT_MAX_TOKENS = (0, import_lodash_es5.clamp)(
    TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
    TOTAL_MAX_TOKENS * 0.5,
    TOTAL_MAX_TOKENS
  );
  const inputAsString = stringify(input);
  // Oversized input: split into token chunks, label each chunk recursively
  // (in parallel), then OR the per-chunk booleans together — a label is
  // true if any chunk answered true.
  if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
    const tokens = tokenizer.split(inputAsString);
    const chunks = (0, import_lodash_es5.chunk)(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(""));
    const allLabels = await Promise.all(chunks.map((chunk4) => this.label(chunk4, _labels)));
    return allLabels.reduce((acc, x) => {
      Object.keys(x).forEach((key) => {
        if (acc[key] === true) {
          acc[key] = true;
        } else {
          acc[key] = acc[key] || x[key];
        }
      });
      return acc;
    }, {});
  }
  // Stop marker ("■END■") the model must emit when done answering.
  const END6 = "\u25A0END\u25A0";
  // Deterministic cache key for this exact (task, input, instructions) combo.
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: (_a = options.instructions) != null ? _a : ""
    })
  );
  // Collapses the per-label verdict map to booleans: only PROBABLY_YES and
  // ABSOLUTELY_YES count as true.
  const convertToAnswer = (mapping) => {
    return Object.keys(labels).reduce((acc, key) => {
      var _a2, _b2;
      acc[key] = ((_a2 = mapping[key]) == null ? void 0 : _a2.label) === "ABSOLUTELY_YES" || ((_b2 = mapping[key]) == null ? void 0 : _b2.label) === "PROBABLY_YES";
      return acc;
    }, {});
  };
  // Stored examples for this task (when a task id exists), plus any examples
  // supplied inline via options (treated as maximally similar).
  const examples = taskId ? await this.adapter.getExamples({
    input: inputAsString,
    taskType,
    taskId
  }) : [];
  options.examples.forEach((example) => {
    examples.push({
      key: fastHash(JSON.stringify(example)),
      input: example.input,
      similarity: 1,
      explanation: "",
      output: example.labels
    });
  });
  // Exact cache hit: reuse the stored verdicts without calling the model.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return convertToAnswer(exactMatch.output);
  }
  // Take as many examples as fit the example token budget (each costs its
  // input + output + explanation tokens plus a flat 100-token overhead),
  // rendered as alternating user/assistant "Expert Example" messages.
  const allExamples = takeUntilTokens(
    examples,
    CHUNK_EXAMPLES_MAX_TOKENS,
    (el) => {
      var _a2;
      return tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output)) + tokenizer.count((_a2 = el.explanation) != null ? _a2 : "") + 100;
    }
  ).map((example, idx) => [
    {
      type: "text",
      role: "user",
      content: "\nExpert Example #".concat(idx + 1, "\n\n<|start_input|>\n").concat(stringify(example.input), "\n<|end_input|>").trim()
    },
    {
      type: "text",
      role: "assistant",
      content: "\nExpert Example #".concat(idx + 1, "\n============\n").concat(Object.keys(example.output).map(
        (key) => {
          var _a2, _b2;
          return "\n\u25A0".concat(key, ":\u3010").concat((_a2 = example.output[key]) == null ? void 0 : _a2.explanation, "\u3011:").concat((_b2 = example.output[key]) == null ? void 0 : _b2.label, "\u25A0\n").trim();
        }
      ).join("\n"), "\n").concat(END6, "\n").trim()
    }
  ]).flat();
  // Per-label answer template the model must follow: ■key:【explanation】:x■
  const format = Object.keys(labels).map((key) => {
    return "\n\u25A0".concat(key, ':\u3010explanation (where "explanation" is answering the question "').concat(labels[key], '")\u3011:x\u25A0 (where x is ').concat(ALL_LABELS, ")\n").trim();
  }).join("\n\n");
  const output = await this.callModel({
    stopSequences: [END6],
    systemPrompt: "\nYou need to tag the input with the following labels based on the question asked:\n".concat(LABELS.ABSOLUTELY_NOT, ': You are absolutely sure that the answer is "NO" to the question.\n').concat(LABELS.PROBABLY_NOT, ': You are leaning towards "NO" to the question.\n').concat(LABELS.AMBIGUOUS, ": You are unsure about the answer to the question.\n").concat(LABELS.PROBABLY_YES, ': You are leaning towards "YES" to the question.\n').concat(LABELS.ABSOLUTELY_YES, ': You are absolutely sure that the answer is "YES" to the question.\n\nYou need to return a mapping of the labels, an explanation and the answer for each label following the format below:\n```\n').concat(format, "\n").concat(END6, "\n```\n\n").concat(options.instructions, '\n\n===\nYou should consider the Expert Examples below to help you make your decision.\nIn your "Analysis", please refer to the Expert Examples # to justify your decision.\n').trim(),
    messages: [
      ...allExamples,
      {
        type: "text",
        role: "user",
        content: "\nInput to tag:\n<|start_input|>\n".concat(inputAsString, "\n<|end_input|>\n\nAnswer with this following format:\n```\n").concat(format, "\n").concat(END6, "\n```\n\nFormat cheatsheet:\n```\n\u25A0label:\u3010explanation\u3011:x\u25A0\n```\n\nWhere `x` is one of the following: ").concat(ALL_LABELS, '\n\nRemember: In your `explanation`, please refer to the Expert Examples # (and quote them) that are relevant to ground your decision-making process.\nThe Expert Examples are there to help you make your decision. They have been provided by experts in the field and their answers (and reasoning) are considered the ground truth and should be used as a reference to make your decision when applicable.\nFor example, you can say: "According to Expert Example #1, ..."').trim()
      }
    ]
  });
  // NOTE(review): choices[0] is assumed present — other operations in this
  // bundle guard with optional chaining; an empty choices array would throw
  // here. Confirm callModel always returns at least one choice.
  const answer = output.choices[0].content;
  // Extract each label's verdict from the raw answer via the
  // ■key:【explanation】:LABEL■ pattern; unmatched labels default to
  // AMBIGUOUS with an empty explanation.
  const final = Object.keys(labels).reduce((acc, key) => {
    const match = answer.match(new RegExp("\u25A0".concat(key, ":\u3010(.+)\u3011:(\\w{2,})\u25A0"), "i"));
    if (match) {
      const explanation = match[1].trim();
      const label = parseLabel(match[2]);
      acc[key] = {
        explanation,
        label
      };
    } else {
      acc[key] = {
        explanation: "",
        label: LABELS.AMBIGUOUS
      };
    }
    return acc;
  }, {});
  // Persist the parsed verdicts as a reusable example for this task.
  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      instructions: (_b = options.instructions) != null ? _b : "",
      metadata: output.metadata,
      input: inputAsString,
      output: final
    });
  }
  return convertToAnswer(final);
};