@botpress/zai 1.0.0-beta.8 → 1.0.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/csj/adapters/adapter.js +2 -0
  2. package/dist/csj/adapters/botpress-table.js +173 -0
  3. package/dist/csj/adapters/memory.js +12 -0
  4. package/dist/csj/index.js +9 -0
  5. package/dist/csj/models.js +387 -0
  6. package/dist/csj/operations/check.js +143 -0
  7. package/dist/csj/operations/constants.js +2 -0
  8. package/dist/csj/operations/errors.js +15 -0
  9. package/dist/csj/operations/extract.js +214 -0
  10. package/dist/csj/operations/filter.js +182 -0
  11. package/dist/csj/operations/label.js +242 -0
  12. package/dist/csj/operations/rewrite.js +113 -0
  13. package/dist/csj/operations/summarize.js +134 -0
  14. package/dist/csj/operations/text.js +48 -0
  15. package/dist/csj/utils.js +44 -0
  16. package/dist/csj/zai.js +142 -0
  17. package/dist/esm/adapters/adapter.js +5 -0
  18. package/dist/esm/adapters/botpress-table.js +194 -0
  19. package/dist/esm/adapters/memory.js +15 -0
  20. package/dist/esm/index.js +11 -0
  21. package/dist/esm/models.js +390 -0
  22. package/dist/esm/operations/check.js +149 -0
  23. package/dist/esm/operations/constants.js +6 -0
  24. package/dist/esm/operations/errors.js +18 -0
  25. package/dist/esm/operations/extract.js +217 -0
  26. package/dist/esm/operations/filter.js +189 -0
  27. package/dist/esm/operations/label.js +246 -0
  28. package/dist/esm/operations/rewrite.js +113 -0
  29. package/dist/esm/operations/summarize.js +134 -0
  30. package/dist/esm/operations/text.js +48 -0
  31. package/dist/esm/utils.js +51 -0
  32. package/dist/esm/zai.js +161 -0
  33. package/package.json +17 -13
  34. package/scripts/update-models.mts +76 -0
  35. package/scripts/update-types.mts +59 -0
  36. package/src/adapters/adapter.ts +35 -0
  37. package/src/adapters/botpress-table.ts +214 -0
  38. package/src/adapters/memory.ts +13 -0
  39. package/src/index.ts +11 -0
  40. package/src/models.ts +394 -0
  41. package/src/operations/__tests/botpress_docs.txt +26040 -0
  42. package/src/operations/__tests/cache.jsonl +101 -0
  43. package/src/operations/__tests/index.ts +86 -0
  44. package/src/operations/check.ts +188 -0
  45. package/src/operations/constants.ts +2 -0
  46. package/src/operations/errors.ts +9 -0
  47. package/src/operations/extract.ts +292 -0
  48. package/src/operations/filter.ts +232 -0
  49. package/src/operations/label.ts +333 -0
  50. package/src/operations/rewrite.ts +149 -0
  51. package/src/operations/summarize.ts +194 -0
  52. package/src/operations/text.ts +64 -0
  53. package/src/sdk-interfaces/llm/generateContent.ts +127 -0
  54. package/src/sdk-interfaces/llm/listLanguageModels.ts +19 -0
  55. package/src/utils.ts +62 -0
  56. package/src/zai.ts +193 -0
  57. package/dist/index.cjs +0 -1903
  58. package/dist/index.cjs.map +0 -1
  59. package/dist/index.d.cts +0 -916
  60. package/dist/index.d.ts +0 -916
  61. package/dist/index.js +0 -1873
  62. package/dist/index.js.map +0 -1
@@ -0,0 +1,217 @@
1
+ import sdk from "@botpress/sdk";
2
+ const { z } = sdk;
3
+ import JSON5 from "json5";
4
+ import { jsonrepair } from "jsonrepair";
5
+ import _ from "lodash";
6
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
7
+ import { Zai } from "../zai";
8
+ import { PROMPT_INPUT_BUFFER } from "./constants";
9
+ import { JsonParsingError } from "./errors";
10
// Sentinel markers the model is asked to emit around each extracted JSON object,
// and the marker that signals that no further elements follow.
const START = "\u25A0json_start\u25A0";
const END = "\u25A0json_end\u25A0";
const NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";

// Caller-facing options accepted by `extract`.
const Options = z.object({
  // Free-form guidance that is forwarded to the model with the schema.
  instructions: z.string().optional().describe("Instructions to guide the user on how to extract the data"),
  // Token budget per chunk; validated to 100..100000 and defaulting to 16000.
  chunkLength: z.number().min(100).max(100000).optional().describe("The maximum number of tokens per chunk").default(16000)
});
17
/**
 * Extracts structured data from `input` by prompting the model with a schema.
 *
 * @param {*} input - Anything accepted by `stringify`; it is converted to text before prompting.
 * @param {*} schema - A `sdk.ZodObject`, or a `sdk.ZodArray` whose element type is a `sdk.ZodObject`.
 * @param {object} [_options] - Validated against `Options` (`instructions`, `chunkLength`).
 * @returns {Promise<*>} One parsed object for an object schema, or an array of parsed objects
 *   for an array schema.
 * @throws {Error} When the schema is neither an object nor an array of objects, or when the
 *   model returns no content.
 * @throws {JsonParsingError} When an element returned by the model cannot be repaired, parsed
 *   and validated against the schema.
 */
Zai.prototype.extract = async function(input, schema, _options) {
  const options = Options.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.extract";
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);

  // Work on the element schema; remember the caller's schema for recursive calls.
  let isArrayOfObjects = false;
  const originalSchema = schema;
  if (schema instanceof sdk.ZodArray) {
    if (!(schema._def.type instanceof sdk.ZodObject)) {
      throw new Error("Schema must be a ZodObject or a ZodArray<ZodObject>");
    }
    isArrayOfObjects = true;
    schema = schema._def.type;
  } else if (!(schema instanceof sdk.ZodObject)) {
    throw new Error("Schema must be either a ZuiObject or a ZuiArray<ZuiObject>");
  }

  const schemaTypescript = schema.toTypescript({ declaration: false });
  const schemaLength = tokenizer.count(schemaTypescript);
  // The schema is part of the prompt, so it eats into the per-chunk budget.
  options.chunkLength = Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength);
  const keys = Object.keys(schema.shape);

  let inputAsString = stringify(input);
  if (tokenizer.count(inputAsString) > options.chunkLength) {
    if (isArrayOfObjects) {
      // Too large for one prompt: extract from each chunk and concatenate the results.
      // The caller's options are forwarded so instructions and chunk length also apply per chunk.
      const tokens = tokenizer.split(inputAsString);
      const chunks = _.chunk(tokens, options.chunkLength).map((x) => x.join(""));
      const all = await Promise.all(chunks.map((chunk) => this.extract(chunk, originalSchema, _options)));
      return all.flat();
    }
    // A single object cannot be merged across chunks, so the input is truncated instead.
    inputAsString = tokenizer.truncate(inputAsString, options.chunkLength);
  }

  const instructions = [];
  if (options.instructions) {
    instructions.push(options.instructions);
  }
  const shape = `{ ${keys.map((key) => `"${key}": ...`).join(", ")} }`;
  const abbv = "{ ... }";
  if (isArrayOfObjects) {
    instructions.push("You may have multiple elements, or zero elements in the input.");
    instructions.push("You must extract each element separately.");
    instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`);
    instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`);
    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`);
    instructions.push(
      `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
    );
  } else {
    instructions.push("You may have exactly one element in the input.");
    instructions.push(`The element must be a JSON object with exactly the format: ${START}${shape}${END}`);
  }

  // Whatever is left of the prompt budget after the input and instructions goes to examples.
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join("\n"));
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions
    })
  );
  const examples = taskId ? await this.adapter.getExamples({
    input: inputAsString,
    taskType,
    taskId
  }) : [];

  // An example stored under the same key is returned directly, without calling the model.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return exactMatch.output;
  }

  const defaultExample = isArrayOfObjects ? {
    input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
Then, there was a person named Bob who was 25 years old.
The end.`,
    schema: "Array<{ name: string, age: number }>",
    instructions: "Extract all people",
    extracted: [
      {
        name: "Alice",
        age: 30
      },
      {
        name: "Bob",
        age: 25
      }
    ]
  } : {
    input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
The end.`,
    schema: "{ name: string, age: number }",
    instructions: "Extract the person",
    extracted: { name: "Alice", age: 30 }
  };
  const userExamples = examples.map((e) => ({
    input: e.input,
    extracted: e.output,
    schema: schemaTypescript,
    instructions: options.instructions
  }));

  let exampleId = 1;
  const formatInput = (input2, schema2, instructions2) => {
    const header = userExamples.length ? `Expert Example #${exampleId++}` : "Here's an example to help you understand the format:";
    return `
${header}

<|start_schema|>
${schema2}
<|end_schema|>

<|start_instructions|>
${instructions2 != null ? instructions2 : "No specific instructions, just follow the schema above."}
<|end_instructions|>

<|start_input|>
${input2.trim()}
<|end_input|>
`.trim();
  };
  const formatOutput = (extracted) => {
    const list = _.isArray(extracted) ? extracted : [extracted];
    return list.map(
      (x) => `
${START}
${JSON.stringify(x, null, 2)}
${END}`.trim()
    ).join("\n") + NO_MORE;
  };
  const formatExample = (example) => [
    {
      type: "text",
      content: formatInput(stringify(example.input != null ? example.input : null), example.schema, example.instructions),
      role: "user"
    },
    {
      type: "text",
      content: formatOutput(example.extracted),
      role: "assistant"
    }
  ];
  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : [defaultExample],
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
  ).map(formatExample).flat();

  const output = await this.callModel({
    systemPrompt: `
Extract the following information from the input:
${schemaTypescript}
====

${instructions.map((x) => `\u2022 ${x}`).join("\n")}
`.trim(),
    stopSequences: [isArrayOfObjects ? NO_MORE : END],
    messages: [
      ...allExamples,
      {
        role: "user",
        type: "text",
        content: formatInput(inputAsString, schemaTypescript, options.instructions != null ? options.instructions : "")
      }
    ]
  });

  const firstChoice = output.choices[0];
  const answer = firstChoice == null ? void 0 : firstChoice.content;
  if (typeof answer !== "string") {
    // Fail with an explicit message instead of a TypeError on `.split`.
    throw new Error("zai.extract: the model returned no text content");
  }

  const elements = answer.split(START).filter((x) => x.trim().length > 0).map((x) => {
    try {
      // END may be absent because it is the stop sequence for single-object extraction;
      // slicing to index -1 would silently cut the last character of the JSON.
      const endIndex = x.indexOf(END);
      const json = (endIndex === -1 ? x : x.slice(0, endIndex)).trim();
      const repairedJson = jsonrepair(json);
      const parsedJson = JSON5.parse(repairedJson);
      return schema.parse(parsedJson);
    } catch (error) {
      throw new JsonParsingError(x, error instanceof Error ? error : new Error("Unknown error"));
    }
  }).filter((x) => x !== null);

  let final;
  if (isArrayOfObjects) {
    final = elements;
  } else if (elements.length === 0) {
    // Nothing was extracted: let the schema supply its defaults (or raise its own error).
    final = schema.parse({});
  } else {
    final = elements[0];
  }

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      // NOTE(review): examples are looked up with `taskId` above but saved under `zai/${taskId}`;
      // confirm against the adapter whether this prefix is intentional.
      taskId: `zai/${taskId}`,
      taskType,
      instructions: options.instructions != null ? options.instructions : "No specific instructions",
      input: inputAsString,
      output: final,
      metadata: output.metadata
    });
  }
  return final;
};
@@ -0,0 +1,189 @@
1
+ import sdk from "@botpress/sdk";
2
+ const { z } = sdk;
3
+ import _ from "lodash";
4
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
5
+ import { Zai } from "../zai";
6
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
7
// Marker the model is asked to emit after its list of verdicts; also used as the stop sequence.
const END = "\u25A0END\u25A0";

// A caller-supplied example: an item, whether it passes the condition, and an optional reason.
const Example = z.object({
  input: z.any(),
  filter: z.boolean(),
  reason: z.string().optional()
});

// Caller-facing options accepted by `filter`.
const Options = z.object({
  // Each item is truncated to this many tokens when sizing chunks (1..100000, default 250).
  tokensPerItem: z.number().min(1).max(100000).optional().describe("The maximum number of tokens per item").default(250),
  // Examples supplied by the caller; defaults to none.
  examples: z.array(Example).describe("Examples to filter the condition against").default([])
});
17
/**
 * Keeps the items of `input` that the model judges to satisfy `condition`.
 *
 * Items are grouped into chunks (at most 50 items each, within a token budget), every chunk
 * is judged by one model call, and the kept items of all chunks are concatenated in order.
 *
 * @param {Iterable<*>} input - The items to filter.
 * @param {string} condition - Natural-language condition; truncated to its token budget.
 * @param {object} [_options] - Validated against `Options` (`tokensPerItem`, `examples`).
 * @returns {Promise<Array<*>>} The items for which the model answered `true`.
 * @throws {Error} When the model returns no content for a chunk.
 */
Zai.prototype.filter = async function(input, condition, _options) {
  const options = Options.parse(_options != null ? _options : {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.filter";
  const MAX_ITEMS_PER_CHUNK = 50;
  const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5));
  const TOKENS_CONDITION_MAX = _.clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition));
  const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX;
  condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX);

  // Greedily pack items into chunks, bounded by both token budget and item count.
  const chunks = [];
  let currentChunk = [];
  let currentChunkTokens = 0;
  for (const element of input) {
    const elementAsString = tokenizer.truncate(stringify(element, false), options.tokensPerItem);
    const elementTokens = tokenizer.count(elementAsString);
    const isOverBudget = currentChunkTokens + elementTokens > TOKENS_INPUT_ARRAY_MAX;
    if ((isOverBudget || currentChunk.length >= MAX_ITEMS_PER_CHUNK) && currentChunk.length > 0) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentChunkTokens = 0;
    }
    currentChunk.push(element);
    currentChunkTokens += elementTokens;
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }

  const formatInput = (input2, condition2) => {
    return `
Condition to check:
${condition2}

Items (from \u25A00 to \u25A0${input2.length - 1})
==============================
${input2.map((x, idx) => `\u25A0${idx} = ${stringify(x.input != null ? x.input : null, false)}`).join("\n")}
`.trim();
  };
  const formatExamples = (examples) => {
    return `
${examples.map((x, idx) => `\u25A0${idx}:${x.filter ? "true" : "false"}`).join("")}
${END}
====
Here's the reasoning behind each example:
${examples.map((x, idx) => `\u25A0${idx}:${x.filter ? "true" : "false"}:${x.reason != null ? x.reason : "No reason provided"}`).join("\n")}
`.trim();
  };

  // Fallback few-shot exchange, used only when no stored or caller-supplied example is available.
  const genericExamples = [
    {
      input: "apple",
      filter: true,
      reason: "Apples are fruits"
    },
    {
      input: "Apple Inc.",
      filter: false,
      reason: "Apple Inc. is a company, not a fruit"
    },
    {
      input: "banana",
      filter: true,
      reason: "Bananas are fruits"
    },
    {
      input: "potato",
      filter: false,
      reason: "Potatoes are vegetables"
    }
  ];
  const genericExamplesMessages = [
    {
      type: "text",
      content: formatInput(genericExamples, "is a fruit"),
      role: "user"
    },
    {
      type: "text",
      content: formatExamples(genericExamples),
      role: "assistant"
    }
  ];

  const filterChunk = async (chunk) => {
    const storedExamples = taskId ? await this.adapter.getExamples({
      // The Table API can't search for a huge input string
      input: JSON.stringify(chunk).slice(0, 1e3),
      taskType,
      taskId
    }) : [];
    const examples = storedExamples.map((y) => ({ filter: y.output, input: y.input, reason: y.explanation }));
    const allExamples = takeUntilTokens(
      [...examples, ...options.examples],
      TOKENS_EXAMPLES_MAX,
      (el) => tokenizer.count(stringify(el.input))
    );

    // Decide on the number of examples, not on the (always two-element) message pair;
    // otherwise an empty example exchange is sent and the generic examples are never used.
    const exampleMessages = allExamples.length > 0 ? [
      {
        type: "text",
        content: formatInput(allExamples, condition),
        role: "user"
      },
      {
        type: "text",
        content: formatExamples(allExamples),
        role: "assistant"
      }
    ] : genericExamplesMessages;

    const output = await this.callModel({
      systemPrompt: `
You are given a list of items. Your task is to filter out the items that meet the condition below.
You need to return the full list of items with the format:
\u25A0x:true\u25A0y:false\u25A0z:true (where x, y, z are the indices of the items in the list)
You need to start with "\u25A00" and go up to the last index "\u25A0${chunk.length - 1}".
If an item meets the condition, you should return ":true", otherwise ":false".

IMPORTANT: Make sure to read the condition and the examples carefully before making your decision.
The condition is: "${condition}"
`.trim(),
      stopSequences: [END],
      messages: [
        ...exampleMessages,
        {
          type: "text",
          content: formatInput(
            chunk.map((x) => ({ input: x })),
            condition
          ),
          role: "user"
        }
      ]
    });

    const firstChoice = output.choices[0];
    const answer = firstChoice == null ? void 0 : firstChoice.content;
    if (typeof answer !== "string") {
      // Fail with an explicit message instead of a TypeError on `.trim`.
      throw new Error("zai.filter: the model returned no text content");
    }

    // Parse "■<idx>:<true|false>" pairs; the first verdict seen for an index wins.
    const verdicts = new Map();
    for (const part of answer.trim().split("\u25A0")) {
      if (part.length === 0) {
        continue;
      }
      const [rawIdx, rawVerdict] = part.split(":");
      const idx = Number.parseInt(rawIdx.trim(), 10);
      if (Number.isNaN(idx) || verdicts.has(idx)) {
        continue;
      }
      verdicts.set(idx, rawVerdict != null && rawVerdict.toLowerCase().trim() === "true");
    }
    // Items the model did not mention are treated as not matching.
    const partial = chunk.filter((_item, idx) => verdicts.get(idx) === true);

    if (taskId) {
      const key = fastHash(
        stringify({
          taskId,
          taskType,
          input: JSON.stringify(chunk),
          condition
        })
      );
      await this.adapter.saveExample({
        key,
        taskType,
        taskId,
        input: JSON.stringify(chunk),
        output: partial,
        instructions: condition,
        metadata: output.metadata
      });
    }
    return partial;
  };

  const filteredChunks = await Promise.all(chunks.map(filterChunk));
  return filteredChunks.flat();
};
@@ -0,0 +1,246 @@
1
+ import sdk from "@botpress/sdk";
2
+ const { z } = sdk;
3
+ import _ from "lodash";
4
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
5
+ import { Zai } from "../zai";
6
+ import { PROMPT_INPUT_BUFFER } from "./constants";
7
// The five confidence levels the model may answer with, from strongest "no" to strongest "yes".
// Every key maps to the identical string value.
const LABELS = Object.fromEntries(
  ["ABSOLUTELY_NOT", "PROBABLY_NOT", "AMBIGUOUS", "PROBABLY_YES", "ABSOLUTELY_YES"].map((name) => [name, name])
);

// Human-readable list of all levels ("A | B | ..."), embedded in the prompts.
const ALL_LABELS = Object.keys(LABELS).map((name) => LABELS[name]).join(" | ");
15
// Caller-facing options accepted by `label`.
const Options = z.object({
  // Caller-supplied examples: an input plus, per label key, the expected level and an optional explanation.
  examples: z.array(
    z.object({
      input: z.any(),
      // `z.enum` needs the list of allowed literals; ALL_LABELS is the " | "-joined display
      // string used in prompts and must not be passed here.
      labels: z.record(z.object({ label: z.enum(Object.values(LABELS)), explanation: z.string().optional() }))
    })
  ).default([]).describe("Examples to help the user make a decision"),
  // Free-form guidance that is forwarded to the model.
  instructions: z.string().optional().describe("Instructions to guide the user on how to extract the data"),
  // Token budget per chunk; validated to 100..100000 and defaulting to 16000.
  chunkLength: z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3)
});
25
// Schema for the `labels` argument of `label`: a record mapping a label key to a string
// (used as the question for that label in the prompt). Keys must be 1-250 characters and
// contain only letters, digits and underscores.
const Labels = z.record(z.string().min(1).max(250), z.string()).superRefine((labels, ctx) => {
  const report = (message) => ctx.addIssue({ message, code: "custom" });
  const names = Object.keys(labels);
  names.forEach((name) => {
    const isBadLength = name.length < 1 || name.length > 250;
    if (isBadLength) {
      report(`The label key "${name}" must be between 1 and 250 characters long`);
    }
    // Kept from the original for parity; own keys of an object are unique.
    const isDuplicate = names.lastIndexOf(name) !== names.indexOf(name);
    if (isDuplicate) {
      report(`Duplicate label: ${labels[name]}`);
    }
    const hasIllegalChar = /[^a-zA-Z0-9_]/.test(name);
    if (hasIllegalChar) {
      report(`The label key "${name}" must only contain alphanumeric characters and underscores`);
    }
  });
  return true;
});
43
/**
 * Maps a free-form label written by the model onto one of the five `LABELS` values.
 *
 * Matching is case-insensitive and substring-based: "NOT" wins over everything else,
 * then "AMBIGUOUS", then "YES"; the presence of "ABSOLUTELY" upgrades a NOT/YES answer
 * to its absolute variant. Anything unrecognised is treated as ambiguous.
 *
 * @param {string} label - Raw label text taken from the model's answer.
 * @returns {string} One of the `LABELS` values.
 */
const parseLabel = (label) => {
  const normalized = label.trim().toUpperCase().replace(/\s+/g, "_").replace(/_{2,}/g, "_");
  // Evaluate the qualifier once so both the NOT and the YES branch can use it; testing plain
  // "YES" before "ABSOLUTELY" + "YES" would make the absolute variant unreachable.
  const isAbsolute = normalized.includes("ABSOLUTELY");
  if (normalized.includes("NOT")) {
    return isAbsolute ? LABELS.ABSOLUTELY_NOT : LABELS.PROBABLY_NOT;
  }
  if (normalized.includes("AMBIGUOUS")) {
    return LABELS.AMBIGUOUS;
  }
  if (normalized.includes("YES")) {
    return isAbsolute ? LABELS.ABSOLUTELY_YES : LABELS.PROBABLY_YES;
  }
  return LABELS.AMBIGUOUS;
};
59
/**
 * Tags `input` with a boolean per label by asking the model to grade each label's question.
 *
 * The model answers each label with one of the five `LABELS` levels plus an explanation;
 * a label is reported as `true` when the level is PROBABLY_YES or ABSOLUTELY_YES.
 * Inputs larger than the chunk budget are split, labelled per chunk and OR-ed together.
 *
 * @param {*} input - Anything accepted by `stringify`.
 * @param {Object<string, string>} _labels - Map of label key to its question; validated by `Labels`.
 * @param {object} [_options] - Validated against `Options` (`examples`, `instructions`, `chunkLength`).
 * @returns {Promise<Object<string, boolean>>} One boolean per label key.
 * @throws {Error} When the model returns no content.
 */
Zai.prototype.label = async function(input, _labels, _options) {
  const options = Options.parse(_options != null ? _options : {});
  const labels = Labels.parse(_labels);
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.label";
  const TOTAL_MAX_TOKENS = _.clamp(options.chunkLength, 1e3, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
  const CHUNK_EXAMPLES_MAX_TOKENS = _.clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 1e4);
  const CHUNK_INPUT_MAX_TOKENS = _.clamp(
    TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
    TOTAL_MAX_TOKENS * 0.5,
    TOTAL_MAX_TOKENS
  );

  const inputAsString = stringify(input);
  if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
    // Too large for one prompt: label every chunk and OR the per-chunk answers.
    // The caller's options are forwarded so examples and instructions also apply per chunk.
    const tokens = tokenizer.split(inputAsString);
    const chunks = _.chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(""));
    const allLabels = await Promise.all(chunks.map((chunk) => this.label(chunk, _labels, _options)));
    return allLabels.reduce((acc, x) => {
      Object.keys(x).forEach((key) => {
        acc[key] = Boolean(acc[key]) || x[key];
      });
      return acc;
    }, {});
  }

  const END = "\u25A0END\u25A0";
  const instructions = options.instructions != null ? options.instructions : "";
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions
    })
  );

  // Collapses the five-level answers into the boolean map returned to the caller.
  const convertToAnswer = (mapping) => {
    return Object.keys(labels).reduce((acc, key) => {
      const level = mapping[key] == null ? void 0 : mapping[key].label;
      acc[key] = level === "ABSOLUTELY_YES" || level === "PROBABLY_YES";
      return acc;
    }, {});
  };

  const storedExamples = taskId ? await this.adapter.getExamples({
    input: inputAsString,
    taskType,
    taskId
  }) : [];
  // Build a fresh list rather than pushing into the array returned by the adapter.
  const examples = [
    ...storedExamples,
    ...options.examples.map((example) => ({
      key: fastHash(JSON.stringify(example)),
      input: example.input,
      similarity: 1,
      explanation: "",
      output: example.labels
    }))
  ];

  // An example stored under the same key is answered directly, without calling the model.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return convertToAnswer(exactMatch.output);
  }

  const allExamples = takeUntilTokens(
    examples,
    CHUNK_EXAMPLES_MAX_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output)) + tokenizer.count(el.explanation != null ? el.explanation : "") + 100
  ).map((example, idx) => [
    {
      type: "text",
      role: "user",
      content: `
Expert Example #${idx + 1}

<|start_input|>
${stringify(example.input)}
<|end_input|>`.trim()
    },
    {
      type: "text",
      role: "assistant",
      content: `
Expert Example #${idx + 1}
============
${Object.keys(example.output).map((key) => {
        const entry = example.output[key];
        // Explanations are optional on caller-supplied examples; avoid printing "undefined".
        const explanation = entry != null && entry.explanation != null ? entry.explanation : "";
        const level = entry == null ? void 0 : entry.label;
        return `\u25A0${key}:\u3010${explanation}\u3011:${level}\u25A0`;
      }).join("\n")}
${END}
`.trim()
    }
  ]).flat();

  const format = Object.keys(labels).map((key) => {
    return `
\u25A0${key}:\u3010explanation (where "explanation" is answering the question "${labels[key]}")\u3011:x\u25A0 (where x is ${ALL_LABELS})
`.trim();
  }).join("\n\n");

  const output = await this.callModel({
    stopSequences: [END],
    systemPrompt: `
You need to tag the input with the following labels based on the question asked:
${LABELS.ABSOLUTELY_NOT}: You are absolutely sure that the answer is "NO" to the question.
${LABELS.PROBABLY_NOT}: You are leaning towards "NO" to the question.
${LABELS.AMBIGUOUS}: You are unsure about the answer to the question.
${LABELS.PROBABLY_YES}: You are leaning towards "YES" to the question.
${LABELS.ABSOLUTELY_YES}: You are absolutely sure that the answer is "YES" to the question.

You need to return a mapping of the labels, an explanation and the answer for each label following the format below:
\`\`\`
${format}
${END}
\`\`\`

${instructions}

===
You should consider the Expert Examples below to help you make your decision.
In your "Analysis", please refer to the Expert Examples # to justify your decision.
`.trim(),
    messages: [
      ...allExamples,
      {
        type: "text",
        role: "user",
        content: `
Input to tag:
<|start_input|>
${inputAsString}
<|end_input|>

Answer with this following format:
\`\`\`
${format}
${END}
\`\`\`

Format cheatsheet:
\`\`\`
\u25A0label:\u3010explanation\u3011:x\u25A0
\`\`\`

Where \`x\` is one of the following: ${ALL_LABELS}

Remember: In your \`explanation\`, please refer to the Expert Examples # (and quote them) that are relevant to ground your decision-making process.
The Expert Examples are there to help you make your decision. They have been provided by experts in the field and their answers (and reasoning) are considered the ground truth and should be used as a reference to make your decision when applicable.
For example, you can say: "According to Expert Example #1, ..."`.trim()
      }
    ]
  });

  const firstChoice = output.choices[0];
  const answer = firstChoice == null ? void 0 : firstChoice.content;
  if (typeof answer !== "string") {
    // Fail with an explicit message instead of a TypeError on the property access.
    throw new Error("zai.label: the model returned no text content");
  }

  // Pull "■key:【explanation】:LEVEL■" out of the answer; labels the model skipped are ambiguous.
  const final = Object.keys(labels).reduce((acc, key) => {
    const match = answer.match(new RegExp(`\u25A0${key}:\u3010(.+)\u3011:(\\w{2,})\u25A0`, "i"));
    if (match) {
      acc[key] = {
        explanation: match[1].trim(),
        label: parseLabel(match[2])
      };
    } else {
      acc[key] = {
        explanation: "",
        label: LABELS.AMBIGUOUS
      };
    }
    return acc;
  }, {});

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      instructions,
      metadata: output.metadata,
      input: inputAsString,
      output: final
    });
  }
  return convertToAnswer(final);
};