@botpress/zai 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/adapters/adapter.js +2 -0
  2. package/dist/adapters/botpress-table.js +168 -0
  3. package/dist/adapters/memory.js +12 -0
  4. package/dist/index.d.ts +99 -98
  5. package/dist/index.js +9 -1873
  6. package/dist/models.js +387 -0
  7. package/dist/operations/check.js +141 -0
  8. package/dist/operations/constants.js +2 -0
  9. package/dist/operations/errors.js +15 -0
  10. package/dist/operations/extract.js +212 -0
  11. package/dist/operations/filter.js +179 -0
  12. package/dist/operations/label.js +237 -0
  13. package/dist/operations/rewrite.js +111 -0
  14. package/dist/operations/summarize.js +132 -0
  15. package/dist/operations/text.js +46 -0
  16. package/dist/utils.js +43 -0
  17. package/dist/zai.js +140 -0
  18. package/package.json +21 -19
  19. package/src/adapters/adapter.ts +35 -0
  20. package/src/adapters/botpress-table.ts +210 -0
  21. package/src/adapters/memory.ts +13 -0
  22. package/src/index.ts +11 -0
  23. package/src/models.ts +394 -0
  24. package/src/operations/__tests/botpress_docs.txt +26040 -0
  25. package/src/operations/__tests/cache.jsonl +101 -0
  26. package/src/operations/__tests/index.ts +87 -0
  27. package/src/operations/check.ts +187 -0
  28. package/src/operations/constants.ts +2 -0
  29. package/src/operations/errors.ts +9 -0
  30. package/src/operations/extract.ts +291 -0
  31. package/src/operations/filter.ts +231 -0
  32. package/src/operations/label.ts +332 -0
  33. package/src/operations/rewrite.ts +148 -0
  34. package/src/operations/summarize.ts +193 -0
  35. package/src/operations/text.ts +63 -0
  36. package/src/sdk-interfaces/llm/generateContent.ts +127 -0
  37. package/src/sdk-interfaces/llm/listLanguageModels.ts +19 -0
  38. package/src/utils.ts +61 -0
  39. package/src/zai.ts +193 -0
  40. package/tsconfig.json +2 -2
  41. package/dist/index.cjs +0 -1903
  42. package/dist/index.cjs.map +0 -1
  43. package/dist/index.d.cts +0 -916
  44. package/dist/index.js.map +0 -1
  45. package/tsup.config.ts +0 -16
  46. package/vitest.config.ts +0 -9
  47. package/vitest.setup.ts +0 -24
@@ -0,0 +1,212 @@
1
+ import { z } from "@bpinternal/zui";
2
+ import JSON5 from "json5";
3
+ import { jsonrepair } from "jsonrepair";
4
+ import { chunk, isArray } from "lodash-es";
5
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
6
+ import { Zai } from "../zai";
7
+ import { PROMPT_INPUT_BUFFER } from "./constants";
8
+ import { JsonParsingError } from "./errors";
9
/**
 * Options accepted by `zai.extract`.
 *
 * - `instructions`: optional free-text guidance forwarded to the model.
 * - `chunkLength`: maximum number of tokens per chunk (100 to 100000, default 16000).
 */
const Options = z.object({
  instructions: z
    .string()
    .optional()
    .describe("Instructions to guide the user on how to extract the data"),
  chunkLength: z
    .number()
    .min(100)
    .max(100000)
    .optional()
    .describe("The maximum number of tokens per chunk")
    .default(16000)
});

// Marker strings used in the extraction prompt and when parsing the model's answer:
// each extracted JSON element is wrapped in START ... END, and NO_MORE ends the list.
const START = "\u25A0json_start\u25A0";
const END = "\u25A0json_end\u25A0";
const NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";
16
/**
 * Extracts structured data matching `schema` from `input` by prompting the model.
 *
 * @param {unknown} input - Value to extract from; it is serialized with `stringify`.
 * @param {object} schema - A `z.ZodObject`, or a `z.ZodArray` whose element type is a `z.ZodObject`.
 * @param {object} [_options] - Validated against `Options` (`instructions`, `chunkLength`).
 * @returns {Promise<object|object[]>} One parsed object, or an array of parsed objects when
 *   an array schema was given.
 * @throws {Error} When the schema is neither an object nor an array of objects, or when the
 *   model response carries no text content.
 * @throws {JsonParsingError} When an element of the answer cannot be repaired, parsed or
 *   validated against the schema.
 */
Zai.prototype.extract = async function (input, schema, _options) {
  const options = Options.parse(_options ?? {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.extract";
  // Model input limit minus PROMPT_INPUT_BUFFER, never below 100 tokens.
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);

  // Normalize the schema: for arrays we work with the element schema and remember to return a list.
  let isArrayOfObjects = false;
  const originalSchema = schema;
  if (schema instanceof z.ZodArray) {
    if (!(schema._def.type instanceof z.ZodObject)) {
      throw new Error("Schema must be a ZodObject or a ZodArray<ZodObject>");
    }
    isArrayOfObjects = true;
    schema = schema._def.type;
  } else if (!(schema instanceof z.ZodObject)) {
    throw new Error("Schema must be either a ZuiObject or a ZuiArray<ZuiObject>");
  }

  const schemaTypescript = schema.toTypescript({ declaration: false });
  const schemaLength = tokenizer.count(schemaTypescript);
  // The schema is part of the prompt, so it eats into the room available for the input chunk.
  options.chunkLength = Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength);
  const keys = Object.keys(schema.shape);

  let inputAsString = stringify(input);
  if (tokenizer.count(inputAsString) > options.chunkLength) {
    if (isArrayOfObjects) {
      // Oversized input for a list schema: extract from each chunk independently and concatenate.
      const tokens = tokenizer.split(inputAsString);
      const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(""));
      // Forward the caller's options so per-chunk calls keep the same instructions and chunk size.
      const all = await Promise.all(chunks.map((chunk2) => this.extract(chunk2, originalSchema, _options)));
      return all.flat();
    }
    // A single object cannot be merged across chunks, so the input is truncated instead.
    inputAsString = tokenizer.truncate(inputAsString, options.chunkLength);
  }

  const instructions = [];
  if (options.instructions) {
    instructions.push(options.instructions);
  }
  const shape = `{ ${keys.map((key) => `"${key}": ...`).join(", ")} }`;
  const abbv = "{ ... }";
  if (isArrayOfObjects) {
    instructions.push("You may have multiple elements, or zero elements in the input.");
    instructions.push("You must extract each element separately.");
    instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`);
    instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`);
    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`);
    instructions.push(
      `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
    );
  } else {
    instructions.push("You may have exactly one element in the input.");
    instructions.push(`The element must be a JSON object with exactly the format: ${START}${shape}${END}`);
  }

  // Whatever is left of the prompt budget after the input and instructions goes to examples.
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join("\n"));
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions
    })
  );
  const examples = taskId
    ? await this.adapter.getExamples({
        input: inputAsString,
        taskType,
        taskId
      })
    : [];

  // An example stored under the same key is reused as-is, without calling the model.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return exactMatch.output;
  }

  const defaultExample = isArrayOfObjects
    ? {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
Then, there was a person named Bob who was 25 years old.
The end.`,
        schema: "Array<{ name: string, age: number }>",
        instructions: "Extract all people",
        extracted: [
          {
            name: "Alice",
            age: 30
          },
          {
            name: "Bob",
            age: 25
          }
        ]
      }
    : {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
The end.`,
        schema: "{ name: string, age: number }",
        instructions: "Extract the person",
        extracted: { name: "Alice", age: 30 }
      };
  const userExamples = examples.map((e) => ({
    input: e.input,
    extracted: e.output,
    schema: schemaTypescript,
    instructions: options.instructions
  }));

  let exampleId = 1;
  const formatInput = (input2, schema2, instructions2) => {
    const header = userExamples.length ? `Expert Example #${exampleId++}` : "Here's an example to help you understand the format:";
    return `
${header}

<|start_schema|>
${schema2}
<|end_schema|>

<|start_instructions|>
${instructions2 ?? "No specific instructions, just follow the schema above."}
<|end_instructions|>

<|start_input|>
${input2.trim()}
<|end_input|>
`.trim();
  };
  const formatOutput = (extracted) => {
    extracted = isArray(extracted) ? extracted : [extracted];
    return extracted.map(
      (x) => `
${START}
${JSON.stringify(x, null, 2)}
${END}`.trim()
    ).join("\n") + NO_MORE;
  };
  const formatExample = (example) => [
    {
      type: "text",
      content: formatInput(stringify(example.input ?? null), example.schema, example.instructions),
      role: "user"
    },
    {
      type: "text",
      content: formatOutput(example.extracted),
      role: "assistant"
    }
  ];
  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : [defaultExample],
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
  ).map(formatExample).flat();

  const output = await this.callModel({
    systemPrompt: `
Extract the following information from the input:
${schemaTypescript}
====

${instructions.map((x) => `\u2022 ${x}`).join("\n")}
`.trim(),
    // Generation stops at NO_MORE for lists and at END for a single object.
    stopSequences: [isArrayOfObjects ? NO_MORE : END],
    messages: [
      ...allExamples,
      {
        role: "user",
        type: "text",
        content: formatInput(inputAsString, schemaTypescript, options.instructions ?? "")
      }
    ]
  });

  const answer = output.choices[0]?.content;
  if (typeof answer !== "string") {
    // Fail with a clear message instead of a TypeError on `.split`, and avoid
    // saving an empty result as an example.
    throw new Error("zai.extract: the model response did not contain any text content");
  }

  const elements = answer
    .split(START)
    .filter((x) => x.trim().length > 0)
    .map((x) => {
      try {
        // END is absent when it was used as the stop sequence; in that case the
        // whole segment is the payload. slice(0, -1) would drop its last character.
        const endIndex = x.indexOf(END);
        const json = (endIndex === -1 ? x : x.slice(0, endIndex)).trim();
        const repairedJson = jsonrepair(json);
        const parsedJson = JSON5.parse(repairedJson);
        return schema.parse(parsedJson);
      } catch (error) {
        throw new JsonParsingError(x, error instanceof Error ? error : new Error("Unknown error"));
      }
    });

  let final;
  if (isArrayOfObjects) {
    final = elements;
  } else if (elements.length === 0) {
    // Nothing extracted: let the schema supply defaults (or reject the empty object).
    final = schema.parse({});
  } else {
    final = elements[0];
  }

  if (taskId) {
    // NOTE(review): examples are saved under `zai/${taskId}` but looked up above with the
    // bare `taskId`; confirm against the adapter whether this prefix is intentional.
    await this.adapter.saveExample({
      key: Key,
      taskId: `zai/${taskId}`,
      taskType,
      instructions: options.instructions ?? "No specific instructions",
      input: inputAsString,
      output: final,
      metadata: output.metadata
    });
  }
  return final;
};
@@ -0,0 +1,179 @@
1
+ import { z } from "@bpinternal/zui";
2
+ import { clamp } from "lodash-es";
3
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
4
+ import { Zai } from "../zai";
5
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
6
/** Shape of one caller-supplied example: an input, the expected verdict and an optional reason. */
const Example = z.object({
  input: z.any(),
  filter: z.boolean(),
  reason: z.string().optional()
});

/**
 * Options accepted by `zai.filter`.
 *
 * - `tokensPerItem`: maximum number of tokens per item (1 to 100000, default 250).
 * - `examples`: examples to filter the condition against (default: none).
 */
const Options = z.object({
  tokensPerItem: z
    .number()
    .min(1)
    .max(100000)
    .optional()
    .describe("The maximum number of tokens per item")
    .default(250),
  examples: z
    .array(Example)
    .describe("Examples to filter the condition against")
    .default([])
});

// Marker that closes the verdict list; it is also passed to the model as the stop sequence.
const END = "\u25A0END\u25A0";
16
/**
 * Keeps the elements of `input` that the model judges to satisfy `condition`.
 *
 * The input is split into chunks (bounded by a token budget and by 50 items per chunk),
 * each chunk is sent to the model in parallel, and the kept elements are concatenated
 * in chunk order.
 *
 * @param {Iterable<unknown>} input - Items to filter.
 * @param {string} condition - Natural-language condition; truncated to its token budget.
 * @param {object} [_options] - Validated against `Options` (`tokensPerItem`, `examples`).
 * @returns {Promise<unknown[]>} The original elements for which the model answered `true`.
 * @throws {Error} When a model response carries no text content.
 */
Zai.prototype.filter = async function (input, condition, _options) {
  const options = Options.parse(_options ?? {});
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.filter";
  const MAX_ITEMS_PER_CHUNK = 50;

  // Split the usable prompt budget between examples, the condition and the items themselves.
  const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5));
  const TOKENS_CONDITION_MAX = clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition));
  const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX;
  condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX);

  // Greedily pack elements into chunks; a chunk is closed when adding the next element
  // would exceed the token budget or the item cap.
  let chunks = [];
  let currentChunk = [];
  let currentChunkTokens = 0;
  for (const element of input) {
    // NOTE(review): the truncated string is only used for token accounting; the prompt
    // below serializes the full element again. Confirm whether that is intended.
    const elementAsString = tokenizer.truncate(stringify(element, false), options.tokensPerItem);
    const elementTokens = tokenizer.count(elementAsString);
    if (currentChunkTokens + elementTokens > TOKENS_INPUT_ARRAY_MAX || currentChunk.length >= MAX_ITEMS_PER_CHUNK) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentChunkTokens = 0;
    }
    currentChunk.push(element);
    currentChunkTokens += elementTokens;
  }
  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }
  // An oversized first element can close an empty chunk; drop those.
  chunks = chunks.filter((x) => x.length > 0);

  const formatInput = (input2, condition2) => {
    return `
Condition to check:
${condition2}

Items (from \u25A00 to \u25A0${input2.length - 1})
==============================
${input2.map((x, idx) => `\u25A0${idx} = ${stringify(x.input ?? null, false)}`).join("\n")}
`.trim();
  };
  const formatExamples = (examples) => {
    return `
${examples.map((x, idx) => `\u25A0${idx}:${x.filter ? "true" : "false"}`).join("")}
${END}
====
Here's the reasoning behind each example:
${examples.map((x, idx) => `\u25A0${idx}:${x.filter ? "true" : "false"}:${x.reason ?? "No reason provided"}`).join("\n")}
`.trim();
  };

  // Built-in few-shot exchange, used when no stored or caller-supplied example is available.
  const genericExamples = [
    {
      input: "apple",
      filter: true,
      reason: "Apples are fruits"
    },
    {
      input: "Apple Inc.",
      filter: false,
      reason: "Apple Inc. is a company, not a fruit"
    },
    {
      input: "banana",
      filter: true,
      reason: "Bananas are fruits"
    },
    {
      input: "potato",
      filter: false,
      reason: "Potatoes are vegetables"
    }
  ];
  const genericExamplesMessages = [
    {
      type: "text",
      content: formatInput(genericExamples, "is a fruit"),
      role: "user"
    },
    {
      type: "text",
      content: formatExamples(genericExamples),
      role: "assistant"
    }
  ];

  const filterChunk = async (chunk) => {
    // NOTE(review): saveExample below stores the kept elements (an array) as `output`,
    // while it is read back here as the boolean `filter` of an example. Confirm the
    // adapter's example shape.
    const examples = taskId
      ? await this.adapter.getExamples({
          // The Table API can't search for a huge input string
          input: JSON.stringify(chunk).slice(0, 1e3),
          taskType,
          taskId
        }).then(
          (x) => x.map((y) => ({ filter: y.output, input: y.input, reason: y.explanation }))
        )
      : [];
    const allExamples = takeUntilTokens(
      [...examples, ...(options.examples ?? [])],
      TOKENS_EXAMPLES_MAX,
      (el) => tokenizer.count(stringify(el.input))
    );

    // Fall back to the generic exchange when there are no examples. Testing the message
    // array's length would always pass and send an empty "Items (from 0 to -1)" exchange.
    const exampleMessages = allExamples.length
      ? [
          {
            type: "text",
            content: formatInput(allExamples, condition),
            role: "user"
          },
          {
            type: "text",
            content: formatExamples(allExamples),
            role: "assistant"
          }
        ]
      : genericExamplesMessages;

    const output = await this.callModel({
      systemPrompt: `
You are given a list of items. Your task is to filter out the items that meet the condition below.
You need to return the full list of items with the format:
\u25A0x:true\u25A0y:false\u25A0z:true (where x, y, z are the indices of the items in the list)
You need to start with "\u25A00" and go up to the last index "\u25A0${chunk.length - 1}".
If an item meets the condition, you should return ":true", otherwise ":false".

IMPORTANT: Make sure to read the condition and the examples carefully before making your decision.
The condition is: "${condition}"
`.trim(),
      stopSequences: [END],
      messages: [
        ...exampleMessages,
        {
          type: "text",
          content: formatInput(
            chunk.map((x) => ({ input: x })),
            condition
          ),
          role: "user"
        }
      ]
    });

    const answer = output.choices[0]?.content;
    if (typeof answer !== "string") {
      // Fail with a clear message instead of a TypeError on `.trim`, and avoid
      // saving an empty result as an example.
      throw new Error("zai.filter: the model response did not contain any text content");
    }

    // Parse "■<index>:<true|false>" pairs; the first verdict seen for an index wins.
    const verdicts = new Map();
    for (const part of answer.trim().split("\u25A0")) {
      if (part.length === 0) {
        continue;
      }
      const [idx, filter] = part.split(":");
      const index = Number.parseInt(idx?.trim() ?? "", 10);
      if (!verdicts.has(index)) {
        verdicts.set(index, filter?.toLowerCase().trim() === "true");
      }
    }
    // Items the model did not mention are treated as not matching.
    const partial = chunk.filter((_, idx) => verdicts.get(idx) ?? false);

    if (taskId) {
      const key = fastHash(
        stringify({
          taskId,
          taskType,
          input: JSON.stringify(chunk),
          condition
        })
      );
      await this.adapter.saveExample({
        key,
        taskType,
        taskId,
        input: JSON.stringify(chunk),
        output: partial,
        instructions: condition,
        metadata: output.metadata
      });
    }
    return partial;
  };

  const filteredChunks = await Promise.all(chunks.map(filterChunk));
  return filteredChunks.flat();
};
@@ -0,0 +1,237 @@
1
+ import { z } from "@bpinternal/zui";
2
+ import { clamp, chunk } from "lodash-es";
3
+ import { fastHash, stringify, takeUntilTokens } from "../utils";
4
+ import { Zai } from "../zai";
5
+ import { PROMPT_INPUT_BUFFER } from "./constants";
6
/** The five confidence levels the model may assign to each label question. */
const LABELS = {
  ABSOLUTELY_NOT: "ABSOLUTELY_NOT",
  PROBABLY_NOT: "PROBABLY_NOT",
  AMBIGUOUS: "AMBIGUOUS",
  PROBABLY_YES: "PROBABLY_YES",
  ABSOLUTELY_YES: "ABSOLUTELY_YES"
};
const LABEL_VALUES = Object.values(LABELS);
// Human-readable list of the allowed values, interpolated into the prompts.
const ALL_LABELS = LABEL_VALUES.join(" | ");

/**
 * Options accepted by `zai.label`.
 *
 * - `examples`: caller-supplied examples, each with an input and a per-label verdict.
 * - `instructions`: optional free-text guidance forwarded to the model.
 * - `chunkLength`: maximum number of tokens per chunk (100 to 100000, default 16000).
 */
const Options = z.object({
  examples: z.array(
    z.object({
      input: z.any(),
      // The enum must be built from the list of values; passing the " | "-joined
      // display string does not validate labels as intended.
      labels: z.record(z.object({ label: z.enum(LABEL_VALUES), explanation: z.string().optional() }))
    })
  ).default([]).describe("Examples to help the user make a decision"),
  instructions: z.string().optional().describe("Instructions to guide the user on how to extract the data"),
  chunkLength: z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3)
});

/**
 * Validates the label map passed to `zai.label`: a record from label key to a string.
 * Keys must be 1 to 250 characters long and contain only letters, digits and underscores.
 * (Object keys are unique by construction, so no duplicate check is needed.)
 */
const Labels = z.record(z.string().min(1).max(250), z.string()).superRefine((labels, ctx) => {
  for (const key of Object.keys(labels)) {
    if (key.length < 1 || key.length > 250) {
      ctx.addIssue({ message: `The label key "${key}" must be between 1 and 250 characters long`, code: "custom" });
    }
    if (/[^a-zA-Z0-9_]/.test(key)) {
      ctx.addIssue({
        message: `The label key "${key}" must only contain alphanumeric characters and underscores`,
        code: "custom"
      });
    }
  }
});
42
/**
 * Maps a free-form label written by the model onto one of the `LABELS` values.
 *
 * The text is upper-cased and whitespace runs are collapsed to single underscores, then
 * matched by keyword: "NOT" wins over "AMBIGUOUS", which wins over "YES"; the presence of
 * "ABSOLUTELY" selects the stronger variant. Anything unrecognized is `AMBIGUOUS`.
 *
 * @param {string} label - Raw label text taken from the model's answer.
 * @returns {string} One of the `LABELS` values.
 */
const parseLabel = (label) => {
  const normalized = label.toUpperCase().replace(/\s+/g, "_").replace(/_{2,}/g, "_").trim();
  const isAbsolute = normalized.includes("ABSOLUTELY");
  if (normalized.includes("NOT")) {
    return isAbsolute ? LABELS.ABSOLUTELY_NOT : LABELS.PROBABLY_NOT;
  }
  if (normalized.includes("AMBIGUOUS")) {
    return LABELS.AMBIGUOUS;
  }
  if (normalized.includes("YES")) {
    // Decide on "ABSOLUTELY" before settling for PROBABLY_YES; testing plain "YES"
    // first would make ABSOLUTELY_YES unreachable.
    return isAbsolute ? LABELS.ABSOLUTELY_YES : LABELS.PROBABLY_YES;
  }
  return LABELS.AMBIGUOUS;
};
58
/**
 * Answers a set of yes/no label questions about `input` by prompting the model.
 *
 * @param {unknown} input - Value to tag; it is serialized with `stringify`.
 * @param {Record<string, string>} _labels - Map from label key to the question it answers;
 *   validated against `Labels`.
 * @param {object} [_options] - Validated against `Options` (`examples`, `instructions`,
 *   `chunkLength`).
 * @returns {Promise<Record<string, boolean>>} For every label key, `true` when the model's
 *   verdict is `PROBABLY_YES` or `ABSOLUTELY_YES`. For inputs split into chunks, a label is
 *   `true` when it is `true` for at least one chunk.
 * @throws {Error} When the model response carries no text content.
 */
Zai.prototype.label = async function (input, _labels, _options) {
  const options = Options.parse(_options ?? {});
  const labels = Labels.parse(_labels);
  const tokenizer = await this.getTokenizer();
  const taskId = this.taskId;
  const taskType = "zai.label";

  // Token budgets: half of the total goes to examples (within 250..10000), the rest to the input.
  const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1e3, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
  const CHUNK_EXAMPLES_MAX_TOKENS = clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 1e4);
  const CHUNK_INPUT_MAX_TOKENS = clamp(
    TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
    TOTAL_MAX_TOKENS * 0.5,
    TOTAL_MAX_TOKENS
  );

  const inputAsString = stringify(input);
  if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
    // Oversized input: label each chunk independently and OR the per-label results together.
    const tokens = tokenizer.split(inputAsString);
    const chunks = chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(""));
    // Forward the caller's options so per-chunk calls keep the same instructions and examples.
    const allLabels = await Promise.all(chunks.map((chunk2) => this.label(chunk2, _labels, _options)));
    return allLabels.reduce((acc, x) => {
      for (const key of Object.keys(x)) {
        acc[key] = acc[key] || x[key];
      }
      return acc;
    }, {});
  }

  const END = "\u25A0END\u25A0";
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions ?? ""
    })
  );

  // Collapses the five-level verdicts into booleans, one per requested label key.
  const convertToAnswer = (mapping) => {
    return Object.keys(labels).reduce((acc, key) => {
      acc[key] = mapping[key]?.label === "ABSOLUTELY_YES" || mapping[key]?.label === "PROBABLY_YES";
      return acc;
    }, {});
  };

  const storedExamples = taskId
    ? await this.adapter.getExamples({
        input: inputAsString,
        taskType,
        taskId
      })
    : [];
  // Build a fresh list rather than pushing into the array returned by the adapter.
  const examples = [
    ...storedExamples,
    ...options.examples.map((example) => ({
      key: fastHash(JSON.stringify(example)),
      input: example.input,
      similarity: 1,
      explanation: "",
      output: example.labels
    }))
  ];

  // An example stored under the same key is reused as-is, without calling the model.
  const exactMatch = examples.find((x) => x.key === Key);
  if (exactMatch) {
    return convertToAnswer(exactMatch.output);
  }

  const allExamples = takeUntilTokens(
    examples,
    CHUNK_EXAMPLES_MAX_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output)) + tokenizer.count(el.explanation ?? "") + 100
  ).map((example, idx) => [
    {
      type: "text",
      role: "user",
      content: `
Expert Example #${idx + 1}

<|start_input|>
${stringify(example.input)}
<|end_input|>`.trim()
    },
    {
      type: "text",
      role: "assistant",
      // `explanation` is optional on caller-supplied examples; render it as empty
      // rather than the literal text "undefined".
      content: `
Expert Example #${idx + 1}
============
${Object.keys(example.output).map(
        (key) => `
\u25A0${key}:\u3010${example.output[key]?.explanation ?? ""}\u3011:${example.output[key]?.label}\u25A0
`.trim()
      ).join("\n")}
${END}
`.trim()
    }
  ]).flat();

  const format = Object.keys(labels).map((key) => {
    return `
\u25A0${key}:\u3010explanation (where "explanation" is answering the question "${labels[key]}")\u3011:x\u25A0 (where x is ${ALL_LABELS})
`.trim();
  }).join("\n\n");

  const output = await this.callModel({
    stopSequences: [END],
    systemPrompt: `
You need to tag the input with the following labels based on the question asked:
${LABELS.ABSOLUTELY_NOT}: You are absolutely sure that the answer is "NO" to the question.
${LABELS.PROBABLY_NOT}: You are leaning towards "NO" to the question.
${LABELS.AMBIGUOUS}: You are unsure about the answer to the question.
${LABELS.PROBABLY_YES}: You are leaning towards "YES" to the question.
${LABELS.ABSOLUTELY_YES}: You are absolutely sure that the answer is "YES" to the question.

You need to return a mapping of the labels, an explanation and the answer for each label following the format below:
\`\`\`
${format}
${END}
\`\`\`

${options.instructions ?? ""}

===
You should consider the Expert Examples below to help you make your decision.
In your "Analysis", please refer to the Expert Examples # to justify your decision.
`.trim(),
    messages: [
      ...allExamples,
      {
        type: "text",
        role: "user",
        content: `
Input to tag:
<|start_input|>
${inputAsString}
<|end_input|>

Answer with this following format:
\`\`\`
${format}
${END}
\`\`\`

Format cheatsheet:
\`\`\`
\u25A0label:\u3010explanation\u3011:x\u25A0
\`\`\`

Where \`x\` is one of the following: ${ALL_LABELS}

Remember: In your \`explanation\`, please refer to the Expert Examples # (and quote them) that are relevant to ground your decision-making process.
The Expert Examples are there to help you make your decision. They have been provided by experts in the field and their answers (and reasoning) are considered the ground truth and should be used as a reference to make your decision when applicable.
For example, you can say: "According to Expert Example #1, ..."`.trim()
      }
    ]
  });

  const answer = output.choices[0]?.content;
  if (typeof answer !== "string") {
    // Fail with a clear message instead of a TypeError, and avoid saving an
    // all-AMBIGUOUS result as an example.
    throw new Error("zai.label: the model response did not contain any text content");
  }

  const final = Object.keys(labels).reduce((acc, key) => {
    // The explanation is everything up to the closing bracket, so it may span several
    // lines but can never run into the next label's entry. Keys are restricted to
    // [a-zA-Z0-9_] by `Labels`, so they need no regex escaping.
    const match = answer.match(new RegExp(`\u25A0${key}:\u3010([^\u3011]+)\u3011:(\\w{2,})\u25A0`, "i"));
    if (match) {
      acc[key] = {
        explanation: match[1].trim(),
        label: parseLabel(match[2])
      };
    } else {
      // A label the model did not answer in the expected format counts as undecided.
      acc[key] = {
        explanation: "",
        label: LABELS.AMBIGUOUS
      };
    }
    return acc;
  }, {});

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      instructions: options.instructions ?? "",
      metadata: output.metadata,
      input: inputAsString,
      output: final
    });
  }
  return convertToAnswer(final);
};