@botpress/zai 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build.ts +9 -0
- package/dist/adapters/botpress-table.js +21 -21
- package/dist/index.d.ts +27 -517
- package/dist/operations/check.js +22 -6
- package/dist/operations/extract.js +28 -8
- package/dist/operations/filter.js +15 -3
- package/dist/operations/label.js +36 -6
- package/dist/operations/rewrite.js +18 -6
- package/dist/operations/summarize.js +6 -5
- package/dist/operations/text.js +4 -3
- package/dist/utils.js +0 -6
- package/dist/zai.js +28 -68
- package/e2e/data/cache.jsonl +118 -0
- package/{src/operations/__tests/index.ts → e2e/utils.ts} +18 -16
- package/package.json +23 -21
- package/src/adapters/adapter.ts +2 -2
- package/src/adapters/botpress-table.ts +36 -36
- package/src/adapters/memory.ts +3 -3
- package/src/operations/check.ts +53 -20
- package/src/operations/errors.ts +1 -1
- package/src/operations/extract.ts +49 -31
- package/src/operations/filter.ts +36 -23
- package/src/operations/label.ts +73 -25
- package/src/operations/rewrite.ts +28 -15
- package/src/operations/summarize.ts +11 -9
- package/src/operations/text.ts +7 -5
- package/src/utils.ts +5 -14
- package/src/zai.ts +45 -91
- package/tsconfig.json +2 -22
- package/dist/models.js +0 -387
- package/src/models.ts +0 -394
- package/src/operations/__tests/cache.jsonl +0 -101
- package/src/sdk-interfaces/llm/generateContent.ts +0 -127
- package/src/sdk-interfaces/llm/listLanguageModels.ts +0 -19
- /package/{src/operations/__tests → e2e/data}/botpress_docs.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
// eslint-disable consistent-type-definitions
|
|
2
|
+
import { z, ZodObject } from '@bpinternal/zui'
|
|
2
3
|
|
|
3
4
|
import JSON5 from 'json5'
|
|
4
5
|
import { jsonrepair } from 'jsonrepair'
|
|
@@ -18,18 +19,13 @@ const Options = z.object({
|
|
|
18
19
|
.max(100_000)
|
|
19
20
|
.optional()
|
|
20
21
|
.describe('The maximum number of tokens per chunk')
|
|
21
|
-
.default(16_000)
|
|
22
|
+
.default(16_000),
|
|
22
23
|
})
|
|
23
24
|
|
|
24
25
|
declare module '@botpress/zai' {
|
|
25
26
|
interface Zai {
|
|
26
27
|
/** Extracts one or many elements from an arbitrary input */
|
|
27
|
-
extract<S extends z.AnyZodObject>(input: unknown, schema: S, options?: Options): Promise<z.
|
|
28
|
-
extract<S extends z.AnyZodObject>(
|
|
29
|
-
input: unknown,
|
|
30
|
-
schema: z.ZodArray<S>,
|
|
31
|
-
options?: Options
|
|
32
|
-
): Promise<Array<z.infer<S>>>
|
|
28
|
+
extract<S extends z.AnyZodObject | z.ZodArray>(input: unknown, schema: S, options?: Options): Promise<z.TypeOf<S>>
|
|
33
29
|
}
|
|
34
30
|
}
|
|
35
31
|
|
|
@@ -40,21 +36,29 @@ const NO_MORE = '■NO_MORE_ELEMENT■'
|
|
|
40
36
|
Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
41
37
|
const options = Options.parse(_options ?? {})
|
|
42
38
|
const tokenizer = await this.getTokenizer()
|
|
39
|
+
await this.fetchModelDetails()
|
|
43
40
|
|
|
44
41
|
const taskId = this.taskId
|
|
45
42
|
const taskType = 'zai.extract'
|
|
46
43
|
|
|
47
|
-
const PROMPT_COMPONENT = Math.max(this.
|
|
44
|
+
const PROMPT_COMPONENT = Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100)
|
|
48
45
|
|
|
49
46
|
let isArrayOfObjects = false
|
|
50
47
|
const originalSchema = schema
|
|
51
48
|
|
|
52
|
-
|
|
49
|
+
const baseType = (schema.naked ? schema.naked() : schema)?.constructor?.name ?? 'unknown'
|
|
50
|
+
|
|
51
|
+
if (baseType === 'ZodObject') {
|
|
53
52
|
// Do nothing
|
|
54
|
-
} else if (
|
|
55
|
-
|
|
53
|
+
} else if (baseType === 'ZodArray') {
|
|
54
|
+
let elementType = (schema as any).element
|
|
55
|
+
if (elementType.naked) {
|
|
56
|
+
elementType = elementType.naked()
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
if (elementType?.constructor?.name === 'ZodObject') {
|
|
56
60
|
isArrayOfObjects = true
|
|
57
|
-
schema =
|
|
61
|
+
schema = elementType
|
|
58
62
|
} else {
|
|
59
63
|
throw new Error('Schema must be a ZodObject or a ZodArray<ZodObject>')
|
|
60
64
|
}
|
|
@@ -65,9 +69,12 @@ Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
|
65
69
|
const schemaTypescript = schema.toTypescript({ declaration: false })
|
|
66
70
|
const schemaLength = tokenizer.count(schemaTypescript)
|
|
67
71
|
|
|
68
|
-
options.chunkLength = Math.min(
|
|
72
|
+
options.chunkLength = Math.min(
|
|
73
|
+
options.chunkLength,
|
|
74
|
+
this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength
|
|
75
|
+
)
|
|
69
76
|
|
|
70
|
-
const keys = Object.keys(schema.shape)
|
|
77
|
+
const keys = Object.keys((schema as ZodObject).shape)
|
|
71
78
|
|
|
72
79
|
let inputAsString = stringify(input)
|
|
73
80
|
|
|
@@ -116,7 +123,7 @@ Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
|
116
123
|
taskType,
|
|
117
124
|
taskId,
|
|
118
125
|
input: inputAsString,
|
|
119
|
-
instructions: options.instructions
|
|
126
|
+
instructions: options.instructions,
|
|
120
127
|
})
|
|
121
128
|
)
|
|
122
129
|
|
|
@@ -124,7 +131,7 @@ Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
|
|
|
124
131
|
? await this.adapter.getExamples<string, unknown>({
|
|
125
132
|
input: inputAsString,
|
|
126
133
|
taskType,
|
|
127
|
-
taskId
|
|
134
|
+
taskId,
|
|
128
135
|
})
|
|
129
136
|
: []
|
|
130
137
|
|
|
@@ -144,13 +151,13 @@ The end.`,
|
|
|
144
151
|
extracted: [
|
|
145
152
|
{
|
|
146
153
|
name: 'Alice',
|
|
147
|
-
age: 30
|
|
154
|
+
age: 30,
|
|
148
155
|
},
|
|
149
156
|
{
|
|
150
157
|
name: 'Bob',
|
|
151
|
-
age: 25
|
|
152
|
-
}
|
|
153
|
-
]
|
|
158
|
+
age: 25,
|
|
159
|
+
},
|
|
160
|
+
],
|
|
154
161
|
}
|
|
155
162
|
: {
|
|
156
163
|
input: `The story goes as follow.
|
|
@@ -158,14 +165,14 @@ Once upon a time, there was a person named Alice who was 30 years old.
|
|
|
158
165
|
The end.`,
|
|
159
166
|
schema: '{ name: string, age: number }',
|
|
160
167
|
instructions: 'Extract the person',
|
|
161
|
-
extracted: { name: 'Alice', age: 30 }
|
|
168
|
+
extracted: { name: 'Alice', age: 30 },
|
|
162
169
|
}
|
|
163
170
|
|
|
164
171
|
const userExamples = examples.map((e) => ({
|
|
165
172
|
input: e.input,
|
|
166
173
|
extracted: e.output,
|
|
167
174
|
schema: schemaTypescript,
|
|
168
|
-
instructions: options.instructions
|
|
175
|
+
instructions: options.instructions,
|
|
169
176
|
}))
|
|
170
177
|
|
|
171
178
|
let exampleId = 1
|
|
@@ -211,13 +218,13 @@ ${END}`.trim()
|
|
|
211
218
|
{
|
|
212
219
|
type: 'text' as const,
|
|
213
220
|
content: formatInput(stringify(example.input ?? null), example.schema, example.instructions),
|
|
214
|
-
role: 'user' as const
|
|
221
|
+
role: 'user' as const,
|
|
215
222
|
},
|
|
216
223
|
{
|
|
217
224
|
type: 'text' as const,
|
|
218
225
|
content: formatOutput(example.extracted),
|
|
219
|
-
role: 'assistant' as const
|
|
220
|
-
}
|
|
226
|
+
role: 'assistant' as const,
|
|
227
|
+
},
|
|
221
228
|
]
|
|
222
229
|
|
|
223
230
|
const allExamples = takeUntilTokens(
|
|
@@ -228,7 +235,7 @@ ${END}`.trim()
|
|
|
228
235
|
.map(formatExample)
|
|
229
236
|
.flat()
|
|
230
237
|
|
|
231
|
-
const output = await this.callModel({
|
|
238
|
+
const { output, meta } = await this.callModel({
|
|
232
239
|
systemPrompt: `
|
|
233
240
|
Extract the following information from the input:
|
|
234
241
|
${schemaTypescript}
|
|
@@ -242,9 +249,9 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
|
|
|
242
249
|
{
|
|
243
250
|
role: 'user',
|
|
244
251
|
type: 'text',
|
|
245
|
-
content: formatInput(inputAsString, schemaTypescript, options.instructions ?? '')
|
|
246
|
-
}
|
|
247
|
-
]
|
|
252
|
+
content: formatInput(inputAsString, schemaTypescript, options.instructions ?? ''),
|
|
253
|
+
},
|
|
254
|
+
],
|
|
248
255
|
})
|
|
249
256
|
|
|
250
257
|
const answer = output.choices[0]?.content as string
|
|
@@ -283,7 +290,18 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
|
|
|
283
290
|
instructions: options.instructions ?? 'No specific instructions',
|
|
284
291
|
input: inputAsString,
|
|
285
292
|
output: final,
|
|
286
|
-
metadata:
|
|
293
|
+
metadata: {
|
|
294
|
+
cost: {
|
|
295
|
+
input: meta.cost.input,
|
|
296
|
+
output: meta.cost.output,
|
|
297
|
+
},
|
|
298
|
+
latency: meta.latency,
|
|
299
|
+
model: this.Model,
|
|
300
|
+
tokens: {
|
|
301
|
+
input: meta.tokens.input,
|
|
302
|
+
output: meta.tokens.output,
|
|
303
|
+
},
|
|
304
|
+
},
|
|
287
305
|
})
|
|
288
306
|
}
|
|
289
307
|
|
package/src/operations/filter.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// eslint-disable consistent-type-definitions
|
|
1
2
|
import { z } from '@bpinternal/zui'
|
|
2
3
|
|
|
3
4
|
import { clamp } from 'lodash-es'
|
|
@@ -9,7 +10,7 @@ type Example = z.input<typeof Example>
|
|
|
9
10
|
const Example = z.object({
|
|
10
11
|
input: z.any(),
|
|
11
12
|
filter: z.boolean(),
|
|
12
|
-
reason: z.string().optional()
|
|
13
|
+
reason: z.string().optional(),
|
|
13
14
|
})
|
|
14
15
|
|
|
15
16
|
export type Options = z.input<typeof Options>
|
|
@@ -21,7 +22,7 @@ const Options = z.object({
|
|
|
21
22
|
.optional()
|
|
22
23
|
.describe('The maximum number of tokens per item')
|
|
23
24
|
.default(250),
|
|
24
|
-
examples: z.array(Example).describe('Examples to filter the condition against').default([])
|
|
25
|
+
examples: z.array(Example).describe('Examples to filter the condition against').default([]),
|
|
25
26
|
})
|
|
26
27
|
|
|
27
28
|
declare module '@botpress/zai' {
|
|
@@ -36,12 +37,13 @@ const END = '■END■'
|
|
|
36
37
|
Zai.prototype.filter = async function (this: Zai, input, condition, _options) {
|
|
37
38
|
const options = Options.parse(_options ?? {})
|
|
38
39
|
const tokenizer = await this.getTokenizer()
|
|
40
|
+
await this.fetchModelDetails()
|
|
39
41
|
|
|
40
42
|
const taskId = this.taskId
|
|
41
43
|
const taskType = 'zai.filter'
|
|
42
44
|
|
|
43
45
|
const MAX_ITEMS_PER_CHUNK = 50
|
|
44
|
-
const TOKENS_TOTAL_MAX = this.
|
|
46
|
+
const TOKENS_TOTAL_MAX = this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER
|
|
45
47
|
const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5))
|
|
46
48
|
const TOKENS_CONDITION_MAX = clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition))
|
|
47
49
|
const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX
|
|
@@ -99,36 +101,36 @@ ${examples.map((x, idx) => `■${idx}:${!!x.filter ? 'true' : 'false'}:${x.reaso
|
|
|
99
101
|
{
|
|
100
102
|
input: 'apple',
|
|
101
103
|
filter: true,
|
|
102
|
-
reason: 'Apples are fruits'
|
|
104
|
+
reason: 'Apples are fruits',
|
|
103
105
|
},
|
|
104
106
|
{
|
|
105
107
|
input: 'Apple Inc.',
|
|
106
108
|
filter: false,
|
|
107
|
-
reason: 'Apple Inc. is a company, not a fruit'
|
|
109
|
+
reason: 'Apple Inc. is a company, not a fruit',
|
|
108
110
|
},
|
|
109
111
|
{
|
|
110
112
|
input: 'banana',
|
|
111
113
|
filter: true,
|
|
112
|
-
reason: 'Bananas are fruits'
|
|
114
|
+
reason: 'Bananas are fruits',
|
|
113
115
|
},
|
|
114
116
|
{
|
|
115
117
|
input: 'potato',
|
|
116
118
|
filter: false,
|
|
117
|
-
reason: 'Potatoes are vegetables'
|
|
118
|
-
}
|
|
119
|
+
reason: 'Potatoes are vegetables',
|
|
120
|
+
},
|
|
119
121
|
]
|
|
120
122
|
|
|
121
123
|
const genericExamplesMessages = [
|
|
122
124
|
{
|
|
123
125
|
type: 'text' as const,
|
|
124
126
|
content: formatInput(genericExamples, 'is a fruit'),
|
|
125
|
-
role: 'user' as const
|
|
127
|
+
role: 'user' as const,
|
|
126
128
|
},
|
|
127
129
|
{
|
|
128
130
|
type: 'text' as const,
|
|
129
131
|
content: formatExamples(genericExamples),
|
|
130
|
-
role: 'assistant' as const
|
|
131
|
-
}
|
|
132
|
+
role: 'assistant' as const,
|
|
133
|
+
},
|
|
132
134
|
]
|
|
133
135
|
|
|
134
136
|
const filterChunk = async (chunk: typeof input) => {
|
|
@@ -138,10 +140,10 @@ ${examples.map((x, idx) => `■${idx}:${!!x.filter ? 'true' : 'false'}:${x.reaso
|
|
|
138
140
|
// The Table API can't search for a huge input string
|
|
139
141
|
input: JSON.stringify(chunk).slice(0, 1000),
|
|
140
142
|
taskType,
|
|
141
|
-
taskId
|
|
143
|
+
taskId,
|
|
142
144
|
})
|
|
143
145
|
.then((x) =>
|
|
144
|
-
x.map((y) => ({ filter: y.output as boolean, input: y.input, reason: y.explanation } satisfies Example)
|
|
146
|
+
x.map((y) => ({ filter: y.output as boolean, input: y.input, reason: y.explanation }) satisfies Example)
|
|
145
147
|
)
|
|
146
148
|
: []
|
|
147
149
|
|
|
@@ -153,16 +155,16 @@ ${examples.map((x, idx) => `■${idx}:${!!x.filter ? 'true' : 'false'}:${x.reaso
|
|
|
153
155
|
{
|
|
154
156
|
type: 'text' as const,
|
|
155
157
|
content: formatInput(allExamples, condition),
|
|
156
|
-
role: 'user' as const
|
|
158
|
+
role: 'user' as const,
|
|
157
159
|
},
|
|
158
160
|
{
|
|
159
161
|
type: 'text' as const,
|
|
160
162
|
content: formatExamples(allExamples),
|
|
161
|
-
role: 'assistant' as const
|
|
162
|
-
}
|
|
163
|
+
role: 'assistant' as const,
|
|
164
|
+
},
|
|
163
165
|
]
|
|
164
166
|
|
|
165
|
-
const output = await this.callModel({
|
|
167
|
+
const { output, meta } = await this.callModel({
|
|
166
168
|
systemPrompt: `
|
|
167
169
|
You are given a list of items. Your task is to filter out the items that meet the condition below.
|
|
168
170
|
You need to return the full list of items with the format:
|
|
@@ -179,12 +181,12 @@ The condition is: "${condition}"
|
|
|
179
181
|
{
|
|
180
182
|
type: 'text',
|
|
181
183
|
content: formatInput(
|
|
182
|
-
chunk.map((x) => ({ input: x } as Example)
|
|
184
|
+
chunk.map((x) => ({ input: x }) as Example),
|
|
183
185
|
condition
|
|
184
186
|
),
|
|
185
|
-
role: 'user'
|
|
186
|
-
}
|
|
187
|
-
]
|
|
187
|
+
role: 'user',
|
|
188
|
+
},
|
|
189
|
+
],
|
|
188
190
|
})
|
|
189
191
|
|
|
190
192
|
const answer = output.choices[0]?.content as string
|
|
@@ -207,7 +209,7 @@ The condition is: "${condition}"
|
|
|
207
209
|
taskId,
|
|
208
210
|
taskType,
|
|
209
211
|
input: JSON.stringify(chunk),
|
|
210
|
-
condition
|
|
212
|
+
condition,
|
|
211
213
|
})
|
|
212
214
|
)
|
|
213
215
|
|
|
@@ -218,7 +220,18 @@ The condition is: "${condition}"
|
|
|
218
220
|
input: JSON.stringify(chunk),
|
|
219
221
|
output: partial,
|
|
220
222
|
instructions: condition,
|
|
221
|
-
metadata:
|
|
223
|
+
metadata: {
|
|
224
|
+
cost: {
|
|
225
|
+
input: meta.cost.input,
|
|
226
|
+
output: meta.cost.output,
|
|
227
|
+
},
|
|
228
|
+
latency: meta.latency,
|
|
229
|
+
model: this.Model,
|
|
230
|
+
tokens: {
|
|
231
|
+
input: meta.tokens.input,
|
|
232
|
+
output: meta.tokens.output,
|
|
233
|
+
},
|
|
234
|
+
},
|
|
222
235
|
})
|
|
223
236
|
}
|
|
224
237
|
|
package/src/operations/label.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// eslint-disable consistent-type-definitions
|
|
1
2
|
import { z } from '@bpinternal/zui'
|
|
2
3
|
|
|
3
4
|
import { clamp, chunk } from 'lodash-es'
|
|
@@ -11,8 +12,9 @@ const LABELS = {
|
|
|
11
12
|
PROBABLY_NOT: 'PROBABLY_NOT',
|
|
12
13
|
AMBIGUOUS: 'AMBIGUOUS',
|
|
13
14
|
PROBABLY_YES: 'PROBABLY_YES',
|
|
14
|
-
ABSOLUTELY_YES: 'ABSOLUTELY_YES'
|
|
15
|
+
ABSOLUTELY_YES: 'ABSOLUTELY_YES',
|
|
15
16
|
} as const
|
|
17
|
+
|
|
16
18
|
const ALL_LABELS = Object.values(LABELS).join(' | ')
|
|
17
19
|
|
|
18
20
|
type Example<T extends string> = {
|
|
@@ -29,7 +31,7 @@ const Options = z.object({
|
|
|
29
31
|
.array(
|
|
30
32
|
z.object({
|
|
31
33
|
input: z.any(),
|
|
32
|
-
labels: z.record(z.object({ label: z.enum(ALL_LABELS as never), explanation: z.string().optional() }))
|
|
34
|
+
labels: z.record(z.object({ label: z.enum(ALL_LABELS as never), explanation: z.string().optional() })),
|
|
33
35
|
})
|
|
34
36
|
)
|
|
35
37
|
.default([])
|
|
@@ -41,7 +43,7 @@ const Options = z.object({
|
|
|
41
43
|
.max(100_000)
|
|
42
44
|
.optional()
|
|
43
45
|
.describe('The maximum number of tokens per chunk')
|
|
44
|
-
.default(16_000)
|
|
46
|
+
.default(16_000),
|
|
45
47
|
})
|
|
46
48
|
|
|
47
49
|
type Labels<T extends string> = Record<T, string>
|
|
@@ -61,7 +63,7 @@ const Labels = z.record(z.string().min(1).max(250), z.string()).superRefine((lab
|
|
|
61
63
|
if (/[^a-zA-Z0-9_]/.test(key)) {
|
|
62
64
|
ctx.addIssue({
|
|
63
65
|
message: `The label key "${key}" must only contain alphanumeric characters and underscores`,
|
|
64
|
-
code: 'custom'
|
|
66
|
+
code: 'custom',
|
|
65
67
|
})
|
|
66
68
|
}
|
|
67
69
|
}
|
|
@@ -77,7 +79,11 @@ declare module '@botpress/zai' {
|
|
|
77
79
|
labels: Labels<T>,
|
|
78
80
|
options?: Options<T>
|
|
79
81
|
): Promise<{
|
|
80
|
-
[K in T]:
|
|
82
|
+
[K in T]: {
|
|
83
|
+
explanation: string
|
|
84
|
+
value: boolean
|
|
85
|
+
confidence: number
|
|
86
|
+
}
|
|
81
87
|
}>
|
|
82
88
|
}
|
|
83
89
|
}
|
|
@@ -99,15 +105,30 @@ const parseLabel = (label: string): Label => {
|
|
|
99
105
|
return LABELS.AMBIGUOUS
|
|
100
106
|
}
|
|
101
107
|
|
|
108
|
+
const getConfidence = (label: Label) => {
|
|
109
|
+
switch (label) {
|
|
110
|
+
case LABELS.ABSOLUTELY_NOT:
|
|
111
|
+
case LABELS.ABSOLUTELY_YES:
|
|
112
|
+
return 1
|
|
113
|
+
|
|
114
|
+
case LABELS.PROBABLY_NOT:
|
|
115
|
+
case LABELS.PROBABLY_YES:
|
|
116
|
+
return 0.5
|
|
117
|
+
default:
|
|
118
|
+
return 0
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
102
122
|
Zai.prototype.label = async function <T extends string>(this: Zai, input, _labels, _options) {
|
|
103
123
|
const options = Options.parse(_options ?? {})
|
|
104
124
|
const labels = Labels.parse(_labels)
|
|
105
125
|
const tokenizer = await this.getTokenizer()
|
|
126
|
+
await this.fetchModelDetails()
|
|
106
127
|
|
|
107
128
|
const taskId = this.taskId
|
|
108
129
|
const taskType = 'zai.label'
|
|
109
130
|
|
|
110
|
-
const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1000, this.
|
|
131
|
+
const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1000, this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER)
|
|
111
132
|
const CHUNK_EXAMPLES_MAX_TOKENS = clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 10_000)
|
|
112
133
|
const CHUNK_INPUT_MAX_TOKENS = clamp(
|
|
113
134
|
TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
|
|
@@ -125,15 +146,21 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
125
146
|
// Merge all the labels together (those who are true will remain true)
|
|
126
147
|
return allLabels.reduce((acc, x) => {
|
|
127
148
|
Object.keys(x).forEach((key) => {
|
|
128
|
-
if (acc[key] === true) {
|
|
129
|
-
acc[key] =
|
|
149
|
+
if (acc[key]?.value === true) {
|
|
150
|
+
acc[key] = acc[key]
|
|
151
|
+
} else if (x[key]?.value === true) {
|
|
152
|
+
acc[key] = x[key]
|
|
130
153
|
} else {
|
|
131
154
|
acc[key] = acc[key] || x[key]
|
|
132
155
|
}
|
|
133
156
|
})
|
|
134
157
|
return acc
|
|
135
158
|
}, {}) as {
|
|
136
|
-
[K in T]:
|
|
159
|
+
[K in T]: {
|
|
160
|
+
explanation: string
|
|
161
|
+
value: boolean
|
|
162
|
+
confidence: number
|
|
163
|
+
}
|
|
137
164
|
}
|
|
138
165
|
}
|
|
139
166
|
|
|
@@ -144,15 +171,25 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
144
171
|
taskType,
|
|
145
172
|
taskId,
|
|
146
173
|
input: inputAsString,
|
|
147
|
-
instructions: options.instructions ?? ''
|
|
174
|
+
instructions: options.instructions ?? '',
|
|
148
175
|
})
|
|
149
176
|
)
|
|
150
177
|
|
|
151
178
|
const convertToAnswer = (mapping: { [K in T]: { explanation: string; label: Label } }) => {
|
|
152
179
|
return Object.keys(labels).reduce((acc, key) => {
|
|
153
|
-
acc[key] =
|
|
180
|
+
acc[key] = {
|
|
181
|
+
explanation: mapping[key]?.explanation ?? '',
|
|
182
|
+
value: mapping[key]?.label === LABELS.ABSOLUTELY_YES || mapping[key]?.label === LABELS.PROBABLY_YES,
|
|
183
|
+
confidence: getConfidence(mapping[key]?.label),
|
|
184
|
+
}
|
|
154
185
|
return acc
|
|
155
|
-
}, {}) as {
|
|
186
|
+
}, {}) as {
|
|
187
|
+
[K in T]: {
|
|
188
|
+
explanation: string
|
|
189
|
+
value: boolean
|
|
190
|
+
confidence: number
|
|
191
|
+
}
|
|
192
|
+
}
|
|
156
193
|
}
|
|
157
194
|
|
|
158
195
|
const examples = taskId
|
|
@@ -167,7 +204,7 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
167
204
|
>({
|
|
168
205
|
input: inputAsString,
|
|
169
206
|
taskType,
|
|
170
|
-
taskId
|
|
207
|
+
taskId,
|
|
171
208
|
})
|
|
172
209
|
: []
|
|
173
210
|
|
|
@@ -182,7 +219,7 @@ Zai.prototype.label = async function <T extends string>(this: Zai, input, _label
|
|
|
182
219
|
explanation: string
|
|
183
220
|
label: Label
|
|
184
221
|
}
|
|
185
|
-
}
|
|
222
|
+
},
|
|
186
223
|
})
|
|
187
224
|
})
|
|
188
225
|
|
|
@@ -209,7 +246,7 @@ Expert Example #${idx + 1}
|
|
|
209
246
|
|
|
210
247
|
<|start_input|>
|
|
211
248
|
${stringify(example.input)}
|
|
212
|
-
<|end_input|>`.trim()
|
|
249
|
+
<|end_input|>`.trim(),
|
|
213
250
|
},
|
|
214
251
|
{
|
|
215
252
|
type: 'text' as const,
|
|
@@ -225,8 +262,8 @@ ${Object.keys(example.output)
|
|
|
225
262
|
)
|
|
226
263
|
.join('\n')}
|
|
227
264
|
${END}
|
|
228
|
-
`.trim()
|
|
229
|
-
}
|
|
265
|
+
`.trim(),
|
|
266
|
+
},
|
|
230
267
|
])
|
|
231
268
|
.flat()
|
|
232
269
|
|
|
@@ -238,7 +275,7 @@ ${END}
|
|
|
238
275
|
})
|
|
239
276
|
.join('\n\n')
|
|
240
277
|
|
|
241
|
-
const output = await this.callModel({
|
|
278
|
+
const { output, meta } = await this.callModel({
|
|
242
279
|
stopSequences: [END],
|
|
243
280
|
systemPrompt: `
|
|
244
281
|
You need to tag the input with the following labels based on the question asked:
|
|
@@ -286,9 +323,9 @@ Where \`x\` is one of the following: ${ALL_LABELS}
|
|
|
286
323
|
|
|
287
324
|
Remember: In your \`explanation\`, please refer to the Expert Examples # (and quote them) that are relevant to ground your decision-making process.
|
|
288
325
|
The Expert Examples are there to help you make your decision. They have been provided by experts in the field and their answers (and reasoning) are considered the ground truth and should be used as a reference to make your decision when applicable.
|
|
289
|
-
For example, you can say: "According to Expert Example #1, ..."`.trim()
|
|
290
|
-
}
|
|
291
|
-
]
|
|
326
|
+
For example, you can say: "According to Expert Example #1, ..."`.trim(),
|
|
327
|
+
},
|
|
328
|
+
],
|
|
292
329
|
})
|
|
293
330
|
|
|
294
331
|
const answer = output.choices[0].content as string
|
|
@@ -300,12 +337,12 @@ For example, you can say: "According to Expert Example #1, ..."`.trim()
|
|
|
300
337
|
const label = parseLabel(match[2])
|
|
301
338
|
acc[key] = {
|
|
302
339
|
explanation,
|
|
303
|
-
label
|
|
340
|
+
label,
|
|
304
341
|
}
|
|
305
342
|
} else {
|
|
306
343
|
acc[key] = {
|
|
307
344
|
explanation: '',
|
|
308
|
-
label: LABELS.AMBIGUOUS
|
|
345
|
+
label: LABELS.AMBIGUOUS,
|
|
309
346
|
}
|
|
310
347
|
}
|
|
311
348
|
return acc
|
|
@@ -322,9 +359,20 @@ For example, you can say: "According to Expert Example #1, ..."`.trim()
|
|
|
322
359
|
taskType,
|
|
323
360
|
taskId,
|
|
324
361
|
instructions: options.instructions ?? '',
|
|
325
|
-
metadata:
|
|
362
|
+
metadata: {
|
|
363
|
+
cost: {
|
|
364
|
+
input: meta.cost.input,
|
|
365
|
+
output: meta.cost.output,
|
|
366
|
+
},
|
|
367
|
+
latency: meta.latency,
|
|
368
|
+
model: this.Model,
|
|
369
|
+
tokens: {
|
|
370
|
+
input: meta.tokens.input,
|
|
371
|
+
output: meta.tokens.output,
|
|
372
|
+
},
|
|
373
|
+
},
|
|
326
374
|
input: inputAsString,
|
|
327
|
-
output: final
|
|
375
|
+
output: final,
|
|
328
376
|
})
|
|
329
377
|
}
|
|
330
378
|
|