@botpress/zai 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/adapter.js +2 -0
- package/dist/adapters/botpress-table.js +168 -0
- package/dist/adapters/memory.js +12 -0
- package/dist/index.d.ts +99 -98
- package/dist/index.js +9 -1873
- package/dist/models.js +387 -0
- package/dist/operations/check.js +141 -0
- package/dist/operations/constants.js +2 -0
- package/dist/operations/errors.js +15 -0
- package/dist/operations/extract.js +212 -0
- package/dist/operations/filter.js +179 -0
- package/dist/operations/label.js +237 -0
- package/dist/operations/rewrite.js +111 -0
- package/dist/operations/summarize.js +132 -0
- package/dist/operations/text.js +46 -0
- package/dist/utils.js +43 -0
- package/dist/zai.js +140 -0
- package/package.json +21 -19
- package/src/adapters/adapter.ts +35 -0
- package/src/adapters/botpress-table.ts +210 -0
- package/src/adapters/memory.ts +13 -0
- package/src/index.ts +11 -0
- package/src/models.ts +394 -0
- package/src/operations/__tests/botpress_docs.txt +26040 -0
- package/src/operations/__tests/cache.jsonl +101 -0
- package/src/operations/__tests/index.ts +87 -0
- package/src/operations/check.ts +187 -0
- package/src/operations/constants.ts +2 -0
- package/src/operations/errors.ts +9 -0
- package/src/operations/extract.ts +291 -0
- package/src/operations/filter.ts +231 -0
- package/src/operations/label.ts +332 -0
- package/src/operations/rewrite.ts +148 -0
- package/src/operations/summarize.ts +193 -0
- package/src/operations/text.ts +63 -0
- package/src/sdk-interfaces/llm/generateContent.ts +127 -0
- package/src/sdk-interfaces/llm/listLanguageModels.ts +19 -0
- package/src/utils.ts +61 -0
- package/src/zai.ts +193 -0
- package/tsconfig.json +2 -2
- package/dist/index.cjs +0 -1903
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -916
- package/dist/index.js.map +0 -1
- package/tsup.config.ts +0 -16
- package/vitest.config.ts +0 -9
- package/vitest.setup.ts +0 -24
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { Client } from '@botpress/client'
|
|
2
|
+
import { type TextTokenizer, getWasmTokenizer } from '@botpress/wasm'
|
|
3
|
+
|
|
4
|
+
import fs from 'node:fs'
|
|
5
|
+
import path from 'node:path'
|
|
6
|
+
import { beforeAll } from 'vitest'
|
|
7
|
+
|
|
8
|
+
import { Zai } from '../..'
|
|
9
|
+
|
|
10
|
+
import { fastHash } from '../../utils'
|
|
11
|
+
|
|
12
|
+
/**
 * Builds a Botpress API client for the test suite from environment variables.
 *
 * Reads `CLOUD_API_ENDPOINT` (falling back to `https://api.botpress.dev` when unset),
 * `CLOUD_BOT_ID` and `CLOUD_PAT`.
 */
export const getClient = () => {
  const { CLOUD_API_ENDPOINT, CLOUD_BOT_ID, CLOUD_PAT } = process.env
  const apiUrl = CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev'

  return new Client({ apiUrl, botId: CLOUD_BOT_ID, token: CLOUD_PAT })
}
|
|
19
|
+
|
|
20
|
+
/**
 * Loads a JSON Lines file into a map keyed by one property of each record.
 *
 * Each non-blank line is parsed as JSON; the value found under `keyProperty` is converted
 * with `String()` and used as the map key. When several records share a key, the last one wins.
 *
 * @param filePath - Path of the `.jsonl` file to read.
 * @param keyProperty - Property of each record whose value becomes the map key.
 * @returns The parsed records by key; an empty map when the file does not exist.
 * @throws SyntaxError when a non-blank line is not valid JSON.
 */
function readJSONL<T>(filePath: string, keyProperty: keyof T): Map<string, T> {
  const map = new Map<string, T>()

  // A missing file just means nothing has been recorded yet: the file is only ever
  // created by appending to it, so it must not be required to exist up front.
  if (!fs.existsSync(filePath)) {
    return map
  }

  const lines = fs.readFileSync(filePath, 'utf-8').split(/\r?\n/)

  for (const line of lines) {
    // Skip empty and whitespace-only lines instead of handing them to JSON.parse
    if (!line.trim()) {
      continue
    }

    // The cast is unchecked: the record shape is trusted, not validated
    const obj = JSON.parse(line) as T
    map.set(String(obj[keyProperty]), obj)
  }

  return map
}
|
|
33
|
+
|
|
34
|
+
/**
 * Recorded responses loaded once, at module load, from `cache.jsonl` next to this file
 * and indexed by each record's `key` property.
 */
const cache = readJSONL<{ key: string; value: any }>(path.resolve(import.meta.dirname, './cache.jsonl'), 'key')
|
|
38
|
+
|
|
39
|
+
/**
 * Returns a client whose `callAction` is backed by the recorded-response cache.
 *
 * The client is wrapped in a `Proxy`. Reading `callAction` yields a function that hashes its
 * JSON-serialized arguments and returns the cached value on a hit. On a miss it calls the real
 * `callAction`, stores the response in memory, and appends it to `cache.jsonl`.
 * Every other property is read straight from the underlying client.
 */
export const getCachedClient = () => {
  const cacheFile = path.resolve(import.meta.dirname, './cache.jsonl')

  return new Proxy(getClient(), {
    get(target, prop) {
      // Only `callAction` is intercepted
      if (prop !== 'callAction') {
        return Reflect.get(target, prop)
      }

      return async (...args: Parameters<Client['callAction']>) => {
        const key = fastHash(JSON.stringify(args))
        const hit = cache.get(key)

        if (hit) {
          return hit.value
        }

        const response = await target.callAction(...args)
        const entry = { key, value: response }

        // Record the response both in memory and on disk (one JSON document per line)
        cache.set(key, entry)
        fs.appendFileSync(cacheFile, JSON.stringify(entry) + '\n')

        return response
      }
    }
  })
}
|
|
73
|
+
|
|
74
|
+
/** Creates a `Zai` instance that uses the cached client and performs no retries (`maxRetries: 0`). */
export const getZai = () => new Zai({ client: getCachedClient(), retry: { maxRetries: 0 } })
|
|
78
|
+
|
|
79
|
+
/**
 * Tokenizer shared by the tests. It starts out as `null` (hidden from the type by `!`) and is
 * assigned by the `beforeAll` hook registered below, so it must not be used before that hook runs.
 */
export let tokenizer: TextTokenizer = null!

/** Resolves the WASM tokenizer and stores it in the exported `tokenizer` binding. */
async function loadTokenizer() {
  tokenizer = await getWasmTokenizer()
}

beforeAll(loadTokenizer)
|
|
84
|
+
|
|
85
|
+
/**
 * Contents of `botpress_docs.txt` (located next to this file), read synchronously at module load
 * and trimmed.
 *
 * The directory is taken from `import.meta.dirname`, as for `cache.jsonl` elsewhere in this module,
 * rather than from the CommonJS-only `__dirname`.
 */
export const BotpressDocumentation = fs
  .readFileSync(path.join(import.meta.dirname, './botpress_docs.txt'), 'utf-8')
  .trim()
|
|
86
|
+
|
|
87
|
+
/** Static metadata fixture: unit input/output cost and token counts, zero latency, empty model name. */
export const metadata = {
  cost: { input: 1, output: 1 },
  latency: 0,
  model: '',
  tokens: { input: 1, output: 1 }
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
2
|
+
|
|
3
|
+
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
4
|
+
import { Zai } from '../zai'
|
|
5
|
+
import { PROMPT_INPUT_BUFFER } from './constants'
|
|
6
|
+
|
|
7
|
+
/** One worked example: an input, the expected boolean verdict, and an optional justification. */
const Example = z.object({
  input: z.any(),
  check: z.boolean(),
  reason: z.string().optional()
})

/** Runtime schema for the options of `check`; `examples` defaults to an empty list. */
const Options = z.object({
  examples: z
    .array(Example)
    .describe('Examples to check the condition against')
    .default([])
})

/** Options as accepted from callers (the schema's input type, i.e. before defaults are applied). */
export type Options = z.input<typeof Options>
|
|
17
|
+
|
|
18
|
+
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Checks whether a condition is true or not for the given input.
     *
     * @param input - The value the condition is evaluated against.
     * @param condition - The condition to evaluate, in natural language.
     * @param options - Optional examples to guide the decision.
     * @returns `true` when the condition holds for the input, `false` otherwise.
     */
    check(input: unknown, condition: string, options?: Options): Promise<boolean>
  }
}
|
|
24
|
+
|
|
25
|
+
/** Marker the model is asked to emit when the condition holds. */
const TRUE = '■TRUE■'
/** Marker the model is asked to emit when the condition does not hold. */
const FALSE = '■FALSE■'
/** Marker that ends the model's response; also passed as the stop sequence. */
const END = '■END■'
|
|
28
|
+
|
|
29
|
+
/**
 * Asks the model whether `condition` holds for `input` and returns the verdict.
 *
 * Flow:
 *  1. The stringified input and the condition are truncated to 50% and 20% of the prompt budget.
 *  2. When a task id is set, stored examples are fetched from the adapter; an example whose key
 *     equals the hash of (task type, task id, input, condition) is returned without calling the model.
 *  3. Stored and caller-provided examples (or two built-in ones when there are none) are added
 *     as user/assistant message pairs until the remaining token budget is used up.
 *  4. The model's answer is searched for the TRUE / FALSE markers; when both are present the
 *     one appearing last wins.
 *  5. When a task id is set, the result is saved back through the adapter.
 *
 * @param input - Value to evaluate; it is stringified before being sent to the model.
 * @param condition - Condition to evaluate, in natural language.
 * @param _options - Optional settings, validated against the `Options` schema.
 * @returns The boolean verdict.
 * @throws Error when the model returns no text, or text containing neither marker.
 */
Zai.prototype.check = async function (this: Zai, input, condition, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100)

  const taskId = this.taskId
  const taskType = 'zai.check'

  const PROMPT_TOKENS = {
    INPUT: Math.floor(0.5 * PROMPT_COMPONENT),
    CONDITION: Math.floor(0.2 * PROMPT_COMPONENT)
  }

  // Truncate the input to fit the model's input size
  const inputAsString = tokenizer.truncate(stringify(input), PROMPT_TOKENS.INPUT)
  condition = tokenizer.truncate(condition, PROMPT_TOKENS.CONDITION)

  // All tokens remaining after the input and condition are accounted can be used for examples
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(condition)

  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      condition
    })
  )

  const examples = taskId
    ? await this.adapter.getExamples<string, boolean>({
        input: inputAsString,
        taskType,
        taskId
      })
    : []

  // A stored example with the very same key is reused as-is, without calling the model
  const exactMatch = examples.find((x) => x.key === Key)
  if (exactMatch) {
    return exactMatch.output
  }

  const defaultExamples = [
    { input: '50 Cent', check: true, reason: '50 Cent is widely recognized as a public personality.' },
    {
      input: ['apple', 'banana', 'carrot', 'house'],
      check: false,
      reason: 'The list contains a house, which is not a fruit. Also, the list contains a carrot, which is a vegetable.'
    }
  ]

  const userExamples = [
    ...examples.map((e) => ({ input: e.input, check: e.output, reason: e.explanation })),
    ...options.examples
  ]

  // Incremented by formatInput each time it numbers an "Expert Example"
  let exampleId = 1

  const formatInput = (input: string, condition: string) => {
    const header = userExamples.length ? `Expert Example #${exampleId++}` : `Example of condition: "${condition}"`

    return `
${header}
<|start_input|>
${input.trim()}
<|end_input|>
`.trim()
  }

  const formatOutput = (answer: boolean, justification: string) => {
    return `
Analysis: ${justification}
Final Answer: ${answer ? TRUE : FALSE}
${END}
`.trim()
  }

  const formatExample = (example: { input?: any; check: boolean; reason?: string }) => [
    { type: 'text' as const, content: formatInput(stringify(example.input ?? null), condition), role: 'user' as const },
    {
      type: 'text' as const,
      content: formatOutput(example.check, example.reason ?? ''),
      role: 'assistant' as const
    }
  ]

  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : defaultExamples,
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(el.reason ?? '')
  )
    .map(formatExample)
    .flat()

  const specialInstructions = userExamples.length
    ? `
- You have been provided with examples from previous experts. Make sure to read them carefully before making your decision.
- Make sure to refer to the examples provided by the experts to justify your decision (when applicable).
- When in doubt, ground your decision on the examples provided by the experts instead of your own intuition.
- When no example is similar to the input, make sure to provide a clear justification for your decision while inferring the decision-making process from the examples provided by the experts.
`.trim()
    : ''

  // NOTE(review): without user examples `exampleId` is still 1 when the system prompt is built,
  // so it reads "Expert Examples (#1 to #0)". The prompt text is left untouched here — confirm
  // whether that wording is intended before changing it.
  const output = await this.callModel({
    systemPrompt: `
Check if the following condition is true or false for the given input. Before answering, make sure to read the input and the condition carefully.
Justify your answer, then answer with either ${TRUE} or ${FALSE} at the very end, then add ${END} to finish the response.
IMPORTANT: Make sure to answer with either ${TRUE} or ${FALSE} at the end of your response, but NOT both.
---
Expert Examples (#1 to #${exampleId - 1}):
${specialInstructions}
`.trim(),
    stopSequences: [END],
    messages: [
      ...allExamples,
      {
        type: 'text',
        content: `
Considering the below input and above examples, is the following condition true or false?
${formatInput(inputAsString, condition)}
In your "Analysis", please refer to the Expert Examples # to justify your decision.`.trim(),
        role: 'user'
      }
    ]
  })

  const answer = output.choices[0]?.content

  // Fail with an explicit message rather than a TypeError when there is no text to inspect
  if (typeof answer !== 'string') {
    throw new Error('The model did not return a valid answer. The response did not contain any text.')
  }

  const hasTrue = answer.includes(TRUE)
  const hasFalse = answer.includes(FALSE)

  if (!hasTrue && !hasFalse) {
    throw new Error(`The model did not return a valid answer. The response was: ${answer}`)
  }

  let finalAnswer: boolean

  if (hasTrue && hasFalse) {
    // If both TRUE and FALSE are present, we need to check which one was answered last
    finalAnswer = answer.lastIndexOf(TRUE) > answer.lastIndexOf(FALSE)
  } else {
    finalAnswer = hasTrue
  }

  if (taskId) {
    // Remove every occurrence of the markers, not only the first one: the answer may contain
    // a marker several times (e.g. both TRUE and FALSE) and none of them belongs in the explanation.
    const stripAll = (text: string, token: string) => text.split(token).join('')
    const explanation = [TRUE, FALSE, END, 'Final Answer:'].reduce(stripAll, answer).trim()

    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      input: inputAsString,
      instructions: condition,
      metadata: output.metadata,
      output: finalAnswer,
      explanation
    })
  }

  return finalAnswer
}
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
2
|
+
|
|
3
|
+
import JSON5 from 'json5'
|
|
4
|
+
import { jsonrepair } from 'jsonrepair'
|
|
5
|
+
|
|
6
|
+
import { chunk, isArray } from 'lodash-es'
|
|
7
|
+
import { fastHash, stringify, takeUntilTokens } from '../utils'
|
|
8
|
+
import { Zai } from '../zai'
|
|
9
|
+
import { PROMPT_INPUT_BUFFER } from './constants'
|
|
10
|
+
import { JsonParsingError } from './errors'
|
|
11
|
+
|
|
12
|
+
/**
 * Runtime schema for the options of `extract`.
 * `chunkLength` must be between 100 and 100 000 and defaults to 16 000.
 */
const Options = z.object({
  instructions: z.string().optional().describe('Instructions to guide the user on how to extract the data'),
  chunkLength: z.number().min(100).max(100_000).optional().describe('The maximum number of tokens per chunk').default(16_000)
})

/** Options as accepted from callers (the schema's input type, i.e. before defaults are applied). */
export type Options = z.input<typeof Options>
|
|
23
|
+
|
|
24
|
+
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Extracts one element from an arbitrary input.
     *
     * @param input - The value to extract from.
     * @param schema - Object schema describing the element to extract.
     * @param options - Optional instructions and chunk length.
     * @returns The extracted element, typed after `schema`.
     */
    extract<S extends z.AnyZodObject>(input: unknown, schema: S, options?: Options): Promise<z.infer<S>>
    /**
     * Extracts many elements from an arbitrary input.
     *
     * @param input - The value to extract from.
     * @param schema - Array schema whose items describe each element to extract.
     * @param options - Optional instructions and chunk length.
     * @returns The extracted elements, each typed after the array's item schema.
     */
    extract<S extends z.AnyZodObject>(
      input: unknown,
      schema: z.ZodArray<S>,
      options?: Options
    ): Promise<Array<z.infer<S>>>
  }
}
|
|
35
|
+
|
|
36
|
+
/** Marker placed before each extracted JSON object. */
const START = '■json_start■'
/** Marker placed after each extracted JSON object; the stop sequence in single-object mode. */
const END = '■json_end■'
/** Marker signalling that no more elements follow; the stop sequence in array mode. */
const NO_MORE = '■NO_MORE_ELEMENT■'
|
|
39
|
+
|
|
40
|
+
/**
 * Extracts one object, or an array of objects, matching `schema` from an arbitrary input.
 *
 * Flow:
 *  1. `schema` must be an object schema or an array of object schemas; anything else throws.
 *  2. The chunk length is capped by what is left of the model's input size once the schema is
 *     accounted for. Oversized input is split into chunks that are extracted separately and
 *     concatenated (array mode), or truncated (single-object mode).
 *  3. When a task id is set, stored examples are fetched from the adapter; an example whose key
 *     equals the hash of (task type, task id, input, instructions) is returned as-is.
 *  4. The model's answer is split on the START marker; each segment is cut at the END marker,
 *     repaired, parsed and validated against the object schema.
 *  5. When a task id is set, the result is saved back through the adapter.
 *
 * @param input - Value to extract from; it is stringified before being sent to the model.
 * @param schema - Object schema, or array-of-object schema, describing what to extract.
 * @param _options - Optional settings, validated against the `Options` schema.
 * @returns The extracted object, or the array of extracted objects in array mode.
 * @throws Error when `schema` is neither an object nor an array of objects, or when the model
 *   returns no text.
 * @throws JsonParsingError when a segment of the answer cannot be parsed or validated.
 */
Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()

  const taskId = this.taskId
  const taskType = 'zai.extract'

  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100)

  let isArrayOfObjects = false
  const originalSchema = schema

  if (schema instanceof z.ZodObject) {
    // Do nothing
  } else if (schema instanceof z.ZodArray) {
    if (schema._def.type instanceof z.ZodObject) {
      isArrayOfObjects = true
      // From here on `schema` is the schema of a single element
      schema = schema._def.type
    } else {
      throw new Error('Schema must be a ZodObject or a ZodArray<ZodObject>')
    }
  } else {
    throw new Error('Schema must be either a ZuiObject or a ZuiArray<ZuiObject>')
  }

  const schemaTypescript = schema.toTypescript({ declaration: false })
  const schemaLength = tokenizer.count(schemaTypescript)

  // Keep the chunk length positive even when the schema uses up most of the model's input size:
  // a zero or negative length would make chunking produce nothing. 100 is the same floor used
  // for PROMPT_COMPONENT and the minimum accepted by the Options schema.
  options.chunkLength = Math.max(
    Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength),
    100
  )

  const keys = Object.keys(schema.shape)

  let inputAsString = stringify(input)

  if (tokenizer.count(inputAsString) > options.chunkLength) {
    // If we want to extract an array of objects, we will run this function recursively
    if (isArrayOfObjects) {
      const tokens = tokenizer.split(inputAsString)
      const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(''))
      // Forward the caller's instructions so every chunk is extracted under the same guidance
      const all = await Promise.all(
        chunks.map((part) =>
          this.extract(part, originalSchema as z.AnyZodObject, { instructions: options.instructions })
        )
      )

      return all.flat()
    } else {
      // Truncate the input to fit the model's input size
      inputAsString = tokenizer.truncate(stringify(input), options.chunkLength)
    }
  }

  const instructions: string[] = []

  if (options.instructions) {
    instructions.push(options.instructions)
  }

  const shape = `{ ${keys.map((key) => `"${key}": ...`).join(', ')} }`
  const abbv = '{ ... }'

  if (isArrayOfObjects) {
    instructions.push('You may have multiple elements, or zero elements in the input.')
    instructions.push('You must extract each element separately.')
    instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`)
    instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`)
    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`)
    instructions.push(
      `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
    )
  } else {
    instructions.push('You may have exactly one element in the input.')
    instructions.push(`The element must be a JSON object with exactly the format: ${START}${shape}${END}`)
  }

  // All tokens remaining after the input and condition are accounted can be used for examples
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join('\n'))

  // NOTE(review): the key does not include the schema, so the same input and instructions
  // extracted with a different schema produce the same key — confirm whether that is intended.
  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions
    })
  )

  const examples = taskId
    ? await this.adapter.getExamples<string, unknown>({
        input: inputAsString,
        taskType,
        taskId
      })
    : []

  // A stored example with the very same key is reused as-is, without calling the model
  const exactMatch = examples.find((x) => x.key === Key)
  if (exactMatch) {
    return exactMatch.output
  }

  const defaultExample = isArrayOfObjects
    ? {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
Then, there was a person named Bob who was 25 years old.
The end.`,
        schema: 'Array<{ name: string, age: number }>',
        instructions: 'Extract all people',
        extracted: [
          {
            name: 'Alice',
            age: 30
          },
          {
            name: 'Bob',
            age: 25
          }
        ]
      }
    : {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
The end.`,
        schema: '{ name: string, age: number }',
        instructions: 'Extract the person',
        extracted: { name: 'Alice', age: 30 }
      }

  const userExamples = examples.map((e) => ({
    input: e.input,
    extracted: e.output,
    schema: schemaTypescript,
    instructions: options.instructions
  }))

  // Incremented by formatInput each time it numbers an "Expert Example"
  let exampleId = 1

  const formatInput = (input: string, schema: string, instructions?: string) => {
    const header = userExamples.length
      ? `Expert Example #${exampleId++}`
      : "Here's an example to help you understand the format:"

    return `
${header}

<|start_schema|>
${schema}
<|end_schema|>

<|start_instructions|>
${instructions ?? 'No specific instructions, just follow the schema above.'}
<|end_instructions|>

<|start_input|>
${input.trim()}
<|end_input|>
`.trim()
  }

  const formatOutput = (extracted: any) => {
    extracted = isArray(extracted) ? extracted : [extracted]

    return (
      extracted
        .map((x: unknown) =>
          `
${START}
${JSON.stringify(x, null, 2)}
${END}`.trim()
        )
        .join('\n') + NO_MORE
    )
  }

  const formatExample = (example: { input?: any; schema: string; instructions?: string; extracted: any }) => [
    {
      type: 'text' as const,
      content: formatInput(stringify(example.input ?? null), example.schema, example.instructions),
      role: 'user' as const
    },
    {
      type: 'text' as const,
      content: formatOutput(example.extracted),
      role: 'assistant' as const
    }
  ]

  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : [defaultExample],
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
  )
    .map(formatExample)
    .flat()

  const output = await this.callModel({
    systemPrompt: `
Extract the following information from the input:
${schemaTypescript}
====

${instructions.map((x) => `• ${x}`).join('\n')}
`.trim(),
    stopSequences: [isArrayOfObjects ? NO_MORE : END],
    messages: [
      ...allExamples,
      {
        role: 'user',
        type: 'text',
        content: formatInput(inputAsString, schemaTypescript, options.instructions ?? '')
      }
    ]
  })

  const answer = output.choices[0]?.content

  // Fail with an explicit message rather than a TypeError when there is no text to parse
  if (typeof answer !== 'string') {
    throw new Error('The model did not return a valid answer. The response did not contain any text.')
  }

  const elements = answer
    .split(START)
    .filter((x) => x.trim().length > 0)
    .map((x) => {
      try {
        // END is a stop sequence in single-object mode, so it is usually absent from the answer.
        // In that case keep the whole segment: slicing with an index of -1 would silently drop
        // the last character of the JSON.
        const endIndex = x.indexOf(END)
        const json = (endIndex === -1 ? x : x.slice(0, endIndex)).trim()
        const repairedJson = jsonrepair(json)
        const parsedJson = JSON5.parse(repairedJson)

        return schema.parse(parsedJson)
      } catch (error) {
        throw new JsonParsingError(x, error instanceof Error ? error : new Error('Unknown error'))
      }
    })
    .filter((x) => x !== null)

  let final: any

  if (isArrayOfObjects) {
    final = elements
  } else if (elements.length === 0) {
    // Nothing was extracted: validate an empty object, which throws if the schema has required keys
    final = schema.parse({})
  } else {
    final = elements[0]
  }

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      // Must be the same task id the examples are looked up with above, otherwise saved
      // examples are never found again
      taskId,
      taskType,
      instructions: options.instructions ?? 'No specific instructions',
      input: inputAsString,
      output: final,
      metadata: output.metadata
    })
  }

  return final
}
|