@botpress/zai 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build.ts +9 -0
- package/dist/adapters/adapter.js +2 -0
- package/dist/adapters/botpress-table.js +168 -0
- package/dist/adapters/memory.js +12 -0
- package/dist/index.d.ts +111 -609
- package/dist/index.js +9 -1873
- package/dist/operations/check.js +153 -0
- package/dist/operations/constants.js +2 -0
- package/dist/operations/errors.js +15 -0
- package/dist/operations/extract.js +232 -0
- package/dist/operations/filter.js +191 -0
- package/dist/operations/label.js +249 -0
- package/dist/operations/rewrite.js +123 -0
- package/dist/operations/summarize.js +133 -0
- package/dist/operations/text.js +47 -0
- package/dist/utils.js +37 -0
- package/dist/zai.js +100 -0
- package/e2e/data/botpress_docs.txt +26040 -0
- package/e2e/data/cache.jsonl +107 -0
- package/e2e/utils.ts +89 -0
- package/package.json +33 -29
- package/src/adapters/adapter.ts +35 -0
- package/src/adapters/botpress-table.ts +210 -0
- package/src/adapters/memory.ts +13 -0
- package/src/index.ts +11 -0
- package/src/operations/check.ts +201 -0
- package/src/operations/constants.ts +2 -0
- package/src/operations/errors.ts +9 -0
- package/src/operations/extract.ts +309 -0
- package/src/operations/filter.ts +244 -0
- package/src/operations/label.ts +345 -0
- package/src/operations/rewrite.ts +161 -0
- package/src/operations/summarize.ts +195 -0
- package/src/operations/text.ts +65 -0
- package/src/utils.ts +52 -0
- package/src/zai.ts +147 -0
- package/tsconfig.json +3 -23
- package/dist/index.cjs +0 -1903
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -916
- package/dist/index.js.map +0 -1
- package/tsup.config.ts +0 -16
- package/vitest.config.ts +0 -9
- package/vitest.setup.ts +0 -24
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// eslint-disable consistent-type-definitions
|
|
2
|
+
import { z } from '@bpinternal/zui'
|
|
3
|
+
|
|
4
|
+
import { clamp } from 'lodash-es'
|
|
5
|
+
import { Zai } from '../zai'
|
|
6
|
+
import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
|
|
7
|
+
|
|
8
|
+
/** Caller-facing options accepted by `Zai.text` (the schema's input type). */
export type Options = z.input<typeof Options>

/** Runtime schema used to validate the options passed to `Zai.text`. */
const Options = z.object({
  // Optional cap on the generated text, between 1 and 100,000 tokens.
  length: z
    .number()
    .min(1)
    .max(100_000)
    .optional()
    .describe('The maximum number of tokens to generate'),
})
|
|
12
|
+
|
|
13
|
+
// Module augmentation: adds the `text` operation to the `Zai` interface.
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Generates a text of the desired length according to the prompt.
     *
     * @param prompt - The user prompt the generated text must fulfill.
     * @param options - Optional generation settings (see `Options`).
     * @returns A promise resolving to the generated text.
     */
    text(prompt: string, options?: Options): Promise<string>
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Implementation of `Zai.text`.
 *
 * Steps:
 * 1. Validate the options and load the tokenizer and model details.
 * 2. Truncate the prompt to the model's input budget (never below 100 tokens).
 * 3. When a length is requested, cap it to the model's output budget and add
 *    length instructions (plus a tokens-to-words chart for lengths <= 500).
 * 4. Call the model and return the content of the first choice.
 */
Zai.prototype.text = async function (this: Zai, prompt, _options) {
  const { length: requestedLength } = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()
  await this.fetchModelDetails()

  const inputBudget = Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100)
  const userPrompt = tokenizer.truncate(prompt, inputBudget)

  // Only touch the output budget when the caller actually asked for a length.
  const maxTokens = requestedLength
    ? Math.min(this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER, requestedLength)
    : requestedLength

  const instructions: string[] = []
  if (maxTokens) {
    // Aim for 75% of the cap so the text can finish before the hard token limit.
    const target = clamp(maxTokens * 0.75, 5, maxTokens)
    instructions.push(
      `IMPORTANT: Length constraint: ${target} tokens/words`,
      `The text must be standalone and complete in less than ${target} tokens/words`
    )
  }

  // The tokens-to-words chart is only included for short outputs.
  const chart =
    maxTokens && maxTokens <= 500
      ? `
| Tokens | Text Length (approximate) |
|-------------|--------------------------------------|
| < 5 tokens | 1-3 words |
| 5-10 tokens | 3-6 words |
| 10-20 tokens| 6-15 words |
| 20-50 tokens| A short sentence (15-30 words) |
| 50-100 tokens| A medium sentence (30-70 words) |
| 100-200 tokens| A short paragraph (70-150 words) |
| 200-300 tokens| A medium paragraph (150-200 words) |
| 300-500 tokens| A long paragraph (200-300 words) |`.trim()
      : ''

  const bulletList = instructions.map((x) => `- ${x}`).join('\n')
  const systemPrompt = [
    'Generate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.',
    bulletList,
    chart,
  ]
    .join('\n')
    .trim()

  const response = await this.callModel({
    systemPrompt,
    temperature: 0.7,
    messages: [{ type: 'text', content: userPrompt, role: 'user' }],
    maxTokens,
  })

  return response.output?.choices?.[0]?.content! as string
}
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { z } from '@bpinternal/zui'
|
|
2
|
+
|
|
3
|
+
/**
 * Converts an arbitrary value to a string.
 *
 * - Non-empty strings are returned unchanged.
 * - Other truthy values, and the number `0`, are serialized with `JSON.stringify`.
 * - Remaining falsy values (`null`, `undefined`, `false`, `''`, `NaN`) produce a
 *   fixed placeholder string.
 *
 * @param input - The value to convert.
 * @param beautify - When true (default), JSON output is indented with 2 spaces.
 * @returns The string form of `input`, or the placeholder for empty values.
 */
export const stringify = (input: unknown, beautify = true) => {
  if (typeof input === 'string' && input.length > 0) {
    return input
  }

  // `0` is falsy but is a real value: it must not be reported as
  // "null, false, undefined or empty".
  if (input || input === 0) {
    return JSON.stringify(input, null, beautify ? 2 : undefined)
  }

  return '<input is null, false, undefined or empty>'
}
|
|
10
|
+
|
|
11
|
+
/**
 * Computes a small, fast, non-cryptographic hash of a string.
 *
 * Each UTF-16 code unit is folded into a 32-bit accumulator using
 * `acc * 31 + codeUnit` with 32-bit wrap-around.
 *
 * @param str - The string to hash.
 * @returns The hash as an unsigned 32-bit value in lowercase hexadecimal.
 */
export function fastHash(str: string): string {
  let acc = 0
  for (let idx = 0; idx < str.length; idx += 1) {
    // Math.imul(acc, 31) equals (acc << 5) - acc modulo 2^32; `| 0` keeps it 32-bit.
    acc = (Math.imul(acc, 31) + str.charCodeAt(idx)) | 0
  }
  // `>>> 0` reinterprets the signed accumulator as unsigned before hex encoding.
  return (acc >>> 0).toString(16)
}
|
|
19
|
+
|
|
20
|
+
/**
 * Returns the longest prefix of `arr` whose cumulative token count fits in `tokens`.
 *
 * Iteration stops at the first element that would push the running total over
 * the budget; later elements are not considered even if they would fit.
 *
 * @param arr - The elements to take from, in order.
 * @param tokens - The maximum cumulative token count allowed.
 * @param count - Returns the token count of a single element.
 * @returns A new array containing the elements that fit.
 */
export const takeUntilTokens = <T>(arr: T[], tokens: number, count: (el: T) => number) => {
  const kept: T[] = []
  let used = 0

  for (const item of arr) {
    const cost = count(item)
    if (used + cost > tokens) {
      return kept
    }
    kept.push(item)
    used += cost
  }

  return kept
}
|
|
35
|
+
|
|
36
|
+
// Input/output cost pair of a generation.
const costSchema = z.object({ input: z.number(), output: z.number() }).describe('Cost in $USD')

// Input/output token usage pair of a generation.
const tokenUsageSchema = z.object({ input: z.number(), output: z.number() }).describe('Number of tokens used')

/** Metadata describing a generation (the schema's input type). */
export type GenerationMetadata = z.input<typeof GenerationMetadata>

/** Runtime schema for generation metadata: model name, cost, latency and token usage. */
export const GenerationMetadata = z.object({
  model: z.string(),
  cost: costSchema,
  latency: z.number().describe('Latency in milliseconds'),
  tokens: tokenUsageSchema,
})
|
package/src/zai.ts
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { BotpressClientLike, Cognitive, Model } from '@botpress/cognitive'
|
|
2
|
+
|
|
3
|
+
import { type TextTokenizer, getWasmTokenizer } from '@bpinternal/thicktoken'
|
|
4
|
+
import { z } from '@bpinternal/zui'
|
|
5
|
+
|
|
6
|
+
import { Adapter } from './adapters/adapter'
|
|
7
|
+
import { TableAdapter } from './adapters/botpress-table'
|
|
8
|
+
import { MemoryAdapter } from './adapters/memory'
|
|
9
|
+
|
|
10
|
+
/** Type of the `model` property of the first argument of `Cognitive.generateContent`, wrapped in `Required`. */
type ModelId = Required<Parameters<Cognitive['generateContent']>[0]['model']>
|
|
11
|
+
|
|
12
|
+
/** Active learning settings as accepted from callers (the schema's input type). */
type ActiveLearning = z.input<typeof ActiveLearning>

/**
 * Runtime schema for the active learning settings.
 *
 * - `enable`: whether active learning is enabled (defaults to `false`).
 * - `tableName`: 1-100 allowed characters followed by the literal suffix `Table`
 *   (defaults to `ActiveLearningTable`).
 * - `taskId`: 1-100 allowed characters (defaults to `default`).
 *
 * Each regex failure message names the field it validates.
 */
const ActiveLearning = z.object({
  enable: z.boolean().describe('Whether to enable active learning').default(false),
  tableName: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}Table$/,
      'Table name must contain only letters, numbers, underscores, hyphens and slashes, and must end with "Table"'
    )
    .describe('The name of the table to store active learning tasks')
    .default('ActiveLearningTable'),
  taskId: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}$/,
      'Task ID must contain only letters, numbers, underscores, hyphens and slashes'
    )
    .describe('The ID of the task')
    .default('default'),
})
|
|
32
|
+
|
|
33
|
+
/** Configuration accepted by the `Zai` constructor (the schema's input type). */
type ZaiConfig = z.input<typeof ZaiConfig>

/**
 * Runtime schema for the `Zai` configuration.
 *
 * - `client`: a Botpress-like client or a `Cognitive` instance (not validated at runtime).
 * - `userId`: optional ID of the user consuming the API.
 * - `modelId`: `'best'`, `'fast'`, or any string containing a `:`; defaults to `'best'`.
 * - `activeLearning`: active learning settings; disabled by default.
 * - `namespace`: 1-100 allowed characters; defaults to `'zai'`.
 */
const ZaiConfig = z.object({
  client: z.custom<BotpressClientLike | Cognitive>(),
  userId: z.string().describe('The ID of the user consuming the API').optional(),
  modelId: z
    .custom<ModelId | string>(
      // Accept the two aliases, or any string containing a colon.
      (value) => typeof value === 'string' && (value === 'best' || value === 'fast' || value.includes(':')),
      { message: 'Invalid model ID' }
    )
    .describe('The ID of the model you want to use')
    .default('best' satisfies ModelId),
  activeLearning: ActiveLearning.default({ enable: false }),
  namespace: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}$/,
      'Namespace must be alphanumeric and contain only letters, numbers, underscores, hyphens and slashes'
    )
    .default('zai'),
})
|
|
65
|
+
|
|
66
|
+
/**
 * Holds the validated configuration, the `Cognitive` client and the adapter,
 * and exposes the helpers that operations use to call the model.
 */
export class Zai {
  /** Tokenizer shared by all instances; populated lazily by `getTokenizer`. */
  protected static tokenizer: TextTokenizer = null!
  protected client: Cognitive

  /** Unparsed configuration, kept so `with` and `learn` can derive new instances. */
  private _originalConfig: ZaiConfig

  private _userId: string | undefined

  protected Model: ModelId
  /** Details of `Model`; populated lazily by `fetchModelDetails`. */
  protected ModelDetails: Model
  protected namespace: string
  protected adapter: Adapter
  protected activeLearning: ActiveLearning

  /**
   * Validates `config` and wires up the client and the adapter.
   *
   * @param config - The configuration; it is parsed with the `ZaiConfig` schema.
   */
  public constructor(config: ZaiConfig) {
    this._originalConfig = config
    const { client, namespace, userId, modelId, activeLearning } = ZaiConfig.parse(config)

    // Reuse an existing Cognitive client, otherwise wrap the given client in one.
    if (Cognitive.isCognitiveClient(client)) {
      this.client = client as unknown as Cognitive
    } else {
      this.client = new Cognitive({ client })
    }

    this.namespace = namespace
    this._userId = userId
    this.Model = modelId as ModelId
    this.activeLearning = activeLearning

    // Use the table adapter only when active learning is enabled.
    if (activeLearning?.enable) {
      this.adapter = new TableAdapter({ client: this.client.client, tableName: activeLearning.tableName })
    } else {
      this.adapter = new MemoryAdapter([])
    }
  }

  /**
   * Calls `generateContent` on the client; the instance's model and user ID
   * override any values present in `props`.
   *
   * @internal
   */
  protected async callModel(
    props: Parameters<Cognitive['generateContent']>[0]
  ): ReturnType<Cognitive['generateContent']> {
    const request = { ...props, model: this.Model, userId: this._userId }
    return this.client.generateContent(request)
  }

  /** Returns the shared tokenizer, creating it on first use. */
  protected async getTokenizer() {
    if (Zai.tokenizer == null) {
      const loadTokenizer = async () => {
        // there's an issue with wasm, it doesn't load immediately
        while (!getWasmTokenizer) {
          await new Promise((resolve) => setTimeout(resolve, 25))
        }
        return getWasmTokenizer() as TextTokenizer
      }
      Zai.tokenizer = await loadTokenizer()
    }
    return Zai.tokenizer
  }

  /** Fetches the details of the configured model once and caches them in `ModelDetails`. */
  protected async fetchModelDetails(): Promise<void> {
    if (this.ModelDetails) {
      return
    }
    this.ModelDetails = await this.client.getModelDetails(this.Model)
  }

  /**
   * The task ID in the form `<namespace>/<taskId>` with repeated slashes collapsed,
   * or `undefined` when active learning is disabled.
   */
  protected get taskId() {
    return this.activeLearning.enable
      ? `${this.namespace}/${this.activeLearning.taskId}`.replace(/\/+/g, '/')
      : undefined
  }

  /**
   * Creates a new instance from the original configuration with `options` applied on top.
   *
   * @param options - Configuration fields to override.
   * @returns A new `Zai` instance; this instance is left untouched.
   */
  public with(options: Partial<ZaiConfig>): Zai {
    const merged = { ...this._originalConfig, ...options }
    return new Zai(merged)
  }

  /**
   * Creates a new instance with active learning enabled for the given task.
   *
   * @param taskId - The ID of the task to learn.
   * @returns A new `Zai` instance; this instance is left untouched.
   */
  public learn(taskId: string) {
    const activeLearning = { ...this.activeLearning, taskId, enable: true }
    return new Zai({ ...this._originalConfig, activeLearning })
  }
}
|
package/tsconfig.json
CHANGED
|
@@ -1,32 +1,12 @@
|
|
|
1
1
|
{
|
|
2
|
+
"extends": "../../tsconfig.json",
|
|
2
3
|
"compilerOptions": {
|
|
3
|
-
"
|
|
4
|
-
"module": "ESNext",
|
|
5
|
-
"moduleResolution": "bundler",
|
|
6
|
-
"allowJs": true,
|
|
7
|
-
"skipLibCheck": true,
|
|
8
|
-
"esModuleInterop": true,
|
|
9
|
-
"allowSyntheticDefaultImports": true,
|
|
10
|
-
"forceConsistentCasingInFileNames": true,
|
|
11
|
-
"disableReferencedProjectLoad": true,
|
|
12
|
-
"resolveJsonModule": true,
|
|
13
|
-
"isolatedModules": true,
|
|
4
|
+
"outDir": "dist",
|
|
14
5
|
"strict": false,
|
|
15
|
-
"noUnusedLocals": true,
|
|
16
|
-
"noUnusedParameters": true,
|
|
17
|
-
"noUncheckedIndexedAccess": true,
|
|
18
|
-
"lib": ["dom", "ESNext", "dom.iterable"],
|
|
19
|
-
"declaration": false,
|
|
20
|
-
"noEmit": false,
|
|
21
6
|
"paths": {
|
|
22
7
|
"@botpress/zai": ["./src/zai.ts"]
|
|
23
8
|
}
|
|
24
9
|
},
|
|
25
10
|
"exclude": ["node_modules", "dist"],
|
|
26
|
-
"include": ["src/**/*", "vitest.d.ts"]
|
|
27
|
-
|
|
28
|
-
"ts-node": {
|
|
29
|
-
"esm": true,
|
|
30
|
-
"require": ["dotenv/config", "./ensure-env.cjs"]
|
|
31
|
-
}
|
|
11
|
+
"include": ["src/**/*", "vitest.d.ts", "e2e/**/*"]
|
|
32
12
|
}
|