@botpress/zai 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. package/README.md +1 -1
  2. package/build.ts +9 -0
  3. package/dist/adapters/adapter.js +2 -0
  4. package/dist/adapters/botpress-table.js +168 -0
  5. package/dist/adapters/memory.js +12 -0
  6. package/dist/index.d.ts +111 -609
  7. package/dist/index.js +9 -1873
  8. package/dist/operations/check.js +153 -0
  9. package/dist/operations/constants.js +2 -0
  10. package/dist/operations/errors.js +15 -0
  11. package/dist/operations/extract.js +232 -0
  12. package/dist/operations/filter.js +191 -0
  13. package/dist/operations/label.js +249 -0
  14. package/dist/operations/rewrite.js +123 -0
  15. package/dist/operations/summarize.js +133 -0
  16. package/dist/operations/text.js +47 -0
  17. package/dist/utils.js +37 -0
  18. package/dist/zai.js +100 -0
  19. package/e2e/data/botpress_docs.txt +26040 -0
  20. package/e2e/data/cache.jsonl +107 -0
  21. package/e2e/utils.ts +89 -0
  22. package/package.json +33 -29
  23. package/src/adapters/adapter.ts +35 -0
  24. package/src/adapters/botpress-table.ts +210 -0
  25. package/src/adapters/memory.ts +13 -0
  26. package/src/index.ts +11 -0
  27. package/src/operations/check.ts +201 -0
  28. package/src/operations/constants.ts +2 -0
  29. package/src/operations/errors.ts +9 -0
  30. package/src/operations/extract.ts +309 -0
  31. package/src/operations/filter.ts +244 -0
  32. package/src/operations/label.ts +345 -0
  33. package/src/operations/rewrite.ts +161 -0
  34. package/src/operations/summarize.ts +195 -0
  35. package/src/operations/text.ts +65 -0
  36. package/src/utils.ts +52 -0
  37. package/src/zai.ts +147 -0
  38. package/tsconfig.json +3 -23
  39. package/dist/index.cjs +0 -1903
  40. package/dist/index.cjs.map +0 -1
  41. package/dist/index.d.cts +0 -916
  42. package/dist/index.js.map +0 -1
  43. package/tsup.config.ts +0 -16
  44. package/vitest.config.ts +0 -9
  45. package/vitest.setup.ts +0 -24
@@ -0,0 +1,65 @@
1
+ // eslint-disable consistent-type-definitions
2
+ import { z } from '@bpinternal/zui'
3
+
4
+ import { clamp } from 'lodash-es'
5
+ import { Zai } from '../zai'
6
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
7
+
8
/**
 * Options accepted by `Zai.text`.
 *
 * The exported type is the *input* shape of the schema below, so every field
 * is optional from the caller's point of view.
 */
export type Options = z.input<typeof Options>
const Options = z.object({
  // Optional token budget; validated to lie within [1, 100_000].
  length: z
    .number()
    .min(1)
    .max(100_000)
    .optional()
    .describe('The maximum number of tokens to generate'),
})
12
+
13
// Module augmentation: adds the `text` operation to the `Zai` interface so the
// prototype assignment in this file is visible to the type checker.
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Generates a text of the desired length according to the prompt.
     *
     * @param prompt - The user prompt the generated text must fulfil.
     * @param options - Optional settings (see `Options`), e.g. the maximum number of tokens to generate.
     * @returns A promise resolving to the generated text.
     */
    text(prompt: string, options?: Options): Promise<string>
  }
}
19
+
20
/**
 * Implementation of `Zai.text`: asks the model to write a standalone text that
 * fulfils `prompt`, optionally constrained to roughly `options.length` tokens.
 *
 * Steps:
 *  1. Validate the options and load the tokenizer and model details.
 *  2. Truncate the prompt to the model's input limit minus PROMPT_INPUT_BUFFER
 *     (but never below 100 tokens).
 *  3. When a length is requested, cap it to the model's output limit minus
 *     PROMPT_OUTPUT_BUFFER and add length instructions (plus a tokens-to-words
 *     chart for lengths of 500 tokens or less) to the system prompt.
 *  4. Call the model and return the content of the first choice.
 *
 * @param prompt - The user prompt.
 * @param _options - Raw, unvalidated options; parsed with the `Options` schema.
 * @returns The content of the first choice returned by the model.
 * @throws Whatever `Options.parse` throws when the options are invalid.
 */
Zai.prototype.text = async function (this: Zai, prompt, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()
  await this.fetchModelDetails()

  prompt = tokenizer.truncate(prompt, Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100))

  const instructions: string[] = []
  let chart = ''

  if (options.length) {
    // Cap the request to what the model can emit. The floor of 1 keeps the
    // budget positive even if the model's output limit is not larger than the
    // buffer (previously this could yield a zero or negative `maxTokens`).
    options.length = Math.max(
      1,
      Math.min(this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length)
    )

    // Advertise ~75% of the hard budget so the model finishes before being cut
    // off. The value is floored to a whole number, and the lower bound is
    // min(5, budget) so that it never exceeds the hard budget (the previous
    // `clamp(x, 5, budget)` returned 5 for budgets below 5).
    const length = clamp(Math.floor(options.length * 0.75), Math.min(5, options.length), options.length)
    instructions.push(`IMPORTANT: Length constraint: ${length} tokens/words`)
    instructions.push(`The text must be standalone and complete in less than ${length} tokens/words`)

    // Short outputs get a conversion chart to help the model translate a token
    // budget into an approximate amount of text.
    if (options.length <= 500) {
      chart = `
| Tokens | Text Length (approximate) |
|-------------|--------------------------------------|
| < 5 tokens | 1-3 words |
| 5-10 tokens | 3-6 words |
| 10-20 tokens| 6-15 words |
| 20-50 tokens| A short sentence (15-30 words) |
| 50-100 tokens| A medium sentence (30-70 words) |
| 100-200 tokens| A short paragraph (70-150 words) |
| 200-300 tokens| A medium paragraph (150-200 words) |
| 300-500 tokens| A long paragraph (200-300 words) |`.trim()
    }
  }

  const { output } = await this.callModel({
    systemPrompt: `
Generate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.
${instructions.map((x) => `- ${x}`).join('\n')}
${chart}
`.trim(),
    temperature: 0.7,
    messages: [{ type: 'text', content: prompt, role: 'user' }],
    maxTokens: options.length,
  })

  // NOTE(review): if the model returns no choice (or non-text content) this
  // still resolves to a non-string value, exactly as before — confirm whether
  // callers would prefer an explicit error.
  return output?.choices?.[0]?.content! as string
}
package/src/utils.ts ADDED
@@ -0,0 +1,52 @@
1
+ import { z } from '@bpinternal/zui'
2
+
3
/**
 * Converts an arbitrary value into text.
 *
 * - Non-empty strings are returned unchanged.
 * - Other truthy values, and the number `0`, are serialized with `JSON.stringify`.
 * - `null`, `undefined`, `false`, the empty string and other empty/falsy inputs
 *   yield a fixed placeholder sentence.
 *
 * @param input - The value to convert.
 * @param beautify - When `true` (default) the JSON is indented with 2 spaces;
 *   otherwise it is emitted compactly.
 * @returns The textual representation of `input`; always a string.
 */
export const stringify = (input: unknown, beautify = true): string => {
  if (typeof input === 'string' && input.length > 0) {
    return input
  }

  // `0` is a legitimate value, not an "empty" input: previously it fell through
  // to the placeholder, whose wording (null/false/undefined/empty) does not cover it.
  if (input || input === 0) {
    const json: string | undefined = JSON.stringify(input, null, beautify ? 2 : undefined)
    // JSON.stringify yields `undefined` for functions and symbols; fall back to
    // String() so the function always returns a string.
    return json ?? String(input)
  }

  return '<input is null, false, undefined or empty>'
}
10
+
11
/**
 * Computes a small, non-cryptographic 32-bit hash of a string.
 *
 * Each UTF-16 code unit is folded in as `hash * 31 + unit`, wrapped to a signed
 * 32-bit integer at every step. The final value is reinterpreted as unsigned
 * and rendered in lowercase hexadecimal.
 *
 * @param str - The string to hash.
 * @returns The hash as a hexadecimal string (`"0"` for the empty string).
 */
export function fastHash(str: string): string {
  let acc = 0
  let position = 0

  while (position < str.length) {
    // Math.imul performs the 32-bit wrapping multiply; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(position)) | 0
    position += 1
  }

  const unsigned = acc >>> 0
  return unsigned.toString(16)
}
19
+
20
/**
 * Returns the longest leading run of `arr` whose cumulative cost fits within
 * `tokens`.
 *
 * Elements are visited in order; iteration stops at the first element that
 * would push the running total above the budget, even if later elements would
 * still fit.
 *
 * @param arr - The candidate elements, in priority order.
 * @param tokens - The maximum total cost allowed.
 * @param count - Returns the cost of a single element.
 * @returns A new array holding the accepted prefix of `arr`.
 */
export const takeUntilTokens = <T>(arr: T[], tokens: number, count: (el: T) => number) => {
  const accepted: T[] = []
  let spent = 0

  for (let index = 0; index < arr.length; index++) {
    const element = arr[index] as T
    const cost = count(element)
    const fits = !(spent + cost > tokens)
    if (!fits) {
      return accepted
    }
    accepted.push(element)
    spent += cost
  }

  return accepted
}
35
+
36
/**
 * Metadata describing a single model generation: which model was used, what it
 * cost, how long it took and how many tokens were consumed.
 */
export type GenerationMetadata = z.input<typeof GenerationMetadata>
export const GenerationMetadata = z.object({
  model: z.string(),
  // Input and output cost, kept separately.
  cost: z.object({ input: z.number(), output: z.number() }).describe('Cost in $USD'),
  latency: z.number().describe('Latency in milliseconds'),
  // Input and output token counts, kept separately.
  tokens: z.object({ input: z.number(), output: z.number() }).describe('Number of tokens used'),
})
package/src/zai.ts ADDED
@@ -0,0 +1,147 @@
1
+ import { BotpressClientLike, Cognitive, Model } from '@botpress/cognitive'
2
+
3
+ import { type TextTokenizer, getWasmTokenizer } from '@bpinternal/thicktoken'
4
+ import { z } from '@bpinternal/zui'
5
+
6
+ import { Adapter } from './adapters/adapter'
7
+ import { TableAdapter } from './adapters/botpress-table'
8
+ import { MemoryAdapter } from './adapters/memory'
9
+
10
/**
 * The model identifier accepted by `Cognitive.generateContent`.
 *
 * `NonNullable` is used to drop `undefined`/`null` from the (optional) `model`
 * property type. The previous `Required<...>` only makes *properties* of an
 * object type required and leaves a union with `undefined` untouched.
 */
type ModelId = NonNullable<Parameters<Cognitive['generateContent']>[0]['model']>
11
+
12
/**
 * Active-learning settings.
 *
 * - `enable`: whether active learning is on (defaults to `false`).
 * - `tableName`: table used to store active-learning tasks; 1-100 allowed
 *   characters followed by the literal suffix `Table` (defaults to
 *   `ActiveLearningTable`).
 * - `taskId`: identifier of the task; 1-100 allowed characters (defaults to
 *   `default`).
 *
 * Allowed characters are letters, digits, underscores, hyphens and slashes.
 */
type ActiveLearning = z.input<typeof ActiveLearning>
const ActiveLearning = z.object({
  enable: z.boolean().describe('Whether to enable active learning').default(false),
  tableName: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}Table$/,
      // The message used to say "Namespace" (copy-paste) and did not mention the required suffix.
      'Table name must contain only letters, numbers, underscores, hyphens and slashes, and must end with "Table"'
    )
    .describe('The name of the table to store active learning tasks')
    .default('ActiveLearningTable'),
  taskId: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}$/,
      // The message used to say "Namespace" (copy-paste) although it validates the task ID.
      'Task ID must contain only letters, numbers, underscores, hyphens and slashes'
    )
    .describe('The ID of the task')
    .default('default'),
})
32
+
33
/**
 * Configuration accepted by the `Zai` constructor.
 *
 * - `client`: a Botpress-like client or a `Cognitive` instance.
 * - `userId`: optional ID of the user consuming the API.
 * - `modelId`: `'best'`, `'fast'`, or any string containing a colon
 *   (defaults to `'best'`).
 * - `activeLearning`: active-learning settings (disabled by default).
 * - `namespace`: 1-100 characters among letters, digits, `_`, `-` and `/`
 *   (defaults to `'zai'`).
 */
type ZaiConfig = z.input<typeof ZaiConfig>
const ZaiConfig = z.object({
  client: z.custom<BotpressClientLike | Cognitive>(),
  userId: z.string().describe('The ID of the user consuming the API').optional(),
  modelId: z
    .custom<ModelId | string>(
      // Valid when it is a string that is one of the two aliases or contains a colon.
      (value) => typeof value === 'string' && (value === 'best' || value === 'fast' || value.includes(':')),
      { message: 'Invalid model ID' }
    )
    .describe('The ID of the model you want to use')
    .default('best' satisfies ModelId),
  activeLearning: ActiveLearning.default({ enable: false }),
  namespace: z
    .string()
    .regex(
      /^[A-Za-z0-9_/-]{1,100}$/,
      'Namespace must be alphanumeric and contain only letters, numbers, underscores, hyphens and slashes'
    )
    .default('zai'),
})
65
+
66
/**
 * Entry point of the library: wraps a `Cognitive` client together with a model
 * choice, a namespace, and an adapter selected by the active-learning settings.
 *
 * Instances are treated as immutable configurations: `with` and `learn` return
 * new instances instead of modifying the current one.
 */
export class Zai {
  protected static tokenizer: TextTokenizer = null!
  protected client: Cognitive

  private _originalConfig: ZaiConfig

  private _userId: string | undefined

  protected Model: ModelId
  protected ModelDetails: Model
  protected namespace: string
  protected adapter: Adapter
  protected activeLearning: ActiveLearning

  /**
   * @param config - Raw configuration; validated with the `ZaiConfig` schema.
   *   The unparsed object is kept so that derived instances can be rebuilt from it.
   */
  public constructor(config: ZaiConfig) {
    this._originalConfig = config
    const { client, namespace, userId, modelId, activeLearning } = ZaiConfig.parse(config)

    // Reuse a Cognitive client as-is; wrap anything else in a new one.
    if (Cognitive.isCognitiveClient(client)) {
      this.client = client as unknown as Cognitive
    } else {
      this.client = new Cognitive({ client })
    }

    this.namespace = namespace
    this._userId = userId
    this.Model = modelId as ModelId
    this.activeLearning = activeLearning

    // A table-backed adapter is used only when active learning is enabled.
    if (activeLearning?.enable) {
      this.adapter = new TableAdapter({ client: this.client.client, tableName: activeLearning.tableName })
    } else {
      this.adapter = new MemoryAdapter([])
    }
  }

  /**
   * Forwards a generation request to the client, forcing this instance's model
   * and user ID over whatever `props` contains.
   *
   * @internal
   */
  protected async callModel(
    props: Parameters<Cognitive['generateContent']>[0]
  ): ReturnType<Cognitive['generateContent']> {
    const request = { ...props, model: this.Model, userId: this._userId }
    return this.client.generateContent(request)
  }

  /** Returns the tokenizer shared by all instances, creating it on first use. */
  protected async getTokenizer() {
    if (Zai.tokenizer == null) {
      // there's an issue with wasm, it doesn't load immediately
      while (!getWasmTokenizer) {
        await new Promise((resolve) => setTimeout(resolve, 25))
      }
      Zai.tokenizer = getWasmTokenizer() as TextTokenizer
    }
    return Zai.tokenizer
  }

  /** Loads the details of the configured model once and caches them on the instance. */
  protected async fetchModelDetails(): Promise<void> {
    if (this.ModelDetails) {
      return
    }
    this.ModelDetails = await this.client.getModelDetails(this.Model)
  }

  /**
   * `<namespace>/<taskId>` with repeated slashes collapsed, or `undefined` when
   * active learning is disabled.
   */
  protected get taskId() {
    return this.activeLearning.enable
      ? `${this.namespace}/${this.activeLearning.taskId}`.replace(/\/+/g, '/')
      : undefined
  }

  /**
   * Creates a new instance from the original configuration with `options`
   * applied on top.
   */
  public with(options: Partial<ZaiConfig>): Zai {
    const merged = { ...this._originalConfig, ...options }
    return new Zai(merged)
  }

  /**
   * Creates a new instance from the original configuration with active
   * learning enabled for the given task.
   */
  public learn(taskId: string) {
    const activeLearning = { ...this.activeLearning, taskId, enable: true }
    return new Zai({ ...this._originalConfig, activeLearning })
  }
}
package/tsconfig.json CHANGED
@@ -1,32 +1,12 @@
1
1
  {
2
+ "extends": "../../tsconfig.json",
2
3
  "compilerOptions": {
3
- "target": "ESNext",
4
- "module": "ESNext",
5
- "moduleResolution": "bundler",
6
- "allowJs": true,
7
- "skipLibCheck": true,
8
- "esModuleInterop": true,
9
- "allowSyntheticDefaultImports": true,
10
- "forceConsistentCasingInFileNames": true,
11
- "disableReferencedProjectLoad": true,
12
- "resolveJsonModule": true,
13
- "isolatedModules": true,
4
+ "outDir": "dist",
14
5
  "strict": false,
15
- "noUnusedLocals": true,
16
- "noUnusedParameters": true,
17
- "noUncheckedIndexedAccess": true,
18
- "lib": ["dom", "ESNext", "dom.iterable"],
19
- "declaration": false,
20
- "noEmit": false,
21
6
  "paths": {
22
7
  "@botpress/zai": ["./src/zai.ts"]
23
8
  }
24
9
  },
25
10
  "exclude": ["node_modules", "dist"],
26
- "include": ["src/**/*", "vitest.d.ts"],
27
-
28
- "ts-node": {
29
- "esm": true,
30
- "require": ["dotenv/config", "./ensure-env.cjs"]
31
- }
11
+ "include": ["src/**/*", "vitest.d.ts", "e2e/**/*"]
32
12
  }