@botpress/cognitive 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,19 @@
 
- > @botpress/cognitive@0.1.0 build /home/runner/work/botpress/botpress/packages/cognitive
+ > @botpress/cognitive@0.1.1 build /home/runner/work/botpress/botpress/packages/cognitive
 > pnpm build:type && pnpm build:neutral && size-limit
 
 
- > @botpress/cognitive@0.1.0 build:type /home/runner/work/botpress/botpress/packages/cognitive
- > tsup ./src/index.ts --dts-resolve --dts-only --clean
+ > @botpress/cognitive@0.1.1 build:type /home/runner/work/botpress/botpress/packages/cognitive
+ > tsup --tsconfig tsconfig.build.json ./src/index.ts --dts-resolve --dts-only --clean
 
 CLI Building entry: ./src/index.ts
- CLI Using tsconfig: tsconfig.json
+ CLI Using tsconfig: tsconfig.build.json
 CLI tsup v8.0.2
 DTS Build start
- DTS ⚡️ Build success in 1577ms
+ DTS ⚡️ Build success in 7369ms
 DTS dist/index.d.ts 8.92 KB
 
- > @botpress/cognitive@0.1.0 build:neutral /home/runner/work/botpress/botpress/packages/cognitive
+ > @botpress/cognitive@0.1.1 build:neutral /home/runner/work/botpress/botpress/packages/cognitive
 > ts-node -T ./build.ts --neutral
 
 Done
@@ -1,4 +1,4 @@
 
- > @botpress/cognitive@0.1.0 generate /home/runner/work/botpress/botpress/packages/cognitive
+ > @botpress/cognitive@0.1.1 generate /home/runner/work/botpress/botpress/packages/cognitive
 > ts-node -T ./types.ts ./src/gen
 
package/dist/index.d.ts CHANGED
@@ -9,12 +9,12 @@ type GenerateContentInput = {
 systemPrompt?: string;
 /** Array of messages for the model to process */
 messages: Array<{
- role: 'user' | 'assistant';
- type?: 'text' | 'tool_calls' | 'tool_result' | 'multipart';
+ role: "user" | "assistant";
+ type?: "text" | "tool_calls" | "tool_result" | "multipart";
 /** Required if `type` is "tool_calls" */
 toolCalls?: Array<{
 id: string;
- type: 'function';
+ type: "function";
 function: {
 name: string;
 /** Some LLMs may generate invalid JSON for a tool call, so this will be `null` when it happens. */
@@ -27,7 +27,7 @@ type GenerateContentInput = {
 toolResultCallId?: string;
 /** Required unless `type` is "tool_call". If `type` is "multipart", this field must be an array of content objects. If `type` is "tool_result" then this field should be the result of the tool call (a plain string or a JSON-encoded array or object). If `type` is "tool_call" then the `toolCalls` field should be used instead. */
 content: string | Array<{
- type: 'text' | 'image';
+ type: "text" | "image";
 /** Indicates the MIME type of the content. If not provided it will be detected from the content-type header of the provided URL. */
 mimeType?: string;
 /** Required if part type is "text" */
@@ -37,7 +37,7 @@ type GenerateContentInput = {
 }> | null;
 }>;
 /** Response format expected from the model. If "json_object" is chosen, you must instruct the model to generate JSON either via the system prompt or a user message. */
- responseFormat?: 'text' | 'json_object';
+ responseFormat?: "text" | "json_object";
 /** Maximum number of tokens allowed in the generated response */
 maxTokens?: number;
 /** Sampling temperature for the model. Higher values result in more random outputs. */
@@ -47,7 +47,7 @@ type GenerateContentInput = {
 /** Sequences where the model should stop generating further tokens. */
 stopSequences?: string[];
 tools?: Array<{
- type: 'function';
+ type: "function";
 function: {
 /** Function name */
 name: string;
@@ -57,7 +57,7 @@ type GenerateContentInput = {
 };
 }>;
 toolChoice?: {
- type?: 'auto' | 'specific' | 'any' | 'none' | '';
+ type?: "auto" | "specific" | "any" | "none" | "";
 /** Required if `type` is "specific" */
 functionName?: string;
 };
@@ -80,11 +80,11 @@ type GenerateContentOutput = {
 /** Model name */
 model: string;
 choices: Array<{
- type?: 'text' | 'tool_calls' | 'tool_result' | 'multipart';
+ type?: "text" | "tool_calls" | "tool_result" | "multipart";
 /** Required if `type` is "tool_calls" */
 toolCalls?: Array<{
 id: string;
- type: 'function';
+ type: "function";
 function: {
 name: string;
 /** Some LLMs may generate invalid JSON for a tool call, so this will be `null` when it happens. */
@@ -97,7 +97,7 @@ type GenerateContentOutput = {
 toolResultCallId?: string;
 /** Required unless `type` is "tool_call". If `type` is "multipart", this field must be an array of content objects. If `type` is "tool_result" then this field should be the result of the tool call (a plain string or a JSON-encoded array or object). If `type` is "tool_call" then the `toolCalls` field should be used instead. */
 content: string | Array<{
- type: 'text' | 'image';
+ type: "text" | "image";
 /** Indicates the MIME type of the content. If not provided it will be detected from the content-type header of the provided URL. */
 mimeType?: string;
 /** Required if part type is "text" */
@@ -105,9 +105,9 @@ type GenerateContentOutput = {
 /** Required if part type is "image" */
 url?: string;
 }> | null;
- role: 'assistant';
+ role: "assistant";
 index: number;
- stopReason: 'stop' | 'max_tokens' | 'tool_calls' | 'content_filter' | 'other';
+ stopReason: "stop" | "max_tokens" | "tool_calls" | "content_filter" | "other";
 }>;
 usage: {
 /** Number of input tokens used by the model */
@@ -128,7 +128,7 @@ type Model$1 = {
 id: string;
 name: string;
 description: string;
- tags: Array<'recommended' | 'deprecated' | 'general-purpose' | 'low-cost' | 'vision' | 'coding' | 'agents' | 'function-calling' | 'roleplay' | 'storytelling' | 'reasoning'>;
+ tags: Array<"recommended" | "deprecated" | "general-purpose" | "low-cost" | "vision" | "coding" | "agents" | "function-calling" | "roleplay" | "storytelling" | "reasoning">;
 input: {
 maxTokens: number;
 /** Cost per 1 million tokens, in U.S. dollars */
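For orientation, a minimal usage sketch of the GenerateContentInput/GenerateContentOutput shapes above. This is a hedged example, not the package's documented API surface: it assumes `Cognitive` is re-exported from the package entry point, that the model provider defaults when omitted from the constructor, and that the snippet runs in an async context. The `generateContent` call and the `output.choices[0]?.content` / `meta.cost` accesses mirror the e2e tests added later in this diff.

import { Client } from '@botpress/client'
import { Cognitive } from '@botpress/cognitive' // assumption: Cognitive is exported from the package root

// Assumption: the provider defaults to the remote model provider when omitted
const cognitive = new Cognitive({ client: new Client({ botId: '...', token: '...' }) })

const { output, meta } = await cognitive.generateContent({
  model: 'best', // the 'best' and 'fast' routing refs appear in the tests below
  systemPrompt: 'You are a concise assistant.',
  messages: [{ role: 'user', type: 'text', content: 'Summarize the latest release.' }],
  responseFormat: 'text',
})

console.log(output.choices[0]?.content)
console.log('cost:', meta.cost.input + meta.cost.output)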
@@ -0,0 +1,126 @@
+ import { describe, test, expect, vi, beforeEach } from 'vitest'
+ import { Cognitive } from '../src/client'
+ import { getTestClient } from './client'
+ import MODELS from './models.json'
+ import { RemoteModelProvider } from '../src/models'
+ import { GenerateContentOutput } from '../src/gen'
+
+ const RandomResponse = {
+ output: {
+ botpress: { cost: 123 },
+ choices: [{ role: 'assistant', content: 'This is the LLM response', stopReason: 'stop', index: 1 }],
+ id: '123456',
+ model: '',
+ provider: '',
+ usage: { inputCost: 1, inputTokens: 2, outputCost: 3, outputTokens: 4 },
+ } satisfies GenerateContentOutput,
+ meta: {},
+ } as const
+
+ // Simple mock for the provider
+ class MockProvider extends RemoteModelProvider {
+ fetchModelPreferences = vi.fn().mockResolvedValue(null)
+ fetchInstalledModels = vi.fn().mockResolvedValue(MODELS)
+ saveModelPreferences = vi.fn().mockResolvedValue(void 0)
+ }
+
+ class TestClient {
+ callAction = vi.fn().mockImplementation(() => {
+ if (this.axiosInstance.defaults?.signal?.aborted) {
+ throw this.axiosInstance.defaults?.signal.reason ?? 'Aborted'
+ }
+ return Promise.resolve(RandomResponse)
+ })
+ getBot = vi.fn()
+ getFile = vi.fn()
+ axiosInstance = {
+ defaults: { signal: new AbortController().signal },
+ }
+ config = { headers: { 'x-bot-id': 'test' } }
+ clone = () => this
+ }
+
+ describe('constructor', () => {
+ test('valid client', () => {
+ // Just check that no error is thrown
+ const provider = new MockProvider(getTestClient())
+ expect(() => new Cognitive({ client: getTestClient(), provider })).not.toThrow()
+ })
+ })
+
+ describe('client', () => {
+ let bp: TestClient
+ let client: Cognitive
+ let provider: MockProvider
+
+ beforeEach(() => {
+ vi.clearAllMocks()
+ bp = new TestClient()
+ provider = new MockProvider(bp)
+ client = new Cognitive({ client: bp, provider })
+ })
+
+ describe('predict (request)', () => {
+ test('fetches models when preferences are not available and saves the preferences', async () => {
+ await client.generateContent({ messages: [], model: 'best' })
+ expect(provider.fetchModelPreferences).toHaveBeenCalled()
+ expect(provider.fetchInstalledModels).toHaveBeenCalled()
+ expect(provider.saveModelPreferences).toHaveBeenCalled()
+ })
+
+ test('fetches model preferences the first time generateContent is called', async () => {
+ await client.generateContent({ messages: [], model: 'fast' })
+ // fetchInstalledModels is called because fetchModelPreferences returned null
+ expect(provider.fetchInstalledModels).toHaveBeenCalledTimes(1)
+ // A second call won't fetch again if preferences are cached
+ await client.generateContent({ messages: [], model: 'fast' })
+ expect(provider.fetchInstalledModels).toHaveBeenCalledTimes(1)
+ })
+ })
+
+ describe('predict (fallback)', () => {
+ test('when model is unavailable, registers the downtime, saves it, and selects another model', async () => {
+ client = new Cognitive({ client: bp, provider })
+
+ bp.callAction.mockRejectedValueOnce({
+ isApiError: true,
+ code: 400,
+ id: '123',
+ type: 'UPSTREAM_PROVIDER_FAILED',
+ subtype: 'UPSTREAM_PROVIDER_FAILED',
+ })
+
+ provider.fetchModelPreferences.mockResolvedValue({
+ best: ['a:a', 'b:b'],
+ })
+
+ // First generate call triggers fallback
+ await client.generateContent({ messages: [], model: 'a:a' })
+
+ expect(bp.callAction).toHaveBeenCalledTimes(2)
+ expect(provider.saveModelPreferences).toHaveBeenCalledOnce()
+ expect(provider.saveModelPreferences.mock.calls[0]?.[0].best).toMatchObject(['a:a', 'b:b'])
+ expect(provider.saveModelPreferences.mock.calls[0]?.[0].downtimes[0].ref).toBe('a:a')
+ })
+ })
+
+ describe('predict (abort)', () => {
+ test('abort request', async () => {
+ const ac = new AbortController()
+ ac.abort('Manual abort')
+
+ await expect(client.generateContent({ messages: [], signal: ac.signal })).rejects.toMatch('Manual abort')
+ })
+ })
+
+ describe('predict (response)', () => {
+ test('request cost and metrics are returned', async () => {
+ const resp = await client.generateContent({ messages: [] })
+ expect(resp.meta.cost.input).toBe(1)
+ expect(resp.meta.cost.output).toBe(3)
+ expect(resp.meta.tokens.input).toBe(2)
+ expect(resp.meta.tokens.output).toBe(4)
+ expect(resp.output.choices[0]?.content).toBe('This is the LLM response')
+ })
+ })
+ })
package/e2e/client.ts ADDED
@@ -0,0 +1,13 @@
+ import 'dotenv/config'
+
+ import { Client } from '@botpress/client'
+ import { getExtendedClient } from '../src/bp-client'
+
+ export const getTestClient = () =>
+ getExtendedClient(
+ new Client({
+ apiUrl: process.env.CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev',
+ botId: process.env.CLOUD_BOT_ID,
+ token: process.env.CLOUD_PAT,
+ })
+ )
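For context, a hedged sketch of how this test client is wired together with the provider and the Cognitive wrapper in the e2e suite. All identifiers come from files in this diff; it assumes CLOUD_API_ENDPOINT, CLOUD_BOT_ID and CLOUD_PAT are set in the environment and that the snippet runs in an async context.

import { Cognitive } from '../src/client'
import { RemoteModelProvider } from '../src/models'
import { getTestClient } from './client'

const client = getTestClient()
const provider = new RemoteModelProvider(client)
const cognitive = new Cognitive({ client, provider })

// One round-trip against the workspace's 'fast' model preference
const { output } = await cognitive.generateContent({
  model: 'fast',
  messages: [{ role: 'user', type: 'text', content: 'Hello from the e2e suite' }],
})
console.log(output.choices[0]?.content)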
@@ -0,0 +1,562 @@
+ [
+ {
+ "ref": "openai:o1-2024-12-17",
+ "integration": "openai",
+ "id": "o1-2024-12-17",
+ "name": "GPT o1",
+ "description": "The o1 model is designed to solve hard problems across domains. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
+ "input": {
+ "costPer1MTokens": 15,
+ "maxTokens": 200000
+ },
+ "output": {
+ "costPer1MTokens": 60,
+ "maxTokens": 100000
+ },
+ "tags": ["reasoning", "vision", "general-purpose"]
+ },
+ {
+ "ref": "openai:o1-mini-2024-09-12",
+ "integration": "openai",
+ "id": "o1-mini-2024-09-12",
+ "name": "GPT o1-mini",
+ "description": "The o1-mini model is a fast and affordable reasoning model for specialized tasks. The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.",
+ "input": {
+ "costPer1MTokens": 3,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 12,
+ "maxTokens": 65536
+ },
+ "tags": ["reasoning", "vision", "general-purpose"]
+ },
+ {
+ "ref": "openai:gpt-4o-mini-2024-07-18",
+ "integration": "openai",
+ "id": "gpt-4o-mini-2024-07-18",
+ "name": "GPT-4o Mini",
+ "description": "GPT-4o mini (“o” for “omni”) is OpenAI's most advanced model in the small models category, and their cheapest model yet. It is multimodal (accepting text or image inputs and outputting text), has higher intelligence than gpt-3.5-turbo but is just as fast. It is meant to be used for smaller tasks, including vision tasks. It's recommended to choose gpt-4o-mini where you would have previously used gpt-3.5-turbo as this model is more capable and cheaper.",
+ "input": {
+ "costPer1MTokens": 0.15,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.6,
+ "maxTokens": 16384
+ },
+ "tags": ["recommended", "vision", "low-cost", "general-purpose", "function-calling"]
+ },
+ {
+ "ref": "openai:gpt-4o-2024-11-20",
+ "integration": "openai",
+ "id": "gpt-4o-2024-11-20",
+ "name": "GPT-4o (November 2024)",
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
+ "input": {
+ "costPer1MTokens": 2.5,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 10,
+ "maxTokens": 16384
+ },
+ "tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
+ },
+ {
+ "ref": "openai:gpt-4o-2024-08-06",
+ "integration": "openai",
+ "id": "gpt-4o-2024-08-06",
+ "name": "GPT-4o (August 2024)",
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
+ "input": {
+ "costPer1MTokens": 2.5,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 10,
+ "maxTokens": 16384
+ },
+ "tags": ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"]
+ },
+ {
+ "ref": "openai:gpt-4o-2024-05-13",
+ "integration": "openai",
+ "id": "gpt-4o-2024-05-13",
+ "name": "GPT-4o (May 2024)",
+ "description": "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
+ "input": {
+ "costPer1MTokens": 5,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 15,
+ "maxTokens": 4096
+ },
+ "tags": ["vision", "general-purpose", "coding", "agents", "function-calling"]
+ },
+ {
+ "ref": "openai:gpt-4-turbo-2024-04-09",
+ "integration": "openai",
+ "id": "gpt-4-turbo-2024-04-09",
+ "name": "GPT-4 Turbo",
+ "description": "GPT-4 is a large multimodal model (accepting text or image inputs and outputting text) that can solve difficult problems with greater accuracy than any of our previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
+ "input": {
+ "costPer1MTokens": 10,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 30,
+ "maxTokens": 4096
+ },
+ "tags": ["deprecated", "general-purpose", "coding", "agents", "function-calling"]
+ },
+ {
+ "ref": "openai:gpt-3.5-turbo-0125",
+ "integration": "openai",
+ "id": "gpt-3.5-turbo-0125",
+ "name": "GPT-3.5 Turbo",
+ "description": "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
+ "input": {
+ "costPer1MTokens": 0.5,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 1.5,
+ "maxTokens": 4096
+ },
+ "tags": ["deprecated", "general-purpose", "low-cost"]
+ },
+ {
+ "ref": "groq:llama-3.3-70b-versatile",
+ "integration": "groq",
+ "id": "llama-3.3-70b-versatile",
+ "name": "LLaMA 3.3 70B",
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 0.59,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.79,
+ "maxTokens": 32768
+ },
+ "tags": ["recommended", "general-purpose", "coding"]
+ },
+ {
+ "ref": "groq:llama-3.2-1b-preview",
+ "integration": "groq",
+ "id": "llama-3.2-1b-preview",
+ "name": "LLaMA 3.2 1B",
+ "description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+ "input": {
+ "costPer1MTokens": 0.04,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.04,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost"]
+ },
+ {
+ "ref": "groq:llama-3.2-3b-preview",
+ "integration": "groq",
+ "id": "llama-3.2-3b-preview",
+ "name": "LLaMA 3.2 3B",
+ "description": "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+ "input": {
+ "costPer1MTokens": 0.06,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.06,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "groq:llama-3.2-11b-vision-preview",
+ "integration": "groq",
+ "id": "llama-3.2-11b-vision-preview",
+ "name": "LLaMA 3.2 11B Vision",
+ "description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+ "input": {
+ "costPer1MTokens": 0.18,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.18,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "vision", "general-purpose"]
+ },
+ {
+ "ref": "groq:llama-3.2-90b-vision-preview",
+ "integration": "groq",
+ "id": "llama-3.2-90b-vision-preview",
+ "name": "LLaMA 3.2 90B Vision",
+ "description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+ "input": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 8192
+ },
+ "tags": ["recommended", "vision", "general-purpose"]
+ },
+ {
+ "ref": "groq:llama-3.1-8b-instant",
+ "integration": "groq",
+ "id": "llama-3.1-8b-instant",
+ "name": "LLaMA 3.1 8B",
+ "description": "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
+ "input": {
+ "costPer1MTokens": 0.05,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.08,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "groq:llama3-8b-8192",
+ "integration": "groq",
+ "id": "llama3-8b-8192",
+ "name": "LLaMA 3 8B",
+ "description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 0.05,
+ "maxTokens": 8192
+ },
+ "output": {
+ "costPer1MTokens": 0.08,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "general-purpose", "deprecated"]
+ },
+ {
+ "ref": "groq:llama3-70b-8192",
+ "integration": "groq",
+ "id": "llama3-70b-8192",
+ "name": "LLaMA 3 70B",
+ "description": "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 0.59,
+ "maxTokens": 8192
+ },
+ "output": {
+ "costPer1MTokens": 0.79,
+ "maxTokens": 8192
+ },
+ "tags": ["general-purpose"]
+ },
+ {
+ "ref": "groq:mixtral-8x7b-32768",
+ "integration": "groq",
+ "id": "mixtral-8x7b-32768",
+ "name": "Mixtral 8x7B",
+ "description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following",
+ "input": {
+ "costPer1MTokens": 0.24,
+ "maxTokens": 32768
+ },
+ "output": {
+ "costPer1MTokens": 0.24,
+ "maxTokens": 32768
+ },
+ "tags": ["low-cost", "general-purpose", "deprecated"]
+ },
+ {
+ "ref": "groq:gemma2-9b-it",
+ "integration": "groq",
+ "id": "gemma2-9b-it",
+ "name": "Gemma2 9B",
+ "description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 8192
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "anthropic:claude-3-5-sonnet-20240620",
+ "integration": "anthropic",
+ "id": "claude-3-5-sonnet-20240620",
+ "name": "Claude 3.5 Sonnet",
+ "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+ "input": {
+ "costPer1MTokens": 3,
+ "maxTokens": 200000
+ },
+ "output": {
+ "costPer1MTokens": 15,
+ "maxTokens": 4096
+ },
+ "tags": ["recommended", "vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"]
+ },
+ {
+ "ref": "anthropic:claude-3-haiku-20240307",
+ "integration": "anthropic",
+ "id": "claude-3-haiku-20240307",
+ "name": "Claude 3 Haiku",
+ "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
+ "input": {
+ "costPer1MTokens": 0.25,
+ "maxTokens": 200000
+ },
+ "output": {
+ "costPer1MTokens": 1.25,
+ "maxTokens": 4096
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-405b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
+ "name": "Llama 3.1 405B Instruct",
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 3,
+ "maxTokens": 131072
+ },
+ "output": {
+ "costPer1MTokens": 3,
+ "maxTokens": 131072
+ },
+ "tags": ["recommended", "general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-70b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
+ "name": "Llama 3.1 70B Instruct",
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 131072
+ },
+ "output": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 131072
+ },
+ "tags": ["general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/llama-v3p1-8b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
+ "name": "Llama 3.1 8B Instruct",
+ "description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 131072
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 131072
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x22b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/mixtral-8x22b-instruct",
+ "name": "Mixtral MoE 8x22B Instruct",
+ "description": "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
+ "input": {
+ "costPer1MTokens": 1.2,
+ "maxTokens": 65536
+ },
+ "output": {
+ "costPer1MTokens": 1.2,
+ "maxTokens": 65536
+ },
+ "tags": ["general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/mixtral-8x7b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/mixtral-8x7b-instruct",
+ "name": "Mixtral MoE 8x7B Instruct",
+ "description": "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following",
+ "input": {
+ "costPer1MTokens": 0.5,
+ "maxTokens": 32768
+ },
+ "output": {
+ "costPer1MTokens": 0.5,
+ "maxTokens": 32768
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/firefunction-v2",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/firefunction-v2",
+ "name": "Firefunction V2",
+ "description": "Fireworks' latest and most performant function-calling model. Firefunction-v2 is based on Llama-3 and trained to excel at function-calling as well as chat and instruction-following.",
+ "input": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 8192
+ },
+ "output": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 8192
+ },
+ "tags": ["function-calling"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/firellava-13b",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/firellava-13b",
+ "name": "FireLLaVA-13B",
+ "description": "Vision-language model allowing both image and text as inputs (single image is recommended), trained on OSS model generated training data.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 4096
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 4096
+ },
+ "tags": ["low-cost", "vision"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/deepseek-coder-v2-instruct",
+ "name": "DeepSeek Coder V2 Instruct",
+ "description": "An open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks from Deepseek.",
+ "input": {
+ "costPer1MTokens": 2.7,
+ "maxTokens": 131072
+ },
+ "output": {
+ "costPer1MTokens": 2.7,
+ "maxTokens": 131072
+ },
+ "tags": ["coding"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/deepseek-coder-v2-lite-instruct",
+ "name": "DeepSeek Coder V2 Lite",
+ "description": "DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 163840
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 163840
+ },
+ "tags": ["low-cost", "coding"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/mythomax-l2-13b",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/mythomax-l2-13b",
+ "name": "MythoMax L2 13b",
+ "description": "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 4096
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 4096
+ },
+ "tags": ["roleplay", "storytelling", "low-cost"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/qwen2-72b-instruct",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/qwen2-72b-instruct",
+ "name": "Qwen2 72b Instruct",
+ "description": "Qwen 2 is the latest large language model series developed by the Qwen team at Alibaba Cloud. Key features and capabilities of Qwen 2 include multilingual proficiency with a particular strength in Asian languages, and enhanced performance in coding, mathematics, and long context understanding",
+ "input": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 32768
+ },
+ "output": {
+ "costPer1MTokens": 0.9,
+ "maxTokens": 32768
+ },
+ "tags": ["general-purpose", "function-calling"]
+ },
+ {
+ "ref": "fireworks-ai:accounts/fireworks/models/gemma2-9b-it",
+ "integration": "fireworks-ai",
+ "id": "accounts/fireworks/models/gemma2-9b-it",
+ "name": "Gemma 2 9B Instruct",
+ "description": "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+ "input": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 8192
+ },
+ "output": {
+ "costPer1MTokens": 0.2,
+ "maxTokens": 8192
+ },
+ "tags": ["low-cost", "general-purpose"]
+ },
+ {
+ "ref": "google-ai:models/gemini-1.5-flash-8b-001",
+ "integration": "google-ai",
+ "id": "models/gemini-1.5-flash-8b-001",
+ "name": "Gemini 1.5 Flash-8B",
+ "description": "A small model designed for lower intelligence tasks. Google AI's fastest and most cost-efficient model with great performance for high-frequency tasks.",
+ "input": {
+ "costPer1MTokens": 0.0375,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.15,
+ "maxTokens": 128000
+ },
+ "tags": ["low-cost", "general-purpose", "vision"]
+ },
+ {
+ "ref": "google-ai:models/gemini-1.5-flash-002",
+ "integration": "google-ai",
+ "id": "models/gemini-1.5-flash-002",
+ "name": "Gemini 1.5 Flash",
+ "description": "A fast and versatile model for scaling across diverse tasks. Google AI's most balanced multimodal model with great performance for most tasks.",
+ "input": {
+ "costPer1MTokens": 0.075,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 0.3,
+ "maxTokens": 128000
+ },
+ "tags": ["recommended", "general-purpose", "vision"]
+ },
+ {
+ "ref": "google-ai:models/gemini-1.5-pro-002",
+ "integration": "google-ai",
+ "id": "models/gemini-1.5-pro-002",
+ "name": "Gemini 1.5 Pro",
+ "description": "A mid-size multimodal model that is optimized for a wide-range of reasoning tasks. Google AI's best-performing model with features for a wide variety of reasoning tasks.",
+ "input": {
+ "costPer1MTokens": 1.25,
+ "maxTokens": 128000
+ },
+ "output": {
+ "costPer1MTokens": 5,
+ "maxTokens": 128000
+ },
+ "tags": ["recommended", "general-purpose", "vision"]
+ }
+ ]
@@ -0,0 +1,132 @@
+ import { beforeEach, describe, expect, test } from 'vitest'
+ import { getBestModels, getFastModels, Model, ModelPreferences, pickModel, RemoteModelProvider } from '../src/models'
+
+ import MODELS from './models.json'
+ import { writeFileSync } from 'node:fs'
+ import { getTestClient } from './client'
+
+ describe('Models', () => {
+ test.skip('should fetch models', async () => {
+ // Run me manually if you need to re-generate the models.json file
+ // Make sure to setup the environment variables
+ const provider = new RemoteModelProvider(getTestClient())
+ const models = await provider.fetchInstalledModels()
+ writeFileSync('./models.json', JSON.stringify(models, null, 2))
+ })
+
+ test('Models ranking (best)', () => {
+ const best = getBestModels(MODELS as Model[])
+ expect(best.slice(0, 10).map((x) => x.ref)).toEqual([
+ 'openai:gpt-4o-2024-11-20',
+ 'openai:gpt-4o-2024-08-06',
+ 'google-ai:models/gemini-1.5-pro-002',
+ 'anthropic:claude-3-5-sonnet-20240620',
+ 'openai:gpt-4o-mini-2024-07-18',
+ 'groq:llama-3.2-90b-vision-preview',
+ 'groq:llama-3.3-70b-versatile',
+ 'fireworks-ai:accounts/fireworks/models/llama-v3p1-405b-instruct',
+ 'google-ai:models/gemini-1.5-flash-002',
+ 'openai:o1-mini-2024-09-12',
+ ])
+ })
+
+ test('Models ranking (fast)', () => {
+ const fast = getFastModels(MODELS as Model[])
+ expect(fast.slice(0, 10).map((x) => x.ref)).toEqual([
+ 'openai:gpt-4o-mini-2024-07-18',
+ 'google-ai:models/gemini-1.5-flash-002',
+ 'google-ai:models/gemini-1.5-flash-8b-001',
+ 'openai:gpt-4o-2024-11-20',
+ 'openai:gpt-4o-2024-08-06',
+ 'google-ai:models/gemini-1.5-pro-002',
+ 'anthropic:claude-3-haiku-20240307',
+ 'anthropic:claude-3-5-sonnet-20240620',
+ 'groq:llama-3.2-90b-vision-preview',
+ 'groq:llama-3.3-70b-versatile',
+ ])
+ })
+
+ test('Models ranking (boosted)', () => {
+ const fast = getFastModels(MODELS as Model[], {
+ 'groq:llama-3.3-70b-versatile': 10,
+ 'openai:gpt-4o-mini-2024-07-18': -10,
+ 'google-ai:': 20,
+ })
+ expect(fast.slice(0, 10).map((x) => x.ref)).toEqual([
+ 'google-ai:models/gemini-1.5-flash-002',
+ 'google-ai:models/gemini-1.5-flash-8b-001',
+ 'google-ai:models/gemini-1.5-pro-002',
+ 'groq:llama-3.3-70b-versatile',
+ 'openai:gpt-4o-2024-11-20',
+ 'openai:gpt-4o-2024-08-06',
+ 'anthropic:claude-3-haiku-20240307',
+ 'anthropic:claude-3-5-sonnet-20240620',
+ 'groq:llama-3.2-90b-vision-preview',
+ 'fireworks-ai:accounts/fireworks/models/llama-v3p1-405b-instruct',
+ ])
+ })
+
+ test('Pick model throws if none provided', () => {
+ expect(() => pickModel([])).toThrow()
+ expect(() => pickModel([], [])).toThrow()
+ })
+
+ test('Pick model throws if all models down', () => {
+ expect(() =>
+ pickModel(
+ ['a:b', 'b:c'],
+ [
+ { ref: 'a:b', reason: 'down', startedAt: new Date().toISOString() },
+ { ref: 'b:c', reason: 'down', startedAt: new Date().toISOString() },
+ ]
+ )
+ ).toThrow()
+ })
+
+ test('Pick model picks the first one if all are up', () => {
+ expect(pickModel(['a:b', 'b:c'])).toEqual('a:b')
+ })
+
+ test('Pick model picks fallback when first down', () => {
+ expect(pickModel(['a:b', 'b:c'], [{ ref: 'a:b', reason: 'down', startedAt: new Date().toISOString() }])).toEqual(
+ 'b:c'
+ )
+ })
+ })
+
+ describe('Remote Model Provider', () => {
+ beforeEach(async () => {
+ const client = getTestClient()
+ const provider = new RemoteModelProvider(client)
+ await provider.deleteModelPreferences()
+ })
+
+ test('fetch models preferences', async () => {
+ const client = getTestClient()
+ const provider = new RemoteModelProvider(client)
+ const preferences = await provider.fetchModelPreferences()
+ expect(preferences).toEqual(null)
+ })
+
+ // TODO: fix this test
+ test.skip('save file preferences', async () => {
+ const client = getTestClient()
+ const provider = new RemoteModelProvider(client)
+
+ const customPreferences = {
+ best: ['openai:gpt-4o-2024-11-20' as const],
+ fast: ['openai:gpt-4o-mini-2024-07-18' as const],
+ downtimes: [],
+ } satisfies ModelPreferences
+
+ await provider.saveModelPreferences(customPreferences)
+
+ const preferences = await provider.fetchModelPreferences()
+
+ expect(preferences).toEqual({
+ best: ['openai:gpt-4o-2024-11-20'],
+ downtimes: [],
+ fast: ['openai:gpt-4o-mini-2024-07-18'],
+ })
+ })
+ })
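To summarize the fallback behaviour exercised by the pickModel tests above, a small sketch; `pickModel` and the downtime shape are taken from this diff, and the model refs are only illustrative:

import { pickModel } from '../src/models'

const preferred = ['openai:gpt-4o-2024-11-20', 'google-ai:models/gemini-1.5-flash-002']

// With no recorded downtimes, the first preference is selected.
pickModel(preferred) // 'openai:gpt-4o-2024-11-20'

// With a downtime recorded for the first ref, the next preference is selected instead.
pickModel(preferred, [
  { ref: 'openai:gpt-4o-2024-11-20', reason: 'down', startedAt: new Date().toISOString() },
]) // 'google-ai:models/gemini-1.5-flash-002'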
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
 "name": "@botpress/cognitive",
- "version": "0.1.0",
+ "version": "0.1.1",
 "description": "Wrapper around the Botpress Client to call LLMs",
 "main": "./dist/index.cjs",
 "module": "./dist/index.mjs",
@@ -9,10 +9,10 @@
 "scripts": {
 "check:type": "tsc --noEmit",
 "generate": "ts-node -T ./types.ts ./src/gen",
- "build:type": "tsup ./src/index.ts --dts-resolve --dts-only --clean",
+ "build:type": "tsup --tsconfig tsconfig.build.json ./src/index.ts --dts-resolve --dts-only --clean",
 "build:neutral": "ts-node -T ./build.ts --neutral",
 "build": "pnpm build:type && pnpm build:neutral && size-limit",
- "test:e2e": "vitest run --dir ./src/__tests__/"
+ "test:e2e": "vitest run --dir ./e2e"
 },
 "size-limit": [
 {
@@ -31,7 +31,7 @@
 "devDependencies": {
 "@botpress/client": "workspace:*",
 "@botpress/common": "workspace:*",
- "@bpinternal/zui": "0.12.0",
+ "@bpinternal/zui": "0.13.4",
 "@size-limit/file": "^11.1.6",
 "@types/axios": "^0.14.4",
 "@types/debug": "^4.1.12",
@@ -0,0 +1,9 @@
+ {
+ "extends": "../../tsconfig.json",
+ "compilerOptions": {
+ "baseUrl": ".",
+ "rootDir": "./src",
+ "outDir": "./dist"
+ },
+ "include": ["src/**/*"]
+ }