smoltalk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +11 -0
- package/dist/lib/client.d.ts +6 -0
- package/dist/lib/client.js +26 -0
- package/dist/lib/clients/google.d.ts +11 -0
- package/dist/lib/clients/google.js +39 -0
- package/dist/lib/clients/openai.d.ts +11 -0
- package/dist/lib/clients/openai.js +28 -0
- package/dist/lib/index.d.ts +4 -0
- package/dist/lib/index.js +4 -0
- package/dist/lib/models.d.ts +460 -0
- package/dist/lib/models.js +294 -0
- package/dist/lib/smolError.d.ts +3 -0
- package/dist/lib/smolError.js +6 -0
- package/dist/lib/types/result.d.ts +12 -0
- package/dist/lib/types/result.js +16 -0
- package/dist/lib/types.d.ts +30 -0
- package/dist/lib/types.js +1 -0
- package/package.json +42 -0
package/README.md
ADDED
@@ -0,0 +1,222 @@

# Typestache: static typing for mustache

> Get your templates to "if it compiles, it probably works!"

Typestache is still a work in progress. Use with caution.

## Quickstart

Install Typestache:

```bash
npm install typestache
```

Typestache consists of a CLI tool and a library. To use it, point the CLI tool at your template directory:

```bash
typestache src/templates
```

Typestache will find your mustache files and create a corresponding TypeScript file for each one:

```bash
src/templates
- myTemplate.mustache
- myTemplate.ts
```

Now simply import this TypeScript file and render it:

```typescript
import myTemplate from './myTemplate';

const data = {
  name: 'Adit',
  value: 10000,
  in_ca: true
};

const result = myTemplate(data);
```

Easy as that! Behind the scenes, Typestache has converted your mustache template into a typed template for you, so if you have a type error, TypeScript will tell you. Example:

```typescript
const data = {
  name: 'Adit',
  value: 10000,
  in_ca: "true" // Oops, this should be a boolean
};

const result = myTemplate(data); // Error: Type 'string' is not assignable to type 'boolean'.
```

[See examples here](https://github.com/egonSchiele/typestache/tree/main/examples).

Typestache also extends mustache syntax to add type hints. Here's a short example:

```mustache
I am {{age:number}} years old.
```

Now `age` will be a `number` in the generated TypeScript file.

**Heads up, Typestache is *not* a drop-in replacement for mustache.** Read more below.

## Deriving types

Typestache will automatically derive types for you. For example, given this template

```mustache
{{#person}}
Hello, {{name}}!
{{/person}}
```

Typestache will derive this type:

```typescript
type TemplateType = {
  person: boolean;
  name: string | boolean | number;
};
```

## Specifying types

If you know what type something will be, you can tell Typestache. For example, in the template above, we know `name` is a `string`. Here's how we can tell Typestache:

```mustache
{{#person}}
Hello, {{name:string}}!
{{/person}}
```

and here's the derived type:

```typescript
type TemplateType = {
  person: boolean;
  name: string;
};
```

Here is another example. `amount` can be a `string` or a `number`, so we have used a union here:

```mustache
{{#person}}
Hello, {{name:string}}! You have {{amount:string|number}} in your account.
{{/person}}
```

and here's the derived type:

```typescript
type TemplateType = {
  person: boolean;
  name: string;
  amount: string | number;
};
```

### Sections and scoping

In all these examples, you'll notice `name` is never a key. `person` is always a `boolean`; it's never an object with a key `name`. Mustache has very loose scoping rules. Deriving a type for this template

```mustache
{{#person}}
Hello, {{name}}!
{{/person}}
```

in mustache might look something like this:

```typescript
type TemplateType = {
  person: boolean;
  name: string | boolean | number;
} | {
  person: {
    name: string | boolean | number;
  }
} | {
  person: {
    name: string | boolean | number;
  }[]
}
```

Even that's not enough: technically, `person` could be any truthy value, and `person` and `name` could both be `undefined`.

A type like this is harder to read and reduces type safety, and things only get worse as you add more sections and more variables. So Typestache chooses to interpret every variable as if it's in the global context. If you want `name` to be a key on `person`, use the new `this` keyword:

```mustache
{{#person}}
Hello, {{this.name}}!
{{/person}}
```

Generates this type:

```typescript
type TemplateType = {
  person: {
    name: string | boolean | number;
  }
}
```

You'll also notice `person` is an object. If you want it to be an array of objects, use `[]` after the name in the opening tag:

```mustache
{{#person[]}}
Hello, {{this.name}}!
{{/person}}
```

Generates this type:

```typescript
type TemplateType = {
  person: {
    name: string | boolean | number;
  }[];
}
```

### Optionals

Finally, Typestache makes all variables required by default. You can make something optional by adding a question mark at the end of the name, like this:

```mustache
Hello, {{name?:string}}!
```

Generates this type:

```typescript
type TemplateType = {
  name?: string;
}
```

## Typestache doesn't implement the entire mustache spec

There are several parts of the mustache spec that Typestache does not implement. The most important one to know about is that Typestache handles scope differently. Mustache is very loose with its scoping, which makes it hard to write a useful type for it.

Here are some other things not currently supported.

Eventual support:

- Nested sections
- Lambdas (no support for dynamic templates)
- Partials

No support planned:

- Dynamic names
- Blocks
- Parents
- Custom delimiter tags

For the features with no support planned, it's mostly because they would be very hard or impossible to type correctly. The nature of dynamic partials, for example, means we don't know what will be generated until runtime, which makes it impossible to type.
package/dist/index.d.ts
ADDED
@@ -0,0 +1 @@
export {};
package/dist/index.js
ADDED
@@ -0,0 +1,11 @@
import { getClient } from "./lib/client.js";
const client = getClient({
    apiKey: process.env.GEMINI_API_KEY || "",
    logLevel: "debug",
    model: "gemini-2.0-flash-lite",
});
async function main() {
    const resp = await client.text("Hello, how are you?");
    console.log(resp);
}
main();
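Note that `text` resolves to a `Result<PromptResult>` (defined in `dist/lib/types/result.d.ts` below), so the `resp` logged by this demo is a tagged union rather than a plain string. A minimal sketch of unwrapping it, assuming the same Gemini setup as the demo above:

```typescript
import { getClient } from "./lib/client.js";

const client = getClient({
  apiKey: process.env.GEMINI_API_KEY || "",
  model: "gemini-2.0-flash-lite",
});

// Result<PromptResult> is a discriminated union on `success`,
// so narrow before reading `value` or `error`.
const resp = await client.text("Hello, how are you?");
if (resp.success) {
  console.log(resp.value.output); // the model's text reply
} else {
  console.error(`Prompt failed: ${resp.error}`);
}
```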
package/dist/lib/client.d.ts
ADDED
@@ -0,0 +1,6 @@
export * from "./clients/google.js";
export * from "./clients/openai.js";
import { SmolGoogle } from "./clients/google.js";
import { SmolOpenAi } from "./clients/openai.js";
import { SmolConfig } from "./types.js";
export declare function getClient(config: SmolConfig): SmolOpenAi | SmolGoogle;
package/dist/lib/client.js
ADDED
@@ -0,0 +1,26 @@
export * from "./clients/google.js";
export * from "./clients/openai.js";
import { SmolGoogle } from "./clients/google.js";
import { SmolOpenAi } from "./clients/openai.js";
import { getModel, isTextModel } from "./models.js";
import { SmolError } from "./smolError.js";
import { EgonLog } from "egonlog";
export function getClient(config) {
    const apiKey = config.apiKey;
    const logger = new EgonLog({ level: config.logLevel || "info" });
    const model = getModel(config.model);
    if (model === undefined || !isTextModel(model)) {
        throw new SmolError(`Only text models are supported currently. ${config.model} is a ${model?.type} model.`);
    }
    const clientConfig = { ...config, logger };
    switch (model.source) {
        case "openai":
            return new SmolOpenAi(clientConfig);
            break;
        case "google":
            return new SmolGoogle(clientConfig);
            break;
        default:
            throw new SmolError(`Model source ${model.source} is not supported.`);
    }
}
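Since `getClient` throws a `SmolError` when the requested model isn't a text model (or its source has no client), callers may want to guard construction. A sketch under assumptions: the hunks for `smolError.d.ts`/`smolError.js` aren't shown in this diff, so only the constructor shape seen above is assumed, plus a standard Error-like `message` property:

```typescript
import { getClient } from "./lib/client.js";
import { SmolError } from "./lib/smolError.js";

try {
  // gpt-image-1 has type "image" in the model tables below, so
  // getClient rejects it: only text models are supported currently.
  getClient({ apiKey: process.env.OPENAI_API_KEY || "", model: "gpt-image-1" });
} catch (err) {
  if (err instanceof SmolError) {
    console.error("Unsupported model:", err.message);
  } else {
    throw err;
  }
}
```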
package/dist/lib/clients/google.d.ts
ADDED
@@ -0,0 +1,11 @@
import { GoogleGenAI } from "@google/genai";
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
export type SmolGoogleConfig = BaseClientConfig;
export declare class SmolGoogle implements SmolClient {
    private client;
    private logger;
    private model;
    constructor(config: SmolGoogleConfig);
    getClient(): GoogleGenAI;
    text(content: string, config?: PromptConfig): Promise<Result<PromptResult>>;
}
package/dist/lib/clients/google.js
ADDED
@@ -0,0 +1,39 @@
// @ts-nocheck
import { GoogleGenAI } from "@google/genai";
import { success } from "../types.js";
export class SmolGoogle {
    client;
    logger;
    model;
    constructor(config) {
        this.client = new GoogleGenAI({ apiKey: config.apiKey });
        this.logger = config.logger;
        this.model = config.model;
    }
    getClient() {
        return this.client;
    }
    async text(content, config) {
        // Note: `messages` is assembled here but not yet forwarded to the
        // API; only the latest prompt is sent as `contents` below.
        const messages = structuredClone(config?.messages) || [];
        messages.push({ role: "user", content });
        // Send the prompt as the latest message
        const result = await this.client.models.generateContent({
            contents: content,
            model: this.model,
        });
        return success({ output: result.text });
    }
}
package/dist/lib/clients/openai.d.ts
ADDED
@@ -0,0 +1,11 @@
import OpenAI from "openai";
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
export type SmolOpenAiConfig = BaseClientConfig;
export declare class SmolOpenAi implements SmolClient {
    private client;
    private logger;
    private model;
    constructor(config: SmolOpenAiConfig);
    getClient(): OpenAI;
    text(content: string, config?: PromptConfig): Promise<Result<PromptResult>>;
}
package/dist/lib/clients/openai.js
ADDED
@@ -0,0 +1,28 @@
import OpenAI from "openai";
import { success } from "../types.js";
export class SmolOpenAi {
    client;
    logger;
    model;
    constructor(config) {
        this.client = new OpenAI({ apiKey: config.apiKey });
        this.logger = config.logger;
        this.model = config.model;
    }
    getClient() {
        return this.client;
    }
    async text(content, config) {
        // As in SmolGoogle, `messages` is built but not passed through;
        // the Responses API call below sends only `content` as input.
        const messages = structuredClone(config?.messages) || [];
        messages.push({ role: "user", content });
        const response = await this.client.responses.create({
            model: this.model,
            instructions: config?.instructions,
            input: content,
            max_output_tokens: config?.maxTokens,
            temperature: config?.temperature,
            parallel_tool_calls: config?.parallelToolCalls,
        });
        return success({ output: response.output_text });
    }
}
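Of the two clients, only `SmolOpenAi` forwards the optional `PromptConfig` fields (`instructions`, `maxTokens`, `temperature`, `parallelToolCalls`) to its API; `SmolGoogle` above sends just the prompt and model. A sketch of passing options through, assuming `client` came from `getClient` with an OpenAI text model and with illustrative values:

```typescript
// Field names come from PromptConfig in dist/lib/types.d.ts; the
// camelCase fields map onto the Responses API's snake_case parameters.
const resp = await client.text("Summarize this diff in one sentence.", {
  instructions: "Answer tersely.", // -> instructions
  maxTokens: 64,                   // -> max_output_tokens
  temperature: 0.2,                // -> temperature
});
```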
package/dist/lib/models.d.ts
ADDED
@@ -0,0 +1,460 @@
export type ModelSource = "local" | "debug" | "local-ollama" | "openai" | "anthropic" | "google" | "replicate" | "modal";
export type BaseModel = {
    modelName: string;
    source: ModelSource;
    description?: string;
    inputTokenCost?: number;
    cachedInputTokenCost?: number;
    outputTokenCost?: number;
    disabled?: boolean;
    costUnit?: "tokens" | "characters" | "minutes";
};
export type SpeechToTextModel = BaseModel & {
    type: "speech-to-text";
    perMinuteCost?: number;
};
export type ImageModel = BaseModel & {
    type: "image";
    costPerImage?: number;
    outputType?: "FileOutput" | "Array";
};
export type TextModel = BaseModel & {
    type: "text";
    modelName: string;
    maxInputTokens: number;
    maxOutputTokens: number;
};
export type EmbeddingsModel = {
    type: "embeddings";
    modelName: string;
    tokenCost?: number;
};
export type Model = SpeechToTextModel | TextModel | EmbeddingsModel | ImageModel;
export declare const speechToTextModels: readonly [{
    readonly type: "speech-to-text";
    readonly modelName: "whisper-local";
    readonly source: "local";
}, {
    readonly type: "speech-to-text";
    readonly modelName: "whisper-web";
    readonly perMinuteCost: 0.006;
    readonly source: "openai";
}];
export declare const textModels: readonly [{
    readonly type: "text";
    readonly modelName: "gpt-4o-mini";
    readonly description: "GPT-4o mini (“o” for “omni”) is a fast, affordable small model for focused tasks. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). It is ideal for fine-tuning, and model outputs from a larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar results at lower cost and latency. The knowledge cutoff for GPT-4o-mini models is October, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 16384;
    readonly inputTokenCost: 0.15;
    readonly cachedInputTokenCost: 0.075;
    readonly outputTokenCost: 0.6;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "gpt-4o";
    readonly description: "GPT-4o (“o” for “omni”) is our versatile, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). The knowledge cutoff for GPT-4o-mini models is October, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 16384;
    readonly inputTokenCost: 2.5;
    readonly cachedInputTokenCost: 1.25;
    readonly outputTokenCost: 10;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "o3-mini";
    readonly description: "o3-mini is our most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks. The knowledge cutoff for o3-mini models is October, 2023.";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 100000;
    readonly inputTokenCost: 1.1;
    readonly cachedInputTokenCost: 0.55;
    readonly outputTokenCost: 4.4;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "gpt-4-turbo";
    readonly description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 4096;
    readonly inputTokenCost: 10;
    readonly outputTokenCost: 30;
    readonly disabled: true;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "gpt-4";
    readonly description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 30;
    readonly outputTokenCost: 60;
    readonly disabled: true;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "gpt-3.5-turbo";
    readonly description: "GPT-3.5 Turbo models can understand and generate natural language or code and have been optimized for chat using the Chat Completions API but work well for non-chat tasks as well. gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.";
    readonly maxInputTokens: 16385;
    readonly maxOutputTokens: 4096;
    readonly inputTokenCost: 0.5;
    readonly outputTokenCost: 1.5;
    readonly disabled: true;
    readonly source: "openai";
}, {
    readonly type: "text";
    readonly modelName: "gemini-2.0-flash";
    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.15;
    readonly outputTokenCost: 0.6;
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "gemini-2.0-pro-exp-02-05";
    readonly description: "Strongest model quality, especially for code & world knowledge; 2M long context. In private beta.";
    readonly maxInputTokens: 2097152;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.5;
    readonly outputTokenCost: 1.5;
    readonly disabled: true;
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "gemini-2.0-flash-lite";
    readonly description: "Our cost effective offering to support high throughput.";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.075;
    readonly outputTokenCost: 0.3;
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "gemini-1.5-flash";
    readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.01875;
    readonly outputTokenCost: 0.075;
    readonly costUnit: "characters";
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "gemini-1.5-pro";
    readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
    readonly maxInputTokens: 2097152;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.3125;
    readonly outputTokenCost: 1.25;
    readonly costUnit: "characters";
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "gemini-1.0-pro";
    readonly description: "The best performing model for a wide range of text-only tasks.";
    readonly maxInputTokens: 32760;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.125;
    readonly outputTokenCost: 0.375;
    readonly costUnit: "characters";
    readonly source: "google";
}, {
    readonly type: "text";
    readonly modelName: "claude-3-7-sonnet-latest";
    readonly description: "Our most intelligent model to date and the first hybrid reasoning model on the market. Claude 3.7 Sonnet shows particularly strong improvements in coding and front-end web development.";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 3;
    readonly outputTokenCost: 15;
    readonly source: "anthropic";
}, {
    readonly type: "text";
    readonly modelName: "claude-3-5-haiku-latest";
    readonly description: "Our fastest model";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.8;
    readonly outputTokenCost: 4;
    readonly source: "anthropic";
}, {
    readonly type: "text";
    readonly modelName: "deepseek-r1:8b";
    readonly description: "Runs via ollama";
    readonly source: "local-ollama";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 128000;
}, {
    readonly type: "text";
    readonly modelName: "mistral-adapters-chunk50-iters100";
    readonly description: "Fine tuned Mistral 7B model fed on my stories, chunked into parts of 50 chars each, 100 iterations.";
    readonly source: "local";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
}, {
    readonly type: "text";
    readonly modelName: "llama-7b";
    readonly source: "replicate";
    readonly maxInputTokens: 256;
    readonly maxOutputTokens: 256;
}, {
    readonly type: "text";
    readonly modelName: "console.log text";
    readonly source: "debug";
    readonly description: "Fake model that just echoes the prompt to the console for debugging";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
}];
export declare const imageModels: readonly [{
    readonly type: "image";
    readonly modelName: "google/imagen-3";
    readonly source: "replicate";
    readonly costPerImage: 0.05;
}, {
    readonly type: "image";
    readonly modelName: "minimax/image-01";
    readonly source: "replicate";
    readonly costPerImage: 0.01;
    readonly outputType: "Array";
}, {
    readonly type: "image";
    readonly modelName: "flux-modal";
    readonly source: "modal";
    readonly costPerImage: 0.03;
}, {
    readonly type: "image";
    readonly modelName: "gpt-image-1";
    readonly source: "openai";
    readonly costPerImage: 0.25;
}, {
    readonly type: "image";
    readonly modelName: "gemini-2.5-flash-image-preview";
    readonly source: "google";
    readonly description: "aka nano-banana";
    readonly costPerImage: 0.04;
}, {
    readonly type: "image";
    readonly modelName: "console.log image";
    readonly source: "debug";
    readonly description: "Fake model that just echoes the prompt to the console for debugging";
    readonly costPerImage: 0;
}];
export declare const embeddingsModels: {
    type: string;
    modelName: string;
    tokenCost: number;
}[];
export type TextModelName = (typeof textModels)[number]["modelName"];
export type ImageModelName = (typeof imageModels)[number]["modelName"];
export type SpeechToTextModelName = (typeof speechToTextModels)[number]["modelName"];
export type EmbeddingsModelName = (typeof embeddingsModels)[number]["modelName"];
export type ModelName = TextModelName | ImageModelName | SpeechToTextModelName | EmbeddingsModelName;
export declare function getModel(modelName: ModelName): {
    readonly type: "speech-to-text";
    readonly modelName: "whisper-local";
    readonly source: "local";
} | {
    readonly type: "speech-to-text";
    readonly modelName: "whisper-web";
    readonly perMinuteCost: 0.006;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gpt-4o-mini";
    readonly description: "GPT-4o mini (“o” for “omni”) is a fast, affordable small model for focused tasks. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). It is ideal for fine-tuning, and model outputs from a larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar results at lower cost and latency. The knowledge cutoff for GPT-4o-mini models is October, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 16384;
    readonly inputTokenCost: 0.15;
    readonly cachedInputTokenCost: 0.075;
    readonly outputTokenCost: 0.6;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gpt-4o";
    readonly description: "GPT-4o (“o” for “omni”) is our versatile, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). The knowledge cutoff for GPT-4o-mini models is October, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 16384;
    readonly inputTokenCost: 2.5;
    readonly cachedInputTokenCost: 1.25;
    readonly outputTokenCost: 10;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "o3-mini";
    readonly description: "o3-mini is our most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks. The knowledge cutoff for o3-mini models is October, 2023.";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 100000;
    readonly inputTokenCost: 1.1;
    readonly cachedInputTokenCost: 0.55;
    readonly outputTokenCost: 4.4;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gpt-4-turbo";
    readonly description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 4096;
    readonly inputTokenCost: 10;
    readonly outputTokenCost: 30;
    readonly disabled: true;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gpt-4";
    readonly description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 30;
    readonly outputTokenCost: 60;
    readonly disabled: true;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gpt-3.5-turbo";
    readonly description: "GPT-3.5 Turbo models can understand and generate natural language or code and have been optimized for chat using the Chat Completions API but work well for non-chat tasks as well. gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.";
    readonly maxInputTokens: 16385;
    readonly maxOutputTokens: 4096;
    readonly inputTokenCost: 0.5;
    readonly outputTokenCost: 1.5;
    readonly disabled: true;
    readonly source: "openai";
} | {
    readonly type: "text";
    readonly modelName: "gemini-2.0-flash";
    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.15;
    readonly outputTokenCost: 0.6;
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "gemini-2.0-pro-exp-02-05";
    readonly description: "Strongest model quality, especially for code & world knowledge; 2M long context. In private beta.";
    readonly maxInputTokens: 2097152;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.5;
    readonly outputTokenCost: 1.5;
    readonly disabled: true;
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "gemini-2.0-flash-lite";
    readonly description: "Our cost effective offering to support high throughput.";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.075;
    readonly outputTokenCost: 0.3;
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "gemini-1.5-flash";
    readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
    readonly maxInputTokens: 1048576;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.01875;
    readonly outputTokenCost: 0.075;
    readonly costUnit: "characters";
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "gemini-1.5-pro";
    readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
    readonly maxInputTokens: 2097152;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.3125;
    readonly outputTokenCost: 1.25;
    readonly costUnit: "characters";
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "gemini-1.0-pro";
    readonly description: "The best performing model for a wide range of text-only tasks.";
    readonly maxInputTokens: 32760;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.125;
    readonly outputTokenCost: 0.375;
    readonly costUnit: "characters";
    readonly source: "google";
} | {
    readonly type: "text";
    readonly modelName: "claude-3-7-sonnet-latest";
    readonly description: "Our most intelligent model to date and the first hybrid reasoning model on the market. Claude 3.7 Sonnet shows particularly strong improvements in coding and front-end web development.";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 3;
    readonly outputTokenCost: 15;
    readonly source: "anthropic";
} | {
    readonly type: "text";
    readonly modelName: "claude-3-5-haiku-latest";
    readonly description: "Our fastest model";
    readonly maxInputTokens: 200000;
    readonly maxOutputTokens: 8192;
    readonly inputTokenCost: 0.8;
    readonly outputTokenCost: 4;
    readonly source: "anthropic";
} | {
    readonly type: "text";
    readonly modelName: "deepseek-r1:8b";
    readonly description: "Runs via ollama";
    readonly source: "local-ollama";
    readonly maxInputTokens: 128000;
    readonly maxOutputTokens: 128000;
} | {
    readonly type: "text";
    readonly modelName: "mistral-adapters-chunk50-iters100";
    readonly description: "Fine tuned Mistral 7B model fed on my stories, chunked into parts of 50 chars each, 100 iterations.";
    readonly source: "local";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
} | {
    readonly type: "text";
    readonly modelName: "llama-7b";
    readonly source: "replicate";
    readonly maxInputTokens: 256;
    readonly maxOutputTokens: 256;
} | {
    readonly type: "text";
    readonly modelName: "console.log text";
    readonly source: "debug";
    readonly description: "Fake model that just echoes the prompt to the console for debugging";
    readonly maxInputTokens: 8192;
    readonly maxOutputTokens: 8192;
} | {
    readonly type: "image";
    readonly modelName: "google/imagen-3";
    readonly source: "replicate";
    readonly costPerImage: 0.05;
} | {
    readonly type: "image";
    readonly modelName: "minimax/image-01";
    readonly source: "replicate";
    readonly costPerImage: 0.01;
    readonly outputType: "Array";
} | {
    readonly type: "image";
    readonly modelName: "flux-modal";
    readonly source: "modal";
    readonly costPerImage: 0.03;
} | {
    readonly type: "image";
    readonly modelName: "gpt-image-1";
    readonly source: "openai";
    readonly costPerImage: 0.25;
} | {
    readonly type: "image";
    readonly modelName: "gemini-2.5-flash-image-preview";
    readonly source: "google";
    readonly description: "aka nano-banana";
    readonly costPerImage: 0.04;
} | {
    readonly type: "image";
    readonly modelName: "console.log image";
    readonly source: "debug";
    readonly description: "Fake model that just echoes the prompt to the console for debugging";
    readonly costPerImage: 0;
} | undefined;
export declare function isImageModel(model: Model): model is ImageModel;
export declare function isTextModel(model: Model): model is TextModel;
export declare function isSpeechToTextModel(model: Model): model is SpeechToTextModel;
export declare function isEmbeddingsModel(model: Model): model is EmbeddingsModel;
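Because the model tables compile to readonly tuples of string literals, `ModelName` is a union of literal types, so an unknown model name is a compile-time error rather than a runtime lookup failure. A small sketch (the bad name is illustrative):

```typescript
import type { ModelName } from "./models.js";

const ok: ModelName = "gpt-4o-mini";      // compiles: in the textModels table
// const bad: ModelName = "gpt-99-ultra"; // error: not assignable to ModelName
```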
package/dist/lib/models.js
ADDED
@@ -0,0 +1,294 @@
export const speechToTextModels = [
    { type: "speech-to-text", modelName: "whisper-local", source: "local" },
    {
        type: "speech-to-text",
        modelName: "whisper-web",
        perMinuteCost: 0.006,
        source: "openai",
    },
    // not a speech to text model?
    /* {
        type: "speech-to-text",
        modelName: "gpt-4o-audio-preview",
        description:
            "This is a preview release of the GPT-4o Audio models. These models accept audio inputs and outputs, and can be used in the Chat Completions REST API. Learn more. The knowledge cutoff for GPT-4o Audio models is October, 2023.",
        inputTokenCost: 2.5,
        outputTokenCost: 10,
        source: "openai",
    }, */
];
export const textModels = [
    {
        type: "text",
        modelName: "gpt-4o-mini",
        description: "GPT-4o mini (“o” for “omni”) is a fast, affordable small model for focused tasks. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). It is ideal for fine-tuning, and model outputs from a larger model like GPT-4o can be distilled to GPT-4o-mini to produce similar results at lower cost and latency. The knowledge cutoff for GPT-4o-mini models is October, 2023.",
        maxInputTokens: 128000,
        maxOutputTokens: 16384,
        inputTokenCost: 0.15,
        cachedInputTokenCost: 0.075,
        outputTokenCost: 0.6,
        source: "openai",
    },
    {
        type: "text",
        modelName: "gpt-4o",
        description: "GPT-4o (“o” for “omni”) is our versatile, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs (including Structured Outputs). The knowledge cutoff for GPT-4o-mini models is October, 2023.",
        maxInputTokens: 128000,
        maxOutputTokens: 16384,
        inputTokenCost: 2.5,
        cachedInputTokenCost: 1.25,
        outputTokenCost: 10,
        source: "openai",
    },
    {
        type: "text",
        modelName: "o3-mini",
        description: "o3-mini is our most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks. The knowledge cutoff for o3-mini models is October, 2023.",
        maxInputTokens: 200000,
        maxOutputTokens: 100000,
        inputTokenCost: 1.1,
        cachedInputTokenCost: 0.55,
        outputTokenCost: 4.4,
        source: "openai",
    },
    {
        type: "text",
        modelName: "gpt-4-turbo",
        description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.",
        maxInputTokens: 128000,
        maxOutputTokens: 4096,
        inputTokenCost: 10,
        outputTokenCost: 30,
        disabled: true,
        source: "openai",
    },
    {
        type: "text",
        modelName: "gpt-4",
        description: "GPT-4 is an older version of a high-intelligence GPT model, usable in Chat Completions. Learn more in the text generation guide. The knowledge cutoff for the latest GPT-4 Turbo version is December, 2023.",
        maxInputTokens: 8192,
        maxOutputTokens: 8192,
        inputTokenCost: 30,
        outputTokenCost: 60,
        disabled: true,
        source: "openai",
    },
    {
        type: "text",
        modelName: "gpt-3.5-turbo",
        description: "GPT-3.5 Turbo models can understand and generate natural language or code and have been optimized for chat using the Chat Completions API but work well for non-chat tasks as well. gpt-4o-mini should be used in place of gpt-3.5-turbo, as it is cheaper, more capable, multimodal, and just as fast.",
        maxInputTokens: 16385,
        maxOutputTokens: 4096,
        inputTokenCost: 0.5,
        outputTokenCost: 1.5,
        disabled: true,
        source: "openai",
    },
    {
        type: "text",
        modelName: "gemini-2.0-flash",
        description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API",
        maxInputTokens: 1_048_576,
        maxOutputTokens: 8192,
        inputTokenCost: 0.15,
        outputTokenCost: 0.6,
        source: "google",
    },
    {
        type: "text",
        modelName: "gemini-2.0-pro-exp-02-05",
        description: "Strongest model quality, especially for code & world knowledge; 2M long context. In private beta.",
        maxInputTokens: 2_097_152,
        maxOutputTokens: 8192,
        inputTokenCost: 0.5,
        outputTokenCost: 1.5,
        disabled: true,
        source: "google",
    },
    {
        type: "text",
        modelName: "gemini-2.0-flash-lite",
        description: "Our cost effective offering to support high throughput.",
        maxInputTokens: 1_048_576,
        maxOutputTokens: 8192,
        inputTokenCost: 0.075,
        outputTokenCost: 0.3,
        source: "google",
    },
    {
        type: "text",
        modelName: "gemini-1.5-flash",
        description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.",
        maxInputTokens: 1_048_576,
        maxOutputTokens: 8192,
        inputTokenCost: 0.01875,
        outputTokenCost: 0.075,
        costUnit: "characters",
        source: "google",
    },
    {
        type: "text",
        modelName: "gemini-1.5-pro",
        description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?",
        maxInputTokens: 2_097_152,
        maxOutputTokens: 8192,
        inputTokenCost: 0.3125,
        outputTokenCost: 1.25,
        costUnit: "characters",
        source: "google",
    },
    {
        type: "text",
        modelName: "gemini-1.0-pro",
        description: "The best performing model for a wide range of text-only tasks.",
        maxInputTokens: 32_760,
        maxOutputTokens: 8192,
        inputTokenCost: 0.125,
        outputTokenCost: 0.375,
        costUnit: "characters",
        source: "google",
    },
    {
        type: "text",
        modelName: "claude-3-7-sonnet-latest",
        description: "Our most intelligent model to date and the first hybrid reasoning model on the market. Claude 3.7 Sonnet shows particularly strong improvements in coding and front-end web development.",
        maxInputTokens: 200_000,
        maxOutputTokens: 8192,
        inputTokenCost: 3,
        outputTokenCost: 15,
        source: "anthropic",
    },
    {
        type: "text",
        modelName: "claude-3-5-haiku-latest",
        description: "Our fastest model",
        maxInputTokens: 200_000,
        maxOutputTokens: 8192,
        inputTokenCost: 0.8,
        outputTokenCost: 4,
        source: "anthropic",
    },
    /* {
        type: "text",
        modelName: "llama.cpp",
        maxInputTokens: 4000,
        maxOutputTokens: 4000,
    }, */
    /* {
        type: "text",
        modelName: "claude-3-opus-20240229",
        maxInputTokens: 4096,
        maxOutputTokens: 4096,
    },
    {
        type: "text",
        modelName: "claude-3-sonnet-20240229",
        maxInputTokens: 4096,
        maxOutputTokens: 4096,
    },
    {
        type: "text",
        modelName: "gemini-pro",
        maxInputTokens: 4096,
        maxOutputTokens: 4096,
    },
    {
        type: "text",
        modelName: "gemini-pro-vision",
        maxInputTokens: 4096,
        maxOutputTokens: 4096,
    }, */
    {
        type: "text",
        modelName: "deepseek-r1:8b",
        description: "Runs via ollama",
        source: "local-ollama",
        maxInputTokens: 128000,
        maxOutputTokens: 128000,
    },
    {
        type: "text",
        modelName: "mistral-adapters-chunk50-iters100",
        description: "Fine tuned Mistral 7B model fed on my stories, chunked into parts of 50 chars each, 100 iterations.",
        source: "local",
        // https://huggingface.co/mistralai/Mistral-7B-v0.1/discussions/104
        maxInputTokens: 8192,
        maxOutputTokens: 8192,
    },
    {
        type: "text",
        modelName: "llama-7b",
        source: "replicate",
        maxInputTokens: 256,
        maxOutputTokens: 256,
    },
    {
        type: "text",
        modelName: "console.log text",
        source: "debug",
        description: "Fake model that just echoes the prompt to the console for debugging",
        maxInputTokens: 8192,
        maxOutputTokens: 8192,
    },
];
export const imageModels = [
    {
        type: "image",
        modelName: "google/imagen-3",
        source: "replicate",
        costPerImage: 0.05,
    },
    {
        type: "image",
        modelName: "minimax/image-01",
        source: "replicate",
        costPerImage: 0.01,
        outputType: "Array",
    },
    {
        type: "image",
        modelName: "flux-modal",
        source: "modal",
        costPerImage: 0.03,
    },
    {
        type: "image",
        modelName: "gpt-image-1",
        source: "openai",
        // varies: https://platform.openai.com/docs/models/gpt-image-1
        costPerImage: 0.25,
    },
    {
        type: "image",
        modelName: "gemini-2.5-flash-image-preview",
        source: "google",
        description: "aka nano-banana",
        costPerImage: 0.04,
    },
    {
        type: "image",
        modelName: "console.log image",
        source: "debug",
        description: "Fake model that just echoes the prompt to the console for debugging",
        costPerImage: 0,
    },
];
export const embeddingsModels = [
    { type: "embeddings", modelName: "text-embedding-3-small", tokenCost: 0.02 },
];
export function getModel(modelName) {
    const allModels = [...textModels, ...imageModels, ...speechToTextModels];
    return allModels.find((model) => model.modelName === modelName);
}
export function isImageModel(model) {
    return model.type === "image";
}
export function isTextModel(model) {
    return model.type === "text";
}
export function isSpeechToTextModel(model) {
    return model.type === "speech-to-text";
}
export function isEmbeddingsModel(model) {
    return model.type === "embeddings";
}
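Note that `getModel` searches only the text, image, and speech-to-text tables; `embeddingsModels` is not included, so embeddings names come back `undefined`. A sketch of the lookup-plus-narrow pattern that `getClient` itself uses:

```typescript
import { getModel, isTextModel } from "./models.js";

const model = getModel("gpt-4o-mini");
if (model !== undefined && isTextModel(model)) {
  // Narrowed to TextModel: token limits are now typed as numbers.
  console.log(model.maxInputTokens, model.maxOutputTokens); // 128000 16384
}
```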
package/dist/lib/types/result.d.ts
ADDED
@@ -0,0 +1,12 @@
export type Success<T> = {
    success: true;
    value: T;
};
export type Failure = {
    success: false;
    error: string;
};
export type Result<T> = Success<T> | Failure;
export declare function success<T>(value: T): Success<T>;
export declare function failure(error: string): Failure;
export declare function mergeResults<T>(results: Result<T>[]): Result<T[]>;
package/dist/lib/types/result.js
ADDED
@@ -0,0 +1,16 @@
export function success(value) {
    return { success: true, value };
}
export function failure(error) {
    return { success: false, error };
}
export function mergeResults(results) {
    const values = [];
    for (const result of results) {
        if (!result.success) {
            return failure(result.error);
        }
        values.push(result.value);
    }
    return success(values);
}
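`mergeResults` short-circuits on the first failure and otherwise collects the values, which is handy when fanning out several prompts. A sketch with hand-built results (import path is relative to `dist/lib`):

```typescript
import { success, failure, mergeResults } from "./types/result.js";

const merged = mergeResults([success(1), success(2), success(3)]);
// => { success: true, value: [1, 2, 3] }

const broken = mergeResults([success(1), failure("boom"), success(3)]);
// => { success: false, error: "boom" } (first failure wins)
```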
package/dist/lib/types.d.ts
ADDED
@@ -0,0 +1,30 @@
export * from "./types/result.js";
import { EgonLog, LogLevel } from "egonlog";
import { ModelName } from "./models.js";
import { Result } from "./index.js";
export type PromptConfig<Tool = any> = {
    messages?: {
        role: string;
        content: string;
    }[];
    instructions?: string;
    maxTokens?: number;
    temperature?: number;
    numSuggestions?: number;
    parallelToolCalls?: boolean;
    tools?: Tool[];
};
export type SmolConfig = {
    apiKey: string;
    model: ModelName;
    logLevel?: LogLevel;
};
export type BaseClientConfig = SmolConfig & {
    logger: EgonLog;
};
export type PromptResult = {
    output: string;
};
export interface SmolClient {
    text(content: string, config?: PromptConfig): Promise<Result<PromptResult>>;
}
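Any object with a matching `text` method satisfies `SmolClient`, so a fake client for tests is a few lines. A hypothetical `EchoClient`, not part of the package:

```typescript
import { success } from "./types.js";
import type { PromptConfig, PromptResult, Result, SmolClient } from "./types.js";

// Hypothetical test double: echoes the prompt back as the "model" output.
class EchoClient implements SmolClient {
  async text(content: string, _config?: PromptConfig): Promise<Result<PromptResult>> {
    return success({ output: content });
  }
}
```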
package/dist/lib/types.js
ADDED
@@ -0,0 +1 @@
export * from "./types/result.js";
package/package.json
ADDED
@@ -0,0 +1,42 @@
{
  "name": "smoltalk",
  "version": "0.0.1",
  "description": "A common interface for LLM APIs",
  "homepage": "https://github.com/egonSchiele/smoltalk",
  "scripts": {
    "test": "vitest",
    "test:tsc": "tsc -p tests/tsconfig.json",
    "coverage": "vitest --coverage",
    "build": "rm -rf dist && tsc",
    "start": "cd dist && node index.js"
  },
  "files": [
    "./dist"
  ],
  "exports": {
    ".": {
      "import": "./dist/index.js",
      "require": "./dist/index.js",
      "types": "./dist/index.d.ts"
    }
  },
  "type": "module",
  "types": "./dist/index.d.ts",
  "keywords": [
    "smoltalk",
    "llm",
    "api"
  ],
  "author": "Aditya Bhargava",
  "license": "ISC",
  "devDependencies": {
    "@types/node": "^25.0.3",
    "typescript": "^5.9.3",
    "vitest": "^4.0.16"
  },
  "dependencies": {
    "@google/genai": "^1.34.0",
    "egonlog": "^0.0.2",
    "openai": "^6.15.0"
  }
}