smoltalk 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.js +9 -10
- package/dist/clients/baseClient.js +14 -14
- package/dist/clients/openai.js +0 -1
- package/dist/clients/openaiResponses.js +0 -1
- package/dist/embed/google.d.ts +3 -0
- package/dist/embed/google.js +25 -0
- package/dist/embed/ollama.d.ts +3 -0
- package/dist/embed/ollama.js +35 -0
- package/dist/embed/openai.d.ts +3 -0
- package/dist/embed/openai.js +42 -0
- package/dist/embed.d.ts +21 -0
- package/dist/embed.js +35 -0
- package/dist/functions.d.ts +0 -8
- package/dist/image/google.d.ts +3 -0
- package/dist/image/google.js +57 -0
- package/dist/image/openai.d.ts +3 -0
- package/dist/image/openai.js +140 -0
- package/dist/image.d.ts +35 -0
- package/dist/image.js +37 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/models.d.ts +231 -26
- package/dist/models.js +147 -12
- package/dist/types.d.ts +3 -3
- package/dist/types.js +1 -1
- package/dist/util/imageRef.d.ts +29 -0
- package/dist/util/imageRef.js +51 -0
- package/dist/util/provider.d.ts +17 -0
- package/dist/util/provider.js +34 -0
- package/dist/util/util.d.ts +23 -0
- package/dist/util/util.js +40 -0
- package/package.json +2 -2
package/dist/client.js
CHANGED
|
@@ -11,6 +11,7 @@ import { SmolOpenAi } from "./clients/openai.js";
|
|
|
11
11
|
import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
|
|
12
12
|
import { getModel, isTextModel } from "./models.js";
|
|
13
13
|
import { SmolError } from "./smolError.js";
|
|
14
|
+
import { resolveApiKey, resolveProvider } from "./util/provider.js";
|
|
14
15
|
const registeredProviders = {};
|
|
15
16
|
export function registerProvider(providerName, clientClass) {
|
|
16
17
|
registeredProviders[providerName] = clientClass;
|
|
@@ -23,22 +24,20 @@ export function unregisterProvider(providerName) {
|
|
|
23
24
|
return false;
|
|
24
25
|
}
|
|
25
26
|
export function getClient(config) {
|
|
26
|
-
let provider = config.provider;
|
|
27
27
|
const modelName = config.model;
|
|
28
|
-
|
|
28
|
+
const provider = resolveProvider(modelName, config.provider);
|
|
29
|
+
// For getClient, validate that the model is a text model when no explicit
|
|
30
|
+
// provider is given (since this factory only returns text-generation clients).
|
|
31
|
+
if (!config.provider) {
|
|
29
32
|
const model = getModel(modelName);
|
|
30
|
-
if (model
|
|
31
|
-
throw new SmolError(`Model ${modelName} is not recognized. Please specify a known model, or explicitly set the provider option in the config.`);
|
|
32
|
-
}
|
|
33
|
-
if (!isTextModel(model)) {
|
|
33
|
+
if (model && !isTextModel(model)) {
|
|
34
34
|
throw new SmolError(`Only text models are supported currently. ${modelName} is a ${model?.type} model.`);
|
|
35
35
|
}
|
|
36
|
-
provider = model.provider;
|
|
37
36
|
}
|
|
38
37
|
const resolvedKeys = {
|
|
39
|
-
openAiApiKey: config
|
|
40
|
-
googleApiKey: config
|
|
41
|
-
anthropicApiKey: config
|
|
38
|
+
openAiApiKey: resolveApiKey("openai", config),
|
|
39
|
+
googleApiKey: resolveApiKey("google", config),
|
|
40
|
+
anthropicApiKey: resolveApiKey("anthropic", config),
|
|
42
41
|
};
|
|
43
42
|
const clientConfig = {
|
|
44
43
|
messages: [],
|
package/dist/clients/baseClient.js
CHANGED
|
@@ -2,6 +2,7 @@ import { userMessage, assistantMessage } from "../classes/message/index.js";
|
|
|
2
2
|
import { getLogger } from "../util/logger.js";
|
|
3
3
|
import { SmolStructuredOutputError } from "../smolError.js";
|
|
4
4
|
import { getStatelogClient } from "../statelogClient.js";
|
|
5
|
+
import { stripCodeFence } from "../util/util.js";
|
|
5
6
|
import { success, } from "../types.js";
|
|
6
7
|
import { z } from "zod";
|
|
7
8
|
const DEFAULT_NUM_RETRIES = 2;
|
|
@@ -141,17 +142,11 @@ export class BaseClient {
|
|
|
141
142
|
if (rawValue && typeof rawValue === "object" && rawValue.type === "object" && rawValue.properties) {
|
|
142
143
|
return this.extractResponse(promptConfig, rawValue.properties, schema, depth + 1);
|
|
143
144
|
}
|
|
144
|
-
// 2. String → try JSON.parse
|
|
145
|
-
// Throws SmolStructuredOutputError if the string isn't valid JSON; the
|
|
146
|
-
// textWithRetry caller catches this and retries with a validation hint.
|
|
145
|
+
// 2. String → try JSON.parse, then recurse
|
|
147
146
|
if (typeof rawValue === "string") {
|
|
148
|
-
const stripped = rawValue
|
|
149
|
-
.trim()
|
|
150
|
-
.replace(/^```json\s*/, "")
|
|
151
|
-
.replace(/```\s*$/, "");
|
|
152
|
-
let parsed;
|
|
147
|
+
const stripped = stripCodeFence(rawValue);
|
|
153
148
|
try {
|
|
154
|
-
|
|
149
|
+
return this.extractResponse(promptConfig, JSON.parse(stripped), schema, depth + 1);
|
|
155
150
|
}
|
|
156
151
|
catch (err) {
|
|
157
152
|
const logger = getLogger();
|
|
@@ -160,9 +155,8 @@ export class BaseClient {
|
|
|
160
155
|
rawValue: stripped,
|
|
161
156
|
});
|
|
162
157
|
this.statelogClient?.debug("extractResponse: failed to parse JSON from string", { error: err.message });
|
|
163
|
-
throw new SmolStructuredOutputError(`Response did not parse as JSON: ${err.message}`);
|
|
164
158
|
}
|
|
165
|
-
return
|
|
159
|
+
return rawValue;
|
|
166
160
|
}
|
|
167
161
|
// 3. Null/undefined/primitive — nothing to unwrap
|
|
168
162
|
if (rawValue == null || typeof rawValue !== "object") {
|
|
@@ -208,7 +202,8 @@ export class BaseClient {
|
|
|
208
202
|
if (result.value.toolCalls.length > 0) {
|
|
209
203
|
return result;
|
|
210
204
|
}
|
|
211
|
-
if (!promptConfig.responseFormat
|
|
205
|
+
if (!promptConfig.responseFormat ||
|
|
206
|
+
!promptConfig.responseFormatOptions?.strict) {
|
|
212
207
|
return result;
|
|
213
208
|
}
|
|
214
209
|
if (!("output" in result.value)) {
|
|
@@ -219,9 +214,14 @@ export class BaseClient {
|
|
|
219
214
|
return this.textWithRetry({ ...promptConfig, messages: retryMessages }, retries - 1);
|
|
220
215
|
}
|
|
221
216
|
const { output } = result.value;
|
|
222
|
-
if (output !== null &&
|
|
217
|
+
if (output !== null &&
|
|
218
|
+
promptConfig.responseFormat &&
|
|
219
|
+
promptConfig.responseFormatOptions?.strict &&
|
|
220
|
+
retries > 0) {
|
|
221
|
+
const allowExtraKeys = promptConfig.responseFormatOptions?.allowExtraKeys ?? false;
|
|
223
222
|
try {
|
|
224
|
-
const
|
|
223
|
+
const parsed = JSON.parse(output);
|
|
224
|
+
const parseResult = this.extractResponse(promptConfig, parsed, promptConfig.responseFormat);
|
|
225
225
|
return success({
|
|
226
226
|
...result.value,
|
|
227
227
|
output: parseResult,
|
package/dist/clients/openai.js
CHANGED
|
package/dist/embed/google.js
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { GoogleGenAI } from "@google/genai";
|
|
2
|
+
import { success, failure } from "../types/result.js";
|
|
3
|
+
export async function googleEmbed(inputs, config, apiKey) {
|
|
4
|
+
try {
|
|
5
|
+
const client = new GoogleGenAI({ apiKey });
|
|
6
|
+
const response = await client.models.embedContent({
|
|
7
|
+
model: config.model,
|
|
8
|
+
contents: inputs,
|
|
9
|
+
...(config.dimensions !== undefined
|
|
10
|
+
? { config: { outputDimensionality: config.dimensions } }
|
|
11
|
+
: {}),
|
|
12
|
+
});
|
|
13
|
+
const embeddings = (response.embeddings ?? []).map((e) => e.values ?? []);
|
|
14
|
+
return success({
|
|
15
|
+
embeddings,
|
|
16
|
+
model: config.model,
|
|
17
|
+
// Google Gemini embeddings API does not return token usage in the
|
|
18
|
+
// response. Cost cannot be auto-computed without a separate
|
|
19
|
+
// countTokens() call.
|
|
20
|
+
});
|
|
21
|
+
}
|
|
22
|
+
catch (err) {
|
|
23
|
+
return failure(err instanceof Error ? err.message : "Google embedding request failed");
|
|
24
|
+
}
|
|
25
|
+
}
|
package/dist/embed/ollama.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { Ollama } from "ollama";
|
|
2
|
+
import { success, failure } from "../types/result.js";
|
|
3
|
+
import { DEFAULT_OLLAMA_HOST } from "../clients/ollama.js";
|
|
4
|
+
export async function ollamaEmbed(inputs, config, apiKey, ollamaHost) {
|
|
5
|
+
try {
|
|
6
|
+
let client;
|
|
7
|
+
if (apiKey) {
|
|
8
|
+
client = new Ollama({
|
|
9
|
+
host: "https://cloud.ollama.com",
|
|
10
|
+
headers: { Authorization: "Bearer " + apiKey },
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
else {
|
|
14
|
+
client = new Ollama({ host: ollamaHost || DEFAULT_OLLAMA_HOST });
|
|
15
|
+
}
|
|
16
|
+
const response = await client.embed({
|
|
17
|
+
model: config.model,
|
|
18
|
+
input: inputs,
|
|
19
|
+
...(config.dimensions !== undefined
|
|
20
|
+
? { dimensions: config.dimensions }
|
|
21
|
+
: {}),
|
|
22
|
+
});
|
|
23
|
+
return success({
|
|
24
|
+
embeddings: response.embeddings,
|
|
25
|
+
model: response.model,
|
|
26
|
+
tokenUsage: {
|
|
27
|
+
inputTokens: response.prompt_eval_count ?? 0,
|
|
28
|
+
outputTokens: 0,
|
|
29
|
+
},
|
|
30
|
+
});
|
|
31
|
+
}
|
|
32
|
+
catch (err) {
|
|
33
|
+
return failure(err instanceof Error ? err.message : "Ollama embedding request failed");
|
|
34
|
+
}
|
|
35
|
+
}
|
package/dist/embed/openai.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { success, failure } from "../types/result.js";
|
|
3
|
+
import { getModel, isEmbeddingsModel } from "../models.js";
|
|
4
|
+
import { round } from "../util/util.js";
|
|
5
|
+
export async function openaiEmbed(inputs, config, apiKey) {
|
|
6
|
+
try {
|
|
7
|
+
const client = new OpenAI({ apiKey });
|
|
8
|
+
const response = await client.embeddings.create({
|
|
9
|
+
model: config.model,
|
|
10
|
+
input: inputs,
|
|
11
|
+
...(config.dimensions !== undefined
|
|
12
|
+
? { dimensions: config.dimensions }
|
|
13
|
+
: {}),
|
|
14
|
+
});
|
|
15
|
+
const embeddings = [...response.data]
|
|
16
|
+
.sort((a, b) => a.index - b.index)
|
|
17
|
+
.map((d) => d.embedding);
|
|
18
|
+
const inputTokens = response.usage.prompt_tokens;
|
|
19
|
+
const costEstimate = calculateEmbeddingCost(config.model, inputTokens);
|
|
20
|
+
return success({
|
|
21
|
+
embeddings,
|
|
22
|
+
model: response.model,
|
|
23
|
+
tokenUsage: { inputTokens, outputTokens: 0 },
|
|
24
|
+
costEstimate,
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
catch (err) {
|
|
28
|
+
return failure(err instanceof Error ? err.message : "OpenAI embedding request failed");
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
function calculateEmbeddingCost(modelName, inputTokens) {
|
|
32
|
+
const model = getModel(modelName);
|
|
33
|
+
if (!model || !isEmbeddingsModel(model) || !model.tokenCost)
|
|
34
|
+
return undefined;
|
|
35
|
+
const inputCost = round((inputTokens * model.tokenCost) / 1_000_000, 6);
|
|
36
|
+
return {
|
|
37
|
+
inputCost,
|
|
38
|
+
outputCost: 0,
|
|
39
|
+
totalCost: inputCost,
|
|
40
|
+
currency: "USD",
|
|
41
|
+
};
|
|
42
|
+
}
|
package/dist/embed.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Provider } from "./models.js";
|
|
2
|
+
import { Result } from "./types/result.js";
|
|
3
|
+
import { TokenUsage } from "./types/tokenUsage.js";
|
|
4
|
+
import { CostEstimate } from "./types/costEstimate.js";
|
|
5
|
+
export type EmbedConfig = {
|
|
6
|
+
model: string;
|
|
7
|
+
provider?: Provider;
|
|
8
|
+
dimensions?: number;
|
|
9
|
+
openAiApiKey?: string;
|
|
10
|
+
googleApiKey?: string;
|
|
11
|
+
ollamaApiKey?: string;
|
|
12
|
+
ollamaHost?: string;
|
|
13
|
+
metadata?: Record<string, unknown>;
|
|
14
|
+
};
|
|
15
|
+
export type EmbedResult = {
|
|
16
|
+
embeddings: number[][];
|
|
17
|
+
model: string;
|
|
18
|
+
tokenUsage?: TokenUsage;
|
|
19
|
+
costEstimate?: CostEstimate;
|
|
20
|
+
};
|
|
21
|
+
export declare function embed(input: string | string[], config: EmbedConfig): Promise<Result<EmbedResult>>;
|
package/dist/embed.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { failure } from "./types/result.js";
|
|
2
|
+
import { resolveProvider, resolveApiKey } from "./util/provider.js";
|
|
3
|
+
import { openaiEmbed } from "./embed/openai.js";
|
|
4
|
+
import { googleEmbed } from "./embed/google.js";
|
|
5
|
+
import { ollamaEmbed } from "./embed/ollama.js";
|
|
6
|
+
export async function embed(input, config) {
|
|
7
|
+
const inputs = Array.isArray(input) ? input : [input];
|
|
8
|
+
let provider;
|
|
9
|
+
try {
|
|
10
|
+
provider = resolveProvider(config.model, config.provider);
|
|
11
|
+
}
|
|
12
|
+
catch (err) {
|
|
13
|
+
return failure(err instanceof Error ? err.message : "Failed to resolve provider");
|
|
14
|
+
}
|
|
15
|
+
const apiKey = resolveApiKey(provider, config);
|
|
16
|
+
switch (provider) {
|
|
17
|
+
case "openai":
|
|
18
|
+
case "openai-responses": {
|
|
19
|
+
if (!apiKey) {
|
|
20
|
+
return failure("No OpenAI API key provided. Set openAiApiKey in config or the OPENAI_API_KEY environment variable.");
|
|
21
|
+
}
|
|
22
|
+
return openaiEmbed(inputs, config, apiKey);
|
|
23
|
+
}
|
|
24
|
+
case "google": {
|
|
25
|
+
if (!apiKey) {
|
|
26
|
+
return failure("No Google API key provided. Set googleApiKey in config or the GEMINI_API_KEY environment variable.");
|
|
27
|
+
}
|
|
28
|
+
return googleEmbed(inputs, config, apiKey);
|
|
29
|
+
}
|
|
30
|
+
case "ollama":
|
|
31
|
+
return ollamaEmbed(inputs, config, apiKey, config.ollamaHost);
|
|
32
|
+
default:
|
|
33
|
+
return failure(`Provider "${provider}" does not support embeddings`);
|
|
34
|
+
}
|
|
35
|
+
}
|
package/dist/functions.d.ts
CHANGED
|
@@ -1,18 +1,10 @@
|
|
|
1
1
|
import { PromptResult, SmolConfig, StreamChunk } from "./types.js";
|
|
2
2
|
import { Result } from "./types/result.js";
|
|
3
|
-
import type { z, ZodType } from "zod";
|
|
4
3
|
export declare function text(config: SmolConfig & {
|
|
5
4
|
stream: true;
|
|
6
5
|
}): AsyncGenerator<StreamChunk>;
|
|
7
|
-
export declare function text<S extends ZodType>(config: Omit<SmolConfig, "responseFormat"> & {
|
|
8
|
-
responseFormat: S;
|
|
9
|
-
stream?: false;
|
|
10
|
-
}): Promise<Result<PromptResult<z.infer<S>>>>;
|
|
11
6
|
export declare function text(config: SmolConfig & {
|
|
12
7
|
stream?: false;
|
|
13
8
|
}): Promise<Result<PromptResult>>;
|
|
14
|
-
export declare function textSync<S extends ZodType>(config: Omit<SmolConfig, "responseFormat"> & {
|
|
15
|
-
responseFormat: S;
|
|
16
|
-
}): Promise<Result<PromptResult<z.infer<S>>>>;
|
|
17
9
|
export declare function textSync(config: SmolConfig): Promise<Result<PromptResult>>;
|
|
18
10
|
export declare function textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
package/dist/image/google.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { GoogleGenAI } from "@google/genai";
|
|
2
|
+
import { success, failure } from "../types/result.js";
|
|
3
|
+
import { getModel, isImageModel } from "../models.js";
|
|
4
|
+
import { normalizeImageRef } from "../util/imageRef.js";
|
|
5
|
+
import { COST_DECIMAL_PLACES, round } from "../util/util.js";
|
|
6
|
+
export async function googleImage(input, config, apiKey) {
|
|
7
|
+
try {
|
|
8
|
+
const normalized = typeof input === "string" ? { prompt: input } : input;
|
|
9
|
+
const client = new GoogleGenAI({ apiKey });
|
|
10
|
+
const parts = [{ text: normalized.prompt }];
|
|
11
|
+
if (normalized.images && normalized.images.length > 0) {
|
|
12
|
+
const normalizedImages = await Promise.all(normalized.images.map((ref) => normalizeImageRef(ref)));
|
|
13
|
+
for (const img of normalizedImages) {
|
|
14
|
+
parts.push({
|
|
15
|
+
inlineData: {
|
|
16
|
+
mimeType: img.mimeType,
|
|
17
|
+
data: Buffer.from(img.data).toString("base64"),
|
|
18
|
+
},
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
const response = await client.models.generateContent({
|
|
23
|
+
model: config.model,
|
|
24
|
+
contents: [{ role: "user", parts }],
|
|
25
|
+
config: {
|
|
26
|
+
responseModalities: ["IMAGE", "TEXT"],
|
|
27
|
+
...(config.metadata ?? {}),
|
|
28
|
+
},
|
|
29
|
+
});
|
|
30
|
+
const images = [];
|
|
31
|
+
for (const candidate of response.candidates ?? []) {
|
|
32
|
+
for (const part of candidate.content?.parts ?? []) {
|
|
33
|
+
if (part.inlineData?.data && part.inlineData?.mimeType) {
|
|
34
|
+
images.push({
|
|
35
|
+
data: new Uint8Array(Buffer.from(part.inlineData.data, "base64")),
|
|
36
|
+
mimeType: part.inlineData.mimeType,
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return success({
|
|
42
|
+
images,
|
|
43
|
+
model: config.model,
|
|
44
|
+
costEstimate: calculateGoogleImageCost(config.model, images.length),
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
catch (err) {
|
|
48
|
+
return failure(err instanceof Error ? err.message : "Google image request failed");
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
function calculateGoogleImageCost(modelName, imageCount) {
|
|
52
|
+
const model = getModel(modelName);
|
|
53
|
+
if (!model || !isImageModel(model) || !model.costPerImage)
|
|
54
|
+
return undefined;
|
|
55
|
+
const totalCost = round(model.costPerImage * imageCount, COST_DECIMAL_PLACES);
|
|
56
|
+
return { inputCost: 0, outputCost: totalCost, totalCost, currency: "USD" };
|
|
57
|
+
}
|
package/dist/image/openai.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { toFile } from "openai/uploads";
|
|
3
|
+
import { success, failure } from "../types/result.js";
|
|
4
|
+
import { getModel, isImageModel } from "../models.js";
|
|
5
|
+
import { normalizeImageRef } from "../util/imageRef.js";
|
|
6
|
+
import { COST_DECIMAL_PLACES, omitUndefined, round, tokenCost, } from "../util/util.js";
|
|
7
|
+
export async function openaiImage(input, config, apiKey) {
|
|
8
|
+
try {
|
|
9
|
+
const normalized = typeof input === "string" ? { prompt: input } : input;
|
|
10
|
+
const hasImages = !!(normalized.images && normalized.images.length > 0);
|
|
11
|
+
if (normalized.mask && !hasImages) {
|
|
12
|
+
return failure("A mask was provided without any input images. Masks are only valid for image edits — pass at least one entry in `images` alongside the mask.");
|
|
13
|
+
}
|
|
14
|
+
const client = new OpenAI({ apiKey });
|
|
15
|
+
const baseParams = buildBaseParams(config, normalized.prompt);
|
|
16
|
+
const response = hasImages
|
|
17
|
+
? await callEdit(client, baseParams, normalized)
|
|
18
|
+
: await client.images.generate(baseParams);
|
|
19
|
+
const mimeType = mimeFromFormat(config.outputFormat) ?? "image/png";
|
|
20
|
+
const images = (response.data ?? []).map((d) => ({
|
|
21
|
+
data: new Uint8Array(Buffer.from(d.b64_json, "base64")),
|
|
22
|
+
mimeType,
|
|
23
|
+
revisedPrompt: d.revised_prompt,
|
|
24
|
+
}));
|
|
25
|
+
const tokenUsage = extractUsage(response);
|
|
26
|
+
const costEstimate = tokenUsage
|
|
27
|
+
? calculateImageCost(config.model, tokenUsage)
|
|
28
|
+
: calculatePerImageCost(config.model, images.length);
|
|
29
|
+
return success({
|
|
30
|
+
images,
|
|
31
|
+
model: config.model,
|
|
32
|
+
tokenUsage,
|
|
33
|
+
costEstimate,
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
catch (err) {
|
|
37
|
+
return failure(err instanceof Error ? err.message : "OpenAI image request failed");
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
function buildBaseParams(config, prompt) {
|
|
41
|
+
return omitUndefined({
|
|
42
|
+
model: config.model,
|
|
43
|
+
prompt,
|
|
44
|
+
n: config.n,
|
|
45
|
+
size: config.size,
|
|
46
|
+
quality: config.quality,
|
|
47
|
+
output_format: config.outputFormat,
|
|
48
|
+
background: config.background,
|
|
49
|
+
...(config.metadata ?? {}),
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
async function callEdit(client, baseParams, normalized) {
|
|
53
|
+
const imageFiles = await Promise.all((normalized.images ?? []).map(async (ref, i) => {
|
|
54
|
+
const n = await normalizeImageRef(ref);
|
|
55
|
+
return toFileFor(n, `image-${i}`);
|
|
56
|
+
}));
|
|
57
|
+
const maskFile = normalized.mask
|
|
58
|
+
? await toFileFor(await normalizeImageRef(normalized.mask), "mask")
|
|
59
|
+
: undefined;
|
|
60
|
+
return client.images.edit(omitUndefined({
|
|
61
|
+
...baseParams,
|
|
62
|
+
image: imageFiles.length === 1 ? imageFiles[0] : imageFiles,
|
|
63
|
+
mask: maskFile,
|
|
64
|
+
}));
|
|
65
|
+
}
|
|
66
|
+
async function toFileFor(img, baseName) {
|
|
67
|
+
return toFile(img.data, `${baseName}.${extFromMime(img.mimeType)}`, {
|
|
68
|
+
type: img.mimeType,
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
function extFromMime(mime) {
|
|
72
|
+
if (mime === "image/jpeg")
|
|
73
|
+
return "jpg";
|
|
74
|
+
if (mime === "image/webp")
|
|
75
|
+
return "webp";
|
|
76
|
+
return "png";
|
|
77
|
+
}
|
|
78
|
+
function mimeFromFormat(fmt) {
|
|
79
|
+
if (!fmt)
|
|
80
|
+
return undefined;
|
|
81
|
+
if (fmt === "jpeg")
|
|
82
|
+
return "image/jpeg";
|
|
83
|
+
if (fmt === "webp")
|
|
84
|
+
return "image/webp";
|
|
85
|
+
return "image/png";
|
|
86
|
+
}
|
|
87
|
+
function extractUsage(response) {
|
|
88
|
+
const u = response?.usage;
|
|
89
|
+
if (!u)
|
|
90
|
+
return undefined;
|
|
91
|
+
return {
|
|
92
|
+
inputTokens: u.input_tokens ?? 0,
|
|
93
|
+
outputTokens: u.output_tokens ?? 0,
|
|
94
|
+
cachedInputTokens: u.input_tokens_details?.cached_tokens,
|
|
95
|
+
inputImageTokens: u.input_tokens_details?.image_tokens,
|
|
96
|
+
inputTextTokens: u.input_tokens_details?.text_tokens,
|
|
97
|
+
totalTokens: u.total_tokens,
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
function calculateImageCost(modelName, usage) {
|
|
101
|
+
const model = getModel(modelName);
|
|
102
|
+
if (!model || !isImageModel(model))
|
|
103
|
+
return undefined;
|
|
104
|
+
const totalIn = usage.inputTokens ?? 0;
|
|
105
|
+
const cachedIn = usage.cachedInputTokens ?? 0;
|
|
106
|
+
const imgOut = usage.outputTokens ?? 0;
|
|
107
|
+
// Prefer the detailed breakdown if the API returned it; otherwise treat
|
|
108
|
+
// all non-cached input tokens as text input.
|
|
109
|
+
let textIn;
|
|
110
|
+
let imageIn;
|
|
111
|
+
if (usage.inputTextTokens !== undefined ||
|
|
112
|
+
usage.inputImageTokens !== undefined) {
|
|
113
|
+
textIn = Math.max(0, (usage.inputTextTokens ?? 0) - cachedIn);
|
|
114
|
+
imageIn = usage.inputImageTokens ?? 0;
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
textIn = Math.max(0, totalIn - cachedIn);
|
|
118
|
+
imageIn = 0;
|
|
119
|
+
}
|
|
120
|
+
const textInputCost = tokenCost(textIn, model.inputTokenCost);
|
|
121
|
+
const imageInputCost = tokenCost(imageIn, model.inputImageTokenCost);
|
|
122
|
+
const cachedCost = tokenCost(cachedIn, model.cachedInputTokenCost);
|
|
123
|
+
const outputCost = tokenCost(imgOut, model.outputImageTokenCost);
|
|
124
|
+
const inputCost = round(textInputCost + imageInputCost, COST_DECIMAL_PLACES);
|
|
125
|
+
const totalCost = round(inputCost + cachedCost + outputCost, COST_DECIMAL_PLACES);
|
|
126
|
+
return {
|
|
127
|
+
inputCost,
|
|
128
|
+
outputCost,
|
|
129
|
+
cachedInputCost: cachedCost,
|
|
130
|
+
totalCost,
|
|
131
|
+
currency: "USD",
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
function calculatePerImageCost(modelName, imageCount) {
|
|
135
|
+
const model = getModel(modelName);
|
|
136
|
+
if (!model || !isImageModel(model) || !model.costPerImage)
|
|
137
|
+
return undefined;
|
|
138
|
+
const totalCost = round(model.costPerImage * imageCount, COST_DECIMAL_PLACES);
|
|
139
|
+
return { inputCost: 0, outputCost: totalCost, totalCost, currency: "USD" };
|
|
140
|
+
}
|
package/dist/image.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { Provider } from "./models.js";
|
|
2
|
+
import { Result } from "./types/result.js";
|
|
3
|
+
import { TokenUsage } from "./types/tokenUsage.js";
|
|
4
|
+
import { CostEstimate } from "./types/costEstimate.js";
|
|
5
|
+
import { ImageRef } from "./util/imageRef.js";
|
|
6
|
+
export { ImageRef };
|
|
7
|
+
export type ImageInput = string | {
|
|
8
|
+
prompt: string;
|
|
9
|
+
images?: ImageRef[];
|
|
10
|
+
mask?: ImageRef;
|
|
11
|
+
};
|
|
12
|
+
export type ImageConfig = {
|
|
13
|
+
model: string;
|
|
14
|
+
provider?: Provider;
|
|
15
|
+
n?: number;
|
|
16
|
+
size?: string;
|
|
17
|
+
quality?: "low" | "medium" | "high" | "auto";
|
|
18
|
+
outputFormat?: "png" | "jpeg" | "webp";
|
|
19
|
+
background?: "transparent" | "opaque" | "auto";
|
|
20
|
+
openAiApiKey?: string;
|
|
21
|
+
googleApiKey?: string;
|
|
22
|
+
metadata?: Record<string, unknown>;
|
|
23
|
+
};
|
|
24
|
+
export type GeneratedImage = {
|
|
25
|
+
data: Uint8Array;
|
|
26
|
+
mimeType: string;
|
|
27
|
+
revisedPrompt?: string;
|
|
28
|
+
};
|
|
29
|
+
export type ImageGenResult = {
|
|
30
|
+
images: GeneratedImage[];
|
|
31
|
+
model: string;
|
|
32
|
+
tokenUsage?: TokenUsage;
|
|
33
|
+
costEstimate?: CostEstimate;
|
|
34
|
+
};
|
|
35
|
+
export declare function image(input: ImageInput, config: ImageConfig): Promise<Result<ImageGenResult>>;
|
package/dist/image.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { failure } from "./types/result.js";
|
|
2
|
+
import { resolveProvider, resolveApiKey } from "./util/provider.js";
|
|
3
|
+
import { openaiImage } from "./image/openai.js";
|
|
4
|
+
import { googleImage } from "./image/google.js";
|
|
5
|
+
export async function image(input, config) {
|
|
6
|
+
let provider;
|
|
7
|
+
try {
|
|
8
|
+
provider = resolveProvider(config.model, config.provider);
|
|
9
|
+
}
|
|
10
|
+
catch (err) {
|
|
11
|
+
return failure(err instanceof Error ? err.message : "Failed to resolve provider");
|
|
12
|
+
}
|
|
13
|
+
const apiKey = resolveApiKey(provider, config);
|
|
14
|
+
// `mask` is only meaningful for OpenAI inpainting. Reject up front so
|
|
15
|
+
// other providers don't silently drop it.
|
|
16
|
+
const hasMask = typeof input !== "string" && !!input.mask;
|
|
17
|
+
if (hasMask && provider !== "openai" && provider !== "openai-responses") {
|
|
18
|
+
return failure(`\`mask\` is only supported by the OpenAI image edit endpoint; provider "${provider}" cannot use it.`);
|
|
19
|
+
}
|
|
20
|
+
switch (provider) {
|
|
21
|
+
case "openai":
|
|
22
|
+
case "openai-responses": {
|
|
23
|
+
if (!apiKey) {
|
|
24
|
+
return failure("No OpenAI API key provided. Set openAiApiKey in config or the OPENAI_API_KEY environment variable.");
|
|
25
|
+
}
|
|
26
|
+
return openaiImage(input, config, apiKey);
|
|
27
|
+
}
|
|
28
|
+
case "google": {
|
|
29
|
+
if (!apiKey) {
|
|
30
|
+
return failure("No Google API key provided. Set googleApiKey in config or the GEMINI_API_KEY environment variable.");
|
|
31
|
+
}
|
|
32
|
+
return googleImage(input, config, apiKey);
|
|
33
|
+
}
|
|
34
|
+
default:
|
|
35
|
+
return failure(`Provider "${provider}" does not support image generation`);
|
|
36
|
+
}
|
|
37
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -7,5 +7,7 @@ export * from "./util/util.js";
|
|
|
7
7
|
export * from "./classes/message/index.js";
|
|
8
8
|
export * from "./functions.js";
|
|
9
9
|
export * from "./classes/ToolCall.js";
|
|
10
|
+
export * from "./embed.js";
|
|
11
|
+
export * from "./image.js";
|
|
10
12
|
export { getLogger, EgonLog } from "./util/logger.js";
|
|
11
13
|
export type { LogLevel } from "./util/logger.js";
|
package/dist/index.js
CHANGED
|
@@ -7,4 +7,6 @@ export * from "./util/util.js";
|
|
|
7
7
|
export * from "./classes/message/index.js";
|
|
8
8
|
export * from "./functions.js";
|
|
9
9
|
export * from "./classes/ToolCall.js";
|
|
10
|
+
export * from "./embed.js";
|
|
11
|
+
export * from "./image.js";
|
|
10
12
|
export { getLogger, EgonLog } from "./util/logger.js";
|