@tyvm/knowhow 0.0.120 → 0.0.122
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents/base/base.ts +11 -1
- package/src/cli.ts +2 -0
- package/src/clients/fireworks.ts +54 -0
- package/src/clients/http.ts +94 -0
- package/src/clients/index.ts +87 -24
- package/src/clients/openai.ts +58 -0
- package/src/clients/pricing/fireworks.ts +3 -0
- package/src/clients/types.ts +18 -0
- package/src/commands/replay.ts +424 -0
- package/src/processors/Base64ImageDetector.ts +44 -22
- package/src/services/modules/index.ts +5 -4
- package/tests/clients/AIClient.test.ts +5 -3
- package/tests/processors/Base64ImageDetector.test.ts +135 -0
- package/tests/processors/CustomVariables.test.ts +17 -7
- package/ts_build/package.json +1 -1
- package/ts_build/src/agents/base/base.js +8 -1
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/cli.js +2 -0
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/fireworks.d.ts +9 -0
- package/ts_build/src/clients/fireworks.js +29 -0
- package/ts_build/src/clients/fireworks.js.map +1 -1
- package/ts_build/src/clients/http.d.ts +3 -1
- package/ts_build/src/clients/http.js +76 -0
- package/ts_build/src/clients/http.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +23 -9
- package/ts_build/src/clients/index.js +68 -20
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/openai.d.ts +6 -0
- package/ts_build/src/clients/openai.js +45 -0
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/pricing/fireworks.js +3 -0
- package/ts_build/src/clients/pricing/fireworks.js.map +1 -1
- package/ts_build/src/clients/types.d.ts +8 -0
- package/ts_build/src/commands/replay.d.ts +2 -0
- package/ts_build/src/commands/replay.js +324 -0
- package/ts_build/src/commands/replay.js.map +1 -0
- package/ts_build/src/processors/Base64ImageDetector.js +19 -12
- package/ts_build/src/processors/Base64ImageDetector.js.map +1 -1
- package/ts_build/src/services/modules/index.js.map +1 -1
- package/ts_build/tests/clients/AIClient.test.js +3 -3
- package/ts_build/tests/clients/AIClient.test.js.map +1 -1
- package/ts_build/tests/processors/Base64ImageDetector.test.js +88 -0
- package/ts_build/tests/processors/Base64ImageDetector.test.js.map +1 -1
- package/ts_build/tests/processors/CustomVariables.test.js +8 -4
- package/ts_build/tests/processors/CustomVariables.test.js.map +1 -1
package/package.json
CHANGED
package/src/agents/base/base.ts
CHANGED
|
@@ -668,11 +668,15 @@ export abstract class BaseAgent implements IAgent {
|
|
|
668
668
|
promise: Promise<T>,
|
|
669
669
|
interruptValue: T
|
|
670
670
|
): Promise<T> {
|
|
671
|
-
return new Promise<T>((resolve) => {
|
|
671
|
+
return new Promise<T>((resolve, reject) => {
|
|
672
|
+
let interrupted = false;
|
|
673
|
+
|
|
672
674
|
this._interruptResolve = (value: any) => {
|
|
675
|
+
interrupted = true;
|
|
673
676
|
this._interruptResolve = null;
|
|
674
677
|
resolve(value);
|
|
675
678
|
};
|
|
679
|
+
|
|
676
680
|
promise.then((result) => {
|
|
677
681
|
if (this._interruptResolve) {
|
|
678
682
|
this._interruptResolve = null;
|
|
@@ -681,7 +685,13 @@ export abstract class BaseAgent implements IAgent {
|
|
|
681
685
|
}).catch((err) => {
|
|
682
686
|
if (this._interruptResolve) {
|
|
683
687
|
this._interruptResolve = null;
|
|
688
|
+
}
|
|
689
|
+
// Only swallow the error if interrupt() was explicitly called.
|
|
690
|
+
// Otherwise re-throw so callers see the real error.
|
|
691
|
+
if (interrupted) {
|
|
684
692
|
resolve(interruptValue);
|
|
693
|
+
} else {
|
|
694
|
+
reject(err);
|
|
685
695
|
}
|
|
686
696
|
});
|
|
687
697
|
});
|
package/src/cli.ts
CHANGED
|
@@ -37,6 +37,7 @@ import {
|
|
|
37
37
|
addGithubCredentialsCommand,
|
|
38
38
|
} from "./commands/misc";
|
|
39
39
|
import { addConvertCommand } from "./commands/convert";
|
|
40
|
+
import { addReplayCommand } from "./commands/replay";
|
|
40
41
|
|
|
41
42
|
// Handle unhandled promise rejections gracefully — particularly from MCP SDK
|
|
42
43
|
// which fires errors via event emitters that can bypass Promise.allSettled.
|
|
@@ -100,6 +101,7 @@ async function main() {
|
|
|
100
101
|
addModulesCommand(program);
|
|
101
102
|
addMcpCommands(program);
|
|
102
103
|
addConvertCommand(program);
|
|
104
|
+
addReplayCommand(program);
|
|
103
105
|
|
|
104
106
|
// Load global modules early (before parse) so they can register CLI subcommands.
|
|
105
107
|
// We pass only the Program in context — no services are spun up at this stage.
|
package/src/clients/fireworks.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { HttpClient } from "./http";
|
|
2
2
|
import { FireworksTextPricing } from "./pricing/fireworks";
|
|
3
|
+
import { CompletionOptions, CompletionResponse } from "./types";
|
|
4
|
+
|
|
5
|
+
/** Descriptor for one model entry, as returned by `getModels`. */
type ModelInfo = {
  id: string;
  object: string;
  owned_by: string;
};
|
|
3
6
|
|
|
4
7
|
/**
|
|
5
8
|
* Fireworks AI client — OpenAI-compatible API (fast serverless inference)
|
|
@@ -12,4 +15,55 @@ export class GenericFireworksClient extends HttpClient {
|
|
|
12
15
|
if (apiKey) this.setJwt(apiKey);
|
|
13
16
|
this.setPrices(FireworksTextPricing);
|
|
14
17
|
}
|
|
18
|
+
|
|
19
|
+
/**
 * Lists Fireworks models by merging the live model listing with the local
 * pricing table.
 *
 * Entries from the parent `getModels` call come first. Every id in
 * `FireworksTextPricing` that the live listing did not return is appended as
 * a synthetic `{ object: "model", owned_by: "fireworks" }` entry, because the
 * Fireworks API sometimes omits newly-released models (e.g. minimax-m3,
 * kimi-k2p7-code) even though they are available for inference.
 *
 * @param type Filter forwarded unchanged to the parent `getModels`.
 * @returns Live models followed by pricing-only models (ids not already in
 *   the live listing).
 */
async getModels(type = "all"): Promise<ModelInfo[]> {
  let fromApi: ModelInfo[] = [];
  try {
    fromApi = await super.getModels(type);
  } catch (_err) {
    // Best effort: when the live lookup fails, only the pricing table is used.
  }

  const knownIds = new Set<string>();
  for (const entry of fromApi) {
    knownIds.add(entry.id);
  }

  const fromPricing: ModelInfo[] = [];
  for (const id of Object.keys(FireworksTextPricing)) {
    if (knownIds.has(id)) continue;
    fromPricing.push({ id, object: "model", owned_by: "fireworks" });
  }

  return fromApi.concat(fromPricing);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Sends a chat completion to Fireworks after removing fields that some
 * models (e.g. kimi-k2p7-code) reject:
 *   - `long_ttl_cache` (Anthropic-specific cache flag) is set to `undefined`
 *   - `tools[N].function.returns` (non-standard extension) is dropped
 *
 * The caller's `options` object and its tool definitions are not mutated;
 * shallow copies are sent instead.
 *
 * @param options Completion request as supplied by the caller.
 * @returns The response of the parent `createChatCompletion`.
 */
async createChatCompletion(
  options: CompletionOptions
): Promise<CompletionResponse> {
  const request: CompletionOptions = { ...options, long_ttl_cache: undefined };

  const originalTools = request.tools;
  if (originalTools) {
    request.tools = originalTools.map((tool) => {
      // Split off `returns`; everything else on the function spec is kept.
      const { returns: _dropped, ...fnSpec } = tool.function as any;
      return { ...tool, function: fnSpec };
    });
  }

  return super.createChatCompletion(request);
}
|
|
15
69
|
}
|
package/src/clients/http.ts
CHANGED
|
@@ -3,6 +3,7 @@ import {
|
|
|
3
3
|
GenericClient,
|
|
4
4
|
CompletionOptions,
|
|
5
5
|
CompletionResponse,
|
|
6
|
+
StreamChunk,
|
|
6
7
|
EmbeddingOptions,
|
|
7
8
|
EmbeddingResponse,
|
|
8
9
|
} from "./types";
|
|
@@ -91,6 +92,17 @@ export class HttpClient implements GenericClient {
|
|
|
91
92
|
this.pricingMap = pricingMap;
|
|
92
93
|
}
|
|
93
94
|
|
|
95
|
+
/**
 * Looks up pricing information held by this client.
 *
 * @param model Model id to look up. When omitted, the whole pricing map is returned.
 * @returns The entry for `model` (undefined when no pricing is known for it),
 *   or the full pricing map when no model is given.
 */
getPricing(model?: string): ModelPricing | Record<string, ModelPricing> | undefined {
  return model === undefined ? this.pricingMap : this.pricingMap[model];
}
|
|
105
|
+
|
|
94
106
|
/**
|
|
95
107
|
* Calculate USD cost for a completion/embedding call from token usage.
|
|
96
108
|
* Returns undefined if no pricing entry exists for the model.
|
|
@@ -186,6 +198,88 @@ export class HttpClient implements GenericClient {
|
|
|
186
198
|
});
|
|
187
199
|
}
|
|
188
200
|
|
|
201
|
+
/**
 * Streams a chat completion via OpenAI-compatible SSE (`stream: true`).
 *
 * POSTs to `${baseUrl}/v1/chat/completions`, parses each `data: {...}` line
 * and yields the text found in `choices[0].delta.content` as it arrives.
 * After the stream ends, one `{ done: true }` chunk carries the last usage
 * block reported by the server and the cost computed by `calculateCost`.
 *
 * @param options Completion request; `options.signal` aborts the HTTP call.
 * @returns Async generator of text deltas followed by one final done chunk.
 * @throws Error when the response status is not OK or the response has no body.
 */
async *createChatCompletionStream(
  options: CompletionOptions
): AsyncGenerator<StreamChunk> {
  // The abort signal belongs to the transport. Spreading it into the payload
  // would serialize it as `"signal": {}`, an unknown field for the provider.
  const { signal, ...payloadOptions } = options;
  const body = {
    ...payloadOptions,
    model: options.model,
    messages: options.messages,
    max_tokens: options.max_tokens || 4000,
    ...this.extra_body,
    stream: true,
    stream_options: { include_usage: true },
    ...(options.tools && {
      tools: options.tools,
      tool_choice: "auto",
    }),
  };

  const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
    method: "POST",
    headers: {
      ...(this.headers as Record<string, string>),
      "Content-Type": "application/json",
      "Accept": "text/event-stream",
    },
    body: JSON.stringify(body),
    signal,
  });

  if (!response.ok) {
    const text = await response.text().catch(() => "");
    throw new Error(`HTTP ${response.status}: ${text}`);
  }

  if (!response.body) {
    throw new Error("No response body for streaming request");
  }

  // Parses one SSE line into the pieces this method cares about.
  // Returns undefined for anything that is not a JSON `data` event.
  const parseEvent = (
    rawLine: string
  ): { delta?: string; usage?: StreamChunk["usage"] } | undefined => {
    const line = rawLine.trim();
    // The SSE format allows `data:` with or without a following space.
    if (!line.startsWith("data:")) return undefined;
    const payload = line.slice(5).trimStart();
    if (!payload || payload === "[DONE]") return undefined;
    try {
      const chunk = JSON.parse(payload);
      return {
        delta: chunk.choices?.[0]?.delta?.content || undefined,
        usage: chunk.usage
          ? {
              prompt_tokens: chunk.usage.prompt_tokens ?? 0,
              completion_tokens: chunk.usage.completion_tokens ?? 0,
              total_tokens: chunk.usage.total_tokens,
            }
          : undefined,
      };
    } catch {
      // Ignore malformed SSE lines
      return undefined;
    }
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let usage: StreamChunk["usage"] | undefined;

  try {
    let streamEnded = false;
    while (!streamEnded) {
      const { done, value } = await reader.read();
      streamEnded = done;

      // On the last pass flush the decoder; otherwise decode incrementally so
      // multi-byte characters split across network chunks stay intact.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // While streaming, the last element may be a partial line and is kept
      // for the next pass. At end of stream it is complete and must be parsed,
      // otherwise a final event without a trailing newline would be lost.
      buffer = done ? "" : (lines.pop() ?? "");

      for (const line of lines) {
        const event = parseEvent(line);
        if (!event) continue;
        if (event.delta) {
          yield { delta: event.delta, done: false };
        }
        if (event.usage) {
          usage = event.usage;
        }
      }
    }
  } finally {
    // Runs on completion, on error, and when the consumer stops iterating
    // early; cancelling closes the underlying HTTP stream. Cancelling an
    // already finished or errored stream is harmless, hence the catch.
    await reader.cancel().catch(() => undefined);
  }

  const usdCost = this.calculateCost(options.model, usage);
  yield { done: true, usage, usd_cost: usdCost };
}
|
|
282
|
+
|
|
189
283
|
/**
|
|
190
284
|
* Creates a completion using the Responses API (/v1/responses).
|
|
191
285
|
* Compatible with providers that implement the OpenAI Responses API spec
|
package/src/clients/index.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
CompletionOptions,
|
|
3
3
|
CompletionResponse,
|
|
4
|
+
StreamChunk,
|
|
4
5
|
EmbeddingOptions,
|
|
5
6
|
EmbeddingResponse,
|
|
6
7
|
GenericClient,
|
|
@@ -34,6 +35,7 @@ import { OpenAiTextPricing } from "./pricing/openai";
|
|
|
34
35
|
import { AnthropicTextPricing } from "./pricing/anthropic";
|
|
35
36
|
import { GeminiPricing } from "./pricing/google";
|
|
36
37
|
import { withRetry } from "./withRetry";
|
|
38
|
+
import { FireworksTextPricing } from "./pricing/fireworks";
|
|
37
39
|
import {
|
|
38
40
|
XaiTextPricing,
|
|
39
41
|
XaiImagePricing,
|
|
@@ -397,9 +399,10 @@ export class AIClient {
|
|
|
397
399
|
const hasModel = this.providerHasModel(provider, model);
|
|
398
400
|
|
|
399
401
|
if (!hasModel) {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
)
|
|
402
|
+
// Model not in local registry — pass it through anyway so the provider
|
|
403
|
+
// API can accept or reject it directly (e.g. newly-released models that
|
|
404
|
+
// haven't been fetched into our local model list yet).
|
|
405
|
+
console.warn(`⚠️ Model '${model}' not in local registry for provider '${provider}', attempting anyway.`);
|
|
403
406
|
}
|
|
404
407
|
|
|
405
408
|
return { client: this.clients[provider], provider, model };
|
|
@@ -609,6 +612,11 @@ export class AIClient {
|
|
|
609
612
|
return { provider, model };
|
|
610
613
|
}
|
|
611
614
|
|
|
615
|
+
// If an explicit provider was given, don't fall through to fuzzy cross-provider
|
|
616
|
+
// search — that would silently pick a completely different provider (e.g. nvidia
|
|
617
|
+
// instead of fireworks). Just pass through and let the API accept/reject the model.
|
|
618
|
+
const hasExplicitProvider = !!provider;
|
|
619
|
+
|
|
612
620
|
if (model?.includes("/")) {
|
|
613
621
|
const split = model.split("/");
|
|
614
622
|
|
|
@@ -620,16 +628,21 @@ export class AIClient {
|
|
|
620
628
|
return { provider: inferredProvider, model: inferredModel };
|
|
621
629
|
}
|
|
622
630
|
|
|
623
|
-
// Starts with match
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
631
|
+
// Starts with match — only if no explicit provider was given
|
|
632
|
+
if (!hasExplicitProvider) {
|
|
633
|
+
const foundBySplit = this.findModel(inferredModel);
|
|
634
|
+
if (foundBySplit) {
|
|
635
|
+
return foundBySplit;
|
|
636
|
+
}
|
|
627
637
|
}
|
|
628
638
|
}
|
|
629
639
|
|
|
630
|
-
|
|
631
|
-
if (
|
|
632
|
-
|
|
640
|
+
// Fuzzy cross-provider search — only if no explicit provider was given
|
|
641
|
+
if (!hasExplicitProvider) {
|
|
642
|
+
const foundByModel = this.findModel(model);
|
|
643
|
+
if (foundByModel) {
|
|
644
|
+
return foundByModel;
|
|
645
|
+
}
|
|
633
646
|
}
|
|
634
647
|
|
|
635
648
|
const allModels = this.listAllModels();
|
|
@@ -672,6 +685,31 @@ export class AIClient {
|
|
|
672
685
|
);
|
|
673
686
|
}
|
|
674
687
|
|
|
688
|
+
/**
 * Streams a chat completion from the given provider.
 *
 * Delegates to the client's `createChatCompletionStream` when it has one.
 * Otherwise the regular completion is run through `withRetry` and emitted as
 * a single delta chunk followed by the final done chunk.
 *
 * @param provider Provider name used to resolve the client.
 * @param options Completion request; `options.model` is resolved via `getClient`.
 * @throws Error when no client or model can be resolved for the provider.
 */
async *createCompletionStream(
  provider: string,
  options: CompletionOptions
): AsyncGenerator<StreamChunk> {
  const resolved = this.getClient(provider, options.model);
  const { client, model } = resolved;
  if (!(model && client)) {
    throw new Error(
      `provider: ${provider} does not have ${options.model} model registered. Try using ${JSON.stringify(this.listAllModels())}`
    );
  }

  if (!client.createChatCompletionStream) {
    // Fallback: non-streaming clients — call normal completion and yield as single chunk
    const completion = await withRetry(
      (signal) => client.createChatCompletion({ ...options, model, signal }),
      options
    );
    const text = completion.choices[0]?.message?.content ?? "";
    yield { delta: text, done: false };
    yield { done: true, usage: completion.usage, usd_cost: completion.usd_cost };
    return;
  }

  yield* client.createChatCompletionStream({ ...options, model });
}
|
|
712
|
+
|
|
675
713
|
async createEmbedding(
|
|
676
714
|
provider: string,
|
|
677
715
|
options: EmbeddingOptions
|
|
@@ -821,12 +859,24 @@ export class AIClient {
|
|
|
821
859
|
return this.clientModels;
|
|
822
860
|
}
|
|
823
861
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
862
|
+
/**
 * Reduces a provider→models map to the models that have a pricing entry.
 *
 * Providers whose client exposes `getPricing()` keep only the ids present in
 * that client's pricing map, and are omitted entirely when none remain.
 * Providers without `getPricing()` (or without a registered client) are
 * copied through unchanged.
 *
 * @param models Map of provider name to model ids.
 * @returns A new map; the input is not modified.
 */
private _filterByPricing(models: Record<string, string[]>): Record<string, string[]> {
  const filtered: Record<string, string[]> = {};

  for (const provider of Object.keys(models)) {
    const ids = models[provider];
    const client = this.clients[provider];

    if (!client?.getPricing) {
      filtered[provider] = ids;
      continue;
    }

    const pricingMap = client.getPricing() as Record<string, ModelPricing>;
    const priced = ids.filter((id) => Boolean(pricingMap[id]));
    if (priced.length > 0) {
      filtered[provider] = priced;
    }
  }

  return filtered;
}
|
|
831
881
|
|
|
832
882
|
/*
|
|
@@ -879,11 +929,8 @@ export class AIClient {
|
|
|
879
929
|
return providerModels;
|
|
880
930
|
}
|
|
881
931
|
|
|
882
|
-
|
|
883
|
-
return this.
|
|
884
|
-
}
|
|
885
|
-
|
|
886
|
-
listAllCompletionModels() {
|
|
932
|
+
/**
 * Returns the provider→models map of completion models.
 * With `options.pricing` set, only models with a known pricing entry are kept.
 */
listAllCompletionModels(options?: { pricing?: boolean }) {
  return options?.pricing
    ? this._filterByPricing(this.completionModels)
    : this.completionModels;
}
|
|
889
936
|
|
|
@@ -891,18 +938,33 @@ export class AIClient {
|
|
|
891
938
|
return Object.keys(this.clientModels);
|
|
892
939
|
}
|
|
893
940
|
|
|
894
|
-
|
|
941
|
+
/**
 * Returns the provider→models map of embedding models.
 * With `options.pricing` set, only models with a known pricing entry are kept.
 */
listAllEmbeddingModels(options?: { pricing?: boolean }) {
  return options?.pricing
    ? this._filterByPricing(this.embeddingModels)
    : this.embeddingModels;
}
|
|
945
|
+
|
|
946
|
+
/**
 * Returns the provider→models map of image models.
 * With `options.pricing` set, only models with a known pricing entry are kept.
 */
listAllImageModels(options?: { pricing?: boolean }) {
  return options?.pricing
    ? this._filterByPricing(this.imageModels)
    : this.imageModels;
}
|
|
897
950
|
|
|
898
|
-
listAllAudioModels() {
|
|
951
|
+
/**
 * Returns the provider→models map of audio models.
 * With `options.pricing` set, only models with a known pricing entry are kept.
 */
listAllAudioModels(options?: { pricing?: boolean }) {
  return options?.pricing
    ? this._filterByPricing(this.audioModels)
    : this.audioModels;
}
|
|
901
955
|
|
|
902
|
-
listAllVideoModels() {
|
|
956
|
+
/**
 * Returns the provider→models map of video models.
 * With `options.pricing` set, only models with a known pricing entry are kept.
 */
listAllVideoModels(options?: { pricing?: boolean }) {
  return options?.pricing
    ? this._filterByPricing(this.videoModels)
    : this.videoModels;
}
|
|
905
960
|
|
|
961
|
+
/**
 * Flattens the provider→models map into a list of `{ id }` objects whose id
 * has the form `provider/model`.
 * With `options.pricing` set, only models with a known pricing entry are listed.
 */
listAllModelsWithProvider(options?: { pricing?: boolean }) {
  const byProvider = options?.pricing
    ? this._filterByPricing(this.clientModels)
    : this.clientModels;

  return Object.entries(byProvider).flatMap(([provider, ids]) =>
    ids.map((m) => ({ id: `${provider}/${m}` }))
  );
}
|
|
967
|
+
|
|
906
968
|
/**
|
|
907
969
|
* Returns the context window limit (in tokens) for a given model.
|
|
908
970
|
* Delegates to the registered client's getContextLimit() if available.
|
|
@@ -958,6 +1020,7 @@ export class AIClient {
|
|
|
958
1020
|
...AnthropicTextPricing,
|
|
959
1021
|
...GeminiPricing,
|
|
960
1022
|
...XaiTextPricing,
|
|
1023
|
+
...FireworksTextPricing,
|
|
961
1024
|
};
|
|
962
1025
|
const allImagePricing: Record<string, ModelPricing> = {
|
|
963
1026
|
...XaiImagePricing,
|
package/src/clients/openai.ts
CHANGED
|
@@ -190,6 +190,64 @@ export class GenericOpenAiClient implements GenericClient {
|
|
|
190
190
|
usd_cost: usdCost,
|
|
191
191
|
};
|
|
192
192
|
}
|
|
193
|
+
|
|
194
|
+
/**
 * Streams a chat completion token-by-token.
 *
 * Yields `{ delta, done: false }` for every non-empty content delta, then a
 * final `{ done: true }` chunk carrying usage and cost once the stream ends.
 * Models listed in `OpenAiResponsesOnlyModels` are not streamed: the regular
 * completion is awaited and emitted as one delta followed by the done chunk.
 *
 * @param options Completion request; `options.signal` is passed to the SDK call.
 */
async *createChatCompletionStream(
  options: CompletionOptions
): AsyncGenerator<{ delta?: string; done: boolean; usage?: CompletionResponse['usage']; usd_cost?: number }> {
  const responsesOnly = OpenAiResponsesOnlyModels.includes(options.model);
  if (responsesOnly) {
    // Fallback: non-streaming for Responses-only models
    const completion = await this.createChatCompletion(options);
    const text = completion.choices[0]?.message?.content ?? "";
    yield { delta: text, done: false };
    yield { done: true, usage: completion.usage, usd_cost: completion.usd_cost };
    return;
  }

  // Tool messages are rebuilt so that `content` is never empty/undefined.
  const openaiMessages = options.messages.map((msg) =>
    msg.role === "tool"
      ? ({
          ...msg,
          content: msg.content || "",
          role: "tool",
          tool_call_id: msg.tool_call_id,
        } as ChatCompletionToolMessageParam)
      : (msg as ChatCompletionMessageParam)
  );

  const stream = await this.client.chat.completions.create({
    model: options.model,
    messages: openaiMessages,
    max_tokens: options.max_tokens,
    stream: true,
    stream_options: { include_usage: true },
    ...(options.tools && { tools: options.tools, tool_choice: "auto" }),
  }, { signal: options.signal });

  let usage: CompletionResponse['usage'] | undefined;
  for await (const chunk of stream) {
    const text = chunk.choices[0]?.delta?.content;
    if (text) {
      yield { delta: text, done: false };
    }

    const reported = chunk.usage;
    if (reported) {
      // Keep the most recent usage block reported by the stream.
      usage = {
        prompt_tokens: reported.prompt_tokens ?? 0,
        completion_tokens: reported.completion_tokens ?? 0,
        total_tokens: reported.total_tokens ?? 0,
      };
    }
  }

  const usdCost = !usage
    ? undefined
    : this.calculateCost(options.model, usage as OpenAI.ChatCompletion["usage"]);
  yield { done: true, usage, usd_cost: usdCost };
}
|
|
250
|
+
|
|
193
251
|
/**
|
|
194
252
|
* Creates a completion using the OpenAI Responses API.
|
|
195
253
|
* Used for models that only support the Responses API (e.g. gpt-5.3-codex, gpt-5.4).
|
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export const FireworksTextPricing: Record<string, { input: number; output: number; cache_hit?: number }> = {
|
|
6
6
|
// Moonshot AI
|
|
7
|
+
"accounts/fireworks/models/kimi-k2p7-code": { input: 0.95, cache_hit: 0.19, output: 4.0 },
|
|
7
8
|
"accounts/fireworks/models/kimi-k2-6": { input: 0.95, cache_hit: 0.16, output: 4.0 },
|
|
8
9
|
"accounts/fireworks/models/kimi-k2-5": { input: 0.60, cache_hit: 0.10, output: 3.0 },
|
|
9
10
|
|
|
10
11
|
// MiniMax
|
|
12
|
+
"accounts/fireworks/models/minimax-m3": { input: 0.30, cache_hit: 0.06, output: 1.20 },
|
|
11
13
|
"accounts/fireworks/models/minimax-m2-7": { input: 0.30, cache_hit: 0.06, output: 1.20 },
|
|
12
14
|
"accounts/fireworks/models/minimax-m2-5": { input: 0.30, cache_hit: 0.03, output: 1.20 },
|
|
13
15
|
|
|
@@ -19,6 +21,7 @@ export const FireworksTextPricing: Record<string, { input: number; output: numbe
|
|
|
19
21
|
"accounts/fireworks/models/qwen3-8b": { input: 0.20, cache_hit: 0.10, output: 0.20 },
|
|
20
22
|
|
|
21
23
|
// Z.ai
|
|
24
|
+
"accounts/fireworks/models/glm-5-2": { input: 1.40, cache_hit: 0.26, output: 4.40 },
|
|
22
25
|
"accounts/fireworks/models/glm-5-1": { input: 1.40, cache_hit: 0.26, output: 4.40 },
|
|
23
26
|
"accounts/fireworks/models/glm-5": { input: 1.00, cache_hit: 0.20, output: 3.20 },
|
|
24
27
|
"accounts/fireworks/models/glm-4-7": { input: 0.60, cache_hit: 0.30, output: 2.20 },
|
package/src/clients/types.ts
CHANGED
|
@@ -136,6 +136,16 @@ export interface CompletionResponse {
|
|
|
136
136
|
usd_cost?: number;
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
+
/**
 * One item produced by a streaming completion.
 *
 * Intermediate chunks carry `delta` with `done: false`; the last chunk has
 * `done: true`, no `delta`, and may carry `usage` and `usd_cost`.
 */
export interface StreamChunk {
  /** Incremental text token(s); present only on intermediate chunks. */
  delta?: string;
  /** `true` on the final chunk, `false` on every chunk before it. */
  done: boolean;
  /** Token usage, when available on the final chunk. */
  usage?: TokenUsage;
  /** Cost in USD, when available on the final chunk. */
  usd_cost?: number;
}
|
|
148
|
+
|
|
139
149
|
export interface EmbeddingOptions extends RetryOptions {
|
|
140
150
|
input: string;
|
|
141
151
|
model?: string;
|
|
@@ -305,6 +315,8 @@ export interface FileDownloadResponse {
|
|
|
305
315
|
export interface GenericClient {
|
|
306
316
|
setKey(key: string): void;
|
|
307
317
|
createChatCompletion(options: CompletionOptions): Promise<CompletionResponse>;
|
|
318
|
+
/** Optional streaming variant — yields incremental tokens then a final done chunk. */
|
|
319
|
+
createChatCompletionStream?(options: CompletionOptions): AsyncGenerator<StreamChunk>;
|
|
308
320
|
createEmbedding(options: EmbeddingOptions): Promise<EmbeddingResponse>;
|
|
309
321
|
createAudioTranscription?(
|
|
310
322
|
options: AudioTranscriptionOptions
|
|
@@ -343,4 +355,10 @@ export interface GenericClient {
|
|
|
343
355
|
getContextLimit?(
|
|
344
356
|
model: string
|
|
345
357
|
): { contextLimit: number; threshold: number } | undefined;
|
|
358
|
+
/**
|
|
359
|
+
* Returns the pricing entry for a specific model, or the entire pricing map if no model is given.
|
|
360
|
+
* Returns undefined for a specific model if no pricing is known.
|
|
361
|
+
* Only implemented by HttpClient-based providers that have been given a pricing map via setPrices().
|
|
362
|
+
*/
|
|
363
|
+
getPricing?(model?: string): import("./pricing/types").ModelPricing | Record<string, import("./pricing/types").ModelPricing> | undefined;
|
|
346
364
|
}
|