@tyvm/knowhow 0.0.120 → 0.0.122

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/agents/base/base.ts +11 -1
  3. package/src/cli.ts +2 -0
  4. package/src/clients/fireworks.ts +54 -0
  5. package/src/clients/http.ts +94 -0
  6. package/src/clients/index.ts +87 -24
  7. package/src/clients/openai.ts +58 -0
  8. package/src/clients/pricing/fireworks.ts +3 -0
  9. package/src/clients/types.ts +18 -0
  10. package/src/commands/replay.ts +424 -0
  11. package/src/processors/Base64ImageDetector.ts +44 -22
  12. package/src/services/modules/index.ts +5 -4
  13. package/tests/clients/AIClient.test.ts +5 -3
  14. package/tests/processors/Base64ImageDetector.test.ts +135 -0
  15. package/tests/processors/CustomVariables.test.ts +17 -7
  16. package/ts_build/package.json +1 -1
  17. package/ts_build/src/agents/base/base.js +8 -1
  18. package/ts_build/src/agents/base/base.js.map +1 -1
  19. package/ts_build/src/cli.js +2 -0
  20. package/ts_build/src/cli.js.map +1 -1
  21. package/ts_build/src/clients/fireworks.d.ts +9 -0
  22. package/ts_build/src/clients/fireworks.js +29 -0
  23. package/ts_build/src/clients/fireworks.js.map +1 -1
  24. package/ts_build/src/clients/http.d.ts +3 -1
  25. package/ts_build/src/clients/http.js +76 -0
  26. package/ts_build/src/clients/http.js.map +1 -1
  27. package/ts_build/src/clients/index.d.ts +23 -9
  28. package/ts_build/src/clients/index.js +68 -20
  29. package/ts_build/src/clients/index.js.map +1 -1
  30. package/ts_build/src/clients/openai.d.ts +6 -0
  31. package/ts_build/src/clients/openai.js +45 -0
  32. package/ts_build/src/clients/openai.js.map +1 -1
  33. package/ts_build/src/clients/pricing/fireworks.js +3 -0
  34. package/ts_build/src/clients/pricing/fireworks.js.map +1 -1
  35. package/ts_build/src/clients/types.d.ts +8 -0
  36. package/ts_build/src/commands/replay.d.ts +2 -0
  37. package/ts_build/src/commands/replay.js +324 -0
  38. package/ts_build/src/commands/replay.js.map +1 -0
  39. package/ts_build/src/processors/Base64ImageDetector.js +19 -12
  40. package/ts_build/src/processors/Base64ImageDetector.js.map +1 -1
  41. package/ts_build/src/services/modules/index.js.map +1 -1
  42. package/ts_build/tests/clients/AIClient.test.js +3 -3
  43. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  44. package/ts_build/tests/processors/Base64ImageDetector.test.js +88 -0
  45. package/ts_build/tests/processors/Base64ImageDetector.test.js.map +1 -1
  46. package/ts_build/tests/processors/CustomVariables.test.js +8 -4
  47. package/ts_build/tests/processors/CustomVariables.test.js.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.120",
3
+ "version": "0.0.122",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
package/src/agents/base/base.ts CHANGED
@@ -668,11 +668,15 @@ export abstract class BaseAgent implements IAgent {
668
668
  promise: Promise<T>,
669
669
  interruptValue: T
670
670
  ): Promise<T> {
671
- return new Promise<T>((resolve) => {
671
+ return new Promise<T>((resolve, reject) => {
672
+ let interrupted = false;
673
+
672
674
  this._interruptResolve = (value: any) => {
675
+ interrupted = true;
673
676
  this._interruptResolve = null;
674
677
  resolve(value);
675
678
  };
679
+
676
680
  promise.then((result) => {
677
681
  if (this._interruptResolve) {
678
682
  this._interruptResolve = null;
@@ -681,7 +685,13 @@ export abstract class BaseAgent implements IAgent {
681
685
  }).catch((err) => {
682
686
  if (this._interruptResolve) {
683
687
  this._interruptResolve = null;
688
+ }
689
+ // Only swallow the error if interrupt() was explicitly called.
690
+ // Otherwise re-throw so callers see the real error.
691
+ if (interrupted) {
684
692
  resolve(interruptValue);
693
+ } else {
694
+ reject(err);
685
695
  }
686
696
  });
687
697
  });
package/src/cli.ts CHANGED
@@ -37,6 +37,7 @@ import {
37
37
  addGithubCredentialsCommand,
38
38
  } from "./commands/misc";
39
39
  import { addConvertCommand } from "./commands/convert";
40
+ import { addReplayCommand } from "./commands/replay";
40
41
 
41
42
  // Handle unhandled promise rejections gracefully — particularly from MCP SDK
42
43
  // which fires errors via event emitters that can bypass Promise.allSettled.
@@ -100,6 +101,7 @@ async function main() {
100
101
  addModulesCommand(program);
101
102
  addMcpCommands(program);
102
103
  addConvertCommand(program);
104
+ addReplayCommand(program);
103
105
 
104
106
  // Load global modules early (before parse) so they can register CLI subcommands.
105
107
  // We pass only the Program in context — no services are spun up at this stage.
package/src/clients/fireworks.ts CHANGED
@@ -1,5 +1,8 @@
1
1
  import { HttpClient } from "./http";
2
2
  import { FireworksTextPricing } from "./pricing/fireworks";
3
+ import { CompletionOptions, CompletionResponse } from "./types";
4
+
5
+ type ModelInfo = { id: string; object: string; owned_by: string };
3
6
 
4
7
  /**
5
8
  * Fireworks AI client — OpenAI-compatible API (fast serverless inference)
@@ -12,4 +15,55 @@ export class GenericFireworksClient extends HttpClient {
12
15
  if (apiKey) this.setJwt(apiKey);
13
16
  this.setPrices(FireworksTextPricing);
14
17
  }
18
+
19
+ /**
20
+ * Supplement the live /v1/models response with any models we have in the
21
+ * pricing table. The Fireworks API sometimes doesn't return newly-released
22
+ * models (e.g. minimax-m3, kimi-k2p7-code) even though they are available
23
+ * for inference — so we use the pricing map as the source of truth for
24
+ * "models we know exist on this provider".
25
+ */
26
+ async getModels(type = "all"): Promise<ModelInfo[]> {
27
+ let liveModels: ModelInfo[] = [];
28
+ try {
29
+ liveModels = await super.getModels(type);
30
+ } catch (_err) {
31
+ // Live API call failed — fall back to pricing map only
32
+ }
33
+
34
+ const liveIds = new Set(liveModels.map((m) => m.id));
35
+ const pricingModels: ModelInfo[] = Object.keys(FireworksTextPricing)
36
+ .filter((id) => !liveIds.has(id))
37
+ .map((id) => ({ id, object: "model", owned_by: "fireworks" }));
38
+
39
+ return [...liveModels, ...pricingModels];
40
+ }
41
+
42
+ /**
43
+ * Sanitize the request before sending to Fireworks.
44
+ * Some models (e.g. kimi-k2p7-code) reject extra fields like:
45
+ * - tools[N].function.returns (non-standard extension)
46
+ * - long_ttl_cache (Anthropic-specific cache flag)
47
+ */
48
+ async createChatCompletion(
49
+ options: CompletionOptions
50
+ ): Promise<CompletionResponse> {
51
+ const sanitized: CompletionOptions = {
52
+ ...options,
53
+ // Strip Anthropic-specific field not accepted by Fireworks
54
+ long_ttl_cache: undefined,
55
+ };
56
+
57
+ if (sanitized.tools) {
58
+ sanitized.tools = sanitized.tools.map((tool) => {
59
+ const { returns, ...fnRest } = tool.function as any;
60
+ return {
61
+ ...tool,
62
+ function: fnRest,
63
+ };
64
+ });
65
+ }
66
+
67
+ return super.createChatCompletion(sanitized);
68
+ }
15
69
  }
package/src/clients/http.ts CHANGED
@@ -3,6 +3,7 @@ import {
3
3
  GenericClient,
4
4
  CompletionOptions,
5
5
  CompletionResponse,
6
+ StreamChunk,
6
7
  EmbeddingOptions,
7
8
  EmbeddingResponse,
8
9
  } from "./types";
@@ -91,6 +92,17 @@ export class HttpClient implements GenericClient {
91
92
  this.pricingMap = pricingMap;
92
93
  }
93
94
 
95
+ /**
96
+ * Returns the pricing entry for a specific model, or all pricing entries if no model is given.
97
+ * Returns undefined for a specific model if no pricing is known.
98
+ */
99
+ getPricing(model?: string): ModelPricing | Record<string, ModelPricing> | undefined {
100
+ if (model !== undefined) {
101
+ return this.pricingMap[model];
102
+ }
103
+ return this.pricingMap;
104
+ }
105
+
94
106
  /**
95
107
  * Calculate USD cost for a completion/embedding call from token usage.
96
108
  * Returns undefined if no pricing entry exists for the model.
@@ -186,6 +198,88 @@ export class HttpClient implements GenericClient {
186
198
  });
187
199
  }
188
200
 
201
+ /**
202
+ * Streams a chat completion via OpenAI-compatible SSE (`stream: true`).
203
+ * Parses `data: {...}` lines and yields token deltas, then a final done chunk.
204
+ */
205
+ async *createChatCompletionStream(
206
+ options: CompletionOptions
207
+ ): AsyncGenerator<StreamChunk> {
208
+ const body = {
209
+ ...options,
210
+ model: options.model,
211
+ messages: options.messages,
212
+ max_tokens: options.max_tokens || 4000,
213
+ ...this.extra_body,
214
+ stream: true,
215
+ stream_options: { include_usage: true },
216
+ ...(options.tools && {
217
+ tools: options.tools,
218
+ tool_choice: "auto",
219
+ }),
220
+ };
221
+
222
+ const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
223
+ method: "POST",
224
+ headers: {
225
+ ...(this.headers as Record<string, string>),
226
+ "Content-Type": "application/json",
227
+ "Accept": "text/event-stream",
228
+ },
229
+ body: JSON.stringify(body),
230
+ signal: options.signal,
231
+ });
232
+
233
+ if (!response.ok) {
234
+ const text = await response.text().catch(() => "");
235
+ throw new Error(`HTTP ${response.status}: ${text}`);
236
+ }
237
+
238
+ if (!response.body) {
239
+ throw new Error("No response body for streaming request");
240
+ }
241
+
242
+ const reader = response.body.getReader();
243
+ const decoder = new TextDecoder();
244
+ let buffer = "";
245
+ let usage: StreamChunk["usage"] | undefined;
246
+
247
+ while (true) {
248
+ const { done, value } = await reader.read();
249
+ if (done) break;
250
+
251
+ buffer += decoder.decode(value, { stream: true });
252
+ const lines = buffer.split("\n");
253
+ buffer = lines.pop() ?? "";
254
+
255
+ for (const line of lines) {
256
+ const trimmed = line.trim();
257
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
258
+ const jsonStr = trimmed.slice(6);
259
+ if (jsonStr === "[DONE]") continue;
260
+ try {
261
+ const chunk = JSON.parse(jsonStr);
262
+ const delta = chunk.choices?.[0]?.delta?.content;
263
+ if (delta) {
264
+ yield { delta, done: false };
265
+ }
266
+ if (chunk.usage) {
267
+ usage = {
268
+ prompt_tokens: chunk.usage.prompt_tokens ?? 0,
269
+ completion_tokens: chunk.usage.completion_tokens ?? 0,
270
+ total_tokens: chunk.usage.total_tokens,
271
+ };
272
+ }
273
+ } catch {
274
+ // Ignore malformed SSE lines
275
+ }
276
+ }
277
+ }
278
+
279
+ const usdCost = this.calculateCost(options.model, usage);
280
+ yield { done: true, usage, usd_cost: usdCost };
281
+ }
282
+
189
283
  /**
190
284
  * Creates a completion using the Responses API (/v1/responses).
191
285
  * Compatible with providers that implement the OpenAI Responses API spec
package/src/clients/index.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  CompletionOptions,
3
3
  CompletionResponse,
4
+ StreamChunk,
4
5
  EmbeddingOptions,
5
6
  EmbeddingResponse,
6
7
  GenericClient,
@@ -34,6 +35,7 @@ import { OpenAiTextPricing } from "./pricing/openai";
34
35
  import { AnthropicTextPricing } from "./pricing/anthropic";
35
36
  import { GeminiPricing } from "./pricing/google";
36
37
  import { withRetry } from "./withRetry";
38
+ import { FireworksTextPricing } from "./pricing/fireworks";
37
39
  import {
38
40
  XaiTextPricing,
39
41
  XaiImagePricing,
@@ -397,9 +399,10 @@ export class AIClient {
397
399
  const hasModel = this.providerHasModel(provider, model);
398
400
 
399
401
  if (!hasModel) {
400
- throw new Error(
401
- `Model ${model} not registered for provider ${provider}.`
402
- );
402
+ // Model not in local registry — pass it through anyway so the provider
403
+ // API can accept or reject it directly (e.g. newly-released models that
404
+ // haven't been fetched into our local model list yet).
405
+ console.warn(`⚠️ Model '${model}' not in local registry for provider '${provider}', attempting anyway.`);
403
406
  }
404
407
 
405
408
  return { client: this.clients[provider], provider, model };
@@ -609,6 +612,11 @@ export class AIClient {
609
612
  return { provider, model };
610
613
  }
611
614
 
615
+ // If an explicit provider was given, don't fall through to fuzzy cross-provider
616
+ // search — that would silently pick a completely different provider (e.g. nvidia
617
+ // instead of fireworks). Just pass through and let the API accept/reject the model.
618
+ const hasExplicitProvider = !!provider;
619
+
612
620
  if (model?.includes("/")) {
613
621
  const split = model.split("/");
614
622
 
@@ -620,16 +628,21 @@ export class AIClient {
620
628
  return { provider: inferredProvider, model: inferredModel };
621
629
  }
622
630
 
623
- // Starts with match
624
- const foundBySplit = this.findModel(inferredModel);
625
- if (foundBySplit) {
626
- return foundBySplit;
631
+ // Starts with match — only if no explicit provider was given
632
+ if (!hasExplicitProvider) {
633
+ const foundBySplit = this.findModel(inferredModel);
634
+ if (foundBySplit) {
635
+ return foundBySplit;
636
+ }
627
637
  }
628
638
  }
629
639
 
630
- const foundByModel = this.findModel(model);
631
- if (foundByModel) {
632
- return foundByModel;
640
+ // Fuzzy cross-provider search — only if no explicit provider was given
641
+ if (!hasExplicitProvider) {
642
+ const foundByModel = this.findModel(model);
643
+ if (foundByModel) {
644
+ return foundByModel;
645
+ }
633
646
  }
634
647
 
635
648
  const allModels = this.listAllModels();
@@ -672,6 +685,31 @@ export class AIClient {
672
685
  );
673
686
  }
674
687
 
688
+ async *createCompletionStream(
689
+ provider: string,
690
+ options: CompletionOptions
691
+ ): AsyncGenerator<StreamChunk> {
692
+ const { client, model } = this.getClient(provider, options.model);
693
+ if (!model || !client) {
694
+ throw new Error(
695
+ `provider: ${provider} does not have ${
696
+ options.model
697
+ } model registered. Try using ${JSON.stringify(this.listAllModels())}`
698
+ );
699
+ }
700
+ if (client.createChatCompletionStream) {
701
+ yield* client.createChatCompletionStream({ ...options, model });
702
+ } else {
703
+ // Fallback: non-streaming clients — call normal completion and yield as single chunk
704
+ const result = await withRetry(
705
+ (signal) => client.createChatCompletion({ ...options, model, signal }),
706
+ options
707
+ );
708
+ yield { delta: result.choices[0]?.message?.content ?? "", done: false };
709
+ yield { done: true, usage: result.usage, usd_cost: result.usd_cost };
710
+ }
711
+ }
712
+
675
713
  async createEmbedding(
676
714
  provider: string,
677
715
  options: EmbeddingOptions
@@ -821,12 +859,24 @@ export class AIClient {
821
859
  return this.clientModels;
822
860
  }
823
861
 
824
- listAllModelsWithProvider() {
825
- return Object.entries(this.listAllModels())
826
- .map(([provider, models]) =>
827
- models.map((m) => ({ id: `${provider}/${m}` }))
828
- )
829
- .flat();
862
+ /**
863
+ * Filters a provider→models map to only include models that have known pricing.
864
+ * For HttpClient-based providers with getPricing(), only priced models are kept.
865
+ * For other providers (no getPricing()), all models pass through unchanged.
866
+ */
867
+ private _filterByPricing(models: Record<string, string[]>): Record<string, string[]> {
868
+ const result: Record<string, string[]> = {};
869
+ for (const [provider, ids] of Object.entries(models)) {
870
+ const client = this.clients[provider];
871
+ if (client?.getPricing) {
872
+ const pricingMap = client.getPricing() as Record<string, ModelPricing>;
873
+ const priced = ids.filter((id) => !!pricingMap[id]);
874
+ if (priced.length > 0) result[provider] = priced;
875
+ } else {
876
+ result[provider] = ids;
877
+ }
878
+ }
879
+ return result;
830
880
  }
831
881
 
832
882
  /*
@@ -879,11 +929,8 @@ export class AIClient {
879
929
  return providerModels;
880
930
  }
881
931
 
882
- listAllEmbeddingModels() {
883
- return this.embeddingModels;
884
- }
885
-
886
- listAllCompletionModels() {
932
+ listAllCompletionModels(options?: { pricing?: boolean }) {
933
+ if (options?.pricing) return this._filterByPricing(this.completionModels);
887
934
  return this.completionModels;
888
935
  }
889
936
 
@@ -891,18 +938,33 @@ export class AIClient {
891
938
  return Object.keys(this.clientModels);
892
939
  }
893
940
 
894
- listAllImageModels() {
941
+ listAllEmbeddingModels(options?: { pricing?: boolean }) {
942
+ if (options?.pricing) return this._filterByPricing(this.embeddingModels);
943
+ return this.embeddingModels;
944
+ }
945
+
946
+ listAllImageModels(options?: { pricing?: boolean }) {
947
+ if (options?.pricing) return this._filterByPricing(this.imageModels);
895
948
  return this.imageModels;
896
949
  }
897
950
 
898
- listAllAudioModels() {
951
+ listAllAudioModels(options?: { pricing?: boolean }) {
952
+ if (options?.pricing) return this._filterByPricing(this.audioModels);
899
953
  return this.audioModels;
900
954
  }
901
955
 
902
- listAllVideoModels() {
956
+ listAllVideoModels(options?: { pricing?: boolean }) {
957
+ if (options?.pricing) return this._filterByPricing(this.videoModels);
903
958
  return this.videoModels;
904
959
  }
905
960
 
961
+ listAllModelsWithProvider(options?: { pricing?: boolean }) {
962
+ const models = options?.pricing ? this._filterByPricing(this.clientModels) : this.clientModels;
963
+ return Object.entries(models)
964
+ .map(([provider, ids]) => ids.map((m) => ({ id: `${provider}/${m}` })))
965
+ .flat();
966
+ }
967
+
906
968
  /**
907
969
  * Returns the context window limit (in tokens) for a given model.
908
970
  * Delegates to the registered client's getContextLimit() if available.
@@ -958,6 +1020,7 @@ export class AIClient {
958
1020
  ...AnthropicTextPricing,
959
1021
  ...GeminiPricing,
960
1022
  ...XaiTextPricing,
1023
+ ...FireworksTextPricing,
961
1024
  };
962
1025
  const allImagePricing: Record<string, ModelPricing> = {
963
1026
  ...XaiImagePricing,
package/src/clients/openai.ts CHANGED
@@ -190,6 +190,64 @@ export class GenericOpenAiClient implements GenericClient {
190
190
  usd_cost: usdCost,
191
191
  };
192
192
  }
193
+
194
+ /**
195
+ * Streams a chat completion token-by-token.
196
+ * Yields delta content strings as they arrive, then yields a final
197
+ * CompletionResponse with usage info when the stream ends.
198
+ */
199
+ async *createChatCompletionStream(
200
+ options: CompletionOptions
201
+ ): AsyncGenerator<{ delta?: string; done: boolean; usage?: CompletionResponse['usage']; usd_cost?: number }> {
202
+ if (OpenAiResponsesOnlyModels.includes(options.model)) {
203
+ // Fallback: non-streaming for Responses-only models
204
+ const result = await this.createChatCompletion(options);
205
+ yield { delta: result.choices[0]?.message?.content ?? "", done: false };
206
+ yield { done: true, usage: result.usage, usd_cost: result.usd_cost };
207
+ return;
208
+ }
209
+
210
+ const openaiMessages = options.messages.map((msg) => {
211
+ if (msg.role === "tool") {
212
+ return {
213
+ ...msg,
214
+ content: msg.content || "",
215
+ role: "tool",
216
+ tool_call_id: msg.tool_call_id,
217
+ } as ChatCompletionToolMessageParam;
218
+ }
219
+ return msg as ChatCompletionMessageParam;
220
+ });
221
+
222
+ const stream = await this.client.chat.completions.create({
223
+ model: options.model,
224
+ messages: openaiMessages,
225
+ max_tokens: options.max_tokens,
226
+ stream: true,
227
+ stream_options: { include_usage: true },
228
+ ...(options.tools && { tools: options.tools, tool_choice: "auto" }),
229
+ }, { signal: options.signal });
230
+
231
+ let usage: CompletionResponse['usage'] | undefined;
232
+ for await (const chunk of stream) {
233
+ const delta = chunk.choices[0]?.delta?.content;
234
+ if (delta) {
235
+ yield { delta, done: false };
236
+ }
237
+ if (chunk.usage) {
238
+ usage = {
239
+ prompt_tokens: chunk.usage.prompt_tokens ?? 0,
240
+ completion_tokens: chunk.usage.completion_tokens ?? 0,
241
+ total_tokens: chunk.usage.total_tokens ?? 0,
242
+ };
243
+ }
244
+ }
245
+ const usdCost = usage
246
+ ? this.calculateCost(options.model, usage as OpenAI.ChatCompletion["usage"])
247
+ : undefined;
248
+ yield { done: true, usage, usd_cost: usdCost };
249
+ }
250
+
193
251
  /**
194
252
  * Creates a completion using the OpenAI Responses API.
195
253
  * Used for models that only support the Responses API (e.g. gpt-5.3-codex, gpt-5.4).
package/src/clients/pricing/fireworks.ts CHANGED
@@ -4,10 +4,12 @@
4
4
  */
5
5
  export const FireworksTextPricing: Record<string, { input: number; output: number; cache_hit?: number }> = {
6
6
  // Moonshot AI
7
+ "accounts/fireworks/models/kimi-k2p7-code": { input: 0.95, cache_hit: 0.19, output: 4.0 },
7
8
  "accounts/fireworks/models/kimi-k2-6": { input: 0.95, cache_hit: 0.16, output: 4.0 },
8
9
  "accounts/fireworks/models/kimi-k2-5": { input: 0.60, cache_hit: 0.10, output: 3.0 },
9
10
 
10
11
  // MiniMax
12
+ "accounts/fireworks/models/minimax-m3": { input: 0.30, cache_hit: 0.06, output: 1.20 },
11
13
  "accounts/fireworks/models/minimax-m2-7": { input: 0.30, cache_hit: 0.06, output: 1.20 },
12
14
  "accounts/fireworks/models/minimax-m2-5": { input: 0.30, cache_hit: 0.03, output: 1.20 },
13
15
 
@@ -19,6 +21,7 @@ export const FireworksTextPricing: Record<string, { input: number; output: numbe
19
21
  "accounts/fireworks/models/qwen3-8b": { input: 0.20, cache_hit: 0.10, output: 0.20 },
20
22
 
21
23
  // Z.ai
24
+ "accounts/fireworks/models/glm-5-2": { input: 1.40, cache_hit: 0.26, output: 4.40 },
22
25
  "accounts/fireworks/models/glm-5-1": { input: 1.40, cache_hit: 0.26, output: 4.40 },
23
26
  "accounts/fireworks/models/glm-5": { input: 1.00, cache_hit: 0.20, output: 3.20 },
24
27
  "accounts/fireworks/models/glm-4-7": { input: 0.60, cache_hit: 0.30, output: 2.20 },
package/src/clients/types.ts CHANGED
@@ -136,6 +136,16 @@ export interface CompletionResponse {
136
136
  usd_cost?: number;
137
137
  }
138
138
 
139
+ /** A single chunk yielded by a streaming completion. */
140
+ export interface StreamChunk {
141
+ /** Incremental text token(s). Only present on intermediate chunks. */
142
+ delta?: string;
143
+ /** True on the final chunk (no delta, but usage/cost available). */
144
+ done: boolean;
145
+ usage?: TokenUsage;
146
+ usd_cost?: number;
147
+ }
148
+
139
149
  export interface EmbeddingOptions extends RetryOptions {
140
150
  input: string;
141
151
  model?: string;
@@ -305,6 +315,8 @@ export interface FileDownloadResponse {
305
315
  export interface GenericClient {
306
316
  setKey(key: string): void;
307
317
  createChatCompletion(options: CompletionOptions): Promise<CompletionResponse>;
318
+ /** Optional streaming variant — yields incremental tokens then a final done chunk. */
319
+ createChatCompletionStream?(options: CompletionOptions): AsyncGenerator<StreamChunk>;
308
320
  createEmbedding(options: EmbeddingOptions): Promise<EmbeddingResponse>;
309
321
  createAudioTranscription?(
310
322
  options: AudioTranscriptionOptions
@@ -343,4 +355,10 @@ export interface GenericClient {
343
355
  getContextLimit?(
344
356
  model: string
345
357
  ): { contextLimit: number; threshold: number } | undefined;
358
+ /**
359
+ * Returns the pricing entry for a specific model, or the entire pricing map if no model is given.
360
+ * Returns undefined for a specific model if no pricing is known.
361
+ * Only implemented by HttpClient-based providers that have been given a pricing map via setPrices().
362
+ */
363
+ getPricing?(model?: string): import("./pricing/types").ModelPricing | Record<string, import("./pricing/types").ModelPricing> | undefined;
346
364
  }