@yourgpt/llm-sdk 2.5.1-beta.2 → 2.5.1-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -487,6 +487,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
487
487
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
488
488
  if (baseUrl.includes("x.ai")) return "xai";
489
489
  if (baseUrl.includes("azure")) return "azure";
490
+ if (baseUrl.includes("fireworks.ai")) return "fireworks";
490
491
  return "openai";
491
492
  }
492
493
  async getClient() {
@@ -1304,6 +1305,11 @@ var AnthropicAdapter = class {
1304
1305
  content,
1305
1306
  toolCalls,
1306
1307
  thinking: thinking || void 0,
1308
+ usage: response.usage ? {
1309
+ promptTokens: response.usage.input_tokens ?? 0,
1310
+ completionTokens: response.usage.output_tokens ?? 0,
1311
+ totalTokens: (response.usage.input_tokens ?? 0) + (response.usage.output_tokens ?? 0)
1312
+ } : void 0,
1307
1313
  rawResponse: response
1308
1314
  };
1309
1315
  } catch (error) {
@@ -2162,6 +2168,11 @@ var GoogleAdapter = class {
2162
2168
  return {
2163
2169
  content: textContent,
2164
2170
  toolCalls,
2171
+ usage: response.usageMetadata ? {
2172
+ promptTokens: response.usageMetadata.promptTokenCount ?? 0,
2173
+ completionTokens: (response.usageMetadata.candidatesTokenCount ?? 0) + (response.usageMetadata.thoughtsTokenCount ?? 0),
2174
+ totalTokens: response.usageMetadata.totalTokenCount ?? 0
2175
+ } : void 0,
2165
2176
  rawResponse: response
2166
2177
  };
2167
2178
  }
@@ -2390,6 +2401,11 @@ var AzureAdapter = class {
2390
2401
  return {
2391
2402
  content: message?.content || "",
2392
2403
  toolCalls,
2404
+ usage: response.usage ? {
2405
+ promptTokens: response.usage.prompt_tokens ?? 0,
2406
+ completionTokens: response.usage.completion_tokens ?? 0,
2407
+ totalTokens: response.usage.total_tokens ?? (response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0)
2408
+ } : void 0,
2393
2409
  rawResponse: response
2394
2410
  };
2395
2411
  }
@@ -485,6 +485,7 @@ var OpenAIAdapter = class _OpenAIAdapter {
485
485
  if (baseUrl.includes("generativelanguage.googleapis.com")) return "google";
486
486
  if (baseUrl.includes("x.ai")) return "xai";
487
487
  if (baseUrl.includes("azure")) return "azure";
488
+ if (baseUrl.includes("fireworks.ai")) return "fireworks";
488
489
  return "openai";
489
490
  }
490
491
  async getClient() {
@@ -1302,6 +1303,11 @@ var AnthropicAdapter = class {
1302
1303
  content,
1303
1304
  toolCalls,
1304
1305
  thinking: thinking || void 0,
1306
+ usage: response.usage ? {
1307
+ promptTokens: response.usage.input_tokens ?? 0,
1308
+ completionTokens: response.usage.output_tokens ?? 0,
1309
+ totalTokens: (response.usage.input_tokens ?? 0) + (response.usage.output_tokens ?? 0)
1310
+ } : void 0,
1305
1311
  rawResponse: response
1306
1312
  };
1307
1313
  } catch (error) {
@@ -2160,6 +2166,11 @@ var GoogleAdapter = class {
2160
2166
  return {
2161
2167
  content: textContent,
2162
2168
  toolCalls,
2169
+ usage: response.usageMetadata ? {
2170
+ promptTokens: response.usageMetadata.promptTokenCount ?? 0,
2171
+ completionTokens: (response.usageMetadata.candidatesTokenCount ?? 0) + (response.usageMetadata.thoughtsTokenCount ?? 0),
2172
+ totalTokens: response.usageMetadata.totalTokenCount ?? 0
2173
+ } : void 0,
2163
2174
  rawResponse: response
2164
2175
  };
2165
2176
  }
@@ -2388,6 +2399,11 @@ var AzureAdapter = class {
2388
2399
  return {
2389
2400
  content: message?.content || "",
2390
2401
  toolCalls,
2402
+ usage: response.usage ? {
2403
+ promptTokens: response.usage.prompt_tokens ?? 0,
2404
+ completionTokens: response.usage.completion_tokens ?? 0,
2405
+ totalTokens: response.usage.total_tokens ?? (response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0)
2406
+ } : void 0,
2391
2407
  rawResponse: response
2392
2408
  };
2393
2409
  }
package/dist/index.d.mts CHANGED
@@ -1001,7 +1001,7 @@ declare class Runtime {
1001
1001
  * 5. Loop continues until no more tool calls or max iterations reached
1002
1002
  * 6. Returns all new messages in the done event for client to append
1003
1003
  */
1004
- processChatWithLoop(request: ChatRequest, signal?: AbortSignal, _accumulatedMessages?: DoneEventMessage[], _isRecursive?: boolean, _httpRequest?: Request, _toolSearchState?: ToolSearchState): AsyncGenerator<StreamEvent>;
1004
+ processChatWithLoop(request: ChatRequest, signal?: AbortSignal, _accumulatedMessages?: DoneEventMessage[], _isRecursive?: boolean, _httpRequest?: Request, _toolSearchState?: ToolSearchState, _iteration?: number): AsyncGenerator<StreamEvent>;
1005
1005
  /**
1006
1006
  * Non-streaming agent loop implementation
1007
1007
  *
package/dist/index.d.ts CHANGED
@@ -1001,7 +1001,7 @@ declare class Runtime {
1001
1001
  * 5. Loop continues until no more tool calls or max iterations reached
1002
1002
  * 6. Returns all new messages in the done event for client to append
1003
1003
  */
1004
- processChatWithLoop(request: ChatRequest, signal?: AbortSignal, _accumulatedMessages?: DoneEventMessage[], _isRecursive?: boolean, _httpRequest?: Request, _toolSearchState?: ToolSearchState): AsyncGenerator<StreamEvent>;
1004
+ processChatWithLoop(request: ChatRequest, signal?: AbortSignal, _accumulatedMessages?: DoneEventMessage[], _isRecursive?: boolean, _httpRequest?: Request, _toolSearchState?: ToolSearchState, _iteration?: number): AsyncGenerator<StreamEvent>;
1005
1005
  /**
1006
1006
  * Non-streaming agent loop implementation
1007
1007
  *
package/dist/index.js CHANGED
@@ -2112,7 +2112,7 @@ var Runtime = class {
2112
2112
  * 5. Loop continues until no more tool calls or max iterations reached
2113
2113
  * 6. Returns all new messages in the done event for client to append
2114
2114
  */
2115
- async *processChatWithLoop(request, signal, _accumulatedMessages, _isRecursive, _httpRequest, _toolSearchState) {
2115
+ async *processChatWithLoop(request, signal, _accumulatedMessages, _isRecursive, _httpRequest, _toolSearchState, _iteration = 0) {
2116
2116
  const debug = this.config.debug;
2117
2117
  if (request.streaming === false) {
2118
2118
  if (debug) {
@@ -2131,7 +2131,25 @@ var Runtime = class {
2131
2131
  return;
2132
2132
  }
2133
2133
  const newMessages = _accumulatedMessages || [];
2134
- this.config.maxIterations ?? 20;
2134
+ const maxIterations = this.config.maxIterations ?? 20;
2135
+ if (signal?.aborted) {
2136
+ yield {
2137
+ type: "error",
2138
+ message: "Aborted",
2139
+ code: "ABORTED"
2140
+ };
2141
+ return;
2142
+ }
2143
+ if (_iteration >= maxIterations) {
2144
+ if (debug) {
2145
+ console.log(`[Copilot SDK] Max iterations (${maxIterations}) reached`);
2146
+ }
2147
+ yield {
2148
+ type: "done",
2149
+ messages: newMessages.length > 0 ? newMessages : void 0
2150
+ };
2151
+ return;
2152
+ }
2135
2153
  const allTools = this.collectToolsForRequest(request);
2136
2154
  const nativeToolSearch = this.resolveNativeToolSearchForRequest(request);
2137
2155
  const nativeToolCatalog = nativeToolSearch ? this.buildNativeToolCatalogForRequest(request, allTools) : null;
@@ -2393,7 +2411,8 @@ var Runtime = class {
2393
2411
  true,
2394
2412
  // Mark as recursive
2395
2413
  _httpRequest,
2396
- nextToolSearchState
2414
+ nextToolSearchState,
2415
+ _iteration + 1
2397
2416
  )) {
2398
2417
  yield event;
2399
2418
  }
@@ -2490,7 +2509,9 @@ var Runtime = class {
2490
2509
  _accumulatedMessages,
2491
2510
  _isRecursive,
2492
2511
  _httpRequest,
2493
- toolSearchState
2512
+ toolSearchState,
2513
+ iteration
2514
+ // carry the non-streaming loop's depth into the streaming sub-call
2494
2515
  )) {
2495
2516
  yield event;
2496
2517
  }
package/dist/index.mjs CHANGED
@@ -2110,7 +2110,7 @@ var Runtime = class {
2110
2110
  * 5. Loop continues until no more tool calls or max iterations reached
2111
2111
  * 6. Returns all new messages in the done event for client to append
2112
2112
  */
2113
- async *processChatWithLoop(request, signal, _accumulatedMessages, _isRecursive, _httpRequest, _toolSearchState) {
2113
+ async *processChatWithLoop(request, signal, _accumulatedMessages, _isRecursive, _httpRequest, _toolSearchState, _iteration = 0) {
2114
2114
  const debug = this.config.debug;
2115
2115
  if (request.streaming === false) {
2116
2116
  if (debug) {
@@ -2129,7 +2129,25 @@ var Runtime = class {
2129
2129
  return;
2130
2130
  }
2131
2131
  const newMessages = _accumulatedMessages || [];
2132
- this.config.maxIterations ?? 20;
2132
+ const maxIterations = this.config.maxIterations ?? 20;
2133
+ if (signal?.aborted) {
2134
+ yield {
2135
+ type: "error",
2136
+ message: "Aborted",
2137
+ code: "ABORTED"
2138
+ };
2139
+ return;
2140
+ }
2141
+ if (_iteration >= maxIterations) {
2142
+ if (debug) {
2143
+ console.log(`[Copilot SDK] Max iterations (${maxIterations}) reached`);
2144
+ }
2145
+ yield {
2146
+ type: "done",
2147
+ messages: newMessages.length > 0 ? newMessages : void 0
2148
+ };
2149
+ return;
2150
+ }
2133
2151
  const allTools = this.collectToolsForRequest(request);
2134
2152
  const nativeToolSearch = this.resolveNativeToolSearchForRequest(request);
2135
2153
  const nativeToolCatalog = nativeToolSearch ? this.buildNativeToolCatalogForRequest(request, allTools) : null;
@@ -2391,7 +2409,8 @@ var Runtime = class {
2391
2409
  true,
2392
2410
  // Mark as recursive
2393
2411
  _httpRequest,
2394
- nextToolSearchState
2412
+ nextToolSearchState,
2413
+ _iteration + 1
2395
2414
  )) {
2396
2415
  yield event;
2397
2416
  }
@@ -2488,7 +2507,9 @@ var Runtime = class {
2488
2507
  _accumulatedMessages,
2489
2508
  _isRecursive,
2490
2509
  _httpRequest,
2491
- toolSearchState
2510
+ toolSearchState,
2511
+ iteration
2512
+ // carry the non-streaming loop's depth into the streaming sub-call
2492
2513
  )) {
2493
2514
  yield event;
2494
2515
  }
@@ -980,6 +980,11 @@ var AnthropicAdapter = class {
980
980
  content,
981
981
  toolCalls,
982
982
  thinking: thinking || void 0,
983
+ usage: response.usage ? {
984
+ promptTokens: response.usage.input_tokens ?? 0,
985
+ completionTokens: response.usage.output_tokens ?? 0,
986
+ totalTokens: (response.usage.input_tokens ?? 0) + (response.usage.output_tokens ?? 0)
987
+ } : void 0,
983
988
  rawResponse: response
984
989
  };
985
990
  } catch (error) {
@@ -978,6 +978,11 @@ var AnthropicAdapter = class {
978
978
  content,
979
979
  toolCalls,
980
980
  thinking: thinking || void 0,
981
+ usage: response.usage ? {
982
+ promptTokens: response.usage.input_tokens ?? 0,
983
+ completionTokens: response.usage.output_tokens ?? 0,
984
+ totalTokens: (response.usage.input_tokens ?? 0) + (response.usage.output_tokens ?? 0)
985
+ } : void 0,
981
986
  rawResponse: response
982
987
  };
983
988
  } catch (error) {
@@ -419,6 +419,11 @@ var AzureAdapter = class {
419
419
  return {
420
420
  content: message?.content || "",
421
421
  toolCalls,
422
+ usage: response.usage ? {
423
+ promptTokens: response.usage.prompt_tokens ?? 0,
424
+ completionTokens: response.usage.completion_tokens ?? 0,
425
+ totalTokens: response.usage.total_tokens ?? (response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0)
426
+ } : void 0,
422
427
  rawResponse: response
423
428
  };
424
429
  }
@@ -417,6 +417,11 @@ var AzureAdapter = class {
417
417
  return {
418
418
  content: message?.content || "",
419
419
  toolCalls,
420
+ usage: response.usage ? {
421
+ promptTokens: response.usage.prompt_tokens ?? 0,
422
+ completionTokens: response.usage.completion_tokens ?? 0,
423
+ totalTokens: response.usage.total_tokens ?? (response.usage.prompt_tokens ?? 0) + (response.usage.completion_tokens ?? 0)
424
+ } : void 0,
420
425
  rawResponse: response
421
426
  };
422
427
  }
@@ -1,5 +1,7 @@
1
1
  import { L as LanguageModel } from '../../types-BkQCSiIt.mjs';
2
+ import { A as AIProvider } from '../../types-BSSiJW2o.mjs';
2
3
  import 'zod';
4
+ import '../../base-tNgbBaSo.mjs';
3
5
 
4
6
  /**
5
7
  * Fireworks Provider
@@ -46,4 +48,67 @@ interface FireworksProviderOptions {
46
48
  */
47
49
  declare function fireworks(modelId: string, options?: FireworksProviderOptions): LanguageModel;
48
50
 
49
- export { type FireworksProviderOptions, fireworks as createFireworks, fireworks };
51
+ /**
52
+ * Fireworks Provider
53
+ *
54
+ * Fireworks.ai is a high-performance inference platform for open-source models
55
+ * (Llama, DeepSeek, Qwen, Mixtral, Gemma, and more).
56
+ *
57
+ * Uses an OpenAI-compatible API — set FIREWORKS_API_KEY in your environment.
58
+ *
59
+ * @see https://fireworks.ai/docs
60
+ *
61
+ * @example
62
+ * ```ts
63
+ * // Modern pattern — returns LanguageModel directly
64
+ * import { fireworks } from '@yourgpt/llm-sdk/fireworks';
65
+ * import { generateText } from '@yourgpt/llm-sdk';
66
+ *
67
+ * const result = await generateText({
68
+ * model: fireworks('accounts/fireworks/models/llama-v3p1-70b-instruct'),
69
+ * prompt: 'Hello!',
70
+ * });
71
+ *
72
+ * // Runtime pattern — returns AIProvider for createRuntime / fallback chain
73
+ * import { createFireworks } from '@yourgpt/llm-sdk/fireworks';
74
+ * import { createRuntime } from '@yourgpt/llm-sdk';
75
+ *
76
+ * const provider = createFireworks({ apiKey: '...' });
77
+ * const runtime = createRuntime({ provider, model: 'accounts/fireworks/models/deepseek-v3p1' });
78
+ * ```
79
+ */
80
+
81
+ interface FireworksProviderConfig {
82
+ /** API key (defaults to FIREWORKS_API_KEY env var) */
83
+ apiKey?: string;
84
+ /** Base URL for API */
85
+ baseUrl?: string;
86
+ }
87
+ /**
88
+ * Create a Fireworks provider (callable, for use with createRuntime).
89
+ *
90
+ * Fireworks exposes an OpenAI-compatible API, so this reuses the OpenAI adapter
91
+ * pointed at the Fireworks base URL — the same approach used by Together AI and
92
+ * OpenRouter. The adapter speaks the runtime's API (generate/stream, rawMessages,
93
+ * message:delta/end events) and sets `stream_options.include_usage`, so streaming
94
+ * usage is reported correctly for credit accounting.
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * import { createFireworks } from '@yourgpt/llm-sdk/fireworks';
99
+ * import { createRuntime } from '@yourgpt/llm-sdk';
100
+ *
101
+ * const fireworks = createFireworks({ apiKey: '...' });
102
+ * const runtime = createRuntime({
103
+ * provider: fireworks,
104
+ * model: 'accounts/fireworks/models/deepseek-v3p1',
105
+ * });
106
+ *
107
+ * // Handle incoming chat requests
108
+ * return runtime.handleRequest(request);
109
+ * ```
110
+ */
111
+ declare function createFireworks(config?: FireworksProviderConfig): AIProvider;
112
+ declare const createFireworksProvider: typeof createFireworks;
113
+
114
+ export { type FireworksProviderConfig, type FireworksProviderOptions, createFireworks, fireworks as createFireworksModel, createFireworksProvider, fireworks };
@@ -1,5 +1,7 @@
1
1
  import { L as LanguageModel } from '../../types-BkQCSiIt.js';
2
+ import { A as AIProvider } from '../../types-BQ31QIsA.js';
2
3
  import 'zod';
4
+ import '../../base-C58Dsr9p.js';
3
5
 
4
6
  /**
5
7
  * Fireworks Provider
@@ -46,4 +48,67 @@ interface FireworksProviderOptions {
46
48
  */
47
49
  declare function fireworks(modelId: string, options?: FireworksProviderOptions): LanguageModel;
48
50
 
49
- export { type FireworksProviderOptions, fireworks as createFireworks, fireworks };
51
+ /**
52
+ * Fireworks Provider
53
+ *
54
+ * Fireworks.ai is a high-performance inference platform for open-source models
55
+ * (Llama, DeepSeek, Qwen, Mixtral, Gemma, and more).
56
+ *
57
+ * Uses an OpenAI-compatible API — set FIREWORKS_API_KEY in your environment.
58
+ *
59
+ * @see https://fireworks.ai/docs
60
+ *
61
+ * @example
62
+ * ```ts
63
+ * // Modern pattern — returns LanguageModel directly
64
+ * import { fireworks } from '@yourgpt/llm-sdk/fireworks';
65
+ * import { generateText } from '@yourgpt/llm-sdk';
66
+ *
67
+ * const result = await generateText({
68
+ * model: fireworks('accounts/fireworks/models/llama-v3p1-70b-instruct'),
69
+ * prompt: 'Hello!',
70
+ * });
71
+ *
72
+ * // Runtime pattern — returns AIProvider for createRuntime / fallback chain
73
+ * import { createFireworks } from '@yourgpt/llm-sdk/fireworks';
74
+ * import { createRuntime } from '@yourgpt/llm-sdk';
75
+ *
76
+ * const provider = createFireworks({ apiKey: '...' });
77
+ * const runtime = createRuntime({ provider, model: 'accounts/fireworks/models/deepseek-v3p1' });
78
+ * ```
79
+ */
80
+
81
+ interface FireworksProviderConfig {
82
+ /** API key (defaults to FIREWORKS_API_KEY env var) */
83
+ apiKey?: string;
84
+ /** Base URL for API */
85
+ baseUrl?: string;
86
+ }
87
+ /**
88
+ * Create a Fireworks provider (callable, for use with createRuntime).
89
+ *
90
+ * Fireworks exposes an OpenAI-compatible API, so this reuses the OpenAI adapter
91
+ * pointed at the Fireworks base URL — the same approach used by Together AI and
92
+ * OpenRouter. The adapter speaks the runtime's API (generate/stream, rawMessages,
93
+ * message:delta/end events) and sets `stream_options.include_usage`, so streaming
94
+ * usage is reported correctly for credit accounting.
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * import { createFireworks } from '@yourgpt/llm-sdk/fireworks';
99
+ * import { createRuntime } from '@yourgpt/llm-sdk';
100
+ *
101
+ * const fireworks = createFireworks({ apiKey: '...' });
102
+ * const runtime = createRuntime({
103
+ * provider: fireworks,
104
+ * model: 'accounts/fireworks/models/deepseek-v3p1',
105
+ * });
106
+ *
107
+ * // Handle incoming chat requests
108
+ * return runtime.handleRequest(request);
109
+ * ```
110
+ */
111
+ declare function createFireworks(config?: FireworksProviderConfig): AIProvider;
112
+ declare const createFireworksProvider: typeof createFireworks;
113
+
114
+ export { type FireworksProviderConfig, type FireworksProviderOptions, createFireworks, fireworks as createFireworksModel, createFireworksProvider, fireworks };