@stackbilt/llm-providers 1.0.0 → 1.2.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (87):
  1. package/LICENSE +0 -0
  2. package/README.md +112 -85
  3. package/dist/errors.d.ts +18 -1
  4. package/dist/errors.d.ts.map +1 -1
  5. package/dist/errors.js +12 -4
  6. package/dist/errors.js.map +1 -1
  7. package/dist/factory.d.ts +62 -4
  8. package/dist/factory.d.ts.map +1 -1
  9. package/dist/factory.js +630 -92
  10. package/dist/factory.js.map +1 -1
  11. package/dist/image/index.d.ts +5 -0
  12. package/dist/image/index.d.ts.map +1 -0
  13. package/dist/image/index.js +3 -0
  14. package/dist/image/index.js.map +1 -0
  15. package/dist/image/provider.d.ts +44 -0
  16. package/dist/image/provider.d.ts.map +1 -0
  17. package/dist/image/provider.js +182 -0
  18. package/dist/image/provider.js.map +1 -0
  19. package/dist/image/types.d.ts +45 -0
  20. package/dist/image/types.d.ts.map +1 -0
  21. package/dist/image/types.js +83 -0
  22. package/dist/image/types.js.map +1 -0
  23. package/dist/index.d.ts +46 -12
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +73 -14
  26. package/dist/index.js.map +1 -1
  27. package/dist/providers/anthropic.d.ts +5 -2
  28. package/dist/providers/anthropic.d.ts.map +1 -1
  29. package/dist/providers/anthropic.js +118 -43
  30. package/dist/providers/anthropic.js.map +1 -1
  31. package/dist/providers/base.d.ts +18 -2
  32. package/dist/providers/base.d.ts.map +1 -1
  33. package/dist/providers/base.js +107 -5
  34. package/dist/providers/base.js.map +1 -1
  35. package/dist/providers/cerebras.d.ts +0 -0
  36. package/dist/providers/cerebras.d.ts.map +1 -1
  37. package/dist/providers/cerebras.js +21 -13
  38. package/dist/providers/cerebras.js.map +1 -1
  39. package/dist/providers/cloudflare.d.ts +0 -0
  40. package/dist/providers/cloudflare.d.ts.map +1 -1
  41. package/dist/providers/cloudflare.js +12 -8
  42. package/dist/providers/cloudflare.js.map +1 -1
  43. package/dist/providers/groq.d.ts +2 -1
  44. package/dist/providers/groq.d.ts.map +1 -1
  45. package/dist/providers/groq.js +95 -15
  46. package/dist/providers/groq.js.map +1 -1
  47. package/dist/providers/openai.d.ts +2 -0
  48. package/dist/providers/openai.d.ts.map +1 -1
  49. package/dist/providers/openai.js +56 -24
  50. package/dist/providers/openai.js.map +1 -1
  51. package/dist/types.d.ts +114 -4
  52. package/dist/types.d.ts.map +1 -1
  53. package/dist/types.js +0 -0
  54. package/dist/types.js.map +0 -0
  55. package/dist/utils/circuit-breaker.d.ts +5 -2
  56. package/dist/utils/circuit-breaker.d.ts.map +1 -1
  57. package/dist/utils/circuit-breaker.js +18 -13
  58. package/dist/utils/circuit-breaker.js.map +1 -1
  59. package/dist/utils/cost-tracker.d.ts +9 -2
  60. package/dist/utils/cost-tracker.d.ts.map +1 -1
  61. package/dist/utils/cost-tracker.js +20 -9
  62. package/dist/utils/cost-tracker.js.map +1 -1
  63. package/dist/utils/credit-ledger.d.ts +3 -0
  64. package/dist/utils/credit-ledger.d.ts.map +1 -1
  65. package/dist/utils/credit-ledger.js +5 -2
  66. package/dist/utils/credit-ledger.js.map +1 -1
  67. package/dist/utils/exhaustion.d.ts +38 -0
  68. package/dist/utils/exhaustion.d.ts.map +1 -0
  69. package/dist/utils/exhaustion.js +74 -0
  70. package/dist/utils/exhaustion.js.map +1 -0
  71. package/dist/utils/hooks.d.ts +113 -0
  72. package/dist/utils/hooks.d.ts.map +1 -0
  73. package/dist/utils/hooks.js +44 -0
  74. package/dist/utils/hooks.js.map +1 -0
  75. package/dist/utils/latency-histogram.d.ts +38 -0
  76. package/dist/utils/latency-histogram.d.ts.map +1 -0
  77. package/dist/utils/latency-histogram.js +81 -0
  78. package/dist/utils/latency-histogram.js.map +1 -0
  79. package/dist/utils/logger.d.ts +18 -0
  80. package/dist/utils/logger.d.ts.map +1 -0
  81. package/dist/utils/logger.js +22 -0
  82. package/dist/utils/logger.js.map +1 -0
  83. package/dist/utils/retry.d.ts +4 -2
  84. package/dist/utils/retry.d.ts.map +1 -1
  85. package/dist/utils/retry.js +12 -8
  86. package/dist/utils/retry.js.map +1 -1
  87. package/package.json +2 -2
package/dist/factory.js CHANGED
@@ -2,22 +2,32 @@
2
2
  * LLM Provider Factory
3
3
  * Creates and manages LLM provider instances with intelligent fallback logic
4
4
  */
5
+ import { noopLogger } from './utils/logger';
6
+ import { noopHooks } from './utils/hooks';
5
7
  import { OpenAIProvider } from './providers/openai';
6
8
  import { AnthropicProvider } from './providers/anthropic';
7
9
  import { CloudflareProvider } from './providers/cloudflare';
8
10
  import { CerebrasProvider } from './providers/cerebras';
9
11
  import { GroqProvider } from './providers/groq';
10
- import { defaultCostTracker } from './utils/cost-tracker';
12
+ import { CostTracker, defaultCostTracker } from './utils/cost-tracker';
11
13
  import { defaultCircuitBreakerManager } from './utils/circuit-breaker';
12
- import { LLMProviderError, ConfigurationError, CircuitBreakerOpenError, AuthenticationError, RateLimitError } from './errors';
14
+ import { defaultExhaustionRegistry } from './utils/exhaustion';
15
+ import { defaultLatencyHistogram } from './utils/latency-histogram';
16
+ import { LLMProviderError, ConfigurationError, CircuitBreakerOpenError, AuthenticationError, RateLimitError, QuotaExceededError, ToolLoopAbortedError, ToolLoopLimitError, } from './errors';
13
17
  export class LLMProviderFactory {
14
18
  providers = new Map();
15
19
  config;
16
20
  costTracker;
17
21
  fallbackRules;
22
+ logger;
23
+ hooks;
18
24
  constructor(config) {
19
25
  this.config = config;
20
- this.costTracker = defaultCostTracker;
26
+ this.logger = config.logger ?? noopLogger;
27
+ this.hooks = config.hooks ?? noopHooks;
28
+ this.costTracker = config.ledger
29
+ ? new CostTracker({}, config.ledger, this.logger)
30
+ : defaultCostTracker;
21
31
  this.fallbackRules = config.fallbackRules || this.getDefaultFallbackRules();
22
32
  this.initializeProviders();
23
33
  }
@@ -25,69 +35,34 @@ export class LLMProviderFactory {
25
35
  * Initialize all configured providers
26
36
  */
27
37
  initializeProviders() {
28
- // Initialize OpenAI provider
29
- if (this.config.openai) {
30
- try {
31
- const provider = new OpenAIProvider(this.config.openai);
32
- if (provider.validateConfig()) {
33
- this.providers.set('openai', provider);
34
- console.log('[LLMProviderFactory] OpenAI provider initialized');
35
- }
36
- }
37
- catch (error) {
38
- console.warn('[LLMProviderFactory] Failed to initialize OpenAI provider:', error);
39
- }
40
- }
41
- // Initialize Anthropic provider
42
- if (this.config.anthropic) {
43
- try {
44
- const provider = new AnthropicProvider(this.config.anthropic);
45
- if (provider.validateConfig()) {
46
- this.providers.set('anthropic', provider);
47
- console.log('[LLMProviderFactory] Anthropic provider initialized');
48
- }
49
- }
50
- catch (error) {
51
- console.warn('[LLMProviderFactory] Failed to initialize Anthropic provider:', error);
52
- }
53
- }
54
- // Initialize Cloudflare provider
55
- if (this.config.cloudflare) {
56
- try {
57
- const provider = new CloudflareProvider(this.config.cloudflare);
58
- if (provider.validateConfig()) {
59
- this.providers.set('cloudflare', provider);
60
- console.log('[LLMProviderFactory] Cloudflare provider initialized');
61
- }
62
- }
63
- catch (error) {
64
- console.warn('[LLMProviderFactory] Failed to initialize Cloudflare provider:', error);
65
- }
66
- }
67
- // Initialize Cerebras provider
68
- if (this.config.cerebras) {
69
- try {
70
- const provider = new CerebrasProvider(this.config.cerebras);
71
- if (provider.validateConfig()) {
72
- this.providers.set('cerebras', provider);
73
- console.log('[LLMProviderFactory] Cerebras provider initialized');
74
- }
75
- }
76
- catch (error) {
77
- console.warn('[LLMProviderFactory] Failed to initialize Cerebras provider:', error);
78
- }
79
- }
80
- // Initialize Groq provider
81
- if (this.config.groq) {
38
+ const providerEntries = [
39
+ ['openai', OpenAIProvider],
40
+ ['anthropic', AnthropicProvider],
41
+ ['cloudflare', CloudflareProvider],
42
+ ['cerebras', CerebrasProvider],
43
+ ['groq', GroqProvider],
44
+ ];
45
+ for (const [name, ProviderClass] of providerEntries) {
46
+ const providerConfig = this.config[name];
47
+ if (!providerConfig)
48
+ continue;
82
49
  try {
83
- const provider = new GroqProvider(this.config.groq);
50
+ const retryConfig = this.config.enableRetries === false && providerConfig.maxRetries === undefined
51
+ ? { maxRetries: 0 }
52
+ : {};
53
+ const provider = new ProviderClass({
54
+ ...providerConfig,
55
+ ...retryConfig,
56
+ logger: this.logger,
57
+ hooks: this.hooks,
58
+ });
84
59
  if (provider.validateConfig()) {
85
- this.providers.set('groq', provider);
86
- console.log('[LLMProviderFactory] Groq provider initialized');
60
+ this.providers.set(name, provider);
61
+ this.logger.info(`[LLMProviderFactory] ${name} provider initialized`);
87
62
  }
88
63
  }
89
64
  catch (error) {
90
- console.warn('[LLMProviderFactory] Failed to initialize Groq provider:', error);
65
+ this.logger.warn(`[LLMProviderFactory] Failed to initialize ${name} provider:`, error.message);
91
66
  }
92
67
  }
93
68
  if (this.providers.size === 0) {
@@ -99,41 +74,332 @@ export class LLMProviderFactory {
99
74
  */
100
75
  async generateResponse(request) {
101
76
  const providerChain = this.buildProviderChain(request);
77
+ const providerModels = new Map();
102
78
  let lastError = null;
103
- for (const providerName of providerChain) {
79
+ let previousProvider = null;
80
+ for (let index = 0; index < providerChain.length; index++) {
81
+ const providerName = providerChain[index];
104
82
  try {
105
83
  const provider = this.providers.get(providerName);
106
84
  if (!provider)
107
85
  continue;
86
+ // Check exhaustion registry
87
+ if (defaultExhaustionRegistry.isExhausted(providerName)) {
88
+ this.logger.warn(`[LLMProviderFactory] Provider ${providerName} is quota-exhausted, skipping`);
89
+ continue;
90
+ }
108
91
  // Check circuit breaker
109
92
  if (this.config.enableCircuitBreaker) {
110
93
  const breaker = defaultCircuitBreakerManager.getBreaker(providerName);
111
94
  if (breaker.isOpen()) {
112
- console.warn(`[LLMProviderFactory] Circuit breaker open for ${providerName}, skipping`);
95
+ this.logger.warn(`[LLMProviderFactory] Circuit breaker open for ${providerName}, skipping`);
113
96
  continue;
114
97
  }
115
98
  }
116
- console.log(`[LLMProviderFactory] Trying provider: ${providerName}`);
117
- const response = await provider.generateResponse(request);
118
- // Track cost if enabled
119
- if (this.config.costOptimization) {
99
+ if (this.config.ledger && this.isLedgerLimited(providerName)) {
100
+ continue;
101
+ }
102
+ // Emit fallback event if this isn't the first provider attempted
103
+ if (previousProvider && lastError) {
104
+ this.hooks.onFallback?.({
105
+ fromProvider: previousProvider,
106
+ toProvider: providerName,
107
+ requestId: request.requestId,
108
+ reason: lastError.message,
109
+ errorCode: lastError.code,
110
+ timestamp: Date.now(),
111
+ });
112
+ }
113
+ this.logger.debug(`[LLMProviderFactory] Trying provider: ${providerName}`);
114
+ const providerRequest = this.requestForProvider(request, providerName, providerModels);
115
+ const model = providerRequest.model || provider.models[0] || 'unknown';
116
+ await this.checkQuota(providerName, provider, providerRequest, model);
117
+ this.hooks.onRequestStart?.({
118
+ provider: providerName,
119
+ model,
120
+ requestId: request.requestId,
121
+ tenantId: request.tenantId,
122
+ timestamp: Date.now(),
123
+ });
124
+ const startTime = Date.now();
125
+ const response = await provider.generateResponse(providerRequest);
126
+ const durationMs = Date.now() - startTime;
127
+ this.hooks.onRequestEnd?.({
128
+ provider: providerName,
129
+ model: response.model,
130
+ requestId: request.requestId,
131
+ tenantId: request.tenantId,
132
+ durationMs,
133
+ usage: response.usage,
134
+ finishReason: response.finishReason,
135
+ timestamp: Date.now(),
136
+ });
137
+ // Track spend whenever analytics or ledger accounting is configured.
138
+ if (this.config.costOptimization || this.config.ledger) {
120
139
  this.costTracker.trackCost(providerName, response);
121
140
  }
122
- console.log(`[LLMProviderFactory] Successfully used provider: ${providerName}`);
141
+ this.recordQuota(providerName, response, providerRequest);
142
+ this.logger.debug(`[LLMProviderFactory] Successfully used provider: ${providerName}`);
123
143
  return response;
124
144
  }
125
145
  catch (error) {
126
- lastError = error;
127
- console.warn(`[LLMProviderFactory] Provider ${providerName} failed:`, error);
128
- // Check if we should continue trying other providers
129
- if (!this.shouldFallback(error)) {
146
+ const err = error;
147
+ lastError = err;
148
+ previousProvider = providerName;
149
+ this.logger.warn(`[LLMProviderFactory] Provider ${providerName} failed:`, err.message);
150
+ this.hooks.onRequestError?.({
151
+ provider: providerName,
152
+ model: request.model || 'unknown',
153
+ requestId: request.requestId,
154
+ tenantId: request.tenantId,
155
+ error: err,
156
+ errorCode: err.code,
157
+ attempt: 1,
158
+ willRetry: this.shouldFallback(err),
159
+ timestamp: Date.now(),
160
+ });
161
+ // Auto-mark quota-exhausted providers
162
+ if (err instanceof QuotaExceededError) {
163
+ defaultExhaustionRegistry.markExhausted(providerName);
164
+ this.hooks.onQuotaExhausted?.({
165
+ provider: providerName,
166
+ resetAfterMs: defaultExhaustionRegistry.defaultResetMs,
167
+ timestamp: Date.now(),
168
+ });
169
+ }
170
+ const fallbackDecision = this.getFallbackDecision(error);
171
+ if (!fallbackDecision.shouldFallback) {
130
172
  throw error;
131
173
  }
174
+ this.applyFallbackDecision(fallbackDecision, providerName, providerChain, index, providerModels);
132
175
  }
133
176
  }
134
177
  // All providers failed
135
178
  throw lastError || new LLMProviderError('All providers failed', 'ALL_PROVIDERS_FAILED', 'factory', false);
136
179
  }
180
+ async generateResponseStream(request) {
181
+ const providerChain = this.buildProviderChain({ ...request, stream: true });
182
+ const providerModels = new Map();
183
+ let lastError = null;
184
+ let previousProvider = null;
185
+ for (let index = 0; index < providerChain.length; index++) {
186
+ const providerName = providerChain[index];
187
+ try {
188
+ const provider = this.providers.get(providerName);
189
+ if (!provider || !provider.supportsStreaming || !provider.streamResponse)
190
+ continue;
191
+ if (defaultExhaustionRegistry.isExhausted(providerName))
192
+ continue;
193
+ if (this.config.enableCircuitBreaker && defaultCircuitBreakerManager.getBreaker(providerName).isOpen())
194
+ continue;
195
+ if (this.config.ledger && this.isLedgerLimited(providerName))
196
+ continue;
197
+ if (previousProvider && lastError) {
198
+ this.hooks.onFallback?.({
199
+ fromProvider: previousProvider,
200
+ toProvider: providerName,
201
+ requestId: request.requestId,
202
+ reason: lastError.message,
203
+ errorCode: lastError.code,
204
+ timestamp: Date.now(),
205
+ });
206
+ }
207
+ const providerRequest = {
208
+ ...this.requestForProvider(request, providerName, providerModels),
209
+ stream: true
210
+ };
211
+ const model = providerRequest.model || provider.models[0] || 'unknown';
212
+ const estimatedCost = await this.checkQuota(providerName, provider, providerRequest, model);
213
+ this.hooks.onRequestStart?.({
214
+ provider: providerName,
215
+ model,
216
+ requestId: request.requestId,
217
+ tenantId: request.tenantId,
218
+ timestamp: Date.now(),
219
+ });
220
+ const startTime = Date.now();
221
+ const opened = await this.openStreamWithFirstChunk(provider, providerRequest);
222
+ return this.buildFactoryStream(opened.reader, opened.firstChunk, opened.done, providerName, model, providerRequest, startTime, estimatedCost);
223
+ }
224
+ catch (error) {
225
+ const err = error;
226
+ lastError = err;
227
+ previousProvider = providerName;
228
+ this.hooks.onRequestError?.({
229
+ provider: providerName,
230
+ model: request.model || 'unknown',
231
+ requestId: request.requestId,
232
+ tenantId: request.tenantId,
233
+ error: err,
234
+ errorCode: err.code,
235
+ attempt: 1,
236
+ willRetry: this.shouldFallback(err),
237
+ timestamp: Date.now(),
238
+ });
239
+ const fallbackDecision = this.getFallbackDecision(err);
240
+ if (!fallbackDecision.shouldFallback) {
241
+ throw error;
242
+ }
243
+ this.applyFallbackDecision(fallbackDecision, providerName, providerChain, index, providerModels);
244
+ }
245
+ }
246
+ throw lastError || new LLMProviderError('All streaming providers failed', 'ALL_PROVIDERS_FAILED', 'factory', false);
247
+ }
248
+ async generateResponseWithTools(request, toolExecutor, opts = {}) {
249
+ const maxIterations = opts.maxIterations ?? 10;
250
+ let cumulativeCost = 0;
251
+ let messages = [...request.messages];
252
+ let lastResponseCost = 0;
253
+ for (let iteration = 0; iteration <= maxIterations; iteration++) {
254
+ if (opts.abortSignal?.aborted) {
255
+ throw new ToolLoopAbortedError('factory');
256
+ }
257
+ // Pre-flight cost guard: use the previous iteration's cost as an
258
+ // estimate for the next one. This prevents obvious overshoots where
259
+ // a single expensive response would blow past the cap. The cap is
260
+ // still soft (±1 iteration tolerance) because the actual cost is
261
+ // only known after the response.
262
+ if (opts.maxCostUSD !== undefined && iteration > 0) {
263
+ const projectedCost = cumulativeCost + lastResponseCost;
264
+ if (projectedCost > opts.maxCostUSD) {
265
+ throw new ToolLoopLimitError('factory', `Tool loop would exceed max cost ${opts.maxCostUSD} (projected ${projectedCost.toFixed(4)})`);
266
+ }
267
+ }
268
+ const response = await this.generateResponse({ ...request, messages });
269
+ lastResponseCost = response.usage.cost;
270
+ cumulativeCost += lastResponseCost;
271
+ if (opts.maxCostUSD !== undefined && cumulativeCost > opts.maxCostUSD) {
272
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded max cost ${opts.maxCostUSD}`);
273
+ }
274
+ if (!response.toolCalls || response.toolCalls.length === 0) {
275
+ return {
276
+ ...response,
277
+ metadata: {
278
+ ...response.metadata,
279
+ cumulativeCost,
280
+ toolIterations: iteration
281
+ }
282
+ };
283
+ }
284
+ if (iteration >= maxIterations) {
285
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded ${maxIterations} iterations`);
286
+ }
287
+ const toolResults = [];
288
+ for (const toolCall of response.toolCalls) {
289
+ if (opts.abortSignal?.aborted) {
290
+ throw new ToolLoopAbortedError('factory');
291
+ }
292
+ let parsedArguments;
293
+ try {
294
+ parsedArguments = JSON.parse(toolCall.function.arguments);
295
+ }
296
+ catch {
297
+ parsedArguments = toolCall.function.arguments;
298
+ }
299
+ try {
300
+ const output = await toolExecutor.execute(toolCall.function.name, parsedArguments);
301
+ toolResults.push({
302
+ id: toolCall.id,
303
+ output: typeof output === 'string' ? output : JSON.stringify(output)
304
+ });
305
+ }
306
+ catch (error) {
307
+ toolResults.push({
308
+ id: toolCall.id,
309
+ output: '',
310
+ error: error.message
311
+ });
312
+ }
313
+ }
314
+ messages = [
315
+ ...messages,
316
+ {
317
+ role: 'assistant',
318
+ content: response.message,
319
+ toolCalls: response.toolCalls
320
+ },
321
+ {
322
+ role: 'user',
323
+ content: '',
324
+ toolResults
325
+ }
326
+ ];
327
+ const state = {
328
+ iteration: iteration + 1,
329
+ cumulativeCost,
330
+ messageCount: messages.length,
331
+ lastToolCalls: response.toolCalls
332
+ };
333
+ await opts.onIteration?.(iteration + 1, state);
334
+ }
335
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded ${maxIterations} iterations`);
336
+ }
337
+ async classify(input, options = {}) {
338
+ const parser = options.schema && typeof options.schema.parse === 'function'
339
+ ? options.schema.parse
340
+ : undefined;
341
+ const schemaDescription = options.schema && !parser
342
+ ? `\nJSON schema:\n${JSON.stringify(options.schema)}`
343
+ : '';
344
+ const systemPrompt = options.systemPrompt ||
345
+ `Classify the input and return only valid JSON.${schemaDescription}`;
346
+ const request = typeof input === 'string'
347
+ ? {
348
+ messages: [{ role: 'user', content: input }],
349
+ model: options.model,
350
+ temperature: options.temperature ?? 0,
351
+ maxTokens: options.maxTokens,
352
+ response_format: { type: 'json_object' },
353
+ systemPrompt,
354
+ seed: options.seed
355
+ }
356
+ : {
357
+ ...input,
358
+ model: options.model ?? input.model,
359
+ temperature: options.temperature ?? input.temperature ?? 0,
360
+ maxTokens: options.maxTokens ?? input.maxTokens,
361
+ response_format: { type: 'json_object' },
362
+ systemPrompt: options.systemPrompt ?? input.systemPrompt ?? systemPrompt,
363
+ seed: options.seed ?? input.seed
364
+ };
365
+ const response = await this.generateResponse(request);
366
+ const parsed = this.parseJsonResponse(response.message);
367
+ const data = parser ? parser(parsed) : parsed;
368
+ const confidenceValue = parsed[options.confidenceField ?? 'confidence'];
369
+ return {
370
+ data,
371
+ confidence: typeof confidenceValue === 'number' ? confidenceValue : undefined,
372
+ response
373
+ };
374
+ }
375
+ async analyzeImage(input) {
376
+ return this.generateResponse({
377
+ messages: [{ role: 'user', content: input.prompt }],
378
+ images: [input.image],
379
+ model: input.model ?? this.getDefaultVisionModel(),
380
+ systemPrompt: input.systemPrompt,
381
+ temperature: input.temperature,
382
+ maxTokens: input.maxTokens,
383
+ response_format: input.response_format,
384
+ tenantId: input.tenantId,
385
+ requestId: input.requestId,
386
+ metadata: input.metadata
387
+ });
388
+ }
389
+ async getProviderBalance(provider) {
390
+ if (provider) {
391
+ const balance = await this.getSingleProviderBalance(provider);
392
+ this.hooks.onProviderBalance?.({ provider, balance, timestamp: Date.now() });
393
+ return balance;
394
+ }
395
+ const result = {};
396
+ for (const providerName of this.providers.keys()) {
397
+ const balance = await this.getSingleProviderBalance(providerName);
398
+ result[providerName] = balance;
399
+ this.hooks.onProviderBalance?.({ provider: providerName, balance, timestamp: Date.now() });
400
+ }
401
+ return result;
402
+ }
137
403
  /**
138
404
  * Build provider chain based on request and configuration
139
405
  */
@@ -166,12 +432,16 @@ export class LLMProviderFactory {
166
432
  * Get prioritized list of providers based on cost optimization and capabilities
167
433
  */
168
434
  getPrioritizedProviders(request) {
435
+ const visionOnly = (request.images?.length ?? 0) > 0;
169
436
  if (!this.config.costOptimization) {
170
- // Default priority: Cloudflare (cheapest) -> Anthropic -> OpenAI
171
- return ['cloudflare', 'anthropic', 'openai'];
437
+ // Default priority: all configured providers, cheapest first
438
+ return ['cloudflare', 'cerebras', 'groq', 'anthropic', 'openai']
439
+ .filter(p => this.providers.has(p))
440
+ .filter(p => !visionOnly || this.providerSupportsVision(p));
172
441
  }
173
442
  // Cost-optimized routing
174
- const providers = Array.from(this.providers.keys());
443
+ const providers = Array.from(this.providers.keys())
444
+ .filter(p => !visionOnly || this.providerSupportsVision(p));
175
445
  const sortedProviders = [...providers].sort((a, b) => {
176
446
  const providerA = this.providers.get(a);
177
447
  const providerB = this.providers.get(b);
@@ -203,8 +473,8 @@ export class LLMProviderFactory {
203
473
  if (model.startsWith('@cf/')) {
204
474
  return 'cloudflare';
205
475
  }
206
- // Groq models
207
- if (model.includes('-versatile') || model.includes('-instant')) {
476
+ // Groq models (openai/gpt-oss-120b is Groq-hosted, not @cf/ prefixed)
477
+ if (model.includes('-versatile') || model.includes('-instant') || model === 'openai/gpt-oss-120b') {
208
478
  return 'groq';
209
479
  }
210
480
  // Cerebras models
@@ -218,29 +488,43 @@ export class LLMProviderFactory {
218
488
  * Check if we should fallback to another provider
219
489
  */
220
490
  shouldFallback(error) {
491
+ return this.getFallbackDecision(error).shouldFallback;
492
+ }
493
+ /**
494
+ * Get fallback routing decision for an error.
495
+ */
496
+ getFallbackDecision(error) {
221
497
  // Don't fallback for authentication errors
222
498
  if (error instanceof AuthenticationError) {
223
- return false;
499
+ return { shouldFallback: false };
224
500
  }
225
501
  // Don't fallback for configuration errors
226
502
  if (error instanceof ConfigurationError) {
227
- return false;
503
+ return { shouldFallback: false };
504
+ }
505
+ // Custom fallback rules can provide explicit provider/model routing.
506
+ for (const rule of this.fallbackRules) {
507
+ if (this.evaluateFallbackRule(rule, error)) {
508
+ return {
509
+ shouldFallback: true,
510
+ fallbackProvider: rule.fallbackProvider,
511
+ fallbackModel: rule.fallbackModel
512
+ };
513
+ }
228
514
  }
229
515
  // Fallback for circuit breaker, rate limits, and server errors
230
516
  if (error instanceof CircuitBreakerOpenError ||
231
- error instanceof RateLimitError ||
232
- error.code === 'SERVER_ERROR' ||
233
- error.code === 'NETWORK_ERROR' ||
234
- error.code === 'TIMEOUT') {
235
- return true;
517
+ error instanceof RateLimitError) {
518
+ return { shouldFallback: true };
236
519
  }
237
- // Check custom fallback rules
238
- for (const rule of this.fallbackRules) {
239
- if (this.evaluateFallbackRule(rule, error)) {
240
- return true;
520
+ if (error instanceof LLMProviderError) {
521
+ if (error.code === 'SERVER_ERROR' ||
522
+ error.code === 'NETWORK_ERROR' ||
523
+ error.code === 'TIMEOUT') {
524
+ return { shouldFallback: true };
241
525
  }
242
526
  }
243
- return false;
527
+ return { shouldFallback: false };
244
528
  }
245
529
  /**
246
530
  * Evaluate a fallback rule against an error
@@ -364,7 +648,19 @@ export class LLMProviderFactory {
364
648
  return recommendations;
365
649
  }
366
650
  /**
367
- * Reset all provider metrics and circuit breakers
651
+ * Get latency histogram data for all providers
652
+ */
653
+ getLatencyHistogram() {
654
+ return defaultLatencyHistogram.allSummaries();
655
+ }
656
+ /**
657
+ * Get currently exhausted providers
658
+ */
659
+ getExhaustedProviders() {
660
+ return defaultExhaustionRegistry.getExhaustedProviders();
661
+ }
662
+ /**
663
+ * Reset all provider metrics, circuit breakers, exhaustion, and histograms
368
664
  */
369
665
  reset() {
370
666
  for (const [name, provider] of this.providers) {
@@ -373,24 +669,266 @@ export class LLMProviderFactory {
373
669
  defaultCircuitBreakerManager.reset(name);
374
670
  }
375
671
  }
376
- if (this.config.costOptimization) {
672
+ if (this.config.costOptimization || this.config.ledger) {
377
673
  this.costTracker.reset();
378
674
  }
675
+ defaultExhaustionRegistry.reset();
676
+ defaultLatencyHistogram.reset();
379
677
  }
380
678
  /**
381
679
  * Update factory configuration
382
680
  */
383
681
  updateConfig(config) {
384
682
  this.config = { ...this.config, ...config };
683
+ if ('ledger' in config) {
684
+ this.costTracker = config.ledger
685
+ ? new CostTracker({}, config.ledger, this.logger)
686
+ : defaultCostTracker;
687
+ }
385
688
  if (config.fallbackRules) {
386
689
  this.fallbackRules = config.fallbackRules;
387
690
  }
388
691
  // Re-initialize providers if configs changed
389
- if (config.openai || config.anthropic || config.cloudflare || config.cerebras || config.groq) {
692
+ if (config.openai ||
693
+ config.anthropic ||
694
+ config.cloudflare ||
695
+ config.cerebras ||
696
+ config.groq ||
697
+ config.enableRetries !== undefined) {
390
698
  this.providers.clear();
391
699
  this.initializeProviders();
392
700
  }
393
701
  }
702
+ async openStreamWithFirstChunk(provider, request) {
703
+ if (!provider.streamResponse) {
704
+ throw new ConfigurationError(provider.name, 'Provider does not support streaming');
705
+ }
706
+ const stream = await provider.streamResponse(request);
707
+ const reader = stream.getReader();
708
+ const first = await reader.read();
709
+ return {
710
+ reader,
711
+ firstChunk: first.value,
712
+ done: first.done
713
+ };
714
+ }
715
+ buildFactoryStream(reader, firstChunk, firstDone, providerName, model, request, startTime, estimatedCost) {
716
+ return new ReadableStream({
717
+ start: async (controller) => {
718
+ try {
719
+ if (!firstDone && firstChunk !== undefined) {
720
+ controller.enqueue(firstChunk);
721
+ }
722
+ if (!firstDone) {
723
+ while (true) {
724
+ const { done, value } = await reader.read();
725
+ if (done)
726
+ break;
727
+ if (value !== undefined)
728
+ controller.enqueue(value);
729
+ }
730
+ }
731
+ const usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, cost: estimatedCost };
732
+ this.hooks.onRequestEnd?.({
733
+ provider: providerName,
734
+ model,
735
+ requestId: request.requestId,
736
+ tenantId: request.tenantId,
737
+ durationMs: Date.now() - startTime,
738
+ usage,
739
+ finishReason: 'stop',
740
+ timestamp: Date.now(),
741
+ });
742
+ this.recordQuotaInput({
743
+ tenantId: request.tenantId,
744
+ provider: providerName,
745
+ model,
746
+ actualCost: estimatedCost,
747
+ metadata: request.metadata
748
+ });
749
+ controller.close();
750
+ }
751
+ catch (error) {
752
+ controller.error(error);
753
+ }
754
+ finally {
755
+ reader.releaseLock();
756
+ }
757
+ }
758
+ });
759
+ }
760
+ async checkQuota(providerName, provider, request, model) {
761
+ const estimatedCost = provider.estimateCost(request);
762
+ if (!this.config.quotaHook) {
763
+ return estimatedCost;
764
+ }
765
+ const input = {
766
+ tenantId: request.tenantId,
767
+ provider: providerName,
768
+ model,
769
+ estimatedCost,
770
+ metadata: request.metadata
771
+ };
772
+ try {
773
+ const result = await this.config.quotaHook.check(input);
774
+ this.hooks.onQuotaCheck?.({ input, result, timestamp: Date.now() });
775
+ if (!result.allowed) {
776
+ this.hooks.onQuotaDenied?.({ input, reason: result.reason, timestamp: Date.now() });
777
+ throw new QuotaExceededError(providerName, result.reason || 'Quota hook denied request');
778
+ }
779
+ }
780
+ catch (error) {
781
+ if (error instanceof QuotaExceededError) {
782
+ throw error;
783
+ }
784
+ if ((this.config.quotaFailPolicy ?? 'closed') === 'open') {
785
+ this.logger.warn(`[LLMProviderFactory] Quota check failed open for ${providerName}:`, error.message);
786
+ return estimatedCost;
787
+ }
788
+ const reason = error.message;
789
+ this.hooks.onQuotaDenied?.({ input, reason, timestamp: Date.now() });
790
+ throw new QuotaExceededError(providerName, reason);
791
+ }
792
+ return estimatedCost;
793
+ }
794
+ recordQuota(providerName, response, request) {
795
+ this.recordQuotaInput({
796
+ tenantId: request.tenantId,
797
+ provider: providerName,
798
+ model: response.model,
799
+ actualCost: response.usage.cost,
800
+ inputTokens: response.usage.inputTokens,
801
+ outputTokens: response.usage.outputTokens,
802
+ metadata: request.metadata
803
+ });
804
+ }
805
+ recordQuotaInput(input) {
806
+ if (!this.config.quotaHook)
807
+ return;
808
+ void this.config.quotaHook.record(input).catch(error => {
809
+ this.logger.warn(`[LLMProviderFactory] Quota record failed for ${input.provider}:`, error.message);
810
+ });
811
+ }
812
+ parseJsonResponse(message) {
813
+ try {
814
+ return JSON.parse(message);
815
+ }
816
+ catch {
817
+ // Strip markdown fences (```json ... ``` or ``` ... ```) before
818
+ // falling back to brace extraction so fenced JSON parses cleanly.
819
+ const fenced = message.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '');
820
+ try {
821
+ return JSON.parse(fenced);
822
+ }
823
+ catch {
824
+ // Last resort: extract outermost braces.
825
+ const start = fenced.indexOf('{');
826
+ const end = fenced.lastIndexOf('}');
827
+ if (start >= 0 && end > start) {
828
+ return JSON.parse(fenced.slice(start, end + 1));
829
+ }
830
+ }
831
+ throw new ConfigurationError('factory', 'Classification response was not valid JSON');
832
+ }
833
+ }
834
+ getDefaultVisionModel() {
835
+ if (this.config.defaultVisionModel)
836
+ return this.config.defaultVisionModel;
837
+ if (this.providers.has('anthropic'))
838
+ return 'claude-haiku-4-5-20251001';
839
+ if (this.providers.has('openai'))
840
+ return 'gpt-4o-mini';
841
+ return undefined;
842
+ }
843
+ providerSupportsVision(providerName) {
844
+ return this.providers.get(providerName)?.supportsVision === true;
845
+ }
846
+ async getSingleProviderBalance(providerName) {
847
+ const ledgerBalance = this.getLedgerBalance(providerName);
848
+ if (ledgerBalance) {
849
+ return ledgerBalance;
850
+ }
851
+ const provider = this.providers.get(providerName);
852
+ if (!provider) {
853
+ return {
854
+ provider: providerName,
855
+ status: 'error',
856
+ source: 'not_supported',
857
+ message: `Provider '${providerName}' is not configured`
858
+ };
859
+ }
860
+ if (provider.getProviderBalance) {
861
+ return provider.getProviderBalance();
862
+ }
863
+ return {
864
+ provider: providerName,
865
+ status: 'unavailable',
866
+ source: 'not_supported',
867
+ message: `Provider '${providerName}' does not expose balance reporting`
868
+ };
869
+ }
870
+ getLedgerBalance(providerName) {
871
+ const acc = this.config.ledger?.getProviderAccumulator(providerName);
872
+ if (!acc)
873
+ return undefined;
874
+ const rateLimits = {};
875
+ for (const [dimension, window] of Object.entries(acc.rateLimits)) {
876
+ rateLimits[dimension] = {
877
+ limit: window.limit,
878
+ used: window.used,
879
+ remaining: Math.max(window.limit - window.used, 0)
880
+ };
881
+ }
882
+ return {
883
+ provider: providerName,
884
+ status: 'available',
885
+ source: 'ledger',
886
+ currentSpend: acc.spend,
887
+ monthlyBudget: acc.budget ?? undefined,
888
+ remainingBudget: acc.budget === null ? undefined : acc.budget - acc.spend,
889
+ usedTokens: acc.inputTokens + acc.outputTokens,
890
+ requestCount: acc.requestCount,
891
+ rateLimits
892
+ };
893
+ }
894
+ isLedgerLimited(providerName) {
895
+ if (!this.config.ledger)
896
+ return false;
897
+ for (const dimension of ['rpm', 'rpd', 'tpm', 'tpd']) {
898
+ const check = this.config.ledger.checkRateLimit(providerName, dimension);
899
+ if (!check.allowed) {
900
+ this.logger.warn(`[LLMProviderFactory] Rate limit (${dimension}) exceeded for ${providerName} (${check.used}/${check.limit}), skipping`);
901
+ return true;
902
+ }
903
+ }
904
+ return false;
905
+ }
906
+ requestForProvider(request, providerName, providerModels) {
907
+ const model = providerModels.get(providerName);
908
+ if (!model) {
909
+ return request;
910
+ }
911
+ return { ...request, model };
912
+ }
913
+ applyFallbackDecision(decision, failedProvider, providerChain, currentIndex, providerModels) {
914
+ const targetProvider = decision.fallbackProvider;
915
+ if (!targetProvider || targetProvider === failedProvider || !this.providers.has(targetProvider)) {
916
+ return;
917
+ }
918
+ if (decision.fallbackModel) {
919
+ providerModels.set(targetProvider, decision.fallbackModel);
920
+ }
921
+ const nextIndex = currentIndex + 1;
922
+ const firstIndex = providerChain.indexOf(targetProvider);
923
+ if (firstIndex >= 0 && firstIndex <= currentIndex) {
924
+ return;
925
+ }
926
+ const existingIndex = providerChain.indexOf(targetProvider, nextIndex);
927
+ if (existingIndex >= 0) {
928
+ providerChain.splice(existingIndex, 1);
929
+ }
930
+ providerChain.splice(nextIndex, 0, targetProvider);
931
+ }
394
932
  }
395
933
  /**
396
934
  * Create a provider factory with common configurations