@stackbilt/llm-providers 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +112 -85
  2. package/dist/errors.d.ts +32 -1
  3. package/dist/errors.d.ts.map +1 -1
  4. package/dist/errors.js +29 -2
  5. package/dist/errors.js.map +1 -1
  6. package/dist/factory.d.ts +62 -4
  7. package/dist/factory.d.ts.map +1 -1
  8. package/dist/factory.js +651 -92
  9. package/dist/factory.js.map +1 -1
  10. package/dist/index.d.ts +45 -12
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +72 -14
  13. package/dist/index.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts +5 -2
  15. package/dist/providers/anthropic.d.ts.map +1 -1
  16. package/dist/providers/anthropic.js +157 -43
  17. package/dist/providers/anthropic.js.map +1 -1
  18. package/dist/providers/base.d.ts +18 -2
  19. package/dist/providers/base.d.ts.map +1 -1
  20. package/dist/providers/base.js +107 -5
  21. package/dist/providers/base.js.map +1 -1
  22. package/dist/providers/cerebras.d.ts.map +1 -1
  23. package/dist/providers/cerebras.js +21 -13
  24. package/dist/providers/cerebras.js.map +1 -1
  25. package/dist/providers/cloudflare.d.ts +3 -0
  26. package/dist/providers/cloudflare.d.ts.map +1 -1
  27. package/dist/providers/cloudflare.js +86 -9
  28. package/dist/providers/cloudflare.js.map +1 -1
  29. package/dist/providers/groq.d.ts +2 -1
  30. package/dist/providers/groq.d.ts.map +1 -1
  31. package/dist/providers/groq.js +95 -15
  32. package/dist/providers/groq.js.map +1 -1
  33. package/dist/providers/openai.d.ts +2 -0
  34. package/dist/providers/openai.d.ts.map +1 -1
  35. package/dist/providers/openai.js +56 -24
  36. package/dist/providers/openai.js.map +1 -1
  37. package/dist/types.d.ts +114 -4
  38. package/dist/types.d.ts.map +1 -1
  39. package/dist/utils/circuit-breaker.d.ts +5 -2
  40. package/dist/utils/circuit-breaker.d.ts.map +1 -1
  41. package/dist/utils/circuit-breaker.js +18 -13
  42. package/dist/utils/circuit-breaker.js.map +1 -1
  43. package/dist/utils/cost-tracker.d.ts +9 -2
  44. package/dist/utils/cost-tracker.d.ts.map +1 -1
  45. package/dist/utils/cost-tracker.js +20 -9
  46. package/dist/utils/cost-tracker.js.map +1 -1
  47. package/dist/utils/credit-ledger.d.ts +3 -0
  48. package/dist/utils/credit-ledger.d.ts.map +1 -1
  49. package/dist/utils/credit-ledger.js +5 -2
  50. package/dist/utils/credit-ledger.js.map +1 -1
  51. package/dist/utils/exhaustion.d.ts +38 -0
  52. package/dist/utils/exhaustion.d.ts.map +1 -0
  53. package/dist/utils/exhaustion.js +74 -0
  54. package/dist/utils/exhaustion.js.map +1 -0
  55. package/dist/utils/hooks.d.ts +123 -0
  56. package/dist/utils/hooks.d.ts.map +1 -0
  57. package/dist/utils/hooks.js +44 -0
  58. package/dist/utils/hooks.js.map +1 -0
  59. package/dist/utils/latency-histogram.d.ts +38 -0
  60. package/dist/utils/latency-histogram.d.ts.map +1 -0
  61. package/dist/utils/latency-histogram.js +81 -0
  62. package/dist/utils/latency-histogram.js.map +1 -0
  63. package/dist/utils/logger.d.ts +18 -0
  64. package/dist/utils/logger.d.ts.map +1 -0
  65. package/dist/utils/logger.js +22 -0
  66. package/dist/utils/logger.js.map +1 -0
  67. package/dist/utils/retry.d.ts +4 -2
  68. package/dist/utils/retry.d.ts.map +1 -1
  69. package/dist/utils/retry.js +12 -8
  70. package/dist/utils/retry.js.map +1 -1
  71. package/dist/utils/schema-validator.d.ts +67 -0
  72. package/dist/utils/schema-validator.d.ts.map +1 -0
  73. package/dist/utils/schema-validator.js +140 -0
  74. package/dist/utils/schema-validator.js.map +1 -0
  75. package/package.json +1 -1
package/dist/factory.js CHANGED
@@ -2,22 +2,32 @@
2
2
  * LLM Provider Factory
3
3
  * Creates and manages LLM provider instances with intelligent fallback logic
4
4
  */
5
+ import { noopLogger } from './utils/logger';
6
+ import { noopHooks } from './utils/hooks';
5
7
  import { OpenAIProvider } from './providers/openai';
6
8
  import { AnthropicProvider } from './providers/anthropic';
7
9
  import { CloudflareProvider } from './providers/cloudflare';
8
10
  import { CerebrasProvider } from './providers/cerebras';
9
11
  import { GroqProvider } from './providers/groq';
10
- import { defaultCostTracker } from './utils/cost-tracker';
12
+ import { CostTracker, defaultCostTracker } from './utils/cost-tracker';
11
13
  import { defaultCircuitBreakerManager } from './utils/circuit-breaker';
12
- import { LLMProviderError, ConfigurationError, CircuitBreakerOpenError, AuthenticationError, RateLimitError } from './errors';
14
+ import { defaultExhaustionRegistry } from './utils/exhaustion';
15
+ import { defaultLatencyHistogram } from './utils/latency-histogram';
16
+ import { LLMProviderError, ConfigurationError, CircuitBreakerOpenError, AuthenticationError, RateLimitError, QuotaExceededError, SchemaDriftError, ToolLoopAbortedError, ToolLoopLimitError, } from './errors';
13
17
  export class LLMProviderFactory {
14
18
  providers = new Map();
15
19
  config;
16
20
  costTracker;
17
21
  fallbackRules;
22
+ logger;
23
+ hooks;
18
24
  constructor(config) {
19
25
  this.config = config;
20
- this.costTracker = defaultCostTracker;
26
+ this.logger = config.logger ?? noopLogger;
27
+ this.hooks = config.hooks ?? noopHooks;
28
+ this.costTracker = config.ledger
29
+ ? new CostTracker({}, config.ledger, this.logger)
30
+ : defaultCostTracker;
21
31
  this.fallbackRules = config.fallbackRules || this.getDefaultFallbackRules();
22
32
  this.initializeProviders();
23
33
  }
@@ -25,69 +35,34 @@ export class LLMProviderFactory {
25
35
  * Initialize all configured providers
26
36
  */
27
37
  initializeProviders() {
28
- // Initialize OpenAI provider
29
- if (this.config.openai) {
30
- try {
31
- const provider = new OpenAIProvider(this.config.openai);
32
- if (provider.validateConfig()) {
33
- this.providers.set('openai', provider);
34
- console.log('[LLMProviderFactory] OpenAI provider initialized');
35
- }
36
- }
37
- catch (error) {
38
- console.warn('[LLMProviderFactory] Failed to initialize OpenAI provider:', error);
39
- }
40
- }
41
- // Initialize Anthropic provider
42
- if (this.config.anthropic) {
43
- try {
44
- const provider = new AnthropicProvider(this.config.anthropic);
45
- if (provider.validateConfig()) {
46
- this.providers.set('anthropic', provider);
47
- console.log('[LLMProviderFactory] Anthropic provider initialized');
48
- }
49
- }
50
- catch (error) {
51
- console.warn('[LLMProviderFactory] Failed to initialize Anthropic provider:', error);
52
- }
53
- }
54
- // Initialize Cloudflare provider
55
- if (this.config.cloudflare) {
56
- try {
57
- const provider = new CloudflareProvider(this.config.cloudflare);
58
- if (provider.validateConfig()) {
59
- this.providers.set('cloudflare', provider);
60
- console.log('[LLMProviderFactory] Cloudflare provider initialized');
61
- }
62
- }
63
- catch (error) {
64
- console.warn('[LLMProviderFactory] Failed to initialize Cloudflare provider:', error);
65
- }
66
- }
67
- // Initialize Cerebras provider
68
- if (this.config.cerebras) {
69
- try {
70
- const provider = new CerebrasProvider(this.config.cerebras);
71
- if (provider.validateConfig()) {
72
- this.providers.set('cerebras', provider);
73
- console.log('[LLMProviderFactory] Cerebras provider initialized');
74
- }
75
- }
76
- catch (error) {
77
- console.warn('[LLMProviderFactory] Failed to initialize Cerebras provider:', error);
78
- }
79
- }
80
- // Initialize Groq provider
81
- if (this.config.groq) {
38
+ const providerEntries = [
39
+ ['openai', OpenAIProvider],
40
+ ['anthropic', AnthropicProvider],
41
+ ['cloudflare', CloudflareProvider],
42
+ ['cerebras', CerebrasProvider],
43
+ ['groq', GroqProvider],
44
+ ];
45
+ for (const [name, ProviderClass] of providerEntries) {
46
+ const providerConfig = this.config[name];
47
+ if (!providerConfig)
48
+ continue;
82
49
  try {
83
- const provider = new GroqProvider(this.config.groq);
50
+ const retryConfig = this.config.enableRetries === false && providerConfig.maxRetries === undefined
51
+ ? { maxRetries: 0 }
52
+ : {};
53
+ const provider = new ProviderClass({
54
+ ...providerConfig,
55
+ ...retryConfig,
56
+ logger: this.logger,
57
+ hooks: this.hooks,
58
+ });
84
59
  if (provider.validateConfig()) {
85
- this.providers.set('groq', provider);
86
- console.log('[LLMProviderFactory] Groq provider initialized');
60
+ this.providers.set(name, provider);
61
+ this.logger.info(`[LLMProviderFactory] ${name} provider initialized`);
87
62
  }
88
63
  }
89
64
  catch (error) {
90
- console.warn('[LLMProviderFactory] Failed to initialize Groq provider:', error);
65
+ this.logger.warn(`[LLMProviderFactory] Failed to initialize ${name} provider:`, error.message);
91
66
  }
92
67
  }
93
68
  if (this.providers.size === 0) {
@@ -99,41 +74,345 @@ export class LLMProviderFactory {
99
74
  */
100
75
  async generateResponse(request) {
101
76
  const providerChain = this.buildProviderChain(request);
77
+ const providerModels = new Map();
102
78
  let lastError = null;
103
- for (const providerName of providerChain) {
79
+ let previousProvider = null;
80
+ for (let index = 0; index < providerChain.length; index++) {
81
+ const providerName = providerChain[index];
104
82
  try {
105
83
  const provider = this.providers.get(providerName);
106
84
  if (!provider)
107
85
  continue;
86
+ // Check exhaustion registry
87
+ if (defaultExhaustionRegistry.isExhausted(providerName)) {
88
+ this.logger.warn(`[LLMProviderFactory] Provider ${providerName} is quota-exhausted, skipping`);
89
+ continue;
90
+ }
108
91
  // Check circuit breaker
109
92
  if (this.config.enableCircuitBreaker) {
110
93
  const breaker = defaultCircuitBreakerManager.getBreaker(providerName);
111
94
  if (breaker.isOpen()) {
112
- console.warn(`[LLMProviderFactory] Circuit breaker open for ${providerName}, skipping`);
95
+ this.logger.warn(`[LLMProviderFactory] Circuit breaker open for ${providerName}, skipping`);
113
96
  continue;
114
97
  }
115
98
  }
116
- console.log(`[LLMProviderFactory] Trying provider: ${providerName}`);
117
- const response = await provider.generateResponse(request);
118
- // Track cost if enabled
119
- if (this.config.costOptimization) {
99
+ if (this.config.ledger && this.isLedgerLimited(providerName)) {
100
+ continue;
101
+ }
102
+ // Emit fallback event if this isn't the first provider attempted
103
+ if (previousProvider && lastError) {
104
+ this.hooks.onFallback?.({
105
+ fromProvider: previousProvider,
106
+ toProvider: providerName,
107
+ requestId: request.requestId,
108
+ reason: lastError.message,
109
+ errorCode: lastError.code,
110
+ timestamp: Date.now(),
111
+ });
112
+ }
113
+ this.logger.debug(`[LLMProviderFactory] Trying provider: ${providerName}`);
114
+ const providerRequest = this.requestForProvider(request, providerName, providerModels);
115
+ const model = providerRequest.model || provider.models[0] || 'unknown';
116
+ await this.checkQuota(providerName, provider, providerRequest, model);
117
+ this.hooks.onRequestStart?.({
118
+ provider: providerName,
119
+ model,
120
+ requestId: request.requestId,
121
+ tenantId: request.tenantId,
122
+ timestamp: Date.now(),
123
+ });
124
+ const startTime = Date.now();
125
+ const response = await provider.generateResponse(providerRequest);
126
+ const durationMs = Date.now() - startTime;
127
+ this.hooks.onRequestEnd?.({
128
+ provider: providerName,
129
+ model: response.model,
130
+ requestId: request.requestId,
131
+ tenantId: request.tenantId,
132
+ durationMs,
133
+ usage: response.usage,
134
+ finishReason: response.finishReason,
135
+ timestamp: Date.now(),
136
+ });
137
+ // Track spend whenever analytics or ledger accounting is configured.
138
+ if (this.config.costOptimization || this.config.ledger) {
120
139
  this.costTracker.trackCost(providerName, response);
121
140
  }
122
- console.log(`[LLMProviderFactory] Successfully used provider: ${providerName}`);
141
+ this.recordQuota(providerName, response, providerRequest);
142
+ this.logger.debug(`[LLMProviderFactory] Successfully used provider: ${providerName}`);
123
143
  return response;
124
144
  }
125
145
  catch (error) {
126
- lastError = error;
127
- console.warn(`[LLMProviderFactory] Provider ${providerName} failed:`, error);
128
- // Check if we should continue trying other providers
129
- if (!this.shouldFallback(error)) {
146
+ const err = error;
147
+ lastError = err;
148
+ previousProvider = providerName;
149
+ this.logger.warn(`[LLMProviderFactory] Provider ${providerName} failed:`, err.message);
150
+ this.hooks.onRequestError?.({
151
+ provider: providerName,
152
+ model: request.model || 'unknown',
153
+ requestId: request.requestId,
154
+ tenantId: request.tenantId,
155
+ error: err,
156
+ errorCode: err.code,
157
+ attempt: 1,
158
+ willRetry: this.shouldFallback(err),
159
+ timestamp: Date.now(),
160
+ });
161
+ // Auto-mark quota-exhausted providers
162
+ if (err instanceof QuotaExceededError) {
163
+ defaultExhaustionRegistry.markExhausted(providerName);
164
+ this.hooks.onQuotaExhausted?.({
165
+ provider: providerName,
166
+ resetAfterMs: defaultExhaustionRegistry.defaultResetMs,
167
+ timestamp: Date.now(),
168
+ });
169
+ }
170
+ // Schema drift — the upstream API silently changed shape. Surface
171
+ // structured telemetry so oncall sees the drift before it cascades.
172
+ if (err instanceof SchemaDriftError) {
173
+ this.hooks.onSchemaDrift?.({
174
+ provider: providerName,
175
+ model: request.model,
176
+ requestId: request.requestId,
177
+ path: err.path,
178
+ expected: err.expected,
179
+ actual: err.actual,
180
+ timestamp: Date.now(),
181
+ });
182
+ }
183
+ const fallbackDecision = this.getFallbackDecision(error);
184
+ if (!fallbackDecision.shouldFallback) {
130
185
  throw error;
131
186
  }
187
+ this.applyFallbackDecision(fallbackDecision, providerName, providerChain, index, providerModels);
132
188
  }
133
189
  }
134
190
  // All providers failed
135
191
  throw lastError || new LLMProviderError('All providers failed', 'ALL_PROVIDERS_FAILED', 'factory', false);
136
192
  }
193
+ async generateResponseStream(request) {
194
+ const providerChain = this.buildProviderChain({ ...request, stream: true });
195
+ const providerModels = new Map();
196
+ let lastError = null;
197
+ let previousProvider = null;
198
+ for (let index = 0; index < providerChain.length; index++) {
199
+ const providerName = providerChain[index];
200
+ try {
201
+ const provider = this.providers.get(providerName);
202
+ if (!provider || !provider.supportsStreaming || !provider.streamResponse)
203
+ continue;
204
+ if (defaultExhaustionRegistry.isExhausted(providerName))
205
+ continue;
206
+ if (this.config.enableCircuitBreaker && defaultCircuitBreakerManager.getBreaker(providerName).isOpen())
207
+ continue;
208
+ if (this.config.ledger && this.isLedgerLimited(providerName))
209
+ continue;
210
+ if (previousProvider && lastError) {
211
+ this.hooks.onFallback?.({
212
+ fromProvider: previousProvider,
213
+ toProvider: providerName,
214
+ requestId: request.requestId,
215
+ reason: lastError.message,
216
+ errorCode: lastError.code,
217
+ timestamp: Date.now(),
218
+ });
219
+ }
220
+ const providerRequest = {
221
+ ...this.requestForProvider(request, providerName, providerModels),
222
+ stream: true
223
+ };
224
+ const model = providerRequest.model || provider.models[0] || 'unknown';
225
+ const estimatedCost = await this.checkQuota(providerName, provider, providerRequest, model);
226
+ this.hooks.onRequestStart?.({
227
+ provider: providerName,
228
+ model,
229
+ requestId: request.requestId,
230
+ tenantId: request.tenantId,
231
+ timestamp: Date.now(),
232
+ });
233
+ const startTime = Date.now();
234
+ const opened = await this.openStreamWithFirstChunk(provider, providerRequest);
235
+ return this.buildFactoryStream(opened.reader, opened.firstChunk, opened.done, providerName, model, providerRequest, startTime, estimatedCost);
236
+ }
237
+ catch (error) {
238
+ const err = error;
239
+ lastError = err;
240
+ previousProvider = providerName;
241
+ this.hooks.onRequestError?.({
242
+ provider: providerName,
243
+ model: request.model || 'unknown',
244
+ requestId: request.requestId,
245
+ tenantId: request.tenantId,
246
+ error: err,
247
+ errorCode: err.code,
248
+ attempt: 1,
249
+ willRetry: this.shouldFallback(err),
250
+ timestamp: Date.now(),
251
+ });
252
+ const fallbackDecision = this.getFallbackDecision(err);
253
+ if (!fallbackDecision.shouldFallback) {
254
+ throw error;
255
+ }
256
+ this.applyFallbackDecision(fallbackDecision, providerName, providerChain, index, providerModels);
257
+ }
258
+ }
259
+ throw lastError || new LLMProviderError('All streaming providers failed', 'ALL_PROVIDERS_FAILED', 'factory', false);
260
+ }
261
+ async generateResponseWithTools(request, toolExecutor, opts = {}) {
262
+ const maxIterations = opts.maxIterations ?? 10;
263
+ let cumulativeCost = 0;
264
+ let messages = [...request.messages];
265
+ let lastResponseCost = 0;
266
+ for (let iteration = 0; iteration <= maxIterations; iteration++) {
267
+ if (opts.abortSignal?.aborted) {
268
+ throw new ToolLoopAbortedError('factory');
269
+ }
270
+ // Pre-flight cost guard: use the previous iteration's cost as an
271
+ // estimate for the next one. This prevents obvious overshoots where
272
+ // a single expensive response would blow past the cap. The cap is
273
+ // still soft (±1 iteration tolerance) because the actual cost is
274
+ // only known after the response.
275
+ if (opts.maxCostUSD !== undefined && iteration > 0) {
276
+ const projectedCost = cumulativeCost + lastResponseCost;
277
+ if (projectedCost > opts.maxCostUSD) {
278
+ throw new ToolLoopLimitError('factory', `Tool loop would exceed max cost ${opts.maxCostUSD} (projected ${projectedCost.toFixed(4)})`);
279
+ }
280
+ }
281
+ const response = await this.generateResponse({ ...request, messages });
282
+ lastResponseCost = response.usage.cost;
283
+ cumulativeCost += lastResponseCost;
284
+ if (opts.maxCostUSD !== undefined && cumulativeCost > opts.maxCostUSD) {
285
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded max cost ${opts.maxCostUSD}`);
286
+ }
287
+ if (!response.toolCalls || response.toolCalls.length === 0) {
288
+ return {
289
+ ...response,
290
+ metadata: {
291
+ ...response.metadata,
292
+ cumulativeCost,
293
+ toolIterations: iteration
294
+ }
295
+ };
296
+ }
297
+ if (iteration >= maxIterations) {
298
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded ${maxIterations} iterations`);
299
+ }
300
+ const toolResults = [];
301
+ for (const toolCall of response.toolCalls) {
302
+ if (opts.abortSignal?.aborted) {
303
+ throw new ToolLoopAbortedError('factory');
304
+ }
305
+ let parsedArguments;
306
+ try {
307
+ parsedArguments = JSON.parse(toolCall.function.arguments);
308
+ }
309
+ catch {
310
+ parsedArguments = toolCall.function.arguments;
311
+ }
312
+ try {
313
+ const output = await toolExecutor.execute(toolCall.function.name, parsedArguments);
314
+ toolResults.push({
315
+ id: toolCall.id,
316
+ output: typeof output === 'string' ? output : JSON.stringify(output)
317
+ });
318
+ }
319
+ catch (error) {
320
+ toolResults.push({
321
+ id: toolCall.id,
322
+ output: '',
323
+ error: error.message
324
+ });
325
+ }
326
+ }
327
+ messages = [
328
+ ...messages,
329
+ {
330
+ role: 'assistant',
331
+ content: response.message,
332
+ toolCalls: response.toolCalls
333
+ },
334
+ {
335
+ role: 'user',
336
+ content: '',
337
+ toolResults
338
+ }
339
+ ];
340
+ const state = {
341
+ iteration: iteration + 1,
342
+ cumulativeCost,
343
+ messageCount: messages.length,
344
+ lastToolCalls: response.toolCalls
345
+ };
346
+ await opts.onIteration?.(iteration + 1, state);
347
+ }
348
+ throw new ToolLoopLimitError('factory', `Tool loop exceeded ${maxIterations} iterations`);
349
+ }
350
+ async classify(input, options = {}) {
351
+ const parser = options.schema && typeof options.schema.parse === 'function'
352
+ ? options.schema.parse
353
+ : undefined;
354
+ const schemaDescription = options.schema && !parser
355
+ ? `\nJSON schema:\n${JSON.stringify(options.schema)}`
356
+ : '';
357
+ const systemPrompt = options.systemPrompt ||
358
+ `Classify the input and return only valid JSON.${schemaDescription}`;
359
+ const request = typeof input === 'string'
360
+ ? {
361
+ messages: [{ role: 'user', content: input }],
362
+ model: options.model,
363
+ temperature: options.temperature ?? 0,
364
+ maxTokens: options.maxTokens,
365
+ response_format: { type: 'json_object' },
366
+ systemPrompt,
367
+ seed: options.seed
368
+ }
369
+ : {
370
+ ...input,
371
+ model: options.model ?? input.model,
372
+ temperature: options.temperature ?? input.temperature ?? 0,
373
+ maxTokens: options.maxTokens ?? input.maxTokens,
374
+ response_format: { type: 'json_object' },
375
+ systemPrompt: options.systemPrompt ?? input.systemPrompt ?? systemPrompt,
376
+ seed: options.seed ?? input.seed
377
+ };
378
+ const response = await this.generateResponse(request);
379
+ const parsed = this.parseJsonResponse(response.message);
380
+ const data = parser ? parser(parsed) : parsed;
381
+ const confidenceValue = parsed[options.confidenceField ?? 'confidence'];
382
+ return {
383
+ data,
384
+ confidence: typeof confidenceValue === 'number' ? confidenceValue : undefined,
385
+ response
386
+ };
387
+ }
388
+ async analyzeImage(input) {
389
+ return this.generateResponse({
390
+ messages: [{ role: 'user', content: input.prompt }],
391
+ images: [input.image],
392
+ model: input.model ?? this.getDefaultVisionModel(),
393
+ systemPrompt: input.systemPrompt,
394
+ temperature: input.temperature,
395
+ maxTokens: input.maxTokens,
396
+ response_format: input.response_format,
397
+ tenantId: input.tenantId,
398
+ requestId: input.requestId,
399
+ metadata: input.metadata
400
+ });
401
+ }
402
+ async getProviderBalance(provider) {
403
+ if (provider) {
404
+ const balance = await this.getSingleProviderBalance(provider);
405
+ this.hooks.onProviderBalance?.({ provider, balance, timestamp: Date.now() });
406
+ return balance;
407
+ }
408
+ const result = {};
409
+ for (const providerName of this.providers.keys()) {
410
+ const balance = await this.getSingleProviderBalance(providerName);
411
+ result[providerName] = balance;
412
+ this.hooks.onProviderBalance?.({ provider: providerName, balance, timestamp: Date.now() });
413
+ }
414
+ return result;
415
+ }
137
416
  /**
138
417
  * Build provider chain based on request and configuration
139
418
  */
@@ -166,12 +445,16 @@ export class LLMProviderFactory {
166
445
  * Get prioritized list of providers based on cost optimization and capabilities
167
446
  */
168
447
  getPrioritizedProviders(request) {
448
+ const visionOnly = (request.images?.length ?? 0) > 0;
169
449
  if (!this.config.costOptimization) {
170
- // Default priority: Cloudflare (cheapest) -> Anthropic -> OpenAI
171
- return ['cloudflare', 'anthropic', 'openai'];
450
+ // Default priority: all configured providers, cheapest first
451
+ return ['cloudflare', 'cerebras', 'groq', 'anthropic', 'openai']
452
+ .filter(p => this.providers.has(p))
453
+ .filter(p => !visionOnly || this.providerSupportsVision(p));
172
454
  }
173
455
  // Cost-optimized routing
174
- const providers = Array.from(this.providers.keys());
456
+ const providers = Array.from(this.providers.keys())
457
+ .filter(p => !visionOnly || this.providerSupportsVision(p));
175
458
  const sortedProviders = [...providers].sort((a, b) => {
176
459
  const providerA = this.providers.get(a);
177
460
  const providerB = this.providers.get(b);
@@ -203,8 +486,8 @@ export class LLMProviderFactory {
203
486
  if (model.startsWith('@cf/')) {
204
487
  return 'cloudflare';
205
488
  }
206
- // Groq models
207
- if (model.includes('-versatile') || model.includes('-instant')) {
489
+ // Groq models (openai/gpt-oss-120b is Groq-hosted, not @cf/ prefixed)
490
+ if (model.includes('-versatile') || model.includes('-instant') || model === 'openai/gpt-oss-120b') {
208
491
  return 'groq';
209
492
  }
210
493
  // Cerebras models
@@ -218,29 +501,49 @@ export class LLMProviderFactory {
218
501
  * Check if we should fallback to another provider
219
502
  */
220
503
  shouldFallback(error) {
504
+ return this.getFallbackDecision(error).shouldFallback;
505
+ }
506
+ /**
507
+ * Get fallback routing decision for an error.
508
+ */
509
+ getFallbackDecision(error) {
221
510
  // Don't fallback for authentication errors
222
511
  if (error instanceof AuthenticationError) {
223
- return false;
512
+ return { shouldFallback: false };
224
513
  }
225
514
  // Don't fallback for configuration errors
226
515
  if (error instanceof ConfigurationError) {
227
- return false;
516
+ return { shouldFallback: false };
517
+ }
518
+ // Custom fallback rules can provide explicit provider/model routing.
519
+ for (const rule of this.fallbackRules) {
520
+ if (this.evaluateFallbackRule(rule, error)) {
521
+ return {
522
+ shouldFallback: true,
523
+ fallbackProvider: rule.fallbackProvider,
524
+ fallbackModel: rule.fallbackModel
525
+ };
526
+ }
228
527
  }
229
528
  // Fallback for circuit breaker, rate limits, and server errors
230
529
  if (error instanceof CircuitBreakerOpenError ||
231
- error instanceof RateLimitError ||
232
- error.code === 'SERVER_ERROR' ||
233
- error.code === 'NETWORK_ERROR' ||
234
- error.code === 'TIMEOUT') {
235
- return true;
530
+ error instanceof RateLimitError) {
531
+ return { shouldFallback: true };
236
532
  }
237
- // Check custom fallback rules
238
- for (const rule of this.fallbackRules) {
239
- if (this.evaluateFallbackRule(rule, error)) {
240
- return true;
533
+ // Schema drift: provider's response shape changed. Retry won't help;
534
+ // only another provider can. Defense against silent API deprecations.
535
+ if (error instanceof SchemaDriftError) {
536
+ return { shouldFallback: true };
537
+ }
538
+ if (error instanceof LLMProviderError) {
539
+ if (error.code === 'SERVER_ERROR' ||
540
+ error.code === 'NETWORK_ERROR' ||
541
+ error.code === 'TIMEOUT' ||
542
+ error.code === 'SCHEMA_DRIFT') {
543
+ return { shouldFallback: true };
241
544
  }
242
545
  }
243
- return false;
546
+ return { shouldFallback: false };
244
547
  }
245
548
  /**
246
549
  * Evaluate a fallback rule against an error
@@ -364,7 +667,19 @@ export class LLMProviderFactory {
364
667
  return recommendations;
365
668
  }
366
669
  /**
367
- * Reset all provider metrics and circuit breakers
670
+ * Get latency histogram data for all providers
671
+ */
672
+ getLatencyHistogram() {
673
+ return defaultLatencyHistogram.allSummaries();
674
+ }
675
+ /**
676
+ * Get currently exhausted providers
677
+ */
678
+ getExhaustedProviders() {
679
+ return defaultExhaustionRegistry.getExhaustedProviders();
680
+ }
681
+ /**
682
+ * Reset all provider metrics, circuit breakers, exhaustion, and histograms
368
683
  */
369
684
  reset() {
370
685
  for (const [name, provider] of this.providers) {
@@ -373,24 +688,268 @@ export class LLMProviderFactory {
373
688
  defaultCircuitBreakerManager.reset(name);
374
689
  }
375
690
  }
376
- if (this.config.costOptimization) {
691
+ if (this.config.costOptimization || this.config.ledger) {
377
692
  this.costTracker.reset();
378
693
  }
694
+ defaultExhaustionRegistry.reset();
695
+ defaultLatencyHistogram.reset();
379
696
  }
380
697
  /**
381
698
  * Update factory configuration
382
699
  */
383
700
  updateConfig(config) {
384
701
  this.config = { ...this.config, ...config };
702
+ if ('ledger' in config) {
703
+ this.costTracker = config.ledger
704
+ ? new CostTracker({}, config.ledger, this.logger)
705
+ : defaultCostTracker;
706
+ }
385
707
  if (config.fallbackRules) {
386
708
  this.fallbackRules = config.fallbackRules;
387
709
  }
388
710
  // Re-initialize providers if configs changed
389
- if (config.openai || config.anthropic || config.cloudflare || config.cerebras || config.groq) {
711
+ if (config.openai ||
712
+ config.anthropic ||
713
+ config.cloudflare ||
714
+ config.cerebras ||
715
+ config.groq ||
716
+ config.enableRetries !== undefined) {
390
717
  this.providers.clear();
391
718
  this.initializeProviders();
392
719
  }
393
720
  }
721
+ async openStreamWithFirstChunk(provider, request) {
722
+ if (!provider.streamResponse) {
723
+ throw new ConfigurationError(provider.name, 'Provider does not support streaming');
724
+ }
725
+ const stream = await provider.streamResponse(request);
726
+ const reader = stream.getReader();
727
+ const first = await reader.read();
728
+ return {
729
+ reader,
730
+ firstChunk: first.value,
731
+ done: first.done
732
+ };
733
+ }
734
+ buildFactoryStream(reader, firstChunk, firstDone, providerName, model, request, startTime, estimatedCost) {
735
+ return new ReadableStream({
736
+ start: async (controller) => {
737
+ try {
738
+ if (!firstDone && firstChunk !== undefined) {
739
+ controller.enqueue(firstChunk);
740
+ }
741
+ if (!firstDone) {
742
+ while (true) {
743
+ const { done, value } = await reader.read();
744
+ if (done)
745
+ break;
746
+ if (value !== undefined)
747
+ controller.enqueue(value);
748
+ }
749
+ }
750
+ const usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, cost: estimatedCost };
751
+ this.hooks.onRequestEnd?.({
752
+ provider: providerName,
753
+ model,
754
+ requestId: request.requestId,
755
+ tenantId: request.tenantId,
756
+ durationMs: Date.now() - startTime,
757
+ usage,
758
+ finishReason: 'stop',
759
+ timestamp: Date.now(),
760
+ });
761
+ this.recordQuotaInput({
762
+ tenantId: request.tenantId,
763
+ provider: providerName,
764
+ model,
765
+ actualCost: estimatedCost,
766
+ metadata: request.metadata
767
+ });
768
+ controller.close();
769
+ }
770
+ catch (error) {
771
+ controller.error(error);
772
+ }
773
+ finally {
774
+ reader.releaseLock();
775
+ }
776
+ }
777
+ });
778
+ }
779
+ async checkQuota(providerName, provider, request, model) {
780
+ const estimatedCost = provider.estimateCost(request);
781
+ if (!this.config.quotaHook) {
782
+ return estimatedCost;
783
+ }
784
+ const input = {
785
+ tenantId: request.tenantId,
786
+ provider: providerName,
787
+ model,
788
+ estimatedCost,
789
+ metadata: request.metadata
790
+ };
791
+ try {
792
+ const result = await this.config.quotaHook.check(input);
793
+ this.hooks.onQuotaCheck?.({ input, result, timestamp: Date.now() });
794
+ if (!result.allowed) {
795
+ this.hooks.onQuotaDenied?.({ input, reason: result.reason, timestamp: Date.now() });
796
+ throw new QuotaExceededError(providerName, result.reason || 'Quota hook denied request');
797
+ }
798
+ }
799
+ catch (error) {
800
+ if (error instanceof QuotaExceededError) {
801
+ throw error;
802
+ }
803
+ if ((this.config.quotaFailPolicy ?? 'closed') === 'open') {
804
+ this.logger.warn(`[LLMProviderFactory] Quota check failed open for ${providerName}:`, error.message);
805
+ return estimatedCost;
806
+ }
807
+ const reason = error.message;
808
+ this.hooks.onQuotaDenied?.({ input, reason, timestamp: Date.now() });
809
+ throw new QuotaExceededError(providerName, reason);
810
+ }
811
+ return estimatedCost;
812
+ }
813
+ recordQuota(providerName, response, request) {
814
+ this.recordQuotaInput({
815
+ tenantId: request.tenantId,
816
+ provider: providerName,
817
+ model: response.model,
818
+ actualCost: response.usage.cost,
819
+ inputTokens: response.usage.inputTokens,
820
+ outputTokens: response.usage.outputTokens,
821
+ metadata: request.metadata
822
+ });
823
+ }
824
+ recordQuotaInput(input) {
825
+ if (!this.config.quotaHook)
826
+ return;
827
+ void this.config.quotaHook.record(input).catch(error => {
828
+ this.logger.warn(`[LLMProviderFactory] Quota record failed for ${input.provider}:`, error.message);
829
+ });
830
+ }
831
+ parseJsonResponse(message) {
832
+ try {
833
+ return JSON.parse(message);
834
+ }
835
+ catch {
836
+ // Strip markdown fences (```json ... ``` or ``` ... ```) before
837
+ // falling back to brace extraction so fenced JSON parses cleanly.
838
+ const fenced = message.replace(/^```(?:json)?\s*\n?/m, '').replace(/\n?```\s*$/m, '');
839
+ try {
840
+ return JSON.parse(fenced);
841
+ }
842
+ catch {
843
+ // Last resort: extract outermost braces.
844
+ const start = fenced.indexOf('{');
845
+ const end = fenced.lastIndexOf('}');
846
+ if (start >= 0 && end > start) {
847
+ return JSON.parse(fenced.slice(start, end + 1));
848
+ }
849
+ }
850
+ throw new ConfigurationError('factory', 'Classification response was not valid JSON');
851
+ }
852
+ }
853
+ getDefaultVisionModel() {
854
+ if (this.config.defaultVisionModel)
855
+ return this.config.defaultVisionModel;
856
+ if (this.providers.has('anthropic'))
857
+ return 'claude-haiku-4-5-20251001';
858
+ if (this.providers.has('openai'))
859
+ return 'gpt-4o-mini';
860
+ if (this.providers.has('cloudflare'))
861
+ return '@cf/google/gemma-4-26b-a4b-it';
862
+ return undefined;
863
+ }
864
+ providerSupportsVision(providerName) {
865
+ return this.providers.get(providerName)?.supportsVision === true;
866
+ }
867
+ async getSingleProviderBalance(providerName) {
868
+ const ledgerBalance = this.getLedgerBalance(providerName);
869
+ if (ledgerBalance) {
870
+ return ledgerBalance;
871
+ }
872
+ const provider = this.providers.get(providerName);
873
+ if (!provider) {
874
+ return {
875
+ provider: providerName,
876
+ status: 'error',
877
+ source: 'not_supported',
878
+ message: `Provider '${providerName}' is not configured`
879
+ };
880
+ }
881
+ if (provider.getProviderBalance) {
882
+ return provider.getProviderBalance();
883
+ }
884
+ return {
885
+ provider: providerName,
886
+ status: 'unavailable',
887
+ source: 'not_supported',
888
+ message: `Provider '${providerName}' does not expose balance reporting`
889
+ };
890
+ }
891
+ getLedgerBalance(providerName) {
892
+ const acc = this.config.ledger?.getProviderAccumulator(providerName);
893
+ if (!acc)
894
+ return undefined;
895
+ const rateLimits = {};
896
+ for (const [dimension, window] of Object.entries(acc.rateLimits)) {
897
+ rateLimits[dimension] = {
898
+ limit: window.limit,
899
+ used: window.used,
900
+ remaining: Math.max(window.limit - window.used, 0)
901
+ };
902
+ }
903
+ return {
904
+ provider: providerName,
905
+ status: 'available',
906
+ source: 'ledger',
907
+ currentSpend: acc.spend,
908
+ monthlyBudget: acc.budget ?? undefined,
909
+ remainingBudget: acc.budget === null ? undefined : acc.budget - acc.spend,
910
+ usedTokens: acc.inputTokens + acc.outputTokens,
911
+ requestCount: acc.requestCount,
912
+ rateLimits
913
+ };
914
+ }
915
+ isLedgerLimited(providerName) {
916
+ if (!this.config.ledger)
917
+ return false;
918
+ for (const dimension of ['rpm', 'rpd', 'tpm', 'tpd']) {
919
+ const check = this.config.ledger.checkRateLimit(providerName, dimension);
920
+ if (!check.allowed) {
921
+ this.logger.warn(`[LLMProviderFactory] Rate limit (${dimension}) exceeded for ${providerName} (${check.used}/${check.limit}), skipping`);
922
+ return true;
923
+ }
924
+ }
925
+ return false;
926
+ }
927
+ requestForProvider(request, providerName, providerModels) {
928
+ const model = providerModels.get(providerName);
929
+ if (!model) {
930
+ return request;
931
+ }
932
+ return { ...request, model };
933
+ }
934
+ applyFallbackDecision(decision, failedProvider, providerChain, currentIndex, providerModels) {
935
+ const targetProvider = decision.fallbackProvider;
936
+ if (!targetProvider || targetProvider === failedProvider || !this.providers.has(targetProvider)) {
937
+ return;
938
+ }
939
+ if (decision.fallbackModel) {
940
+ providerModels.set(targetProvider, decision.fallbackModel);
941
+ }
942
+ const nextIndex = currentIndex + 1;
943
+ const firstIndex = providerChain.indexOf(targetProvider);
944
+ if (firstIndex >= 0 && firstIndex <= currentIndex) {
945
+ return;
946
+ }
947
+ const existingIndex = providerChain.indexOf(targetProvider, nextIndex);
948
+ if (existingIndex >= 0) {
949
+ providerChain.splice(existingIndex, 1);
950
+ }
951
+ providerChain.splice(nextIndex, 0, targetProvider);
952
+ }
394
953
  }
395
954
  /**
396
955
  * Create a provider factory with common configurations