mcp-agent-foundry 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +22 -2
  2. package/dist/cli/setup-wizard.d.ts.map +1 -1
  3. package/dist/cli/setup-wizard.js +873 -8
  4. package/dist/cli/setup-wizard.js.map +1 -1
  5. package/dist/cli/test-connection.d.ts +28 -0
  6. package/dist/cli/test-connection.d.ts.map +1 -1
  7. package/dist/cli/test-connection.js +335 -1
  8. package/dist/cli/test-connection.js.map +1 -1
  9. package/dist/cli.d.ts +13 -1
  10. package/dist/cli.d.ts.map +1 -1
  11. package/dist/cli.js +169 -1
  12. package/dist/cli.js.map +1 -1
  13. package/dist/config/validator.d.ts +113 -0
  14. package/dist/config/validator.d.ts.map +1 -1
  15. package/dist/config/validator.js +113 -0
  16. package/dist/config/validator.js.map +1 -1
  17. package/dist/failover/health-tracker.d.ts +175 -0
  18. package/dist/failover/health-tracker.d.ts.map +1 -0
  19. package/dist/failover/health-tracker.js +350 -0
  20. package/dist/failover/health-tracker.js.map +1 -0
  21. package/dist/failover/index.d.ts +9 -0
  22. package/dist/failover/index.d.ts.map +1 -0
  23. package/dist/failover/index.js +9 -0
  24. package/dist/failover/index.js.map +1 -0
  25. package/dist/failover/orchestrator.d.ts +189 -0
  26. package/dist/failover/orchestrator.d.ts.map +1 -0
  27. package/dist/failover/orchestrator.js +488 -0
  28. package/dist/failover/orchestrator.js.map +1 -0
  29. package/dist/failover/pricing.d.ts +115 -0
  30. package/dist/failover/pricing.d.ts.map +1 -0
  31. package/dist/failover/pricing.js +283 -0
  32. package/dist/failover/pricing.js.map +1 -0
  33. package/dist/persistence/state-schema.d.ts +50 -0
  34. package/dist/persistence/state-schema.d.ts.map +1 -1
  35. package/dist/persistence/state-schema.js +2 -0
  36. package/dist/persistence/state-schema.js.map +1 -1
  37. package/dist/providers/fireworks.d.ts +23 -0
  38. package/dist/providers/fireworks.d.ts.map +1 -0
  39. package/dist/providers/fireworks.js +31 -0
  40. package/dist/providers/fireworks.js.map +1 -0
  41. package/dist/providers/groq.d.ts +23 -0
  42. package/dist/providers/groq.d.ts.map +1 -0
  43. package/dist/providers/groq.js +31 -0
  44. package/dist/providers/groq.js.map +1 -0
  45. package/dist/providers/kimi-code.d.ts +32 -0
  46. package/dist/providers/kimi-code.d.ts.map +1 -0
  47. package/dist/providers/kimi-code.js +46 -0
  48. package/dist/providers/kimi-code.js.map +1 -0
  49. package/dist/providers/kimi.d.ts +1 -1
  50. package/dist/providers/kimi.js +1 -1
  51. package/dist/providers/openrouter.d.ts +23 -0
  52. package/dist/providers/openrouter.d.ts.map +1 -0
  53. package/dist/providers/openrouter.js +31 -0
  54. package/dist/providers/openrouter.js.map +1 -0
  55. package/dist/providers/perplexity.d.ts +29 -0
  56. package/dist/providers/perplexity.d.ts.map +1 -0
  57. package/dist/providers/perplexity.js +51 -0
  58. package/dist/providers/perplexity.js.map +1 -0
  59. package/dist/providers/together.d.ts +23 -0
  60. package/dist/providers/together.d.ts.map +1 -0
  61. package/dist/providers/together.js +31 -0
  62. package/dist/providers/together.js.map +1 -0
  63. package/dist/router/engine.d.ts +21 -0
  64. package/dist/router/engine.d.ts.map +1 -1
  65. package/dist/router/engine.js +81 -21
  66. package/dist/router/engine.js.map +1 -1
  67. package/dist/server.d.ts.map +1 -1
  68. package/dist/server.js +49 -0
  69. package/dist/server.js.map +1 -1
  70. package/dist/types.d.ts +52 -1
  71. package/dist/types.d.ts.map +1 -1
  72. package/dist/types.js +14 -0
  73. package/dist/types.js.map +1 -1
  74. package/package.json +5 -2
  75. package/scripts/postinstall.js +78 -0
  76. package/scripts/preuninstall.js +67 -0
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Failover Orchestrator
3
+ *
4
+ * Wraps provider operations with intelligent retry and failover logic.
5
+ * Integrates health tracking, circuit breakers, and cost-aware routing.
6
+ */
7
+ import type { Logger } from '../observability/logger.js';
8
+ import type { ProviderManager } from '../providers/manager.js';
9
+ import type { Config, CompletionResponse, Message } from '../types.js';
10
+ import { type CircuitBreakerOptions } from '../utils/circuit-breaker.js';
11
+ import { ProviderHealthTracker, type ProviderHealth } from './health-tracker.js';
12
+ import { PricingService } from './pricing.js';
13
+ import type { PersistedFailoverEvent, PersistedProviderHealth } from '../persistence/state-schema.js';
14
+ /**
15
+ * Per-request options for a completion executed through the failover orchestrator; each field is copied as-is into the request sent to the provider.
16
+ */
17
+ export interface FailoverCompletionOptions {
18
+ /** Request temperature, passed through to the provider request */
19
+ temperature?: number;
20
+ /** Maximum tokens to generate, passed through to the provider request */
21
+ max_tokens?: number;
22
+ /** Request timeout in ms, passed through to the provider request */
23
+ timeout_ms?: number;
24
+ }
25
+ /**
26
+ * Result of a completion together with metadata describing the retries and failovers that preceded it.
27
+ */
28
+ export interface FailoverCompletionResult {
29
+ /** The completion response returned by the provider that succeeded */
30
+ response: CompletionResponse;
31
+ /** Provider that handled the request */
32
+ provider: string;
33
+ /** Model used on that provider */
34
+ model: string;
35
+ /** Number of retries performed on the provider that succeeded (retries spent on earlier, failed providers are not included) */
36
+ retryCount: number;
37
+ /** Number of failovers to another provider before the response was obtained (0 when no failover happened) */
38
+ failoverCount: number;
39
+ /** Total elapsed time in ms measured from the start of the call, including time spent on failed attempts */
40
+ latencyMs: number;
41
+ /** Whether failover was triggered (true when failoverCount is greater than 0) */
42
+ usedFailover: boolean;
43
+ }
44
+ /**
45
+ * Configuration for the failover orchestrator. Omitted fields fall back to the defaults noted on each field.
46
+ */
47
+ export interface FailoverOrchestratorConfig {
48
+ /** Enable failover (default: true); when false only the role's primary provider is called, without retry or health tracking */
49
+ enabled?: boolean;
50
+ /** Failure budget across the provider chain (default: 6); the implementation consumes one unit per provider that fails, not one per individual retry */
51
+ maxTotalRetries?: number;
52
+ /** Health check interval in ms (default: 60000), handed to the provider health tracker */
53
+ healthCheckIntervalMs?: number;
54
+ /** Cooldown period in ms (default: 300000), handed to the provider health tracker */
55
+ cooldownMs?: number;
56
+ /** When true, getAvailableProviders() returns its result sorted by cost (default: false) */
57
+ preferCostEfficient?: boolean;
58
+ /** Options used when creating the circuit breaker of each provider */
59
+ circuitBreaker?: Partial<CircuitBreakerOptions>;
60
+ }
61
+ /**
62
+ * Failover event for logging and persistence.
+ *
+ * `fromProvider` is the provider whose request failed and `toProvider` the one selected to try next.
+ * `reason` and `errorMessage` both carry the message of the triggering error.
+ * `errorCode` is the provider error's status code when one is known (429 for rate-limit errors).
63
+ */
64
+ export interface FailoverEvent {
65
+ timestamp: Date;
66
+ role: string;
67
+ fromProvider: string;
68
+ toProvider: string;
69
+ reason: string;
70
+ errorCode?: number;
71
+ errorMessage?: string;
72
+ }
73
+ /**
74
+ * Orchestrates provider requests with intelligent failover.
75
+ *
76
+ * Features:
77
+ * - Automatic retry with exponential backoff
78
+ * - Multi-provider failover chain
79
+ * - Health tracking and cooldown management
80
+ * - Circuit breaker per provider
81
+ * - Cost-aware provider selection
82
+ * - Failover event logging for diagnostics
83
+ *
84
+ * @example
85
+ * ```typescript
86
+ * const orchestrator = new FailoverOrchestrator(
87
+ * providers,
88
+ * config,
89
+ * logger
90
+ * );
91
+ *
92
+ * // Execute with automatic failover
93
+ * const result = await orchestrator.executeWithFailover(
94
+ * 'coder',
95
+ * messages,
96
+ * { temperature: 0.7 }
97
+ * );
98
+ * ```
99
+ */
100
+ export declare class FailoverOrchestrator {
101
+ private readonly providers;
102
+ private readonly appConfig;
103
+ private readonly logger;
104
+ private readonly config;
105
+ private readonly healthTracker;
106
+ private readonly pricingService;
107
+ private readonly circuitBreakers;
108
+ private readonly failoverEvents;
109
+ private readonly maxEventHistory;
110
+ constructor(providers: ProviderManager, appConfig: Config, logger: Logger, config?: FailoverOrchestratorConfig);
111
+ /**
112
+ * Execute a completion request with automatic failover: walks the role's provider chain in order, skipping providers that are in cooldown or whose circuit is open, and rejects for an unknown role, for an error that does not trigger failover, or when no provider succeeds.
113
+ */
114
+ executeWithFailover(role: string, messages: Message[], options: FailoverCompletionOptions): Promise<FailoverCompletionResult>;
115
+ /**
116
+ * Get the role's provider chain filtered to providers that are configured and currently available; sorted by cost when preferCostEfficient is set. Returns an empty array for an unknown role.
117
+ */
118
+ getAvailableProviders(role: string): Array<{
119
+ provider: string;
120
+ model: string;
121
+ }>;
122
+ /**
123
+ * Get health status for all tracked providers.
124
+ */
125
+ getProviderHealth(): Map<string, ProviderHealth>;
126
+ /**
127
+ * Get a copy of the recorded failover events, oldest first.
128
+ */
129
+ getFailoverEvents(): FailoverEvent[];
130
+ /**
131
+ * Initialize the orchestrator (refresh pricing data, then start the health check loop).
132
+ */
133
+ initialize(): Promise<void>;
134
+ /**
135
+ * Shutdown the orchestrator: stops the health check loop. State is not persisted here; call serializeState() to obtain it.
136
+ */
137
+ shutdown(): void;
138
+ /**
139
+ * Serialize provider health and failover events for persistence (event timestamps become epoch milliseconds).
140
+ */
141
+ serializeState(): {
142
+ providerHealth: PersistedProviderHealth[];
143
+ failoverEvents: PersistedFailoverEvent[];
144
+ };
145
+ /**
146
+ * Restore state from persistence; each part is optional, and failoverEvents, when present, replaces the current event history.
147
+ */
148
+ restoreState(state: {
149
+ providerHealth?: PersistedProviderHealth[];
150
+ failoverEvents?: PersistedFailoverEvent[];
151
+ }): void;
152
+ /**
153
+ * Get the pricing service for external use.
154
+ */
155
+ getPricingService(): PricingService;
156
+ /**
157
+ * Get the health tracker for external use.
158
+ */
159
+ getHealthTracker(): ProviderHealthTracker;
160
+ /**
161
+ * Build the ordered chain of providers to try for a role: primary first, then the fallback chain, then the legacy single fallback if not already present.
162
+ */
163
+ private buildProviderChain;
164
+ /**
165
+ * Execute a single provider request with retry logic, going through that provider's circuit breaker and recording the outcome in the health tracker.
166
+ */
167
+ private executeWithRetry;
168
+ /**
169
+ * Execute without failover (the role's primary provider only, no retry).
170
+ */
171
+ private executeSingleProvider;
172
+ /**
173
+ * Check if an error should trigger failover (rate limit, timeout, open circuit, or a provider error whose status code is in the configured list).
174
+ */
175
+ private shouldTriggerFailover;
176
+ /**
177
+ * Get the first chain entry whose provider name has not been attempted yet.
178
+ */
179
+ private getNextProvider;
180
+ /**
181
+ * Get or create a circuit breaker for a provider.
182
+ */
183
+ private getOrCreateCircuitBreaker;
184
+ /**
185
+ * Record a failover event for diagnostics, discarding the oldest entries once the history limit is exceeded.
186
+ */
187
+ private recordFailoverEvent;
188
+ }
189
+ //# sourceMappingURL=orchestrator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/failover/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC/D,OAAO,KAAK,EAAE,MAAM,EAAqB,kBAAkB,EAAE,OAAO,EAAmC,MAAM,aAAa,CAAC;AAG3H,OAAO,EAAoC,KAAK,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AAC3G,OAAO,EAAE,qBAAqB,EAAE,KAAK,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,gCAAgC,CAAC;AAMtG;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,iCAAiC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4BAA4B;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,8BAA8B;IAC9B,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,wCAAwC;IACxC,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IACtB,0BAA0B;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,YAAY,EAAE,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,sCAAsC;IACtC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8DAA8D;IAC9D,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,2CAA2C;IAC3C,cAAc,CAAC,EAAE,OAAO,CAAC,qBAAqB,CAAC,CAAC;CACjD;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,IAAI,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AA4BD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAkB;IAC5C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuC;IAC9D,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAwB;IACtD,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA0C;IAC1E,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAuB;IACtD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAO;gBAGrC,SAAS,EAAE,eAAe,EAC1B,SAAS,EAAE,MAAM,EACjB,MAAM,EA
AE,MAAM,EACd,MAAM,CAAC,EAAE,0BAA0B;IAiBrC;;OAEG;IACG,mBAAmB,CACvB,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,wBAAwB,CAAC;IAgHpC;;OAEG;IACH,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAuB/E;;OAEG;IACH,iBAAiB,IAAI,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC;IAIhD;;OAEG;IACH,iBAAiB,IAAI,aAAa,EAAE;IAIpC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAajC;;OAEG;IACH,QAAQ,IAAI,IAAI;IAKhB;;OAEG;IACH,cAAc,IAAI;QAChB,cAAc,EAAE,uBAAuB,EAAE,CAAC;QAC1C,cAAc,EAAE,sBAAsB,EAAE,CAAC;KAC1C;IAeD;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE;QAClB,cAAc,CAAC,EAAE,uBAAuB,EAAE,CAAC;QAC3C,cAAc,CAAC,EAAE,sBAAsB,EAAE,CAAC;KAC3C,GAAG,IAAI;IAuBR;;OAEG;IACH,iBAAiB,IAAI,cAAc;IAInC;;OAEG;IACH,gBAAgB,IAAI,qBAAqB;IAQzC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAuC1B;;OAEG;YACW,gBAAgB;IAiF9B;;OAEG;YACW,qBAAqB;IAoCnC;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAwB7B;;OAEG;IACH,OAAO,CAAC,eAAe;IAYvB;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAoBjC;;OAEG;IACH,OAAO,CAAC,mBAAmB;CA4B5B"}
@@ -0,0 +1,488 @@
1
+ /**
2
+ * Failover Orchestrator
3
+ *
4
+ * Wraps provider operations with intelligent retry and failover logic.
5
+ * Integrates health tracking, circuit breakers, and cost-aware routing.
6
+ */
7
+ import { ProviderError, RateLimitError, TimeoutError, FailoverExhaustedError } from '../types.js';
8
+ import { retry, isRetryableError } from '../utils/retry.js';
9
+ import { CircuitBreaker, CircuitOpenError } from '../utils/circuit-breaker.js';
10
+ import { ProviderHealthTracker } from './health-tracker.js';
11
+ import { PricingService } from './pricing.js';
12
+ // ============================================================================
13
+ // Constants
14
+ // ============================================================================
15
+ const DEFAULT_CONFIG = {
16
+ enabled: true,
17
+ maxTotalRetries: 6,
18
+ healthCheckIntervalMs: 60000,
19
+ cooldownMs: 300000,
20
+ preferCostEfficient: false,
21
+ circuitBreaker: {
22
+ failureThreshold: 5,
23
+ successThreshold: 2,
24
+ timeout: 30000,
25
+ },
26
+ };
27
+ /**
28
+ * Default error codes that trigger failover.
29
+ */
30
+ const DEFAULT_FAILOVER_ERRORS = [429, 500, 502, 503, 504];
31
+ // ============================================================================
32
+ // FailoverOrchestrator Class
33
+ // ============================================================================
34
+ /**
35
+ * Orchestrates provider requests with intelligent failover.
36
+ *
37
+ * Features:
38
+ * - Automatic retry with exponential backoff
39
+ * - Multi-provider failover chain
40
+ * - Health tracking and cooldown management
41
+ * - Circuit breaker per provider
42
+ * - Cost-aware provider selection
43
+ * - Failover event logging for diagnostics
44
+ *
45
+ * @example
46
+ * ```typescript
47
+ * const orchestrator = new FailoverOrchestrator(
48
+ * providers,
49
+ * config,
50
+ * logger
51
+ * );
52
+ *
53
+ * // Execute with automatic failover
54
+ * const result = await orchestrator.executeWithFailover(
55
+ * 'coder',
56
+ * messages,
57
+ * { temperature: 0.7 }
58
+ * );
59
+ * ```
60
+ */
61
+ export class FailoverOrchestrator {
62
+ providers;
63
+ appConfig;
64
+ logger;
65
+ config;
66
+ healthTracker;
67
+ pricingService;
68
+ circuitBreakers = new Map();
69
+ failoverEvents = [];
70
+ maxEventHistory = 100;
71
+ constructor(providers, appConfig, logger, config) {
72
+ this.providers = providers;
73
+ this.appConfig = appConfig;
74
+ this.logger = logger;
75
+ this.config = { ...DEFAULT_CONFIG, ...config };
76
+ // Initialize health tracker
77
+ this.healthTracker = new ProviderHealthTracker(logger, {
78
+ cooldownMs: this.config.cooldownMs,
79
+ healthCheckIntervalMs: this.config.healthCheckIntervalMs,
80
+ });
81
+ // Initialize pricing service
82
+ this.pricingService = new PricingService(logger);
83
+ }
84
+ /**
85
+ * Execute a completion request with automatic failover.
86
+ */
87
+ async executeWithFailover(role, messages, options) {
88
+ if (!this.config.enabled) {
89
+ // Failover disabled, use primary provider only
90
+ return this.executeSingleProvider(role, messages, options);
91
+ }
92
+ const startTime = Date.now();
93
+ const roleConfig = this.appConfig.roles[role];
94
+ if (!roleConfig) {
95
+ throw new Error(`Unknown role: ${role}`);
96
+ }
97
+ // Build the chain of providers to try
98
+ const providerChain = this.buildProviderChain(role, roleConfig);
99
+ const errors = new Map();
100
+ const attemptedProviders = [];
101
+ let totalRetries = 0;
102
+ let failoverCount = 0;
103
+ // Get failover error codes from config or use defaults
104
+ const failoverErrors = roleConfig.fallback_chain?.on_errors ?? DEFAULT_FAILOVER_ERRORS;
105
+ // Get retry config from role or use defaults
106
+ const retryConfig = roleConfig.fallback_chain?.retry ?? {};
107
+ for (const { provider, model } of providerChain) {
108
+ if (totalRetries >= this.config.maxTotalRetries) {
109
+ this.logger.warn('Max total retries reached', {
110
+ role,
111
+ totalRetries,
112
+ attemptedProviders,
113
+ });
114
+ break;
115
+ }
116
+ // Check if provider is available
117
+ if (!this.healthTracker.isAvailable(provider)) {
118
+ const cooldown = this.healthTracker.getCooldownRemaining(provider);
119
+ this.logger.debug('Skipping provider in cooldown', {
120
+ provider,
121
+ cooldownRemainingMs: cooldown,
122
+ });
123
+ continue;
124
+ }
125
+ // Check circuit breaker
126
+ const breaker = this.getOrCreateCircuitBreaker(provider);
127
+ if (breaker.getState() === 'OPEN') {
128
+ this.logger.debug('Skipping provider with open circuit', { provider });
129
+ continue;
130
+ }
131
+ attemptedProviders.push(provider);
132
+ try {
133
+ const result = await this.executeWithRetry(provider, model, messages, options, retryConfig, failoverErrors);
134
+ const latencyMs = Date.now() - startTime;
135
+ return {
136
+ response: result.response,
137
+ provider,
138
+ model,
139
+ retryCount: result.retryCount,
140
+ failoverCount,
141
+ latencyMs,
142
+ usedFailover: failoverCount > 0,
143
+ };
144
+ }
145
+ catch (error) {
146
+ const err = error instanceof Error ? error : new Error(String(error));
147
+ errors.set(provider, err);
148
+ totalRetries++;
149
+ // Check if this error should trigger failover
150
+ const shouldFailover = this.shouldTriggerFailover(err, failoverErrors);
151
+ if (shouldFailover && providerChain.length > attemptedProviders.length) {
152
+ const nextProvider = this.getNextProvider(providerChain, attemptedProviders);
153
+ if (nextProvider) {
154
+ this.recordFailoverEvent(role, provider, nextProvider.provider, err);
155
+ failoverCount++;
156
+ this.logger.info('Failing over to next provider', {
157
+ role,
158
+ fromProvider: provider,
159
+ toProvider: nextProvider.provider,
160
+ errorMessage: err.message,
161
+ });
162
+ continue;
163
+ }
164
+ }
165
+ // No more providers or not a failover-triggering error
166
+ throw err;
167
+ }
168
+ }
169
+ // All providers exhausted
170
+ throw new FailoverExhaustedError(role, attemptedProviders, errors);
171
+ }
172
+ /**
173
+ * Get available providers for a role, sorted by health and optionally cost.
174
+ */
175
+ getAvailableProviders(role) {
176
+ const roleConfig = this.appConfig.roles[role];
177
+ if (!roleConfig) {
178
+ return [];
179
+ }
180
+ const chain = this.buildProviderChain(role, roleConfig);
181
+ // Filter to only configured and available providers
182
+ const available = chain.filter(({ provider }) => {
183
+ const isConfigured = this.providers.isConfigured(provider);
184
+ const isAvailable = this.healthTracker.isAvailable(provider);
185
+ return isConfigured && isAvailable;
186
+ });
187
+ // Optionally sort by cost
188
+ if (this.config.preferCostEfficient) {
189
+ return this.pricingService.sortByCost(available);
190
+ }
191
+ return available;
192
+ }
193
+ /**
194
+ * Get health status for all tracked providers.
195
+ */
196
+ getProviderHealth() {
197
+ return this.healthTracker.getAllHealth();
198
+ }
199
+ /**
200
+ * Get recent failover events.
201
+ */
202
+ getFailoverEvents() {
203
+ return [...this.failoverEvents];
204
+ }
205
+ /**
206
+ * Initialize the orchestrator (fetch pricing, start health checks).
207
+ */
208
+ async initialize() {
209
+ // Fetch pricing data
210
+ await this.pricingService.refresh();
211
+ // Start health check loop
212
+ this.healthTracker.startHealthCheckLoop(async (provider) => {
213
+ const providerInstance = this.providers.get(provider);
214
+ await providerInstance.healthCheck();
215
+ });
216
+ this.logger.info('Failover orchestrator initialized');
217
+ }
218
+ /**
219
+ * Shutdown the orchestrator (stop health checks, persist state).
220
+ */
221
+ shutdown() {
222
+ this.healthTracker.stopHealthCheckLoop();
223
+ this.logger.info('Failover orchestrator shut down');
224
+ }
225
+ /**
226
+ * Serialize state for persistence.
227
+ */
228
+ serializeState() {
229
+ return {
230
+ providerHealth: this.healthTracker.serialize(),
231
+ failoverEvents: this.failoverEvents.map((e) => ({
232
+ timestamp: e.timestamp.getTime(),
233
+ role: e.role,
234
+ fromProvider: e.fromProvider,
235
+ toProvider: e.toProvider,
236
+ reason: e.reason,
237
+ errorCode: e.errorCode,
238
+ errorMessage: e.errorMessage,
239
+ })),
240
+ };
241
+ }
242
+ /**
243
+ * Restore state from persistence.
244
+ */
245
+ restoreState(state) {
246
+ if (state.providerHealth) {
247
+ this.healthTracker.restore(state.providerHealth);
248
+ }
249
+ if (state.failoverEvents) {
250
+ this.failoverEvents.length = 0;
251
+ for (const e of state.failoverEvents) {
252
+ this.failoverEvents.push({
253
+ timestamp: new Date(e.timestamp),
254
+ role: e.role,
255
+ fromProvider: e.fromProvider,
256
+ toProvider: e.toProvider,
257
+ reason: e.reason,
258
+ errorCode: e.errorCode,
259
+ errorMessage: e.errorMessage,
260
+ });
261
+ }
262
+ }
263
+ this.logger.info('Failover orchestrator state restored');
264
+ }
265
+ /**
266
+ * Get the pricing service for external use.
267
+ */
268
+ getPricingService() {
269
+ return this.pricingService;
270
+ }
271
+ /**
272
+ * Get the health tracker for external use.
273
+ */
274
+ getHealthTracker() {
275
+ return this.healthTracker;
276
+ }
277
+ // ==========================================================================
278
+ // Private Methods
279
+ // ==========================================================================
280
+ /**
281
+ * Build the ordered chain of providers to try for a role.
282
+ */
283
+ buildProviderChain(role, roleConfig) {
284
+ const chain = [];
285
+ // Primary provider
286
+ chain.push({
287
+ provider: roleConfig.provider,
288
+ model: roleConfig.model,
289
+ });
290
+ // Fallback chain (extended)
291
+ if (roleConfig.fallback_chain?.providers) {
292
+ for (const fb of roleConfig.fallback_chain.providers) {
293
+ chain.push({
294
+ provider: fb.provider,
295
+ model: fb.model,
296
+ });
297
+ }
298
+ }
299
+ // Legacy single fallback
300
+ if (roleConfig.fallback) {
301
+ // Only add if not already in chain
302
+ const exists = chain.some((p) => p.provider === roleConfig.fallback.provider && p.model === roleConfig.fallback.model);
303
+ if (!exists) {
304
+ chain.push({
305
+ provider: roleConfig.fallback.provider,
306
+ model: roleConfig.fallback.model,
307
+ });
308
+ }
309
+ }
310
+ return chain;
311
+ }
312
+ /**
313
+ * Execute a single provider request with retry logic.
314
+ */
315
+ async executeWithRetry(provider, model, messages, options, retryConfig, _failoverErrors) {
316
+ const breaker = this.getOrCreateCircuitBreaker(provider);
317
+ let retryCount = 0;
318
+ const retryOptions = {
319
+ maxAttempts: retryConfig.max_attempts ?? 2,
320
+ initialDelayMs: retryConfig.initial_delay_ms ?? 1000,
321
+ maxDelayMs: retryConfig.max_delay_ms ?? 30000,
322
+ shouldRetry: (error) => isRetryableError(error),
323
+ onRetry: (error, attempt, delayMs) => {
324
+ retryCount++;
325
+ this.logger.debug('Retrying provider request', {
326
+ provider,
327
+ attempt,
328
+ delayMs,
329
+ error: error.message,
330
+ });
331
+ },
332
+ };
333
+ try {
334
+ const response = await retry(async () => {
335
+ const startTime = Date.now();
336
+ try {
337
+ // Execute through circuit breaker
338
+ const result = await breaker.execute(async () => {
339
+ const providerInstance = this.providers.get(provider);
340
+ const request = {
341
+ model,
342
+ messages,
343
+ temperature: options.temperature,
344
+ max_tokens: options.max_tokens,
345
+ timeout_ms: options.timeout_ms,
346
+ };
347
+ return providerInstance.complete(request);
348
+ });
349
+ // Record success
350
+ const latencyMs = Date.now() - startTime;
351
+ this.healthTracker.markSuccess(provider, latencyMs);
352
+ return result;
353
+ }
354
+ catch (error) {
355
+ const latencyMs = Date.now() - startTime;
356
+ const err = error instanceof Error ? error : new Error(String(error));
357
+ // Extract status code if available
358
+ let statusCode;
359
+ if (error instanceof ProviderError) {
360
+ statusCode = error.statusCode;
361
+ }
362
+ else if (error instanceof RateLimitError) {
363
+ statusCode = 429;
364
+ }
365
+ else if (error instanceof CircuitOpenError) {
366
+ // Circuit is open, don't record as failure
367
+ throw error;
368
+ }
369
+ // Record failure
370
+ this.healthTracker.markFailure(provider, err, statusCode);
371
+ throw error;
372
+ }
373
+ }, retryOptions);
374
+ return { response, retryCount };
375
+ }
376
+ catch (error) {
377
+ // Final failure after retries
378
+ throw error;
379
+ }
380
+ }
381
+ /**
382
+ * Execute without failover (single provider only).
383
+ */
384
+ async executeSingleProvider(role, messages, options) {
385
+ const startTime = Date.now();
386
+ const roleConfig = this.appConfig.roles[role];
387
+ if (!roleConfig) {
388
+ throw new Error(`Unknown role: ${role}`);
389
+ }
390
+ const providerInstance = this.providers.get(roleConfig.provider);
391
+ const request = {
392
+ model: roleConfig.model,
393
+ messages,
394
+ temperature: options.temperature,
395
+ max_tokens: options.max_tokens,
396
+ timeout_ms: options.timeout_ms,
397
+ };
398
+ const response = await providerInstance.complete(request);
399
+ const latencyMs = Date.now() - startTime;
400
+ return {
401
+ response,
402
+ provider: roleConfig.provider,
403
+ model: roleConfig.model,
404
+ retryCount: 0,
405
+ failoverCount: 0,
406
+ latencyMs,
407
+ usedFailover: false,
408
+ };
409
+ }
410
+ /**
411
+ * Check if an error should trigger failover.
412
+ */
413
+ shouldTriggerFailover(error, failoverErrors) {
414
+ // Rate limit always triggers failover
415
+ if (error instanceof RateLimitError) {
416
+ return true;
417
+ }
418
+ // Timeout triggers failover
419
+ if (error instanceof TimeoutError) {
420
+ return true;
421
+ }
422
+ // Circuit open triggers failover
423
+ if (error instanceof CircuitOpenError) {
424
+ return true;
425
+ }
426
+ // Check status code
427
+ if (error instanceof ProviderError && error.statusCode !== undefined) {
428
+ return failoverErrors.includes(error.statusCode);
429
+ }
430
+ return false;
431
+ }
432
+ /**
433
+ * Get the next provider in the chain that hasn't been tried.
434
+ */
435
+ getNextProvider(chain, attempted) {
436
+ for (const p of chain) {
437
+ if (!attempted.includes(p.provider)) {
438
+ return p;
439
+ }
440
+ }
441
+ return undefined;
442
+ }
443
+ /**
444
+ * Get or create a circuit breaker for a provider.
445
+ */
446
+ getOrCreateCircuitBreaker(provider) {
447
+ let breaker = this.circuitBreakers.get(provider);
448
+ if (!breaker) {
449
+ breaker = new CircuitBreaker(this.config.circuitBreaker);
450
+ // Log state changes
451
+ breaker.onStateChange((prev, next, meta) => {
452
+ this.logger.info('Circuit breaker state change', {
453
+ provider,
454
+ previousState: prev,
455
+ newState: next,
456
+ failureCount: meta.failureCount,
457
+ });
458
+ });
459
+ this.circuitBreakers.set(provider, breaker);
460
+ }
461
+ return breaker;
462
+ }
463
+ /**
464
+ * Record a failover event for diagnostics.
465
+ */
466
+ recordFailoverEvent(role, fromProvider, toProvider, error) {
467
+ const event = {
468
+ timestamp: new Date(),
469
+ role,
470
+ fromProvider,
471
+ toProvider,
472
+ reason: error.message,
473
+ };
474
+ if (error instanceof ProviderError) {
475
+ event.errorCode = error.statusCode;
476
+ }
477
+ else if (error instanceof RateLimitError) {
478
+ event.errorCode = 429;
479
+ }
480
+ event.errorMessage = error.message;
481
+ this.failoverEvents.push(event);
482
+ // Trim history
483
+ while (this.failoverEvents.length > this.maxEventHistory) {
484
+ this.failoverEvents.shift();
485
+ }
486
+ }
487
+ }
488
+ //# sourceMappingURL=orchestrator.js.map