rax-flow-providers 0.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,5 +8,6 @@ export * from "./cohere-adapter.js";
8
8
  export * from "./rest-adapter.js";
9
9
  export * from "./host-bridge-adapter.js";
10
10
  export * from "./strategy.js";
11
+ export * from "./model-strategy.js";
11
12
  export * from "./utils.js";
12
13
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,0BAA0B,CAAC;AACzC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,0BAA0B,CAAC;AACzC,cAAc,eAAe,CAAC;AAC9B,cAAc,qBAAqB,CAAC;AACpC,cAAc,YAAY,CAAC"}
package/dist/index.js CHANGED
@@ -8,5 +8,6 @@ export * from "./cohere-adapter.js";
8
8
  export * from "./rest-adapter.js";
9
9
  export * from "./host-bridge-adapter.js";
10
10
  export * from "./strategy.js";
11
+ export * from "./model-strategy.js";
11
12
  export * from "./utils.js";
12
13
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,0BAA0B,CAAC;AACzC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,mBAAmB,CAAC;AAClC,cAAc,0BAA0B,CAAC;AACzC,cAAc,eAAe,CAAC;AAC9B,cAAc,qBAAqB,CAAC;AACpC,cAAc,YAAY,CAAC"}
@@ -0,0 +1,83 @@
1
+ import { IModelProvider, ModelResponse, ProviderCallOptions, Intent } from "rax-flow-core";
2
/**
 * Tier label attached to every model profile. The implementation in
 * model-strategy.js orders the tiers economy < standard < premium < reasoning
 * when it applies a caller's `preferredTier`.
 */
+ export type ModelTier = "economy" | "standard" | "premium" | "reasoning";
3
/**
 * Scoring mode used when ranking candidate models. The implementation falls
 * back to "balanced" when the caller does not specify one.
 */
+ export type SelectionCriteria = "cost" | "latency" | "quality" | "balanced";
4
/**
 * Static description of one provider/model pair, used to filter and score
 * candidates.
 *
 * - `provider`: must equal the key under which the provider instance is
 *   registered with the engine; profiles without a registered provider are
 *   never selected as candidates.
 * - `model`: combined with `provider` as `provider:model` to key the profile.
 * - `costPer1kTokens`: price per 1000 tokens, split into input and output;
 *   the resulting estimate is compared against `SelectionContext.maxCostUsd`.
 * - `avgLatencyMs`: expected latency; compared against
 *   `SelectionContext.maxLatencyMs` and used for latency scoring.
 * - `supportsStructured` / `supportsVision` / `supportsStreaming`: hard
 *   capability flags checked against the matching `requires*` context flags.
 * - `qualityScore`: presumably on a 0..1 scale (the implementation multiplies
 *   it by 100 when it reports it as a percentage) - confirm for custom profiles.
 * - `maxTokens`, `capabilities`: carried as metadata; the selection logic in
 *   model-strategy.js does not read them.
 */
+ export interface ModelProfile {
5
+ provider: string;
6
+ model: string;
7
+ tier: ModelTier;
8
+ costPer1kTokens: {
9
+ input: number;
10
+ output: number;
11
+ };
12
+ avgLatencyMs: number;
13
+ maxTokens: number;
14
+ supportsStructured: boolean;
15
+ supportsVision: boolean;
16
+ supportsStreaming: boolean;
17
+ qualityScore: number;
18
+ capabilities: string[];
19
+ }
20
/**
 * Per-request input to model selection.
 *
 * - `prompt`: used to estimate the token count when `estimatedTokens` is absent.
 * - `intent`: profiles whose tier matches the tier mapped to this intent get a
 *   score bonus.
 * - `estimatedTokens`: overrides the prompt-based token estimate.
 * - `maxCostUsd` / `maxLatencyMs`: hard limits; profiles whose estimated cost
 *   or average latency exceed them are excluded.
 * - `requiresStructured` / `requiresVision` / `requiresStreaming`: hard
 *   capability requirements.
 * - `preferredTier`: excludes profiles more than one tier above the preferred
 *   one; lower tiers are still allowed.
 * - `criteria`: scoring mode, "balanced" when omitted.
 * - `fallbackChain`: NOTE(review): not read by the implementation in
 *   model-strategy.js; the fallback chain is always derived from the scores.
 */
+ export interface SelectionContext {
21
+ prompt: string;
22
+ intent?: Intent;
23
+ estimatedTokens?: number;
24
+ maxCostUsd?: number;
25
+ maxLatencyMs?: number;
26
+ requiresStructured?: boolean;
27
+ requiresVision?: boolean;
28
+ requiresStreaming?: boolean;
29
+ preferredTier?: ModelTier;
30
+ criteria?: SelectionCriteria;
31
+ fallbackChain?: string[];
32
+ }
33
/**
 * Outcome of `ModelStrategyEngine.select`.
 *
 * - `provider` / `profile`: the chosen provider instance and its model profile.
 * - `fallbackChain`: up to three next-best profiles in descending score order;
 *   empty when the engine had to use its last-resort fallback.
 * - `estimatedCost`: estimate computed from the profile's per-1k-token prices
 *   (0 on the last-resort fallback path).
 * - `estimatedLatency`: the chosen profile's `avgLatencyMs` (1000 on the
 *   last-resort fallback path).
 * - `reasoning`: human-readable explanation of the choice.
 */
+ export interface SelectionResult {
34
+ provider: IModelProvider;
35
+ profile: ModelProfile;
36
+ fallbackChain: ModelProfile[];
37
+ estimatedCost: number;
38
+ estimatedLatency: number;
39
+ reasoning: string;
40
+ }
41
/**
 * Running statistics the engine keeps per registered provider name.
 *
 * - `totalCalls`: attempts recorded, successful or not.
 * - `successRate`: fraction of recorded attempts that succeeded; initialised
 *   to 1 before any call has been made.
 * - `avgLatencyMs`: running average latency, initialised to 0.
 * - `totalCostUsd`: sum of the `costUsd` values reported by successful calls.
 * - `lastError` / `lastErrorTime`: message and `Date.now()` timestamp of the
 *   most recent recorded failure, if any.
 */
+ export interface ProviderMetrics {
42
+ provider: string;
43
+ totalCalls: number;
44
+ successRate: number;
45
+ avgLatencyMs: number;
46
+ totalCostUsd: number;
47
+ lastError?: string;
48
+ lastErrorTime?: number;
49
+ }
50
/**
 * Selects a model provider for a request from a catalog of model profiles and
 * can invoke it with automatic fallback to the next-best candidates.
 *
 * NOTE(review): the private members below are emitted without types, so the
 * `typeof this.selectionHistory` return type of `getSelectionHistory` likely
 * resolves to `any` for consumers - confirm before relying on it.
 */
+ export declare class ModelStrategyEngine {
51
+ private profiles;
52
+ private providers;
53
+ private metrics;
54
+ private selectionHistory;
55
/**
 * @param providerInstances Provider instances keyed by provider name; the key
 *   must match `ModelProfile.provider` for a profile to be selectable.
 * @param customProfiles When given, replaces the built-in catalog entirely.
 */
+ constructor(providerInstances: Record<string, IModelProvider>, customProfiles?: ModelProfile[]);
56
/** Filters and scores the profiles for `context` and returns the best match. */
+ select(context: SelectionContext): SelectionResult;
57
/**
 * Runs `select` and calls the chosen provider (`callStructured` when `schema`
 * is given, `callModel` otherwise), moving on to the fallback chain on failure.
 * Rejects with an `Error` whose message starts with "All providers failed."
 * when every attempt fails.
 */
+ callWithStrategy<T>(context: SelectionContext, prompt: string, schema?: object, options?: ProviderCallOptions): Promise<ModelResponse<T>>;
58
+ private getCandidates;
59
+ private meetsRequirements;
60
+ private scoreCandidate;
61
+ private scoreByCost;
62
+ private scoreByLatency;
63
+ private calculateCost;
64
+ private estimateTokens;
65
+ private generateReasoning;
66
+ private getFallbackProvider;
67
+ private getProviderName;
68
+ private recordSuccess;
69
+ private recordFailure;
70
/** Returns the per-provider statistics keyed by provider name. */
+ getMetrics(): Record<string, ProviderMetrics>;
71
/** Returns the most recent attempt records; the implementation defaults `limit` to 100. */
+ getSelectionHistory(limit?: number): typeof this.selectionHistory;
72
/** Returns every known model profile. */
+ getProfiles(): ModelProfile[];
73
/** Adds a profile, replacing any existing one with the same provider and model. */
+ addProfile(profile: ModelProfile): void;
74
/** Returns the tier mapped to `intent`, or "standard" when the intent is unmapped. */
+ getIntentTier(intent: Intent): ModelTier;
75
/** Returns the profiles in the intent's tier, highest `qualityScore` first. */
+ recommendForIntent(intent: Intent): ModelProfile[];
76
+ }
77
/**
 * Calls `callModel(prompt, callOptions)` on the given providers in order and
 * resolves with the first successful response. Providers whose health check
 * fails are skipped.
 *
 * @param providers Providers to try, in priority order.
 * @param prompt Prompt forwarded unchanged to each provider.
 * @param options.maxRetriesPerProvider Extra attempts per provider for errors
 *   flagged as retryable; the implementation defaults this to 1.
 * @param options.retryDelayMs Base delay between retries, multiplied by the
 *   attempt number; the implementation defaults this to 500.
 * @param options.callOptions Forwarded to every `callModel` invocation.
 * @param options.onFallback Optional observer notified with a from-name, a
 *   to-name and a reason when the function moves past a provider.
 * @returns The first successful model response.
 * @throws Error with a message starting "smart_fallback_exhausted:" when no
 *   provider produced a response.
 */
+ export declare function smartFallbackCall(providers: IModelProvider[], prompt: string, options?: {
78
+ maxRetriesPerProvider?: number;
79
+ retryDelayMs?: number;
80
+ callOptions?: ProviderCallOptions;
81
+ onFallback?: (from: string, to: string, reason: string) => void;
82
+ }): Promise<ModelResponse<string>>;
83
+ //# sourceMappingURL=model-strategy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-strategy.d.ts","sourceRoot":"","sources":["../src/model-strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAG3F,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,GAAG,WAAW,CAAC;AACzE,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,UAAU,CAAC;AAE5E,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,SAAS,CAAC;IAChB,eAAe,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IACnD,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,cAAc,EAAE,OAAO,CAAC;IACxB,iBAAiB,EAAE,OAAO,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,aAAa,CAAC,EAAE,SAAS,CAAC;IAC1B,QAAQ,CAAC,EAAE,iBAAiB,CAAC;IAC7B,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,cAAc,CAAC;IACzB,OAAO,EAAE,YAAY,CAAC;IACtB,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAyCD,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,QAAQ,CAAwC;IACxD,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,OAAO,CAA2C;IAC1D,OAAO,CAAC,gBAAgB,CAAsH;gBAG5I,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,EACjD,cAAc,CAAC,EAAE,YAAY,EAAE;IAoBjC,MAAM,CAAC,OAAO,EAAE,gBAAgB,GAAG,eAAe;IAsC5C,gBAAgB,CAAC,CAAC,EACtB,OAAO,EAAE,gBAAgB,EACzB,MAAM,EAAE,MAAM,EACd,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,mBAAmB,GAC5B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IA6C5B,OAAO,CAAC,aAAa;IAerB,OAAO,CAAC,iBAAiB;IAuBzB,OAAO,CAAC,cAAc;IAqCtB,OAAO,CAAC,WAAW;IAMnB,OAAO,CAAC,c
AAc;IAItB,OAAO,CAAC,aAAa;IAMrB,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,iBAAiB;IAOzB,OAAO,CAAC,mBAAmB;IAa3B,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,aAAa;IAmBrB,OAAO,CAAC,aAAa;IAmBrB,UAAU,IAAI,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC;IAI7C,mBAAmB,CAAC,KAAK,GAAE,MAAY,GAAG,OAAO,IAAI,CAAC,gBAAgB;IAItE,WAAW,IAAI,YAAY,EAAE;IAI7B,UAAU,CAAC,OAAO,EAAE,YAAY,GAAG,IAAI;IAKvC,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS;IAIxC,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,YAAY,EAAE;CAMnD;AAED,wBAAsB,iBAAiB,CACrC,SAAS,EAAE,cAAc,EAAE,EAC3B,MAAM,EAAE,MAAM,EACd,OAAO,GAAE;IACP,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,mBAAmB,CAAC;IAClC,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAC5D,GACL,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CA2ChC"}
@@ -0,0 +1,330 @@
1
+ import { RaxProviderError } from "./error-mapper.js";
2
/**
 * Built-in model profiles, used by ModelStrategyEngine when the constructor is
 * given no custom profile list. Each entry's `provider` must match the key a
 * provider instance is registered under for the entry to be selectable.
 *
 * NOTE(review): prices, latencies and quality scores are hard-coded values;
 * verify them against current provider data before relying on the estimates.
 */
+ const MODEL_CATALOG = [
3
+ // OpenAI models
4
+ { provider: "openai", model: "gpt-4o-mini", tier: "economy", costPer1kTokens: { input: 0.00015, output: 0.0006 }, avgLatencyMs: 400, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.75, capabilities: ["chat", "code", "analysis"] },
5
+ { provider: "openai", model: "gpt-4.1-mini", tier: "economy", costPer1kTokens: { input: 0.0004, output: 0.0016 }, avgLatencyMs: 450, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.78, capabilities: ["chat", "code", "analysis"] },
6
+ { provider: "openai", model: "gpt-4o", tier: "standard", costPer1kTokens: { input: 0.0025, output: 0.01 }, avgLatencyMs: 600, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.88, capabilities: ["chat", "code", "analysis", "reasoning"] },
7
+ { provider: "openai", model: "gpt-4.1", tier: "standard", costPer1kTokens: { input: 0.002, output: 0.008 }, avgLatencyMs: 550, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.90, capabilities: ["chat", "code", "analysis", "reasoning"] },
8
+ { provider: "openai", model: "o1", tier: "reasoning", costPer1kTokens: { input: 0.015, output: 0.06 }, avgLatencyMs: 5000, maxTokens: 200000, supportsStructured: false, supportsVision: false, supportsStreaming: false, qualityScore: 0.95, capabilities: ["reasoning", "math", "science"] },
9
+ { provider: "openai", model: "o3-mini", tier: "premium", costPer1kTokens: { input: 0.0011, output: 0.0044 }, avgLatencyMs: 2000, maxTokens: 200000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.92, capabilities: ["reasoning", "code", "analysis"] },
10
+ // Claude models
11
+ { provider: "claude", model: "claude-3-5-haiku-latest", tier: "economy", costPer1kTokens: { input: 0.0008, output: 0.004 }, avgLatencyMs: 300, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.80, capabilities: ["chat", "code", "analysis"] },
12
+ { provider: "claude", model: "claude-3-5-sonnet-latest", tier: "standard", costPer1kTokens: { input: 0.003, output: 0.015 }, avgLatencyMs: 500, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.91, capabilities: ["chat", "code", "analysis", "reasoning"] },
13
+ { provider: "claude", model: "claude-3-opus-latest", tier: "premium", costPer1kTokens: { input: 0.015, output: 0.075 }, avgLatencyMs: 1200, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.94, capabilities: ["chat", "code", "analysis", "reasoning", "creative"] },
14
+ // Gemini models
15
+ { provider: "gemini", model: "gemini-2.0-flash", tier: "economy", costPer1kTokens: { input: 0.0001, output: 0.0004 }, avgLatencyMs: 250, maxTokens: 1000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.72, capabilities: ["chat", "code", "multimodal"] },
16
+ { provider: "gemini", model: "gemini-1.5-pro", tier: "standard", costPer1kTokens: { input: 0.00125, output: 0.005 }, avgLatencyMs: 600, maxTokens: 2000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.85, capabilities: ["chat", "code", "analysis", "multimodal"] },
17
+ // Groq models (fast inference)
18
+ { provider: "groq", model: "llama-3.3-70b-versatile", tier: "economy", costPer1kTokens: { input: 0.00059, output: 0.00079 }, avgLatencyMs: 80, maxTokens: 128000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.78, capabilities: ["chat", "code"] },
19
+ { provider: "groq", model: "llama-3.1-8b-instant", tier: "economy", costPer1kTokens: { input: 0.00002, output: 0.00002 }, avgLatencyMs: 40, maxTokens: 128000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.65, capabilities: ["chat"] },
20
+ // Host bridge
21
+ { provider: "host", model: "host-managed", tier: "standard", costPer1kTokens: { input: 0, output: 0 }, avgLatencyMs: 1000, maxTokens: 1000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.85, capabilities: ["chat", "code", "analysis"] }
22
+ ];
23
/**
 * Preferred model tier per intent. Candidates whose tier equals the mapped
 * tier receive a score bonus during selection, and getIntentTier() returns
 * "standard" for any intent that is missing from this table.
 */
+ const INTENT_TIER_MAP = {
24
+ brainstorm: "economy",
25
+ spec: "standard",
26
+ architecture: "premium",
27
+ plan: "standard",
28
+ generate_code: "standard",
29
+ test: "economy",
30
+ fix: "standard",
31
+ optimize: "premium",
32
+ document: "economy",
33
+ benchmark: "economy"
34
+ };
35
+ export class ModelStrategyEngine {
36
+ profiles = new Map();
37
+ providers = new Map();
38
+ metrics = new Map();
39
+ selectionHistory = [];
40
+ constructor(providerInstances, customProfiles) {
41
+ const catalog = customProfiles ?? MODEL_CATALOG;
42
+ for (const profile of catalog) {
43
+ const key = `${profile.provider}:${profile.model}`;
44
+ this.profiles.set(key, profile);
45
+ }
46
+ for (const [name, provider] of Object.entries(providerInstances)) {
47
+ this.providers.set(name, provider);
48
+ this.metrics.set(name, {
49
+ provider: name,
50
+ totalCalls: 0,
51
+ successRate: 1,
52
+ avgLatencyMs: 0,
53
+ totalCostUsd: 0
54
+ });
55
+ }
56
+ }
57
+ select(context) {
58
+ const candidates = this.getCandidates(context);
59
+ if (candidates.length === 0) {
60
+ const fallback = this.getFallbackProvider();
61
+ return {
62
+ provider: fallback.provider,
63
+ profile: fallback.profile,
64
+ fallbackChain: [],
65
+ estimatedCost: 0,
66
+ estimatedLatency: 1000,
67
+ reasoning: "No suitable model found, using fallback"
68
+ };
69
+ }
70
+ const scored = candidates.map(c => ({
71
+ ...c,
72
+ score: this.scoreCandidate(c, context)
73
+ }));
74
+ scored.sort((a, b) => b.score - a.score);
75
+ const selected = scored[0];
76
+ const fallbackChain = scored.slice(1, 4);
77
+ const estimatedTokens = context.estimatedTokens ?? this.estimateTokens(context.prompt);
78
+ const estimatedCost = this.calculateCost(selected.profile, estimatedTokens);
79
+ return {
80
+ provider: selected.provider,
81
+ profile: selected.profile,
82
+ fallbackChain: fallbackChain.map(f => f.profile),
83
+ estimatedCost,
84
+ estimatedLatency: selected.profile.avgLatencyMs,
85
+ reasoning: this.generateReasoning(selected, context)
86
+ };
87
+ }
88
+ async callWithStrategy(context, prompt, schema, options) {
89
+ const selection = this.select(context);
90
+ const fallbackProviders = selection.fallbackChain
91
+ .map(p => this.providers.get(p.provider))
92
+ .filter((p) => p !== undefined);
93
+ const startTime = Date.now();
94
+ let lastError;
95
+ // Try primary
96
+ try {
97
+ const result = schema
98
+ ? await selection.provider.callStructured(prompt, schema, options)
99
+ : await selection.provider.callModel(prompt, options);
100
+ this.recordSuccess(selection.profile, Date.now() - startTime, result.costUsd ?? 0);
101
+ return result;
102
+ }
103
+ catch (err) {
104
+ lastError = err;
105
+ this.recordFailure(selection.profile, err);
106
+ }
107
+ // Try fallbacks
108
+ for (const fallbackProvider of fallbackProviders) {
109
+ const fallbackProfile = selection.fallbackChain.find(p => p.provider === this.getProviderName(fallbackProvider));
110
+ if (!fallbackProfile)
111
+ continue;
112
+ try {
113
+ const result = schema
114
+ ? await fallbackProvider.callStructured(prompt, schema, options)
115
+ : await fallbackProvider.callModel(prompt, options);
116
+ this.recordSuccess(fallbackProfile, Date.now() - startTime, result.costUsd ?? 0);
117
+ return result;
118
+ }
119
+ catch (err) {
120
+ lastError = err;
121
+ if (fallbackProfile) {
122
+ this.recordFailure(fallbackProfile, err);
123
+ }
124
+ }
125
+ }
126
+ throw new Error(`All providers failed. Last error: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
127
+ }
128
+ getCandidates(context) {
129
+ const candidates = [];
130
+ for (const [key, profile] of this.profiles) {
131
+ if (!this.meetsRequirements(profile, context))
132
+ continue;
133
+ const provider = this.providers.get(profile.provider);
134
+ if (!provider)
135
+ continue;
136
+ candidates.push({ provider, profile });
137
+ }
138
+ return candidates;
139
+ }
140
+ meetsRequirements(profile, context) {
141
+ if (context.requiresStructured && !profile.supportsStructured)
142
+ return false;
143
+ if (context.requiresVision && !profile.supportsVision)
144
+ return false;
145
+ if (context.requiresStreaming && !profile.supportsStreaming)
146
+ return false;
147
+ if (context.maxCostUsd !== undefined) {
148
+ const estimatedTokens = context.estimatedTokens ?? this.estimateTokens(context.prompt);
149
+ const estimatedCost = this.calculateCost(profile, estimatedTokens);
150
+ if (estimatedCost > context.maxCostUsd)
151
+ return false;
152
+ }
153
+ if (context.maxLatencyMs !== undefined && profile.avgLatencyMs > context.maxLatencyMs)
154
+ return false;
155
+ if (context.preferredTier && profile.tier !== context.preferredTier) {
156
+ const tierPriority = ["economy", "standard", "premium", "reasoning"];
157
+ const preferredIdx = tierPriority.indexOf(context.preferredTier);
158
+ const profileIdx = tierPriority.indexOf(profile.tier);
159
+ if (profileIdx > preferredIdx + 1)
160
+ return false;
161
+ }
162
+ return true;
163
+ }
164
+ scoreCandidate(candidate, context) {
165
+ const criteria = context.criteria ?? "balanced";
166
+ let score = 0;
167
+ const metrics = this.metrics.get(candidate.profile.provider);
168
+ const successRate = metrics?.successRate ?? 1;
169
+ switch (criteria) {
170
+ case "cost":
171
+ score = this.scoreByCost(candidate.profile, context);
172
+ break;
173
+ case "latency":
174
+ score = this.scoreByLatency(candidate.profile);
175
+ break;
176
+ case "quality":
177
+ score = candidate.profile.qualityScore * 100;
178
+ break;
179
+ case "balanced":
180
+ default:
181
+ score = (this.scoreByCost(candidate.profile, context) * 0.3 +
182
+ this.scoreByLatency(candidate.profile) * 0.3 +
183
+ candidate.profile.qualityScore * 30 +
184
+ successRate * 10);
185
+ }
186
+ if (context.intent) {
187
+ const preferredTier = INTENT_TIER_MAP[context.intent];
188
+ if (preferredTier && candidate.profile.tier === preferredTier) {
189
+ score += 10;
190
+ }
191
+ }
192
+ return score;
193
+ }
194
+ scoreByCost(profile, context) {
195
+ const tokens = context.estimatedTokens ?? this.estimateTokens(context.prompt);
196
+ const cost = this.calculateCost(profile, tokens);
197
+ return Math.max(0, 100 - cost * 1000);
198
+ }
199
+ scoreByLatency(profile) {
200
+ return Math.max(0, 100 - profile.avgLatencyMs / 50);
201
+ }
202
+ calculateCost(profile, tokens) {
203
+ const inputTokens = Math.ceil(tokens * 0.7);
204
+ const outputTokens = Math.ceil(tokens * 0.3);
205
+ return (inputTokens * profile.costPer1kTokens.input + outputTokens * profile.costPer1kTokens.output) / 1000;
206
+ }
207
+ estimateTokens(prompt) {
208
+ return Math.ceil(prompt.split(/\s+/).length * 1.3);
209
+ }
210
+ generateReasoning(selected, context) {
211
+ const criteria = context.criteria ?? "balanced";
212
+ return `Selected ${selected.profile.provider}/${selected.profile.model} (tier: ${selected.profile.tier}) ` +
213
+ `based on ${criteria} criteria with score ${selected.score.toFixed(1)}. ` +
214
+ `Est. latency: ${selected.profile.avgLatencyMs}ms, quality: ${(selected.profile.qualityScore * 100).toFixed(0)}%`;
215
+ }
216
+ getFallbackProvider() {
217
+ const hostProvider = this.providers.get("host");
218
+ const hostProfile = Array.from(this.profiles.values()).find(p => p.provider === "host");
219
+ if (hostProvider && hostProfile) {
220
+ return { provider: hostProvider, profile: hostProfile };
221
+ }
222
+ const firstProvider = this.providers.values().next().value;
223
+ const firstProfile = this.profiles.values().next().value;
224
+ return { provider: firstProvider, profile: firstProfile };
225
+ }
226
+ getProviderName(provider) {
227
+ for (const [name, p] of this.providers) {
228
+ if (p === provider)
229
+ return name;
230
+ }
231
+ return "unknown";
232
+ }
233
+ recordSuccess(profile, latency, cost) {
234
+ const metrics = this.metrics.get(profile.provider);
235
+ if (!metrics)
236
+ return;
237
+ metrics.totalCalls++;
238
+ metrics.successRate = (metrics.successRate * (metrics.totalCalls - 1) + 1) / metrics.totalCalls;
239
+ metrics.avgLatencyMs = (metrics.avgLatencyMs * (metrics.totalCalls - 1) + latency) / metrics.totalCalls;
240
+ metrics.totalCostUsd += cost;
241
+ this.selectionHistory.push({
242
+ timestamp: Date.now(),
243
+ provider: profile.provider,
244
+ model: profile.model,
245
+ success: true,
246
+ latency,
247
+ cost
248
+ });
249
+ }
250
+ recordFailure(profile, error) {
251
+ const metrics = this.metrics.get(profile.provider);
252
+ if (!metrics)
253
+ return;
254
+ metrics.totalCalls++;
255
+ metrics.successRate = (metrics.successRate * (metrics.totalCalls - 1)) / metrics.totalCalls;
256
+ metrics.lastError = error instanceof Error ? error.message : String(error);
257
+ metrics.lastErrorTime = Date.now();
258
+ this.selectionHistory.push({
259
+ timestamp: Date.now(),
260
+ provider: profile.provider,
261
+ model: profile.model,
262
+ success: false,
263
+ latency: 0,
264
+ cost: 0
265
+ });
266
+ }
267
+ getMetrics() {
268
+ return Object.fromEntries(this.metrics);
269
+ }
270
+ getSelectionHistory(limit = 100) {
271
+ return this.selectionHistory.slice(-limit);
272
+ }
273
+ getProfiles() {
274
+ return Array.from(this.profiles.values());
275
+ }
276
+ addProfile(profile) {
277
+ const key = `${profile.provider}:${profile.model}`;
278
+ this.profiles.set(key, profile);
279
+ }
280
+ getIntentTier(intent) {
281
+ return INTENT_TIER_MAP[intent] ?? "standard";
282
+ }
283
+ recommendForIntent(intent) {
284
+ const tier = this.getIntentTier(intent);
285
+ return Array.from(this.profiles.values())
286
+ .filter(p => p.tier === tier)
287
+ .sort((a, b) => b.qualityScore - a.qualityScore);
288
+ }
289
+ }
290
+ export async function smartFallbackCall(providers, prompt, options = {}) {
291
+ const { maxRetriesPerProvider = 1, retryDelayMs = 500, callOptions, onFallback } = options;
292
+ const errors = [];
293
+ let lastProvider = "";
294
+ for (const provider of providers) {
295
+ const providerName = provider.constructor?.name ?? "unknown";
296
+ const healthy = await provider.healthCheck().catch(() => false);
297
+ if (!healthy) {
298
+ if (onFallback && lastProvider) {
299
+ onFallback(lastProvider, providerName, "health_check_failed");
300
+ }
301
+ continue;
302
+ }
303
+ let attempt = 0;
304
+ while (attempt <= maxRetriesPerProvider) {
305
+ try {
306
+ return await provider.callModel(prompt, callOptions);
307
+ }
308
+ catch (err) {
309
+ const errorMsg = err instanceof Error ? err.message : String(err);
310
+ errors.push(`${providerName}: ${errorMsg}`);
311
+ if (err instanceof RaxProviderError && err.isRetryable && attempt < maxRetriesPerProvider) {
312
+ await new Promise(r => setTimeout(r, retryDelayMs * (attempt + 1)));
313
+ attempt++;
314
+ continue;
315
+ }
316
+ if (onFallback) {
317
+ const nextProvider = providers[providers.indexOf(provider) + 1];
318
+ if (nextProvider) {
319
+ const nextName = nextProvider.constructor?.name ?? "unknown";
320
+ onFallback(providerName, nextName, errorMsg);
321
+ }
322
+ }
323
+ break;
324
+ }
325
+ }
326
+ lastProvider = providerName;
327
+ }
328
+ throw new Error(`smart_fallback_exhausted: ${errors.join("; ")}`);
329
+ }
330
+ //# sourceMappingURL=model-strategy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-strategy.js","sourceRoot":"","sources":["../src/model-strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAoDrD,MAAM,aAAa,GAAmB;IACpC,gBAAgB;IAChB,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE;IACjS,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE;IACjS,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE;IACvS,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE;IACxS,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE;IAC9R,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,
YAAY,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE;IAEnS,gBAAgB;IAChB,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,yBAAyB,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE;IAC3S,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,0BAA0B,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE;IACzT,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,sBAAsB,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE;IAEjU,gBAAgB;IAChB,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE;IACxS,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,YAAY,CAAC,EAAE;IAEnT,+BAA+B;IAC/B,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,yBAAyB,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;IAChS,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAA
E,sBAAsB,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,CAAC,EAAE;IAErR,cAAc;IACd,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,UAAU,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,kBAAkB,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE;CACzR,CAAC;AAEF,MAAM,eAAe,GAAuC;IAC1D,UAAU,EAAE,SAAS;IACrB,IAAI,EAAE,UAAU;IAChB,YAAY,EAAE,SAAS;IACvB,IAAI,EAAE,UAAU;IAChB,aAAa,EAAE,UAAU;IACzB,IAAI,EAAE,SAAS;IACf,GAAG,EAAE,UAAU;IACf,QAAQ,EAAE,SAAS;IACnB,QAAQ,EAAE,SAAS;IACnB,SAAS,EAAE,SAAS;CACrB,CAAC;AAEF,MAAM,OAAO,mBAAmB;IACtB,QAAQ,GAA8B,IAAI,GAAG,EAAE,CAAC;IAChD,SAAS,GAAgC,IAAI,GAAG,EAAE,CAAC;IACnD,OAAO,GAAiC,IAAI,GAAG,EAAE,CAAC;IAClD,gBAAgB,GAAmH,EAAE,CAAC;IAE9I,YACE,iBAAiD,EACjD,cAA+B;QAE/B,MAAM,OAAO,GAAG,cAAc,IAAI,aAAa,CAAC;QAChD,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACnD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACjE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACnC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;gBACrB,QAAQ,EAAE,IAAI;gBACd,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,YAAY,EAAE,CAAC;gBACf,YAAY,EAAE,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,CAAC,OAAyB;QAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAE/C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC5C,OAAO;gBACL,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,aAAa,EAAE,EAAE;gBACjB,aAAa,EAAE,CAAC;gBAChB,gBAAgB,EAAE,IAAI;gBACtB,SAAS,EAAE,yCAAyC;aACrD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAClC,GAAG
,CAAC;YACJ,KAAK,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC;SACvC,CAAC,CAAC,CAAC;QAEJ,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAEzC,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEzC,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QACvF,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;QAE5E,OAAO;YACL,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,aAAa,EAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;YAChD,aAAa;YACb,gBAAgB,EAAE,QAAQ,CAAC,OAAO,CAAC,YAAY;YAC/C,SAAS,EAAE,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,OAAO,CAAC;SACrD,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,gBAAgB,CACpB,OAAyB,EACzB,MAAc,EACd,MAAe,EACf,OAA6B;QAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,iBAAiB,GAAG,SAAS,CAAC,aAAa;aAC9C,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;aACxC,MAAM,CAAC,CAAC,CAAC,EAAuB,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;QAEvD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,SAAkB,CAAC;QAEvB,cAAc;QACd,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM;gBACnB,CAAC,CAAC,MAAM,SAAS,CAAC,QAAQ,CAAC,cAAc,CAAI,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;gBACrE,CAAC,CAAC,MAAM,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,CAAqB,CAAC;YAE5E,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE,MAAM,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC;YACnF,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,CAAC;YAChB,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC7C,CAAC;QAED,gBAAgB;QAChB,KAAK,MAAM,gBAAgB,IAAI,iBAAiB,EAAE,CAAC;YACjD,MAAM,eAAe,GAAG,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC,CAAC,CAAC;YACjH,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM;oBACnB,CAAC,CAAC,MAAM,gBAAgB,CAAC,cAAc,CAAI,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;oBACnE,CAAC,CAAC,MAAM,gBAAgB,CAAC,SAAS,CAAC,MAAM,EAAE,
OAAO,CAAqB,CAAC;gBAE1E,IAAI,CAAC,aAAa,CAAC,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE,MAAM,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACjF,OAAO,MAAM,CAAC;YAChB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,SAAS,GAAG,GAAG,CAAC;gBAChB,IAAI,eAAe,EAAE,CAAC;oBACpB,IAAI,CAAC,aAAa,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC;gBAC3C,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,qCAAqC,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAC7H,CAAC;IAEO,aAAa,CAAC,OAAyB;QAC7C,MAAM,UAAU,GAA+D,EAAE,CAAC;QAElF,KAAK,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,OAAO,CAAC;gBAAE,SAAS;YAExD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACtD,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAExB,UAAU,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAEO,iBAAiB,CAAC,OAAqB,EAAE,OAAyB;QACxE,IAAI,OAAO,CAAC,kBAAkB,IAAI,CAAC,OAAO,CAAC,kBAAkB;YAAE,OAAO,KAAK,CAAC;QAC5E,IAAI,OAAO,CAAC,cAAc,IAAI,CAAC,OAAO,CAAC,cAAc;YAAE,OAAO,KAAK,CAAC;QACpE,IAAI,OAAO,CAAC,iBAAiB,IAAI,CAAC,OAAO,CAAC,iBAAiB;YAAE,OAAO,KAAK,CAAC;QAE1E,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YACrC,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACvF,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;YACnE,IAAI,aAAa,GAAG,OAAO,CAAC,UAAU;gBAAE,OAAO,KAAK,CAAC;QACvD,CAAC;QAED,IAAI,OAAO,CAAC,YAAY,KAAK,SAAS,IAAI,OAAO,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY;YAAE,OAAO,KAAK,CAAC;QAEpG,IAAI,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,aAAa,EAAE,CAAC;YACpE,MAAM,YAAY,GAAgB,CAAC,SAAS,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;YAClF,MAAM,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;YACjE,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACtD,IAAI,UAAU,GAAG,YAAY,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;QAClD,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,cAAc,CAAC,SAAoC,EAAE,OAAyB;QACpF,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,UAAU,CAAC;QAChD,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,MAAM,OAAO,GAAG,IAAI,CAAC,OA
AO,CAAC,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC7D,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,CAAC,CAAC;QAE9C,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,MAAM;gBACT,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;gBACrD,MAAM;YACR,KAAK,SAAS;gBACZ,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,SAAS;gBACZ,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,YAAY,GAAG,GAAG,CAAC;gBAC7C,MAAM;YACR,KAAK,UAAU,CAAC;YAChB;gBACE,KAAK,GAAG,CACN,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC,GAAG,GAAG;oBAClD,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,GAAG;oBAC5C,SAAS,CAAC,OAAO,CAAC,YAAY,GAAG,EAAE;oBACnC,WAAW,GAAG,EAAE,CACjB,CAAC;QACN,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,MAAM,aAAa,GAAG,eAAe,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACtD,IAAI,aAAa,IAAI,SAAS,CAAC,OAAO,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAC9D,KAAK,IAAI,EAAE,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,WAAW,CAAC,OAAqB,EAAE,OAAyB;QAClE,MAAM,MAAM,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC9E,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC;IACxC,CAAC;IAEO,cAAc,CAAC,OAAqB;QAC1C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;IACtD,CAAC;IAEO,aAAa,CAAC,OAAqB,EAAE,MAAc;QACzD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;QAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;QAC7C,OAAO,CAAC,WAAW,GAAG,OAAO,CAAC,eAAe,CAAC,KAAK,GAAG,YAAY,GAAG,OAAO,CAAC,eAAe,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC9G,CAAC;IAEO,cAAc,CAAC,MAAc;QACnC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;IACrD,CAAC;IAEO,iBAAiB,CAAC,QAAkD,EAAE,OAAyB;QACrG,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,UAAU,CAAC;QAChD,OAAO,YAAY,QAAQ,CAAC,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,OAAO,CAAC,KAAK,WAAW,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI;YACnG,YAAY,QAAQ,wBAAwB,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YACzE,iBAAiB,QAAQ,CAAC,OAAO,CAAC,YAAY,gBAAgB,CAAC,QAAQ,CAAC,OA
AO,CAAC,YAAY,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IAC3H,CAAC;IAEO,mBAAmB;QACzB,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChD,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;QAExF,IAAI,YAAY,IAAI,WAAW,EAAE,CAAC;YAChC,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;QAC1D,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAM,CAAC;QAC5D,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAM,CAAC;QAC1D,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IAC5D,CAAC;IAEO,eAAe,CAAC,QAAwB;QAC9C,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACvC,IAAI,CAAC,KAAK,QAAQ;gBAAE,OAAO,IAAI,CAAC;QAClC,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,aAAa,CAAC,OAAqB,EAAE,OAAe,EAAE,IAAY;QACxE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,OAAO;YAAE,OAAO;QAErB,OAAO,CAAC,UAAU,EAAE,CAAC;QACrB,OAAO,CAAC,WAAW,GAAG,CAAC,OAAO,CAAC,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;QAChG,OAAO,CAAC,YAAY,GAAG,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;QACxG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;QAE7B,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC;YACzB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,OAAO,EAAE,IAAI;YACb,OAAO;YACP,IAAI;SACL,CAAC,CAAC;IACL,CAAC;IAEO,aAAa,CAAC,OAAqB,EAAE,KAAc;QACzD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,OAAO;YAAE,OAAO;QAErB,OAAO,CAAC,UAAU,EAAE,CAAC;QACrB,OAAO,CAAC,WAAW,GAAG,CAAC,OAAO,CAAC,WAAW,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;QAC5F,OAAO,CAAC,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3E,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEnC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC;YACzB,SAAS,EAAE,IAAI,CAAC,
GAAG,EAAE;YACrB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,CAAC;YACV,IAAI,EAAE,CAAC;SACR,CAAC,CAAC;IACL,CAAC;IAED,UAAU;QACR,OAAO,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED,mBAAmB,CAAC,QAAgB,GAAG;QACrC,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC;IAC7C,CAAC;IAED,WAAW;QACT,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,UAAU,CAAC,OAAqB;QAC9B,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACnD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAClC,CAAC;IAED,aAAa,CAAC,MAAc;QAC1B,OAAO,eAAe,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;IAED,kBAAkB,CAAC,MAAc;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;aACtC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC;aAC5B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC;IACrD,CAAC;CACF;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,SAA2B,EAC3B,MAAc,EACd,UAKI,EAAE;IAEN,MAAM,EAAE,qBAAqB,GAAG,CAAC,EAAE,YAAY,GAAG,GAAG,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;IAC3F,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,YAAY,GAAI,QAAgB,CAAC,WAAW,EAAE,IAAI,IAAI,SAAS,CAAC;QACtE,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;QAChE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,IAAI,UAAU,IAAI,YAAY,EAAE,CAAC;gBAC/B,UAAU,CAAC,YAAY,EAAE,YAAY,EAAE,qBAAqB,CAAC,CAAC;YAChE,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,OAAO,OAAO,IAAI,qBAAqB,EAAE,CAAC;YACxC,IAAI,CAAC;gBACH,OAAO,MAAM,QAAQ,CAAC,SAAS,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;YACvD,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBAClE,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,KAAK,QAAQ,EAAE,CAAC,CAAC;gBAE5C,IAAI,GAAG,YAAY,gBAAgB,IAAI,GAAG,CAAC,WAAW,IAAI,OAAO,GAAG,qBAAqB,EAAE,CAAC;oBAC1F,MAAM
,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,YAAY,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;oBACpE,OAAO,EAAE,CAAC;oBACV,SAAS;gBACX,CAAC;gBAED,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,YAAY,GAAG,SAAS,CAAC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;oBAChE,IAAI,YAAY,EAAE,CAAC;wBACjB,MAAM,QAAQ,GAAI,YAAoB,CAAC,WAAW,EAAE,IAAI,IAAI,SAAS,CAAC;wBACtE,UAAU,CAAC,YAAY,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;oBAC/C,CAAC;gBACH,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;QACD,YAAY,GAAG,YAAY,CAAC;IAC9B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACpE,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rax-flow-providers",
3
- "version": "0.2.1",
3
+ "version": "2.0.1",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -8,4 +8,5 @@ export * from "./cohere-adapter.js";
8
8
  export * from "./rest-adapter.js";
9
9
  export * from "./host-bridge-adapter.js";
10
10
  export * from "./strategy.js";
11
+ export * from "./model-strategy.js";
11
12
  export * from "./utils.js";
@@ -0,0 +1,450 @@
1
+ import { IModelProvider, ModelResponse, ProviderCallOptions, Intent } from "rax-flow-core";
2
+ import { RaxProviderError } from "./error-mapper.js";
3
+
4
/** Band a model profile is filed under; listed from lowest to highest. */
export type ModelTier =
  | "economy"
  | "standard"
  | "premium"
  | "reasoning";

/** Dimension used to rank candidate models during selection. */
export type SelectionCriteria =
  | "cost"
  | "latency"
  | "quality"
  | "balanced";
6
+
7
/** Static description of one model offered by one provider. */
export interface ModelProfile {
  /** Provider name; used to look up the provider instance registered under it. */
  provider: string;
  /** Model identifier; combined with `provider` to form the profile key. */
  model: string;
  /** Tier the model is filed under. */
  tier: ModelTier;
  /** Price per 1000 tokens, split by direction; compared against USD budgets. */
  costPer1kTokens: {
    input: number;
    output: number;
  };
  /** Typical latency in milliseconds; compared against `maxLatencyMs`. */
  avgLatencyMs: number;
  /** Token limit of the model. NOTE(review): not read by the code visible here. */
  maxTokens: number;
  /** Whether the model can satisfy `requiresStructured`. */
  supportsStructured: boolean;
  /** Whether the model can satisfy `requiresVision`. */
  supportsVision: boolean;
  /** Whether the model can satisfy `requiresStreaming`. */
  supportsStreaming: boolean;
  /** Quality rating; scaled by 100 when scored or reported as a percentage. */
  qualityScore: number;
  /** Free-form capability tags. NOTE(review): not read by the code visible here. */
  capabilities: string[];
}
20
+
21
/** Inputs that drive model selection for a single request. */
export interface SelectionContext {
  /** Prompt text; used to estimate token count when `estimatedTokens` is absent. */
  prompt: string;
  /** Task intent; profiles whose tier matches the intent's tier get a score bonus. */
  intent?: Intent;
  /** Caller-supplied token estimate; overrides the prompt-based estimate. */
  estimatedTokens?: number;
  /** Hard cap on estimated cost in USD; profiles above it are excluded. */
  maxCostUsd?: number;
  /** Hard cap on profile latency in ms; profiles above it are excluded. */
  maxLatencyMs?: number;
  /** Exclude profiles without structured-output support. */
  requiresStructured?: boolean;
  /** Exclude profiles without vision support. */
  requiresVision?: boolean;
  /** Exclude profiles without streaming support. */
  requiresStreaming?: boolean;
  /** Preferred tier; profiles more than one tier above it are excluded. */
  preferredTier?: ModelTier;
  /** Ranking criterion; treated as "balanced" when omitted. */
  criteria?: SelectionCriteria;
  /** NOTE(review): not read by the selection code visible here - confirm its use. */
  fallbackChain?: string[];
}
34
+
35
/** Outcome of a model selection. */
export interface SelectionResult {
  /** Provider instance chosen for the call. */
  provider: IModelProvider;
  /** Profile of the chosen model. */
  profile: ModelProfile;
  /** Next-best profiles to try if the chosen one fails (may be empty). */
  fallbackChain: ModelProfile[];
  /** Estimated cost of the call for the chosen profile. */
  estimatedCost: number;
  /** Expected latency in milliseconds. */
  estimatedLatency: number;
  /** Human-readable explanation of the choice. */
  reasoning: string;
}
43
+
44
/** Running call statistics kept per provider name. */
export interface ProviderMetrics {
  /** Provider name these metrics belong to. */
  provider: string;
  /** Number of recorded calls, successful or not. */
  totalCalls: number;
  /** Fraction of recorded calls that succeeded. */
  successRate: number;
  /** Running latency figure in milliseconds, updated only when a call succeeds. */
  avgLatencyMs: number;
  /** Sum of the costs reported by successful calls. */
  totalCostUsd: number;
  /** Message of the most recent recorded failure, if any. */
  lastError?: string;
  /** `Date.now()` timestamp of the most recent recorded failure, if any. */
  lastErrorTime?: number;
}
53
+
54
/**
 * Built-in model profiles, grouped by provider.
 * NOTE(review): prices, latencies and quality scores are hard-coded values -
 * confirm them against current provider data before relying on cost estimates.
 */
+ const MODEL_CATALOG: ModelProfile[] = [
55
+ // OpenAI models
56
+ { provider: "openai", model: "gpt-4o-mini", tier: "economy", costPer1kTokens: { input: 0.00015, output: 0.0006 }, avgLatencyMs: 400, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.75, capabilities: ["chat", "code", "analysis"] },
57
+ { provider: "openai", model: "gpt-4.1-mini", tier: "economy", costPer1kTokens: { input: 0.0004, output: 0.0016 }, avgLatencyMs: 450, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.78, capabilities: ["chat", "code", "analysis"] },
58
+ { provider: "openai", model: "gpt-4o", tier: "standard", costPer1kTokens: { input: 0.0025, output: 0.01 }, avgLatencyMs: 600, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.88, capabilities: ["chat", "code", "analysis", "reasoning"] },
59
+ { provider: "openai", model: "gpt-4.1", tier: "standard", costPer1kTokens: { input: 0.002, output: 0.008 }, avgLatencyMs: 550, maxTokens: 128000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.90, capabilities: ["chat", "code", "analysis", "reasoning"] },
60
+ { provider: "openai", model: "o1", tier: "reasoning", costPer1kTokens: { input: 0.015, output: 0.06 }, avgLatencyMs: 5000, maxTokens: 200000, supportsStructured: false, supportsVision: false, supportsStreaming: false, qualityScore: 0.95, capabilities: ["reasoning", "math", "science"] },
61
+ { provider: "openai", model: "o3-mini", tier: "premium", costPer1kTokens: { input: 0.0011, output: 0.0044 }, avgLatencyMs: 2000, maxTokens: 200000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.92, capabilities: ["reasoning", "code", "analysis"] },
62
+
63
+ // Claude models
64
+ { provider: "claude", model: "claude-3-5-haiku-latest", tier: "economy", costPer1kTokens: { input: 0.0008, output: 0.004 }, avgLatencyMs: 300, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.80, capabilities: ["chat", "code", "analysis"] },
65
+ { provider: "claude", model: "claude-3-5-sonnet-latest", tier: "standard", costPer1kTokens: { input: 0.003, output: 0.015 }, avgLatencyMs: 500, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.91, capabilities: ["chat", "code", "analysis", "reasoning"] },
66
+ { provider: "claude", model: "claude-3-opus-latest", tier: "premium", costPer1kTokens: { input: 0.015, output: 0.075 }, avgLatencyMs: 1200, maxTokens: 200000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.94, capabilities: ["chat", "code", "analysis", "reasoning", "creative"] },
67
+
68
+ // Gemini models
69
+ { provider: "gemini", model: "gemini-2.0-flash", tier: "economy", costPer1kTokens: { input: 0.0001, output: 0.0004 }, avgLatencyMs: 250, maxTokens: 1000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.72, capabilities: ["chat", "code", "multimodal"] },
70
+ { provider: "gemini", model: "gemini-1.5-pro", tier: "standard", costPer1kTokens: { input: 0.00125, output: 0.005 }, avgLatencyMs: 600, maxTokens: 2000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.85, capabilities: ["chat", "code", "analysis", "multimodal"] },
71
+
72
+ // Groq models (fast inference)
73
+ { provider: "groq", model: "llama-3.3-70b-versatile", tier: "economy", costPer1kTokens: { input: 0.00059, output: 0.00079 }, avgLatencyMs: 80, maxTokens: 128000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.78, capabilities: ["chat", "code"] },
74
+ { provider: "groq", model: "llama-3.1-8b-instant", tier: "economy", costPer1kTokens: { input: 0.00002, output: 0.00002 }, avgLatencyMs: 40, maxTokens: 128000, supportsStructured: true, supportsVision: false, supportsStreaming: true, qualityScore: 0.65, capabilities: ["chat"] },
75
+
76
+ // Host bridge
77
+ { provider: "host", model: "host-managed", tier: "standard", costPer1kTokens: { input: 0, output: 0 }, avgLatencyMs: 1000, maxTokens: 1000000, supportsStructured: true, supportsVision: true, supportsStreaming: true, qualityScore: 0.85, capabilities: ["chat", "code", "analysis"] }
78
+ ];
79
+
80
/**
 * Tier associated with each known intent. Intents missing from this table
 * have no associated tier.
 */
const INTENT_TIER_MAP: Partial<Record<Intent, ModelTier>> = {
  // Economy-tier intents
  brainstorm: "economy",
  test: "economy",
  document: "economy",
  benchmark: "economy",

  // Standard-tier intents
  spec: "standard",
  plan: "standard",
  generate_code: "standard",
  fix: "standard",

  // Premium-tier intents
  architecture: "premium",
  optimize: "premium"
};
92
+
93
+ export class ModelStrategyEngine {
94
/** Model profiles keyed by `provider:model`. */
private profiles = new Map<string, ModelProfile>();

/** Provider instances keyed by the name they were registered under. */
private providers = new Map<string, IModelProvider>();

/** Running metrics keyed by provider name. */
private metrics = new Map<string, ProviderMetrics>();

/** Log of recorded call outcomes, oldest first. */
private selectionHistory: Array<{
  timestamp: number;
  provider: string;
  model: string;
  success: boolean;
  latency: number;
  cost: number;
}> = [];
98
+
99
/**
 * Builds the engine's profile table and registers the given providers.
 *
 * @param providerInstances Provider instances keyed by provider name; each gets
 *   a fresh metrics record (0 calls, success rate 1).
 * @param customProfiles When provided, used instead of the built-in MODEL_CATALOG.
 */
constructor(
  providerInstances: Record<string, IModelProvider>,
  customProfiles?: ModelProfile[]
) {
  (customProfiles ?? MODEL_CATALOG).forEach(entry => {
    this.profiles.set(`${entry.provider}:${entry.model}`, entry);
  });

  Object.entries(providerInstances).forEach(([providerName, instance]) => {
    this.providers.set(providerName, instance);

    const initialMetrics: ProviderMetrics = {
      provider: providerName,
      totalCalls: 0,
      successRate: 1,
      avgLatencyMs: 0,
      totalCostUsd: 0
    };
    this.metrics.set(providerName, initialMetrics);
  });
}
120
+
121
/**
 * Picks the best-scoring eligible model for `context`.
 *
 * When no profile is eligible, the result comes from getFallbackProvider()
 * with an empty fallback chain, zero estimated cost and a 1000 ms latency estimate.
 *
 * @param context Requirements and preferences for this request.
 * @returns The chosen provider/profile, up to three next-best profiles as the
 *   fallback chain, and cost/latency estimates for the chosen profile.
 */
select(context: SelectionContext): SelectionResult {
  const eligible = this.getCandidates(context);

  if (eligible.length === 0) {
    const { provider, profile } = this.getFallbackProvider();
    return {
      provider,
      profile,
      fallbackChain: [],
      estimatedCost: 0,
      estimatedLatency: 1000,
      reasoning: "No suitable model found, using fallback"
    };
  }

  // Highest score first.
  const ranked = eligible
    .map(entry => ({ ...entry, score: this.scoreCandidate(entry, context) }))
    .sort((left, right) => right.score - left.score);

  const [best, ...others] = ranked;
  const runnersUp = others.slice(0, 3).map(entry => entry.profile);
  const tokenCount = context.estimatedTokens ?? this.estimateTokens(context.prompt);

  return {
    provider: best.provider,
    profile: best.profile,
    fallbackChain: runnersUp,
    estimatedCost: this.calculateCost(best.profile, tokenCount),
    estimatedLatency: best.profile.avgLatencyMs,
    reasoning: this.generateReasoning(best, context)
  };
}
158
+
159
/**
 * Selects a model for `context` and performs the call, falling back through the
 * selection's fallback chain when a call throws.
 *
 * Each attempt uses `callStructured` when `schema` is given and `callModel`
 * otherwise. Every attempt is recorded in the provider metrics against the
 * profile actually being tried, with latency measured for that attempt alone.
 * Fallback profiles whose provider is not registered are skipped.
 *
 * NOTE(review): the profile's `model` is not forwarded to the provider here -
 * confirm providers are already configured with the intended model.
 *
 * @param context Selection requirements.
 * @param prompt Prompt sent to the provider.
 * @param schema Optional schema; switches the call to `callStructured`.
 * @param options Optional call options forwarded to the provider.
 * @returns The first successful provider response.
 * @throws Error "All providers failed. Last error: ..." when every attempt throws.
 */
async callWithStrategy<T>(
  context: SelectionContext,
  prompt: string,
  schema?: object,
  options?: ProviderCallOptions
): Promise<ModelResponse<T>> {
  const selection = this.select(context);

  // Primary first, then every fallback profile paired with its own provider so
  // metrics are attributed to the profile that was really attempted.
  const attempts: Array<{ provider: IModelProvider; profile: ModelProfile }> = [
    { provider: selection.provider, profile: selection.profile }
  ];
  for (const profile of selection.fallbackChain) {
    const provider = this.providers.get(profile.provider);
    if (provider) attempts.push({ provider, profile });
  }

  let lastError: unknown;

  for (const { provider, profile } of attempts) {
    // Timed per attempt so earlier failures do not inflate this provider's latency.
    const attemptStart = Date.now();
    try {
      const result = schema
        ? await provider.callStructured<T>(prompt, schema, options)
        : await provider.callModel(prompt, options) as ModelResponse<T>;

      this.recordSuccess(profile, Date.now() - attemptStart, result.costUsd ?? 0);
      return result;
    } catch (err) {
      lastError = err;
      this.recordFailure(profile, err);
    }
  }

  throw new Error(`All providers failed. Last error: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
}
208
+
209
/**
 * Collects every profile that satisfies `context` and whose provider is registered.
 *
 * @param context Requirements each profile must meet.
 * @returns Provider/profile pairs in profile insertion order.
 */
private getCandidates(context: SelectionContext): Array<{ provider: IModelProvider; profile: ModelProfile }> {
  const usable: Array<{ provider: IModelProvider; profile: ModelProfile }> = [];

  this.profiles.forEach(profile => {
    if (!this.meetsRequirements(profile, context)) return;

    const provider = this.providers.get(profile.provider);
    if (provider) {
      usable.push({ provider, profile });
    }
  });

  return usable;
}
223
+
224
/**
 * Checks a profile against the hard constraints in `context`: required
 * capabilities, cost cap, latency cap, and preferred tier.
 *
 * @returns `false` as soon as any constraint is violated, otherwise `true`.
 */
private meetsRequirements(profile: ModelProfile, context: SelectionContext): boolean {
  const lacksCapability =
    (context.requiresStructured && !profile.supportsStructured) ||
    (context.requiresVision && !profile.supportsVision) ||
    (context.requiresStreaming && !profile.supportsStreaming);
  if (lacksCapability) return false;

  if (context.maxCostUsd !== undefined) {
    const tokenCount = context.estimatedTokens ?? this.estimateTokens(context.prompt);
    if (this.calculateCost(profile, tokenCount) > context.maxCostUsd) return false;
  }

  const { maxLatencyMs, preferredTier } = context;
  if (maxLatencyMs !== undefined && profile.avgLatencyMs > maxLatencyMs) return false;

  if (preferredTier && profile.tier !== preferredTier) {
    // Lower tiers and the tier directly above the preferred one stay eligible;
    // anything further up the ladder is rejected.
    const ladder: ModelTier[] = ["economy", "standard", "premium", "reasoning"];
    const stepsAbove = ladder.indexOf(profile.tier) - ladder.indexOf(preferredTier);
    if (stepsAbove > 1) return false;
  }

  return true;
}
246
+
247
/**
 * Scores a candidate according to `context.criteria` ("balanced" when omitted).
 *
 * "balanced" (and any unrecognised criterion) blends cost score x 0.3, latency
 * score x 0.3, quality x 30 and the provider's success rate x 10. A flat +10
 * bonus applies when the profile's tier equals the tier mapped to `context.intent`.
 */
private scoreCandidate(candidate: { profile: ModelProfile }, context: SelectionContext): number {
  const { profile } = candidate;
  const reliability = this.metrics.get(profile.provider)?.successRate ?? 1;
  const mode = context.criteria ?? "balanced";

  let total: number;
  if (mode === "cost") {
    total = this.scoreByCost(profile, context);
  } else if (mode === "latency") {
    total = this.scoreByLatency(profile);
  } else if (mode === "quality") {
    total = profile.qualityScore * 100;
  } else {
    total = (
      this.scoreByCost(profile, context) * 0.3 +
      this.scoreByLatency(profile) * 0.3 +
      profile.qualityScore * 30 +
      reliability * 10
    );
  }

  const intentTier = context.intent ? INTENT_TIER_MAP[context.intent] : undefined;
  if (intentTier && profile.tier === intentTier) {
    total += 10;
  }

  return total;
}
283
+
284
/**
 * Cost score: 100 minus the estimated cost scaled by 1000, floored at 0.
 * Uses `context.estimatedTokens` when present, else an estimate from the prompt.
 */
private scoreByCost(profile: ModelProfile, context: SelectionContext): number {
  const tokenCount = context.estimatedTokens ?? this.estimateTokens(context.prompt);
  const penalty = this.calculateCost(profile, tokenCount) * 1000;
  return Math.max(0, 100 - penalty);
}
289
+
290
/** Latency score: 100 minus one point per 50 ms of `avgLatencyMs`, floored at 0. */
private scoreByLatency(profile: ModelProfile): number {
  const penalty = profile.avgLatencyMs / 50;
  return Math.max(0, 100 - penalty);
}
293
+
294
/**
 * Estimates the cost of a call of `tokens` total tokens, assuming 70% of them
 * are input and 30% output (each share rounded up), priced per 1000 tokens.
 */
private calculateCost(profile: ModelProfile, tokens: number): number {
  const { input, output } = profile.costPer1kTokens;
  const inputShare = Math.ceil(tokens * 0.7);
  const outputShare = Math.ceil(tokens * 0.3);
  return (inputShare * input + outputShare * output) / 1000;
}
299
+
300
/**
 * Rough token estimate: 1.3 tokens per whitespace-separated word, rounded up.
 *
 * Words are counted as runs of non-whitespace, so an empty or whitespace-only
 * prompt yields 0 and leading/trailing whitespace does not add phantom words.
 *
 * @param prompt Text to estimate.
 * @returns Estimated token count (0 for a prompt with no words).
 */
private estimateTokens(prompt: string): number {
  const words = prompt.match(/\S+/g);
  return words ? Math.ceil(words.length * 1.3) : 0;
}
303
+
304
/**
 * Builds the human-readable explanation for a selection: chosen provider/model
 * and tier, the criterion and score, and the profile's latency and quality.
 */
private generateReasoning(selected: { profile: ModelProfile; score: number }, context: SelectionContext): string {
  const { profile, score } = selected;
  const mode = context.criteria ?? "balanced";
  const qualityPercent = (profile.qualityScore * 100).toFixed(0);

  return [
    `Selected ${profile.provider}/${profile.model} (tier: ${profile.tier})`,
    `based on ${mode} criteria with score ${score.toFixed(1)}.`,
    `Est. latency: ${profile.avgLatencyMs}ms, quality: ${qualityPercent}%`
  ].join(" ");
}
310
+
311
/**
 * Chooses a last-resort provider/profile pair without applying any selection
 * requirements.
 *
 * Preference order:
 *  1. the "host" provider with a "host" profile, when both exist;
 *  2. the first registered provider that has at least one profile of its own;
 *  3. the first registered provider paired with the first profile.
 *
 * @throws Error when no provider or no profile is registered, instead of
 *   handing back `undefined` values typed as real ones.
 */
private getFallbackProvider(): { provider: IModelProvider; profile: ModelProfile } {
  const allProfiles = Array.from(this.profiles.values());

  const hostProvider = this.providers.get("host");
  const hostProfile = allProfiles.find(p => p.provider === "host");
  if (hostProvider && hostProfile) {
    return { provider: hostProvider, profile: hostProfile };
  }

  // Keep provider registration order, but pair the provider with one of its own
  // profiles so metrics and reasoning refer to a consistent provider/model.
  for (const [name, provider] of this.providers) {
    const profile = allProfiles.find(p => p.provider === name);
    if (profile) {
      return { provider, profile };
    }
  }

  const firstProvider = this.providers.values().next().value;
  const firstProfile = allProfiles[0];
  if (!firstProvider || !firstProfile) {
    throw new Error("No fallback available: at least one provider and one model profile must be registered");
  }
  return { provider: firstProvider, profile: firstProfile };
}
323
+
324
/**
 * Looks up the name a provider instance was registered under.
 *
 * @param provider Provider instance to search for (compared by identity).
 * @returns The first matching registration name, or "unknown" when the
 *          instance is not in the registry.
 */
private getProviderName(provider: IModelProvider): string {
  const match = Array.from(this.providers.entries()).find(
    ([, candidate]) => candidate === provider
  );
  return match ? match[0] : "unknown";
}
330
+
331
/**
 * Records a successful call against the provider's metrics and appends it
 * to the selection history.
 *
 * Nothing is recorded (not even a history entry) when there is no metrics
 * entry for `profile.provider`.
 *
 * @param profile Profile of the model that served the call.
 * @param latency Observed latency, folded into the running average.
 * @param cost Cost of the call, added to the running total.
 */
private recordSuccess(profile: ModelProfile, latency: number, cost: number): void {
  const stats = this.metrics.get(profile.provider);
  if (stats) {
    const previousCalls = stats.totalCalls;
    const calls = previousCalls + 1;
    stats.totalCalls = calls;

    // Incremental means: re-weight the old average, then add this sample
    // (a success counts as 1 towards the success rate).
    stats.successRate = (stats.successRate * previousCalls + 1) / calls;
    stats.avgLatencyMs = (stats.avgLatencyMs * previousCalls + latency) / calls;
    stats.totalCostUsd += cost;

    const { provider, model } = profile;
    this.selectionHistory.push({
      timestamp: Date.now(),
      provider,
      model,
      success: true,
      latency,
      cost,
    });
  }
}
349
+
350
/**
 * Records a failed call against the provider's metrics and appends it to
 * the selection history with zero latency and zero cost.
 *
 * Nothing is recorded (not even a history entry) when there is no metrics
 * entry for `profile.provider`.
 *
 * @param profile Profile of the model whose call failed.
 * @param error The thrown value; its message (or string form) is stored
 *              as `lastError`.
 */
private recordFailure(profile: ModelProfile, error: unknown): void {
  const stats = this.metrics.get(profile.provider);
  if (stats) {
    const previousCalls = stats.totalCalls;
    stats.totalCalls = previousCalls + 1;

    // Incremental mean where a failure contributes 0 to the success rate.
    stats.successRate = (stats.successRate * previousCalls) / stats.totalCalls;

    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else {
      message = String(error);
    }
    stats.lastError = message;
    stats.lastErrorTime = Date.now();

    const { provider, model } = profile;
    this.selectionHistory.push({
      timestamp: Date.now(),
      provider,
      model,
      success: false,
      latency: 0,
      cost: 0,
    });
  }
}
368
+
369
/**
 * Returns the per-provider metrics as a plain object keyed by the same
 * keys as the internal metrics map.
 *
 * The object is newly created, but its values are the same metrics
 * objects held internally (they are not copied).
 */
getMetrics(): Record<string, ProviderMetrics> {
  const entries = [...this.metrics.entries()];
  return Object.fromEntries(entries);
}
372
+
373
/**
 * Returns the most recent selection-history entries, oldest first.
 *
 * @param limit Maximum number of entries to return (default 100).
 *              Fractions are rounded down. A limit of zero, a negative
 *              limit or NaN yields an empty array: passing such values
 *              straight to `slice(-limit)` would instead return the whole
 *              history (`slice(-0)` is `slice(0)`) or drop leading entries.
 * @returns A new array holding at most `limit` trailing entries.
 */
getSelectionHistory(limit: number = 100): typeof this.selectionHistory {
  const count = Math.floor(limit);
  if (!(count > 0)) {
    return [];
  }
  return this.selectionHistory.slice(-count);
}
376
+
377
/**
 * Lists every registered model profile.
 *
 * @returns A new array of the profiles, in the map's iteration order.
 */
getProfiles(): ModelProfile[] {
  return [...this.profiles.values()];
}
380
+
381
/**
 * Registers a model profile under the key `<provider>:<model>`,
 * replacing any profile already stored under that key.
 *
 * @param profile Profile to add or replace.
 */
addProfile(profile: ModelProfile): void {
  const { provider, model } = profile;
  this.profiles.set(`${provider}:${model}`, profile);
}
385
+
386
/**
 * Maps an intent to its model tier via `INTENT_TIER_MAP`.
 *
 * @param intent Intent to look up.
 * @returns The mapped tier, or "standard" when the map has no entry.
 */
getIntentTier(intent: Intent): ModelTier {
  const mappedTier = INTENT_TIER_MAP[intent];
  return mappedTier ?? "standard";
}
389
+
390
/**
 * Recommends profiles for an intent: every registered profile whose tier
 * equals the intent's tier, ordered by `qualityScore`, highest first.
 *
 * @param intent Intent to recommend models for.
 * @returns A new array of matching profiles; empty when none match.
 */
recommendForIntent(intent: Intent): ModelProfile[] {
  const wantedTier = this.getIntentTier(intent);

  const matches: ModelProfile[] = [];
  for (const profile of this.profiles.values()) {
    if (profile.tier === wantedTier) {
      matches.push(profile);
    }
  }

  matches.sort((left, right) => right.qualityScore - left.qualityScore);
  return matches;
}
396
+ }
397
+
398
/**
 * Calls `providers` in order and returns the first successful response.
 *
 * For each provider:
 *  - its health check runs first; a falsy result or a thrown/rejected
 *    check skips the provider;
 *  - otherwise `callModel` is attempted; an error that is a
 *    `RaxProviderError` with `isRetryable` set is retried up to
 *    `maxRetriesPerProvider` more times, waiting `retryDelayMs * attemptNumber`
 *    between attempts; any other error moves on to the next provider.
 *
 * `onFallback(from, to, reason)` fires each time a provider is abandoned
 * while another one remains: `from` is the abandoned provider, `to` is the
 * next one in the list and `reason` is the error message, or
 * "health_check_failed" when the provider was skipped as unhealthy.
 * Provider names are taken from each instance's constructor name.
 *
 * @param providers Providers to try, in priority order.
 * @param prompt Prompt forwarded to `callModel`.
 * @param options Retry count (default 1; negative values are treated as 0),
 *                base retry delay in ms (default 500), options forwarded
 *                to `callModel`, and the optional fallback callback.
 * @returns The response of the first provider that succeeds.
 * @throws Error whose message starts with `smart_fallback_exhausted: ` and
 *         lists every recorded failure, including failed health checks.
 */
export async function smartFallbackCall(
  providers: IModelProvider[],
  prompt: string,
  options: {
    maxRetriesPerProvider?: number;
    retryDelayMs?: number;
    callOptions?: ProviderCallOptions;
    onFallback?: (from: string, to: string, reason: string) => void;
  } = {}
): Promise<ModelResponse<string>> {
  const { maxRetriesPerProvider = 1, retryDelayMs = 500, callOptions, onFallback } = options;
  const errors: string[] = [];

  const describeProvider = (candidate: IModelProvider): string =>
    (candidate as { constructor?: { name?: string } }).constructor?.name ?? "unknown";

  // One initial attempt plus the configured retries.
  const maxAttempts = Math.max(0, maxRetriesPerProvider) + 1;

  for (const [index, provider] of providers.entries()) {
    const providerName = describeProvider(provider);
    // Positional lookup, so a provider instance listed twice still
    // resolves to the entry that really follows this one.
    const nextProvider = providers[index + 1];
    const notifyFallback = (reason: string): void => {
      if (onFallback && nextProvider !== undefined) {
        onFallback(providerName, describeProvider(nextProvider), reason);
      }
    };

    let healthy = false;
    try {
      healthy = await provider.healthCheck();
    } catch {
      // A throwing health check is treated the same as an unhealthy provider.
      healthy = false;
    }
    if (!healthy) {
      errors.push(`${providerName}: health_check_failed`);
      notifyFallback("health_check_failed");
      continue;
    }

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        return await provider.callModel(prompt, callOptions);
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        errors.push(`${providerName}: ${errorMsg}`);

        const canRetry =
          err instanceof RaxProviderError && err.isRetryable && attempt + 1 < maxAttempts;
        if (!canRetry) {
          notifyFallback(errorMsg);
          break;
        }

        // Linear back-off: the wait grows with each retry of this provider.
        await new Promise<void>(resolve => setTimeout(resolve, retryDelayMs * (attempt + 1)));
      }
    }
  }

  const detail = errors.length > 0 ? errors.join("; ") : "no_providers_available";
  throw new Error(`smart_fallback_exhausted: ${detail}`);
}