provider-kit 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,26 +54,35 @@ const provider = providerRegistry.getProvider('openai')
54
54
  const reply = await provider.chat('gpt-4o-mini', [{ role: 'user', content: 'Hi' }])
55
55
  ```
56
56
 
57
- ### Model fallback (like OpenRouter auto-failover)
57
+ ### Model auto-routing (health probes + real-time switching)
58
58
 
59
- Try GPT-4 first. If it's rate-limited or times out, fall back to Claude, then Ollama.
59
+ Periodically probes each configured model for availability and latency, then routes every request to the best one for the chosen strategy. Changes take effect immediately — no restart needed.
60
60
 
61
61
  ```js
62
62
  import { createRouter } from 'provider-kit'
63
63
 
64
- const router = createRouter([
65
- { provider: 'openai', model: 'gpt-4', apiKey: process.env.OPENAI_API_KEY },
66
- { provider: 'openai', model: 'gpt-4o-mini', apiKey: process.env.OPENAI_API_KEY },
67
- { provider: 'anthropic', model: 'claude-3-haiku', apiKey: process.env.ANTHROPIC_API_KEY },
68
- { provider: 'ollama', model: 'llama3.2', baseUrl: 'http://localhost:11434' },
69
- ])
64
+ const router = createRouter({
65
+ probes: [
66
+ { provider: 'openai', model: 'gpt-4', apiKey: process.env.OPENAI_API_KEY },
67
+ { provider: 'openai', model: 'gpt-4o-mini', apiKey: process.env.OPENAI_API_KEY },
68
+ { provider: 'anthropic', model: 'claude-3-haiku', apiKey: process.env.ANTHROPIC_API_KEY },
69
+ { provider: 'ollama', model: 'llama3.2', baseUrl: 'http://localhost:11434' },
70
+ ],
71
+ strategy: 'latency', // 'latency' | 'failover' | 'round-robin'
72
+ probeInterval: 30000, // ping every 30s (0 = manual only)
73
+ onProbeResult: (results) => console.log(results),
74
+ })
70
75
 
71
- const reply = await router.chat([{ role: 'user', content: 'Explain quantum computing' }])
72
- // Automatically tries gpt-4 gpt-4o-mini claude-haiku llama3.2
73
- ```
76
+ const reply = await router.chat([{ role: 'user', content: 'Hello' }])
77
+ // → auto-routed to the healthiest model
74
78
 
75
- Router skips auth/bad_request/quota errors (those won't work on another model either).
76
- Only retries on rate_limit, timeout, server_error, and network errors.
79
+ // Manual probe
80
+ const status = await router.checkNow()
81
+ // → [{ provider, model, ok, latency, error, timestamp }]
82
+
83
+ // Stop auto-probe
84
+ router.stop()
85
+ ```
77
86
 
78
87
  ### Streaming
79
88
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "provider-kit",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "42 LLM provider unified API — one interface for OpenAI, Anthropic, Ollama, OpenRouter, and 38 more. Built-in retry, timeout, and error handling.",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -103,43 +103,129 @@ export function createCancelSignal() {
103
103
  }
104
104
 
105
105
  /**
106
- * createRouter — model fallback router
106
+ * createRouter — model health probe + auto-routing
107
107
  *
108
- * Tries models in order. If one fails (rate_limit/timeout/server_error),
109
- * automatically falls back to the next.
108
+ * Periodically checks each model's availability and latency.
109
+ * Routes requests to the best available model in real time.
110
+ * Changes take effect immediately — no restart needed.
110
111
  *
111
112
  * Usage:
112
- * const router = createRouter([
113
- * { provider: 'openai', model: 'gpt-4', apiKey: 'sk-...' },
114
- * { provider: 'openai', model: 'gpt-4o-mini', apiKey: 'sk-...' },
115
- * { provider: 'anthropic', model: 'claude-3-haiku', apiKey: 'sk-...' },
116
- * ]);
113
+ * const router = createRouter({
114
+ * probes: [
115
+ * { provider: 'openai', model: 'gpt-4', apiKey: 'sk-...' },
116
+ * { provider: 'openai', model: 'gpt-4o-mini', apiKey: 'sk-...' },
117
+ * { provider: 'anthropic', model: 'claude-3-haiku', apiKey: 'sk-...' },
118
+ * { provider: 'ollama', model: 'llama3', baseUrl: 'http://localhost:11434' },
119
+ * ],
120
+ * strategy: 'latency', // 'latency' | 'failover' | 'round-robin'
121
+ * probeInterval: 30000, // check every 30s (default 0 = no auto-probe)
122
+ * probeTimeout: 5000, // per-probe timeout
123
+ * onProbeResult: (results) => console.log(results),
124
+ * });
125
+ *
117
126
  * const reply = await router.chat([{ role: 'user', content: 'Hi' }]);
127
+ * // → routed to the best available model
118
128
  */
119
- export function createRouter(strategies) {
120
- if (!Array.isArray(strategies) || strategies.length === 0) {
121
- throw new ProviderError('createRouter requires a non-empty array of strategies', { type: 'bad_request' });
129
+ export function createRouter(opts) {
130
+ const probes = Array.isArray(opts) ? opts : opts.probes || opts.strategies || [];
131
+ if (!Array.isArray(probes) || probes.length === 0) {
132
+ throw new ProviderError('createRouter requires probes array', { type: 'bad_request' });
122
133
  }
123
134
 
124
- async function chat(messages) {
125
- const errors = [];
126
- for (const entry of strategies) {
127
- try {
128
- const { createProvider } = await import('./openai-compatible.js');
129
- const provider = await createProvider(entry.provider, entry.apiKey, { baseUrl: entry.baseUrl });
130
- return await safeProviderCall(
131
- () => provider.chat(entry.model, messages),
132
- { provider: entry.provider, retries: 1, timeout: 15000 }
133
- );
134
- } catch (e) {
135
- const ce = classifyError(e, entry.provider);
136
- if (ce.type === 'auth' || ce.type === 'bad_request' || ce.type === 'quota') throw ce;
137
- errors.push({ provider: entry.provider, model: entry.model, error: ce.message });
138
- continue;
135
+ const strategy = opts.strategy || 'latency';
136
+ const probeInterval = opts.probeInterval ?? 0;
137
+ const probeTimeout = opts.probeTimeout ?? 5000;
138
+ const onProbeResult = opts.onProbeResult || null;
139
+
140
+ // Probe results: { provider, model, ok, latency, error, timestamp }
141
+ let results = probes.map(p => ({ provider: p.provider, model: p.model, ok: true, latency: 0, error: null, timestamp: 0 }));
142
+ let rrIndex = 0;
143
+ let probeTimer = null;
144
+
145
+ // Probe a single model
146
+ async function probeOne(entry) {
147
+ const start = Date.now();
148
+ try {
149
+ const { createProvider } = await import('./openai-compatible.js');
150
+ const provider = await createProvider(entry.provider, entry.apiKey, { baseUrl: entry.baseUrl });
151
+ await withTimeout(
152
+ () => provider.chat(entry.model, [{ role: 'user', content: 'Hi' }], { max_tokens: 1 }),
153
+ probeTimeout
154
+ );
155
+ return { ok: true, latency: Date.now() - start, error: null };
156
+ } catch (e) {
157
+ const ce = classifyError(e, entry.provider);
158
+ return { ok: false, latency: Date.now() - start, error: ce.type === 'auth' ? 'auth_failed' : ce.message };
159
+ }
160
+ }
161
+
162
+ // Probe all models, update results
163
+ async function probeAll() {
164
+ const newResults = await Promise.all(probes.map(async (entry, i) => {
165
+ const { ok, latency, error } = await probeOne(entry);
166
+ return { provider: entry.provider, model: entry.model, ok, latency, error, timestamp: Date.now() };
167
+ }));
168
+ results = newResults;
169
+ if (onProbeResult) onProbeResult(results);
170
+ }
171
+
172
+ // Pick the best model based on strategy
173
+ function pick() {
174
+ const alive = results.filter(r => r.ok);
175
+ if (alive.length === 0) return probes[0]; // all dead → try first anyway
176
+
177
+ switch (strategy) {
178
+ case 'latency':
179
+ alive.sort((a, b) => a.latency - b.latency);
180
+ return probes[results.indexOf(alive[0])];
181
+ case 'round-robin': {
182
+ const idx = rrIndex % alive.length;
183
+ rrIndex++;
184
+ return probes[results.indexOf(alive[idx])];
139
185
  }
186
+ case 'failover': {
187
+ const preferred = probes.map((p, i) => ({ p, i })).sort((a, b) => a.i - b.i);
188
+ for (const { p, i } of preferred) {
189
+ if (results[i]?.ok) return p;
190
+ }
191
+ return probes[0];
192
+ }
193
+ default:
194
+ return probes[probes.indexOf(alive[0])];
140
195
  }
141
- throw new ProviderError(`All models failed: ${errors.map(e => `${e.provider}/${e.model}`).join(', ')}`, { type: 'server_error' });
142
196
  }
143
197
 
144
- return { chat, strategies };
198
+ // Start auto-probe
199
+ if (probeInterval > 0) {
200
+ probeAll(); // first probe immediately
201
+ probeTimer = setInterval(probeAll, probeInterval);
202
+ }
203
+
204
+ // Chat — route to best model
205
+ async function chat(messages) {
206
+ const entry = pick();
207
+ const idx = probes.indexOf(entry);
208
+ const r = results[idx];
209
+
210
+ if (!r?.ok && strategy !== 'failover') {
211
+ // All dead — force probe once
212
+ const probeResult = await probeOne(entry);
213
+ results[idx] = { ...results[idx], ...probeResult, timestamp: Date.now() };
214
+ }
215
+
216
+ const { createProvider } = await import('./openai-compatible.js');
217
+ const provider = await createProvider(entry.provider, entry.apiKey, { baseUrl: entry.baseUrl });
218
+ return safeProviderCall(
219
+ () => provider.chat(entry.model, messages),
220
+ { provider: entry.provider, retries: 1, timeout: 30000 }
221
+ );
222
+ }
223
+
224
+ // Manual probe trigger
225
+ async function checkNow() { await probeAll(); return results; }
226
+
227
+ // Stop auto-probe
228
+ function stop() { if (probeTimer) { clearInterval(probeTimer); probeTimer = null; } }
229
+
230
+ return { chat, probes, results: () => results, strategy, checkNow, stop };
145
231
  }
@@ -17,8 +17,10 @@ import { persistentConfig } from '../core/persistent-config.js';
17
17
 
18
18
  class ProviderRegistry {
19
19
  constructor() {
20
- this.providers = new Map(); // id -> provider instance
21
- this.models = new Map(); // providerId -> [modelIds]
20
+ this.providers = new Map();
21
+ this.models = new Map();
22
+ this._modelTimestamps = new Map(); // providerId -> last fetch timestamp
23
+ this._modelCacheTtl = 60000; // 60s
22
24
  this.presets = PRESET_PROVIDERS;
23
25
  }
24
26
 
@@ -125,25 +127,30 @@ class ProviderRegistry {
125
127
  try {
126
128
  const models = await provider.fetchModels();
127
129
  this.models.set(providerId, models);
130
+ this._modelTimestamps.set(providerId, Date.now());
128
131
  return models;
129
132
  } catch (e) {
130
- // 如果失败,返回本地模型列表
131
133
  return provider.getModels();
132
134
  }
133
135
  }
134
136
 
135
137
  /**
136
- * 获取模型列表
138
+ * 获取模型列表(缓存 60 秒,过期自动刷新)
137
139
  */
138
- getModels(providerId) {
139
- // 优先使用缓存的模型列表
140
- if (this.models.has(providerId)) {
141
- return this.models.get(providerId);
140
+ async getModels(providerId) {
141
+ const cached = this.models.get(providerId);
142
+ const lastFetch = this._modelTimestamps.get(providerId) || 0;
143
+ const stale = Date.now() - lastFetch > this._modelCacheTtl;
144
+
145
+ if (cached && !stale) return cached;
146
+
147
+ // 尝试刷新,失败则返回缓存
148
+ if (this.getProvider(providerId)) {
149
+ const fresh = await this.refreshModels(providerId).catch(() => cached || []);
150
+ return fresh;
142
151
  }
143
152
 
144
- // provider 获取
145
- const provider = this.getProvider(providerId);
146
- return provider ? provider.getModels() : [];
153
+ return cached || [];
147
154
  }
148
155
 
149
156
  /**