adaptive-memory-multi-model-router 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -0
- package/demo/research-demo.js +266 -0
- package/dist/cache/prefixCache.d.ts +114 -0
- package/dist/cache/prefixCache.d.ts.map +1 -0
- package/dist/cache/prefixCache.js +285 -0
- package/dist/cache/prefixCache.js.map +1 -0
- package/dist/cache/responseCache.d.ts +58 -0
- package/dist/cache/responseCache.d.ts.map +1 -0
- package/dist/cache/responseCache.js +153 -0
- package/dist/cache/responseCache.js.map +1 -0
- package/dist/cli.js +59 -0
- package/dist/cost/costTracker.d.ts +95 -0
- package/dist/cost/costTracker.d.ts.map +1 -0
- package/dist/cost/costTracker.js +240 -0
- package/dist/cost/costTracker.js.map +1 -0
- package/dist/index.d.ts +723 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +239 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/episodicMemory.d.ts +82 -0
- package/dist/memory/episodicMemory.d.ts.map +1 -0
- package/dist/memory/episodicMemory.js +145 -0
- package/dist/memory/episodicMemory.js.map +1 -0
- package/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/dist/orchestration/haloOrchestrator.js +207 -0
- package/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/dist/orchestration/mctsWorkflow.js +210 -0
- package/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/dist/providers/localProvider.d.ts +102 -0
- package/dist/providers/localProvider.d.ts.map +1 -0
- package/dist/providers/localProvider.js +338 -0
- package/dist/providers/localProvider.js.map +1 -0
- package/dist/providers/registry.d.ts +55 -0
- package/dist/providers/registry.d.ts.map +1 -0
- package/dist/providers/registry.js +138 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/routing/advancedRouter.d.ts +68 -0
- package/dist/routing/advancedRouter.d.ts.map +1 -0
- package/dist/routing/advancedRouter.js +332 -0
- package/dist/routing/advancedRouter.js.map +1 -0
- package/dist/tools/tmlpdTools.d.ts +101 -0
- package/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/dist/tools/tmlpdTools.js +368 -0
- package/dist/tools/tmlpdTools.js.map +1 -0
- package/dist/utils/batchProcessor.d.ts +96 -0
- package/dist/utils/batchProcessor.d.ts.map +1 -0
- package/dist/utils/batchProcessor.js +170 -0
- package/dist/utils/batchProcessor.js.map +1 -0
- package/dist/utils/compression.d.ts +61 -0
- package/dist/utils/compression.d.ts.map +1 -0
- package/dist/utils/compression.js +281 -0
- package/dist/utils/compression.js.map +1 -0
- package/dist/utils/reliability.d.ts +74 -0
- package/dist/utils/reliability.d.ts.map +1 -0
- package/dist/utils/reliability.js +177 -0
- package/dist/utils/reliability.js.map +1 -0
- package/dist/utils/speculativeDecoding.d.ts +117 -0
- package/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/dist/utils/speculativeDecoding.js +246 -0
- package/dist/utils/speculativeDecoding.js.map +1 -0
- package/dist/utils/tokenUtils.d.ts +50 -0
- package/dist/utils/tokenUtils.d.ts.map +1 -0
- package/dist/utils/tokenUtils.js +124 -0
- package/dist/utils/tokenUtils.js.map +1 -0
- package/examples/QUICKSTART.md +183 -0
- package/notebooks/quickstart.ipynb +157 -0
- package/package.json +83 -0
- package/python/examples.py +53 -0
- package/python/integrations.py +330 -0
- package/python/setup.py +28 -0
- package/python/tmlpd.py +369 -0
- package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/qna/TMLPD_QNA.md +751 -0
- package/rust/tmlpd.h +268 -0
- package/skill/SKILL.md +238 -0
- package/src/cache/prefixCache.ts +365 -0
- package/src/cache/responseCache.ts +147 -0
- package/src/cost/costTracker.ts +302 -0
- package/src/index.ts +224 -0
- package/src/memory/episodicMemory.ts +185 -0
- package/src/orchestration/haloOrchestrator.ts +266 -0
- package/src/orchestration/mctsWorkflow.ts +262 -0
- package/src/providers/localProvider.ts +406 -0
- package/src/providers/registry.ts +164 -0
- package/src/routing/advancedRouter.ts +406 -0
- package/src/tools/tmlpdTools.ts +433 -0
- package/src/utils/batchProcessor.ts +232 -0
- package/src/utils/compression.ts +325 -0
- package/src/utils/reliability.ts +221 -0
- package/src/utils/speculativeDecoding.ts +344 -0
- package/src/utils/tokenUtils.ts +145 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
/**
 * TMLPD Local Provider Support
 *
 * Integration with local LLM runtimes:
 * - Ollama (localhost:11434)
 * - vLLM (localhost:8000)
 * - LM Studio (localhost:1234)
 *
 * Enables privacy-preserving, cost-free parallel LLM execution.
 */
|
|
11
|
+
|
|
12
|
+
import { nanoid } from "nanoid";
|
|
13
|
+
|
|
14
|
+
/** Identifier of a supported local LLM runtime. */
export type LocalRuntime =
  | "ollama"
  | "vllm"
  | "lmstudio";
|
|
15
|
+
|
|
16
|
+
/** Settings used to construct a `LocalProvider`. */
export interface LocalProviderConfig {
  /** Which local runtime to talk to. */
  runtime: LocalRuntime;
  /** Base URL of the runtime; auto-detected from the runtime if not provided. */
  endpoint?: string;
  /** Model used when a call does not name one. */
  default_model?: string;
  /** Request timeout in milliseconds. */
  timeout_ms?: number;
}
|
|
22
|
+
|
|
23
|
+
/** Description of one model exposed by a local runtime. */
export interface LocalModelInfo {
  /** Model identifier as reported by the runtime. */
  name: string;
  /** Human-readable size (for example "3.8 GB"), when the runtime reports one. */
  size?: string;
  /** Quantization label, when the runtime reports one. */
  quantization?: string;
  /** Capability tags reported by the runtime; may be empty. */
  capabilities?: string[];
}
|
|
29
|
+
|
|
30
|
+
// Default base URL for each runtime, used when a config supplies no endpoint.
const DEFAULT_ENDPOINTS: Record<LocalRuntime, string> = {
  ollama: "http://localhost:11434",
  vllm: "http://localhost:8000",
  lmstudio: "http://localhost:1234"
};
|
|
36
|
+
|
|
37
|
+
/**
 * Client for a single local LLM runtime (Ollama, vLLM or LM Studio).
 *
 * Every HTTP request is bounded by `timeout_ms`. `generate` never throws:
 * failures are reported through the returned result. `listModels` and
 * `healthCheck` likewise swallow errors and return `[]` / `false`.
 */
export class LocalProvider {
  private runtime: LocalRuntime;
  private endpoint: string;
  private default_model: string;
  private timeout_ms: number;

  /**
   * @param config Runtime selection plus optional endpoint, default model and
   *   timeout. Missing (or empty/zero) values fall back to the runtime's
   *   default endpoint, "llama-3.3-70b" and 120000 ms respectively.
   */
  constructor(config: LocalProviderConfig) {
    this.runtime = config.runtime;
    this.endpoint = config.endpoint || DEFAULT_ENDPOINTS[config.runtime];
    this.default_model = config.default_model || "llama-3.3-70b";
    this.timeout_ms = config.timeout_ms || 120000;
  }

  /**
   * List available models on this runtime.
   *
   * @returns The models reported by the runtime, or an empty array when the
   *   request fails (the error is logged, not thrown).
   */
  async listModels(): Promise<LocalModelInfo[]> {
    try {
      switch (this.runtime) {
        case "ollama":
          return await this.listOllamaModels();
        case "vllm":
          return await this.listVLLMModels();
        case "lmstudio":
          return await this.listLMStudioModels();
        default:
          return [];
      }
    } catch (error) {
      console.error(`Failed to list models from ${this.runtime}:`, error);
      return [];
    }
  }

  /** Lists models through Ollama's `/api/tags` endpoint. */
  private async listOllamaModels(): Promise<LocalModelInfo[]> {
    const data = await this.getJson<{
      models?: Array<{
        name: string;
        size?: number;
        details?: { quantization_level?: string; quantization?: string };
      }>;
    }>("/api/tags", "Ollama API error");

    return (data.models ?? []).map((m) => ({
      name: m.name,
      size: m.size ? this.formatSize(m.size) : undefined,
      // Ollama reports the label as `quantization_level`; the legacy
      // `quantization` key is still honoured if present.
      quantization: m.details?.quantization_level ?? m.details?.quantization,
      capabilities: []
    }));
  }

  /** Lists models through vLLM's OpenAI-compatible `/v1/models` endpoint. */
  private async listVLLMModels(): Promise<LocalModelInfo[]> {
    const data = await this.getJson<{
      data?: Array<{ id: string; extensions?: string[] }>;
    }>("/v1/models", "vLLM API error");

    return (data.data ?? []).map((m) => ({
      name: m.id,
      capabilities: m.extensions ?? []
    }));
  }

  /** Lists models through LM Studio's `/api/v0/models` endpoint. */
  private async listLMStudioModels(): Promise<LocalModelInfo[]> {
    type Entry = { id?: string; name?: string; size?: string; quantization?: string };
    const payload = await this.getJson<{ data?: Entry[]; models?: Entry[] }>(
      "/api/v0/models",
      "LM Studio API error"
    );

    // The list arrives under `data`; `models` is accepted as well so payloads
    // shaped the way this code previously expected keep working.
    const entries = payload.data ?? payload.models ?? [];
    return entries.map((m) => ({
      name: m.id || m.name || "unknown",
      size: m.size,
      quantization: m.quantization,
      capabilities: []
    }));
  }

  /**
   * Generate completion (generic, works with all runtimes).
   *
   * @param prompt Prompt text sent to the model.
   * @param model Model name; the provider's default model is used when omitted.
   * @param options Ollama receives this object verbatim as `options`; for vLLM
   *   and LM Studio only `max_tokens` and `temperature` are forwarded.
   * @returns A result with `success: false` and an `error` message when the
   *   request fails or times out; this method does not throw.
   */
  async generate(prompt: string, model?: string, options?: Record<string, any>): Promise<LocalGenerationResult> {
    const targetModel = model || this.default_model;
    const startedAt = Date.now();

    try {
      let result: LocalGenerationResult;
      if (this.runtime === "ollama") {
        result = await this.ollamaGenerate(targetModel, prompt, options);
      } else if (this.runtime === "vllm") {
        result = await this.openAICompatibleGenerate("vllm", "vLLM", targetModel, prompt, options);
      } else {
        result = await this.openAICompatibleGenerate("lmstudio", "LM Studio", targetModel, prompt, options);
      }
      // Fall back to wall-clock time when the runtime reported no duration.
      return result.duration_ms > 0
        ? result
        : { ...result, duration_ms: Date.now() - startedAt };
    } catch (error) {
      return {
        success: false,
        error: `Local generation failed: ${error}`,
        model: targetModel,
        provider: this.runtime,
        content: "",
        duration_ms: Date.now() - startedAt,
        tokens: 0,
        cost: 0
      };
    }
  }

  /** Non-streaming completion through Ollama's `/api/generate` endpoint. */
  private async ollamaGenerate(model: string, prompt: string, options?: Record<string, any>): Promise<LocalGenerationResult> {
    const data = await this.postJson<{
      model?: string;
      response?: string;
      total_duration?: number;
      eval_count?: number;
    }>(
      "/api/generate",
      { model, prompt, stream: false, options: options ?? {} },
      "Ollama error"
    );

    return {
      success: true,
      error: null,
      model: data.model || model,
      provider: "ollama",
      content: data.response || "",
      // total_duration is divided by 1e6 to obtain milliseconds.
      duration_ms: data.total_duration ? data.total_duration / 1_000_000 : 0,
      tokens: data.eval_count || 0,
      cost: 0 // Local = free
    };
  }

  /**
   * Completion through the OpenAI-compatible `/v1/completions` endpoint that
   * both vLLM and LM Studio expose.
   *
   * @param provider Value reported in the result's `provider` field.
   * @param label Human-readable runtime name used in error messages.
   */
  private async openAICompatibleGenerate(
    provider: LocalRuntime,
    label: string,
    model: string,
    prompt: string,
    options?: Record<string, any>
  ): Promise<LocalGenerationResult> {
    const data = await this.postJson<{
      choices?: Array<{ text?: string }>;
      model?: string;
      elapsed_time?: number;
      usage?: { total_tokens?: number };
    }>(
      "/v1/completions",
      {
        model,
        prompt,
        // `??` rather than `||` so an explicit temperature of 0 is honoured.
        max_tokens: options?.max_tokens ?? 1024,
        temperature: options?.temperature ?? 0.7
      },
      `${label} error`
    );

    return {
      success: true,
      error: null,
      model: data.model || model,
      provider,
      content: data.choices?.[0]?.text || "",
      // Used only if the server happens to send it; generate() otherwise
      // substitutes the measured wall-clock time.
      duration_ms: data.elapsed_time ? data.elapsed_time * 1000 : 0,
      tokens: data.usage?.total_tokens || 0,
      cost: 0
    };
  }

  /**
   * Check if this runtime is available/healthy.
   *
   * @returns `true` when the runtime's model-list endpoint answers with a 2xx
   *   status within the timeout, `false` otherwise.
   */
  async healthCheck(): Promise<boolean> {
    let path: string;
    if (this.runtime === "ollama") {
      path = "/api/tags";
    } else if (this.runtime === "vllm" || this.runtime === "lmstudio") {
      path = "/v1/models";
    } else {
      return false;
    }

    try {
      return await this.request(path, { method: "GET" }, (response) => response.ok);
    } catch {
      return false;
    }
  }

  /**
   * Get runtime info.
   */
  getInfo() {
    return {
      runtime: this.runtime,
      endpoint: this.endpoint,
      default_model: this.default_model,
      timeout_ms: this.timeout_ms
    };
  }

  /**
   * Performs one HTTP request against the endpoint and hands the response to
   * `consume`, aborting the whole exchange (including body parsing done in
   * `consume`) once `timeout_ms` has elapsed.
   *
   * @throws Error with a "timed out" message when the deadline is hit;
   *   otherwise whatever `fetch` or `consume` throws.
   */
  private async request<T>(
    path: string,
    init: RequestInit,
    consume: (response: Response) => T | Promise<T>
  ): Promise<T> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout_ms);
    try {
      const response = await fetch(`${this.endpoint}${path}`, {
        ...init,
        signal: controller.signal
      });
      return await consume(response);
    } catch (error) {
      if (controller.signal.aborted) {
        throw new Error(`${this.runtime} request timed out after ${this.timeout_ms} ms`);
      }
      throw error;
    } finally {
      clearTimeout(timer);
    }
  }

  /** Requests `path` and parses the JSON body, throwing `<label>: <status>` on non-2xx. */
  private requestJson<T>(path: string, init: RequestInit, errorLabel: string): Promise<T> {
    return this.request(path, init, async (response) => {
      if (!response.ok) throw new Error(`${errorLabel}: ${response.status}`);
      return (await response.json()) as T;
    });
  }

  /** GET helper returning the parsed JSON body. */
  private getJson<T>(path: string, errorLabel: string): Promise<T> {
    return this.requestJson<T>(
      path,
      { method: "GET", headers: { "Content-Type": "application/json" } },
      errorLabel
    );
  }

  /** POST helper sending `body` as JSON and returning the parsed JSON reply. */
  private postJson<T>(path: string, body: unknown, errorLabel: string): Promise<T> {
    return this.requestJson<T>(
      path,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body)
      },
      errorLabel
    );
  }

  /** Formats a byte count with one decimal and a 1024-based unit (B..TB). */
  private formatSize(bytes: number): string {
    const units = ["B", "KB", "MB", "GB", "TB"];
    let unitIndex = 0;
    let size = bytes;

    while (size >= 1024 && unitIndex < units.length - 1) {
      size /= 1024;
      unitIndex++;
    }

    return `${size.toFixed(1)} ${units[unitIndex]}`;
  }
}
|
|
275
|
+
|
|
276
|
+
/** Outcome of a single generation request against a local runtime. */
export interface LocalGenerationResult {
  /** Whether the request completed successfully. */
  success: boolean;
  /** Failure description, or `null` on success. */
  error: string | null;
  /** Model that produced (or was asked to produce) the completion. */
  model: string;
  /** Name of the runtime/provider that handled the request. */
  provider: string;
  /** Generated text; empty on failure. */
  content: string;
  /** Generation time in milliseconds; 0 when unavailable. */
  duration_ms: number;
  /** Token count reported by the runtime; 0 when unavailable. */
  tokens: number;
  /** Monetary cost; always 0 for local runtimes. */
  cost: number;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Manager for multiple local providers.
 *
 * Providers are registered under a caller-chosen name and can then be
 * addressed from model strings of the form `<name>/<model>`.
 */
export class LocalProviderManager {
  /** Runtime identifiers that may be used as a model-string prefix without registration. */
  private static readonly KNOWN_RUNTIMES: readonly string[] = ["ollama", "vllm", "lmstudio"];

  private providers: Map<string, LocalProvider> = new Map();

  /**
   * Registers (or replaces) a provider.
   *
   * @param name Key used to look the provider up from model strings.
   * @param config Configuration passed to the `LocalProvider` constructor.
   */
  addProvider(name: string, config: LocalProviderConfig): void {
    this.providers.set(name, new LocalProvider(config));
  }

  /**
   * Sends the same prompt to several models concurrently.
   *
   * Model strings are resolved as follows:
   * - `<registered-name>/<model>` uses that registered provider;
   * - `<runtime>/<model>` (`ollama`, `vllm`, `lmstudio`) uses an ad-hoc
   *   provider with that runtime's default endpoint;
   * - `local/<model>` and strings without a recognised prefix (including
   *   model ids that merely contain a slash, such as `org/model`) use the
   *   provider registered as "ollama", or an ad-hoc Ollama provider.
   * Only the first `/` separates prefix from model name, so the model name
   * itself may contain slashes.
   *
   * @param prompt Prompt sent to every model.
   * @param options `models` lists the model strings (default
   *   `["local/llama-3.3-70b"]`). `provider_priority` and `fallback_to_cloud`
   *   are accepted but not used by this implementation.
   * @returns One response per requested model, in request order, plus
   *   aggregate counters. `success` is true when at least one model succeeded.
   */
  async executeParallel(
    prompt: string,
    options?: {
      models?: string[];
      provider_priority?: string[];
      fallback_to_cloud?: boolean;
    }
  ): Promise<LocalParallelResult> {
    const models = options?.models || ["local/llama-3.3-70b"];

    // Execute in parallel across providers
    const settled = await Promise.allSettled(
      models.map(async (model) => {
        const { provider, modelName } = this.resolveTarget(model);
        return provider.generate(prompt, modelName);
      })
    );

    const results: LocalGenerationResult[] = settled.map((outcome, i) =>
      outcome.status === "fulfilled"
        ? outcome.value
        : {
            success: false,
            error: outcome.reason?.message || "Unknown error",
            model: models[i],
            provider: "unknown",
            content: "",
            duration_ms: 0,
            tokens: 0,
            cost: 0
          }
    );

    const successful = results.filter((r) => r.success);

    return {
      success: successful.length > 0,
      responses: results,
      total_models: models.length,
      successful_models: successful.length,
      total_cost: results.reduce((sum, r) => sum + r.cost, 0),
      // Requests run concurrently, so the slowest one bounds the total.
      duration_ms: Math.max(...results.map((r) => r.duration_ms), 0)
    };
  }

  /**
   * Probes every registered provider concurrently.
   *
   * @returns Map from provider name to its health-check outcome.
   */
  async healthCheckAll(): Promise<Record<string, boolean>> {
    const entries = await Promise.all(
      Array.from(this.providers, async ([name, provider]) =>
        [name, await provider.healthCheck()] as const
      )
    );

    const results: Record<string, boolean> = {};
    for (const [name, healthy] of entries) {
      results[name] = healthy;
    }
    return results;
  }

  /** Names of all registered providers, in registration order. */
  listProviders(): string[] {
    return Array.from(this.providers.keys());
  }

  /** Splits a model string into the provider to call and the model name to pass to it. */
  private resolveTarget(model: string): { provider: LocalProvider; modelName: string } {
    const slash = model.indexOf("/");
    if (slash > 0) {
      const prefix = model.slice(0, slash);
      const rest = model.slice(slash + 1);

      const registered = this.providers.get(prefix);
      if (registered) {
        return { provider: registered, modelName: rest };
      }
      if (LocalProviderManager.KNOWN_RUNTIMES.includes(prefix)) {
        return {
          provider: new LocalProvider({ runtime: prefix as LocalRuntime }),
          modelName: rest
        };
      }
      if (prefix === "local") {
        return { provider: this.defaultProvider(), modelName: rest };
      }
      // Unknown prefix: the slash belongs to the model id itself.
    }
    return { provider: this.defaultProvider(), modelName: model };
  }

  /** Provider used when a model string names none: registered "ollama", else ad-hoc Ollama. */
  private defaultProvider(): LocalProvider {
    return this.providers.get("ollama") ?? new LocalProvider({ runtime: "ollama" });
  }
}
|
|
367
|
+
|
|
368
|
+
/** Aggregate outcome of sending one prompt to several local models. */
export interface LocalParallelResult {
  /** True when at least one model produced a successful response. */
  success: boolean;
  /** Per-model results, one entry per requested model. */
  responses: LocalGenerationResult[];
  /** Number of models that were requested. */
  total_models: number;
  /** Number of models whose result has `success: true`. */
  successful_models: number;
  /** Sum of the per-model `cost` values. */
  total_cost: number;
  /** Largest per-model `duration_ms`, in milliseconds. */
  duration_ms: number;
}
|
|
376
|
+
|
|
377
|
+
// Utility functions for creating common local configurations

/**
 * Creates a provider for an Ollama runtime at its default endpoint.
 *
 * @param model Default model name; "llama-3.3-70b" when omitted or empty.
 */
export function createOllamaProvider(model?: string): LocalProvider {
  return new LocalProvider({
    runtime: "ollama",
    default_model: model || "llama-3.3-70b"
  });
}
|
|
384
|
+
|
|
385
|
+
/**
 * Creates a provider for a vLLM server.
 *
 * @param endpoint Base URL of the server; "http://localhost:8000" when omitted or empty.
 * @param model Default model name; "meta-llama/Llama-3.3-70b-Instruct" when omitted or empty.
 */
export function createVLLMProvider(endpoint?: string, model?: string): LocalProvider {
  return new LocalProvider({
    runtime: "vllm",
    endpoint: endpoint || "http://localhost:8000",
    default_model: model || "meta-llama/Llama-3.3-70b-Instruct"
  });
}
|
|
392
|
+
|
|
393
|
+
/**
 * Creates a provider for an LM Studio runtime at its default endpoint.
 *
 * @param model Default model name; "llama-3.3-70b" when omitted or empty.
 */
export function createLMStudioProvider(model?: string): LocalProvider {
  return new LocalProvider({
    runtime: "lmstudio",
    default_model: model || "llama-3.3-70b"
  });
}
|
|
399
|
+
|
|
400
|
+
// Default export bundling the module's classes and factory helpers.
export default {
  LocalProvider,
  LocalProviderManager,
  createOllamaProvider,
  createVLLMProvider,
  createLMStudioProvider
};
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
 * TMLPD Provider Registry
 *
 * Manages provider configurations, API keys, and base URLs.
 */
|
|
6
|
+
|
|
7
|
+
/** Configuration and runtime health state tracked for one provider. */
export interface ProviderConfig {
  /** Provider identifier (for example "openai"). */
  name: string;
  /** API key; empty string when none is configured. */
  apiKey: string;
  /** Base URL override; empty string when none is configured. */
  baseUrl: string;
  /** Wire protocol the provider speaks. */
  mode: "openai" | "anthropic" | "gemini";
  /** Sort key for ordering providers; lower values come first. */
  priority: number;
  /** Whether the provider may be used at all. */
  enabled: boolean;
  /** Epoch milliseconds until which the provider is in cooldown; 0 when not cooling down. */
  cooldownUntil: number;
  /** Number of failures recorded since the last success. */
  failureCount: number;
  /** Message of the most recent failure, or `null`. */
  lastError: string | null;
  /** Status code of the most recent failure, or `null`. */
  lastStatus: number | null;
}
|
|
19
|
+
|
|
20
|
+
/** Options accepted by `ProviderRegistry`. */
export interface ProviderRegistryConfig {
  /** Provider names. */
  providers: string[];
  /** Models in preference order, each written as `<provider>/<model>`. */
  modelPriority: string[];
  /** Flag for an OpenClaw fallback; not consulted by the registry class itself. */
  useOpenclawFallback: boolean;
  /** Token limit; not consulted by the registry class itself. */
  maxTokens: number;
}
|
|
26
|
+
|
|
27
|
+
// Registry defaults, merged underneath whatever the caller supplies.
const DEFAULT_PROVIDER_CONFIG: ProviderRegistryConfig = {
  providers: ["openai", "openrouter", "groq", "cerebras", "mistral", "xai", "zai", "anthropic", "google"],
  modelPriority: ["openai/gpt-4o", "groq/llama-3.3-70b-versatile", "cerebras/llama-3.3-70b"],
  useOpenclawFallback: false,
  maxTokens: 4096,
};
|
|
33
|
+
|
|
34
|
+
/**
 * Tracks the known providers, their credentials (read from environment
 * variables) and their health, and picks models/providers that are currently
 * usable.
 */
export class ProviderRegistry {
  private providers: Map<string, ProviderConfig> = new Map();
  private config: ProviderRegistryConfig;
  private modelPriority: string[];

  /**
   * @param config Overrides merged on top of the defaults. A key explicitly
   *   set to `undefined` falls back to the default for `providers` and
   *   `modelPriority`.
   */
  constructor(config: Partial<ProviderRegistryConfig> = {}) {
    this.config = { ...DEFAULT_PROVIDER_CONFIG, ...config };
    // Copy so that neither the shared defaults nor the caller's array is
    // aliased by the registry.
    this.modelPriority = [...(this.config.modelPriority ?? DEFAULT_PROVIDER_CONFIG.modelPriority)];
    this.initializeProviders();
  }

  /**
   * Builds the provider table from environment variables. A provider is
   * enabled only when it has an API key and is listed in `config.providers`.
   */
  private initializeProviders(): void {
    // Initialize from environment
    const envVars: Record<string, { key: string; url: string; mode: "openai" | "anthropic" | "gemini" }> = {
      openai: { key: "OPENAI_API_KEY", url: "OPENAI_OPENAI_BASE_URL", mode: "openai" },
      openrouter: { key: "OPENROUTER_API_KEY", url: "OPENROUTER_OPENAI_BASE_URL", mode: "openai" },
      groq: { key: "GROQ_API_KEY", url: "GROQ_OPENAI_BASE_URL", mode: "openai" },
      cerebras: { key: "CEREBRAS_API_KEY", url: "CEREBRAS_OPENAI_BASE_URL", mode: "openai" },
      mistral: { key: "MISTRAL_API_KEY", url: "MISTRAL_OPENAI_BASE_URL", mode: "openai" },
      xai: { key: "XAI_API_KEY", url: "XAI_OPENAI_BASE_URL", mode: "openai" },
      zai: { key: "ZAI_API_KEY", url: "ZAI_OPENAI_BASE_URL", mode: "anthropic" },
      anthropic: { key: "ANTHROPIC_API_KEY", url: "ANTHROPIC_BASE_URL", mode: "anthropic" },
      google: { key: "GOOGLE_API_KEY", url: "GOOGLE_GEMINI_BASE_URL", mode: "gemini" },
    };

    const allowed = new Set(this.config.providers ?? DEFAULT_PROVIDER_CONFIG.providers);

    for (const [name, env] of Object.entries(envVars)) {
      const apiKey = process.env[env.key] || "";
      const baseUrl = process.env[env.url] || "";
      const rank = this.modelPriority.findIndex((m) => m.startsWith(name + "/"));

      this.providers.set(name, {
        name,
        apiKey,
        baseUrl,
        mode: env.mode,
        // Providers with no model in the priority list sort last rather than
        // first (findIndex yields -1 for them).
        priority: rank === -1 ? Number.MAX_SAFE_INTEGER : rank,
        enabled: Boolean(apiKey) && allowed.has(name),
        cooldownUntil: 0,
        failureCount: 0,
        lastError: null,
        lastStatus: null,
      });
    }
  }

  /**
   * Check if provider is ready (enabled and not in cooldown).
   *
   * @param name Provider name; unknown names are reported as not ready.
   */
  isProviderReady(name: string): boolean {
    const provider = this.providers.get(name);
    if (!provider || !provider.enabled) return false;
    return Date.now() >= provider.cooldownUntil;
  }

  /**
   * Get best available model from priority list.
   *
   * @returns The first `<provider>/<model>` entry whose provider is ready, or
   *   `null` when none is.
   */
  selectModel(): string | null {
    const match = this.modelPriority.find((model) =>
      this.isProviderReady(model.split("/")[0])
    );
    return match ?? null;
  }

  /**
   * Get the names of all ready providers sorted by priority (best first).
   * Providers of equal priority keep their registration order.
   */
  getReadyProviders(): string[] {
    return Array.from(this.providers.values())
      .filter((p) => this.isProviderReady(p.name))
      .sort((a, b) => a.priority - b.priority)
      .map((p) => p.name);
  }

  /**
   * Record provider success: clears cooldown and failure bookkeeping.
   * Unknown provider names are ignored.
   */
  recordSuccess(name: string): void {
    const provider = this.providers.get(name);
    if (!provider) return;

    provider.cooldownUntil = 0;
    provider.failureCount = 0;
    provider.lastError = null;
    provider.lastStatus = null;
  }

  /**
   * Record provider failure and put the provider into cooldown.
   * Unknown provider names are ignored.
   *
   * @param name Provider name.
   * @param statusCode HTTP status of the failure, or `null` when unknown.
   * @param error Failure description stored as `lastError`.
   */
  recordFailure(name: string, statusCode: number | null, error: string): void {
    const provider = this.providers.get(name);
    if (!provider) return;

    provider.failureCount++;
    provider.lastError = error;
    provider.lastStatus = statusCode;

    // Base delay depends on the status: 60 s for 429, 300 s for 403, 30 s
    // otherwise. It doubles per consecutive failure, capped at 4x.
    const baseDelay = statusCode === 429 ? 60000 : statusCode === 403 ? 300000 : 30000;
    const multiplier = Math.min(4, Math.pow(2, Math.max(0, provider.failureCount - 1)));
    provider.cooldownUntil = Date.now() + baseDelay * multiplier;
  }

  /**
   * Get provider status summary.
   *
   * @returns An object with the model priority list, the ready provider
   *   names, per-provider status (API keys are not included) and an ISO
   *   timestamp.
   */
  getStatus(): Record<string, any> {
    const status: Record<string, any> = {};
    for (const [name, provider] of this.providers.entries()) {
      status[name] = {
        enabled: provider.enabled,
        mode: provider.mode,
        ready: this.isProviderReady(name),
        cooldownUntil: provider.cooldownUntil ? new Date(provider.cooldownUntil).toISOString() : null,
        lastError: provider.lastError,
        lastStatus: provider.lastStatus,
        failureCount: provider.failureCount,
      };
    }
    return {
      // Copy so callers cannot mutate the registry's internal list.
      modelPriority: [...this.modelPriority],
      readyProviders: this.getReadyProviders(),
      providers: status,
      timestamp: new Date().toISOString(),
    };
  }
}
|