adaptive-memory-multi-model-router 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -0
- package/demo/research-demo.js +266 -0
- package/dist/cache/prefixCache.d.ts +114 -0
- package/dist/cache/prefixCache.d.ts.map +1 -0
- package/dist/cache/prefixCache.js +285 -0
- package/dist/cache/prefixCache.js.map +1 -0
- package/dist/cache/responseCache.d.ts +58 -0
- package/dist/cache/responseCache.d.ts.map +1 -0
- package/dist/cache/responseCache.js +153 -0
- package/dist/cache/responseCache.js.map +1 -0
- package/dist/cli.js +59 -0
- package/dist/cost/costTracker.d.ts +95 -0
- package/dist/cost/costTracker.d.ts.map +1 -0
- package/dist/cost/costTracker.js +240 -0
- package/dist/cost/costTracker.js.map +1 -0
- package/dist/index.d.ts +723 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +239 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/episodicMemory.d.ts +82 -0
- package/dist/memory/episodicMemory.d.ts.map +1 -0
- package/dist/memory/episodicMemory.js +145 -0
- package/dist/memory/episodicMemory.js.map +1 -0
- package/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/dist/orchestration/haloOrchestrator.js +207 -0
- package/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/dist/orchestration/mctsWorkflow.js +210 -0
- package/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/dist/providers/localProvider.d.ts +102 -0
- package/dist/providers/localProvider.d.ts.map +1 -0
- package/dist/providers/localProvider.js +338 -0
- package/dist/providers/localProvider.js.map +1 -0
- package/dist/providers/registry.d.ts +55 -0
- package/dist/providers/registry.d.ts.map +1 -0
- package/dist/providers/registry.js +138 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/routing/advancedRouter.d.ts +68 -0
- package/dist/routing/advancedRouter.d.ts.map +1 -0
- package/dist/routing/advancedRouter.js +332 -0
- package/dist/routing/advancedRouter.js.map +1 -0
- package/dist/tools/tmlpdTools.d.ts +101 -0
- package/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/dist/tools/tmlpdTools.js +368 -0
- package/dist/tools/tmlpdTools.js.map +1 -0
- package/dist/utils/batchProcessor.d.ts +96 -0
- package/dist/utils/batchProcessor.d.ts.map +1 -0
- package/dist/utils/batchProcessor.js +170 -0
- package/dist/utils/batchProcessor.js.map +1 -0
- package/dist/utils/compression.d.ts +61 -0
- package/dist/utils/compression.d.ts.map +1 -0
- package/dist/utils/compression.js +281 -0
- package/dist/utils/compression.js.map +1 -0
- package/dist/utils/reliability.d.ts +74 -0
- package/dist/utils/reliability.d.ts.map +1 -0
- package/dist/utils/reliability.js +177 -0
- package/dist/utils/reliability.js.map +1 -0
- package/dist/utils/speculativeDecoding.d.ts +117 -0
- package/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/dist/utils/speculativeDecoding.js +246 -0
- package/dist/utils/speculativeDecoding.js.map +1 -0
- package/dist/utils/tokenUtils.d.ts +50 -0
- package/dist/utils/tokenUtils.d.ts.map +1 -0
- package/dist/utils/tokenUtils.js +124 -0
- package/dist/utils/tokenUtils.js.map +1 -0
- package/examples/QUICKSTART.md +183 -0
- package/notebooks/quickstart.ipynb +157 -0
- package/package.json +83 -0
- package/python/examples.py +53 -0
- package/python/integrations.py +330 -0
- package/python/setup.py +28 -0
- package/python/tmlpd.py +369 -0
- package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/qna/TMLPD_QNA.md +751 -0
- package/rust/tmlpd.h +268 -0
- package/skill/SKILL.md +238 -0
- package/src/cache/prefixCache.ts +365 -0
- package/src/cache/responseCache.ts +147 -0
- package/src/cost/costTracker.ts +302 -0
- package/src/index.ts +224 -0
- package/src/memory/episodicMemory.ts +185 -0
- package/src/orchestration/haloOrchestrator.ts +266 -0
- package/src/orchestration/mctsWorkflow.ts +262 -0
- package/src/providers/localProvider.ts +406 -0
- package/src/providers/registry.ts +164 -0
- package/src/routing/advancedRouter.ts +406 -0
- package/src/tools/tmlpdTools.ts +433 -0
- package/src/utils/batchProcessor.ts +232 -0
- package/src/utils/compression.ts +325 -0
- package/src/utils/reliability.ts +221 -0
- package/src/utils/speculativeDecoding.ts +344 -0
- package/src/utils/tokenUtils.ts +145 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TMLPD Cost Tracker
|
|
3
|
+
*
|
|
4
|
+
* Tracks real-time spending across all providers.
|
|
5
|
+
* Supports per-model budgets, spending alerts, and cost analysis.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Price table for known models, in USD per 1,000,000 tokens.
 *
 * Each entry maps a model identifier to its `input` (prompt) and `output`
 * (completion) rate. The table is assembled from `[model, input, output]`
 * rows so that every provider group reads as a compact price list.
 */
const MODEL_COSTS: Record<string, { input: number; output: number }> = (() => {
  const priceRows: Array<[string, number, number]> = [
    // Anthropic
    ["claude-3-5-sonnet-20241022", 3.0, 15.0],
    ["claude-3-opus-20240229", 15.0, 75.0],
    ["claude-3-sonnet-20240229", 3.0, 15.0],
    ["claude-3-haiku-20240307", 0.25, 1.25],
    // OpenAI
    ["gpt-4o", 2.5, 10.0],
    ["gpt-4-turbo", 10.0, 30.0],
    ["gpt-4", 30.0, 60.0],
    ["gpt-3.5-turbo", 0.5, 1.5],
    // Google
    ["gemini-1.5-pro", 1.25, 5.0],
    ["gemini-1.5-flash", 0.075, 0.3],
    // Groq
    ["llama-3.3-70b-versatile", 0.59, 0.79],
    ["llama-3.1-8b-instant", 0.05, 0.08],
    // Cerebras
    ["llama-3.3-70b", 0.1, 0.1],
    // Mistral
    ["mistral-large-latest", 2.0, 6.0],
    ["mistral-small-latest", 0.2, 0.6],
    // xAI
    ["grok-2", 2.0, 8.0],
    ["grok-2-mini", 0.2, 0.8],
    // OpenRouter (varies by model)
    ["openai/gpt-4o", 2.5, 10.0],
    ["anthropic/claude-3.5-sonnet", 3.0, 15.0],
    // ZAI (default estimate)
    ["glm-5", 0.1, 0.3],
    ["glm-4", 0.1, 0.3],
  ];

  const table: Record<string, { input: number; output: number }> = {};
  for (const [modelId, inputRate, outputRate] of priceRows) {
    table[modelId] = { input: inputRate, output: outputRate };
  }
  return table;
})();
|
|
41
|
+
|
|
42
|
+
/**
 * Spending limits checked by `CostTracker` after every recorded request.
 *
 * All limits are optional and are compared against recorded `total_cost`
 * sums (USD, derived from the per-1M-token price table). An omitted limit
 * disables the corresponding check.
 */
export interface BudgetConfig {
  /** Cap on the summed cost of requests recorded during the current day. */
  daily_limit?: number;

  /** Cap on the summed cost of requests recorded during the current month. */
  monthly_limit?: number;

  /** Per-model caps, keyed by the model name passed to `record()`. */
  per_model_limits?: Record<string, number>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Notification handed to the `onAlert` callback when spending approaches
 * a configured limit.
 */
export interface CostAlert {
  /**
   * Which budget triggered the alert.
   * NOTE(review): only "daily", "monthly" and "model" are emitted by the
   * tracker code visible in this file; "budget" appears unused here.
   */
  type: "daily" | "monthly" | "model" | "budget";

  /** The configured limit that is being approached. */
  threshold: number;

  /** Spend accumulated against that limit when the alert fired. */
  current: number;

  /** Provider the alert relates to, when applicable. */
  provider?: string;

  /** Model the alert relates to; set for "model" alerts. */
  model?: string;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Cost record for a single request, as stored in the tracker's history
 * and returned by `CostTracker.record()`.
 */
export interface CostSnapshot {
  /** Provider name supplied by the caller of `record()`. */
  provider: string;
  /** Model name supplied by the caller of `record()`. */
  model: string;

  /** Number of prompt tokens billed at the model's input rate. */
  input_tokens: number;
  /** Number of completion tokens billed at the model's output rate. */
  output_tokens: number;

  /** Cost of the input tokens. */
  input_cost: number;
  /** Cost of the output tokens. */
  output_cost: number;
  /** Combined cost of the request. */
  total_cost: number;

  /** Time the request was recorded, in milliseconds since the Unix epoch. */
  timestamp: number;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Aggregated view over every snapshot currently held in the tracker's
 * history, as produced by `CostTracker.getSummary()`.
 */
export interface CostSummary {
  /** Sum of `total_cost` over all recorded requests. */
  total_cost: number;

  /** Spend grouped by provider name. */
  by_provider: Record<string, number>;
  /** Spend grouped by model name. */
  by_model: Record<string, number>;

  /** Spend grouped by calendar day, keyed as `YYYY-MM-DD`. */
  daily_costs: Record<string, number>;
  /** Spend grouped by calendar month, keyed as `YYYY-MM`. */
  monthly_costs: Record<string, number>;

  /** Number of recorded requests. */
  request_count: number;
  /** Total tokens across all recorded requests, split by direction. */
  token_count: { input: number; output: number };

  /** `total_cost / request_count`, or 0 when nothing has been recorded. */
  average_cost_per_request: number;
}
|
|
77
|
+
|
|
78
|
+
/**
 * In-memory tracker for LLM spending.
 *
 * Every call to `record()` prices the request from the `MODEL_COSTS` table,
 * appends a snapshot to an in-memory history and checks the configured
 * budgets. When spend reaches 90% of a limit, the callback registered via
 * `onAlert()` is invoked (at most once per hour for the same budget).
 *
 * Day and month buckets are derived from `Date.prototype.toISOString()`, so
 * "today" and "this month" are UTC calendar periods.
 *
 * The history grows until `reset()` is called; nothing is persisted.
 */
export class CostTracker {
  /** Rates (USD per 1M tokens) applied to models missing from `MODEL_COSTS`. */
  private static readonly DEFAULT_RATES = { input: 1.0, output: 5.0 };
  /** Fraction of a limit at which an alert is raised. */
  private static readonly ALERT_FRACTION = 0.9;
  /** Minimum time between two alerts for the same budget (1 hour). */
  private static readonly ALERT_COOLDOWN_MS = 3_600_000;

  private history: CostSnapshot[] = [];
  private budgets: BudgetConfig;
  /** Alerts emitted within the cooldown window, with their emission time. */
  private alerts: Array<{ alert: CostAlert; emitted_at: number }> = [];
  private alerts_callback: ((alert: CostAlert) => void) | null = null;

  /**
   * @param budgets Optional spending limits. A limit that is omitted (or 0)
   *                disables the corresponding check.
   */
  constructor(budgets: BudgetConfig = {}) {
    this.budgets = budgets;
  }

  /** Round a dollar amount to 6 decimal places. */
  private static round6(value: number): number {
    return Math.round(value * 1_000_000) / 1_000_000;
  }

  /** UTC calendar day (`YYYY-MM-DD`) of an epoch-millisecond timestamp. */
  private static utcDay(timestamp: number): string {
    return new Date(timestamp).toISOString().slice(0, 10);
  }

  /**
   * Resolve the per-1M-token rates for a model.
   *
   * The exact identifier is tried first so that namespaced table entries
   * such as "anthropic/claude-3.5-sonnet" are honoured; only then is the
   * last "/"-separated segment tried (e.g. "groq/llama-3.1-8b-instant" ->
   * "llama-3.1-8b-instant"). Unknown models get the default estimate.
   */
  private static lookupRates(model: string): { input: number; output: number } {
    const short_name = model.split("/").pop() || model;
    for (const key of [model, short_name]) {
      // Own-property check keeps inherited keys ("constructor", "toString")
      // from being mistaken for price entries.
      if (Object.prototype.hasOwnProperty.call(MODEL_COSTS, key)) {
        const rates = MODEL_COSTS[key];
        if (rates) return rates;
      }
    }
    return CostTracker.DEFAULT_RATES;
  }

  /**
   * Calculate cost for a model based on tokens.
   *
   * @param model         Model identifier, optionally prefixed with a
   *                      provider namespace ("provider/model").
   * @param input_tokens  Number of prompt tokens.
   * @param output_tokens Number of completion tokens.
   * @returns Input, output and total cost in USD, each rounded to 6 decimals.
   */
  calculateCost(model: string, input_tokens: number, output_tokens: number): { input: number; output: number; total: number } {
    const rates = CostTracker.lookupRates(model);

    const input_cost = (input_tokens / 1_000_000) * rates.input;
    const output_cost = (output_tokens / 1_000_000) * rates.output;

    return {
      input: CostTracker.round6(input_cost),
      output: CostTracker.round6(output_cost),
      total: CostTracker.round6(input_cost + output_cost),
    };
  }

  /**
   * Record a request's cost.
   *
   * Appends a snapshot stamped with the current time to the history, then
   * evaluates the budgets (which may invoke the alert callback).
   *
   * @returns The snapshot that was stored.
   */
  record(provider: string, model: string, input_tokens: number, output_tokens: number): CostSnapshot {
    const costs = this.calculateCost(model, input_tokens, output_tokens);
    const snapshot: CostSnapshot = {
      provider,
      model,
      input_tokens,
      output_tokens,
      input_cost: costs.input,
      output_cost: costs.output,
      total_cost: costs.total,
      timestamp: Date.now(),
    };

    this.history.push(snapshot);
    this.checkBudgets(snapshot);
    return snapshot;
  }

  /**
   * Check budgets and trigger alerts.
   *
   * Daily and monthly limits are compared against the spend of the current
   * UTC day / month; the per-model limit is compared against the all-time
   * spend of the snapshot's model.
   */
  private checkBudgets(snapshot: CostSnapshot): void {
    const { daily_limit, monthly_limit, per_model_limits } = this.budgets;
    const model_limit = per_model_limits ? per_model_limits[snapshot.model] : undefined;

    // Nothing to enforce for this request: skip the full-history scan.
    if (!daily_limit && !monthly_limit && !model_limit) return;

    const summary = this.getSummary();
    const today = CostTracker.utcDay(Date.now());
    const month = today.slice(0, 7);

    if (daily_limit) {
      this.alertIfNear({
        type: "daily",
        threshold: daily_limit,
        current: summary.daily_costs[today] || 0,
      });
    }

    if (monthly_limit) {
      this.alertIfNear({
        type: "monthly",
        threshold: monthly_limit,
        current: summary.monthly_costs[month] || 0,
      });
    }

    if (model_limit) {
      this.alertIfNear({
        type: "model",
        threshold: model_limit,
        current: summary.by_model[snapshot.model] || 0,
        model: snapshot.model,
      });
    }
  }

  /** Emit the alert when spend has reached the alert fraction of its limit. */
  private alertIfNear(alert: CostAlert): void {
    if (alert.current >= alert.threshold * CostTracker.ALERT_FRACTION) {
      this.emitAlert(alert);
    }
  }

  /**
   * Emit an alert via callback, suppressing repeats.
   *
   * An alert is a repeat when one with the same type, threshold and model
   * was emitted within the cooldown window. The model is part of the key so
   * that two models sharing the same limit value do not silence each other.
   */
  private emitAlert(alert: CostAlert): void {
    const now = Date.now();

    // Entries older than the cooldown can no longer suppress anything.
    this.alerts = this.alerts.filter(
      (entry) => now - entry.emitted_at < CostTracker.ALERT_COOLDOWN_MS
    );

    const is_repeat = this.alerts.some(
      ({ alert: previous }) =>
        previous.type === alert.type &&
        previous.threshold === alert.threshold &&
        previous.model === alert.model
    );
    if (is_repeat) return;

    this.alerts.push({ alert, emitted_at: now });
    if (this.alerts_callback) {
      this.alerts_callback(alert);
    }
  }

  /**
   * Register alert callback. Replaces any previously registered callback.
   */
  onAlert(callback: (alert: CostAlert) => void): void {
    this.alerts_callback = callback;
  }

  /**
   * Get comprehensive cost summary.
   *
   * Walks the whole history once and groups spend by provider, model,
   * UTC day and UTC month.
   */
  getSummary(): CostSummary {
    const by_provider: Record<string, number> = {};
    const by_model: Record<string, number> = {};
    const daily_costs: Record<string, number> = {};
    const monthly_costs: Record<string, number> = {};
    let total_cost = 0;
    let total_input_tokens = 0;
    let total_output_tokens = 0;

    for (const entry of this.history) {
      total_cost += entry.total_cost;
      total_input_tokens += entry.input_tokens;
      total_output_tokens += entry.output_tokens;

      by_provider[entry.provider] = (by_provider[entry.provider] || 0) + entry.total_cost;
      by_model[entry.model] = (by_model[entry.model] || 0) + entry.total_cost;

      const entry_date = CostTracker.utcDay(entry.timestamp);
      const entry_month = entry_date.slice(0, 7);

      daily_costs[entry_date] = (daily_costs[entry_date] || 0) + entry.total_cost;
      monthly_costs[entry_month] = (monthly_costs[entry_month] || 0) + entry.total_cost;
    }

    const request_count = this.history.length;

    return {
      total_cost: CostTracker.round6(total_cost),
      by_provider,
      by_model,
      daily_costs,
      monthly_costs,
      request_count,
      token_count: { input: total_input_tokens, output: total_output_tokens },
      average_cost_per_request:
        request_count > 0 ? CostTracker.round6(total_cost / request_count) : 0,
    };
  }

  /**
   * Get remaining budget.
   *
   * @returns Remaining daily and monthly budget (never negative; `null` when
   *          the limit is not configured) and the remaining amount for every
   *          model that has a per-model limit.
   */
  getRemainingBudget(): { daily: number | null; monthly: number | null; per_model: Record<string, number> } {
    const summary = this.getSummary();
    const today = CostTracker.utcDay(Date.now());
    const month = today.slice(0, 7);
    const { daily_limit, monthly_limit, per_model_limits } = this.budgets;

    return {
      daily: daily_limit
        ? Math.max(0, daily_limit - (summary.daily_costs[today] || 0))
        : null,
      monthly: monthly_limit
        ? Math.max(0, monthly_limit - (summary.monthly_costs[month] || 0))
        : null,
      per_model: per_model_limits
        ? Object.fromEntries(
            Object.entries(per_model_limits).map(([model, limit]) => [
              model,
              Math.max(0, limit - (summary.by_model[model] || 0)),
            ])
          )
        : {},
    };
  }

  /**
   * Reset cost history. Also clears the alert cooldown state, so alerts can
   * fire again immediately.
   */
  reset(): void {
    this.history = [];
    this.alerts = [];
  }

  /**
   * Export cost data for analysis.
   *
   * @returns A shallow copy of the history; the snapshot objects themselves
   *          are shared with the tracker.
   */
  export(): CostSnapshot[] {
    return [...this.history];
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TMLPD PI Extension - v1.1.0
|
|
3
|
+
*
|
|
4
|
+
* Parallel Multi-LLM Processing with Streaming, Caching, Cost Tracking, Reliability
|
|
5
|
+
* + Reference Architecture to Full TMLPD (Episodic Memory, MCTS, HALO)
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { createTMLPD, HALOOrchestrator, EpisodicMemoryStore } from "adaptive-memory-multi-model-router";
|
|
10
|
+
*
|
|
11
|
+
* // Lightweight usage (core features)
|
|
12
|
+
* const tmlpd = createTMLPD({ cache: { ttl_seconds: 3600 } });
|
|
13
|
+
* const result = await tmlpd.executeParallel(prompt, ["gpt-4o", "claude"]);
|
|
14
|
+
*
|
|
15
|
+
* // Advanced: HALO orchestration with episodic memory
|
|
16
|
+
* const halo = new HALOOrchestrator({ maxConcurrent: 3, enableMCTS: true });
|
|
17
|
+
* const haloResult = await halo.execute("Build a REST API", async (subtask, agent) => {
|
|
18
|
+
* // Execute via agent
|
|
19
|
+
* });
|
|
20
|
+
*
|
|
21
|
+
* // Query episodic memory
|
|
22
|
+
* const similar = memory.getSimilarTasks("Python async API", 5);
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { createTMLPD, TMLPDTools, TMLPDConfig, ExecuteResult, ParallelResult, StreamingConfig } from "./tools/tmlpdTools";
|
|
27
|
+
import { ResponseCache, CacheConfig, CacheEntry } from "./cache/responseCache";
|
|
28
|
+
import { CostTracker, BudgetConfig, CostAlert, CostSummary, CostSnapshot } from "./cost/costTracker";
|
|
29
|
+
import { ProviderRegistry, ProviderConfig, ProviderRegistryConfig } from "./providers/registry";
|
|
30
|
+
import { CircuitBreaker, withRetry, RetryConfig, CircuitState, calculateRetryDelay, isRetryableStatus, DEFAULT_RETRY_CONFIG } from "./utils/reliability";
|
|
31
|
+
import { EpisodicMemoryStore, EpisodicEntry, MemoryQuery } from "./memory/episodicMemory";
|
|
32
|
+
import { MCTSWorkflowOptimizer, WorkflowState, WorkflowAction, MCTSConfig } from "./orchestration/mctsWorkflow";
|
|
33
|
+
import { HALOOrchestrator, SubTask, AgentAssignment, ExecutionResult, HALOConfig } from "./orchestration/haloOrchestrator";
|
|
34
|
+
import { countTokens, estimateCost, estimateCostFromText, getModelCost, listModelsByCost, findCheapestModels, MODEL_COSTS, TokenCost } from "./utils/tokenUtils";
|
|
35
|
+
import { isonEncode, isonDecode, compressText, truncateMessages, truncateToTokenBudget, calculateCompressionRatio, Message, CompressionStrategy } from "./utils/compression";
|
|
36
|
+
import { LocalProvider, LocalProviderManager, createOllamaProvider, createVLLMProvider, createLMStudioProvider, LocalRuntime, LocalProviderConfig, LocalModelInfo, LocalGenerationResult, LocalParallelResult } from "./providers/localProvider";
|
|
37
|
+
import { BatchProcessor, executeBatch, BatchItem, BatchResult, BatchOptions, BatchProgress, ProgressCallback } from "./utils/batchProcessor";
|
|
38
|
+
import { routeQuery, routeBatch, recommendForTask, extractQueryFeatures, updateModelProfile, MODEL_PROFILES, QueryFeatures, ModelProfile, RouteDecision } from "./routing/advancedRouter";
|
|
39
|
+
import { PrefixCache, createWarmedCache, PrefixCacheStats } from "./cache/prefixCache";
|
|
40
|
+
import { SpeculativeDecoder, speculativeBatch, estimateSpeedupPotential, MedusaPredictor, EagleSpeculative, SpeculativeConfig, SpeculativeResult } from "./utils/speculativeDecoding";
|
|
41
|
+
|
|
42
|
+
// Re-exports
|
|
43
|
+
export { createTMLPD, TMLPDTools, TMLPDConfig, ExecuteResult, ParallelResult, StreamingConfig };
|
|
44
|
+
export { ResponseCache, CacheConfig, CacheEntry };
|
|
45
|
+
export { CostTracker, BudgetConfig, CostAlert, CostSummary, CostSnapshot };
|
|
46
|
+
export { ProviderRegistry, ProviderConfig, ProviderRegistryConfig };
|
|
47
|
+
export { CircuitBreaker, withRetry, RetryConfig, CircuitState, calculateRetryDelay, isRetryableStatus, DEFAULT_RETRY_CONFIG };
|
|
48
|
+
export { EpisodicMemoryStore, EpisodicEntry, MemoryQuery };
|
|
49
|
+
export { MCTSWorkflowOptimizer, WorkflowState, WorkflowAction, MCTSConfig };
|
|
50
|
+
export { HALOOrchestrator, SubTask, AgentAssignment, ExecutionResult, HALOConfig };
|
|
51
|
+
|
|
52
|
+
// Token utilities
|
|
53
|
+
export { countTokens, estimateCost, estimateCostFromText, getModelCost, listModelsByCost, findCheapestModels, MODEL_COSTS, TokenCost };
|
|
54
|
+
|
|
55
|
+
// Compression utilities
|
|
56
|
+
export { isonEncode, isonDecode, compressText, truncateMessages, truncateToTokenBudget, calculateCompressionRatio, Message, CompressionStrategy };
|
|
57
|
+
|
|
58
|
+
// Local provider support (Ollama, vLLM, LM Studio)
|
|
59
|
+
export { LocalProvider, LocalProviderManager, createOllamaProvider, createVLLMProvider, createLMStudioProvider, LocalRuntime, LocalProviderConfig, LocalModelInfo, LocalGenerationResult, LocalParallelResult };
|
|
60
|
+
|
|
61
|
+
// Batch processing
|
|
62
|
+
export { BatchProcessor, executeBatch, BatchItem, BatchResult, BatchOptions, BatchProgress, ProgressCallback };
|
|
63
|
+
|
|
64
|
+
// Advanced routing (RouteLLM-style)
|
|
65
|
+
export { routeQuery, routeBatch, recommendForTask, extractQueryFeatures, updateModelProfile, MODEL_PROFILES, QueryFeatures, ModelProfile as ModelProfileType, RouteDecision };
|
|
66
|
+
|
|
67
|
+
// Prefix caching (RadixAttention-style)
|
|
68
|
+
export { PrefixCache, createWarmedCache, PrefixCacheStats };
|
|
69
|
+
|
|
70
|
+
// Speculative decoding (Medusa/EAGLE-style)
|
|
71
|
+
export { SpeculativeDecoder, speculativeBatch, estimateSpeedupPotential, MedusaPredictor, EagleSpeculative, SpeculativeConfig, SpeculativeResult };
|
|
72
|
+
|
|
73
|
+
// PI Tool definitions (for PI agent integration)
|
|
74
|
+
/**
 * Tool descriptors for PI agent integration.
 *
 * Each entry carries a tool `name`, a human-readable `description` and a
 * JSON-Schema-style `inputSchema` describing the accepted arguments. The
 * small builders below only remove repetition; every call returns a fresh
 * object, so no schema fragment is shared between tools.
 */
export const TMLPD_PI_TOOLS = (() => {
  // Leaf schema builders.
  const text = (description: string) => ({ type: "string", description });
  const numeric = (description: string) => ({ type: "number", description });
  const flag = (description: string) => ({ type: "boolean", description });
  const oneOf = (options: string[], description: string) => ({ type: "string", enum: options, description });
  const textList = (description: string) => ({ type: "array", items: { type: "string" }, description });
  // Schema for tools that take no arguments.
  const noArguments = () => ({ type: "object", properties: {} });

  return [
    {
      name: "tmlpd_execute",
      description: "Execute prompt across multiple LLM providers in parallel. Optimizes for cost-quality tradeoff with automatic model selection. Use when comparing multiple AI responses or needing faster results via parallel execution.",
      inputSchema: {
        type: "object",
        properties: {
          prompt: text("The prompt to execute"),
          models: textList("Optional model list (auto-selects if omitted)"),
          streaming: {
            type: "object",
            properties: {
              enabled: { type: "boolean" },
              chunk_size: { type: "number" },
            },
          },
        },
        required: ["prompt"],
      },
    },
    {
      name: "tmlpd_execute_single",
      description: "Execute with single model via smart routing. Analyzes prompt to select optimal agent based on task type (coding, explanation, analysis, etc.) with cost-quality optimization.",
      inputSchema: {
        type: "object",
        properties: {
          prompt: text("The prompt to execute"),
          model: text("Optional specific model"),
        },
        required: ["prompt"],
      },
    },
    {
      name: "tmlpd_cost_summary",
      description: "Get real-time cost tracking summary. Shows spending by provider, model, daily/monthly breakdowns, and remaining budget. Essential for cost monitoring in production.",
      inputSchema: noArguments(),
    },
    {
      name: "tmlpd_cache_stats",
      description: "Get response cache statistics. Shows hit rate, cache size, and effectiveness. Cache hits cost $0 and provide instant responses.",
      inputSchema: noArguments(),
    },
    {
      name: "tmlpd_provider_status",
      description: "Get status of all configured LLM providers. Shows readiness, cooldown status, failure counts. Use for debugging or selecting specific providers.",
      inputSchema: noArguments(),
    },
    {
      name: "tmlpd_invalidate_cache",
      description: "Invalidate cached responses. Use when prompt content has changed and fresh response needed, or to clear stale cache entries.",
      inputSchema: {
        type: "object",
        properties: {
          model: text("Optional model to invalidate (all if omitted)"),
        },
      },
    },
    {
      name: "tmlpd_get_budget",
      description: "Get remaining budget for cost controls. Returns daily, monthly, and per-model limits. Use for budget enforcement and alerting.",
      inputSchema: noArguments(),
    },
    {
      name: "tmlpd_halo_execute",
      description: "Execute via HALO (Hierarchical Autonomous Logic-Oriented) orchestrator with 3-tier planning: decompose → assign → execute. Includes episodic memory for learning from past executions. For complex multi-step tasks.",
      inputSchema: {
        type: "object",
        properties: {
          task_description: text("Task to execute"),
          max_concurrent: numeric("Max parallel executions (default: 3)"),
          enable_mcts: flag("Enable MCTS optimization (slower but better)"),
        },
        required: ["task_description"],
      },
    },
    {
      name: "tmlpd_episodic_query",
      description: "Query episodic memory for similar past tasks. Useful for learning from past executions and improving future routing decisions.",
      inputSchema: {
        type: "object",
        properties: {
          task_description: text("Task to find similar executions for"),
          limit: numeric("Max results (default: 5)"),
        },
        required: ["task_description"],
      },
    },
    {
      name: "tmlpd_count_tokens",
      description: "Count tokens in text for cost estimation. Supports all major models (GPT-4, Claude, Gemini, Llama). Use for estimating costs before execution or calculating context window usage.",
      inputSchema: {
        type: "object",
        properties: {
          text: text("Text to count tokens in"),
          model: text("Model for tokenization (default: gpt-4o)"),
        },
        required: ["text"],
      },
    },
    {
      name: "tmlpd_compress_context",
      description: "Compress context/messages using ISON encoding for token reduction. Reduces context by ~20-40% while preserving meaning. Useful for fitting more content in context windows.",
      inputSchema: {
        type: "object",
        properties: {
          messages: { type: "array", description: "Messages to compress", items: { type: "object" } },
          strategy: oneOf(["smart", "first", "last"], "Compression strategy (default: smart)"),
          max_tokens: numeric("Target token budget"),
        },
        required: ["messages"],
      },
    },
    {
      name: "tmlpd_local_generate",
      description: "Generate using local LLM runtime (Ollama, vLLM, LM Studio). Zero cost, privacy-preserving. Use for development, testing, or when local GPU available. Falls back to cloud if local unavailable.",
      inputSchema: {
        type: "object",
        properties: {
          prompt: text("Prompt for generation"),
          runtime: oneOf(["ollama", "vllm", "lmstudio"], "Local runtime type"),
          model: text("Model name (default: llama-3.3-70b)"),
        },
        required: ["prompt", "runtime"],
      },
    },
    {
      name: "tmlpd_batch_execute",
      description: "Execute batch of prompts with concurrency control. Supports priority queuing, progress callbacks, rate limiting. Use for processing multiple prompts efficiently.",
      inputSchema: {
        type: "object",
        properties: {
          prompts: textList("Prompts to execute"),
          concurrency: numeric("Max parallel executions (default: 5)"),
          model: text("Model to use (default: gpt-4o)"),
        },
        required: ["prompts"],
      },
    },
  ];
})();
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Reference to Full TMLPD
|
|
208
|
+
*
|
|
209
|
+
* This package provides:
|
|
210
|
+
* - Core: streaming, caching, cost tracking, reliability
|
|
211
|
+
* - Reference: episodic memory, MCTS, HALO orchestrator
|
|
212
|
+
*
|
|
213
|
+
* For production with full features:
|
|
214
|
+
* - Python TMLPD: https://github.com/Das-rebel/tmlpd-skill
|
|
215
|
+
* - Full memory: 3-tier (episodic + semantic + working)
|
|
216
|
+
* - Full MCTS: UCB1, deterministic rollouts, strategy caching
|
|
217
|
+
* - Full HALO: NLP decomposition, capability matching, verification
|
|
218
|
+
*/
|
|
219
|
+
|
|
220
|
+
/**
 * Default export: the factory, the tools class and the PI tool descriptors
 * bundled in one object for consumers that prefer a default import.
 */
export default { createTMLPD, TMLPDTools, TMLPD_PI_TOOLS };
|