adaptive-memory-multi-model-router 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +114 -0
  2. package/demo/research-demo.js +266 -0
  3. package/dist/cache/prefixCache.d.ts +114 -0
  4. package/dist/cache/prefixCache.d.ts.map +1 -0
  5. package/dist/cache/prefixCache.js +285 -0
  6. package/dist/cache/prefixCache.js.map +1 -0
  7. package/dist/cache/responseCache.d.ts +58 -0
  8. package/dist/cache/responseCache.d.ts.map +1 -0
  9. package/dist/cache/responseCache.js +153 -0
  10. package/dist/cache/responseCache.js.map +1 -0
  11. package/dist/cli.js +59 -0
  12. package/dist/cost/costTracker.d.ts +95 -0
  13. package/dist/cost/costTracker.d.ts.map +1 -0
  14. package/dist/cost/costTracker.js +240 -0
  15. package/dist/cost/costTracker.js.map +1 -0
  16. package/dist/index.d.ts +723 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +239 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/memory/episodicMemory.d.ts +82 -0
  21. package/dist/memory/episodicMemory.d.ts.map +1 -0
  22. package/dist/memory/episodicMemory.js +145 -0
  23. package/dist/memory/episodicMemory.js.map +1 -0
  24. package/dist/orchestration/haloOrchestrator.d.ts +102 -0
  25. package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  26. package/dist/orchestration/haloOrchestrator.js +207 -0
  27. package/dist/orchestration/haloOrchestrator.js.map +1 -0
  28. package/dist/orchestration/mctsWorkflow.d.ts +85 -0
  29. package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  30. package/dist/orchestration/mctsWorkflow.js +210 -0
  31. package/dist/orchestration/mctsWorkflow.js.map +1 -0
  32. package/dist/providers/localProvider.d.ts +102 -0
  33. package/dist/providers/localProvider.d.ts.map +1 -0
  34. package/dist/providers/localProvider.js +338 -0
  35. package/dist/providers/localProvider.js.map +1 -0
  36. package/dist/providers/registry.d.ts +55 -0
  37. package/dist/providers/registry.d.ts.map +1 -0
  38. package/dist/providers/registry.js +138 -0
  39. package/dist/providers/registry.js.map +1 -0
  40. package/dist/routing/advancedRouter.d.ts +68 -0
  41. package/dist/routing/advancedRouter.d.ts.map +1 -0
  42. package/dist/routing/advancedRouter.js +332 -0
  43. package/dist/routing/advancedRouter.js.map +1 -0
  44. package/dist/tools/tmlpdTools.d.ts +101 -0
  45. package/dist/tools/tmlpdTools.d.ts.map +1 -0
  46. package/dist/tools/tmlpdTools.js +368 -0
  47. package/dist/tools/tmlpdTools.js.map +1 -0
  48. package/dist/utils/batchProcessor.d.ts +96 -0
  49. package/dist/utils/batchProcessor.d.ts.map +1 -0
  50. package/dist/utils/batchProcessor.js +170 -0
  51. package/dist/utils/batchProcessor.js.map +1 -0
  52. package/dist/utils/compression.d.ts +61 -0
  53. package/dist/utils/compression.d.ts.map +1 -0
  54. package/dist/utils/compression.js +281 -0
  55. package/dist/utils/compression.js.map +1 -0
  56. package/dist/utils/reliability.d.ts +74 -0
  57. package/dist/utils/reliability.d.ts.map +1 -0
  58. package/dist/utils/reliability.js +177 -0
  59. package/dist/utils/reliability.js.map +1 -0
  60. package/dist/utils/speculativeDecoding.d.ts +117 -0
  61. package/dist/utils/speculativeDecoding.d.ts.map +1 -0
  62. package/dist/utils/speculativeDecoding.js +246 -0
  63. package/dist/utils/speculativeDecoding.js.map +1 -0
  64. package/dist/utils/tokenUtils.d.ts +50 -0
  65. package/dist/utils/tokenUtils.d.ts.map +1 -0
  66. package/dist/utils/tokenUtils.js +124 -0
  67. package/dist/utils/tokenUtils.js.map +1 -0
  68. package/examples/QUICKSTART.md +183 -0
  69. package/notebooks/quickstart.ipynb +157 -0
  70. package/package.json +83 -0
  71. package/python/examples.py +53 -0
  72. package/python/integrations.py +330 -0
  73. package/python/setup.py +28 -0
  74. package/python/tmlpd.py +369 -0
  75. package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  76. package/qna/TMLPD_QNA.md +751 -0
  77. package/rust/tmlpd.h +268 -0
  78. package/skill/SKILL.md +238 -0
  79. package/src/cache/prefixCache.ts +365 -0
  80. package/src/cache/responseCache.ts +147 -0
  81. package/src/cost/costTracker.ts +302 -0
  82. package/src/index.ts +224 -0
  83. package/src/memory/episodicMemory.ts +185 -0
  84. package/src/orchestration/haloOrchestrator.ts +266 -0
  85. package/src/orchestration/mctsWorkflow.ts +262 -0
  86. package/src/providers/localProvider.ts +406 -0
  87. package/src/providers/registry.ts +164 -0
  88. package/src/routing/advancedRouter.ts +406 -0
  89. package/src/tools/tmlpdTools.ts +433 -0
  90. package/src/utils/batchProcessor.ts +232 -0
  91. package/src/utils/compression.ts +325 -0
  92. package/src/utils/reliability.ts +221 -0
  93. package/src/utils/speculativeDecoding.ts +344 -0
  94. package/src/utils/tokenUtils.ts +145 -0
  95. package/tsconfig.json +18 -0
@@ -0,0 +1,302 @@
1
+ /**
2
+ * TMLPD Cost Tracker
3
+ *
4
+ * Tracks real-time spending across all providers.
5
+ * Supports per-model budgets, spending alerts, and cost analysis.
6
+ */
7
+
8
// Price list for known models, in USD per one million tokens.
// Each row is [model id, input rate, output rate]; the rows are expanded into
// a lookup table keyed by model id, in the order they are listed here.
const MODEL_COSTS: Record<string, { input: number; output: number }> = Object.fromEntries(
  (
    [
      // Anthropic
      ["claude-3-5-sonnet-20241022", 3.0, 15.0],
      ["claude-3-opus-20240229", 15.0, 75.0],
      ["claude-3-sonnet-20240229", 3.0, 15.0],
      ["claude-3-haiku-20240307", 0.25, 1.25],
      // OpenAI
      ["gpt-4o", 2.5, 10.0],
      ["gpt-4-turbo", 10.0, 30.0],
      ["gpt-4", 30.0, 60.0],
      ["gpt-3.5-turbo", 0.5, 1.5],
      // Google
      ["gemini-1.5-pro", 1.25, 5.0],
      ["gemini-1.5-flash", 0.075, 0.3],
      // Groq
      ["llama-3.3-70b-versatile", 0.59, 0.79],
      ["llama-3.1-8b-instant", 0.05, 0.08],
      // Cerebras
      ["llama-3.3-70b", 0.1, 0.1],
      // Mistral
      ["mistral-large-latest", 2.0, 6.0],
      ["mistral-small-latest", 0.2, 0.6],
      // xAI
      ["grok-2", 2.0, 8.0],
      ["grok-2-mini", 0.2, 0.8],
      // OpenRouter (varies by model)
      ["openai/gpt-4o", 2.5, 10.0],
      ["anthropic/claude-3.5-sonnet", 3.0, 15.0],
      // ZAI (default estimate)
      ["glm-5", 0.1, 0.3],
      ["glm-4", 0.1, 0.3],
    ] as Array<[string, number, number]>
  ).map(([model, input, output]): [string, { input: number; output: number }] => [
    model,
    { input, output },
  ])
);
41
+
42
/**
 * Optional spending caps accepted by the cost tracker.
 * Amounts are in the same unit as the recorded costs (USD).
 */
export interface BudgetConfig {
  /** Cap on the summed cost of requests that fall on one calendar day. */
  daily_limit?: number;
  /** Cap on the summed cost of requests that fall in one calendar month. */
  monthly_limit?: number;
  /** Per-model caps, keyed by the model id passed when a request is recorded. */
  per_model_limits?: Record<string, number>;
}
47
+
48
/**
 * Payload describing a budget that is close to, or past, its limit.
 */
export interface CostAlert {
  /**
   * Which budget the alert refers to. The tracker in this file emits
   * "daily", "monthly" and "model"; "budget" is declared but not emitted here.
   */
  type: "daily" | "monthly" | "model" | "budget";
  /** The configured limit the spend is being compared against. */
  threshold: number;
  /** The accumulated spend at the time the alert was raised. */
  current: number;
  /** Provider the alert relates to; not populated by the tracker in this file. */
  provider?: string;
  /** Model the alert relates to; set for "model" alerts. */
  model?: string;
}
55
+
56
/**
 * Cost record for a single request.
 */
export interface CostSnapshot {
  /** Provider name exactly as supplied by the caller. */
  provider: string;
  /** Model id exactly as supplied by the caller. */
  model: string;
  /** Number of prompt tokens billed. */
  input_tokens: number;
  /** Number of completion tokens billed. */
  output_tokens: number;
  /** Cost of the input tokens. */
  input_cost: number;
  /** Cost of the output tokens. */
  output_cost: number;
  /** Combined cost of the request. */
  total_cost: number;
  /** Time the request was recorded, in milliseconds since the Unix epoch. */
  timestamp: number;
}
66
+
67
/**
 * Aggregated view over every recorded request.
 */
export interface CostSummary {
  /** Sum of all recorded request costs. */
  total_cost: number;
  /** Spend keyed by provider name. */
  by_provider: Record<string, number>;
  /** Spend keyed by model id. */
  by_model: Record<string, number>;
  /** Spend keyed by day (`YYYY-MM-DD`). */
  daily_costs: Record<string, number>;
  /** Spend keyed by month (`YYYY-MM`). */
  monthly_costs: Record<string, number>;
  /** Number of recorded requests. */
  request_count: number;
  /** Token totals across all recorded requests. */
  token_count: { input: number; output: number };
  /** `total_cost` divided by `request_count`; 0 when nothing has been recorded. */
  average_cost_per_request: number;
}
77
+
78
/**
 * In-memory tracker for LLM request spend.
 *
 * Each `record` call prices the request from the `MODEL_COSTS` table, stores
 * a snapshot, updates running per-model / per-day / per-month totals, and
 * compares those totals with the configured budgets. An alert is raised once
 * a total reaches 90% of its limit and is then suppressed for one hour for
 * the same (type, threshold, model) combination.
 *
 * Day and month buckets are keyed by the UTC date of each snapshot
 * (`YYYY-MM-DD` and `YYYY-MM`).
 *
 * A limit that is `undefined` or `0` disables the corresponding alert check.
 */
export class CostTracker {
  /** Rates (USD per 1M tokens) used when a model is not in `MODEL_COSTS`. */
  private static readonly DEFAULT_RATES = { input: 1.0, output: 5.0 };
  /** Fraction of a limit at which an alert is raised. */
  private static readonly ALERT_FRACTION = 0.9;
  /** Minimum time between two alerts with the same de-duplication key. */
  private static readonly ALERT_COOLDOWN_MS = 3_600_000;

  private history: CostSnapshot[] = [];
  private budgets: BudgetConfig;
  /** Most recent alert per de-duplication key, with the time it was emitted. */
  private alerts = new Map<string, { alert: CostAlert; emitted_at: number }>();
  private alerts_callback: ((alert: CostAlert) => void) | null = null;
  // Running totals so budget checks do not have to rescan the whole history.
  private model_totals = new Map<string, number>();
  private daily_totals = new Map<string, number>();
  private monthly_totals = new Map<string, number>();

  /**
   * @param budgets Optional spending caps; the object is kept by reference.
   */
  constructor(budgets: BudgetConfig = {}) {
    this.budgets = budgets;
  }

  /** Round a USD amount to 6 decimal places. */
  private static round6(amount: number): number {
    return Math.round(amount * 1_000_000) / 1_000_000;
  }

  /** UTC day (`YYYY-MM-DD`) and month (`YYYY-MM`) bucket keys for a timestamp. */
  private static periodKeys(timestamp: number): { day: string; month: string } {
    const day = new Date(timestamp).toISOString().slice(0, 10);
    return { day, month: day.substring(0, 7) };
  }

  /** Add `amount` to the running total stored under `key`. */
  private static accumulate(totals: Map<string, number>, key: string, amount: number): void {
    totals.set(key, (totals.get(key) ?? 0) + amount);
  }

  /**
   * Resolve the rate entry for a model id.
   *
   * The full id is tried first so provider-qualified table entries (for
   * example "anthropic/claude-3.5-sonnet") are honoured; the last path
   * segment is tried next so "provider/gpt-4o" still maps to "gpt-4o".
   * Only own properties count, so ids such as "constructor" cannot pick up
   * values from `Object.prototype`.
   */
  private static lookupRates(model: string): { input: number; output: number } {
    const short_name = model.split("/").pop() || model;
    for (const key of [model, short_name]) {
      if (Object.prototype.hasOwnProperty.call(MODEL_COSTS, key)) {
        const rates = MODEL_COSTS[key];
        if (rates !== undefined) return rates;
      }
    }
    return CostTracker.DEFAULT_RATES;
  }

  /**
   * Calculate cost for a model based on tokens.
   *
   * @param model Model id, with or without a provider prefix.
   * @param input_tokens Number of prompt tokens.
   * @param output_tokens Number of completion tokens.
   * @returns Input, output and total cost in USD, each rounded to 6 decimals.
   *          Unknown models are priced with a default estimate.
   */
  calculateCost(model: string, input_tokens: number, output_tokens: number): { input: number; output: number; total: number } {
    const rates = CostTracker.lookupRates(model);
    const input_cost = (input_tokens / 1_000_000) * rates.input;
    const output_cost = (output_tokens / 1_000_000) * rates.output;

    return {
      input: CostTracker.round6(input_cost),
      output: CostTracker.round6(output_cost),
      total: CostTracker.round6(input_cost + output_cost),
    };
  }

  /**
   * Record a request's cost, update running totals and run budget checks.
   *
   * @returns The snapshot that was appended to the history.
   */
  record(provider: string, model: string, input_tokens: number, output_tokens: number): CostSnapshot {
    const costs = this.calculateCost(model, input_tokens, output_tokens);
    const snapshot: CostSnapshot = {
      provider,
      model,
      input_tokens,
      output_tokens,
      input_cost: costs.input,
      output_cost: costs.output,
      total_cost: costs.total,
      timestamp: Date.now(),
    };

    this.history.push(snapshot);

    const { day, month } = CostTracker.periodKeys(snapshot.timestamp);
    CostTracker.accumulate(this.model_totals, model, snapshot.total_cost);
    CostTracker.accumulate(this.daily_totals, day, snapshot.total_cost);
    CostTracker.accumulate(this.monthly_totals, month, snapshot.total_cost);

    this.checkBudgets(snapshot);
    return snapshot;
  }

  /**
   * Compare the running totals touched by `snapshot` with the configured
   * limits and raise an alert for each one at or above 90% of its limit.
   * The snapshot's own timestamp selects the day/month bucket, so the check
   * always looks at the bucket the request was actually added to.
   */
  private checkBudgets(snapshot: CostSnapshot): void {
    const { day, month } = CostTracker.periodKeys(snapshot.timestamp);
    const { daily_limit, monthly_limit, per_model_limits } = this.budgets;

    if (daily_limit) {
      this.alertIfNear({
        type: "daily",
        threshold: daily_limit,
        current: this.daily_totals.get(day) ?? 0,
      });
    }

    if (monthly_limit) {
      this.alertIfNear({
        type: "monthly",
        threshold: monthly_limit,
        current: this.monthly_totals.get(month) ?? 0,
      });
    }

    const model_limit = per_model_limits?.[snapshot.model];
    if (model_limit) {
      this.alertIfNear({
        type: "model",
        threshold: model_limit,
        current: this.model_totals.get(snapshot.model) ?? 0,
        model: snapshot.model,
      });
    }
  }

  /** Emit `alert` when its current spend has reached the alert fraction of its threshold. */
  private alertIfNear(alert: CostAlert): void {
    if (alert.current >= alert.threshold * CostTracker.ALERT_FRACTION) {
      this.emitAlert(alert);
    }
  }

  /**
   * Emit an alert via the registered callback, at most once per cooldown
   * window for each (type, threshold, model) combination. Including the
   * model in the key keeps two models that share a limit from silencing
   * each other. The alert object is passed through unmodified.
   */
  private emitAlert(alert: CostAlert): void {
    const key = [alert.type, alert.threshold, alert.model ?? ""].join("|");
    const now = Date.now();
    const previous = this.alerts.get(key);
    if (previous !== undefined && now - previous.emitted_at < CostTracker.ALERT_COOLDOWN_MS) {
      return;
    }

    this.alerts.set(key, { alert, emitted_at: now });
    if (this.alerts_callback) {
      this.alerts_callback(alert);
    }
  }

  /**
   * Register the alert callback. Only one callback is kept; a later call
   * replaces the earlier one.
   */
  onAlert(callback: (alert: CostAlert) => void): void {
    this.alerts_callback = callback;
  }

  /**
   * Get comprehensive cost summary, built by scanning the recorded history.
   * This method does not modify the tracker.
   */
  getSummary(): CostSummary {
    const by_provider: Record<string, number> = {};
    const by_model: Record<string, number> = {};
    const daily_costs: Record<string, number> = {};
    const monthly_costs: Record<string, number> = {};
    let total_cost = 0;
    let total_input_tokens = 0;
    let total_output_tokens = 0;

    for (const entry of this.history) {
      total_cost += entry.total_cost;
      total_input_tokens += entry.input_tokens;
      total_output_tokens += entry.output_tokens;

      by_provider[entry.provider] = (by_provider[entry.provider] || 0) + entry.total_cost;
      by_model[entry.model] = (by_model[entry.model] || 0) + entry.total_cost;

      const { day, month } = CostTracker.periodKeys(entry.timestamp);
      daily_costs[day] = (daily_costs[day] || 0) + entry.total_cost;
      monthly_costs[month] = (monthly_costs[month] || 0) + entry.total_cost;
    }

    const request_count = this.history.length;
    return {
      total_cost: CostTracker.round6(total_cost),
      by_provider,
      by_model,
      daily_costs,
      monthly_costs,
      request_count,
      token_count: { input: total_input_tokens, output: total_output_tokens },
      average_cost_per_request: request_count > 0 ? CostTracker.round6(total_cost / request_count) : 0,
    };
  }

  /**
   * Get remaining budget for the current UTC day and month and for every
   * model that has a configured limit. Values never go below 0.
   *
   * @returns `daily` / `monthly` are `null` when the matching limit is unset
   *          (or 0); `per_model` is empty when no per-model limits exist.
   */
  getRemainingBudget(): { daily: number | null; monthly: number | null; per_model: Record<string, number> } {
    const { day, month } = CostTracker.periodKeys(Date.now());
    const { daily_limit, monthly_limit, per_model_limits } = this.budgets;

    return {
      daily: daily_limit ? Math.max(0, daily_limit - (this.daily_totals.get(day) ?? 0)) : null,
      monthly: monthly_limit ? Math.max(0, monthly_limit - (this.monthly_totals.get(month) ?? 0)) : null,
      per_model: per_model_limits
        ? Object.fromEntries(
            Object.entries(per_model_limits).map(([model, limit]) => [
              model,
              Math.max(0, limit - (this.model_totals.get(model) ?? 0)),
            ])
          )
        : {},
    };
  }

  /**
   * Reset cost history, running totals and alert cooldowns.
   * Budgets and the alert callback are kept.
   */
  reset(): void {
    this.history = [];
    this.alerts.clear();
    this.model_totals.clear();
    this.daily_totals.clear();
    this.monthly_totals.clear();
  }

  /**
   * Export cost data for analysis.
   *
   * @returns A new array holding the recorded snapshots in insertion order.
   */
  export(): CostSnapshot[] {
    return [...this.history];
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,224 @@
1
+ /**
2
+ * TMLPD PI Extension - v1.1.0
3
+ *
4
+ * Parallel Multi-LLM Processing with Streaming, Caching, Cost Tracking, Reliability
5
+ * + Reference Architecture to Full TMLPD (Episodic Memory, MCTS, HALO)
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { createTMLPD, HALOOrchestrator, EpisodicMemoryStore } from "adaptive-memory-multi-model-router";
10
+ *
11
+ * // Lightweight usage (core features)
12
+ * const tmlpd = createTMLPD({ cache: { ttl_seconds: 3600 } });
13
+ * const result = await tmlpd.executeParallel(prompt, ["gpt-4o", "claude"]);
14
+ *
15
+ * // Advanced: HALO orchestration with episodic memory
16
+ * const halo = new HALOOrchestrator({ maxConcurrent: 3, enableMCTS: true });
17
+ * const haloResult = await halo.execute("Build a REST API", async (subtask, agent) => {
18
+ * // Execute via agent
19
+ * });
20
+ *
21
+ * // Query episodic memory
22
+ * const similar = memory.getSimilarTasks("Python async API", 5);
23
+ * ```
24
+ */
25
+
26
+ import { createTMLPD, TMLPDTools, TMLPDConfig, ExecuteResult, ParallelResult, StreamingConfig } from "./tools/tmlpdTools";
27
+ import { ResponseCache, CacheConfig, CacheEntry } from "./cache/responseCache";
28
+ import { CostTracker, BudgetConfig, CostAlert, CostSummary, CostSnapshot } from "./cost/costTracker";
29
+ import { ProviderRegistry, ProviderConfig, ProviderRegistryConfig } from "./providers/registry";
30
+ import { CircuitBreaker, withRetry, RetryConfig, CircuitState, calculateRetryDelay, isRetryableStatus, DEFAULT_RETRY_CONFIG } from "./utils/reliability";
31
+ import { EpisodicMemoryStore, EpisodicEntry, MemoryQuery } from "./memory/episodicMemory";
32
+ import { MCTSWorkflowOptimizer, WorkflowState, WorkflowAction, MCTSConfig } from "./orchestration/mctsWorkflow";
33
+ import { HALOOrchestrator, SubTask, AgentAssignment, ExecutionResult, HALOConfig } from "./orchestration/haloOrchestrator";
34
+ import { countTokens, estimateCost, estimateCostFromText, getModelCost, listModelsByCost, findCheapestModels, MODEL_COSTS, TokenCost } from "./utils/tokenUtils";
35
+ import { isonEncode, isonDecode, compressText, truncateMessages, truncateToTokenBudget, calculateCompressionRatio, Message, CompressionStrategy } from "./utils/compression";
36
+ import { LocalProvider, LocalProviderManager, createOllamaProvider, createVLLMProvider, createLMStudioProvider, LocalRuntime, LocalProviderConfig, LocalModelInfo, LocalGenerationResult, LocalParallelResult } from "./providers/localProvider";
37
+ import { BatchProcessor, executeBatch, BatchItem, BatchResult, BatchOptions, BatchProgress, ProgressCallback } from "./utils/batchProcessor";
38
+ import { routeQuery, routeBatch, recommendForTask, extractQueryFeatures, updateModelProfile, MODEL_PROFILES, QueryFeatures, ModelProfile, RouteDecision } from "./routing/advancedRouter";
39
+ import { PrefixCache, createWarmedCache, PrefixCacheStats } from "./cache/prefixCache";
40
+ import { SpeculativeDecoder, speculativeBatch, estimateSpeedupPotential, MedusaPredictor, EagleSpeculative, SpeculativeConfig, SpeculativeResult } from "./utils/speculativeDecoding";
41
+
42
+ // Re-exports
43
+ export { createTMLPD, TMLPDTools, TMLPDConfig, ExecuteResult, ParallelResult, StreamingConfig };
44
+ export { ResponseCache, CacheConfig, CacheEntry };
45
+ export { CostTracker, BudgetConfig, CostAlert, CostSummary, CostSnapshot };
46
+ export { ProviderRegistry, ProviderConfig, ProviderRegistryConfig };
47
+ export { CircuitBreaker, withRetry, RetryConfig, CircuitState, calculateRetryDelay, isRetryableStatus, DEFAULT_RETRY_CONFIG };
48
+ export { EpisodicMemoryStore, EpisodicEntry, MemoryQuery };
49
+ export { MCTSWorkflowOptimizer, WorkflowState, WorkflowAction, MCTSConfig };
50
+ export { HALOOrchestrator, SubTask, AgentAssignment, ExecutionResult, HALOConfig };
51
+
52
+ // Token utilities
53
+ export { countTokens, estimateCost, estimateCostFromText, getModelCost, listModelsByCost, findCheapestModels, MODEL_COSTS, TokenCost };
54
+
55
+ // Compression utilities
56
+ export { isonEncode, isonDecode, compressText, truncateMessages, truncateToTokenBudget, calculateCompressionRatio, Message, CompressionStrategy };
57
+
58
+ // Local provider support (Ollama, vLLM, LM Studio)
59
+ export { LocalProvider, LocalProviderManager, createOllamaProvider, createVLLMProvider, createLMStudioProvider, LocalRuntime, LocalProviderConfig, LocalModelInfo, LocalGenerationResult, LocalParallelResult };
60
+
61
+ // Batch processing
62
+ export { BatchProcessor, executeBatch, BatchItem, BatchResult, BatchOptions, BatchProgress, ProgressCallback };
63
+
64
+ // Advanced routing (RouteLLM-style)
65
+ export { routeQuery, routeBatch, recommendForTask, extractQueryFeatures, updateModelProfile, MODEL_PROFILES, QueryFeatures, ModelProfile as ModelProfileType, RouteDecision };
66
+
67
+ // Prefix caching (RadixAttention-style)
68
+ export { PrefixCache, createWarmedCache, PrefixCacheStats };
69
+
70
+ // Speculative decoding (Medusa/EAGLE-style)
71
+ export { SpeculativeDecoder, speculativeBatch, estimateSpeedupPotential, MedusaPredictor, EagleSpeculative, SpeculativeConfig, SpeculativeResult };
72
+
73
/**
 * PI tool definitions (for PI agent integration).
 *
 * Each entry carries a tool `name`, a human-readable `description`, and an
 * `inputSchema` object describing the accepted arguments.
 */
export const TMLPD_PI_TOOLS = [
  // --- Execution ---
  {
    name: "tmlpd_execute",
    description:
      "Execute prompt across multiple LLM providers in parallel. Optimizes for cost-quality tradeoff with automatic model selection. Use when comparing multiple AI responses or needing faster results via parallel execution.",
    inputSchema: {
      type: "object",
      properties: {
        prompt: { type: "string", description: "The prompt to execute" },
        models: {
          type: "array",
          items: { type: "string" },
          description: "Optional model list (auto-selects if omitted)",
        },
        streaming: {
          type: "object",
          properties: {
            enabled: { type: "boolean" },
            chunk_size: { type: "number" },
          },
        },
      },
      required: ["prompt"],
    },
  },
  {
    name: "tmlpd_execute_single",
    description:
      "Execute with single model via smart routing. Analyzes prompt to select optimal agent based on task type (coding, explanation, analysis, etc.) with cost-quality optimization.",
    inputSchema: {
      type: "object",
      properties: {
        prompt: { type: "string", description: "The prompt to execute" },
        model: { type: "string", description: "Optional specific model" },
      },
      required: ["prompt"],
    },
  },

  // --- Cost, cache and provider introspection ---
  {
    name: "tmlpd_cost_summary",
    description:
      "Get real-time cost tracking summary. Shows spending by provider, model, daily/monthly breakdowns, and remaining budget. Essential for cost monitoring in production.",
    inputSchema: { type: "object", properties: {} },
  },
  {
    name: "tmlpd_cache_stats",
    description:
      "Get response cache statistics. Shows hit rate, cache size, and effectiveness. Cache hits cost $0 and provide instant responses.",
    inputSchema: { type: "object", properties: {} },
  },
  {
    name: "tmlpd_provider_status",
    description:
      "Get status of all configured LLM providers. Shows readiness, cooldown status, failure counts. Use for debugging or selecting specific providers.",
    inputSchema: { type: "object", properties: {} },
  },
  {
    name: "tmlpd_invalidate_cache",
    description:
      "Invalidate cached responses. Use when prompt content has changed and fresh response needed, or to clear stale cache entries.",
    inputSchema: {
      type: "object",
      properties: {
        model: { type: "string", description: "Optional model to invalidate (all if omitted)" },
      },
    },
  },
  {
    name: "tmlpd_get_budget",
    description:
      "Get remaining budget for cost controls. Returns daily, monthly, and per-model limits. Use for budget enforcement and alerting.",
    inputSchema: { type: "object", properties: {} },
  },

  // --- Orchestration and memory ---
  {
    name: "tmlpd_halo_execute",
    description:
      "Execute via HALO (Hierarchical Autonomous Logic-Oriented) orchestrator with 3-tier planning: decompose → assign → execute. Includes episodic memory for learning from past executions. For complex multi-step tasks.",
    inputSchema: {
      type: "object",
      properties: {
        task_description: { type: "string", description: "Task to execute" },
        max_concurrent: { type: "number", description: "Max parallel executions (default: 3)" },
        enable_mcts: { type: "boolean", description: "Enable MCTS optimization (slower but better)" },
      },
      required: ["task_description"],
    },
  },
  {
    name: "tmlpd_episodic_query",
    description:
      "Query episodic memory for similar past tasks. Useful for learning from past executions and improving future routing decisions.",
    inputSchema: {
      type: "object",
      properties: {
        task_description: { type: "string", description: "Task to find similar executions for" },
        limit: { type: "number", description: "Max results (default: 5)" },
      },
      required: ["task_description"],
    },
  },

  // --- Token and context utilities ---
  {
    name: "tmlpd_count_tokens",
    description:
      "Count tokens in text for cost estimation. Supports all major models (GPT-4, Claude, Gemini, Llama). Use for estimating costs before execution or calculating context window usage.",
    inputSchema: {
      type: "object",
      properties: {
        text: { type: "string", description: "Text to count tokens in" },
        model: { type: "string", description: "Model for tokenization (default: gpt-4o)" },
      },
      required: ["text"],
    },
  },
  {
    name: "tmlpd_compress_context",
    description:
      "Compress context/messages using ISON encoding for token reduction. Reduces context by ~20-40% while preserving meaning. Useful for fitting more content in context windows.",
    inputSchema: {
      type: "object",
      properties: {
        messages: { type: "array", description: "Messages to compress", items: { type: "object" } },
        strategy: {
          type: "string",
          enum: ["smart", "first", "last"],
          description: "Compression strategy (default: smart)",
        },
        max_tokens: { type: "number", description: "Target token budget" },
      },
      required: ["messages"],
    },
  },

  // --- Local runtimes and batching ---
  {
    name: "tmlpd_local_generate",
    description:
      "Generate using local LLM runtime (Ollama, vLLM, LM Studio). Zero cost, privacy-preserving. Use for development, testing, or when local GPU available. Falls back to cloud if local unavailable.",
    inputSchema: {
      type: "object",
      properties: {
        prompt: { type: "string", description: "Prompt for generation" },
        runtime: {
          type: "string",
          enum: ["ollama", "vllm", "lmstudio"],
          description: "Local runtime type",
        },
        model: { type: "string", description: "Model name (default: llama-3.3-70b)" },
      },
      required: ["prompt", "runtime"],
    },
  },
  {
    name: "tmlpd_batch_execute",
    description:
      "Execute batch of prompts with concurrency control. Supports priority queuing, progress callbacks, rate limiting. Use for processing multiple prompts efficiently.",
    inputSchema: {
      type: "object",
      properties: {
        prompts: { type: "array", items: { type: "string" }, description: "Prompts to execute" },
        concurrency: { type: "number", description: "Max parallel executions (default: 5)" },
        model: { type: "string", description: "Model to use (default: gpt-4o)" },
      },
      required: ["prompts"],
    },
  },
];
205
+
206
+ /**
207
+ * Reference to Full TMLPD
208
+ *
209
+ * This package provides:
210
+ * - Core: streaming, caching, cost tracking, reliability
211
+ * - Reference: episodic memory, MCTS, HALO orchestrator
212
+ *
213
+ * For production with full features:
214
+ * - Python TMLPD: https://github.com/Das-rebel/tmlpd-skill
215
+ * - Full memory: 3-tier (episodic + semantic + working)
216
+ * - Full MCTS: UCB1, deterministic rollouts, strategy caching
217
+ * - Full HALO: NLP decomposition, capability matching, verification
218
+ */
219
+
220
/**
 * Default export: a single object exposing the `createTMLPD`, `TMLPDTools`
 * and `TMLPD_PI_TOOLS` bindings under their own names.
 */
export default { createTMLPD, TMLPDTools, TMLPD_PI_TOOLS };