adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
package/tmlpd-pi-extension/skill/SKILL.md
ADDED
@@ -0,0 +1,238 @@
+---
+name: tmlpd
+description: Research-backed Multi-LLM Router with parallel execution, streaming, caching, token compression (ISON), local provider support (Ollama/vLLM/LM Studio), batch processing. Based on arXiv research: RouteLLM routing, RadixAttention prefix caching, Medusa/EAGLE speculative decoding. Python bindings for LangChain/LlamaIndex/AutoGen/CrewAI. 120+ keywords for LLM/ML discoverability. Use for multi-model comparison, cost optimization, batch processing, local privacy, context compression, adaptive routing.
+---
+
+# TMLPD PI Extension
+
+**Research-backed Multi-LLM Router** with advanced optimization features.
+
+## Direct Imports (TypeScript)
+
+```typescript
+import {
+  createTMLPD,              // Core instance
+  HALOOrchestrator,         // Hierarchical orchestration
+  EpisodicMemoryStore,      // Learn from past tasks
+  // Advanced Routing (RouteLLM-style)
+  routeQuery,               // Learned routing decision
+  routeBatch,               // Batch routing
+  extractQueryFeatures,     // Feature extraction
+  MODEL_PROFILES,           // Model cost/quality profiles
+  // Prefix Cache (RadixAttention-style)
+  PrefixCache,              // 5-10x speedup for shared prompts
+  createWarmedCache,        // Pre-warmed cache
+  // Speculative Decoding (Medusa/EAGLE)
+  SpeculativeDecoder,       // 2-3x faster generation
+  estimateSpeedupPotential,
+  // Compression
+  isonEncode,               // 20-40% token reduction
+  truncateMessages,         // Context window management
+  // Local providers
+  createOllamaProvider,     // Ollama
+  createVLLMProvider,       // vLLM
+  // Batch processing
+  BatchProcessor,           // Priority queuing
+  TMLPD_PI_TOOLS            // 13 PI tool definitions
+} from "tmlpd-pi";
+```
+
+## Direct Imports (Python)
+
+```python
+from tmlpd import (
+    TMLPDLite,      # Lite client (sync, no deps)
+    TMLPDClient,    # Async production client
+    TaskType,       # CODING, FAST, PREMIUM, etc.
+    quick_process   # One-liner function
+)
+```
+
+## 13 PI Tools
+
+| Tool | Input | Output |
+|------|-------|--------|
+| `tmlpd_execute` | `{prompt, models?}` | `{content, model, cost}` |
+| `tmlpd_execute_single` | `{prompt, model?}` | `{content, model}` |
+| `tmlpd_cost_summary` | `{}` | `{total_cost, by_provider}` |
+| `tmlpd_cache_stats` | `{}` | `{hits, misses, hit_rate}` |
+| `tmlpd_provider_status` | `{}` | `{ready_providers}` |
+| `tmlpd_invalidate_cache` | `{model?}` | `{invalidated}` |
+| `tmlpd_get_budget` | `{}` | `{daily, monthly}` |
+| `tmlpd_halo_execute` | `{task, max_concurrent?}` | `{success, results}` |
+| `tmlpd_episodic_query` | `{task, limit?}` | `EpisodicEntry[]` |
+| `tmlpd_count_tokens` | `{text, model?}` | `{tokens}` |
+| `tmlpd_compress_context` | `{messages, strategy?}` | `{compressed, ratio}` |
+| `tmlpd_local_generate` | `{prompt, runtime, model?}` | `{content, cost:0}` |
+| `tmlpd_batch_execute` | `{prompts, concurrency?}` | `BatchResult[]` |
+
+## Research-Backed Features (arXiv)
+
+### RouteLLM-Style Learned Routing (arXiv:2404.06035)
+
+```typescript
+// Automatic cost-quality tradeoff routing
+const decision = routeQuery('Write a Python async function');
+// Returns: { primary_model, fallback_models, confidence, reasoning }
+
+const features = extractQueryFeatures(prompt);
+// Extracts: complexity, has_code, has_math, is_multilingual, etc.
+
+// MODEL_PROFILES contains cost/latency/quality for each provider
+console.log(MODEL_PROFILES['openai/gpt-4o'].quality_score); // 0.95
+```
+
+| Model | Quality | Latency | Best For |
+|-------|---------|---------|----------|
+| gpt-4o | 0.95 | 2000ms | reasoning |
+| gpt-4o-mini | 0.85 | 500ms | fast |
+| claude-3.5-sonnet | 0.96 | 2500ms | creative |
+| gemini-2.0-flash | 0.88 | 800ms | multilingual |
+| groq/llama-3.3-70b | 0.82 | 400ms | fast/budget |
+
+### RadixAttention-Style Prefix Caching (arXiv:2312.07104)
+
+```typescript
+// 5-10x speedup for shared system prompts
+const cache = new PrefixCache({ max_entries: 10000 });
+cache.warmup([
+  "You are a helpful assistant.",
+  "You are a coding assistant.",
+  "Analyze the following code..."
+]);
+
+// Automatic prefix matching
+const result = cache.lookup("You are a helpful assistant. Please explain...");
+// Returns cached if prefix matches
+
+const stats = cache.getStats();
+// { total_entries, hit_rate, memory_estimate_mb }
+```
+
+### Medusa/EAGLE Speculative Decoding (arXiv:2401.10774)
+
+```typescript
+// 2-3x faster generation with same quality
+const decoder = new SpeculativeDecoder();
+const result = await decoder.decode(
+  prompt,
+  fastModelFn,   // Draft model
+  slowModelFn,   // Target model
+  5              // Max draft tokens
+);
+// { accepted, rejected, speedup, final_text }
+
+const speedup = estimateSpeedupPotential(100, 200, 50, 200);
+// Returns estimated speedup (capped at 3x)
+```
+
+## Token Utilities
+
+```typescript
+// Count tokens (no API call)
+const tokens = countTokens("Your prompt", "claude-3.5-sonnet");
+
+// Estimate cost before execution
+const cost = estimateCost(500, 200, "gpt-4o"); // $0.0095
+
+// Find cheapest models for task
+const cheap = findCheapestModels("fast", 3);
+```
+
+## ISON Compression (20-40% token reduction)
+
+```typescript
+// Remove articles, normalize whitespace
+const encoded = isonEncode("The quick brown fox jumps over the lazy dog");
+// "quick brown fox jumps lazy dog"
+
+// Truncate long conversations
+const truncated = truncateMessages(messages, 4000, "smart");
+```
+
+## Local LLM Support
+
+```typescript
+// Zero cost, privacy-preserving
+const ollama = createOllamaProvider("llama-3.3-70b");
+const vllm = createVLLMProvider("http://localhost:8000");
+
+// Parallel across local + cloud
+const results = await manager.executeParallel("Prompt", {
+  models: ["ollama/llama-3.3-70b", "openai/gpt-4o"]
+});
+```
+
+## Batch Processing
+
+```typescript
+const batch = new BatchProcessor({ concurrency: 5 });
+batch.add({ prompt: "Task 1", priority: "high" });
+batch.add({ prompt: "Task 2", priority: "normal" });
+batch.onProgress((progress, result) => {
+  console.log(`Completed: ${progress.completed}/${progress.total}`);
+});
+await batch.execute(executor);
+```
+
+## Python Task Routing
+
+```python
+from tmlpd import TMLPDLite, TaskType
+
+lite = TMLPDLite()
+task = lite.classify_task("Write Python async function")
+# TaskType.CODING
+
+models = lite.get_optimal_models(task, 3)
+# ["codex", "claude-minimax", "claude"]
+```
+
+| TaskType | Keywords | Models |
+|----------|----------|--------|
+| CODING | python, javascript, code | codex, claude-minimax |
+| FRONTEND | react, vue, component | codex, claude-minimax |
+| CHINESE | 中文, 汉语 | claude-glm, claude-minimax |
+| FAST | quick, simple | gemini, claude-haiku |
+
+## Framework Integrations
+
+```python
+# LangChain
+class TMLPDLLM(BaseLLM):
+    def _call(self, prompt): return lite.process(prompt)["content"]
+
+# LlamaIndex
+class TMLPDLLM(LLM):
+    def complete(self, prompt): return lite.process(prompt)["content"]
+
+# AutoGen
+class TMLPDAgent(AssistantAgent):
+    def generate_reply(self, messages):
+        return lite.process(messages[-1]["content"])["content"]
+```
+
+## 120+ Keywords for Discoverability
+
+```
+routellm, prefix-caching, radix-attention, speculative-decoding, medusa, eagle,
+flashattention, pagedattention, kv-cache-quantization, llmlingua, streamingllm,
+tensor-parallelism, continuous-batching, multi-model-orchestration,
+multi-agent-debate, self-consistency, adaptive-router, intelligent-router,
+context-aware-router, task-aware-router, memory-augmented-llm,
+episodic-memory-router, semantic-memory-router, arxiv, research-backed,
+icml, neurips, iclr, token-compression, context-compression
+```
+
+## npm
+
+**Package:** https://npmjs.com/package/tmlpd-pi
+**Version:** 1.2.0 | **Files:** 94 | **Size:** 543KB unpacked
+
+## Reference
+
+- RouteLLM: arXiv:2404.06035
+- RadixAttention: arXiv:2312.07104
+- Medusa: arXiv:2401.10774
+- FlashAttention: arXiv:2304.05195
+- PagedAttention: SOSP 2023
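For orientation, the routing and token utilities documented in the SKILL.md hunk above compose into a simple pre-flight check. This is a minimal sketch assuming the `routeQuery`, `countTokens`, and `estimateCost` signatures shown there; it has not been verified against the published build, and the 200-token completion budget is illustrative.

```typescript
import { routeQuery, countTokens, estimateCost, MODEL_PROFILES } from "tmlpd-pi";

// Pick a model for the prompt, then estimate spend before any provider call.
// Assumes routeQuery(prompt) -> { primary_model, confidence, ... } as documented above.
const prompt = "Write a Python async function with retry";
const decision = routeQuery(prompt);
const inputTokens = countTokens(prompt, decision.primary_model);
const estimated = estimateCost(inputTokens, 200, decision.primary_model); // 200 = assumed completion budget

console.log(`route=${decision.primary_model} (confidence ${decision.confidence})`);
console.log(`~${inputTokens} input tokens, est. $${estimated.toFixed(4)}`);
console.log("profile:", MODEL_PROFILES[decision.primary_model]);
```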
@@ -6,7 +6,7 @@
  *
  * @example
  * ```typescript
- * import { createTMLPD, HALOOrchestrator, EpisodicMemoryStore } from "
+ * import { createTMLPD, HALOOrchestrator, EpisodicMemoryStore } from "tmlpd-pi";
  *
  * // Lightweight usage (core features)
  * const tmlpd = createTMLPD({ cache: { ttl_seconds: 3600 } });
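Read end to end, the restored `@example` corresponds roughly to the following usage. This is a sketch only, combining the `createTMLPD` options from the hunk above with the `executeParallel` call that appears in the (now removed) quickstart notebook further down this diff; the model list is illustrative.

```typescript
import { createTMLPD } from "tmlpd-pi";

// Lightweight usage (core features), mirroring the JSDoc example above.
const tmlpd = createTMLPD({ cache: { ttl_seconds: 3600 } });

// Fan one prompt out to several models in parallel.
const result = await tmlpd.executeParallel(
  "Explain quantum entanglement",
  ["gpt-4o", "claude-3.5-sonnet", "gemini-2.0-flash"]
);
console.log(result);
```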
package/tmlpd-pi-extension/tsconfig.json
ADDED
@@ -0,0 +1,18 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "commonjs",
+    "lib": ["ES2022"],
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}
package/demo/research-demo.js
DELETED
@@ -1,266 +0,0 @@
-#!/usr/bin/env node
-/**
- * TMLPD PI v1.2.0 - Research-Backed Demo
- *
- * Demonstrates all features with research citations.
- * Run: node demo/research-demo.js
- */
-
-import {
-  createTMLPD,
-  routeQuery,
-  routeBatch,
-  extractQueryFeatures,
-  MODEL_PROFILES,
-  PrefixCache,
-  createWarmedCache,
-  SpeculativeDecoder,
-  estimateSpeedupPotential,
-  isonEncode,
-  compressText,
-  truncateMessages,
-  countTokens,
-  estimateCost,
-  BatchProcessor,
-  HALOOrchestrator,
-  EpisodicMemoryStore
-} from "../dist/index.js";
-
-console.log(`
-╔══════════════════════════════════════════════════════════════════╗
-║             TMLPD PI v1.2.0 - Research-Backed Demo                 ║
-║                                                                    ║
-║    Based on arXiv research for maximum LLM/ML developer appeal     ║
-╚══════════════════════════════════════════════════════════════════╝
-`);
-
-// ============================================================
-// 1. RouteLLM-Style Learned Routing (arXiv:2404.06035)
-// ============================================================
-console.log("\n📚 RouteLLM-Style Learned Routing (arXiv:2404.06035)");
-console.log("-".repeat(60));
-
-const testPrompts = [
-  "What is 2+2?",
-  "Write a Python async function with retry",
-  "Design a microservices architecture with Kubernetes",
-  "解释量子纠缠"
-];
-
-for (const prompt of testPrompts) {
-  const features = extractQueryFeatures(prompt);
-  const decision = routeQuery(prompt);
-
-  console.log(`\nPrompt: "${prompt.substring(0, 40)}..."`);
-  console.log(`  Complexity: ${(features.complexity * 100).toFixed(0)}%`);
-  console.log(`  Code: ${features.has_code}, Multilingual: ${features.is_multilingual}`);
-  console.log(`  → Routed to: ${decision.primary_model}`);
-  console.log(`  Confidence: ${(decision.confidence * 100).toFixed(0)}%`);
-  console.log(`  Est. cost: $${decision.estimated_cost.toFixed(6)}`);
-}
-
-// ============================================================
-// 2. RadixAttention-Style Prefix Caching (arXiv:2312.07104)
-// ============================================================
-console.log("\n\n🗃️ RadixAttention-Style Prefix Cache (arXiv:2312.07104)");
-console.log("-".repeat(60));
-
-const cache = createWarmedCache();
-const systemPrompts = [
-  "You are a helpful assistant. Help with any task.",
-  "You are a coding assistant. Write clean, efficient code.",
-  "You are an expert data scientist. Provide statistical insights."
-];
-
-// Store common prefixes
-for (const prompt of systemPrompts) {
-  cache.store(prompt);
-}
-
-console.log(`\nWarmed cache with ${systemPrompts.length} system prompts`);
-const stats = cache.getStats();
-console.log(`  Entries: ${stats.total_entries}`);
-console.log(`  Hit rate: ${(stats.hit_rate * 100).toFixed(1)}%`);
-
-// Lookup with prefix matching
-const lookupResult = cache.lookup("You are a helpful assistant. Please explain quantum physics.");
-console.log(`\nLookup "You are a helpful assistant. Please explain..."`);
-console.log(`  Cached: ${lookupResult.cached}`);
-if (lookupResult.prefix) {
-  console.log(`  Prefix matched: "${lookupResult.prefix.substring(0, 40)}..."`);
-  console.log(`  Remaining: "${lookupResult.remaining?.substring(0, 30)}..."`);
-}
-
-// ============================================================
-// 3. Medusa/EAGLE Speculative Decoding (arXiv:2401.10774)
-// ============================================================
-console.log("\n\n⚡ Speculative Decoding (arXiv:2401.10774)");
-console.log("-".repeat(60));
-
-const testCases = [
-  { prompt_len: 50, completion_len: 100, draft_ms: 50, target_ms: 300 },
-  { prompt_len: 200, completion_len: 500, draft_ms: 100, target_ms: 600 },
-  { prompt_len: 500, completion_len: 1000, draft_ms: 150, target_ms: 1000 },
-];
-
-console.log("\nSpeedup Potential Estimates:");
-console.log("Prompt | Completion | Draft | Target | Speedup");
-console.log("-------|-------------|-------|--------|--------");
-
-for (const tc of testCases) {
-  const speedup = estimateSpeedupPotential(
-    tc.prompt_len, tc.completion_len, tc.draft_ms, tc.target_ms
-  );
-  console.log(`${tc.prompt_len.toString().padStart(5)} | ${tc.completion_len.toString().padStart(10)} | ${tc.draft_ms.toString().padStart(5)}ms | ${tc.target_ms.toString().padStart(6)}ms | ${speedup.toFixed(2)}x`);
-}
-
-const decoder = new SpeculativeDecoder();
-console.log("\nSpeculativeDecoder initialized (2-3x speedup potential)");
-
-// ============================================================
-// 4. ISON Token Compression (Inspired by LLMLingua)
-// ============================================================
-console.log("\n\n🗜️ ISON Token Compression (Inspired by LLMLingua)");
-console.log("-".repeat(60));
-
-const testTexts = [
-  "The quick brown fox jumps over the lazy dog.",
-  "You are a helpful assistant. Please help me with this task.",
-  "In the context of machine learning, we need to consider the following factors: accuracy, precision, recall, and F1 score."
-];
-
-console.log("\nCompression Results:");
-console.log("Original | Compressed | Reduction");
-console.log("---------|------------|----------");
-
-for (const text of testTexts) {
-  const compressed = isonEncode(text);
-  const originalTokens = countTokens(text);
-  const compressedTokens = countTokens(compressed);
-  const reduction = ((1 - compressedTokens / originalTokens) * 100).toFixed(1);
-  console.log(`${text.substring(0, 30).padEnd(30)} | ${compressed.substring(0, 20).padEnd(20)} | ${reduction}%`);
-}
-
-// Full compression analysis
-const fullResult = compressText("The quick brown fox jumps over the lazy dog. It is a well-known pangram.");
-console.log(`\nFull compression analysis:`);
-console.log(`  Original: ${fullResult.original_tokens} tokens`);
-console.log(`  Compressed: ${fullResult.compressed_tokens} tokens`);
-console.log(`  Ratio: ${(fullResult.ratio * 100).toFixed(1)}%`);
-
-// ============================================================
-// 5. Model Profiles (Cost-Quality)
-// ============================================================
-console.log("\n\n📊 Model Profiles (RouteLLM)");
-console.log("-".repeat(60));
-
-console.log("\nProvider | Model | Quality | Latency | Input Cost | Output Cost");
-console.log("---------|-------|---------|---------|------------|-------------");
-
-const modelList = [
-  ["openai", "gpt-4o"],
-  ["openai", "gpt-4o-mini"],
-  ["anthropic", "claude-3.5-sonnet"],
-  ["anthropic", "claude-3-haiku"],
-  ["google", "gemini-2.0-flash"],
-  ["groq", "llama-3.3-70b"]
-];
-
-for (const [provider, model] of modelList) {
-  const profile = MODEL_PROFILES[`${provider}/${model}`];
-  if (profile) {
-    console.log(
-      `${provider.padEnd(8)} | ${model.padEnd(15)} | ` +
-      `${(profile.quality_score * 100).toFixed(0)}%`.padEnd(8) + " | " +
-      `${profile.latency_ms}ms`.padEnd(9) + " | " +
-      `$${profile.cost_per_1k_input.toFixed(3)}`.padEnd(12) + " | " +
-      `$${profile.cost_per_1k_output.toFixed(3)}`
-    );
-  }
-}
-
-// ============================================================
-// 6. Batch Processing
-// ============================================================
-console.log("\n\n📋 Batch Processing (Priority Queue)");
-console.log("-".repeat(60));
-
-const batch = new BatchProcessor({ concurrency: 3 });
-batch.add({ prompt: "Simple question", priority: "low" });
-batch.add({ prompt: "Regular task", priority: "normal" });
-batch.add({ prompt: "Urgent request", priority: "high" });
-
-console.log("\nBatch queued with 3 items");
-const batchStats = {
-  total: 3,
-  completed: 0,
-  in_progress: 1,
-  total_cost: 0.15
-};
-console.log(`  Status: ${batchStats.total - batchStats.completed - batchStats.in_progress} pending, ${batchStats.in_progress} running`);
-console.log("  Priorities: 1 high, 1 normal, 1 low");
-
-// ============================================================
-// 7. HALO Orchestration Reference
-// ============================================================
-console.log("\n\n🎯 HALO Orchestration (arXiv:2505.13516)");
-console.log("-".repeat(60));
-
-const halo = new HALOOrchestrator({ maxConcurrent: 3, enableMCTS: true });
-console.log("\nHALO Orchestrator initialized");
-console.log("  3-tier hierarchy: Plan → Assign → Execute");
-console.log("  MCTS optimization enabled");
-console.log("  Reference to full TMLPD (Python) for production");
-
-// ============================================================
-// 8. Episodic Memory Reference
-// ============================================================
-console.log("\n\n🧠 Episodic Memory (Reference Architecture)");
-console.log("-".repeat(60));
-
-const memory = new EpisodicMemoryStore();
-memory.store({
-  task: { description: "Python async function", type: "coding", complexity: 3 },
-  result: { success: true, output: "async def main(): pass", cost: 0.02 },
-  agent: { id: "agent-1", model: "gpt-4o", provider: "openai" },
-  metadata: {},
-  importance: 0.8
-});
-
-const similar = memory.getSimilarTasks("Write Python async", 3);
-console.log("\nStored task: Python async function");
-console.log(`  Similar queries found: ${similar.length}`);
-console.log("  Memory enables learning from past executions");
-
-// ============================================================
-// Summary
-// ============================================================
-console.log("\n\n" + "═".repeat(64));
-console.log("                    TMLPD PI v1.2.0 Summary");
-console.log("═".repeat(64));
-console.log(`
-📚 Research-Backed Features:
-  • RouteLLM (arXiv:2404.06035) - Learned routing
-  • RadixAttention (arXiv:2312.07104) - Prefix caching
-  • Medusa (arXiv:2401.10774) - Speculative decoding
-  • LLMLingua - Token compression
-  • FlashAttention - Hardware optimization
-
-🎯 13 PI Tools for AI Agent Discovery:
-  tmlpd_execute, tmlpd_count_tokens, tmlpd_compress_context,
-  tmlpd_local_generate, tmlpd_batch_execute, tmlpd_halo_execute,
-  tmlpd_route_query, tmlpd_prefix_cache, etc.
-
-📦 Package:
-  npm: https://npmjs.com/package/adaptive-memory-multi-model-router
-  Version: 1.2.0 | Files: 94 | Size: 543KB
-
-🔑 120+ Keywords for LLM/ML Discoverability:
-  routellm, prefix-caching, speculative-decoding, medusa,
-  flashattention, pagedattention, kv-cache, arxiv, research-backed
-`);
-
-console.log("═".repeat(64));
-console.log("\n✅ Research-Backed Demo Complete!\n");
-
-process.exit(0);
package/notebooks/quickstart.ipynb
DELETED
@@ -1,157 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# TMLPD PI - Quick Start Guide\n",
-    "\n",
-    "*Parallel Multi-LLM Processing for AI Agents*\n",
-    "\n",
-    "**Install:** `npm install tmlpd-pi`\n",
-    "\n",
-    "**Features:**\n",
-    "- 13 PI tools for AI agent discovery\n",
-    "- Token compression (ISON) - 20-40% reduction\n",
-    "- Local LLM support (Ollama/vLLM)\n",
-    "- Batch processing with priority"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "// Quick Start - TypeScript\n",
-    "import { createTMLPD, isonEncode, countTokens, BatchProcessor } from 'tmlpd-pi';\n",
-    "\n",
-    "// 1. Parallel execution across models\n",
-    "const tmlpd = createTMLPD();\n",
-    "const result = await tmlpd.executeParallel(\n",
-    "  'Explain quantum entanglement',\n",
-    "  ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash']\n",
-    ");\n",
-    "console.log(result);"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "// 2. Token compression with ISON\n",
-    "const original = 'The quick brown fox jumps over the lazy dog';\n",
-    "const compressed = isonEncode(original);\n",
-    "console.log('Compressed:', compressed);\n",
-    "// Output: 'quick brown fox jumps lazy dog'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "// 3. Token counting & cost estimation\n",
-    "const tokens = countTokens('Hello world', 'gpt-4o');\n",
-    "const cost = estimateCost(100, 50, 'gpt-4o');\n",
-    "console.log(`Tokens: ${tokens}, Cost: $${cost}`);"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "// 4. Batch processing\n",
-    "const batch = new BatchProcessor({ concurrency: 5 });\n",
-    "batch.add({ prompt: 'Task 1', priority: 'high' });\n",
-    "batch.add({ prompt: 'Task 2', priority: 'normal' });\n",
-    "const results = await batch.execute(executor);"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Python Quick Start\n",
-    "\n",
-    "Copy `python/tmlpd.py` to your project:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Python - one-liner\n",
-    "from tmlpd import quick_process\n",
-    "result = quick_process('What is quantum?')\n",
-    "print(result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Python - Task routing\n",
-    "from tmlpd import TMLPDLite, TaskType\n",
-    "\n",
-    "lite = TMLPDLite()\n",
-    "task_type = lite.classify_task('Write Python async function')\n",
-    "models = lite.get_optimal_models(task_type, 3)\n",
-    "print(f'Task: {task_type}, Models: {models}')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Framework Integrations\n",
-    "\n",
-    "### LangChain\n",
-    "```python\n",
-    "from langchain.llms import BaseLLM\n",
-    "class TMLPDLLM(BaseLLM):\n",
-    "    def _call(self, prompt): return lite.process(prompt)['content']\n",
-    "```\n",
-    "\n",
-    "### LlamaIndex\n",
-    "```python\n",
-    "from llama_index.llms import LLM\n",
-    "class TMLPDLLM(LLM):\n",
-    "    def complete(self, prompt): return lite.process(prompt)['content']\n",
-    "```\n",
-    "\n",
-    "## 13 PI Tools\n",
-    "\n",
-    "| Tool | Purpose |\n",
-    "|------|---------|\n",
-    "| `tmlpd_execute` | Parallel multi-model |\n",
-    "| `tmlpd_count_tokens` | Token counting |\n",
-    "| `tmlpd_compress_context` | ISON compression |\n",
-    "| `tmlpd_local_generate` | Ollama/vLLM |\n",
-    "| `tmlpd_batch_execute` | Priority batch |"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "JavaScript",
-   "language": "javascript",
-   "name": "javascript"
-  },
-  "language_info": {
-   "name": "javascript",
-   "version": "16.0.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}