adaptive-memory-multi-model-router 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +114 -0
  2. package/demo/research-demo.js +266 -0
  3. package/dist/cache/prefixCache.d.ts +114 -0
  4. package/dist/cache/prefixCache.d.ts.map +1 -0
  5. package/dist/cache/prefixCache.js +285 -0
  6. package/dist/cache/prefixCache.js.map +1 -0
  7. package/dist/cache/responseCache.d.ts +58 -0
  8. package/dist/cache/responseCache.d.ts.map +1 -0
  9. package/dist/cache/responseCache.js +153 -0
  10. package/dist/cache/responseCache.js.map +1 -0
  11. package/dist/cli.js +59 -0
  12. package/dist/cost/costTracker.d.ts +95 -0
  13. package/dist/cost/costTracker.d.ts.map +1 -0
  14. package/dist/cost/costTracker.js +240 -0
  15. package/dist/cost/costTracker.js.map +1 -0
  16. package/dist/index.d.ts +723 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +239 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/memory/episodicMemory.d.ts +82 -0
  21. package/dist/memory/episodicMemory.d.ts.map +1 -0
  22. package/dist/memory/episodicMemory.js +145 -0
  23. package/dist/memory/episodicMemory.js.map +1 -0
  24. package/dist/orchestration/haloOrchestrator.d.ts +102 -0
  25. package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  26. package/dist/orchestration/haloOrchestrator.js +207 -0
  27. package/dist/orchestration/haloOrchestrator.js.map +1 -0
  28. package/dist/orchestration/mctsWorkflow.d.ts +85 -0
  29. package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  30. package/dist/orchestration/mctsWorkflow.js +210 -0
  31. package/dist/orchestration/mctsWorkflow.js.map +1 -0
  32. package/dist/providers/localProvider.d.ts +102 -0
  33. package/dist/providers/localProvider.d.ts.map +1 -0
  34. package/dist/providers/localProvider.js +338 -0
  35. package/dist/providers/localProvider.js.map +1 -0
  36. package/dist/providers/registry.d.ts +55 -0
  37. package/dist/providers/registry.d.ts.map +1 -0
  38. package/dist/providers/registry.js +138 -0
  39. package/dist/providers/registry.js.map +1 -0
  40. package/dist/routing/advancedRouter.d.ts +68 -0
  41. package/dist/routing/advancedRouter.d.ts.map +1 -0
  42. package/dist/routing/advancedRouter.js +332 -0
  43. package/dist/routing/advancedRouter.js.map +1 -0
  44. package/dist/tools/tmlpdTools.d.ts +101 -0
  45. package/dist/tools/tmlpdTools.d.ts.map +1 -0
  46. package/dist/tools/tmlpdTools.js +368 -0
  47. package/dist/tools/tmlpdTools.js.map +1 -0
  48. package/dist/utils/batchProcessor.d.ts +96 -0
  49. package/dist/utils/batchProcessor.d.ts.map +1 -0
  50. package/dist/utils/batchProcessor.js +170 -0
  51. package/dist/utils/batchProcessor.js.map +1 -0
  52. package/dist/utils/compression.d.ts +61 -0
  53. package/dist/utils/compression.d.ts.map +1 -0
  54. package/dist/utils/compression.js +281 -0
  55. package/dist/utils/compression.js.map +1 -0
  56. package/dist/utils/reliability.d.ts +74 -0
  57. package/dist/utils/reliability.d.ts.map +1 -0
  58. package/dist/utils/reliability.js +177 -0
  59. package/dist/utils/reliability.js.map +1 -0
  60. package/dist/utils/speculativeDecoding.d.ts +117 -0
  61. package/dist/utils/speculativeDecoding.d.ts.map +1 -0
  62. package/dist/utils/speculativeDecoding.js +246 -0
  63. package/dist/utils/speculativeDecoding.js.map +1 -0
  64. package/dist/utils/tokenUtils.d.ts +50 -0
  65. package/dist/utils/tokenUtils.d.ts.map +1 -0
  66. package/dist/utils/tokenUtils.js +124 -0
  67. package/dist/utils/tokenUtils.js.map +1 -0
  68. package/examples/QUICKSTART.md +183 -0
  69. package/notebooks/quickstart.ipynb +157 -0
  70. package/package.json +83 -0
  71. package/python/examples.py +53 -0
  72. package/python/integrations.py +330 -0
  73. package/python/setup.py +28 -0
  74. package/python/tmlpd.py +369 -0
  75. package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  76. package/qna/TMLPD_QNA.md +751 -0
  77. package/rust/tmlpd.h +268 -0
  78. package/skill/SKILL.md +238 -0
  79. package/src/cache/prefixCache.ts +365 -0
  80. package/src/cache/responseCache.ts +147 -0
  81. package/src/cost/costTracker.ts +302 -0
  82. package/src/index.ts +224 -0
  83. package/src/memory/episodicMemory.ts +185 -0
  84. package/src/orchestration/haloOrchestrator.ts +266 -0
  85. package/src/orchestration/mctsWorkflow.ts +262 -0
  86. package/src/providers/localProvider.ts +406 -0
  87. package/src/providers/registry.ts +164 -0
  88. package/src/routing/advancedRouter.ts +406 -0
  89. package/src/tools/tmlpdTools.ts +433 -0
  90. package/src/utils/batchProcessor.ts +232 -0
  91. package/src/utils/compression.ts +325 -0
  92. package/src/utils/reliability.ts +221 -0
  93. package/src/utils/speculativeDecoding.ts +344 -0
  94. package/src/utils/tokenUtils.ts +145 -0
  95. package/tsconfig.json +18 -0
@@ -0,0 +1,183 @@
1
+ # TMLPD PI Extension - Usage Examples
2
+
3
+ ## Quick Start
4
+
5
+ ```python
6
+ from tmlpd import quick_process
7
+
8
+ # One-liner
9
+ result = quick_process("What is quantum entanglement?")
10
+ print(result["content"])
11
+ ```
12
+
13
+ ## Task Classification
14
+
15
+ ```python
16
+ from tmlpd import TMLPDLite, TaskType
17
+
18
+ lite = TMLPDLite()
19
+
20
+ # Automatic task classification
21
+ prompt = "Write Python async HTTP client"
22
+ task_type = lite.classify_task(prompt) # TaskType.CODING
23
+
24
+ # Get optimal models
25
+ models = lite.get_optimal_models(task_type, 3)
26
+ # ["codex", "claude-minimax", "claude"]
27
+ ```
28
+
29
+ ## Caching
30
+
31
+ ```python
32
+ from tmlpd import TMLPDLite
33
+
34
+ lite = TMLPDLite()
35
+
36
+ # First call - not cached
37
+ result1 = lite.process("What is 2+2?", use_cache=True)
38
+ print(f"Cached: {result1['cached']}") # False
39
+
40
+ # Second call - from cache
41
+ result2 = lite.process("What is 2+2?", use_cache=True)
42
+ print(f"Cached: {result2['cached']}") # True
43
+ ```
44
+
45
+ ## Async Client (Production)
46
+
47
+ ```python
48
+ import asyncio
49
+ from tmlpd import TMLPDClient, TMLPDConfig
50
+
51
+ async def main():
52
+ config = TMLPDConfig(
53
+ cache_ttl_seconds=3600,
54
+ daily_budget=10.0,
55
+ max_concurrent=5
56
+ )
57
+ client = TMLPDClient(config)
58
+
59
+ # Single execution
60
+ result = await client.execute("Explain quantum entanglement")
61
+
62
+ # Parallel execution
63
+ parallel = await client.execute_parallel(
64
+ "Compare Python and JavaScript",
65
+ models=["gpt-4o", "claude", "gemini"]
66
+ )
67
+
68
+ # Cost summary
69
+ summary = await client.get_cost_summary()
70
+ print(f"Total spent: ${summary.total_cost}")
71
+
72
+ asyncio.run(main())
73
+ ```
74
+
75
+ ## Cost Optimization
76
+
77
+ ```python
78
+ import asyncio
79
+ from tmlpd import TMLPDClient
80
+
81
+ async def cost_optimization():
82
+ client = TMLPDClient()
83
+
84
+ # Cheap for simple tasks
85
+ simple = await client.execute("What is 2+2?", model="cerebras/llama-3.3-70b")
86
+ print(f"Simple task cost: ${simple.cost:.6f}")
87
+
88
+ # Premium for complex tasks
89
+ complex = await client.execute("Design microservices", model="anthropic/claude-3.5-sonnet")
90
+ print(f"Complex task cost: ${complex.cost:.6f}")
91
+
92
+ asyncio.run(cost_optimization())
93
+ ```
94
+
95
+ ## LangChain Integration
96
+
97
+ ```python
98
+ from langchain.llms import BaseLLM
99
+ from tmlpd import TMLPDLite
100
+
101
+ class TMLPDLLM(BaseLLM):
102
+ def __init__(self, task_type="default"):
103
+ self.lite = TMLPDLite()
104
+ self.task_type = task_type
105
+
106
+ def _call(self, prompt: str) -> str:
107
+ result = self.lite.process(prompt)
108
+ return result["content"]
109
+
110
+ # Usage
111
+ llm = TMLPDLLM(task_type="coding")
112
+ response = llm("Write a Python function")
113
+ ```
114
+
115
+ ## API Server
116
+
117
+ ```python
118
+ # Run: node node_modules/adaptive-memory-multi-model-router/dist/server.js --port 18791
119
+ # Then use Python client:
120
+
121
+ import httpx
122
+
123
+ async def api_example():
124
+ async with httpx.AsyncClient() as client:
125
+ response = await client.post(
126
+ "http://localhost:18791/execute",
127
+ json={"prompt": "Hello world", "models": ["gpt-4o"]}
128
+ )
129
+ print(response.json())
130
+ ```
131
+
132
+ ## Task Types
133
+
134
+ | Task Type | Keywords | Best Models |
135
+ |-----------|----------|-------------|
136
+ | coding | python, javascript, code | codex, claude-minimax |
137
+ | frontend | react, vue, component | codex, claude-minimax |
138
+ | backend | api, server, database | codex, claude-glm |
139
+ | chinese | 中文, 汉语 | claude-glm, claude-minimax |
140
+ | fast | quick, simple | gemini, claude-haiku |
141
+ | premium | advanced, complex | claude-opus, gemini-pro |
142
+
143
+ ## Environment Variables
144
+
145
+ ```bash
146
+ # API Keys
147
+ export OPENAI_API_KEY="sk-..."
148
+ export ANTHROPIC_API_KEY="sk-ant-..."
149
+ export GROQ_API_KEY="gsk_..."
150
+
151
+ # TMLPD Configuration
152
+ export TMLPD_MAX_CONCURRENT=5
153
+ export TMLPD_DAILY_BUDGET=10.0
154
+ export TMLPD_CACHE_TTL=3600
155
+ ```
156
+
157
+ ## Full Example
158
+
159
+ ```python
160
+ import asyncio
161
+ from tmlpd import TMLPDClient, TMLPDLite
162
+
163
+ async def full_example():
164
+ # Lite for quick tasks
165
+ lite = TMLPDLite()
166
+ result = lite.process("What is Python?", use_cache=True)
167
+ print(f"Lite: {result['task_type']}")
168
+
169
+ # Full client for production
170
+ client = TMLPDClient()
171
+
172
+ # Batch processing
173
+ prompts = ["What is AI?", "What is ML?", "What is DL?"]
174
+ for prompt in prompts:
175
+ result = await client.execute(prompt)
176
+ print(f"Cost: ${result.cost:.6f}")
177
+
178
+ # Final stats
179
+ summary = await client.get_cost_summary()
180
+ print(f"Total: ${summary.total_cost:.6f}")
181
+
182
+ asyncio.run(full_example())
183
+ ```
@@ -0,0 +1,157 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# TMLPD PI - Quick Start Guide\n",
8
+ "\n",
9
+ "*Parallel Multi-LLM Processing for AI Agents*\n",
10
+ "\n",
11
+ "**Install:** `npm install tmlpd-pi`\n",
12
+ "\n",
13
+ "**Features:**\n",
14
+ "- 13 PI tools for AI agent discovery\n",
15
+ "- Token compression (ISON) - 20-40% reduction\n",
16
+ "- Local LLM support (Ollama/vLLM)\n",
17
+ "- Batch processing with priority"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": null,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "// Quick Start - TypeScript\n",
27
+ "import { createTMLPD, isonEncode, countTokens, BatchProcessor } from 'tmlpd-pi';\n",
28
+ "\n",
29
+ "// 1. Parallel execution across models\n",
30
+ "const tmlpd = createTMLPD();\n",
31
+ "const result = await tmlpd.executeParallel(\n",
32
+ " 'Explain quantum entanglement',\n",
33
+ " ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash']\n",
34
+ ");\n",
35
+ "console.log(result);"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "// 2. Token compression with ISON\n",
45
+ "const original = 'The quick brown fox jumps over the lazy dog';\n",
46
+ "const compressed = isonEncode(original);\n",
47
+ "console.log('Compressed:', compressed);\n",
48
+ "// Output: 'quick brown fox jumps lazy dog'"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": null,
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "// 3. Token counting & cost estimation\n",
58
+ "const tokens = countTokens('Hello world', 'gpt-4o');\n",
59
+ "const cost = estimateCost(100, 50, 'gpt-4o');\n",
60
+ "console.log(`Tokens: ${tokens}, Cost: $${cost}`);"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": null,
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "// 4. Batch processing\n",
70
+ "const batch = new BatchProcessor({ concurrency: 5 });\n",
71
+ "batch.add({ prompt: 'Task 1', priority: 'high' });\n",
72
+ "batch.add({ prompt: 'Task 2', priority: 'normal' });\n",
73
+ "const results = await batch.execute(executor);"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "markdown",
78
+ "metadata": {},
79
+ "source": [
80
+ "## Python Quick Start\n",
81
+ "\n",
82
+ "Copy `python/tmlpd.py` to your project:"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": null,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "# Python - one-liner\n",
92
+ "from tmlpd import quick_process\n",
93
+ "result = quick_process('What is quantum?')\n",
94
+ "print(result)"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "# Python - Task routing\n",
104
+ "from tmlpd import TMLPDLite, TaskType\n",
105
+ "\n",
106
+ "lite = TMLPDLite()\n",
107
+ "task_type = lite.classify_task('Write Python async function')\n",
108
+ "models = lite.get_optimal_models(task_type, 3)\n",
109
+ "print(f'Task: {task_type}, Models: {models}')"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "markdown",
114
+ "metadata": {},
115
+ "source": [
116
+ "## Framework Integrations\n",
117
+ "\n",
118
+ "### LangChain\n",
119
+ "```python\n",
120
+ "from langchain.llms import BaseLLM\n",
121
+ "class TMLPDLLM(BaseLLM):\n",
122
+ " def _call(self, prompt): return lite.process(prompt)['content']\n",
123
+ "```\n",
124
+ "\n",
125
+ "### LlamaIndex\n",
126
+ "```python\n",
127
+ "from llama_index.llms import LLM\n",
128
+ "class TMLPDLLM(LLM):\n",
129
+ " def complete(self, prompt): return lite.process(prompt)['content']\n",
130
+ "```\n",
131
+ "\n",
132
+ "## 13 PI Tools\n",
133
+ "\n",
134
+ "| Tool | Purpose |\n",
135
+ "|------|---------|\n",
136
+ "| `tmlpd_execute` | Parallel multi-model |\n",
137
+ "| `tmlpd_count_tokens` | Token counting |\n",
138
+ "| `tmlpd_compress_context` | ISON compression |\n",
139
+ "| `tmlpd_local_generate` | Ollama/vLLM |\n",
140
+ "| `tmlpd_batch_execute` | Priority batch |"
141
+ ]
142
+ }
143
+ ],
144
+ "metadata": {
145
+ "kernelspec": {
146
+ "display_name": "JavaScript",
147
+ "language": "javascript",
148
+ "name": "javascript"
149
+ },
150
+ "language_info": {
151
+ "name": "javascript",
152
+ "version": "16.0.0"
153
+ }
154
+ },
155
+ "nbformat": 4,
156
+ "nbformat_minor": 4
157
+ }
package/package.json ADDED
@@ -0,0 +1,83 @@
1
+ {
2
+ "name": "adaptive-memory-multi-model-router",
3
+ "version": "1.2.2",
4
+ "version_description": "v1.2.0 - Research-backed Multi-LLM Router based on arXiv: RouteLLM (2404.06035), RadixAttention (2312.07104), Medusa (2401.10774), FlashAttention (2407.07403). 120+ keywords for LLM/ML discoverability. 13 PI tools.",
5
+ "description": "A3M Router - Adaptive Memory Multi-Model Router with learned routing (RouteLLM), prefix caching (RadixAttention), speculative decoding (Medusa). 20x more adaptable for LLM/ML developers. Python bindings for LangChain/LlamaIndex/AutoGen/CrewAI.",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "bin": {
9
+ "a3m-router": "dist/cli.js",
10
+ "adaptive-memory-multi-model-router": "dist/cli.js"
11
+ },
12
+ "scripts": {
13
+ "build": "tsc",
14
+ "prepublish": "npm run build",
15
+ "test": "node test/verify.js",
16
+ "demo": "node demo/research-demo.js",
17
+ "python:examples": "python3 python/examples.py"
18
+ },
19
+ "keywords": [
20
+ "a3m",
21
+ "a3m-router",
22
+ "adaptive",
23
+ "memory-based",
24
+ "multi-model-router",
25
+ "llm-router",
26
+ "adaptive-router",
27
+ "memory-based-router",
28
+ "routellm",
29
+ "radix-attention",
30
+ "speculative-decoding",
31
+ "medusa",
32
+ "llm",
33
+ "multi-llm",
34
+ "parallel-ai",
35
+ "ai-agents",
36
+ "python",
37
+ "langchain",
38
+ "llamaindex",
39
+ "autogen",
40
+ "crewai",
41
+ "agent-discoverable",
42
+ "ai-native",
43
+ "cost-optimization"
44
+ ],
45
+ "author": "Subho Das",
46
+ "license": "MIT",
47
+ "homepage": "https://github.com/Das-rebel/tmlpd-skill#readme",
48
+ "repository": {
49
+ "type": "git",
50
+ "url": "https://github.com/Das-rebel/tmlpd-skill.git"
51
+ },
52
+ "bugs": {
53
+ "url": "https://github.com/Das-rebel/tmlpd-skill/issues"
54
+ },
55
+ "dependencies": {
56
+ "nanoid": "^5.0.0"
57
+ },
58
+ "devDependencies": {
59
+ "typescript": "^5.0.0",
60
+ "@types/node": "^20.0.0"
61
+ },
62
+ "engines": {
63
+ "node": ">=18.0.0"
64
+ },
65
+ "categories": [
66
+ "AI",
67
+ "Machine Learning",
68
+ "Developer Tools",
69
+ "Programming"
70
+ ],
71
+ "funding": {
72
+ "type": "individual",
73
+ "url": "https://github.com/sponsors/Das-rebel"
74
+ },
75
+ "shortName": "A3M-Router",
76
+ "displayName": "A3M Router",
77
+ "badges": {
78
+ "npm": "https://img.shields.io/npm/v/adaptive-memory-multi-model-router",
79
+ "downloads": "https://img.shields.io/npm/dm/adaptive-memory-multi-model-router",
80
+ "ai-native": "https://img.shields.io/badge/AI-Native-brightgreen",
81
+ "research-backed": "https://img.shields.io/badge/Research-ArXiv-blue"
82
+ }
83
+ }
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TMLPD Python Usage Examples
4
+ Run: python3 examples.py
5
+ """
6
+
7
+ from tmlpd import TMLPDLite, quick_process, TaskType
8
+
9
+ def main():
10
+ print("=" * 60)
11
+ print("TMLPD Python - Quick Examples")
12
+ print("=" * 60)
13
+
14
+ # Example 1: One-liner
15
+ print("\n1. One-liner usage:")
16
+ result = quick_process("What is quantum entanglement?")
17
+ print(f" Task type: {result['task_type']}")
18
+
19
+ # Example 2: Task classification
20
+ print("\n2. Task classification:")
21
+ lite = TMLPDLite()
22
+ prompts = [
23
+ "Write Python async function",
24
+ "Explain neural network",
25
+ "Build React component",
26
+ "Create PostgreSQL schema"
27
+ ]
28
+ for p in prompts:
29
+ tt = lite.classify_task(p)
30
+ models = lite.get_optimal_models(tt, 2)
31
+ print(f" '{p[:30]}...' -> {tt.value} -> {models}")
32
+
33
+ # Example 3: Caching
34
+ print("\n3. Caching:")
35
+ prompt = "What is the capital of France?"
36
+ r1 = lite.process(prompt, use_cache=True)
37
+ r2 = lite.process(prompt, use_cache=True)
38
+ print(f" First: cached={r1['cached']}")
39
+ print(f" Second: cached={r2['cached']}")
40
+
41
+ # Example 4: Batch processing
42
+ print("\n4. Batch processing:")
43
+ batch = ["Python", "JavaScript", "TypeScript", "Rust", "Go"]
44
+ results = [lite.process(f"What is {lang}?") for lang in batch]
45
+ for lang, res in zip(batch, results):
46
+ print(f" {lang}: {res['task_type']}")
47
+
48
+ print("\n" + "=" * 60)
49
+ print("Examples completed!")
50
+ print("=" * 60)
51
+
52
+ if __name__ == "__main__":
53
+ main()