npm - adaptive-memory-multi-model-router - Versions diffs - 1.2.2 - Mend

adaptive-memory-multi-model-router 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/README.md +114 -0
package/demo/research-demo.js +266 -0
package/dist/cache/prefixCache.d.ts +114 -0
package/dist/cache/prefixCache.d.ts.map +1 -0
package/dist/cache/prefixCache.js +285 -0
package/dist/cache/prefixCache.js.map +1 -0
package/dist/cache/responseCache.d.ts +58 -0
package/dist/cache/responseCache.d.ts.map +1 -0
package/dist/cache/responseCache.js +153 -0
package/dist/cache/responseCache.js.map +1 -0
package/dist/cli.js +59 -0
package/dist/cost/costTracker.d.ts +95 -0
package/dist/cost/costTracker.d.ts.map +1 -0
package/dist/cost/costTracker.js +240 -0
package/dist/cost/costTracker.js.map +1 -0
package/dist/index.d.ts +723 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +239 -0
package/dist/index.js.map +1 -0
package/dist/memory/episodicMemory.d.ts +82 -0
package/dist/memory/episodicMemory.d.ts.map +1 -0
package/dist/memory/episodicMemory.js +145 -0
package/dist/memory/episodicMemory.js.map +1 -0
package/dist/orchestration/haloOrchestrator.d.ts +102 -0
package/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
package/dist/orchestration/haloOrchestrator.js +207 -0
package/dist/orchestration/haloOrchestrator.js.map +1 -0
package/dist/orchestration/mctsWorkflow.d.ts +85 -0
package/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
package/dist/orchestration/mctsWorkflow.js +210 -0
package/dist/orchestration/mctsWorkflow.js.map +1 -0
package/dist/providers/localProvider.d.ts +102 -0
package/dist/providers/localProvider.d.ts.map +1 -0
package/dist/providers/localProvider.js +338 -0
package/dist/providers/localProvider.js.map +1 -0
package/dist/providers/registry.d.ts +55 -0
package/dist/providers/registry.d.ts.map +1 -0
package/dist/providers/registry.js +138 -0
package/dist/providers/registry.js.map +1 -0
package/dist/routing/advancedRouter.d.ts +68 -0
package/dist/routing/advancedRouter.d.ts.map +1 -0
package/dist/routing/advancedRouter.js +332 -0
package/dist/routing/advancedRouter.js.map +1 -0
package/dist/tools/tmlpdTools.d.ts +101 -0
package/dist/tools/tmlpdTools.d.ts.map +1 -0
package/dist/tools/tmlpdTools.js +368 -0
package/dist/tools/tmlpdTools.js.map +1 -0
package/dist/utils/batchProcessor.d.ts +96 -0
package/dist/utils/batchProcessor.d.ts.map +1 -0
package/dist/utils/batchProcessor.js +170 -0
package/dist/utils/batchProcessor.js.map +1 -0
package/dist/utils/compression.d.ts +61 -0
package/dist/utils/compression.d.ts.map +1 -0
package/dist/utils/compression.js +281 -0
package/dist/utils/compression.js.map +1 -0
package/dist/utils/reliability.d.ts +74 -0
package/dist/utils/reliability.d.ts.map +1 -0
package/dist/utils/reliability.js +177 -0
package/dist/utils/reliability.js.map +1 -0
package/dist/utils/speculativeDecoding.d.ts +117 -0
package/dist/utils/speculativeDecoding.d.ts.map +1 -0
package/dist/utils/speculativeDecoding.js +246 -0
package/dist/utils/speculativeDecoding.js.map +1 -0
package/dist/utils/tokenUtils.d.ts +50 -0
package/dist/utils/tokenUtils.d.ts.map +1 -0
package/dist/utils/tokenUtils.js +124 -0
package/dist/utils/tokenUtils.js.map +1 -0
package/examples/QUICKSTART.md +183 -0
package/notebooks/quickstart.ipynb +157 -0
package/package.json +83 -0
package/python/examples.py +53 -0
package/python/integrations.py +330 -0
package/python/setup.py +28 -0
package/python/tmlpd.py +369 -0
package/qna/REDDIT_GAP_ANALYSIS.md +299 -0
package/qna/TMLPD_QNA.md +751 -0
package/rust/tmlpd.h +268 -0
package/skill/SKILL.md +238 -0
package/src/cache/prefixCache.ts +365 -0
package/src/cache/responseCache.ts +147 -0
package/src/cost/costTracker.ts +302 -0
package/src/index.ts +224 -0
package/src/memory/episodicMemory.ts +185 -0
package/src/orchestration/haloOrchestrator.ts +266 -0
package/src/orchestration/mctsWorkflow.ts +262 -0
package/src/providers/localProvider.ts +406 -0
package/src/providers/registry.ts +164 -0
package/src/routing/advancedRouter.ts +406 -0
package/src/tools/tmlpdTools.ts +433 -0
package/src/utils/batchProcessor.ts +232 -0
package/src/utils/compression.ts +325 -0
package/src/utils/reliability.ts +221 -0
package/src/utils/speculativeDecoding.ts +344 -0
package/src/utils/tokenUtils.ts +145 -0
package/tsconfig.json +18 -0

package/examples/QUICKSTART.md ADDED

@@ -0,0 +1,183 @@
+# TMLPD PI Extension - Usage Examples
+## Quick Start
+```python
+from tmlpd import quick_process
+# One-liner
+result = quick_process("What is quantum entanglement?")
+print(result["content"])
+```
+## Task Classification
+```python
+from tmlpd import TMLPDLite, TaskType
+lite = TMLPDLite()
+# Automatic task classification
+prompt = "Write Python async HTTP client"
+task_type = lite.classify_task(prompt)  # TaskType.CODING
+# Get optimal models
+models = lite.get_optimal_models(task_type, 3)
+# ["codex", "claude-minimax", "claude"]
+```
+## Caching
+```python
+from tmlpd import TMLPDLite
+lite = TMLPDLite()
+# First call - not cached
+result1 = lite.process("What is 2+2?", use_cache=True)
+print(f"Cached: {result1['cached']}")  # False
+# Second call - from cache
+result2 = lite.process("What is 2+2?", use_cache=True)
+print(f"Cached: {result2['cached']}")  # True
+```
+## Async Client (Production)
+```python
+import asyncio
+from tmlpd import TMLPDClient, TMLPDConfig
+async def main():
+    config = TMLPDConfig(
+        cache_ttl_seconds=3600,
+        daily_budget=10.0,
+        max_concurrent=5
+    )
+    client = TMLPDClient(config)
+    # Single execution
+    result = await client.execute("Explain quantum entanglement")
+    # Parallel execution
+    parallel = await client.execute_parallel(
+        "Compare Python and JavaScript",
+        models=["gpt-4o", "claude", "gemini"]
+    )
+    # Cost summary
+    summary = await client.get_cost_summary()
+    print(f"Total spent: ${summary.total_cost}")
+asyncio.run(main())
+```
+## Cost Optimization
+```python
+import asyncio
+from tmlpd import TMLPDClient
+async def cost_optimization():
+    client = TMLPDClient()
+    # Cheap for simple tasks
+    simple = await client.execute("What is 2+2?", model="cerebras/llama-3.3-70b")
+    print(f"Simple task cost: ${simple.cost:.6f}")
+    # Premium for complex tasks
+    complex = await client.execute("Design microservices", model="anthropic/claude-3.5-sonnet")
+    print(f"Complex task cost: ${complex.cost:.6f}")
+asyncio.run(cost_optimization())
+```
+## LangChain Integration
+```python
+from langchain.llms import BaseLLM
+from tmlpd import TMLPDLite
+class TMLPDLLM(BaseLLM):
+    def __init__(self, task_type="default"):
+        self.lite = TMLPDLite()
+        self.task_type = task_type
+    def _call(self, prompt: str) -> str:
+        result = self.lite.process(prompt)
+        return result["content"]
+# Usage
+llm = TMLPDLLM(task_type="coding")
+response = llm("Write a Python function")
+```
+## API Server
+```python
+# Run: node node_modules/adaptive-memory-multi-model-router/dist/server.js --port 18791
+# Then use Python client:
+import httpx
+async def api_example():
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            "http://localhost:18791/execute",
+            json={"prompt": "Hello world", "models": ["gpt-4o"]}
+        )
+        print(response.json())
+```
+## Task Types
+| Task Type | Keywords | Best Models |
+|-----------|----------|-------------|
+| coding | python, javascript, code | codex, claude-minimax |
+| frontend | react, vue, component | codex, claude-minimax |
+| backend | api, server, database | codex, claude-glm |
+| chinese | 中文, 汉语 | claude-glm, claude-minimax |
+| fast | quick, simple | gemini, claude-haiku |
+| premium | advanced, complex | claude-opus, gemini-pro |
+## Environment Variables
+```bash
+# API Keys
+export OPENAI_API_KEY="sk-..."
+export ANTHROPIC_API_KEY="sk-ant-..."
+export GROQ_API_KEY="gsk_..."
+# TMLPD Configuration
+export TMLPD_MAX_CONCURRENT=5
+export TMLPD_DAILY_BUDGET=10.0
+export TMLPD_CACHE_TTL=3600
+```
+## Full Example
+```python
+import asyncio
+from tmlpd import TMLPDClient, TMLPDLite
+async def full_example():
+    # Lite for quick tasks
+    lite = TMLPDLite()
+    result = lite.process("What is Python?", use_cache=True)
+    print(f"Lite: {result['task_type']}")
+    # Full client for production
+    client = TMLPDClient()
+    # Batch processing
+    prompts = ["What is AI?", "What is ML?", "What is DL?"]
+    for prompt in prompts:
+        result = await client.execute(prompt)
+        print(f"Cost: ${result.cost:.6f}")
+    # Final stats
+    summary = await client.get_cost_summary()
+    print(f"Total: ${summary.total_cost:.6f}")
+asyncio.run(full_example())
+```

package/notebooks/quickstart.ipynb ADDED

@@ -0,0 +1,157 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TMLPD PI - Quick Start Guide\n",
+    "\n",
+    "*Parallel Multi-LLM Processing for AI Agents*\n",
+    "\n",
+    "**Install:** `npm install tmlpd-pi`\n",
+    "\n",
+    "**Features:**\n",
+    "- 13 PI tools for AI agent discovery\n",
+    "- Token compression (ISON) - 20-40% reduction\n",
+    "- Local LLM support (Ollama/vLLM)\n",
+    "- Batch processing with priority"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// Quick Start - TypeScript\n",
+    "import { createTMLPD, isonEncode, countTokens, BatchProcessor } from 'tmlpd-pi';\n",
+    "\n",
+    "// 1. Parallel execution across models\n",
+    "const tmlpd = createTMLPD();\n",
+    "const result = await tmlpd.executeParallel(\n",
+    "  'Explain quantum entanglement',\n",
+    "  ['gpt-4o', 'claude-3.5-sonnet', 'gemini-2.0-flash']\n",
+    ");\n",
+    "console.log(result);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// 2. Token compression with ISON\n",
+    "const original = 'The quick brown fox jumps over the lazy dog';\n",
+    "const compressed = isonEncode(original);\n",
+    "console.log('Compressed:', compressed);\n",
+    "// Output: 'quick brown fox jumps lazy dog'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// 3. Token counting & cost estimation\n",
+    "const tokens = countTokens('Hello world', 'gpt-4o');\n",
+    "const cost = estimateCost(100, 50, 'gpt-4o');\n",
+    "console.log(`Tokens: ${tokens}, Cost: $${cost}`);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// 4. Batch processing\n",
+    "const batch = new BatchProcessor({ concurrency: 5 });\n",
+    "batch.add({ prompt: 'Task 1', priority: 'high' });\n",
+    "batch.add({ prompt: 'Task 2', priority: 'normal' });\n",
+    "const results = await batch.execute(executor);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Python Quick Start\n",
+    "\n",
+    "Copy `python/tmlpd.py` to your project:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Python - one-liner\n",
+    "from tmlpd import quick_process\n",
+    "result = quick_process('What is quantum?')\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Python - Task routing\n",
+    "from tmlpd import TMLPDLite, TaskType\n",
+    "\n",
+    "lite = TMLPDLite()\n",
+    "task_type = lite.classify_task('Write Python async function')\n",
+    "models = lite.get_optimal_models(task_type, 3)\n",
+    "print(f'Task: {task_type}, Models: {models}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Framework Integrations\n",
+    "\n",
+    "### LangChain\n",
+    "```python\n",
+    "from langchain.llms import BaseLLM\n",
+    "class TMLPDLLM(BaseLLM):\n",
+    "    def _call(self, prompt): return lite.process(prompt)['content']\n",
+    "```\n",
+    "\n",
+    "### LlamaIndex\n",
+    "```python\n",
+    "from llama_index.llms import LLM\n",
+    "class TMLPDLLM(LLM):\n",
+    "    def complete(self, prompt): return lite.process(prompt)['content']\n",
+    "```\n",
+    "\n",
+    "## 13 PI Tools\n",
+    "\n",
+    "| Tool | Purpose |\n",
+    "|------|---------|\n",
+    "| `tmlpd_execute` | Parallel multi-model |\n",
+    "| `tmlpd_count_tokens` | Token counting |\n",
+    "| `tmlpd_compress_context` | ISON compression |\n",
+    "| `tmlpd_local_generate` | Ollama/vLLM |\n",
+    "| `tmlpd_batch_execute` | Priority batch |"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "JavaScript",
+   "language": "javascript",
+   "name": "javascript"
+  },
+  "language_info": {
+   "name": "javascript",
+   "version": "16.0.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

package/package.json ADDED

@@ -0,0 +1,83 @@
+{
+  "name": "adaptive-memory-multi-model-router",
+  "version": "1.2.2",
+  "version_description": "v1.2.0 - Research-backed Multi-LLM Router based on arXiv: RouteLLM (2404.06035), RadixAttention (2312.07104), Medusa (2401.10774), FlashAttention (2407.07403). 120+ keywords for LLM/ML discoverability. 13 PI tools.",
+  "description": "A3M Router - Adaptive Memory Multi-Model Router with learned routing (RouteLLM), prefix caching (RadixAttention), speculative decoding (Medusa). 20x more adaptable for LLM/ML developers. Python bindings for LangChain/LlamaIndex/AutoGen/CrewAI.",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "bin": {
+    "a3m-router": "dist/cli.js",
+    "adaptive-memory-multi-model-router": "dist/cli.js"
+  },
+  "scripts": {
+    "build": "tsc",
+    "prepublish": "npm run build",
+    "test": "node test/verify.js",
+    "demo": "node demo/research-demo.js",
+    "python:examples": "python3 python/examples.py"
+  },
+  "keywords": [
+    "a3m",
+    "a3m-router",
+    "adaptive",
+    "memory-based",
+    "multi-model-router",
+    "llm-router",
+    "adaptive-router",
+    "memory-based-router",
+    "routellm",
+    "radix-attention",
+    "speculative-decoding",
+    "medusa",
+    "llm",
+    "multi-llm",
+    "parallel-ai",
+    "ai-agents",
+    "python",
+    "langchain",
+    "llamaindex",
+    "autogen",
+    "crewai",
+    "agent-discoverable",
+    "ai-native",
+    "cost-optimization"
+  ],
+  "author": "Subho Das",
+  "license": "MIT",
+  "homepage": "https://github.com/Das-rebel/tmlpd-skill#readme",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/Das-rebel/tmlpd-skill.git"
+  },
+  "bugs": {
+    "url": "https://github.com/Das-rebel/tmlpd-skill/issues"
+  },
+  "dependencies": {
+    "nanoid": "^5.0.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.0.0",
+    "@types/node": "^20.0.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "categories": [
+    "AI",
+    "Machine Learning",
+    "Developer Tools",
+    "Programming"
+  ],
+  "funding": {
+    "type": "individual",
+    "url": "https://github.com/sponsors/Das-rebel"
+  },
+  "shortName": "A3M-Router",
+  "displayName": "A3M Router",
+  "badges": {
+    "npm": "https://img.shields.io/npm/v/adaptive-memory-multi-model-router",
+    "downloads": "https://img.shields.io/npm/dm/adaptive-memory-multi-model-router",
+    "ai-native": "https://img.shields.io/badge/AI-Native-brightgreen",
+    "research-backed": "https://img.shields.io/badge/Research-ArXiv-blue"
+  }
+}

package/python/examples.py ADDED

@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+TMLPD Python Usage Examples
+Run: python3 examples.py
+"""
+from tmlpd import TMLPDLite, quick_process, TaskType
+def main():
+    print("=" * 60)
+    print("TMLPD Python - Quick Examples")
+    print("=" * 60)
+    # Example 1: One-liner
+    print("\n1. One-liner usage:")
+    result = quick_process("What is quantum entanglement?")
+    print(f"   Task type: {result['task_type']}")
+    # Example 2: Task classification
+    print("\n2. Task classification:")
+    lite = TMLPDLite()
+    prompts = [
+        "Write Python async function",
+        "Explain neural network",
+        "Build React component",
+        "Create PostgreSQL schema"
+    ]
+    for p in prompts:
+        tt = lite.classify_task(p)
+        models = lite.get_optimal_models(tt, 2)
+        print(f"   '{p[:30]}...' -> {tt.value} -> {models}")
+    # Example 3: Caching
+    print("\n3. Caching:")
+    prompt = "What is the capital of France?"
+    r1 = lite.process(prompt, use_cache=True)
+    r2 = lite.process(prompt, use_cache=True)
+    print(f"   First:  cached={r1['cached']}")
+    print(f"   Second: cached={r2['cached']}")
+    # Example 4: Batch processing
+    print("\n4. Batch processing:")
+    batch = ["Python", "JavaScript", "TypeScript", "Rust", "Go"]
+    results = [lite.process(f"What is {lang}?") for lang in batch]
+    for lang, res in zip(batch, results):
+        print(f"   {lang}: {res['task_type']}")
+    print("\n" + "=" * 60)
+    print("Examples completed!")
+    print("=" * 60)
+if __name__ == "__main__":
+    main()