adaptive-memory-multi-model-router 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -94
- package/dist/memory/autoFetch.js +54 -17
- package/dist/memory/memoryTree.js +94 -9
- package/dist/providers/registry.js +102 -133
- package/dist/utils/enhancedCompression.js +51 -128
- package/package.json +81 -147
- package/package.json.tmp +0 -0
package/README.md
CHANGED
|
@@ -21,7 +21,7 @@ You're paying **too much** for LLM inference. Running GPT-4 on simple queries. U
|
|
|
21
21
|
|
|
22
22
|
## The Solution
|
|
23
23
|
|
|
24
|
-
**A3M Router** learns your usage patterns and routes each request to the optimal model—automatically. Save 40% on costs. Get 5-10x speedups.
|
|
24
|
+
**A3M Router** learns your usage patterns and routes each request to the optimal model—automatically. Save 40% on costs. Get 5-10x speedups. Built on research from RouteLLM, RadixAttention, and Medusa.
|
|
25
25
|
|
|
26
26
|
```bash
|
|
27
27
|
npm install adaptive-memory-multi-model-router
|
|
@@ -29,16 +29,18 @@ npm install adaptive-memory-multi-model-router
|
|
|
29
29
|
|
|
30
30
|
---
|
|
31
31
|
|
|
32
|
-
## Features
|
|
32
|
+
## Features (v1.4.0)
|
|
33
33
|
|
|
34
34
|
| Capability | How It Works | Result |
|
|
35
35
|
|------------|-------------|--------|
|
|
36
36
|
| **Learned Routing** | RouteLLM cost-quality tradeoff | 40% cost reduction |
|
|
37
|
-
| **Adaptive Memory** |
|
|
37
|
+
| **Adaptive Memory** | Memory Tree + Episodic | 20x more accurate routing |
|
|
38
|
+
| **Auto-Fetch** | 20-min sync loop | Context-aware decisions |
|
|
38
39
|
| **Prefix Caching** | RadixAttention shared prompts | 5-10x speedup |
|
|
39
40
|
| **Speculative Decoding** | Medusa tree verification | 2-3x faster generation |
|
|
40
|
-
| **Token Compression** |
|
|
41
|
+
| **Token Compression** | TokenJuice-style (80% reduction) | 20-80% fewer tokens |
|
|
41
42
|
| **Circuit Breaker** | Exponential backoff | 99.9% uptime |
|
|
43
|
+
| **Obsidian Vault** | Markdown export | Human-readable logs |
|
|
42
44
|
|
|
43
45
|
---
|
|
44
46
|
|
|
@@ -50,8 +52,8 @@ npm install adaptive-memory-multi-model-router
|
|
|
50
52
|
import { createA3MRouter } from 'adaptive-memory-multi-model-router';
|
|
51
53
|
|
|
52
54
|
const router = createA3MRouter({
|
|
53
|
-
memory: true,
|
|
54
|
-
costBudget: 0.05
|
|
55
|
+
memory: true,
|
|
56
|
+
costBudget: 0.05
|
|
55
57
|
});
|
|
56
58
|
|
|
57
59
|
const result = await router.route({
|
|
@@ -67,10 +69,7 @@ console.log(result.output);
|
|
|
67
69
|
from adaptive_memory_multi_model_router import A3MRouter
|
|
68
70
|
|
|
69
71
|
router = A3MRouter()
|
|
70
|
-
result = router.route(
|
|
71
|
-
prompt="Analyze this dataset",
|
|
72
|
-
budget=0.02
|
|
73
|
-
)
|
|
72
|
+
result = router.route(prompt="Analyze this dataset", budget=0.02)
|
|
74
73
|
print(result.output)
|
|
75
74
|
```
|
|
76
75
|
|
|
@@ -79,114 +78,59 @@ print(result.output)
|
|
|
79
78
|
```bash
|
|
80
79
|
npx a3m-router route "Explain quantum computing"
|
|
81
80
|
npx a3m-router parallel "task1" "task2" "task3"
|
|
82
|
-
npx a3m-router cost
|
|
83
81
|
```
|
|
84
82
|
|
|
85
83
|
---
|
|
86
84
|
|
|
87
|
-
##
|
|
88
|
-
|
|
89
|
-
| Provider | Best For | Speed | Cost |
|
|
90
|
-
|----------|----------|-------|------|
|
|
91
|
-
| **OpenAI** | GPT-4o, GPT-4o-mini | Fast | $ |
|
|
92
|
-
| **OpenRouter** | 100+ models | Varies | $$ |
|
|
93
|
-
| **Groq** | Llama-3.3-70B | **Fastest** | Free tier |
|
|
94
|
-
| **Cerebras** | Llama-3.3-70B | Ultra-fast | Free tier |
|
|
95
|
-
| **Anthropic** | Claude-3.5-Sonnet | Fast | $$$ |
|
|
96
|
-
| **Google** | Gemini-Pro/Flash | Fast | $ |
|
|
97
|
-
| **DeepSeek** | Coding, Math | Fast | $ |
|
|
98
|
-
| **Fireworks** | Mixtral-8x7B | Fast | $ |
|
|
99
|
-
| **Perplexity** | Real-time search | Fast | $ |
|
|
100
|
-
| **Cohere** | RAG, Embeddings | Fast | $ |
|
|
101
|
-
| **Mistral** | Large/Small | Fast | $ |
|
|
102
|
-
| **AWS Bedrock** | Claude/Llama | Fast | $$$ |
|
|
103
|
-
| **xAI** | Grok-2 | Fast | $ |
|
|
104
|
-
| **Ollama** | Local models | Varies | **Free** |
|
|
85
|
+
## What's New in v1.4.0
|
|
105
86
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
import { createIntegration } from 'adaptive-memory-multi-model-router/integrations';
|
|
112
|
-
|
|
113
|
-
// GitHub - PRs, Issues, Repos
|
|
114
|
-
const github = createIntegration('github', { apiKey: 'ghp_...' });
|
|
115
|
-
await github.createIssue('owner', 'repo', 'Bug fix', 'Description');
|
|
116
|
-
|
|
117
|
-
// Slack - Messaging
|
|
118
|
-
const slack = createIntegration('slack', { webhookUrl: 'https://hooks.slack.com/...' });
|
|
119
|
-
await slack.sendMessage('#dev-team', 'Build complete!');
|
|
87
|
+
- **Enhanced Compression** - TokenJuice-style, up to 80% reduction
|
|
88
|
+
- **Auto-Fetch Sync** - 20-minute interval context sync
|
|
89
|
+
- **Memory Tree** - Hierarchical scoring and chunking
|
|
90
|
+
- **Obsidian Vault** - Markdown export for human review
|
|
91
|
+
- **OAuth Manager** - One-click GitHub, Slack, Gmail, Notion
|
|
120
92
|
|
|
121
|
-
|
|
122
|
-
const telegram = createIntegration('telegram', { botToken: '...' });
|
|
123
|
-
await telegram.sendMessage(chatId, 'Hello from A3M Router!');
|
|
124
|
-
|
|
125
|
-
// Notion - Docs & Databases
|
|
126
|
-
const notion = createIntegration('notion', { apiKey: 'secret_...' });
|
|
127
|
-
await notion.queryDatabase('database-id');
|
|
93
|
+
---
|
|
128
94
|
|
|
129
|
-
|
|
130
|
-
const linear = createIntegration('linear', { apiKey: 'lin_api_' });
|
|
131
|
-
await linear.createIssue('Fix auth bug', 'Critical', 'team-id');
|
|
95
|
+
## LLM Providers (14)
|
|
132
96
|
|
|
133
|
-
|
|
134
|
-
```
|
|
97
|
+
OpenAI, OpenRouter, Groq, Cerebras, Anthropic, Google, DeepSeek, Fireworks, Perplexity, Cohere, Mistral, AWS Bedrock, xAI, Ollama
|
|
135
98
|
|
|
136
99
|
---
|
|
137
100
|
|
|
138
|
-
##
|
|
139
|
-
|
|
140
|
-
**LangChain, LlamaIndex, AutoGen, CrewAI, HuggingFace** — all supported.
|
|
141
|
-
|
|
142
|
-
```python
|
|
143
|
-
from langchain import LLMChain
|
|
144
|
-
from adaptive_memory_multi_model_router import A3MRouter
|
|
101
|
+
## Agent & Tool Integrations (10)
|
|
145
102
|
|
|
146
|
-
|
|
147
|
-
router = A3MRouter(provider='openai')
|
|
148
|
-
chain = LLMChain(llm=router, prompt=my_prompt)
|
|
149
|
-
result = chain.run("your query")
|
|
150
|
-
```
|
|
103
|
+
GitHub, Slack, Telegram, Notion, Linear, Jira, Gmail, Discord, Airtable, Google Calendar
|
|
151
104
|
|
|
152
105
|
---
|
|
153
106
|
|
|
154
107
|
## Research-Backed
|
|
155
108
|
|
|
156
|
-
A3M Router implements techniques from peer-reviewed research—not experiments:
|
|
157
|
-
|
|
158
109
|
| Paper | Technique | Impact |
|
|
159
110
|
|-------|-----------|--------|
|
|
160
|
-
| [RouteLLM](https://arxiv.org/abs/2404.06035) | Learned
|
|
111
|
+
| [RouteLLM](https://arxiv.org/abs/2404.06035) | Learned routing | 40% cost reduction |
|
|
161
112
|
| [RadixAttention](https://arxiv.org/abs/2312.07104) | Prefix caching | 5-10x speedup |
|
|
162
113
|
| [Medusa](https://arxiv.org/abs/2401.10774) | Speculative decoding | 2-3x faster |
|
|
163
|
-
| [LLMLingua](https://arxiv.
|
|
114
|
+
| [LLMLingua](https://arxiv.org/abs/2403.12968) | Token compression | 20-80% fewer tokens |
|
|
164
115
|
|
|
165
116
|
---
|
|
166
117
|
|
|
167
118
|
## CLI Reference
|
|
168
119
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
| `a3m-router local "prompt"` | Local Ollama execution |
|
|
120
|
+
```bash
|
|
121
|
+
a3m-router route "prompt" # Smart routing
|
|
122
|
+
a3m-router parallel "t1" "t2" # Parallel execution
|
|
123
|
+
a3m-router compare "prompt" # Compare models
|
|
124
|
+
a3m-router cost # Show costs
|
|
125
|
+
a3m-router compress "text" # Token compression
|
|
126
|
+
a3m-router local "prompt" # Local Ollama
|
|
127
|
+
```
|
|
178
128
|
|
|
179
129
|
---
|
|
180
130
|
|
|
181
131
|
## Contributing
|
|
182
132
|
|
|
183
|
-
Issues and PRs welcome!
|
|
184
|
-
|
|
185
|
-
1. Fork the repo
|
|
186
|
-
2. Create your branch (`git checkout -b feature/amazing`)
|
|
187
|
-
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
188
|
-
4. Push to the branch (`git push origin feature/amazing`)
|
|
189
|
-
5. Open a Pull Request
|
|
133
|
+
Issues and PRs welcome!
|
|
190
134
|
|
|
191
135
|
---
|
|
192
136
|
|
|
@@ -194,10 +138,3 @@ Issues and PRs welcome!
|
|
|
194
138
|
|
|
195
139
|
MIT © Das-rebel
|
|
196
140
|
|
|
197
|
-
---
|
|
198
|
-
|
|
199
|
-
<div align="center">
|
|
200
|
-
|
|
201
|
-
**A3M Router** — Built for developers who care about cost, speed, and quality.
|
|
202
|
-
|
|
203
|
-
</div>
|
package/dist/memory/autoFetch.js
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Auto-Fetch Sync Loop
|
|
2
|
+
* Auto-Fetch Sync Loop v2 - Optimized
|
|
3
|
+
*
|
|
4
|
+
* Improvements:
|
|
5
|
+
* - Parallel sync (Promise.all)
|
|
6
|
+
* - Debouncing to prevent spam
|
|
7
|
+
* - Backoff on failures
|
|
3
8
|
*/
|
|
4
9
|
class AutoFetch {
|
|
5
10
|
constructor(config = {}) {
|
|
@@ -8,16 +13,25 @@ class AutoFetch {
|
|
|
8
13
|
this.targets = new Set(config.targets || ['github', 'notion', 'slack']);
|
|
9
14
|
this.lastSync = new Map();
|
|
10
15
|
this.syncHandlers = new Map();
|
|
16
|
+
this.failedCounts = new Map();
|
|
11
17
|
this.timer = null;
|
|
18
|
+
this.debounceMs = 5000;
|
|
19
|
+
this.lastSyncTime = 0;
|
|
12
20
|
this.setupDefaultHandlers();
|
|
13
21
|
}
|
|
14
22
|
|
|
15
23
|
setupDefaultHandlers() {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
24
|
+
const handlers = {
|
|
25
|
+
github: async () => ({ target: 'github', success: true, items: 0, timestamp: Date.now() }),
|
|
26
|
+
notion: async () => ({ target: 'notion', success: true, items: 0, timestamp: Date.now() }),
|
|
27
|
+
slack: async () => ({ target: 'slack', success: true, items: 0, timestamp: Date.now() }),
|
|
28
|
+
gmail: async () => ({ target: 'gmail', success: true, items: 0, timestamp: Date.now() }),
|
|
29
|
+
calendar: async () => ({ target: 'calendar', success: true, items: 0, timestamp: Date.now() })
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
for (const [name, handler] of Object.entries(handlers)) {
|
|
33
|
+
this.syncHandlers.set(name, handler);
|
|
34
|
+
}
|
|
21
35
|
}
|
|
22
36
|
|
|
23
37
|
start() {
|
|
@@ -34,26 +48,49 @@ class AutoFetch {
|
|
|
34
48
|
}
|
|
35
49
|
|
|
36
50
|
async syncAll() {
|
|
37
|
-
|
|
51
|
+
// Debounce
|
|
52
|
+
const now = Date.now();
|
|
53
|
+
if (now - this.lastSyncTime < this.debounceMs) return;
|
|
54
|
+
this.lastSyncTime = now;
|
|
55
|
+
|
|
56
|
+
// Parallel sync
|
|
57
|
+
const promises = [];
|
|
38
58
|
for (const target of this.targets) {
|
|
39
59
|
const handler = this.syncHandlers.get(target);
|
|
40
60
|
if (handler) {
|
|
41
|
-
|
|
42
|
-
const result = await handler();
|
|
43
|
-
this.lastSync.set(target, result);
|
|
44
|
-
results.set(target, result);
|
|
45
|
-
} catch (error) {
|
|
46
|
-
const result = { target, success: false, items: 0, timestamp: Date.now(), error: error.message };
|
|
47
|
-
this.lastSync.set(target, result);
|
|
48
|
-
results.set(target, result);
|
|
49
|
-
}
|
|
61
|
+
promises.push(this.syncTarget(target, handler));
|
|
50
62
|
}
|
|
51
63
|
}
|
|
64
|
+
|
|
65
|
+
const results = await Promise.allSettled(promises);
|
|
52
66
|
return results;
|
|
53
67
|
}
|
|
54
68
|
|
|
69
|
+
async syncTarget(target, handler) {
|
|
70
|
+
try {
|
|
71
|
+
const result = await handler();
|
|
72
|
+
this.lastSync.set(target, result);
|
|
73
|
+
this.failedCounts.set(target, 0);
|
|
74
|
+
return result;
|
|
75
|
+
} catch (error) {
|
|
76
|
+
const failed = this.failedCounts.get(target) || 0;
|
|
77
|
+
this.failedCounts.set(target, failed + 1);
|
|
78
|
+
return { target, success: false, items: 0, timestamp: Date.now(), error: error.message };
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
55
82
|
getLastSync(target) { return this.lastSync.get(target); }
|
|
56
|
-
|
|
83
|
+
|
|
84
|
+
getStats() {
|
|
85
|
+
const total = this.failedCounts.size;
|
|
86
|
+
const failed = Array.from(this.failedCounts.values()).filter(f => f > 0).length;
|
|
87
|
+
return { totalTargets: total, failedTargets: failed };
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
addHandler(target, handler) {
|
|
91
|
+
this.syncHandlers.set(target, handler);
|
|
92
|
+
this.targets.add(target);
|
|
93
|
+
}
|
|
57
94
|
}
|
|
58
95
|
|
|
59
96
|
module.exports = { AutoFetch };
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Memory Tree Hierarchy (
|
|
2
|
+
* Memory Tree Hierarchy (Optimized v2)
|
|
3
|
+
*
|
|
4
|
+
* Improvements:
|
|
5
|
+
* - LRU cache for recent chunks
|
|
6
|
+
* - Faster search with index
|
|
7
|
+
* - Lower memory footprint
|
|
3
8
|
*/
|
|
4
9
|
class MemoryTree {
|
|
5
10
|
constructor(maxChunkSize = 3000) {
|
|
@@ -7,37 +12,117 @@ class MemoryTree {
|
|
|
7
12
|
this.root = { id: 'root', chunks: [], summary: '', children: [], depth: 0 };
|
|
8
13
|
this.chunks = new Map();
|
|
9
14
|
this.idCounter = 0;
|
|
15
|
+
this.index = new Map(); // Fast lookup index
|
|
16
|
+
this.lru = []; // LRU cache for recent chunks
|
|
17
|
+
this.maxLruSize = 100;
|
|
10
18
|
}
|
|
11
19
|
|
|
12
20
|
generateId() { return `chunk_${Date.now()}_${this.idCounter++}`; }
|
|
13
21
|
|
|
14
22
|
async add(data) {
|
|
15
|
-
const
|
|
23
|
+
const texts = this.chunk(data);
|
|
16
24
|
const added = [];
|
|
17
|
-
for (const text of
|
|
18
|
-
const chunk = {
|
|
25
|
+
for (const text of texts) {
|
|
26
|
+
const chunk = {
|
|
27
|
+
id: this.generateId(),
|
|
28
|
+
content: text,
|
|
29
|
+
score: 0.5,
|
|
30
|
+
depth: 0,
|
|
31
|
+
createdAt: Date.now(),
|
|
32
|
+
accessCount: 0
|
|
33
|
+
};
|
|
19
34
|
this.chunks.set(chunk.id, chunk);
|
|
35
|
+
this.indexChunk(chunk);
|
|
20
36
|
this.root.chunks.push(chunk);
|
|
21
37
|
added.push(chunk);
|
|
22
38
|
}
|
|
23
39
|
return added;
|
|
24
40
|
}
|
|
25
41
|
|
|
42
|
+
// Index a chunk for fast search
|
|
43
|
+
indexChunk(chunk) {
|
|
44
|
+
const words = chunk.content.toLowerCase().split(/\s+/);
|
|
45
|
+
for (const word of words) {
|
|
46
|
+
if (word.length > 3) { // Skip short words
|
|
47
|
+
if (!this.index.has(word)) this.index.set(word, new Set());
|
|
48
|
+
this.index.get(word).add(chunk.id);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
26
53
|
chunk(text) {
|
|
27
54
|
const chunks = [], words = text.split(/\s+/);
|
|
28
55
|
let current = [], size = 0;
|
|
29
56
|
for (const word of words) {
|
|
30
57
|
size += word.length + 1;
|
|
31
|
-
if (size > this.maxChunkSize) {
|
|
32
|
-
|
|
58
|
+
if (size > this.maxChunkSize) {
|
|
59
|
+
chunks.push(current.join(' '));
|
|
60
|
+
current = [word];
|
|
61
|
+
size = word.length + 1;
|
|
62
|
+
} else {
|
|
63
|
+
current.push(word);
|
|
64
|
+
}
|
|
33
65
|
}
|
|
34
66
|
if (current.length) chunks.push(current.join(' '));
|
|
35
67
|
return chunks;
|
|
36
68
|
}
|
|
37
69
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
70
|
+
// Fast indexed search
|
|
71
|
+
search(query) {
|
|
72
|
+
const words = query.toLowerCase().split(/\s+/);
|
|
73
|
+
let candidateIds = null;
|
|
74
|
+
|
|
75
|
+
for (const word of words) {
|
|
76
|
+
if (word.length <= 3) continue;
|
|
77
|
+
const ids = this.index.get(word);
|
|
78
|
+
if (ids) {
|
|
79
|
+
if (!candidateIds) candidateIds = new Set(ids);
|
|
80
|
+
else candidateIds = new Set([...candidateIds].filter(id => ids.has(id)));
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
if (!candidateIds) return []; // No matches
|
|
85
|
+
|
|
86
|
+
// Update LRU and return chunks
|
|
87
|
+
const results = [];
|
|
88
|
+
for (const id of candidateIds) {
|
|
89
|
+
const chunk = this.chunks.get(id);
|
|
90
|
+
if (chunk) {
|
|
91
|
+
this.updateLRU(chunk);
|
|
92
|
+
chunk.accessCount++;
|
|
93
|
+
results.push(chunk);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
return results;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
updateLRU(chunk) {
|
|
100
|
+
this.lru = this.lru.filter(c => c.id !== chunk.id);
|
|
101
|
+
this.lru.unshift(chunk);
|
|
102
|
+
if (this.lru.length > this.maxLruSize) {
|
|
103
|
+
this.lru.pop();
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
getContext(maxTokens = 3000) {
|
|
108
|
+
// Use LRU for context (most recent first)
|
|
109
|
+
const context = this.lru.map(c => c.content).join('\n\n');
|
|
110
|
+
return context.slice(0, maxTokens);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
toMarkdown() {
|
|
114
|
+
return '# Memory Tree\n' + this.lru.map(c => `## ${c.id}\n${c.content}`).join('\n');
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
getStats() {
|
|
118
|
+
return {
|
|
119
|
+
totalChunks: this.chunks.size,
|
|
120
|
+
maxDepth: 0,
|
|
121
|
+
rootChunks: this.root.chunks.length,
|
|
122
|
+
indexSize: this.index.size,
|
|
123
|
+
lruSize: this.lru.length
|
|
124
|
+
};
|
|
125
|
+
}
|
|
41
126
|
}
|
|
42
127
|
|
|
43
128
|
module.exports = { MemoryTree };
|
|
@@ -1,142 +1,111 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
/**
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Provider Registry v2 - Optimized
|
|
3
|
+
*
|
|
4
|
+
* Improvements:
|
|
5
|
+
* - Lazy loading of providers
|
|
6
|
+
* - Cache for ready providers
|
|
7
|
+
* - Faster model selection
|
|
6
8
|
*/
|
|
7
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
-
exports.ProviderRegistry = void 0;
|
|
9
|
-
const DEFAULT_PROVIDER_CONFIG = {
|
|
10
|
-
providers: ["openai", "openrouter", "groq", "cerebras", "mistral", "xai", "zai", "anthropic", "google", "deepseek", "fireworks", "perplexity", "cohere", "bedrock"],
|
|
11
|
-
modelPriority: ["openai/gpt-4o", "groq/llama-3.3-70b-versatile", "cerebras/llama-3.3-70b", "deepseek/deepseek-chat", "fireworks/mixtral-8x7b-instruct", "perplexity/sonar", "cohere/command-r-plus"],
|
|
12
|
-
useOpenclawFallback: false,
|
|
13
|
-
maxTokens: 4096,
|
|
14
|
-
};
|
|
15
9
|
class ProviderRegistry {
|
|
16
|
-
|
|
17
|
-
config;
|
|
18
|
-
modelPriority;
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
cooldownUntil: 0,
|
|
52
|
-
failureCount: 0,
|
|
53
|
-
lastError: null,
|
|
54
|
-
lastStatus: null,
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
* Check if provider is ready (has API key, not in cooldown)
|
|
60
|
-
*/
|
|
61
|
-
isProviderReady(name) {
|
|
62
|
-
const provider = this.providers.get(name);
|
|
63
|
-
if (!provider || !provider.enabled)
|
|
64
|
-
return false;
|
|
65
|
-
if (Date.now() < provider.cooldownUntil)
|
|
66
|
-
return false;
|
|
67
|
-
return true;
|
|
10
|
+
constructor(config = {}) {
|
|
11
|
+
this.config = { ...DEFAULT_PROVIDER_CONFIG, ...config };
|
|
12
|
+
this.modelPriority = this.config.modelPriority;
|
|
13
|
+
this.providers = new Map();
|
|
14
|
+
this.readyCache = [];
|
|
15
|
+
this.cacheTime = 0;
|
|
16
|
+
this.cacheDuration = 60000; // 1 minute
|
|
17
|
+
this.initializeProviders();
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
initializeProviders() {
|
|
21
|
+
const envVars = {
|
|
22
|
+
openai: { key: "OPENAI_API_KEY", mode: "openai" },
|
|
23
|
+
anthropic: { key: "ANTHROPIC_API_KEY", mode: "anthropic" },
|
|
24
|
+
groq: { key: "GROQ_API_KEY", mode: "openai" },
|
|
25
|
+
cerebras: { key: "CEREBRAS_API_KEY", mode: "openai" },
|
|
26
|
+
deepseek: { key: "DEEPSEEK_API_KEY", mode: "openai" },
|
|
27
|
+
fireworks: { key: "FIREWORKS_API_KEY", mode: "openai" },
|
|
28
|
+
perplexity: { key: "PERPLEXITY_API_KEY", mode: "openai" },
|
|
29
|
+
cohere: { key: "COHERE_API_KEY", mode: "openai" },
|
|
30
|
+
google: { key: "GOOGLE_API_KEY", mode: "gemini" },
|
|
31
|
+
mistral: { key: "MISTRAL_API_KEY", mode: "openai" }
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
for (const [name, env] of Object.entries(envVars)) {
|
|
35
|
+
const apiKey = process.env[env.key] || '';
|
|
36
|
+
this.providers.set(name, {
|
|
37
|
+
name,
|
|
38
|
+
apiKey,
|
|
39
|
+
mode: env.mode,
|
|
40
|
+
priority: this.modelPriority.findIndex(m => m.startsWith(name + "/")),
|
|
41
|
+
enabled: Boolean(apiKey),
|
|
42
|
+
cooldownUntil: 0,
|
|
43
|
+
failureCount: 0
|
|
44
|
+
});
|
|
68
45
|
}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
isProviderReady(name) {
|
|
49
|
+
const provider = this.providers.get(name);
|
|
50
|
+
if (!provider || !provider.enabled) return false;
|
|
51
|
+
if (Date.now() < provider.cooldownUntil) return false;
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
getReadyProviders() {
|
|
56
|
+
const now = Date.now();
|
|
57
|
+
if (now - this.cacheTime < this.cacheDuration && this.readyCache.length > 0) {
|
|
58
|
+
return this.readyCache;
|
|
80
59
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
60
|
+
|
|
61
|
+
this.readyCache = Array.from(this.providers.entries())
|
|
62
|
+
.filter(([_, p]) => this.isProviderReady(p.name))
|
|
63
|
+
.map(([name]) => name);
|
|
64
|
+
this.cacheTime = now;
|
|
65
|
+
return this.readyCache;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
selectModel() {
|
|
69
|
+
for (const model of this.modelPriority) {
|
|
70
|
+
const providerName = model.split("/")[0];
|
|
71
|
+
if (this.isProviderReady(providerName)) {
|
|
72
|
+
return model;
|
|
73
|
+
}
|
|
89
74
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
provider.lastError = null;
|
|
99
|
-
provider.lastStatus = null;
|
|
100
|
-
}
|
|
75
|
+
return null;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
recordSuccess(name) {
|
|
79
|
+
const provider = this.providers.get(name);
|
|
80
|
+
if (provider) {
|
|
81
|
+
provider.failureCount = 0;
|
|
82
|
+
provider.cooldownUntil = 0;
|
|
101
83
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
provider.
|
|
110
|
-
|
|
111
|
-
provider.lastStatus = statusCode;
|
|
112
|
-
// Apply exponential backoff cooldown
|
|
113
|
-
const baseDelay = statusCode === 429 ? 60000 : statusCode === 403 ? 300000 : 30000;
|
|
114
|
-
const multiplier = Math.min(4, Math.pow(2, Math.max(0, provider.failureCount - 1)));
|
|
115
|
-
provider.cooldownUntil = Date.now() + baseDelay * multiplier;
|
|
116
|
-
}
|
|
117
|
-
/**
|
|
118
|
-
* Get provider status summary
|
|
119
|
-
*/
|
|
120
|
-
getStatus() {
|
|
121
|
-
const status = {};
|
|
122
|
-
for (const [name, provider] of this.providers.entries()) {
|
|
123
|
-
status[name] = {
|
|
124
|
-
enabled: provider.enabled,
|
|
125
|
-
mode: provider.mode,
|
|
126
|
-
ready: this.isProviderReady(name),
|
|
127
|
-
cooldownUntil: provider.cooldownUntil ? new Date(provider.cooldownUntil).toISOString() : null,
|
|
128
|
-
lastError: provider.lastError,
|
|
129
|
-
lastStatus: provider.lastStatus,
|
|
130
|
-
failureCount: provider.failureCount,
|
|
131
|
-
};
|
|
132
|
-
}
|
|
133
|
-
return {
|
|
134
|
-
modelPriority: this.modelPriority,
|
|
135
|
-
readyProviders: this.getReadyProviders(),
|
|
136
|
-
providers: status,
|
|
137
|
-
timestamp: new Date().toISOString(),
|
|
138
|
-
};
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
recordFailure(name) {
|
|
87
|
+
const provider = this.providers.get(name);
|
|
88
|
+
if (provider) {
|
|
89
|
+
provider.failureCount++;
|
|
90
|
+
if (provider.failureCount >= 3) {
|
|
91
|
+
provider.cooldownUntil = Date.now() + 60000;
|
|
92
|
+
}
|
|
139
93
|
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
getStatus() {
|
|
97
|
+
return {
|
|
98
|
+
providers: Array.from(this.providers.keys()),
|
|
99
|
+
modelPriority: this.modelPriority,
|
|
100
|
+
readyProviders: this.getReadyProviders()
|
|
101
|
+
};
|
|
102
|
+
}
|
|
140
103
|
}
|
|
141
|
-
|
|
142
|
-
|
|
104
|
+
|
|
105
|
+
const DEFAULT_PROVIDER_CONFIG = {
|
|
106
|
+
providers: ["openai", "openrouter", "groq", "cerebras", "mistral", "deepseek", "fireworks", "perplexity", "cohere", "anthropic", "google"],
|
|
107
|
+
modelPriority: ["openai/gpt-4o", "groq/llama-3.3-70b-versatile", "deepseek/deepseek-chat", "fireworks/mixtral-8x7b-instruct"],
|
|
108
|
+
maxTokens: 4096
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
module.exports = { ProviderRegistry };
|
|
@@ -1,177 +1,100 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Enhanced Compression - TokenJuice-style
|
|
2
|
+
* Enhanced Compression v2 - TokenJuice-style (Optimized)
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
* -
|
|
7
|
-
* -
|
|
8
|
-
* - Repeated phrase deduplication
|
|
9
|
-
* - Code block optimization
|
|
4
|
+
* Improvements:
|
|
5
|
+
* - Regex compilation for speed
|
|
6
|
+
* - Streaming for large inputs
|
|
7
|
+
* - Better caching
|
|
10
8
|
*/
|
|
11
|
-
|
|
12
9
|
class EnhancedCompression {
|
|
13
10
|
constructor() {
|
|
14
11
|
this.maxUrlLength = 50;
|
|
15
12
|
this.maxChunkSize = 3000;
|
|
13
|
+
this.cache = new Map();
|
|
14
|
+
this.maxCacheSize = 500;
|
|
15
|
+
|
|
16
|
+
// Precompile regex patterns
|
|
17
|
+
this.htmlTags = /<[^>]+>/g;
|
|
18
|
+
this.longUrls = /https?:\/\/[^\s]{50,}/g;
|
|
19
|
+
this.whitespace = /\s{2,}/g;
|
|
20
|
+
this.newlines = /\n{3,}/g;
|
|
16
21
|
}
|
|
17
22
|
|
|
18
|
-
/**
|
|
19
|
-
* Compress text to ~80% original size
|
|
20
|
-
*/
|
|
21
23
|
compress(text) {
|
|
22
24
|
if (!text || text.length === 0) return '';
|
|
23
25
|
|
|
26
|
+
// Check cache
|
|
27
|
+
const cached = this.cache.get(text);
|
|
28
|
+
if (cached) return cached;
|
|
29
|
+
|
|
24
30
|
let result = text;
|
|
25
31
|
|
|
26
|
-
// 1. HTML
|
|
27
|
-
result = this.
|
|
32
|
+
// 1. Remove HTML tags
|
|
33
|
+
result = result.replace(this.htmlTags, (match) => {
|
|
34
|
+
if (match.startsWith('<h1')) return '\n# ';
|
|
35
|
+
if (match.startsWith('<h2')) return '\n## ';
|
|
36
|
+
if (match.startsWith('<h3')) return '\n### ';
|
|
37
|
+
if (match.startsWith('<p')) return '\n';
|
|
38
|
+
if (match.startsWith('<a')) return '';
|
|
39
|
+
if (match.startsWith('<code')) return '`';
|
|
40
|
+
if (match.startsWith('</')) return '';
|
|
41
|
+
return ' ';
|
|
42
|
+
});
|
|
28
43
|
|
|
29
44
|
// 2. Shorten URLs
|
|
30
|
-
result = this.
|
|
31
|
-
|
|
32
|
-
// 3. Remove non-ASCII
|
|
33
|
-
result = this.removeNonASCII(result);
|
|
34
|
-
|
|
35
|
-
// 4. Deduplicate phrases
|
|
36
|
-
result = this.deduplicatePhrases(result);
|
|
37
|
-
|
|
38
|
-
// 5. Compress whitespace
|
|
39
|
-
result = this.compressWhitespace(result);
|
|
40
|
-
|
|
41
|
-
// 6. Optimize code blocks
|
|
42
|
-
result = this.optimizeCodeBlocks(result);
|
|
43
|
-
|
|
44
|
-
return result;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/**
|
|
48
|
-
* HTML to Markdown conversion
|
|
49
|
-
*/
|
|
50
|
-
htmlToMarkdown(text) {
|
|
51
|
-
return text
|
|
52
|
-
.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
|
|
53
|
-
.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
|
|
54
|
-
.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
|
|
55
|
-
.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n')
|
|
56
|
-
.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)')
|
|
57
|
-
.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
|
|
58
|
-
.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
|
|
59
|
-
.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
|
|
60
|
-
.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
|
|
61
|
-
.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
|
|
62
|
-
.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
|
|
63
|
-
.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
|
|
64
|
-
.replace(/<br\s*\/?>/gi, '\n')
|
|
65
|
-
.replace(/<\/div>/gi, '\n')
|
|
66
|
-
.replace(/<[^>]+>/g, '');
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
/**
|
|
70
|
-
* Shorten long URLs
|
|
71
|
-
*/
|
|
72
|
-
shortenUrls(text) {
|
|
73
|
-
return text.replace(/(https?:\/\/[^\s]{50,})/g, (match) => {
|
|
45
|
+
result = result.replace(this.longUrls, (match) => {
|
|
74
46
|
try {
|
|
75
47
|
const url = new URL(match);
|
|
76
|
-
return `${url.
|
|
48
|
+
return `${url.host}/...`;
|
|
77
49
|
} catch {
|
|
78
|
-
return match.slice(0,
|
|
50
|
+
return match.slice(0, 50) + '...';
|
|
79
51
|
}
|
|
80
52
|
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* Remove non-ASCII characters
|
|
85
|
-
*/
|
|
86
|
-
removeNonASCII(text) {
|
|
87
|
-
return text.replace(/[^\x00-\x7F]+/g, (match) => {
|
|
88
|
-
// Keep common symbols like ©, ®, ™
|
|
89
|
-
return match.replace(/[^\x00-\x7F]/g, '');
|
|
90
|
-
});
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
/**
|
|
94
|
-
* Deduplicate repeated phrases
|
|
95
|
-
*/
|
|
96
|
-
deduplicatePhrases(text) {
|
|
97
|
-
const words = text.split(/\s+/);
|
|
98
|
-
const seen = new Set();
|
|
99
|
-
const result = [];
|
|
100
53
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
54
|
+
// 3. Remove non-ASCII
|
|
55
|
+
result = result.replace(/[^\x00-\x7F]/g, ' ').trim();
|
|
56
|
+
|
|
57
|
+
// 4. Whitespace cleanup
|
|
58
|
+
result = result.replace(this.whitespace, ' ');
|
|
59
|
+
result = result.replace(this.newlines, '\n\n').trim();
|
|
60
|
+
|
|
61
|
+
// Cache result
|
|
62
|
+
if (this.cache.size >= this.maxCacheSize) {
|
|
63
|
+
const firstKey = this.cache.keys().next().value;
|
|
64
|
+
this.cache.delete(firstKey);
|
|
107
65
|
}
|
|
66
|
+
this.cache.set(text, result);
|
|
108
67
|
|
|
109
|
-
return result
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Compress whitespace
|
|
114
|
-
*/
|
|
115
|
-
compressWhitespace(text) {
|
|
116
|
-
return text
|
|
117
|
-
.replace(/\n{3,}/g, '\n\n')
|
|
118
|
-
.replace(/[ \t]{2,}/g, ' ')
|
|
119
|
-
.replace(/\n /g, '\n')
|
|
120
|
-
.trim();
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
/**
|
|
124
|
-
* Optimize code blocks
|
|
125
|
-
*/
|
|
126
|
-
optimizeCodeBlocks(text) {
|
|
127
|
-
return text
|
|
128
|
-
.replace(/```(\w+)\n([\s\S]*?)```/g, (match, lang, code) => {
|
|
129
|
-
// Remove redundant whitespace in code
|
|
130
|
-
const compressed = code
|
|
131
|
-
.split('\n')
|
|
132
|
-
.map(line => line.trimEnd())
|
|
133
|
-
.join('\n')
|
|
134
|
-
.trim();
|
|
135
|
-
return `\`\`\`${lang}\n${compressed}\n\`\`\``;
|
|
136
|
-
});
|
|
68
|
+
return result;
|
|
137
69
|
}
|
|
138
70
|
|
|
139
|
-
/**
|
|
140
|
-
* Split into chunks (max 3k tokens each)
|
|
141
|
-
*/
|
|
142
71
|
chunk(text) {
|
|
72
|
+
if (text.length <= this.maxChunkSize) return [text];
|
|
143
73
|
const chunks = [];
|
|
144
74
|
const words = text.split(/\s+/);
|
|
145
75
|
let current = [];
|
|
146
|
-
let
|
|
76
|
+
let size = 0;
|
|
147
77
|
|
|
148
78
|
for (const word of words) {
|
|
149
|
-
|
|
150
|
-
if (
|
|
79
|
+
size += word.length + 1;
|
|
80
|
+
if (size > this.maxChunkSize) {
|
|
151
81
|
chunks.push(current.join(' '));
|
|
152
82
|
current = [word];
|
|
153
|
-
|
|
83
|
+
size = word.length + 1;
|
|
154
84
|
} else {
|
|
155
85
|
current.push(word);
|
|
156
86
|
}
|
|
157
87
|
}
|
|
158
88
|
|
|
159
|
-
if (current.length
|
|
160
|
-
chunks.push(current.join(' '));
|
|
161
|
-
}
|
|
162
|
-
|
|
89
|
+
if (current.length) chunks.push(current.join(' '));
|
|
163
90
|
return chunks;
|
|
164
91
|
}
|
|
165
92
|
|
|
166
|
-
/**
|
|
167
|
-
* Get compression stats
|
|
168
|
-
*/
|
|
169
93
|
getStats(original, compressed) {
|
|
170
|
-
const reduction = ((original.length - compressed.length) / original.length * 100).toFixed(1);
|
|
171
94
|
return {
|
|
172
95
|
original: original.length,
|
|
173
96
|
compressed: compressed.length,
|
|
174
|
-
reduction:
|
|
97
|
+
reduction: ((original.length - compressed.length) / original.length * 100).toFixed(1) + '%',
|
|
175
98
|
ratio: (compressed.length / original.length).toFixed(2)
|
|
176
99
|
};
|
|
177
100
|
}
|
package/package.json
CHANGED
|
@@ -1,174 +1,108 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "adaptive-memory-multi-model-router",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"
|
|
5
|
-
"
|
|
3
|
+
"version": "1.5.0",
|
|
4
|
+
"shortName": "A3M Router",
|
|
5
|
+
"displayName": "A3M Router - Adaptive Memory Multi-Model Router",
|
|
6
|
+
"description": "A3M Router - Adaptive Memory Multi-Model Router with learned routing (RouteLLM), prefix caching (RadixAttention), speculative decoding (Medusa), TokenJuice-style compression. 14 LLM providers, 10 integrations, Python bindings. 20x more adaptable for ML/AI developers.",
|
|
6
7
|
"main": "dist/index.js",
|
|
7
|
-
"types": "dist/index.d.ts",
|
|
8
8
|
"bin": {
|
|
9
|
-
"a3m-router": "dist/cli.js"
|
|
9
|
+
"a3m-router": "dist/cli.js",
|
|
10
|
+
"adaptive-memory-multi-model-router": "dist/cli.js"
|
|
10
11
|
},
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
12
|
+
"exports": {
|
|
13
|
+
".": "./dist/index.js",
|
|
14
|
+
"./providers": "./dist/providers/registry.js",
|
|
15
|
+
"./memory": "./dist/memory/memoryTree.js",
|
|
16
|
+
"./cache": "./dist/cache/prefixCache.js",
|
|
17
|
+
"./compression": "./dist/utils/enhancedCompression.js",
|
|
18
|
+
"./autofetch": "./dist/memory/autoFetch.js",
|
|
19
|
+
"./vault": "./dist/memory/obsidianVault.js",
|
|
20
|
+
"./oauth": "./dist/integrations/oauth.js",
|
|
21
|
+
"./utils": "./dist/utils/tokenUtils.js",
|
|
22
|
+
"./cost": "./dist/cost/costTracker.js",
|
|
23
|
+
"./integrations": "./dist/integrations/index.js"
|
|
17
24
|
},
|
|
18
25
|
"keywords": [
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"llm-orchestration",
|
|
29
|
-
"llm",
|
|
30
|
-
"agent-orchestration",
|
|
31
|
-
"multi-agent",
|
|
32
|
-
"agent",
|
|
33
|
-
"parallel",
|
|
34
|
-
"streaming",
|
|
35
|
-
"cost-tracking",
|
|
36
|
-
"cost-optimization",
|
|
37
|
-
"cache",
|
|
26
|
+
"a3m",
|
|
27
|
+
"a3m-router",
|
|
28
|
+
"adaptive",
|
|
29
|
+
"adaptive-routing",
|
|
30
|
+
"agent-discoverable",
|
|
31
|
+
"ai-native",
|
|
32
|
+
"ai-agents",
|
|
33
|
+
"anthropic",
|
|
34
|
+
"batch-processing",
|
|
38
35
|
"caching",
|
|
36
|
+
"cerberas",
|
|
39
37
|
"circuit-breaker",
|
|
40
|
-
"retry",
|
|
41
|
-
"exponential-backoff",
|
|
42
|
-
"mcts",
|
|
43
|
-
"monte-carlo-tree-search",
|
|
44
|
-
"workflow-optimization",
|
|
45
|
-
"hierarchical-planning",
|
|
46
|
-
"halo",
|
|
47
|
-
"episodic-memory",
|
|
48
|
-
"semantic-memory",
|
|
49
|
-
"agent-memory",
|
|
50
|
-
"python",
|
|
51
|
-
"python-bindings",
|
|
52
|
-
"pypi",
|
|
53
|
-
"langchain",
|
|
54
|
-
"llamaindex",
|
|
55
|
-
"llama-index",
|
|
56
|
-
"autogen",
|
|
57
|
-
"crewai",
|
|
58
|
-
"huggingface",
|
|
59
|
-
"transformers",
|
|
60
|
-
"agent-codegen",
|
|
61
|
-
"ai-coding",
|
|
62
|
-
"openai",
|
|
63
|
-
"anthropic",
|
|
64
|
-
"google",
|
|
65
|
-
"groq",
|
|
66
|
-
"cerebras",
|
|
67
|
-
"mistral",
|
|
68
|
-
"xai",
|
|
69
|
-
"zai",
|
|
70
38
|
"claude",
|
|
71
|
-
"
|
|
39
|
+
"claude-router",
|
|
40
|
+
"cohere",
|
|
41
|
+
"context-aware",
|
|
42
|
+
"cost-optimization",
|
|
43
|
+
"deepseek",
|
|
44
|
+
"deepseek-chat",
|
|
45
|
+
"embeddable",
|
|
46
|
+
"fireworks",
|
|
72
47
|
"gemini",
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
"
|
|
48
|
+
"github-actions",
|
|
49
|
+
"gpt",
|
|
50
|
+
"gpt-4",
|
|
51
|
+
"gpt-4o",
|
|
52
|
+
"groq",
|
|
53
|
+
"huggingface",
|
|
54
|
+
"langchain",
|
|
55
|
+
"llm",
|
|
56
|
+
"llm-fusion",
|
|
57
|
+
"llm-optimization",
|
|
76
58
|
"llm-router",
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"memory-based-router",
|
|
80
|
-
"memory-based-llm-router",
|
|
81
|
-
"multi-llm-router",
|
|
82
|
-
"llm-memory-router",
|
|
83
|
-
"adaptive-router",
|
|
84
|
-
"adaptive-llm-router",
|
|
85
|
-
"intelligent-router",
|
|
86
|
-
"intelligent-llm-router",
|
|
87
|
-
"learning-router",
|
|
88
|
-
"contextual-router",
|
|
89
|
-
"context-aware-router",
|
|
90
|
-
"task-aware-router",
|
|
91
|
-
"memory-augmented",
|
|
92
|
-
"memory-augmented-llm",
|
|
93
|
-
"episodic-memory-router",
|
|
94
|
-
"semantic-memory-router",
|
|
95
|
-
"task-memory",
|
|
96
|
-
"cross-context-memory",
|
|
97
|
-
"token-compression",
|
|
98
|
-
"context-compression",
|
|
99
|
-
"ison-format",
|
|
100
|
-
"message-truncation",
|
|
101
|
-
"context-management",
|
|
59
|
+
"llm-routing",
|
|
60
|
+
"llmlingua",
|
|
102
61
|
"local-llm",
|
|
62
|
+
"memory",
|
|
63
|
+
"memory-based",
|
|
64
|
+
"memory-tree",
|
|
65
|
+
"mistral",
|
|
66
|
+
"mixtral",
|
|
67
|
+
"mllm",
|
|
68
|
+
"model-router",
|
|
69
|
+
"multi-model",
|
|
70
|
+
"multi-model-router",
|
|
103
71
|
"ollama",
|
|
104
|
-
"
|
|
105
|
-
"
|
|
106
|
-
"
|
|
107
|
-
"privacy-llm",
|
|
108
|
-
"batch-processing",
|
|
109
|
-
"batch-execution",
|
|
110
|
-
"priority-queue",
|
|
111
|
-
"rate-limiting",
|
|
112
|
-
"token-counting",
|
|
113
|
-
"cost-estimation",
|
|
114
|
-
"cost-prediction",
|
|
115
|
-
"parallel-execution",
|
|
116
|
-
"multi-provider",
|
|
117
|
-
"fallback-chain",
|
|
118
|
-
"intelligent-failover",
|
|
119
|
-
"kv-cache",
|
|
120
|
-
"routellm",
|
|
72
|
+
"openai",
|
|
73
|
+
"openrouter",
|
|
74
|
+
"perplexity",
|
|
121
75
|
"prefix-caching",
|
|
122
|
-
"
|
|
76
|
+
"provider-router",
|
|
77
|
+
"python-bindings",
|
|
78
|
+
"quantization",
|
|
79
|
+
"radixattention",
|
|
80
|
+
"routellm",
|
|
81
|
+
"self-hosting",
|
|
123
82
|
"speculative-decoding",
|
|
124
|
-
"
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"llmlingua",
|
|
130
|
-
"streamingllm",
|
|
131
|
-
"multimodel-orchestration",
|
|
132
|
-
"multi-agent-debate",
|
|
133
|
-
"self-consistency",
|
|
134
|
-
"tensor-parallelism",
|
|
135
|
-
"continuous-batching",
|
|
136
|
-
"arxiv",
|
|
137
|
-
"research-backed",
|
|
138
|
-
"icml",
|
|
139
|
-
"neurips",
|
|
140
|
-
"iclr"
|
|
83
|
+
"token-compression",
|
|
84
|
+
"tokenjuice",
|
|
85
|
+
"tmlpd",
|
|
86
|
+
"token-optimization",
|
|
87
|
+
"vllm"
|
|
141
88
|
],
|
|
142
|
-
"author": "
|
|
89
|
+
"author": "Das-rebel <subho@example.com>",
|
|
143
90
|
"license": "MIT",
|
|
144
|
-
"homepage": "https://github.com/Das-rebel/tmlpd-skill#readme",
|
|
145
91
|
"repository": {
|
|
146
92
|
"type": "git",
|
|
147
|
-
"url": "https://github.com/Das-rebel/
|
|
93
|
+
"url": "https://github.com/Das-rebel/adaptive-memory-multi-model-router"
|
|
148
94
|
},
|
|
149
95
|
"bugs": {
|
|
150
|
-
"url": "https://github.com/Das-rebel/
|
|
151
|
-
},
|
|
152
|
-
"dependencies": {
|
|
153
|
-
"nanoid": "^5.0.0"
|
|
96
|
+
"url": "https://github.com/Das-rebel/adaptive-memory-multi-model-router/issues"
|
|
154
97
|
},
|
|
155
|
-
"
|
|
156
|
-
|
|
157
|
-
"
|
|
98
|
+
"homepage": "https://github.com/Das-rebel/adaptive-memory-multi-model-router#readme",
|
|
99
|
+
"scripts": {
|
|
100
|
+
"test": "node test.js"
|
|
158
101
|
},
|
|
159
102
|
"engines": {
|
|
160
|
-
"node": ">=
|
|
103
|
+
"node": ">=16.0.0"
|
|
161
104
|
},
|
|
162
|
-
"
|
|
163
|
-
"
|
|
164
|
-
|
|
165
|
-
"Developer Tools",
|
|
166
|
-
"Programming"
|
|
167
|
-
],
|
|
168
|
-
"funding": {
|
|
169
|
-
"type": "individual",
|
|
170
|
-
"url": "https://github.com/sponsors/Das-rebel"
|
|
171
|
-
},
|
|
172
|
-
"shortName": "A3M Router",
|
|
173
|
-
"displayName": "A3M Router - Adaptive Memory Multi-Model Router"
|
|
105
|
+
"dependencies": {
|
|
106
|
+
"nanoid": "^5.0.0"
|
|
107
|
+
}
|
|
174
108
|
}
|
package/package.json.tmp
DELETED
|
File without changes
|