lynkr 9.3.3 → 9.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.impeccable/live/config.json +8 -0
- package/README.md +162 -181
- package/benchmark-configs/litellm_config.yaml +86 -0
- package/benchmark-configs/lynkr.env +48 -0
- package/benchmark-configs/portkey-config.json +60 -0
- package/benchmark-configs/portkey-docker.sh +23 -0
- package/benchmark-tier-routing.js +449 -0
- package/package.json +30 -8
- package/src/api/router.js +12 -0
- package/src/cache/semantic.js +5 -2
- package/src/context/toon.js +14 -9
- package/src/dashboard/router.js +1 -1
- package/src/tools/smart-selection.js +104 -281
package/README.md
CHANGED
|
@@ -1,22 +1,27 @@
|
|
|
1
1
|
# Lynkr
|
|
2
2
|
|
|
3
|
-
###
|
|
3
|
+
### The AI coding proxy that compresses tokens before they hit the model.
|
|
4
|
+
|
|
5
|
+
**87.6% fewer tokens on JSON tool results. 53% fewer tokens on tool-heavy requests. 171ms semantic cache hits. Zero code changes.**
|
|
4
6
|
|
|
5
7
|
[](https://www.npmjs.com/package/lynkr)
|
|
6
8
|
[](https://github.com/Fast-Editor/Lynkr)
|
|
7
9
|
[](LICENSE)
|
|
8
10
|
[](https://nodejs.org)
|
|
9
|
-
[](https://deepwiki.com/
|
|
11
|
+
[](https://deepwiki.com/Fast-Editor/Lynkr)
|
|
10
12
|
|
|
11
13
|
<table>
|
|
12
14
|
<tr>
|
|
13
|
-
<td align="center"><strong>
|
|
14
|
-
<td align="center"><strong>
|
|
15
|
-
<td align="center"><strong>
|
|
15
|
+
<td align="center"><strong>87.6%</strong><br/>JSON Compression</td>
|
|
16
|
+
<td align="center"><strong>53%</strong><br/>Tool Token Reduction</td>
|
|
17
|
+
<td align="center"><strong>171ms</strong><br/>Semantic Cache Hits</td>
|
|
18
|
+
<td align="center"><strong>13+</strong><br/>LLM Providers</td>
|
|
16
19
|
<td align="center"><strong>0</strong><br/>Code Changes Required</td>
|
|
17
20
|
</tr>
|
|
18
21
|
</table>
|
|
19
22
|
|
|
23
|
+
> Numbers from a live benchmark against LiteLLM on identical workloads. [See full report →](BENCHMARK_REPORT.md)
|
|
24
|
+
|
|
20
25
|
---
|
|
21
26
|
|
|
22
27
|
## Quick Start (2 Minutes)
|
|
@@ -27,201 +32,63 @@
|
|
|
27
32
|
npm install -g lynkr
|
|
28
33
|
```
|
|
29
34
|
|
|
30
|
-
### 2. Configure
|
|
35
|
+
### 2. Configure Lynkr
|
|
31
36
|
|
|
32
|
-
|
|
37
|
+
First run creates a `.env` file. Edit it with your provider settings.
|
|
33
38
|
|
|
34
|
-
|
|
39
|
+
**Option A: Free & Local (Ollama) - Recommended for Testing**
|
|
35
40
|
|
|
36
41
|
```bash
|
|
42
|
+
# Install Ollama first: https://ollama.com
|
|
37
43
|
ollama pull qwen2.5-coder:latest
|
|
38
|
-
ollama serve
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
**Step 2b: Set environment variables**
|
|
42
|
-
|
|
43
|
-
**Windows (Command Prompt):**
|
|
44
|
-
```cmd
|
|
45
|
-
set MODEL_PROVIDER=ollama
|
|
46
|
-
set FALLBACK_ENABLED=false
|
|
47
|
-
set OLLAMA_MODEL=qwen2.5-coder:latest
|
|
48
|
-
set OLLAMA_ENDPOINT=http://localhost:11434
|
|
49
|
-
set PORT=8081
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
**Windows (PowerShell):**
|
|
53
|
-
```powershell
|
|
54
|
-
$env:MODEL_PROVIDER="ollama"
|
|
55
|
-
$env:FALLBACK_ENABLED="false"
|
|
56
|
-
$env:OLLAMA_MODEL="qwen2.5-coder:latest"
|
|
57
|
-
$env:OLLAMA_ENDPOINT="http://localhost:11434"
|
|
58
|
-
$env:PORT="8081"
|
|
59
44
|
```
|
|
60
45
|
|
|
61
|
-
|
|
46
|
+
Create/edit `.env` in your project directory:
|
|
62
47
|
```bash
|
|
63
|
-
|
|
64
|
-
export FALLBACK_ENABLED=false
|
|
65
|
-
export OLLAMA_MODEL=qwen2.5-coder:latest
|
|
66
|
-
export OLLAMA_ENDPOINT=http://localhost:11434
|
|
67
|
-
export PORT=8081
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
**Alternative: Use a .env file (recommended for permanent config)**
|
|
71
|
-
|
|
72
|
-
Create a file named `.env` in your home directory or project folder:
|
|
73
|
-
|
|
74
|
-
```bash
|
|
75
|
-
# Required: Provider Configuration
|
|
48
|
+
# Provider
|
|
76
49
|
MODEL_PROVIDER=ollama
|
|
77
50
|
FALLBACK_ENABLED=false
|
|
78
51
|
|
|
79
|
-
#
|
|
80
|
-
OLLAMA_MODEL=qwen2.5-coder:latest
|
|
52
|
+
# Ollama Configuration
|
|
81
53
|
OLLAMA_ENDPOINT=http://localhost:11434
|
|
54
|
+
OLLAMA_MODEL=qwen2.5-coder:latest
|
|
82
55
|
|
|
83
|
-
#
|
|
56
|
+
# Server
|
|
84
57
|
PORT=8081
|
|
85
|
-
HOST=0.0.0.0
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
### 3. Start Lynkr
|
|
89
58
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
You should see:
|
|
95
|
-
```
|
|
96
|
-
[Ollama] Server ready, model "qwen2.5-coder:latest" available
|
|
97
|
-
Claude→Databricks proxy listening on http://localhost:8081
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
### 4. Connect Claude Code (or your AI tool)
|
|
101
|
-
|
|
102
|
-
**Windows (Command Prompt):**
|
|
103
|
-
```cmd
|
|
104
|
-
set ANTHROPIC_BASE_URL=http://localhost:8081
|
|
105
|
-
set ANTHROPIC_API_KEY=dummy
|
|
106
|
-
claude "write hello world in python"
|
|
107
|
-
```
|
|
59
|
+
# Optional: Limits (remove for unlimited)
|
|
60
|
+
POLICY_MAX_STEPS=50
|
|
61
|
+
POLICY_MAX_TOOL_CALLS=100
|
|
108
62
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
export ANTHROPIC_BASE_URL=http://localhost:8081
|
|
112
|
-
export ANTHROPIC_API_KEY=dummy
|
|
113
|
-
claude "write hello world in python"
|
|
63
|
+
# Disable overly strict command filtering
|
|
64
|
+
POLICY_SAFE_COMMANDS_ENABLED=false
|
|
114
65
|
```
|
|
115
66
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
---
|
|
119
|
-
|
|
120
|
-
## Alternative Setup: Cloud Provider (OpenRouter)
|
|
121
|
-
|
|
122
|
-
Get API key from https://openrouter.ai, then:
|
|
123
|
-
|
|
124
|
-
**Windows (Command Prompt):**
|
|
125
|
-
```cmd
|
|
126
|
-
set MODEL_PROVIDER=openrouter
|
|
127
|
-
set OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
|
128
|
-
set FALLBACK_ENABLED=false
|
|
129
|
-
set PORT=8081
|
|
130
|
-
lynkr start
|
|
131
|
-
```
|
|
67
|
+
**Option B: Cloud (OpenRouter) - Recommended for Production**
|
|
132
68
|
|
|
133
|
-
**Linux/macOS:**
|
|
134
69
|
```bash
|
|
135
|
-
|
|
136
|
-
export OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
|
137
|
-
export FALLBACK_ENABLED=false
|
|
138
|
-
export PORT=8081
|
|
139
|
-
lynkr start
|
|
70
|
+
# Get API key from https://openrouter.ai
|
|
140
71
|
```
|
|
141
72
|
|
|
142
|
-
|
|
73
|
+
Create/edit `.env`:
|
|
143
74
|
```bash
|
|
75
|
+
# Provider
|
|
144
76
|
MODEL_PROVIDER=openrouter
|
|
145
77
|
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
|
146
78
|
FALLBACK_ENABLED=false
|
|
147
|
-
PORT=8081
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
---
|
|
151
|
-
|
|
152
|
-
## Common Startup Errors (READ THIS IF IT DOESN'T WORK!)
|
|
153
|
-
|
|
154
|
-
### Error: `unable to determine transport target for "pino-pretty"`
|
|
155
|
-
|
|
156
|
-
**Problem:** This error happens on Windows or when NODE_ENV is not set.
|
|
157
|
-
|
|
158
|
-
**Solution:** Set NODE_ENV before starting:
|
|
159
|
-
|
|
160
|
-
**Windows (Command Prompt):**
|
|
161
|
-
```cmd
|
|
162
|
-
set NODE_ENV=production
|
|
163
|
-
lynkr start
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
**Windows (PowerShell):**
|
|
167
|
-
```powershell
|
|
168
|
-
$env:NODE_ENV="production"
|
|
169
|
-
lynkr start
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
**Linux/macOS:**
|
|
173
|
-
```bash
|
|
174
|
-
export NODE_ENV=production
|
|
175
|
-
lynkr start
|
|
176
|
-
```
|
|
177
|
-
|
|
178
|
-
Or add to your `.env` file:
|
|
179
|
-
```bash
|
|
180
|
-
NODE_ENV=production
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
### Warning: `Missing tier configuration: TIER_SIMPLE, TIER_MEDIUM...`
|
|
184
|
-
|
|
185
|
-
**This is just a warning - you can ignore it.** Tier routing is optional for advanced use.
|
|
186
|
-
|
|
187
|
-
To remove the warning, add these to your environment or `.env`:
|
|
188
|
-
|
|
189
|
-
```bash
|
|
190
|
-
TIER_SIMPLE=ollama:qwen2.5-coder:latest
|
|
191
|
-
TIER_MEDIUM=ollama:qwen2.5-coder:latest
|
|
192
|
-
TIER_COMPLEX=ollama:qwen2.5-coder:latest
|
|
193
|
-
TIER_REASONING=ollama:qwen2.5-coder:latest
|
|
194
|
-
```
|
|
195
|
-
|
|
196
|
-
(Use the same model for all tiers if you only have one model)
|
|
197
|
-
|
|
198
|
-
### Warning: `FALLBACK_PROVIDER='databricks' is enabled but missing credentials`
|
|
199
79
|
|
|
200
|
-
|
|
80
|
+
# Server
|
|
81
|
+
PORT=8081
|
|
201
82
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
```
|
|
83
|
+
# Optional: Limits (remove for unlimited)
|
|
84
|
+
POLICY_MAX_STEPS=50
|
|
85
|
+
POLICY_MAX_TOOL_CALLS=100
|
|
206
86
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
87
|
+
# Optional: Enable caching
|
|
88
|
+
PROMPT_CACHE_ENABLED=true
|
|
89
|
+
SEMANTIC_CACHE_ENABLED=true
|
|
210
90
|
```
|
|
211
91
|
|
|
212
|
-
### Error: `Ollama connection refused`
|
|
213
|
-
|
|
214
|
-
**Problem:** Ollama is not running.
|
|
215
|
-
|
|
216
|
-
**Solution:**
|
|
217
|
-
1. Check if Ollama is running: `ollama list`
|
|
218
|
-
2. Start Ollama: `ollama serve` (or restart the Ollama app)
|
|
219
|
-
3. Verify model is pulled: `ollama pull qwen2.5-coder:latest`
|
|
220
|
-
|
|
221
|
-
---
|
|
222
|
-
|
|
223
|
-
## Additional Provider Examples
|
|
224
|
-
|
|
225
92
|
**Option C: Enterprise (AWS Bedrock)**
|
|
226
93
|
|
|
227
94
|
Create/edit `.env`:
|
|
@@ -259,6 +126,7 @@ POLICY_MAX_TOOL_CALLS=100
|
|
|
259
126
|
```
|
|
260
127
|
|
|
261
128
|
Then start Lynkr:
|
|
129
|
+
|
|
262
130
|
```bash
|
|
263
131
|
lynkr start
|
|
264
132
|
```
|
|
@@ -266,6 +134,15 @@ lynkr start
|
|
|
266
134
|
### 3. Connect Your Tool
|
|
267
135
|
|
|
268
136
|
**Claude Code**
|
|
137
|
+
|
|
138
|
+
**Windows (Command Prompt):**
|
|
139
|
+
```cmd
|
|
140
|
+
set ANTHROPIC_BASE_URL=http://localhost:8081
|
|
141
|
+
set ANTHROPIC_API_KEY=dummy
|
|
142
|
+
claude "write a hello world in python"
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
**Linux/macOS:**
|
|
269
146
|
```bash
|
|
270
147
|
export ANTHROPIC_BASE_URL=http://localhost:8081
|
|
271
148
|
export ANTHROPIC_API_KEY=dummy
|
|
@@ -292,23 +169,87 @@ wire_api = "responses"
|
|
|
292
169
|
|
|
293
170
|
---
|
|
294
171
|
|
|
172
|
+
## Common Startup Errors
|
|
173
|
+
|
|
174
|
+
### Error: `unable to determine transport target for "pino-pretty"`
|
|
175
|
+
|
|
176
|
+
**Problem:** You're running an older version (< 9.3.0).
|
|
177
|
+
|
|
178
|
+
**Solution:** Update to the latest version:
|
|
179
|
+
```bash
|
|
180
|
+
npm install -g lynkr@latest
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
If you must use an older version, set `NODE_ENV=production` before starting.
|
|
184
|
+
|
|
185
|
+
### Warning: `Missing tier configuration: TIER_SIMPLE, TIER_MEDIUM...`
|
|
186
|
+
|
|
187
|
+
**This is just a warning - you can ignore it.** Tier routing is optional.
|
|
188
|
+
|
|
189
|
+
To remove the warning, add to `.env`:
|
|
190
|
+
```bash
|
|
191
|
+
TIER_SIMPLE=ollama:qwen2.5-coder:latest
|
|
192
|
+
TIER_MEDIUM=ollama:qwen2.5-coder:latest
|
|
193
|
+
TIER_COMPLEX=ollama:qwen2.5-coder:latest
|
|
194
|
+
TIER_REASONING=ollama:qwen2.5-coder:latest
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Warning: `FALLBACK_PROVIDER='databricks' is enabled but missing credentials`
|
|
198
|
+
|
|
199
|
+
**Solution:** Add to `.env`:
|
|
200
|
+
```bash
|
|
201
|
+
FALLBACK_ENABLED=false
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Error: `connect ECONNREFUSED ::1:11434` (Ollama)
|
|
205
|
+
|
|
206
|
+
**Problem:** Ollama is not running.
|
|
207
|
+
|
|
208
|
+
**Solution:**
|
|
209
|
+
```bash
|
|
210
|
+
ollama serve
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Keep this terminal open, and start Lynkr in a new terminal.
|
|
214
|
+
|
|
215
|
+
### Error: `Connection refused` or `404 Not Found`
|
|
216
|
+
|
|
217
|
+
**Problem:** Lynkr is not running, or your client is pointing at the wrong port.
|
|
218
|
+
|
|
219
|
+
**Solution:** Check Lynkr is running on the correct port:
|
|
220
|
+
```bash
|
|
221
|
+
curl http://localhost:8081/
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Should return: `{"service":"Lynkr","version":"9.x.x","status":"running"}`
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
295
228
|
## Why Lynkr?
|
|
296
229
|
|
|
297
|
-
AI coding tools lock you into one provider. Lynkr breaks
|
|
230
|
+
AI coding tools lock you into one provider and send every token raw. Lynkr breaks both locks.
|
|
298
231
|
|
|
299
232
|
```
|
|
300
233
|
Claude Code / Cursor / Codex / Cline / Continue
|
|
301
234
|
↓
|
|
302
235
|
Lynkr
|
|
236
|
+
┌─────────────────────┐
|
|
237
|
+
│ Strip unused tools │ ← 53% fewer tokens on tool calls
|
|
238
|
+
│ Compress JSON blobs │ ← 87.6% on large tool results
|
|
239
|
+
│ Semantic cache │ ← 171ms hits, 0 tokens billed
|
|
240
|
+
│ Route by complexity │ ← cheap model for simple, cloud for hard
|
|
241
|
+
└─────────────────────┘
|
|
303
242
|
↓
|
|
304
|
-
Ollama | Bedrock | Azure | OpenRouter | OpenAI
|
|
243
|
+
Ollama | Bedrock | Azure | Moonshot | OpenRouter | OpenAI
|
|
305
244
|
```
|
|
306
245
|
|
|
307
246
|
**What you get:**
|
|
308
|
-
- ✅
|
|
247
|
+
- ✅ **53% fewer tokens** on tool-heavy requests (Claude Code, Cursor sessions)
|
|
248
|
+
- ✅ **87.6% compression** on large JSON tool results (grep, file reads, test output)
|
|
249
|
+
- ✅ **Semantic cache** serves repeated queries in 171ms with 0 tokens billed
|
|
250
|
+
- ✅ **Automatic tier routing** — simple questions go to cheap models, complex ones escalate
|
|
309
251
|
- ✅ Route through **your company's infrastructure** (Databricks, Azure, Bedrock)
|
|
310
|
-
- ✅
|
|
311
|
-
- ✅ **Zero code changes** - just change one environment variable
|
|
252
|
+
- ✅ **Zero code changes** — just change one environment variable
|
|
312
253
|
|
|
313
254
|
---
|
|
314
255
|
|
|
@@ -670,7 +611,7 @@ curl -fsSL https://raw.githubusercontent.com/Fast-Editor/Lynkr/main/install.sh |
|
|
|
670
611
|
|
|
671
612
|
**Homebrew**
|
|
672
613
|
```bash
|
|
673
|
-
brew tap
|
|
614
|
+
brew tap fast-editor/lynkr
|
|
674
615
|
brew install lynkr
|
|
675
616
|
```
|
|
676
617
|
|
|
@@ -709,6 +650,42 @@ npm start
|
|
|
709
650
|
|
|
710
651
|
---
|
|
711
652
|
|
|
653
|
+
## Benchmark Results
|
|
654
|
+
|
|
655
|
+
Measured on real agentic coding workloads (Claude Code / Cursor sessions) with Ollama, Moonshot, and Azure OpenAI backends. Run with `node benchmark-tier-routing.js`.
|
|
656
|
+
|
|
657
|
+
### Token compression
|
|
658
|
+
|
|
659
|
+
| Scenario | Tokens without Lynkr | Tokens with Lynkr | Reduction |
|
|
660
|
+
|---|---|---|---|
|
|
661
|
+
| 14-tool request (read task) | 1,042 | **547** | **47%** |
|
|
662
|
+
| 14-tool request (write task) | 1,043 | **412** | **60%** |
|
|
663
|
+
| Large JSON grep result (60 items) | 3,458 | **427** | **87.6%** |
|
|
664
|
+
|
|
665
|
+
Lynkr strips irrelevant tool schemas before forwarding (smart tool selection) and re-encodes large JSON tool results in the compact TOON format — both happen in-process with no added latency.
|
|
666
|
+
|
|
667
|
+
### Semantic cache
|
|
668
|
+
|
|
669
|
+
| | Tokens billed | Response time |
|
|
670
|
+
|---|---|---|
|
|
671
|
+
| First call (cold) | 2,857 | 1,891ms |
|
|
672
|
+
| **Second call — paraphrased, cache hit** | **0** | **171ms** |
|
|
673
|
+
|
|
674
|
+
Near-identical prompts return cached responses in 171ms. Zero tokens billed on a cache hit.
|
|
675
|
+
|
|
676
|
+
### Tier routing
|
|
677
|
+
|
|
678
|
+
| Request | Routed to |
|
|
679
|
+
|---|---|
|
|
680
|
+
| "What does git stash do?" | SIMPLE → local model (free) |
|
|
681
|
+
| JWT vs cookies security analysis | COMPLEX → cloud model (correct) |
|
|
682
|
+
|
|
683
|
+
Lynkr scores each request on 15 dimensions (including token count, code complexity, reasoning markers, risk signals, and agentic patterns) and routes automatically. No caller changes needed.
|
|
684
|
+
|
|
685
|
+
→ [Full benchmark report with methodology](BENCHMARK_REPORT.md)
|
|
686
|
+
|
|
687
|
+
---
|
|
688
|
+
|
|
712
689
|
## Cost Comparison
|
|
713
690
|
|
|
714
691
|
| Scenario | Direct Anthropic | Lynkr + Ollama | Lynkr + OpenRouter |
|
|
@@ -716,7 +693,7 @@ npm start
|
|
|
716
693
|
| Daily coding (8h) | $10-30/day | **$0 (free)** | $2-8/day |
|
|
717
694
|
| Monthly (heavy use) | $300-900 | **$0** | $60-240 |
|
|
718
695
|
|
|
719
|
-
With tier routing + token optimization: **additional
|
|
696
|
+
With tier routing + token optimization: **additional 50-87% savings** on cloud providers depending on workload.
|
|
720
697
|
|
|
721
698
|
---
|
|
722
699
|
|
|
@@ -727,13 +704,17 @@ With tier routing + token optimization: **additional 60-80% savings** on cloud p
|
|
|
727
704
|
| **Setup** | `npm install -g lynkr` | Python + Docker + Postgres | Account signup | Docker stack |
|
|
728
705
|
| **Claude Code native** | ✅ Drop-in | ⚠️ Requires config | ❌ | ⚠️ Partial |
|
|
729
706
|
| **Cursor native** | ✅ Drop-in | ⚠️ Partial | ❌ | ⚠️ Partial |
|
|
730
|
-
| **Local models** | Ollama, llama.cpp, LM Studio
|
|
731
|
-
| **
|
|
732
|
-
| **
|
|
707
|
+
| **Local models** | Ollama, llama.cpp, LM Studio | Ollama only | ❌ | ❌ |
|
|
708
|
+
| **Automatic tier routing** | ✅ 15-dimension scorer | ⚠️ Cost-only | ❌ | ❌ Manual metadata |
|
|
709
|
+
| **TOON JSON compression** | ✅ up to 87.6% | ❌ | ❌ | ❌ |
|
|
710
|
+
| **Smart tool selection** | ✅ up to 60% token reduction | ❌ | ❌ | ❌ |
|
|
711
|
+
| **Semantic cache** | ✅ 171ms hits, 0 tokens | ❌ | ❌ | ✅ Prompt cache only |
|
|
712
|
+
| **Long-term memory** | ✅ SQLite, per-session | ❌ | ❌ | ❌ |
|
|
713
|
+
| **MCP integration** | ✅ + Code Mode (96% reduction) | ❌ | ❌ | ❌ |
|
|
733
714
|
| **Self-hosted** | ✅ Node.js only | ✅ Python stack | ❌ SaaS | ✅ Docker |
|
|
734
715
|
| **Dependencies** | Node.js 20+ | Python, Prisma, PostgreSQL | None | Docker, Python |
|
|
735
716
|
|
|
736
|
-
**Lynkr's edge:** Purpose-built for AI coding tools. Zero-config for Claude Code, Cursor, and Codex. Installs in one command
|
|
717
|
+
**Lynkr's edge:** Purpose-built for AI coding tools. Compresses tokens before they reach the model — not just after. Zero-config for Claude Code, Cursor, and Codex. Installs in one command.
|
|
737
718
|
|
|
738
719
|
---
|
|
739
720
|
|
|
@@ -742,7 +723,7 @@ With tier routing + token optimization: **additional 60-80% savings** on cloud p
|
|
|
742
723
|
- [GitHub Discussions](https://github.com/Fast-Editor/Lynkr/discussions) — Ask questions
|
|
743
724
|
- [Report Issues](https://github.com/Fast-Editor/Lynkr/issues) — Bug reports
|
|
744
725
|
- [NPM Package](https://www.npmjs.com/package/lynkr) — Official releases
|
|
745
|
-
- [DeepWiki](https://deepwiki.com/
|
|
726
|
+
- [DeepWiki](https://deepwiki.com/Fast-Editor/Lynkr) — AI-powered docs
|
|
746
727
|
|
|
747
728
|
---
|
|
748
729
|
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# ─── LiteLLM Benchmark Config ─────────────────────────────────────────────────
|
|
2
|
+
# Multi-provider tier routing via LiteLLM Complexity Router.
|
|
3
|
+
#
|
|
4
|
+
# Start: litellm --config benchmark-configs/litellm_config.yaml --port 8082
|
|
5
|
+
#
|
|
6
|
+
# Required env vars:
|
|
7
|
+
# AZURE_OPENAI_API_KEY
|
|
8
|
+
# AZURE_OPENAI_ENDPOINT (https://YOUR-RESOURCE.openai.azure.com)
|
|
9
|
+
# MOONSHOT_API_KEY
|
|
10
|
+
# (Ollama needs no key — running locally on :11434)
|
|
11
|
+
#
|
|
12
|
+
# Tier mapping (matches Lynkr benchmark config):
|
|
13
|
+
# SIMPLE → ollama:minimax-m2.5:cloud
|
|
14
|
+
# MEDIUM → ollama:minimax-m2.5:cloud
|
|
15
|
+
# COMPLEX → moonshot:moonshot-v1-auto
|
|
16
|
+
# REASONING → azure-openai:gpt-5.2-chat
|
|
17
|
+
|
|
18
|
+
model_list:
|
|
19
|
+
|
|
20
|
+
# ── SIMPLE + MEDIUM → Ollama minimax-m2.5:cloud ───────────────────────────
|
|
21
|
+
# Note: the model tag is "minimax-m2.5:cloud" — the colon is part of the
|
|
22
|
+
# Ollama model name, NOT a provider separator here.
|
|
23
|
+
- model_name: smart-router
|
|
24
|
+
litellm_params:
|
|
25
|
+
model: "ollama/minimax-m2.5:cloud"
|
|
26
|
+
api_base: http://localhost:11434
|
|
27
|
+
- model_name: smart-router
|
|
28
|
+
litellm_params:
|
|
29
|
+
model: "ollama/minimax-m2.5:cloud"
|
|
30
|
+
api_base: http://localhost:11434
|
|
31
|
+
|
|
32
|
+
# ── COMPLEX → Moonshot moonshot-v1-auto (same provider as Lynkr TIER_COMPLEX; Lynkr's benchmark env uses kimi-k2.6) ──
|
|
33
|
+
- model_name: smart-router
|
|
34
|
+
litellm_params:
|
|
35
|
+
model: openai/moonshot-v1-auto
|
|
36
|
+
api_base: https://api.moonshot.ai/v1
|
|
37
|
+
api_key: os.environ/MOONSHOT_API_KEY
|
|
38
|
+
|
|
39
|
+
# ── REASONING → Azure OpenAI gpt-5.2-chat ─────────────────────────────────
|
|
40
|
+
- model_name: smart-router
|
|
41
|
+
litellm_params:
|
|
42
|
+
model: azure/gpt-5.2-chat
|
|
43
|
+
api_base: os.environ/AZURE_OPENAI_ENDPOINT
|
|
44
|
+
api_key: os.environ/AZURE_OPENAI_API_KEY
|
|
45
|
+
api_version: "2024-12-01-preview"
|
|
46
|
+
|
|
47
|
+
# ── Direct aliases (for targeted calls outside the benchmark) ─────────────
|
|
48
|
+
- model_name: ollama-minimax
|
|
49
|
+
litellm_params:
|
|
50
|
+
model: "ollama/minimax-m2.5:cloud"
|
|
51
|
+
api_base: http://localhost:11434
|
|
52
|
+
|
|
53
|
+
- model_name: moonshot-kimi-k2
|
|
54
|
+
litellm_params:
|
|
55
|
+
model: openai/moonshot-v1-auto
|
|
56
|
+
api_base: https://api.moonshot.ai/v1
|
|
57
|
+
api_key: os.environ/MOONSHOT_API_KEY
|
|
58
|
+
|
|
59
|
+
- model_name: azure-gpt5
|
|
60
|
+
litellm_params:
|
|
61
|
+
model: azure/gpt-5.2-chat
|
|
62
|
+
api_base: os.environ/AZURE_OPENAI_ENDPOINT
|
|
63
|
+
api_key: os.environ/AZURE_OPENAI_API_KEY
|
|
64
|
+
api_version: "2024-12-01-preview"
|
|
65
|
+
|
|
66
|
+
router_settings:
|
|
67
|
+
routing_strategy: cost-based-routing
|
|
68
|
+
# Fallback: if smart-router fails on one deployment, try the next
|
|
69
|
+
fallbacks:
|
|
70
|
+
- smart-router:
|
|
71
|
+
- ollama-minimax
|
|
72
|
+
- moonshot-kimi-k2
|
|
73
|
+
- azure-gpt5
|
|
74
|
+
num_retries: 2
|
|
75
|
+
timeout: 90
|
|
76
|
+
|
|
77
|
+
litellm_settings:
|
|
78
|
+
drop_params: true
|
|
79
|
+
use_responses_api: false
|
|
80
|
+
return_response_headers: true
|
|
81
|
+
success_callback: []
|
|
82
|
+
failure_callback: []
|
|
83
|
+
|
|
84
|
+
general_settings:
|
|
85
|
+
master_key: sk-1234 # change this
|
|
86
|
+
port: 8082
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# ─── Lynkr Benchmark Config ───────────────────────────────────────────────────
|
|
2
|
+
# Multi-provider tier routing: Ollama → Moonshot → Azure OpenAI
|
|
3
|
+
# Copy to .env and fill in your credentials.
|
|
4
|
+
|
|
5
|
+
PORT=8081
|
|
6
|
+
|
|
7
|
+
# ── Ollama (local, free) ───────────────────────────────────────────────────────
|
|
8
|
+
OLLAMA_ENDPOINT=http://localhost:11434
|
|
9
|
+
OLLAMA_MODEL=qwen2.5-coder:7b
|
|
10
|
+
OLLAMA_TIMEOUT_MS=120000
|
|
11
|
+
OLLAMA_EMBEDDINGS_MODEL=nomic-embed-text
|
|
12
|
+
OLLAMA_EMBEDDINGS_ENDPOINT=http://localhost:11434/api/embeddings
|
|
13
|
+
|
|
14
|
+
# ── Azure OpenAI ───────────────────────────────────────────────────────────────
|
|
15
|
+
AZURE_OPENAI_ENDPOINT=https://YOUR-RESOURCE.openai.azure.com
|
|
16
|
+
AZURE_OPENAI_API_KEY=your-azure-openai-key
|
|
17
|
+
AZURE_OPENAI_DEPLOYMENT=gpt-4o
|
|
18
|
+
AZURE_OPENAI_API_VERSION=2024-08-01-preview
|
|
19
|
+
|
|
20
|
+
# ── Moonshot (Kimi) ────────────────────────────────────────────────────────────
|
|
21
|
+
MOONSHOT_API_KEY=your-moonshot-api-key
|
|
22
|
+
MOONSHOT_ENDPOINT=https://api.moonshot.ai/v1/chat/completions
|
|
23
|
+
MOONSHOT_MODEL=kimi-k2-turbo-preview
|
|
24
|
+
|
|
25
|
+
# ── Primary provider (Lynkr uses this when no tier matches) ───────────────────
|
|
26
|
+
# Set to whichever you want as the default fallback
|
|
27
|
+
MODEL_PROVIDER=azure-openai
|
|
28
|
+
|
|
29
|
+
# ── Tier Routing ───────────────────────────────────────────────────────────────
|
|
30
|
+
# SIMPLE → Ollama local (free)
|
|
31
|
+
# MEDIUM → Ollama minimax-m2.5:cloud (same model as SIMPLE)
|
|
32
|
+
# COMPLEX → Moonshot kimi-k2.6
|
|
33
|
+
# REASONING→ Azure OpenAI gpt-5.2-chat (best reasoning)
|
|
34
|
+
TIER_SIMPLE=ollama:minimax-m2.5:cloud
|
|
35
|
+
TIER_MEDIUM=ollama:minimax-m2.5:cloud
|
|
36
|
+
TIER_COMPLEX=moonshot:kimi-k2.6
|
|
37
|
+
TIER_REASONING=azure-openai:gpt-5.2-chat
|
|
38
|
+
|
|
39
|
+
# ── Token Optimisations (these are what LiteLLM/Portkey don't have) ────────────
|
|
40
|
+
SMART_TOOL_SELECTION=true
|
|
41
|
+
PROMPT_CACHE_ENABLED=true
|
|
42
|
+
SEMANTIC_CACHE_ENABLED=true
|
|
43
|
+
SEMANTIC_CACHE_THRESHOLD=0.95
|
|
44
|
+
HISTORY_COMPRESSION_ENABLED=true
|
|
45
|
+
TOOL_INJECTION_ENABLED=false
|
|
46
|
+
|
|
47
|
+
# ── Optional: make routing decisions visible in responses ──────────────────────
|
|
48
|
+
LYNKR_VISIBLE_ROUTING=true
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_comment": "Portkey Gateway Config — multi-provider conditional routing",
|
|
3
|
+
"_note": "Portkey has NO automatic complexity detection. This config uses max_tokens as a proxy for complexity. For real tier routing pass x-portkey-metadata: { 'tier': 'simple|medium|complex|reasoning' } from your client.",
|
|
4
|
+
|
|
5
|
+
"strategy": {
|
|
6
|
+
"mode": "conditional"
|
|
7
|
+
},
|
|
8
|
+
|
|
9
|
+
"conditions": [
|
|
10
|
+
{
|
|
11
|
+
"_comment": "SIMPLE — short requests (max_tokens <= 256) → Ollama",
|
|
12
|
+
"condition": {
|
|
13
|
+
"query.max_tokens": { "$lte": 256 }
|
|
14
|
+
},
|
|
15
|
+
"target": {
|
|
16
|
+
"provider": "ollama",
|
|
17
|
+
"customHost": "http://localhost:11434",
|
|
18
|
+
"override_params": {
|
|
19
|
+
"model": "qwen2.5-coder:7b"
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"_comment": "MEDIUM — metadata tier=medium → Moonshot",
|
|
25
|
+
"condition": {
|
|
26
|
+
"metadata.tier": { "$eq": "medium" }
|
|
27
|
+
},
|
|
28
|
+
"target": {
|
|
29
|
+
"provider": "openai",
|
|
30
|
+
"apiKey": "{{MOONSHOT_API_KEY}}",
|
|
31
|
+
"baseURL": "https://api.moonshot.ai/v1",
|
|
32
|
+
"override_params": {
|
|
33
|
+
"model": "moonshot-v1-8k"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"_comment": "REASONING — metadata tier=reasoning → Azure OpenAI o3-mini",
|
|
39
|
+
"condition": {
|
|
40
|
+
"metadata.tier": { "$eq": "reasoning" }
|
|
41
|
+
},
|
|
42
|
+
"target": {
|
|
43
|
+
"provider": "azure-openai",
|
|
44
|
+
"apiKey": "{{AZURE_OPENAI_API_KEY}}",
|
|
45
|
+
"resourceName": "YOUR-RESOURCE",
|
|
46
|
+
"deploymentId": "o3-mini",
|
|
47
|
+
"apiVersion": "2024-12-01-preview"
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
|
|
52
|
+
"default": {
|
|
53
|
+
"_comment": "COMPLEX — everything else → Azure OpenAI GPT-4o",
|
|
54
|
+
"provider": "azure-openai",
|
|
55
|
+
"apiKey": "{{AZURE_OPENAI_API_KEY}}",
|
|
56
|
+
"resourceName": "YOUR-RESOURCE",
|
|
57
|
+
"deploymentId": "gpt-4o",
|
|
58
|
+
"apiVersion": "2024-08-01-preview"
|
|
59
|
+
}
|
|
60
|
+
}
|