smartcontext-proxy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PLAN.md +406 -0
- package/PROGRESS.md +60 -0
- package/README.md +99 -0
- package/SPEC.md +915 -0
- package/adapters/openclaw/embedding.d.ts +8 -0
- package/adapters/openclaw/embedding.js +16 -0
- package/adapters/openclaw/embedding.ts +15 -0
- package/adapters/openclaw/index.d.ts +18 -0
- package/adapters/openclaw/index.js +42 -0
- package/adapters/openclaw/index.ts +43 -0
- package/adapters/openclaw/session-importer.d.ts +22 -0
- package/adapters/openclaw/session-importer.js +99 -0
- package/adapters/openclaw/session-importer.ts +105 -0
- package/adapters/openclaw/storage.d.ts +26 -0
- package/adapters/openclaw/storage.js +177 -0
- package/adapters/openclaw/storage.ts +183 -0
- package/dist/adapters/openclaw/embedding.d.ts +8 -0
- package/dist/adapters/openclaw/embedding.js +16 -0
- package/dist/adapters/openclaw/index.d.ts +18 -0
- package/dist/adapters/openclaw/index.js +42 -0
- package/dist/adapters/openclaw/session-importer.d.ts +22 -0
- package/dist/adapters/openclaw/session-importer.js +99 -0
- package/dist/adapters/openclaw/storage.d.ts +26 -0
- package/dist/adapters/openclaw/storage.js +177 -0
- package/dist/config/auto-detect.d.ts +3 -0
- package/dist/config/auto-detect.js +48 -0
- package/dist/config/defaults.d.ts +2 -0
- package/dist/config/defaults.js +28 -0
- package/dist/config/schema.d.ts +30 -0
- package/dist/config/schema.js +3 -0
- package/dist/context/budget.d.ts +25 -0
- package/dist/context/budget.js +85 -0
- package/dist/context/canonical.d.ts +39 -0
- package/dist/context/canonical.js +12 -0
- package/dist/context/chunker.d.ts +9 -0
- package/dist/context/chunker.js +148 -0
- package/dist/context/optimizer.d.ts +31 -0
- package/dist/context/optimizer.js +163 -0
- package/dist/context/retriever.d.ts +29 -0
- package/dist/context/retriever.js +103 -0
- package/dist/daemon/process.d.ts +6 -0
- package/dist/daemon/process.js +76 -0
- package/dist/daemon/service.d.ts +2 -0
- package/dist/daemon/service.js +99 -0
- package/dist/embedding/ollama.d.ts +11 -0
- package/dist/embedding/ollama.js +72 -0
- package/dist/embedding/types.d.ts +6 -0
- package/dist/embedding/types.js +3 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +190 -0
- package/dist/metrics/collector.d.ts +43 -0
- package/dist/metrics/collector.js +72 -0
- package/dist/providers/anthropic.d.ts +15 -0
- package/dist/providers/anthropic.js +109 -0
- package/dist/providers/google.d.ts +13 -0
- package/dist/providers/google.js +40 -0
- package/dist/providers/ollama.d.ts +13 -0
- package/dist/providers/ollama.js +82 -0
- package/dist/providers/openai.d.ts +15 -0
- package/dist/providers/openai.js +115 -0
- package/dist/providers/types.d.ts +18 -0
- package/dist/providers/types.js +3 -0
- package/dist/proxy/router.d.ts +12 -0
- package/dist/proxy/router.js +46 -0
- package/dist/proxy/server.d.ts +25 -0
- package/dist/proxy/server.js +265 -0
- package/dist/proxy/stream.d.ts +8 -0
- package/dist/proxy/stream.js +32 -0
- package/dist/src/config/auto-detect.d.ts +3 -0
- package/dist/src/config/auto-detect.js +48 -0
- package/dist/src/config/defaults.d.ts +2 -0
- package/dist/src/config/defaults.js +28 -0
- package/dist/src/config/schema.d.ts +30 -0
- package/dist/src/config/schema.js +3 -0
- package/dist/src/context/budget.d.ts +25 -0
- package/dist/src/context/budget.js +85 -0
- package/dist/src/context/canonical.d.ts +39 -0
- package/dist/src/context/canonical.js +12 -0
- package/dist/src/context/chunker.d.ts +9 -0
- package/dist/src/context/chunker.js +148 -0
- package/dist/src/context/optimizer.d.ts +31 -0
- package/dist/src/context/optimizer.js +163 -0
- package/dist/src/context/retriever.d.ts +29 -0
- package/dist/src/context/retriever.js +103 -0
- package/dist/src/daemon/process.d.ts +6 -0
- package/dist/src/daemon/process.js +76 -0
- package/dist/src/daemon/service.d.ts +2 -0
- package/dist/src/daemon/service.js +99 -0
- package/dist/src/embedding/ollama.d.ts +11 -0
- package/dist/src/embedding/ollama.js +72 -0
- package/dist/src/embedding/types.d.ts +6 -0
- package/dist/src/embedding/types.js +3 -0
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +190 -0
- package/dist/src/metrics/collector.d.ts +43 -0
- package/dist/src/metrics/collector.js +72 -0
- package/dist/src/providers/anthropic.d.ts +15 -0
- package/dist/src/providers/anthropic.js +109 -0
- package/dist/src/providers/google.d.ts +13 -0
- package/dist/src/providers/google.js +40 -0
- package/dist/src/providers/ollama.d.ts +13 -0
- package/dist/src/providers/ollama.js +82 -0
- package/dist/src/providers/openai.d.ts +15 -0
- package/dist/src/providers/openai.js +115 -0
- package/dist/src/providers/types.d.ts +18 -0
- package/dist/src/providers/types.js +3 -0
- package/dist/src/proxy/router.d.ts +12 -0
- package/dist/src/proxy/router.js +46 -0
- package/dist/src/proxy/server.d.ts +25 -0
- package/dist/src/proxy/server.js +265 -0
- package/dist/src/proxy/stream.d.ts +8 -0
- package/dist/src/proxy/stream.js +32 -0
- package/dist/src/storage/lancedb.d.ts +21 -0
- package/dist/src/storage/lancedb.js +158 -0
- package/dist/src/storage/types.d.ts +52 -0
- package/dist/src/storage/types.js +3 -0
- package/dist/src/test/context.test.d.ts +1 -0
- package/dist/src/test/context.test.js +141 -0
- package/dist/src/test/dashboard.test.d.ts +1 -0
- package/dist/src/test/dashboard.test.js +85 -0
- package/dist/src/test/proxy.test.d.ts +1 -0
- package/dist/src/test/proxy.test.js +188 -0
- package/dist/src/ui/dashboard.d.ts +2 -0
- package/dist/src/ui/dashboard.js +183 -0
- package/dist/storage/lancedb.d.ts +21 -0
- package/dist/storage/lancedb.js +158 -0
- package/dist/storage/types.d.ts +52 -0
- package/dist/storage/types.js +3 -0
- package/dist/test/context.test.d.ts +1 -0
- package/dist/test/context.test.js +141 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +85 -0
- package/dist/test/proxy.test.d.ts +1 -0
- package/dist/test/proxy.test.js +188 -0
- package/dist/ui/dashboard.d.ts +2 -0
- package/dist/ui/dashboard.js +183 -0
- package/package.json +38 -0
- package/src/config/auto-detect.ts +51 -0
- package/src/config/defaults.ts +26 -0
- package/src/config/schema.ts +33 -0
- package/src/context/budget.ts +126 -0
- package/src/context/canonical.ts +50 -0
- package/src/context/chunker.ts +165 -0
- package/src/context/optimizer.ts +201 -0
- package/src/context/retriever.ts +123 -0
- package/src/daemon/process.ts +70 -0
- package/src/daemon/service.ts +103 -0
- package/src/embedding/ollama.ts +68 -0
- package/src/embedding/types.ts +6 -0
- package/src/index.ts +176 -0
- package/src/metrics/collector.ts +114 -0
- package/src/providers/anthropic.ts +117 -0
- package/src/providers/google.ts +42 -0
- package/src/providers/ollama.ts +87 -0
- package/src/providers/openai.ts +127 -0
- package/src/providers/types.ts +20 -0
- package/src/proxy/router.ts +48 -0
- package/src/proxy/server.ts +315 -0
- package/src/proxy/stream.ts +39 -0
- package/src/storage/lancedb.ts +169 -0
- package/src/storage/types.ts +47 -0
- package/src/test/context.test.ts +165 -0
- package/src/test/dashboard.test.ts +94 -0
- package/src/test/proxy.test.ts +218 -0
- package/src/ui/dashboard.ts +184 -0
- package/tsconfig.json +18 -0
package/PLAN.md
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
# SmartContext Proxy — Implementation Plan
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
**Goal**: `npx smartcontext-proxy` → self-configures, starts proxy, works out of the box.
|
|
6
|
+
|
|
7
|
+
**Stack**: TypeScript, Node.js (no framework), LanceDB embedded, ONNX/Ollama embeddings.
|
|
8
|
+
|
|
9
|
+
**Phases**: 4 phases, each delivers a working increment.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Phase 1: Transparent Proxy (Week 1)
|
|
14
|
+
|
|
15
|
+
Deliverable: HTTP proxy that forwards requests to any detected provider without modification. Streaming works. Zero context optimization yet — just a transparent pipe.
|
|
16
|
+
|
|
17
|
+
### Steps
|
|
18
|
+
|
|
19
|
+
#### 1.1 Project scaffold
|
|
20
|
+
- `npm init`, `tsconfig.json`, `package.json` with `bin` entry
|
|
21
|
+
- Dependencies: `typescript`, `tiktoken` (token counting)
|
|
22
|
+
- Zero framework — raw `http.createServer`
|
|
23
|
+
- **Files**: `package.json`, `tsconfig.json`, `src/index.ts`
|
|
24
|
+
- **Complexity**: Low
|
|
25
|
+
|
|
26
|
+
#### 1.2 Provider auto-detection
|
|
27
|
+
- Scan env vars: `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `OPENROUTER_API_KEY`, `OLLAMA_HOST`
|
|
28
|
+
- Print detected providers on startup
|
|
29
|
+
- **Files**: `src/config/auto-detect.ts`, `src/config/defaults.ts`, `src/config/schema.ts`
|
|
30
|
+
- **Complexity**: Low
|
|
31
|
+
|
|
32
|
+
#### 1.3 Provider adapters (format translation)
|
|
33
|
+
- `ProviderAdapter` interface
|
|
34
|
+
- Anthropic adapter: parse Messages API, serialize back
|
|
35
|
+
- OpenAI adapter: parse Chat Completions, serialize back
|
|
36
|
+
- Ollama adapter: parse native `/api/chat`, serialize back
|
|
37
|
+
- Google adapter: stub (post-MVP detail)
|
|
38
|
+
- **Files**: `src/providers/types.ts`, `src/providers/anthropic.ts`, `src/providers/openai.ts`, `src/providers/ollama.ts`, `src/providers/google.ts`
|
|
39
|
+
- **Complexity**: Medium (format differences, edge cases)
|
|
40
|
+
|
|
41
|
+
#### 1.4 Canonical message format
|
|
42
|
+
- Internal representation all providers normalize to
|
|
43
|
+
- Bidirectional conversion: provider ↔ canonical
|
|
44
|
+
- Handle: text, tool_use, tool_result, images (pass-through)
|
|
45
|
+
- **Files**: `src/context/canonical.ts`
|
|
46
|
+
- **Complexity**: Medium
|
|
47
|
+
|
|
48
|
+
#### 1.5 HTTP proxy server + router
|
|
49
|
+
- `http.createServer`, route by URL path (`/v1/anthropic/*`, `/v1/openai/*`, etc.)
|
|
50
|
+
- Forward request to real provider, return response
|
|
51
|
+
- Non-streaming first, then streaming
|
|
52
|
+
- **Files**: `src/proxy/server.ts`, `src/proxy/router.ts`
|
|
53
|
+
- **Complexity**: Medium
|
|
54
|
+
|
|
55
|
+
#### 1.6 SSE stream pass-through
|
|
56
|
+
- Parse incoming SSE from provider
|
|
57
|
+
- Forward chunks to client byte-by-byte
|
|
58
|
+
- Buffer complete exchange for post-indexing
|
|
59
|
+
- **Files**: `src/proxy/stream.ts`
|
|
60
|
+
- **Complexity**: High (SSE parsing, backpressure, error handling)
|
|
61
|
+
|
|
62
|
+
#### 1.7 CLI entry point
|
|
63
|
+
- `npx smartcontext-proxy` starts proxy
|
|
64
|
+
- `--port`, `--config` flags
|
|
65
|
+
- Print startup banner with detected providers
|
|
66
|
+
- **Files**: `src/index.ts`
|
|
67
|
+
- **Complexity**: Low
|
|
68
|
+
|
|
69
|
+
### Phase 1 Tests
|
|
70
|
+
- [ ] Proxy forwards Anthropic request/response correctly (non-streaming)
|
|
71
|
+
- [ ] Proxy forwards Anthropic streaming request correctly
|
|
72
|
+
- [ ] Proxy forwards OpenAI request/response correctly
|
|
73
|
+
- [ ] Proxy forwards Ollama request correctly
|
|
74
|
+
- [ ] Auto-detect finds providers from env vars
|
|
75
|
+
- [ ] Unknown provider path returns 404
|
|
76
|
+
- [ ] Provider down returns proper error
|
|
77
|
+
|
|
78
|
+
### Phase 1 Exit Criteria
|
|
79
|
+
A Claude Code session pointing `ANTHROPIC_API_URL` at SmartContext works identically to direct API access. Zero behavior change, zero latency overhead beyond network hop.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Phase 2: Context Optimization Core (Week 2-3)
|
|
84
|
+
|
|
85
|
+
Deliverable: Proxy now optimizes context — embeds queries, retrieves relevant chunks, assembles optimized context. Token savings visible in logs.
|
|
86
|
+
|
|
87
|
+
### Steps
|
|
88
|
+
|
|
89
|
+
#### 2.1 Embedding adapter interface + ONNX implementation
|
|
90
|
+
- `EmbeddingAdapter` interface
|
|
91
|
+
- ONNX adapter using `@xenova/transformers` with `nomic-embed-text-v1.5`
|
|
92
|
+
- Auto-download model on first run (~100MB)
|
|
93
|
+
- **Files**: `src/embedding/types.ts`, `src/embedding/onnx.ts`
|
|
94
|
+
- **Complexity**: Medium (ONNX runtime setup, model download)
|
|
95
|
+
|
|
96
|
+
#### 2.2 Ollama embedding adapter
|
|
97
|
+
- Connect to local/remote Ollama for embeddings
|
|
98
|
+
- Auto-detect: if Ollama available, prefer over ONNX (faster with GPU)
|
|
99
|
+
- **Files**: `src/embedding/ollama.ts`
|
|
100
|
+
- **Complexity**: Low
|
|
101
|
+
|
|
102
|
+
#### 2.3 Storage adapter interface + LanceDB implementation
|
|
103
|
+
- `StorageAdapter` interface
|
|
104
|
+
- LanceDB adapter: embedded, zero-config, `~/.smartcontext/data/`
|
|
105
|
+
- Schema: chunks table (embedding, text, metadata, sessionId, timestamp)
|
|
106
|
+
- Sessions table (raw logs)
|
|
107
|
+
- Summaries table
|
|
108
|
+
- **Files**: `src/storage/types.ts`, `src/storage/lancedb.ts`
|
|
109
|
+
- **Complexity**: Medium
|
|
110
|
+
|
|
111
|
+
#### 2.4 Chunker
|
|
112
|
+
- Split conversation into chunks at message-pair boundaries
|
|
113
|
+
- Handle long responses: split at paragraph boundaries if >2000 tokens
|
|
114
|
+
- Keep code blocks atomic
|
|
115
|
+
- Metadata extraction: file paths, tools, summary
|
|
116
|
+
- **Files**: `src/context/chunker.ts`
|
|
117
|
+
- **Complexity**: Medium
|
|
118
|
+
|
|
119
|
+
#### 2.5 Retriever
|
|
120
|
+
- Embed query → vector search → score + rank
|
|
121
|
+
- Recency boost (+0.15 current session)
|
|
122
|
+
- File-path boost (+0.20 for matching files)
|
|
123
|
+
- Dedup (>0.92 similarity)
|
|
124
|
+
- Confidence gate (<0.55 → skip retrieval)
|
|
125
|
+
- Min 3 chunks above threshold
|
|
126
|
+
- **Files**: `src/context/retriever.ts`
|
|
127
|
+
- **Complexity**: High (scoring logic, edge cases)
|
|
128
|
+
|
|
129
|
+
#### 2.6 Token budget allocator
|
|
130
|
+
- Calculate available budget per tier
|
|
131
|
+
- Greedy packing of T2 chunks by score
|
|
132
|
+
- T3 summary filling with remainder
|
|
133
|
+
- Model-specific context limits (lookup table)
|
|
134
|
+
- **Files**: `src/context/budget.ts`
|
|
135
|
+
- **Complexity**: Medium
|
|
136
|
+
|
|
137
|
+
#### 2.7 Context optimizer (orchestrator)
|
|
138
|
+
- Wire together: chunker → retriever → budget → assemble
|
|
139
|
+
- Preserve system prompt as stable prefix (KV-cache)
|
|
140
|
+
- Keep T1 (last 3 exchanges) verbatim
|
|
141
|
+
- Fill T2 from retrieval
|
|
142
|
+
- Append T3 summaries
|
|
143
|
+
- **Files**: `src/context/optimizer.ts`
|
|
144
|
+
- **Complexity**: Medium (integration, ordering logic)
|
|
145
|
+
|
|
146
|
+
#### 2.8 Async post-indexing
|
|
147
|
+
- After response completes (streaming done), index the full exchange
|
|
148
|
+
- Non-blocking — don't delay response to client
|
|
149
|
+
- Embed + store chunk + append raw log
|
|
150
|
+
- **Files**: update `src/proxy/server.ts`, `src/proxy/stream.ts`
|
|
151
|
+
- **Complexity**: Medium
|
|
152
|
+
|
|
153
|
+
#### 2.9 Graceful degradation
|
|
154
|
+
- If embedding fails → pass-through
|
|
155
|
+
- If storage fails → pass-through
|
|
156
|
+
- If no chunks above threshold → pass-through
|
|
157
|
+
- Log degradation events
|
|
158
|
+
- **Files**: update `src/context/optimizer.ts`
|
|
159
|
+
- **Complexity**: Low (but critical to get right)
|
|
160
|
+
|
|
161
|
+
#### 2.10 A/B Test Mode
|
|
162
|
+
- `--test-mode` flag: send each request twice (optimized + original)
|
|
163
|
+
- Compare responses: semantic similarity, token delta, content diff
|
|
164
|
+
- Store comparison results for dashboard
|
|
165
|
+
- `--test-sample N%` to limit extra cost (random sampling)
|
|
166
|
+
- **Files**: `src/test/ab-runner.ts`, `src/test/comparator.ts`
|
|
167
|
+
- **Complexity**: High (dual-path execution, response comparison)
|
|
168
|
+
|
|
169
|
+
#### 2.11 Verbose logging
|
|
170
|
+
- Per-request debug log: what was retrieved, scores, what was cut, budget allocation
|
|
171
|
+
- Per-request JSON dumps (`~/.smartcontext/logs/requests/`)
|
|
172
|
+
- Structured format for later LLM analysis
|
|
173
|
+
- **Files**: `src/logging/verbose.ts`, `src/logging/request-dump.ts`
|
|
174
|
+
- **Complexity**: Medium
|
|
175
|
+
|
|
176
|
+
### Phase 2 Tests
|
|
177
|
+
- [ ] Embedding produces consistent vectors for same text
|
|
178
|
+
- [ ] LanceDB stores and retrieves chunks correctly
|
|
179
|
+
- [ ] Chunker splits long conversations at message boundaries
|
|
180
|
+
- [ ] Chunker keeps code blocks atomic
|
|
181
|
+
- [ ] Retriever returns relevant chunks (known test corpus)
|
|
182
|
+
- [ ] File-path boost increases score for matching chunks
|
|
183
|
+
- [ ] Confidence gate skips retrieval when no good matches
|
|
184
|
+
- [ ] Token budget respects model limits
|
|
185
|
+
- [ ] Optimizer produces valid provider-format output
|
|
186
|
+
- [ ] Async indexing doesn't block response
|
|
187
|
+
- [ ] Graceful degradation: storage down → pass-through works
|
|
188
|
+
- [ ] A/B test mode sends both paths, compares correctly
|
|
189
|
+
- [ ] Verbose logs contain full retrieval decision trace
|
|
190
|
+
- [ ] End-to-end: real CC session → savings >30%
|
|
191
|
+
|
|
192
|
+
### Phase 2 Exit Criteria
|
|
193
|
+
Proxy reduces token usage by 40-60% on a real Claude Code session while maintaining response quality. A/B test confirms similarity >0.95 on 90%+ of requests. Latency overhead <20ms p95.
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Phase 3: Dashboard, Daemon & Hardening (Week 3-4)
|
|
198
|
+
|
|
199
|
+
Deliverable: Web dashboard with full observability, daemon mode, pause/resume, adapter plugins. User sees real value.
|
|
200
|
+
|
|
201
|
+
### Steps
|
|
202
|
+
|
|
203
|
+
#### 3.1 Metrics collector + history
|
|
204
|
+
- Per-request: original tokens, optimized tokens, savings %, latency, chunks retrieved, top score
|
|
205
|
+
- Aggregate: total savings, avg latency, sessions indexed
|
|
206
|
+
- Persistent daily/monthly history (flush to storage)
|
|
207
|
+
- Cost calculation per provider/model (pricing lookup table)
|
|
208
|
+
- **Files**: `src/metrics/collector.ts`, `src/metrics/history.ts`
|
|
209
|
+
- **Complexity**: Medium
|
|
210
|
+
|
|
211
|
+
#### 3.2 REST API (`/_sc/*`)
|
|
212
|
+
- `GET /_sc/status` — running/paused, PID, uptime
|
|
213
|
+
- `GET /_sc/stats` — aggregate metrics
|
|
214
|
+
- `GET /_sc/stats/daily` — daily breakdown
|
|
215
|
+
- `GET /_sc/stats/providers` — per-provider savings
|
|
216
|
+
- `GET /_sc/stats/models` — per-model savings
|
|
217
|
+
- `GET /_sc/sessions` — session list
|
|
218
|
+
- `GET /_sc/sessions/:id` — session detail (exchanges, retrieval decisions)
|
|
219
|
+
- `GET /_sc/config` — current config (keys redacted)
|
|
220
|
+
- `PUT /_sc/config` — update config from UI
|
|
221
|
+
- `POST /_sc/pause` / `POST /_sc/resume` / `POST /_sc/stop`
|
|
222
|
+
- **Files**: `src/ui/api.ts`, `src/metrics/endpoint.ts`
|
|
223
|
+
- **Complexity**: Medium
|
|
224
|
+
|
|
225
|
+
#### 3.3 Web dashboard
|
|
226
|
+
- Single-page app served at `http://localhost:4800/` (root path)
|
|
227
|
+
- Vanilla HTML + CSS + minimal JS — all inlined in one .ts file
|
|
228
|
+
- Pages: Home/Status, Live Feed, Sessions, Savings Report, Settings
|
|
229
|
+
- Home: status badge, total savings ($), requests count, 7-day chart, provider status
|
|
230
|
+
- Live Feed: real-time request stream via WebSocket, click to expand details
|
|
231
|
+
- Savings Report: monthly breakdown, per-provider, per-model, projected annual savings
|
|
232
|
+
- Settings: edit context thresholds, manage providers, toggle pause
|
|
233
|
+
- All rendering server-side (HTML generation), JS only for WebSocket feed + interactivity
|
|
234
|
+
- **Files**: `src/ui/dashboard.ts`, `src/ui/ws-feed.ts`
|
|
235
|
+
- **Complexity**: High (but no deps — just string templates)
|
|
236
|
+
|
|
237
|
+
#### 3.4 Pause/Resume
|
|
238
|
+
- `POST /_sc/pause` — switch to pass-through mode (no optimization, indexing continues)
|
|
239
|
+
- `POST /_sc/resume` — re-enable optimization
|
|
240
|
+
- Dashboard button toggles state
|
|
241
|
+
- CLI: `npx smartcontext-proxy pause` / `resume`
|
|
242
|
+
- **Files**: `src/proxy/pause.ts`, update `src/proxy/server.ts`
|
|
243
|
+
- **Complexity**: Low
|
|
244
|
+
|
|
245
|
+
#### 3.5 Daemon mode
|
|
246
|
+
- `start` — fork process, write PID to `~/.smartcontext/smartcontext.pid`, redirect output to `~/.smartcontext/logs/proxy.log`
|
|
247
|
+
- `stop` — read PID, send SIGTERM, wait for graceful shutdown
|
|
248
|
+
- `restart` — stop + start
|
|
249
|
+
- Graceful shutdown: finish in-flight requests (5s), flush metrics, close storage, remove PID
|
|
250
|
+
- **Files**: `src/daemon/process.ts`
|
|
251
|
+
- **Complexity**: Medium
|
|
252
|
+
|
|
253
|
+
#### 3.6 System service installer
|
|
254
|
+
- `install-service` — generate LaunchAgent (macOS) or systemd user service (Linux)
|
|
255
|
+
- `uninstall-service` — remove
|
|
256
|
+
- Auto-start on boot, auto-restart on crash
|
|
257
|
+
- **Files**: `src/daemon/service.ts`
|
|
258
|
+
- **Complexity**: Low
|
|
259
|
+
|
|
260
|
+
#### 3.7 Debug headers
|
|
261
|
+
- Optional `X-SmartContext-*` headers on responses
|
|
262
|
+
- Enabled via config or `/_sc/config` toggle
|
|
263
|
+
- **Files**: update `src/proxy/server.ts`
|
|
264
|
+
- **Complexity**: Low
|
|
265
|
+
|
|
266
|
+
#### 3.8 Config management
|
|
267
|
+
- Auto-generate `~/.smartcontext/config.json` on first run
|
|
268
|
+
- Merge user edits with auto-detected values
|
|
269
|
+
- Editable from dashboard Settings page
|
|
270
|
+
- **Files**: update `src/config/` modules
|
|
271
|
+
- **Complexity**: Low
|
|
272
|
+
|
|
273
|
+
#### 3.9 Adapter plugin loader
|
|
274
|
+
- Scan `node_modules` for `smartcontext-adapter-*` packages
|
|
275
|
+
- Load and register storage/embedding adapters
|
|
276
|
+
- **Files**: `src/adapters/loader.ts`
|
|
277
|
+
- **Complexity**: Medium
|
|
278
|
+
|
|
279
|
+
#### 3.10 LLM-assisted diagnostics
|
|
280
|
+
- Diagnostic LLM analyzes quality diffs, retrieval misses, config issues
|
|
281
|
+
- Auto-triggered on A/B similarity <0.85
|
|
282
|
+
- Manual: `diagnose` CLI command, "Diagnose" button in dashboard
|
|
283
|
+
- Auto-tune: analyze 50+ requests, suggest config changes
|
|
284
|
+
- LLM selection: local Ollama first, cheapest cloud fallback
|
|
285
|
+
- Direct API calls (never through SmartContext — no recursion)
|
|
286
|
+
- **Files**: `src/diagnostics/analyzer.ts`, `src/diagnostics/auto-tune.ts`, `src/diagnostics/llm-client.ts`
|
|
287
|
+
- **Complexity**: High
|
|
288
|
+
|
|
289
|
+
#### 3.11 Filesystem storage adapter
|
|
290
|
+
- Fallback for users who don't want vector search
|
|
291
|
+
- Raw JSON logs, keyword search
|
|
292
|
+
- **Files**: `src/storage/filesystem.ts`
|
|
293
|
+
- **Complexity**: Low
|
|
294
|
+
|
|
295
|
+
### Phase 3 Tests
|
|
296
|
+
- [ ] Dashboard loads at localhost:4800
|
|
297
|
+
- [ ] Live Feed shows requests in real-time
|
|
298
|
+
- [ ] Savings Report shows correct per-provider breakdown
|
|
299
|
+
- [ ] Settings changes persist to config.json
|
|
300
|
+
- [ ] Pause disables optimization, resume re-enables
|
|
301
|
+
- [ ] Daemon start/stop/restart works (PID file lifecycle)
|
|
302
|
+
- [ ] `install-service` generates a valid LaunchAgent plist
|
|
303
|
+
- [ ] REST API returns correct data for all endpoints
|
|
304
|
+
- [ ] Config auto-generates on first run
|
|
305
|
+
- [ ] Plugin loader discovers installed adapters
|
|
306
|
+
|
|
307
|
+
### Phase 3 Exit Criteria
|
|
308
|
+
User opens `localhost:4800` → sees dashboard with savings, live feed, controls. Can pause/resume from UI. Daemon mode works. The value is visible and tangible — "$X saved this month" front and center.
|
|
309
|
+
|
|
310
|
+
---
|
|
311
|
+
|
|
312
|
+
## Phase 4: OpenClaw Adapter + Benchmark (Week 4-5)
|
|
313
|
+
|
|
314
|
+
Deliverable: Our system uses SmartContext. Benchmark data proves value. Ready for public release.
|
|
315
|
+
|
|
316
|
+
### Steps
|
|
317
|
+
|
|
318
|
+
#### 4.1 smartcontext-adapter-openclaw package
|
|
319
|
+
- Separate npm package
|
|
320
|
+
- OpenSearch storage adapter (chunks + metrics indices)
|
|
321
|
+
- Beast Ollama embedding adapter
|
|
322
|
+
- Auto-discover config from `~/.openclaw/`
|
|
323
|
+
- **Files**: `adapters/openclaw/` directory
|
|
324
|
+
- **Complexity**: Medium
|
|
325
|
+
|
|
326
|
+
#### 4.2 OC session importer
|
|
327
|
+
- Parse OC gateway `.jsonl` session logs
|
|
328
|
+
- Chunk and index historical sessions
|
|
329
|
+
- Support incremental import (skip already indexed)
|
|
330
|
+
- **Files**: `adapters/openclaw/session-importer.ts`
|
|
331
|
+
- **Complexity**: Medium
|
|
332
|
+
|
|
333
|
+
#### 4.3 OC Gateway integration
|
|
334
|
+
- Configure OC to route API calls through SmartContext
|
|
335
|
+
- Test with cron jobs, TL pipeline tasks, A2A bridge
|
|
336
|
+
- Verify no regressions
|
|
337
|
+
- **Files**: OC config changes only
|
|
338
|
+
- **Complexity**: Low (config), High (testing)
|
|
339
|
+
|
|
340
|
+
#### 4.4 Dashboard SmartContext tab
|
|
341
|
+
- New tab in dashboard-ts
|
|
342
|
+
- Savings over time chart
|
|
343
|
+
- Per-session retrieval quality
|
|
344
|
+
- Provider breakdown
|
|
345
|
+
- Read from `smartcontext-metrics` OS index
|
|
346
|
+
- **Files**: dashboard-ts changes
|
|
347
|
+
- **Complexity**: Medium
|
|
348
|
+
|
|
349
|
+
#### 4.5 Benchmark: 10 CC sessions
|
|
350
|
+
- Select 2 sessions per type (bug fix, feature, cron, refactor, research)
|
|
351
|
+
- For each: run with/without SmartContext
|
|
352
|
+
- Measure: semantic similarity, token ratio, latency, retrieval precision
|
|
353
|
+
- Write benchmark report
|
|
354
|
+
- **Complexity**: High (manual evaluation)
|
|
355
|
+
|
|
356
|
+
#### 4.6 npm publish preparation
|
|
357
|
+
- README with quick-start, examples, architecture diagram
|
|
358
|
+
- `package.json` bin entry for npx
|
|
359
|
+
- GitHub repo setup
|
|
360
|
+
- License: Apache 2.0
|
|
361
|
+
- **Complexity**: Low
|
|
362
|
+
|
|
363
|
+
### Phase 4 Tests
|
|
364
|
+
- [ ] OpenClaw adapter reads OS correctly
|
|
365
|
+
- [ ] Session importer processes real OC sessions
|
|
366
|
+
- [ ] OC crons work through SmartContext proxy
|
|
367
|
+
- [ ] Dashboard tab renders metrics
|
|
368
|
+
- [ ] Benchmark shows >40% savings with <5% quality loss
|
|
369
|
+
- [ ] `npx smartcontext-proxy` works from clean npm install
|
|
370
|
+
|
|
371
|
+
### Phase 4 Exit Criteria
|
|
372
|
+
Our OC system runs through SmartContext daily. Benchmark proves value. Package published on npm.
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## Risks
|
|
377
|
+
|
|
378
|
+
| Risk | Impact | Mitigation |
|
|
379
|
+
|------|--------|------------|
|
|
380
|
+
| Retrieval misses critical context | Quality degradation | Confidence gate + pass-through fallback |
|
|
381
|
+
| ONNX model too slow on CPU | High latency | Auto-prefer Ollama when available |
|
|
382
|
+
| LanceDB corruption | Data loss | Raw logs preserved separately, re-indexable |
|
|
383
|
+
| Provider API format changes | Proxy breaks | Adapter pattern isolates changes |
|
|
384
|
+
| Token counting inaccuracy | Budget overflow | Use tiktoken + 10% safety margin |
|
|
385
|
+
| SSE parsing edge cases | Broken streaming | Extensive streaming tests per provider |
|
|
386
|
+
|
|
387
|
+
## Dependencies
|
|
388
|
+
|
|
389
|
+
- `tiktoken` — token counting
|
|
390
|
+
- `@xenova/transformers` — ONNX embedding (optional, for users without Ollama)
|
|
391
|
+
- `vectordb` (lancedb) — embedded vector store
|
|
392
|
+
- `apache-arrow` — LanceDB dependency
|
|
393
|
+
- Zero web frameworks (raw `http` module)
|
|
394
|
+
|
|
395
|
+
## Decision Log
|
|
396
|
+
|
|
397
|
+
| # | Decision | Rationale |
|
|
398
|
+
|---|----------|-----------|
|
|
399
|
+
| 1 | Raw `http.createServer`, no Express | Minimal deps, full SSE control, proxy doesn't need middleware |
|
|
400
|
+
| 2 | LanceDB default, not Qdrant | Zero-config embedded, no server process needed |
|
|
401
|
+
| 3 | ONNX fallback, Ollama preferred | Works without GPU, but faster with Ollama |
|
|
402
|
+
| 4 | Provider-agnostic from day 1 | User has multiple providers, fallback chains cross providers |
|
|
403
|
+
| 5 | Firewall model (transparent) | Minimal client changes, maximum adoption |
|
|
404
|
+
| 6 | Apache 2.0 license | OSS-friendly, no BSL reputation risk |
|
|
405
|
+
| 7 | Adapter plugin system | Third parties extend without forking |
|
|
406
|
+
| 8 | OpenRouter last in fallback | Limited balance, use as last resort |
|
package/PROGRESS.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# SmartContext Proxy — Implementation Progress
|
|
2
|
+
|
|
3
|
+
## Repository
|
|
4
|
+
https://github.com/emilvrana/smartcontext-proxy (private)
|
|
5
|
+
|
|
6
|
+
## Phase 1: Transparent Proxy ✅
|
|
7
|
+
- [x] Project scaffold (TypeScript, raw http.createServer)
|
|
8
|
+
- [x] Config auto-detection from env vars
|
|
9
|
+
- [x] Canonical message format
|
|
10
|
+
- [x] Provider adapters: Anthropic, OpenAI, Ollama, Google (stub)
|
|
11
|
+
- [x] HTTP proxy server + URL-based routing
|
|
12
|
+
- [x] SSE stream pass-through (byte-level, zero buffering)
|
|
13
|
+
- [x] CLI: --port, --config, --help, --version
|
|
14
|
+
|
|
15
|
+
## Phase 2: Context Optimization Core ✅
|
|
16
|
+
- [x] Embedding adapter: Ollama (nomic-embed-text)
|
|
17
|
+
- [x] Storage adapter: LanceDB embedded (zero-config)
|
|
18
|
+
- [x] Chunker: message-pair splitting, paragraph boundaries, code blocks
|
|
19
|
+
- [x] Retriever: vector search + recency/filepath boosts + dedup + confidence gate
|
|
20
|
+
- [x] Token budget allocator (tiered: T0/T1/T2/T3)
|
|
21
|
+
- [x] Context optimizer (orchestrator)
|
|
22
|
+
- [x] Metrics collector (per-request + aggregate)
|
|
23
|
+
- [x] Graceful degradation (any failure → pass-through)
|
|
24
|
+
- [x] Async post-indexing
|
|
25
|
+
|
|
26
|
+
## Phase 3: Dashboard, Daemon & Hardening ✅
|
|
27
|
+
- [x] Web dashboard at localhost:4800 (dark theme, auto-refresh)
|
|
28
|
+
- [x] REST API: /_sc/status, stats, feed, pause, resume
|
|
29
|
+
- [x] Daemon mode: start/stop/restart with PID management
|
|
30
|
+
- [x] Service installer: LaunchAgent (macOS) + systemd (Linux)
|
|
31
|
+
- [x] Pause/resume optimization
|
|
32
|
+
- [x] Debug headers (X-SmartContext-*)
|
|
33
|
+
|
|
34
|
+
## Phase 4: OpenClaw Adapter + Release ✅
|
|
35
|
+
- [x] OpenClaw storage adapter (OpenSearch)
|
|
36
|
+
- [x] OpenClaw embedding adapter (Beast Ollama)
|
|
37
|
+
- [x] Session importer (OC gateway JSONL logs)
|
|
38
|
+
- [x] Auto-discovery from ~/.openclaw/ config
|
|
39
|
+
- [x] README with quick-start
|
|
40
|
+
- [x] Apache 2.0 license
|
|
41
|
+
|
|
42
|
+
## Test Results: 23/23 passing
|
|
43
|
+
- Chunker: 6 tests (token estimation, exchange pairs, long splits, code blocks, file paths, unique IDs)
|
|
44
|
+
- Budget: 3 tests (model limits, packing, empty retrieval)
|
|
45
|
+
- Metrics: 1 test (recording + aggregation)
|
|
46
|
+
- Dashboard & API: 6 tests (HTML, status, stats, pause/resume, feed, 404)
|
|
47
|
+
- Proxy: 7 tests (health, routing, 404/405, Anthropic sync/stream, OpenAI, config)
|
|
48
|
+
|
|
49
|
+
## Remaining (post-MVP)
|
|
50
|
+
- [ ] Google adapter full implementation
|
|
51
|
+
- [ ] OpenRouter adapter
|
|
52
|
+
- [ ] A/B test mode
|
|
53
|
+
- [ ] LLM-assisted diagnostics + auto-tune
|
|
54
|
+
- [ ] ONNX embedding fallback (for users without Ollama)
|
|
55
|
+
- [ ] Config file management (read/write ~/.smartcontext/config.json)
|
|
56
|
+
- [ ] Filesystem storage adapter fallback
|
|
57
|
+
- [ ] WebSocket live feed
|
|
58
|
+
- [ ] npm publish
|
|
59
|
+
- [ ] Benchmark: 10 CC sessions
|
|
60
|
+
- [ ] Dashboard SmartContext tab in dashboard-ts
|
package/README.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# SmartContext Proxy
|
|
2
|
+
|
|
3
|
+
Intelligent context window optimization proxy for LLM APIs. Sits between your client and LLM providers, dynamically replacing bloated conversation history with relevant context — saving 40-70% on token costs.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npx smartcontext-proxy
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
That's it. SmartContext auto-detects your providers from env vars and starts proxying.
|
|
12
|
+
|
|
13
|
+
### Client Integration
|
|
14
|
+
|
|
15
|
+
Change one env var:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Anthropic
|
|
19
|
+
ANTHROPIC_API_URL=http://localhost:4800/v1/anthropic
|
|
20
|
+
|
|
21
|
+
# OpenAI
|
|
22
|
+
OPENAI_BASE_URL=http://localhost:4800/v1/openai
|
|
23
|
+
|
|
24
|
+
# Ollama
|
|
25
|
+
OLLAMA_HOST=http://localhost:4800/v1/ollama
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## How It Works
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
Client App ──► SmartContext Proxy ──► LLM Provider
|
|
32
|
+
(unchanged) (intercept+optimize) (any provider)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
SmartContext operates like a network firewall — the client and provider don't know it exists. It intercepts conversations, replaces growing history with optimized context (recent exchanges + semantically retrieved chunks), and forwards transparently.
|
|
36
|
+
|
|
37
|
+
### Tiered Context Strategy
|
|
38
|
+
|
|
39
|
+
| Tier | What | Source |
|
|
40
|
+
|------|------|--------|
|
|
41
|
+
| T0 | System prompt | Kept stable (KV-cache friendly) |
|
|
42
|
+
| T1 | Last 3 exchanges | Verbatim from request |
|
|
43
|
+
| T2 | Relevant context | Vector search retrieval |
|
|
44
|
+
| T3 | Summaries | Pre-computed session summaries |
|
|
45
|
+
|
|
46
|
+
### Key Features
|
|
47
|
+
|
|
48
|
+
- **Zero-config**: Auto-detects providers, embeddings, and storage
|
|
49
|
+
- **Provider-agnostic**: Anthropic, OpenAI, Google, Ollama, OpenRouter
|
|
50
|
+
- **SSE streaming**: Zero-latency pass-through
|
|
51
|
+
- **Web dashboard**: Real-time stats at `localhost:4800`
|
|
52
|
+
- **Graceful degradation**: Any failure → transparent pass-through
|
|
53
|
+
- **Daemon mode**: `start`/`stop`/`restart` + system service
|
|
54
|
+
|
|
55
|
+
## CLI
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
npx smartcontext-proxy # Start (foreground)
|
|
59
|
+
npx smartcontext-proxy start # Start daemon
|
|
60
|
+
npx smartcontext-proxy stop # Stop daemon
|
|
61
|
+
npx smartcontext-proxy restart # Restart
|
|
62
|
+
npx smartcontext-proxy status # Check status
|
|
63
|
+
npx smartcontext-proxy install-service # Auto-start on boot
|
|
64
|
+
npx smartcontext-proxy --port 8080 # Custom port
|
|
65
|
+
npx smartcontext-proxy --no-optimize # Transparent proxy only
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## API
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
GET /health Health check
|
|
72
|
+
GET / Web dashboard
|
|
73
|
+
GET /_sc/status Proxy status
|
|
74
|
+
GET /_sc/stats Aggregate metrics
|
|
75
|
+
GET /_sc/feed Recent requests
|
|
76
|
+
POST /_sc/pause Pause optimization
|
|
77
|
+
POST /_sc/resume Resume optimization
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Architecture
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
smartcontext-proxy/
|
|
84
|
+
├── src/
|
|
85
|
+
│ ├── index.ts # CLI + entry point
|
|
86
|
+
│ ├── proxy/ # HTTP proxy, router, SSE streaming
|
|
87
|
+
│ ├── providers/ # Anthropic, OpenAI, Ollama, Google adapters
|
|
88
|
+
│ ├── context/ # Optimizer, chunker, retriever, budget
|
|
89
|
+
│ ├── embedding/ # Ollama embedding adapter
|
|
90
|
+
│ ├── storage/ # LanceDB storage adapter
|
|
91
|
+
│ ├── metrics/ # Request metrics collector
|
|
92
|
+
│ ├── ui/ # Web dashboard (inline HTML/CSS/JS)
|
|
93
|
+
│ └── daemon/ # Process management, service installer
|
|
94
|
+
└── adapters/openclaw/ # OpenClaw-specific adapter
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
Apache 2.0
|