smartcontext-proxy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PLAN.md +406 -0
- package/PROGRESS.md +60 -0
- package/README.md +99 -0
- package/SPEC.md +915 -0
- package/adapters/openclaw/embedding.d.ts +8 -0
- package/adapters/openclaw/embedding.js +16 -0
- package/adapters/openclaw/embedding.ts +15 -0
- package/adapters/openclaw/index.d.ts +18 -0
- package/adapters/openclaw/index.js +42 -0
- package/adapters/openclaw/index.ts +43 -0
- package/adapters/openclaw/session-importer.d.ts +22 -0
- package/adapters/openclaw/session-importer.js +99 -0
- package/adapters/openclaw/session-importer.ts +105 -0
- package/adapters/openclaw/storage.d.ts +26 -0
- package/adapters/openclaw/storage.js +177 -0
- package/adapters/openclaw/storage.ts +183 -0
- package/dist/adapters/openclaw/embedding.d.ts +8 -0
- package/dist/adapters/openclaw/embedding.js +16 -0
- package/dist/adapters/openclaw/index.d.ts +18 -0
- package/dist/adapters/openclaw/index.js +42 -0
- package/dist/adapters/openclaw/session-importer.d.ts +22 -0
- package/dist/adapters/openclaw/session-importer.js +99 -0
- package/dist/adapters/openclaw/storage.d.ts +26 -0
- package/dist/adapters/openclaw/storage.js +177 -0
- package/dist/config/auto-detect.d.ts +3 -0
- package/dist/config/auto-detect.js +48 -0
- package/dist/config/defaults.d.ts +2 -0
- package/dist/config/defaults.js +28 -0
- package/dist/config/schema.d.ts +30 -0
- package/dist/config/schema.js +3 -0
- package/dist/context/budget.d.ts +25 -0
- package/dist/context/budget.js +85 -0
- package/dist/context/canonical.d.ts +39 -0
- package/dist/context/canonical.js +12 -0
- package/dist/context/chunker.d.ts +9 -0
- package/dist/context/chunker.js +148 -0
- package/dist/context/optimizer.d.ts +31 -0
- package/dist/context/optimizer.js +163 -0
- package/dist/context/retriever.d.ts +29 -0
- package/dist/context/retriever.js +103 -0
- package/dist/daemon/process.d.ts +6 -0
- package/dist/daemon/process.js +76 -0
- package/dist/daemon/service.d.ts +2 -0
- package/dist/daemon/service.js +99 -0
- package/dist/embedding/ollama.d.ts +11 -0
- package/dist/embedding/ollama.js +72 -0
- package/dist/embedding/types.d.ts +6 -0
- package/dist/embedding/types.js +3 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +190 -0
- package/dist/metrics/collector.d.ts +43 -0
- package/dist/metrics/collector.js +72 -0
- package/dist/providers/anthropic.d.ts +15 -0
- package/dist/providers/anthropic.js +109 -0
- package/dist/providers/google.d.ts +13 -0
- package/dist/providers/google.js +40 -0
- package/dist/providers/ollama.d.ts +13 -0
- package/dist/providers/ollama.js +82 -0
- package/dist/providers/openai.d.ts +15 -0
- package/dist/providers/openai.js +115 -0
- package/dist/providers/types.d.ts +18 -0
- package/dist/providers/types.js +3 -0
- package/dist/proxy/router.d.ts +12 -0
- package/dist/proxy/router.js +46 -0
- package/dist/proxy/server.d.ts +25 -0
- package/dist/proxy/server.js +265 -0
- package/dist/proxy/stream.d.ts +8 -0
- package/dist/proxy/stream.js +32 -0
- package/dist/src/config/auto-detect.d.ts +3 -0
- package/dist/src/config/auto-detect.js +48 -0
- package/dist/src/config/defaults.d.ts +2 -0
- package/dist/src/config/defaults.js +28 -0
- package/dist/src/config/schema.d.ts +30 -0
- package/dist/src/config/schema.js +3 -0
- package/dist/src/context/budget.d.ts +25 -0
- package/dist/src/context/budget.js +85 -0
- package/dist/src/context/canonical.d.ts +39 -0
- package/dist/src/context/canonical.js +12 -0
- package/dist/src/context/chunker.d.ts +9 -0
- package/dist/src/context/chunker.js +148 -0
- package/dist/src/context/optimizer.d.ts +31 -0
- package/dist/src/context/optimizer.js +163 -0
- package/dist/src/context/retriever.d.ts +29 -0
- package/dist/src/context/retriever.js +103 -0
- package/dist/src/daemon/process.d.ts +6 -0
- package/dist/src/daemon/process.js +76 -0
- package/dist/src/daemon/service.d.ts +2 -0
- package/dist/src/daemon/service.js +99 -0
- package/dist/src/embedding/ollama.d.ts +11 -0
- package/dist/src/embedding/ollama.js +72 -0
- package/dist/src/embedding/types.d.ts +6 -0
- package/dist/src/embedding/types.js +3 -0
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +190 -0
- package/dist/src/metrics/collector.d.ts +43 -0
- package/dist/src/metrics/collector.js +72 -0
- package/dist/src/providers/anthropic.d.ts +15 -0
- package/dist/src/providers/anthropic.js +109 -0
- package/dist/src/providers/google.d.ts +13 -0
- package/dist/src/providers/google.js +40 -0
- package/dist/src/providers/ollama.d.ts +13 -0
- package/dist/src/providers/ollama.js +82 -0
- package/dist/src/providers/openai.d.ts +15 -0
- package/dist/src/providers/openai.js +115 -0
- package/dist/src/providers/types.d.ts +18 -0
- package/dist/src/providers/types.js +3 -0
- package/dist/src/proxy/router.d.ts +12 -0
- package/dist/src/proxy/router.js +46 -0
- package/dist/src/proxy/server.d.ts +25 -0
- package/dist/src/proxy/server.js +265 -0
- package/dist/src/proxy/stream.d.ts +8 -0
- package/dist/src/proxy/stream.js +32 -0
- package/dist/src/storage/lancedb.d.ts +21 -0
- package/dist/src/storage/lancedb.js +158 -0
- package/dist/src/storage/types.d.ts +52 -0
- package/dist/src/storage/types.js +3 -0
- package/dist/src/test/context.test.d.ts +1 -0
- package/dist/src/test/context.test.js +141 -0
- package/dist/src/test/dashboard.test.d.ts +1 -0
- package/dist/src/test/dashboard.test.js +85 -0
- package/dist/src/test/proxy.test.d.ts +1 -0
- package/dist/src/test/proxy.test.js +188 -0
- package/dist/src/ui/dashboard.d.ts +2 -0
- package/dist/src/ui/dashboard.js +183 -0
- package/dist/storage/lancedb.d.ts +21 -0
- package/dist/storage/lancedb.js +158 -0
- package/dist/storage/types.d.ts +52 -0
- package/dist/storage/types.js +3 -0
- package/dist/test/context.test.d.ts +1 -0
- package/dist/test/context.test.js +141 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +85 -0
- package/dist/test/proxy.test.d.ts +1 -0
- package/dist/test/proxy.test.js +188 -0
- package/dist/ui/dashboard.d.ts +2 -0
- package/dist/ui/dashboard.js +183 -0
- package/package.json +38 -0
- package/src/config/auto-detect.ts +51 -0
- package/src/config/defaults.ts +26 -0
- package/src/config/schema.ts +33 -0
- package/src/context/budget.ts +126 -0
- package/src/context/canonical.ts +50 -0
- package/src/context/chunker.ts +165 -0
- package/src/context/optimizer.ts +201 -0
- package/src/context/retriever.ts +123 -0
- package/src/daemon/process.ts +70 -0
- package/src/daemon/service.ts +103 -0
- package/src/embedding/ollama.ts +68 -0
- package/src/embedding/types.ts +6 -0
- package/src/index.ts +176 -0
- package/src/metrics/collector.ts +114 -0
- package/src/providers/anthropic.ts +117 -0
- package/src/providers/google.ts +42 -0
- package/src/providers/ollama.ts +87 -0
- package/src/providers/openai.ts +127 -0
- package/src/providers/types.ts +20 -0
- package/src/proxy/router.ts +48 -0
- package/src/proxy/server.ts +315 -0
- package/src/proxy/stream.ts +39 -0
- package/src/storage/lancedb.ts +169 -0
- package/src/storage/types.ts +47 -0
- package/src/test/context.test.ts +165 -0
- package/src/test/dashboard.test.ts +94 -0
- package/src/test/proxy.test.ts +218 -0
- package/src/ui/dashboard.ts +184 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { EmbeddingAdapter } from './types.js';
|
|
2
|
+
import http from 'node:http';
|
|
3
|
+
import https from 'node:https';
|
|
4
|
+
import { URL } from 'node:url';
|
|
5
|
+
|
|
6
|
+
export class OllamaEmbeddingAdapter implements EmbeddingAdapter {
|
|
7
|
+
name = 'ollama';
|
|
8
|
+
dimensions = 768; // nomic-embed-text default
|
|
9
|
+
|
|
10
|
+
constructor(
|
|
11
|
+
private url: string = 'http://localhost:11434',
|
|
12
|
+
private model: string = 'nomic-embed-text',
|
|
13
|
+
) {}
|
|
14
|
+
|
|
15
|
+
async initialize(): Promise<void> {
|
|
16
|
+
// Verify Ollama is reachable and model exists
|
|
17
|
+
try {
|
|
18
|
+
await this.embed(['test']);
|
|
19
|
+
} catch (err) {
|
|
20
|
+
throw new Error(`Ollama embedding unavailable at ${this.url}: ${err}`);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
async embed(texts: string[]): Promise<number[][]> {
|
|
25
|
+
const results: number[][] = [];
|
|
26
|
+
|
|
27
|
+
for (const text of texts) {
|
|
28
|
+
const embedding = await this.embedSingle(text);
|
|
29
|
+
results.push(embedding);
|
|
30
|
+
if (embedding.length !== this.dimensions) {
|
|
31
|
+
this.dimensions = embedding.length; // auto-detect dimensions
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
return results;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
private async embedSingle(text: string): Promise<number[]> {
|
|
39
|
+
const parsed = new URL(`${this.url}/api/embed`);
|
|
40
|
+
const transport = parsed.protocol === 'https:' ? https : http;
|
|
41
|
+
|
|
42
|
+
const body = JSON.stringify({ model: this.model, input: text });
|
|
43
|
+
|
|
44
|
+
return new Promise((resolve, reject) => {
|
|
45
|
+
const req = transport.request(parsed, { method: 'POST', headers: { 'Content-Type': 'application/json' } }, (res) => {
|
|
46
|
+
let data = '';
|
|
47
|
+
res.on('data', (chunk) => (data += chunk));
|
|
48
|
+
res.on('end', () => {
|
|
49
|
+
try {
|
|
50
|
+
const parsed = JSON.parse(data);
|
|
51
|
+
if (parsed.embeddings?.[0]) {
|
|
52
|
+
resolve(parsed.embeddings[0]);
|
|
53
|
+
} else if (parsed.embedding) {
|
|
54
|
+
resolve(parsed.embedding);
|
|
55
|
+
} else {
|
|
56
|
+
reject(new Error(`Unexpected Ollama response: ${data.slice(0, 200)}`));
|
|
57
|
+
}
|
|
58
|
+
} catch (e) {
|
|
59
|
+
reject(new Error(`Failed to parse Ollama response: ${e}`));
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
req.on('error', reject);
|
|
64
|
+
req.write(body);
|
|
65
|
+
req.end();
|
|
66
|
+
});
|
|
67
|
+
}
|
|
68
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { buildConfig } from './config/auto-detect.js';
|
|
4
|
+
import { ProxyServer } from './proxy/server.js';
|
|
5
|
+
import { OllamaEmbeddingAdapter } from './embedding/ollama.js';
|
|
6
|
+
import { LanceDBAdapter } from './storage/lancedb.js';
|
|
7
|
+
import type { EmbeddingAdapter } from './embedding/types.js';
|
|
8
|
+
import type { StorageAdapter } from './storage/types.js';
|
|
9
|
+
import { writePid, removePid, getPid, stopDaemon, startDaemon, isDaemonChild } from './daemon/process.js';
|
|
10
|
+
import { installService, uninstallService } from './daemon/service.js';
|
|
11
|
+
import http from 'node:http';
|
|
12
|
+
|
|
13
|
+
const VERSION = '0.1.0';
|
|
14
|
+
|
|
15
|
+
function parseArgs(args: string[]): Record<string, string | boolean> {
|
|
16
|
+
const result: Record<string, string | boolean> = {};
|
|
17
|
+
for (let i = 0; i < args.length; i++) {
|
|
18
|
+
const arg = args[i];
|
|
19
|
+
if (arg === '--port' || arg === '-p') result.port = args[++i];
|
|
20
|
+
else if (arg === '--config' || arg === '-c') result.config = args[++i];
|
|
21
|
+
else if (arg === '--help' || arg === '-h') result.help = true;
|
|
22
|
+
else if (arg === '--version' || arg === '-v') result.version = true;
|
|
23
|
+
else if (arg === '--no-optimize') result.noOptimize = true;
|
|
24
|
+
else if (arg === '--embedding-url') result.embeddingUrl = args[++i];
|
|
25
|
+
else if (arg === '--embedding-model') result.embeddingModel = args[++i];
|
|
26
|
+
else if (arg === '--data-dir') result.dataDir = args[++i];
|
|
27
|
+
else if (!arg.startsWith('-')) result.command = arg;
|
|
28
|
+
}
|
|
29
|
+
return result;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
 * Prints CLI usage, flags, client-integration environment variables, and the
 * control-API endpoint list to stdout. The banner interpolates the
 * module-level VERSION constant.
 */
function printHelp(): void {
  console.log(`
SmartContext Proxy v${VERSION}
Intelligent context window optimization for LLM APIs

Usage:
  smartcontext-proxy [options]
  smartcontext-proxy status          Show proxy status

Options:
  --port, -p <port>          Proxy port (default: 4800)
  --config, -c <file>        Config file path
  --no-optimize              Run in transparent proxy mode (no context optimization)
  --embedding-url <url>      Ollama URL for embeddings (default: http://localhost:11434)
  --embedding-model <model>  Embedding model (default: nomic-embed-text)
  --data-dir <path>          Data directory (default: ~/.smartcontext/data)
  --help, -h                 Show help
  --version, -v              Show version

Client Integration:
  ANTHROPIC_API_URL=http://localhost:4800/v1/anthropic
  OPENAI_BASE_URL=http://localhost:4800/v1/openai
  OLLAMA_HOST=http://localhost:4800/v1/ollama

API:
  GET  /health      Health check
  GET  /_sc/status  Proxy status
  GET  /_sc/stats   Aggregate metrics
  GET  /_sc/feed    Recent requests
  POST /_sc/pause   Pause optimization
  POST /_sc/resume  Resume optimization
`);
}
|
|
65
|
+
|
|
66
|
+
async function showStatus(port: number): Promise<void> {
|
|
67
|
+
return new Promise((resolve) => {
|
|
68
|
+
http.get(`http://127.0.0.1:${port}/_sc/status`, (res) => {
|
|
69
|
+
let data = '';
|
|
70
|
+
res.on('data', (chunk) => (data += chunk));
|
|
71
|
+
res.on('end', () => {
|
|
72
|
+
try {
|
|
73
|
+
const status = JSON.parse(data);
|
|
74
|
+
console.log(`SmartContext Proxy: ${status.state}`);
|
|
75
|
+
console.log(` Uptime: ${Math.round(status.uptime / 1000)}s`);
|
|
76
|
+
console.log(` Requests: ${status.requests}`);
|
|
77
|
+
console.log(` Mode: ${status.mode}`);
|
|
78
|
+
} catch {
|
|
79
|
+
console.log('Could not parse status response');
|
|
80
|
+
}
|
|
81
|
+
resolve();
|
|
82
|
+
});
|
|
83
|
+
}).on('error', () => {
|
|
84
|
+
console.log(`SmartContext Proxy: not running on port ${port}`);
|
|
85
|
+
resolve();
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
 * CLI entry point: dispatches subcommands (status/stop/start/restart/
 * install-service/uninstall-service), then boots the proxy server with
 * optional embedding + storage back-ends for context optimization.
 * Falls back to transparent proxying when either back-end fails to start.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));

  if (args.version) { console.log(VERSION); return; }
  if (args.help) { printHelp(); return; }

  // NOTE(review): a non-numeric --port value yields NaN here — confirm
  // downstream validation before relying on it.
  const port = args.port ? parseInt(args.port as string, 10) : 4800;

  // Subcommands that do not start a server return immediately.
  if (args.command === 'status') { await showStatus(port); return; }
  if (args.command === 'stop') { stopDaemon(); return; }
  if (args.command === 'start') { startDaemon(process.argv.slice(3)); return; }
  // restart: stop, wait 1s for the old process to release the port, start.
  if (args.command === 'restart') { stopDaemon(); await new Promise(r => setTimeout(r, 1000)); startDaemon(process.argv.slice(3)); return; }
  if (args.command === 'install-service') {
    const path = installService(port);
    console.log(`Service installed: ${path}`);
    return;
  }
  if (args.command === 'uninstall-service') {
    console.log(uninstallService());
    return;
  }

  // Proxy binds to loopback only; remote clients are intentionally excluded.
  const config = buildConfig({
    proxy: { port, host: '127.0.0.1' },
  });

  // Initialize embedding and storage (unless --no-optimize)
  let embedding: EmbeddingAdapter | undefined;
  let storage: StorageAdapter | undefined;

  if (!args.noOptimize) {
    try {
      const embeddingUrl = (args.embeddingUrl as string) || process.env['OLLAMA_HOST'] || 'http://localhost:11434';
      const embeddingModel = (args.embeddingModel as string) || 'nomic-embed-text';
      const dataDir = args.dataDir as string | undefined;

      embedding = new OllamaEmbeddingAdapter(embeddingUrl, embeddingModel);
      await embedding.initialize();

      storage = new LanceDBAdapter(dataDir);
      await storage.initialize();

      console.log(`  Embedding: ${embeddingModel} @ ${embeddingUrl}`);
      console.log(`  Storage: LanceDB`);
    } catch (err) {
      // Optimization is best-effort: degrade to pass-through rather than exit.
      console.log(`  Optimization unavailable: ${err}`);
      console.log(`  Running in transparent proxy mode`);
      embedding = undefined;
      storage = undefined;
    }
  }

  const server = new ProxyServer(config, embedding, storage);
  const providers = server.getProviderNames();
  // Both back-ends must be up for optimization; otherwise transparent mode.
  const mode = embedding && storage ? 'optimizing' : 'transparent';

  await server.start();

  console.log(`
┌─────────────────────────────────────────────┐
│  SmartContext Proxy v${VERSION}                  │
│  http://${config.proxy.host}:${config.proxy.port}                    │
│                                             │
│  Providers: ${providers.join(', ').padEnd(31)}│
│  Mode: ${mode.padEnd(36)}│
└─────────────────────────────────────────────┘
`);

  // Write PID file
  writePid();

  // Graceful shutdown: remove PID first so `status`/`stop` see us as gone,
  // then close the server and storage before exiting.
  const shutdown = async () => {
    console.log('\nShutting down...');
    removePid();
    await server.stop();
    if (storage) await storage.close();
    process.exit(0);
  };

  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);
}

// Top-level launch: any unhandled startup error is fatal.
main().catch((err) => {
  console.error('Fatal:', err);
  process.exit(1);
});
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
export interface RequestMetric {
|
|
2
|
+
id: number;
|
|
3
|
+
timestamp: number;
|
|
4
|
+
provider: string;
|
|
5
|
+
model: string;
|
|
6
|
+
streaming: boolean;
|
|
7
|
+
originalTokens: number;
|
|
8
|
+
optimizedTokens: number;
|
|
9
|
+
savingsPercent: number;
|
|
10
|
+
latencyOverheadMs: number;
|
|
11
|
+
chunksRetrieved: number;
|
|
12
|
+
topScore: number;
|
|
13
|
+
passThrough: boolean;
|
|
14
|
+
reason?: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export interface AggregateStats {
|
|
18
|
+
totalRequests: number;
|
|
19
|
+
totalOriginalTokens: number;
|
|
20
|
+
totalOptimizedTokens: number;
|
|
21
|
+
totalSavingsPercent: number;
|
|
22
|
+
avgLatencyOverheadMs: number;
|
|
23
|
+
avgChunksRetrieved: number;
|
|
24
|
+
byProvider: Record<string, ProviderStats>;
|
|
25
|
+
byModel: Record<string, ModelStats>;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface ProviderStats {
|
|
29
|
+
requests: number;
|
|
30
|
+
tokensSaved: number;
|
|
31
|
+
savingsPercent: number;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export interface ModelStats {
|
|
35
|
+
requests: number;
|
|
36
|
+
tokensSaved: number;
|
|
37
|
+
savingsPercent: number;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export class MetricsCollector {
|
|
41
|
+
private metrics: RequestMetric[] = [];
|
|
42
|
+
private startTime = Date.now();
|
|
43
|
+
|
|
44
|
+
record(metric: RequestMetric): void {
|
|
45
|
+
this.metrics.push(metric);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
getRecent(limit: number = 50): RequestMetric[] {
|
|
49
|
+
return this.metrics.slice(-limit);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
getStats(): AggregateStats {
|
|
53
|
+
const total = this.metrics.length;
|
|
54
|
+
if (total === 0) {
|
|
55
|
+
return {
|
|
56
|
+
totalRequests: 0,
|
|
57
|
+
totalOriginalTokens: 0,
|
|
58
|
+
totalOptimizedTokens: 0,
|
|
59
|
+
totalSavingsPercent: 0,
|
|
60
|
+
avgLatencyOverheadMs: 0,
|
|
61
|
+
avgChunksRetrieved: 0,
|
|
62
|
+
byProvider: {},
|
|
63
|
+
byModel: {},
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
let totalOriginal = 0;
|
|
68
|
+
let totalOptimized = 0;
|
|
69
|
+
let totalLatency = 0;
|
|
70
|
+
let totalChunks = 0;
|
|
71
|
+
const byProvider: Record<string, ProviderStats> = {};
|
|
72
|
+
const byModel: Record<string, ModelStats> = {};
|
|
73
|
+
|
|
74
|
+
for (const m of this.metrics) {
|
|
75
|
+
totalOriginal += m.originalTokens;
|
|
76
|
+
totalOptimized += m.optimizedTokens;
|
|
77
|
+
totalLatency += m.latencyOverheadMs;
|
|
78
|
+
totalChunks += m.chunksRetrieved;
|
|
79
|
+
|
|
80
|
+
// By provider
|
|
81
|
+
if (!byProvider[m.provider]) byProvider[m.provider] = { requests: 0, tokensSaved: 0, savingsPercent: 0 };
|
|
82
|
+
byProvider[m.provider].requests++;
|
|
83
|
+
byProvider[m.provider].tokensSaved += m.originalTokens - m.optimizedTokens;
|
|
84
|
+
|
|
85
|
+
// By model
|
|
86
|
+
if (!byModel[m.model]) byModel[m.model] = { requests: 0, tokensSaved: 0, savingsPercent: 0 };
|
|
87
|
+
byModel[m.model].requests++;
|
|
88
|
+
byModel[m.model].tokensSaved += m.originalTokens - m.optimizedTokens;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Calculate percentages
|
|
92
|
+
for (const stats of Object.values(byProvider)) {
|
|
93
|
+
stats.savingsPercent = totalOriginal > 0 ? Math.round((stats.tokensSaved / totalOriginal) * 100) : 0;
|
|
94
|
+
}
|
|
95
|
+
for (const stats of Object.values(byModel)) {
|
|
96
|
+
stats.savingsPercent = totalOriginal > 0 ? Math.round((stats.tokensSaved / totalOriginal) * 100) : 0;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
return {
|
|
100
|
+
totalRequests: total,
|
|
101
|
+
totalOriginalTokens: totalOriginal,
|
|
102
|
+
totalOptimizedTokens: totalOptimized,
|
|
103
|
+
totalSavingsPercent: totalOriginal > 0 ? Math.round((1 - totalOptimized / totalOriginal) * 100) : 0,
|
|
104
|
+
avgLatencyOverheadMs: Math.round(totalLatency / total),
|
|
105
|
+
avgChunksRetrieved: Math.round(totalChunks / total * 10) / 10,
|
|
106
|
+
byProvider,
|
|
107
|
+
byModel,
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
getUptime(): number {
|
|
112
|
+
return Date.now() - this.startTime;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import type { ProviderAdapter } from './types.js';
|
|
2
|
+
import type { CanonicalMessage, CanonicalRequest, ContentBlock } from '../context/canonical.js';
|
|
3
|
+
|
|
4
|
+
export class AnthropicAdapter implements ProviderAdapter {
|
|
5
|
+
name = 'anthropic';
|
|
6
|
+
|
|
7
|
+
constructor(public baseUrl: string = 'https://api.anthropic.com') {}
|
|
8
|
+
|
|
9
|
+
parseRequest(body: unknown, headers: Record<string, string>): CanonicalRequest {
|
|
10
|
+
const b = body as Record<string, unknown>;
|
|
11
|
+
const messages: CanonicalMessage[] = [];
|
|
12
|
+
let systemPrompt: string | undefined;
|
|
13
|
+
|
|
14
|
+
// Extract system prompt
|
|
15
|
+
if (typeof b.system === 'string') {
|
|
16
|
+
systemPrompt = b.system;
|
|
17
|
+
} else if (Array.isArray(b.system)) {
|
|
18
|
+
systemPrompt = (b.system as Array<{ text?: string }>)
|
|
19
|
+
.map((s) => s.text || '')
|
|
20
|
+
.join('\n');
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Convert messages
|
|
24
|
+
if (Array.isArray(b.messages)) {
|
|
25
|
+
for (const msg of b.messages as Array<Record<string, unknown>>) {
|
|
26
|
+
messages.push(this.parseMessage(msg));
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
return {
|
|
31
|
+
messages,
|
|
32
|
+
systemPrompt,
|
|
33
|
+
model: (b.model as string) || 'unknown',
|
|
34
|
+
stream: !!b.stream,
|
|
35
|
+
maxTokens: b.max_tokens as number | undefined,
|
|
36
|
+
temperature: b.temperature as number | undefined,
|
|
37
|
+
tools: b.tools as unknown[] | undefined,
|
|
38
|
+
rawHeaders: headers,
|
|
39
|
+
providerAuth: this.extractApiKey(headers),
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
serializeRequest(canonical: CanonicalRequest): unknown {
|
|
44
|
+
const body: Record<string, unknown> = {
|
|
45
|
+
model: canonical.model,
|
|
46
|
+
messages: canonical.messages.map((m) => this.serializeMessage(m)),
|
|
47
|
+
stream: canonical.stream,
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
if (canonical.systemPrompt) {
|
|
51
|
+
body.system = canonical.systemPrompt;
|
|
52
|
+
}
|
|
53
|
+
if (canonical.maxTokens) {
|
|
54
|
+
body.max_tokens = canonical.maxTokens;
|
|
55
|
+
}
|
|
56
|
+
if (canonical.temperature !== undefined) {
|
|
57
|
+
body.temperature = canonical.temperature;
|
|
58
|
+
}
|
|
59
|
+
if (canonical.tools) {
|
|
60
|
+
body.tools = canonical.tools;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
return body;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
forwardUrl(originalPath: string): string {
|
|
67
|
+
// /v1/anthropic/v1/messages → https://api.anthropic.com/v1/messages
|
|
68
|
+
const stripped = originalPath.replace(/^\/v1\/anthropic/, '');
|
|
69
|
+
return `${this.baseUrl}${stripped}`;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
extractApiKey(headers: Record<string, string>): string {
|
|
73
|
+
return headers['x-api-key'] || '';
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
contentType(): string {
|
|
77
|
+
return 'application/json';
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
authHeaders(apiKey: string): Record<string, string> {
|
|
81
|
+
return {
|
|
82
|
+
'x-api-key': apiKey,
|
|
83
|
+
'anthropic-version': '2023-06-01',
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
private parseMessage(msg: Record<string, unknown>): CanonicalMessage {
|
|
88
|
+
const role = msg.role as string;
|
|
89
|
+
let content: string | ContentBlock[];
|
|
90
|
+
|
|
91
|
+
if (typeof msg.content === 'string') {
|
|
92
|
+
content = msg.content;
|
|
93
|
+
} else if (Array.isArray(msg.content)) {
|
|
94
|
+
content = (msg.content as Array<Record<string, unknown>>).map((block) => {
|
|
95
|
+
if (block.type === 'text') {
|
|
96
|
+
return { type: 'text' as const, text: block.text as string };
|
|
97
|
+
}
|
|
98
|
+
// Pass through tool_use, tool_result, image blocks as-is
|
|
99
|
+
return { ...block, type: block.type as ContentBlock['type'] };
|
|
100
|
+
});
|
|
101
|
+
} else {
|
|
102
|
+
content = '';
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
return {
|
|
106
|
+
role: role as CanonicalMessage['role'],
|
|
107
|
+
content,
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
private serializeMessage(msg: CanonicalMessage): Record<string, unknown> {
|
|
112
|
+
return {
|
|
113
|
+
role: msg.role,
|
|
114
|
+
content: msg.content,
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ProviderAdapter } from './types.js';
|
|
2
|
+
import type { CanonicalMessage, CanonicalRequest } from '../context/canonical.js';
|
|
3
|
+
|
|
4
|
+
export class GoogleAdapter implements ProviderAdapter {
|
|
5
|
+
name = 'google';
|
|
6
|
+
|
|
7
|
+
constructor(public baseUrl: string = 'https://generativelanguage.googleapis.com') {}
|
|
8
|
+
|
|
9
|
+
parseRequest(body: unknown, headers: Record<string, string>): CanonicalRequest {
|
|
10
|
+
// Stub: pass through as-is for Phase 1
|
|
11
|
+
const b = body as Record<string, unknown>;
|
|
12
|
+
return {
|
|
13
|
+
messages: [],
|
|
14
|
+
model: 'unknown',
|
|
15
|
+
stream: !!b.stream,
|
|
16
|
+
rawHeaders: headers,
|
|
17
|
+
providerAuth: this.extractApiKey(headers),
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
serializeRequest(canonical: CanonicalRequest): unknown {
|
|
22
|
+
// Stub: not implemented for Phase 1
|
|
23
|
+
return {};
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
forwardUrl(originalPath: string): string {
|
|
27
|
+
const stripped = originalPath.replace(/^\/v1\/google/, '');
|
|
28
|
+
return `${this.baseUrl}${stripped}`;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
extractApiKey(headers: Record<string, string>): string {
|
|
32
|
+
return headers['x-goog-api-key'] || '';
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
contentType(): string {
|
|
36
|
+
return 'application/json';
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
authHeaders(apiKey: string): Record<string, string> {
|
|
40
|
+
return { 'x-goog-api-key': apiKey };
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import type { ProviderAdapter } from './types.js';
|
|
2
|
+
import type { CanonicalMessage, CanonicalRequest, ContentBlock } from '../context/canonical.js';
|
|
3
|
+
|
|
4
|
+
export class OllamaAdapter implements ProviderAdapter {
|
|
5
|
+
name = 'ollama';
|
|
6
|
+
|
|
7
|
+
constructor(public baseUrl: string = 'http://localhost:11434') {}
|
|
8
|
+
|
|
9
|
+
parseRequest(body: unknown, headers: Record<string, string>): CanonicalRequest {
|
|
10
|
+
const b = body as Record<string, unknown>;
|
|
11
|
+
const messages: CanonicalMessage[] = [];
|
|
12
|
+
let systemPrompt: string | undefined;
|
|
13
|
+
|
|
14
|
+
if (Array.isArray(b.messages)) {
|
|
15
|
+
for (const msg of b.messages as Array<Record<string, unknown>>) {
|
|
16
|
+
if (msg.role === 'system') {
|
|
17
|
+
systemPrompt = msg.content as string;
|
|
18
|
+
continue;
|
|
19
|
+
}
|
|
20
|
+
messages.push({
|
|
21
|
+
role: msg.role as CanonicalMessage['role'],
|
|
22
|
+
content: (msg.content as string) || '',
|
|
23
|
+
});
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return {
|
|
28
|
+
messages,
|
|
29
|
+
systemPrompt,
|
|
30
|
+
model: (b.model as string) || 'unknown',
|
|
31
|
+
stream: b.stream !== false, // Ollama streams by default
|
|
32
|
+
maxTokens: b.options ? (b.options as Record<string, unknown>).num_predict as number | undefined : undefined,
|
|
33
|
+
temperature: b.options ? (b.options as Record<string, unknown>).temperature as number | undefined : undefined,
|
|
34
|
+
rawHeaders: headers,
|
|
35
|
+
providerAuth: '',
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
serializeRequest(canonical: CanonicalRequest): unknown {
|
|
40
|
+
const messages: Array<Record<string, string>> = [];
|
|
41
|
+
|
|
42
|
+
if (canonical.systemPrompt) {
|
|
43
|
+
messages.push({ role: 'system', content: canonical.systemPrompt });
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
for (const msg of canonical.messages) {
|
|
47
|
+
messages.push({
|
|
48
|
+
role: msg.role,
|
|
49
|
+
content: typeof msg.content === 'string'
|
|
50
|
+
? msg.content
|
|
51
|
+
: (msg.content as ContentBlock[]).filter((b) => b.type === 'text').map((b) => b.text || '').join('\n'),
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const body: Record<string, unknown> = {
|
|
56
|
+
model: canonical.model,
|
|
57
|
+
messages,
|
|
58
|
+
stream: canonical.stream,
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const options: Record<string, unknown> = {};
|
|
62
|
+
if (canonical.maxTokens) options.num_predict = canonical.maxTokens;
|
|
63
|
+
if (canonical.temperature !== undefined) options.temperature = canonical.temperature;
|
|
64
|
+
if (Object.keys(options).length > 0) body.options = options;
|
|
65
|
+
|
|
66
|
+
return body;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
forwardUrl(originalPath: string): string {
|
|
70
|
+
// /v1/ollama/api/chat → http://localhost:11434/api/chat
|
|
71
|
+
const stripped = originalPath.replace(/^\/v1\/ollama/, '');
|
|
72
|
+
return `${this.baseUrl}${stripped}`;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
extractApiKey(headers: Record<string, string>): string {
|
|
76
|
+
return headers['authorization']?.replace(/^Bearer\s+/i, '') || '';
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
contentType(): string {
|
|
80
|
+
return 'application/json';
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
authHeaders(apiKey: string): Record<string, string> {
|
|
84
|
+
if (!apiKey) return {};
|
|
85
|
+
return { 'Authorization': `Bearer ${apiKey}` };
|
|
86
|
+
}
|
|
87
|
+
}
|