@relayplane/proxy 1.8.37 → 1.8.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/budget.d.ts +39 -0
- package/dist/budget.d.ts.map +1 -1
- package/dist/budget.js +173 -0
- package/dist/budget.js.map +1 -1
- package/dist/cli.js +0 -0
- package/dist/config.d.ts +21 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js.map +1 -1
- package/dist/episode-writer.d.ts +28 -0
- package/dist/episode-writer.d.ts.map +1 -0
- package/dist/episode-writer.js +263 -0
- package/dist/episode-writer.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -1
- package/dist/index.js.map +1 -1
- package/dist/osmosis-store.d.ts +5 -0
- package/dist/osmosis-store.d.ts.map +1 -1
- package/dist/osmosis-store.js +83 -8
- package/dist/osmosis-store.js.map +1 -1
- package/dist/session-tracker.d.ts.map +1 -1
- package/dist/session-tracker.js +7 -1
- package/dist/session-tracker.js.map +1 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +619 -24
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/tool-router.d.ts +163 -0
- package/dist/tool-router.d.ts.map +1 -0
- package/dist/tool-router.js +528 -0
- package/dist/tool-router.js.map +1 -0
- package/dist/trace-writer.d.ts +155 -0
- package/dist/trace-writer.d.ts.map +1 -0
- package/dist/trace-writer.js +589 -0
- package/dist/trace-writer.js.map +1 -0
- package/package.json +14 -13
- package/LICENSE +0 -21
package/dist/standalone-proxy.js
CHANGED
|
@@ -94,7 +94,11 @@ const estimateRateMap = new Map();
|
|
|
94
94
|
// Without this, IPs that make one request and disappear stay in the map forever.
|
|
95
95
|
setInterval(() => (0, estimate_js_1.purgeExpiredRateLimitEntries)(estimateRateMap, Date.now()), 5 * 60 * 1000);
|
|
96
96
|
const osmosis_store_js_1 = require("./osmosis-store.js");
|
|
97
|
+
const episode_writer_js_1 = require("./episode-writer.js");
|
|
97
98
|
const session_tracker_js_1 = require("./session-tracker.js");
|
|
99
|
+
const trace_writer_js_1 = require("./trace-writer.js");
|
|
100
|
+
const tool_router_js_1 = require("./tool-router.js");
|
|
101
|
+
const node_crypto_1 = require("node:crypto");
|
|
98
102
|
const PROXY_VERSION = (() => {
|
|
99
103
|
try {
|
|
100
104
|
const pkgPath = path.join(__dirname, '..', 'package.json');
|
|
@@ -939,6 +943,18 @@ let _activeOllamaConfig;
|
|
|
939
943
|
/**
 * Tells whether request-content logging is switched on.
 *
 * The flag is opt-out: it is treated as enabled unless the active proxy
 * config sets dashboard.showRequestContent to exactly false.
 *
 * @returns {boolean} false only when the setting is strictly false.
 */
function isContentLoggingEnabled() {
    const showRequestContent = _activeProxyConfig.dashboard?.showRequestContent;
    if (showRequestContent === false) {
        return false;
    }
    return true;
}
|
|
946
|
+
/**
 * Whether procedural knowledge hints should be injected into system prompts.
 * Default: false — no system prompt modification occurs.
 *
 * Resolution order:
 *   1. Env override RELAYPLANE_PROCEDURAL_INJECTION — when the variable is set
 *      at all it wins, and only the exact string 'true' enables injection.
 *   2. Config file: memory.proceduralInjectionEnabled — must be strictly true.
 *
 * @returns {boolean} true when injection is enabled.
 */
function isProceduralInjectionEnabled() {
    const override = process.env['RELAYPLANE_PROCEDURAL_INJECTION'];
    const hasOverride = override !== undefined;
    return hasOverride
        ? override === 'true'
        : _activeProxyConfig.memory?.proceduralInjectionEnabled === true;
}
|
|
942
958
|
function getProxyConfigPath() {
|
|
943
959
|
const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
|
|
944
960
|
if (customPath && customPath.trim())
|
|
@@ -2533,7 +2549,7 @@ th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:
|
|
|
2533
2549
|
td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
2534
2550
|
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
|
|
2535
2551
|
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.warn{background:#fbbf24}.dot.down{background:#ef4444}
|
|
2536
|
-
.section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none}
|
|
2552
|
+
.section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none!important}
|
|
2537
2553
|
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
|
|
2538
2554
|
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}.badge.err-auth{background:#2d0a0a;color:#ef4444}.badge.err-rate{background:#2d2a0a;color:#fbbf24}.badge.err-timeout{background:#2d1a0a;color:#fb923c}
|
|
2539
2555
|
.badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
|
|
@@ -2558,10 +2574,10 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
2558
2574
|
<div class="section collapsible collapsed"><h2>Agent Cost Breakdown</h2>
|
|
2559
2575
|
<table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
|
|
2560
2576
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
2561
|
-
<div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div
|
|
2577
|
+
<div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div></div></div>
|
|
2562
2578
|
<div class="section collapsible collapsed" id="sessions-section"><h2>Sessions <span id="sessionsLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(last 7d)</span></h2>
|
|
2563
|
-
<table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
|
|
2564
|
-
|
|
2579
|
+
<table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Models</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
|
|
2580
|
+
</div>
|
|
2565
2581
|
<div class="section"><h2>Recent Runs <span id="historyLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(7d window, history-capped)</span></h2>
|
|
2566
2582
|
<table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
2567
2583
|
<script>
|
|
@@ -2586,12 +2602,10 @@ async function loadSessions(){
|
|
|
2586
2602
|
const badge=isActive?'<span class="badge ok" style="font-size:.7rem">LIVE</span>':'<span style="color:#64748b;font-size:.75rem">idle</span>';
|
|
2587
2603
|
const srcBadge=s.session_source==='claude-code'?'<span style="color:#60a5fa;font-size:.75rem">claude-code</span>':'<span style="color:#94a3b8;font-size:.75rem">synthetic</span>';
|
|
2588
2604
|
const sid=s.id.length>20?s.id.slice(0,20)+'…':s.id;
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2605
|
+
const mix=s.model_mix&&Object.keys(s.model_mix).length?Object.entries(s.model_mix).map(([m,c])=>{const short=m.replace('claude-','').replace(/-\d{8}$/,'').replace('sonnet','Sonnet').replace('opus','Opus').replace('haiku','Haiku');return '<span style="font-size:.72rem;color:#94a3b8">'+short+'<span style="color:#475569">×</span>'+c+'</span>';}).join(' '):'<span style="color:#475569;font-size:.72rem">—</span>';
|
|
2606
|
+
return '<tr><td style="font-family:monospace;font-size:.8rem" title="'+esc(s.id)+'">'+sid+'</td><td>'+srcBadge+'</td><td>'+fmtTime(new Date(s.started_at).toISOString())+'</td><td>'+dur+'</td><td>'+s.request_count+'</td><td>'+(s.total_tokens_in||0)+'</td><td>'+(s.total_tokens_out||0)+'</td><td>$'+fmt(s.total_cost_usd,4)+'</td><td>'+mix+'</td><td>'+badge+'</td></tr>';
|
|
2607
|
+
}).join(''):'<tr><td colspan=10 style="color:#64748b">No sessions recorded yet</td></tr>';
|
|
2592
2608
|
const totalCost=sessions.reduce((s,r)=>s+(r.total_cost_usd||0),0);
|
|
2593
|
-
const upsell=$('sessions-upsell');
|
|
2594
|
-
if(upsell) upsell.style.display=(sessions.length>=3&&totalCost>2)?'block':'none';
|
|
2595
2609
|
}catch(e){console.error('sessions load error',e)}
|
|
2596
2610
|
}
|
|
2597
2611
|
async function load(){
|
|
@@ -2901,12 +2915,20 @@ async function startProxy(config = {}) {
|
|
|
2901
2915
|
(0, agent_tracker_js_1.flushAgentRegistry)();
|
|
2902
2916
|
meshHandle.stop();
|
|
2903
2917
|
shutdownHistory();
|
|
2918
|
+
trace_writer_js_1.TraceWriter.getInstance().shutdown();
|
|
2904
2919
|
process.exit(0);
|
|
2905
2920
|
};
|
|
2906
2921
|
process.on('SIGINT', handleShutdown);
|
|
2907
2922
|
process.on('SIGTERM', handleShutdown);
|
|
2908
2923
|
const configPath = getProxyConfigPath();
|
|
2909
2924
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2925
|
+
// ── Deterministic Traces: initialise TraceWriter with loaded config ──
|
|
2926
|
+
trace_writer_js_1.TraceWriter.getInstance({
|
|
2927
|
+
...((0, trace_writer_js_1.defaultTracesConfig)()),
|
|
2928
|
+
...(proxyConfig.traces ?? {}),
|
|
2929
|
+
});
|
|
2930
|
+
// ── Tool Router: initialise deny-by-default tool authorization ──
|
|
2931
|
+
(0, tool_router_js_1.getToolRouter)();
|
|
2910
2932
|
// Auto-config on startup: detect available auth and set optimal routing.
|
|
2911
2933
|
//
|
|
2912
2934
|
// Bug fix: `configExists` was always true because loadConfig() (CLI init) creates
|
|
@@ -3271,6 +3293,12 @@ async function startProxy(config = {}) {
|
|
|
3271
3293
|
}
|
|
3272
3294
|
// === Control endpoints ===
|
|
3273
3295
|
if (pathname.startsWith('/control/')) {
|
|
3296
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
3297
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
3298
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
3299
|
+
res.end(JSON.stringify({ error: 'Control endpoints are localhost-only' }));
|
|
3300
|
+
return;
|
|
3301
|
+
}
|
|
3274
3302
|
if (req.method === 'POST' && pathname === '/control/enable') {
|
|
3275
3303
|
proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: true });
|
|
3276
3304
|
await saveProxyConfig(configPath, proxyConfig);
|
|
@@ -3424,6 +3452,89 @@ async function startProxy(config = {}) {
|
|
|
3424
3452
|
res.end(JSON.stringify({ ok: true, message: 'Daily spend counter reset' }));
|
|
3425
3453
|
return;
|
|
3426
3454
|
}
|
|
3455
|
+
// === Session Budget endpoints ===
|
|
3456
|
+
if (req.method === 'GET' && pathname === '/control/session-budget') {
|
|
3457
|
+
const sbQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3458
|
+
const sbParams = new URLSearchParams(sbQs);
|
|
3459
|
+
const sessionId = sbParams.get('sessionId');
|
|
3460
|
+
if (!sessionId) {
|
|
3461
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3462
|
+
res.end(JSON.stringify({ error: 'sessionId query parameter required' }));
|
|
3463
|
+
return;
|
|
3464
|
+
}
|
|
3465
|
+
const record = budgetManager.getSessionBudget(sessionId);
|
|
3466
|
+
if (!record) {
|
|
3467
|
+
const cap = budgetManager.getConfig().sessionCapUsd;
|
|
3468
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3469
|
+
res.end(JSON.stringify({
|
|
3470
|
+
sessionId,
|
|
3471
|
+
capUsd: cap,
|
|
3472
|
+
spentUsd: 0,
|
|
3473
|
+
remainingUsd: cap,
|
|
3474
|
+
pctUsed: 0,
|
|
3475
|
+
modelUsed: '',
|
|
3476
|
+
status: 'ok',
|
|
3477
|
+
}));
|
|
3478
|
+
return;
|
|
3479
|
+
}
|
|
3480
|
+
const remaining = Math.max(0, record.capUsd - record.spentUsd);
|
|
3481
|
+
const pctUsed = record.capUsd > 0 ? (record.spentUsd / record.capUsd) * 100 : 0;
|
|
3482
|
+
const status = pctUsed >= 100 ? 'exceeded' : pctUsed >= 80 ? 'warning' : 'ok';
|
|
3483
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3484
|
+
res.end(JSON.stringify({
|
|
3485
|
+
sessionId: record.sessionId,
|
|
3486
|
+
capUsd: record.capUsd,
|
|
3487
|
+
spentUsd: record.spentUsd,
|
|
3488
|
+
remainingUsd: remaining,
|
|
3489
|
+
pctUsed: Math.round(pctUsed * 10) / 10,
|
|
3490
|
+
modelUsed: record.modelUsed,
|
|
3491
|
+
createdAt: new Date(record.createdAt).toISOString(),
|
|
3492
|
+
updatedAt: new Date(record.updatedAt).toISOString(),
|
|
3493
|
+
status,
|
|
3494
|
+
}));
|
|
3495
|
+
return;
|
|
3496
|
+
}
|
|
3497
|
+
if (req.method === 'POST' && pathname === '/control/session-budget/set') {
|
|
3498
|
+
try {
|
|
3499
|
+
const body = await readJsonBody(req);
|
|
3500
|
+
if (!body.sessionId || typeof body.capUsd !== 'number' || !isFinite(body.capUsd) || body.capUsd <= 0) {
|
|
3501
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3502
|
+
res.end(JSON.stringify({ error: 'sessionId (string) and capUsd (positive number) required' }));
|
|
3503
|
+
return;
|
|
3504
|
+
}
|
|
3505
|
+
budgetManager.setSessionCap(body.sessionId, body.capUsd);
|
|
3506
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3507
|
+
res.end(JSON.stringify({ ok: true, sessionId: body.sessionId, capUsd: body.capUsd }));
|
|
3508
|
+
}
|
|
3509
|
+
catch {
|
|
3510
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3511
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3512
|
+
}
|
|
3513
|
+
return;
|
|
3514
|
+
}
|
|
3515
|
+
if (req.method === 'GET' && pathname === '/control/session-budgets') {
|
|
3516
|
+
const slQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3517
|
+
const slParams = new URLSearchParams(slQs);
|
|
3518
|
+
const limitParam = slParams.get('limit');
|
|
3519
|
+
const limit = limitParam ? Math.min(50, Math.max(1, parseInt(limitParam, 10) || 50)) : 50;
|
|
3520
|
+
const records = budgetManager.listSessionBudgets(limit);
|
|
3521
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3522
|
+
res.end(JSON.stringify({
|
|
3523
|
+
sessions: records.map(r => ({
|
|
3524
|
+
sessionId: r.sessionId,
|
|
3525
|
+
capUsd: r.capUsd,
|
|
3526
|
+
spentUsd: r.spentUsd,
|
|
3527
|
+
remainingUsd: Math.max(0, r.capUsd - r.spentUsd),
|
|
3528
|
+
pctUsed: r.capUsd > 0 ? Math.round((r.spentUsd / r.capUsd) * 1000) / 10 : 0,
|
|
3529
|
+
modelUsed: r.modelUsed,
|
|
3530
|
+
createdAt: new Date(r.createdAt).toISOString(),
|
|
3531
|
+
updatedAt: new Date(r.updatedAt).toISOString(),
|
|
3532
|
+
status: r.spentUsd >= r.capUsd ? 'exceeded' : (r.spentUsd / r.capUsd) >= 0.8 ? 'warning' : 'ok',
|
|
3533
|
+
})),
|
|
3534
|
+
count: records.length,
|
|
3535
|
+
}));
|
|
3536
|
+
return;
|
|
3537
|
+
}
|
|
3427
3538
|
if (req.method === 'POST' && pathname === '/control/model') {
|
|
3428
3539
|
try {
|
|
3429
3540
|
const body = await readJsonBody(req);
|
|
@@ -3432,6 +3543,11 @@ async function startProxy(config = {}) {
|
|
|
3432
3543
|
res.end(JSON.stringify({ error: 'model required' }));
|
|
3433
3544
|
return;
|
|
3434
3545
|
}
|
|
3546
|
+
if (body.model.length > 128 || !/^[a-zA-Z0-9._:/-]+$/.test(body.model)) {
|
|
3547
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3548
|
+
res.end(JSON.stringify({ error: 'model must be ≤128 characters and contain only [a-zA-Z0-9._:/-]' }));
|
|
3549
|
+
return;
|
|
3550
|
+
}
|
|
3435
3551
|
const previousRouting = proxyConfig.routing;
|
|
3436
3552
|
const target = body.model;
|
|
3437
3553
|
// Update all complexity tiers to the requested model
|
|
@@ -3836,18 +3952,48 @@ async function startProxy(config = {}) {
|
|
|
3836
3952
|
return;
|
|
3837
3953
|
}
|
|
3838
3954
|
// === Session Intelligence endpoints ===
|
|
3955
|
+
if (req.method === 'GET' && (pathname === '/v1/sessions' || pathname === '/v1/sessions/active')) {
|
|
3956
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
3957
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
3958
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
3959
|
+
res.end(JSON.stringify({ error: 'Session endpoints are localhost-only' }));
|
|
3960
|
+
return;
|
|
3961
|
+
}
|
|
3962
|
+
}
|
|
3839
3963
|
if (req.method === 'GET' && pathname === '/v1/sessions') {
|
|
3840
3964
|
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3841
3965
|
const params = new URLSearchParams(queryString);
|
|
3842
|
-
const
|
|
3843
|
-
const
|
|
3966
|
+
const rawLimit = parseInt(params.get('limit') || '20', 10);
|
|
3967
|
+
const rawDays = parseInt(params.get('days') || '7', 10);
|
|
3968
|
+
const limit = Math.min(Number.isFinite(rawLimit) && rawLimit > 0 ? rawLimit : 20, 100);
|
|
3969
|
+
const days = Number.isFinite(rawDays) && rawDays > 0 ? rawDays : 7;
|
|
3844
3970
|
const sessions = (0, session_tracker_js_1.getSessions)({ limit, days });
|
|
3845
3971
|
const now = Date.now();
|
|
3846
3972
|
const activeCutoff = now - 5 * 60 * 1000;
|
|
3973
|
+
// Build model_mix per session from episodic_events in osmosis.db
|
|
3974
|
+
const modelMixMap = new Map();
|
|
3975
|
+
try {
|
|
3976
|
+
const osmDb = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
3977
|
+
if (osmDb && sessions.length > 0) {
|
|
3978
|
+
const placeholders = sessions.map(() => '?').join(',');
|
|
3979
|
+
const sessionIds = sessions.map(s => s.id);
|
|
3980
|
+
const rows = osmDb.prepare(`SELECT session_id, model_used, COUNT(*) as cnt
|
|
3981
|
+
FROM episodic_events
|
|
3982
|
+
WHERE session_id IN (${placeholders})
|
|
3983
|
+
GROUP BY session_id, model_used`).all(...sessionIds);
|
|
3984
|
+
for (const row of rows) {
|
|
3985
|
+
if (!modelMixMap.has(row.session_id))
|
|
3986
|
+
modelMixMap.set(row.session_id, {});
|
|
3987
|
+
modelMixMap.get(row.session_id)[row.model_used] = row.cnt;
|
|
3988
|
+
}
|
|
3989
|
+
}
|
|
3990
|
+
}
|
|
3991
|
+
catch { /* best-effort */ }
|
|
3847
3992
|
const result = sessions.map(s => ({
|
|
3848
3993
|
...s,
|
|
3849
3994
|
active: s.last_seen_at >= activeCutoff,
|
|
3850
3995
|
duration_ms: s.last_seen_at - s.started_at,
|
|
3996
|
+
model_mix: modelMixMap.get(s.id) ?? {},
|
|
3851
3997
|
}));
|
|
3852
3998
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3853
3999
|
res.end(JSON.stringify({ sessions: result, total: result.length }));
|
|
@@ -3865,6 +4011,168 @@ async function startProxy(config = {}) {
|
|
|
3865
4011
|
res.end(JSON.stringify({ sessions: result, total: result.length }));
|
|
3866
4012
|
return;
|
|
3867
4013
|
}
|
|
4014
|
+
// === Trace endpoints (CAP 3) ===
|
|
4015
|
+
if ((req.method === 'GET' || req.method === 'POST') && (pathname === '/v1/traces' || (pathname ?? '').startsWith('/v1/traces/'))) {
|
|
4016
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
4017
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
4018
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
4019
|
+
res.end(JSON.stringify({ error: 'Trace endpoints are localhost-only' }));
|
|
4020
|
+
return;
|
|
4021
|
+
}
|
|
4022
|
+
}
|
|
4023
|
+
if (req.method === 'GET' && pathname === '/v1/traces') {
|
|
4024
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4025
|
+
const params = new URLSearchParams(queryString);
|
|
4026
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
|
|
4027
|
+
const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
|
|
4028
|
+
const traces = traceWriter.getRecentTraces(limit);
|
|
4029
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4030
|
+
res.end(JSON.stringify({ traces, total: traces.length }));
|
|
4031
|
+
return;
|
|
4032
|
+
}
|
|
4033
|
+
if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/graph$/)) {
|
|
4034
|
+
const sessionId = (pathname ?? '').split('/')[3] ?? '';
|
|
4035
|
+
const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
|
|
4036
|
+
const graph = traceWriter.getSessionGraph(sessionId);
|
|
4037
|
+
if (!graph) {
|
|
4038
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
4039
|
+
res.end(JSON.stringify({ error: `No graph found for session: ${sessionId}` }));
|
|
4040
|
+
return;
|
|
4041
|
+
}
|
|
4042
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4043
|
+
res.end(JSON.stringify(graph));
|
|
4044
|
+
return;
|
|
4045
|
+
}
|
|
4046
|
+
if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/events$/)) {
|
|
4047
|
+
const traceId = (pathname ?? '').split('/')[3] ?? '';
|
|
4048
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4049
|
+
const events = tw.getTraceEvents(traceId);
|
|
4050
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4051
|
+
res.end(JSON.stringify({ traceId, events, total: events.length }));
|
|
4052
|
+
return;
|
|
4053
|
+
}
|
|
4054
|
+
if (req.method === 'POST' && pathname === '/v1/traces/export') {
|
|
4055
|
+
try {
|
|
4056
|
+
const exportOptions = await readJsonBody(req);
|
|
4057
|
+
const format = exportOptions.format ?? 'jsonl';
|
|
4058
|
+
if (!['jsonl', 'csv', 'markdown', 'traceops'].includes(format)) {
|
|
4059
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
4060
|
+
res.end(JSON.stringify({ error: 'format must be one of: jsonl, csv, markdown, traceops' }));
|
|
4061
|
+
return;
|
|
4062
|
+
}
|
|
4063
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4064
|
+
const exported = await tw.export({
|
|
4065
|
+
format: format,
|
|
4066
|
+
sessionIds: exportOptions.sessionIds,
|
|
4067
|
+
fromTimestamp: exportOptions.fromTimestamp,
|
|
4068
|
+
toTimestamp: exportOptions.toTimestamp,
|
|
4069
|
+
includeToolInputs: exportOptions.includeToolInputs,
|
|
4070
|
+
});
|
|
4071
|
+
const contentType = format === 'markdown' ? 'text/markdown' : 'application/x-ndjson';
|
|
4072
|
+
res.writeHead(200, { 'Content-Type': contentType });
|
|
4073
|
+
res.end(exported);
|
|
4074
|
+
}
|
|
4075
|
+
catch {
|
|
4076
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
4077
|
+
res.end(JSON.stringify({ error: 'Invalid JSON body' }));
|
|
4078
|
+
}
|
|
4079
|
+
return;
|
|
4080
|
+
}
|
|
4081
|
+
// === Memory endpoints (Session 4 — localhost-only) ===
|
|
4082
|
+
if ((pathname ?? '').startsWith('/v1/memory')) {
|
|
4083
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
4084
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
4085
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
4086
|
+
res.end(JSON.stringify({ error: 'Memory endpoints are localhost-only' }));
|
|
4087
|
+
return;
|
|
4088
|
+
}
|
|
4089
|
+
}
|
|
4090
|
+
if (req.method === 'GET' && pathname === '/v1/memory/semantic') {
|
|
4091
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4092
|
+
const params = new URLSearchParams(queryString);
|
|
4093
|
+
const sessionId = params.get('session_id') ?? undefined;
|
|
4094
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
|
|
4095
|
+
try {
|
|
4096
|
+
const db = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
4097
|
+
if (!db) {
|
|
4098
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4099
|
+
res.end(JSON.stringify({ atoms: [], total: 0 }));
|
|
4100
|
+
return;
|
|
4101
|
+
}
|
|
4102
|
+
let atoms;
|
|
4103
|
+
if (sessionId) {
|
|
4104
|
+
atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
|
|
4105
|
+
FROM knowledge_atoms WHERE session_id = ? ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(sessionId, limit);
|
|
4106
|
+
}
|
|
4107
|
+
else {
|
|
4108
|
+
atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
|
|
4109
|
+
FROM knowledge_atoms ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(limit);
|
|
4110
|
+
}
|
|
4111
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4112
|
+
res.end(JSON.stringify({ atoms, total: atoms.length }));
|
|
4113
|
+
}
|
|
4114
|
+
catch {
|
|
4115
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4116
|
+
res.end(JSON.stringify({ error: 'Memory query failed' }));
|
|
4117
|
+
}
|
|
4118
|
+
return;
|
|
4119
|
+
}
|
|
4120
|
+
if (req.method === 'GET' && pathname === '/v1/memory/episodic') {
|
|
4121
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4122
|
+
const params = new URLSearchParams(queryString);
|
|
4123
|
+
const sessionId = params.get('session_id') ?? undefined;
|
|
4124
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '50', 10) || 50, 200);
|
|
4125
|
+
try {
|
|
4126
|
+
const db = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
4127
|
+
if (!db) {
|
|
4128
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4129
|
+
res.end(JSON.stringify({ events: [], total: 0 }));
|
|
4130
|
+
return;
|
|
4131
|
+
}
|
|
4132
|
+
let events;
|
|
4133
|
+
if (sessionId) {
|
|
4134
|
+
events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
|
|
4135
|
+
FROM episodic_events WHERE session_id = ? ORDER BY timestamp DESC LIMIT ?`).all(sessionId, limit);
|
|
4136
|
+
}
|
|
4137
|
+
else {
|
|
4138
|
+
events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
|
|
4139
|
+
FROM episodic_events ORDER BY timestamp DESC LIMIT ?`).all(limit);
|
|
4140
|
+
}
|
|
4141
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4142
|
+
res.end(JSON.stringify({ events, total: events.length }));
|
|
4143
|
+
}
|
|
4144
|
+
catch {
|
|
4145
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4146
|
+
res.end(JSON.stringify({ error: 'Memory query failed' }));
|
|
4147
|
+
}
|
|
4148
|
+
return;
|
|
4149
|
+
}
|
|
4150
|
+
if (req.method === 'GET' && pathname === '/v1/memory/procedural') {
|
|
4151
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4152
|
+
const params = new URLSearchParams(queryString);
|
|
4153
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '10', 10) || 10, 50);
|
|
4154
|
+
try {
|
|
4155
|
+
// Read from mesh.db (procedural atom store)
|
|
4156
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
4157
|
+
const Database = require('better-sqlite3');
|
|
4158
|
+
const meshDbPath = require('node:path').join((0, osmosis_store_js_1.getRelayplaneDir)(), 'mesh.db');
|
|
4159
|
+
let atoms = [];
|
|
4160
|
+
try {
|
|
4161
|
+
const meshDb = new Database(meshDbPath, { readonly: true });
|
|
4162
|
+
atoms = meshDb.prepare(`SELECT id, type, observation, confidence, fitness_score, trust_tier, evidence_count, created_at, updated_at
|
|
4163
|
+
FROM atoms ORDER BY fitness_score DESC, confidence DESC LIMIT ?`).all(limit);
|
|
4164
|
+
meshDb.close();
|
|
4165
|
+
}
|
|
4166
|
+
catch { /* mesh.db may not exist yet */ }
|
|
4167
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4168
|
+
res.end(JSON.stringify({ atoms, total: atoms.length }));
|
|
4169
|
+
}
|
|
4170
|
+
catch {
|
|
4171
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4172
|
+
res.end(JSON.stringify({ error: 'Procedural memory query failed' }));
|
|
4173
|
+
}
|
|
4174
|
+
return;
|
|
4175
|
+
}
|
|
3868
4176
|
// Extract auth context from incoming request
|
|
3869
4177
|
const ctx = extractRequestContext(req);
|
|
3870
4178
|
const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
|
|
@@ -3922,6 +4230,29 @@ async function startProxy(config = {}) {
|
|
|
3922
4230
|
const parsedModel = parseModelSuffix(requestedModel);
|
|
3923
4231
|
let routingSuffix = parsedModel.suffix;
|
|
3924
4232
|
requestedModel = parsedModel.baseModel;
|
|
4233
|
+
// ── CAP 3: Deterministic Traces — allocate traceId + emit request.start ──
|
|
4234
|
+
const nativeTraceId = (0, node_crypto_1.randomUUID)();
|
|
4235
|
+
{
|
|
4236
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4237
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4238
|
+
const sysHash = nativeSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(nativeSystemPrompt) : undefined;
|
|
4239
|
+
const messages = requestBody['messages'];
|
|
4240
|
+
const reqTools = Array.isArray(requestBody['tools'])
|
|
4241
|
+
? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
|
|
4242
|
+
: undefined;
|
|
4243
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4244
|
+
eventType: 'request.start',
|
|
4245
|
+
parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
|
|
4246
|
+
agentId: getHeaderValue(req, 'x-agent-id') || nativeExplicitAgentId || undefined,
|
|
4247
|
+
payload: {
|
|
4248
|
+
model: requestedModel,
|
|
4249
|
+
messageCount: messages?.length,
|
|
4250
|
+
requestedTools: reqTools,
|
|
4251
|
+
systemPromptHash: sysHash,
|
|
4252
|
+
},
|
|
4253
|
+
});
|
|
4254
|
+
}
|
|
4255
|
+
}
|
|
3925
4256
|
if (relayplaneEnabled && !relayplaneBypass && requestedModel) {
|
|
3926
4257
|
const override = proxyConfig.modelOverrides?.[requestedModel];
|
|
3927
4258
|
if (override) {
|
|
@@ -4180,6 +4511,109 @@ async function startProxy(config = {}) {
|
|
|
4180
4511
|
Object.assign(budgetExtraHeaders, budgetCheck.headers);
|
|
4181
4512
|
}
|
|
4182
4513
|
// ── End budget check ──
|
|
4514
|
+
// ── Session budget check (only when X-Claude-Code-Session-Id is present) ──
|
|
4515
|
+
let nativeSessionBudgetResult = null;
|
|
4516
|
+
if (nativeSessionSource === 'claude-code') {
|
|
4517
|
+
nativeSessionBudgetResult = budgetManager.checkSessionBudget(nativeSessionId, targetModel || requestedModel);
|
|
4518
|
+
if (!nativeSessionBudgetResult.allowed) {
|
|
4519
|
+
res.writeHead(429, { 'Content-Type': 'application/json' });
|
|
4520
|
+
res.end(JSON.stringify({
|
|
4521
|
+
error: 'session_budget_exceeded',
|
|
4522
|
+
spent: nativeSessionBudgetResult.spent,
|
|
4523
|
+
cap: nativeSessionBudgetResult.cap,
|
|
4524
|
+
type: 'session_budget_exceeded',
|
|
4525
|
+
}));
|
|
4526
|
+
return;
|
|
4527
|
+
}
|
|
4528
|
+
if (nativeSessionBudgetResult.model !== (targetModel || requestedModel)) {
|
|
4529
|
+
log(`Session budget downgrade: ${targetModel || requestedModel} → ${nativeSessionBudgetResult.model}`);
|
|
4530
|
+
// CAP 3: emit model.switch before we overwrite targetModel
|
|
4531
|
+
{
|
|
4532
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4533
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4534
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4535
|
+
eventType: 'model.switch',
|
|
4536
|
+
payload: {
|
|
4537
|
+
fromModel: targetModel || requestedModel,
|
|
4538
|
+
toModel: nativeSessionBudgetResult.model,
|
|
4539
|
+
switchReason: 'session_budget',
|
|
4540
|
+
},
|
|
4541
|
+
});
|
|
4542
|
+
}
|
|
4543
|
+
}
|
|
4544
|
+
targetModel = nativeSessionBudgetResult.model;
|
|
4545
|
+
if (requestBody)
|
|
4546
|
+
requestBody['model'] = targetModel;
|
|
4547
|
+
}
|
|
4548
|
+
// CAP 3: emit budget.checkpoint after every budget check
|
|
4549
|
+
{
|
|
4550
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4551
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4552
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4553
|
+
eventType: 'budget.checkpoint',
|
|
4554
|
+
payload: {
|
|
4555
|
+
sessionCostUsd: nativeSessionBudgetResult.spent,
|
|
4556
|
+
sessionCapUsd: nativeSessionBudgetResult.cap,
|
|
4557
|
+
sessionPct: nativeSessionBudgetResult.cap > 0
|
|
4558
|
+
? nativeSessionBudgetResult.spent / nativeSessionBudgetResult.cap
|
|
4559
|
+
: 0,
|
|
4560
|
+
},
|
|
4561
|
+
});
|
|
4562
|
+
}
|
|
4563
|
+
}
|
|
4564
|
+
}
|
|
4565
|
+
// ── End session budget check ──
|
|
4566
|
+
// ── Tool authorization check (deny-by-default, after budget gate) ──
|
|
4567
|
+
{
|
|
4568
|
+
const reqTools = Array.isArray(requestBody?.['tools'])
|
|
4569
|
+
? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
|
|
4570
|
+
: [];
|
|
4571
|
+
if (reqTools.length > 0) {
|
|
4572
|
+
const tr = (0, tool_router_js_1.getToolRouter)();
|
|
4573
|
+
const toolCtx = (0, tool_router_js_1.extractToolContext)(req.headers, nativeSessionId, reqTools, tr);
|
|
4574
|
+
const authResult = tr.checkTools(toolCtx);
|
|
4575
|
+
if (authResult.denied.length > 0) {
|
|
4576
|
+
for (const toolName of authResult.denied) {
|
|
4577
|
+
tr.recordDenied(nativeSessionId, toolName, 'not_in_active_pack');
|
|
4578
|
+
}
|
|
4579
|
+
// Emit tool.denied trace event
|
|
4580
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4581
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4582
|
+
for (const toolName of authResult.denied) {
|
|
4583
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4584
|
+
eventType: 'tool.denied',
|
|
4585
|
+
payload: { toolName },
|
|
4586
|
+
});
|
|
4587
|
+
}
|
|
4588
|
+
}
|
|
4589
|
+
// If ALL requested tools are denied, block the request
|
|
4590
|
+
if (authResult.allowed.length === 0) {
|
|
4591
|
+
res.writeHead(403, {
|
|
4592
|
+
'Content-Type': 'application/json',
|
|
4593
|
+
'X-Relay-Tools-Denied': authResult.deniedHeader,
|
|
4594
|
+
});
|
|
4595
|
+
res.end(JSON.stringify({
|
|
4596
|
+
error: 'tool_not_authorized',
|
|
4597
|
+
denied: authResult.denied,
|
|
4598
|
+
message: 'All requested tools are denied by the active tool pack policy.',
|
|
4599
|
+
}));
|
|
4600
|
+
return;
|
|
4601
|
+
}
|
|
4602
|
+
}
|
|
4603
|
+
// Strip denied tools from the forwarded request body so the model
|
|
4604
|
+
// cannot call them even when only a partial set of tools was denied.
|
|
4605
|
+
if (Array.isArray(requestBody['tools'])) {
|
|
4606
|
+
const allowedSet = new Set(authResult.allowed);
|
|
4607
|
+
requestBody['tools'] = requestBody['tools'].filter(t => allowedSet.has(t?.name ?? ''));
|
|
4608
|
+
}
|
|
4609
|
+
// Set response header so callers know which tools were denied
|
|
4610
|
+
if (authResult.deniedHeader) {
|
|
4611
|
+
// Stash for later use when writing response headers
|
|
4612
|
+
budgetExtraHeaders['X-Relay-Tools-Denied'] = authResult.deniedHeader;
|
|
4613
|
+
}
|
|
4614
|
+
}
|
|
4615
|
+
}
|
|
4616
|
+
// ── End tool authorization check ──
|
|
4183
4617
|
// ── Rate limit check ──
|
|
4184
4618
|
const workspaceId = 'local'; // Local proxy uses single workspace
|
|
4185
4619
|
try {
|
|
@@ -4307,7 +4741,7 @@ async function startProxy(config = {}) {
|
|
|
4307
4741
|
// ── End cross-provider cascade ──
|
|
4308
4742
|
const durationMs = Date.now() - startTime;
|
|
4309
4743
|
const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
|
|
4310
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
4744
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, errMsg, providerResponse.status);
|
|
4311
4745
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
4312
4746
|
res.end(JSON.stringify(errorPayload));
|
|
4313
4747
|
return;
|
|
@@ -4322,6 +4756,8 @@ async function startProxy(config = {}) {
|
|
|
4322
4756
|
'Cache-Control': 'no-cache',
|
|
4323
4757
|
'Connection': 'keep-alive',
|
|
4324
4758
|
'X-RelayPlane-Cache': cacheBypass ? 'BYPASS' : 'MISS',
|
|
4759
|
+
'X-Relay-Trace-Id': nativeTraceId,
|
|
4760
|
+
'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)),
|
|
4325
4761
|
...nativeStreamRpHeaders,
|
|
4326
4762
|
});
|
|
4327
4763
|
const reader = providerResponse.body?.getReader();
|
|
@@ -4416,7 +4852,7 @@ async function startProxy(config = {}) {
|
|
|
4416
4852
|
});
|
|
4417
4853
|
log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
4418
4854
|
}
|
|
4419
|
-
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, ...nativeRpHeaders });
|
|
4855
|
+
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, 'X-Relay-Trace-Id': nativeTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)), ...nativeRpHeaders });
|
|
4420
4856
|
res.end(JSON.stringify(nativeResponseData));
|
|
4421
4857
|
}
|
|
4422
4858
|
}
|
|
@@ -4452,6 +4888,48 @@ async function startProxy(config = {}) {
|
|
|
4452
4888
|
}
|
|
4453
4889
|
// ── Session Intelligence: upsert session record ──
|
|
4454
4890
|
(0, session_tracker_js_1.upsertSession)(nativeSessionId, nativeSessionSource, nativeCostUsd, nativeTokIn, nativeTokOut);
|
|
4891
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
4892
|
+
try {
|
|
4893
|
+
(0, episode_writer_js_1.writeEpisode)(nativeSessionId, {
|
|
4894
|
+
eventType: 'model-response',
|
|
4895
|
+
modelUsed: targetModel || requestedModel,
|
|
4896
|
+
tokensIn: nativeTokIn,
|
|
4897
|
+
tokensOut: nativeTokOut,
|
|
4898
|
+
costUsd: nativeCostUsd,
|
|
4899
|
+
outcome: 'success',
|
|
4900
|
+
traceId: nativeTraceId,
|
|
4901
|
+
durationMs,
|
|
4902
|
+
});
|
|
4903
|
+
}
|
|
4904
|
+
catch { /* never block hot path */ }
|
|
4905
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize ──
|
|
4906
|
+
{
|
|
4907
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4908
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4909
|
+
const finishReason = nativeResponseData?.['stop_reason']
|
|
4910
|
+
?? (nativeResponseData?.['choices']?.[0]?.['finish_reason']);
|
|
4911
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4912
|
+
eventType: 'request.end',
|
|
4913
|
+
durationMs,
|
|
4914
|
+
payload: {
|
|
4915
|
+
modelUsed: targetModel || requestedModel,
|
|
4916
|
+
tokensIn: nativeTokIn,
|
|
4917
|
+
tokensOut: nativeTokOut,
|
|
4918
|
+
costUsd: nativeCostUsd,
|
|
4919
|
+
finishReason,
|
|
4920
|
+
},
|
|
4921
|
+
});
|
|
4922
|
+
void tw.finalizeTrace(nativeTraceId, nativeSessionId, {
|
|
4923
|
+
costUsd: nativeCostUsd,
|
|
4924
|
+
modelUsed: targetModel || requestedModel,
|
|
4925
|
+
durationMs,
|
|
4926
|
+
});
|
|
4927
|
+
}
|
|
4928
|
+
}
|
|
4929
|
+
// ── Session budget: record spend (fire-and-forget, only for claude-code sessions) ──
|
|
4930
|
+
if (nativeSessionSource === 'claude-code') {
|
|
4931
|
+
budgetManager.updateSessionBudget(nativeSessionId, nativeCostUsd, targetModel || requestedModel);
|
|
4932
|
+
}
|
|
4455
4933
|
// ── Post-request: budget spend + anomaly detection ──
|
|
4456
4934
|
postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
4457
4935
|
if (recordTelemetry) {
|
|
@@ -4482,7 +4960,20 @@ async function startProxy(config = {}) {
|
|
|
4482
4960
|
catchErrMsg = err instanceof Error ? err.message : String(err);
|
|
4483
4961
|
catchErrStatus = 500;
|
|
4484
4962
|
}
|
|
4485
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
4963
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, catchErrMsg, catchErrStatus);
|
|
4964
|
+
// ── CAP 3: Deterministic Traces — emit request.end (error) + finalize ──
|
|
4965
|
+
{
|
|
4966
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4967
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4968
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4969
|
+
eventType: 'request.end',
|
|
4970
|
+
durationMs,
|
|
4971
|
+
error: { code: String(catchErrStatus), message: catchErrMsg, retryable: catchErrStatus >= 500 },
|
|
4972
|
+
payload: { modelUsed: targetModel || requestedModel },
|
|
4973
|
+
});
|
|
4974
|
+
void tw.finalizeTrace(nativeTraceId, nativeSessionId, { durationMs, modelUsed: targetModel || requestedModel });
|
|
4975
|
+
}
|
|
4976
|
+
}
|
|
4486
4977
|
if (recordTelemetry) {
|
|
4487
4978
|
sendCloudTelemetry(taskType, targetModel || requestedModel, 0, 0, durationMs, false, 0, originalModel ?? undefined);
|
|
4488
4979
|
meshCapture(targetModel || requestedModel, targetProvider, taskType, 0, 0, 0, durationMs, false, catchErrMsg);
|
|
@@ -4606,6 +5097,24 @@ async function startProxy(config = {}) {
|
|
|
4606
5097
|
const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
|
|
4607
5098
|
chatAgentFingerprint = agentResult.fingerprint;
|
|
4608
5099
|
}
|
|
5100
|
+
// ── CAP 3: Deterministic Traces — allocate chatTraceId + emit request.start ──
|
|
5101
|
+
const chatTraceId = (0, node_crypto_1.randomUUID)();
|
|
5102
|
+
{
|
|
5103
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
5104
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
5105
|
+
const chatSysHash = chatSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(chatSystemPrompt) : undefined;
|
|
5106
|
+
void tw.write(chatSessionId, chatTraceId, {
|
|
5107
|
+
eventType: 'request.start',
|
|
5108
|
+
parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
|
|
5109
|
+
agentId: getHeaderValue(req, 'x-agent-id') || chatExplicitAgentId || undefined,
|
|
5110
|
+
payload: {
|
|
5111
|
+
model: request.model,
|
|
5112
|
+
messageCount: request.messages?.length,
|
|
5113
|
+
systemPromptHash: chatSysHash,
|
|
5114
|
+
},
|
|
5115
|
+
});
|
|
5116
|
+
}
|
|
5117
|
+
}
|
|
4609
5118
|
// ── Response Cache: check for cached response (chat/completions) ──
|
|
4610
5119
|
const chatCacheBypass = responseCache.shouldBypass(request);
|
|
4611
5120
|
let chatCacheHash;
|
|
@@ -4996,6 +5505,42 @@ async function startProxy(config = {}) {
|
|
|
4996
5505
|
if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
|
|
4997
5506
|
(0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
|
|
4998
5507
|
(0, session_tracker_js_1.upsertSession)(chatSessionId, chatSessionSource, cascadeCost, cascadeTokensIn, cascadeTokensOut);
|
|
5508
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
5509
|
+
try {
|
|
5510
|
+
(0, episode_writer_js_1.writeEpisode)(chatSessionId, {
|
|
5511
|
+
eventType: 'routing-decision',
|
|
5512
|
+
modelUsed: cascadeResult.model,
|
|
5513
|
+
tokensIn: cascadeTokensIn,
|
|
5514
|
+
tokensOut: cascadeTokensOut,
|
|
5515
|
+
costUsd: cascadeCost,
|
|
5516
|
+
outcome: 'success',
|
|
5517
|
+
outcomeDetail: 'cascade',
|
|
5518
|
+
traceId: chatTraceId,
|
|
5519
|
+
durationMs,
|
|
5520
|
+
});
|
|
5521
|
+
}
|
|
5522
|
+
catch { /* never block hot path */ }
|
|
5523
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize (chat cascade) ──
|
|
5524
|
+
{
|
|
5525
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
5526
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
5527
|
+
void tw.write(chatSessionId, chatTraceId, {
|
|
5528
|
+
eventType: 'request.end',
|
|
5529
|
+
durationMs,
|
|
5530
|
+
payload: {
|
|
5531
|
+
modelUsed: cascadeResult.model,
|
|
5532
|
+
tokensIn: cascadeTokensIn,
|
|
5533
|
+
tokensOut: cascadeTokensOut,
|
|
5534
|
+
costUsd: cascadeCost,
|
|
5535
|
+
},
|
|
5536
|
+
});
|
|
5537
|
+
void tw.finalizeTrace(chatTraceId, chatSessionId, {
|
|
5538
|
+
costUsd: cascadeCost,
|
|
5539
|
+
modelUsed: cascadeResult.model,
|
|
5540
|
+
durationMs,
|
|
5541
|
+
});
|
|
5542
|
+
}
|
|
5543
|
+
}
|
|
4999
5544
|
if (recordTelemetry) {
|
|
5000
5545
|
try {
|
|
5001
5546
|
const runResult = await relay.run({
|
|
@@ -5023,7 +5568,7 @@ async function startProxy(config = {}) {
|
|
|
5023
5568
|
meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
|
|
5024
5569
|
}
|
|
5025
5570
|
const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
|
|
5026
|
-
res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
|
|
5571
|
+
res.writeHead(200, { 'Content-Type': 'application/json', 'X-Relay-Trace-Id': chatTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)), ...chatCascadeRpHeaders });
|
|
5027
5572
|
res.end(JSON.stringify(responseData));
|
|
5028
5573
|
}
|
|
5029
5574
|
catch (err) {
|
|
@@ -5038,7 +5583,7 @@ async function startProxy(config = {}) {
|
|
|
5038
5583
|
cascadeErrMsg = err instanceof Error ? err.message : String(err);
|
|
5039
5584
|
cascadeErrStatus = 500;
|
|
5040
5585
|
}
|
|
5041
|
-
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity,
|
|
5586
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, chatAgentFingerprint, chatExplicitAgentId, cascadeErrMsg, cascadeErrStatus);
|
|
5042
5587
|
if (recordTelemetry) {
|
|
5043
5588
|
sendCloudTelemetry(taskType, targetModel || 'unknown', 0, 0, durationMs, false, 0, originalRequestedModel ?? undefined);
|
|
5044
5589
|
meshCapture(targetModel || 'unknown', targetProvider, taskType, 0, 0, 0, durationMs, false, cascadeErrMsg);
|
|
@@ -5054,8 +5599,10 @@ async function startProxy(config = {}) {
|
|
|
5054
5599
|
}
|
|
5055
5600
|
}
|
|
5056
5601
|
else {
|
|
5602
|
+
res.setHeader('X-Relay-Trace-Id', chatTraceId);
|
|
5603
|
+
res.setHeader('X-Relay-Memory-Hits', String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)));
|
|
5057
5604
|
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey, // pass for cross-provider cascade API key resolution (GH #38)
|
|
5058
|
-
chatSessionId, chatSessionSource);
|
|
5605
|
+
chatSessionId, chatSessionSource, chatTraceId);
|
|
5059
5606
|
}
|
|
5060
5607
|
}
|
|
5061
5608
|
});
|
|
@@ -5251,7 +5798,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5251
5798
|
if (!ollamaStream.success || !ollamaStream.stream) {
|
|
5252
5799
|
const durationMs = Date.now() - startTime;
|
|
5253
5800
|
const errMsg = ollamaStream.error?.message ?? 'Ollama stream failed';
|
|
5254
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5801
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errMsg, ollamaStream.error?.status);
|
|
5255
5802
|
res.writeHead(ollamaStream.error?.status ?? 502, { 'Content-Type': 'application/json' });
|
|
5256
5803
|
res.end(JSON.stringify({ error: ollamaStream.error }));
|
|
5257
5804
|
return;
|
|
@@ -5287,7 +5834,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5287
5834
|
}
|
|
5288
5835
|
const durationMs = Date.now() - startTime;
|
|
5289
5836
|
const streamErrMsg = extractProviderErrorMessage(errorData, providerResponse.status);
|
|
5290
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5837
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, streamErrMsg, providerResponse.status);
|
|
5291
5838
|
if (recordTelemetry) {
|
|
5292
5839
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5293
5840
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, streamErrMsg);
|
|
@@ -5303,7 +5850,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5303
5850
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
5304
5851
|
}
|
|
5305
5852
|
const durationMs = Date.now() - startTime;
|
|
5306
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5853
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
|
|
5307
5854
|
if (recordTelemetry) {
|
|
5308
5855
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5309
5856
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
|
|
@@ -5431,6 +5978,21 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5431
5978
|
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
|
|
5432
5979
|
if (sessionId && sessionSource)
|
|
5433
5980
|
(0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, streamCost, streamTokensIn, streamTokensOut);
|
|
5981
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
5982
|
+
if (sessionId) {
|
|
5983
|
+
try {
|
|
5984
|
+
(0, episode_writer_js_1.writeEpisode)(sessionId, {
|
|
5985
|
+
eventType: 'model-response',
|
|
5986
|
+
modelUsed: targetModel,
|
|
5987
|
+
tokensIn: streamTokensIn,
|
|
5988
|
+
tokensOut: streamTokensOut,
|
|
5989
|
+
costUsd: streamCost,
|
|
5990
|
+
outcome: 'success',
|
|
5991
|
+
durationMs,
|
|
5992
|
+
});
|
|
5993
|
+
}
|
|
5994
|
+
catch { /* never block hot path */ }
|
|
5995
|
+
}
|
|
5434
5996
|
// ── Post-request: budget spend + anomaly detection ──
|
|
5435
5997
|
try {
|
|
5436
5998
|
(0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
|
|
@@ -5468,7 +6030,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5468
6030
|
*/
|
|
5469
6031
|
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
|
|
5470
6032
|
/** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
|
|
5471
|
-
anthropicEnvKeyForCascade, sessionId, sessionSource
|
|
6033
|
+
anthropicEnvKeyForCascade, sessionId, sessionSource,
|
|
6034
|
+
/** CAP 3: trace ID for deterministic trace write */
|
|
6035
|
+
traceId) {
|
|
5472
6036
|
let responseData;
|
|
5473
6037
|
try {
|
|
5474
6038
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -5506,7 +6070,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
|
|
|
5506
6070
|
// All fallbacks exhausted — return the primary error
|
|
5507
6071
|
const durationMs = Date.now() - startTime;
|
|
5508
6072
|
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
5509
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity,
|
|
6073
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
|
|
5510
6074
|
if (recordTelemetry) {
|
|
5511
6075
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5512
6076
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
@@ -5520,7 +6084,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
|
|
|
5520
6084
|
// No cascade — return error as-is
|
|
5521
6085
|
const durationMs = Date.now() - startTime;
|
|
5522
6086
|
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
5523
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
6087
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
|
|
5524
6088
|
if (recordTelemetry) {
|
|
5525
6089
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5526
6090
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
@@ -5538,7 +6102,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
|
|
|
5538
6102
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
5539
6103
|
}
|
|
5540
6104
|
const durationMs = Date.now() - startTime;
|
|
5541
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
6105
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
|
|
5542
6106
|
if (recordTelemetry) {
|
|
5543
6107
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5544
6108
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
|
|
@@ -5567,6 +6131,37 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
|
|
|
5567
6131
|
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
|
|
5568
6132
|
if (sessionId && sessionSource)
|
|
5569
6133
|
(0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, cost, tokensIn, tokensOut);
|
|
6134
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
6135
|
+
if (sessionId) {
|
|
6136
|
+
try {
|
|
6137
|
+
(0, episode_writer_js_1.writeEpisode)(sessionId, {
|
|
6138
|
+
eventType: 'model-response',
|
|
6139
|
+
modelUsed: targetModel,
|
|
6140
|
+
tokensIn,
|
|
6141
|
+
tokensOut,
|
|
6142
|
+
costUsd: cost,
|
|
6143
|
+
outcome: 'success',
|
|
6144
|
+
traceId: traceId ?? undefined,
|
|
6145
|
+
durationMs,
|
|
6146
|
+
});
|
|
6147
|
+
}
|
|
6148
|
+
catch { /* never block hot path */ }
|
|
6149
|
+
}
|
|
6150
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize (chat non-streaming) ──
|
|
6151
|
+
if (traceId && sessionId && recordTelemetry) {
|
|
6152
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
6153
|
+
if (tw.isEnabled()) {
|
|
6154
|
+
const finishReason = responseData?.['choices']
|
|
6155
|
+
? responseData['choices'][0]?.['finish_reason']
|
|
6156
|
+
: undefined;
|
|
6157
|
+
void tw.write(sessionId, traceId, {
|
|
6158
|
+
eventType: 'request.end',
|
|
6159
|
+
durationMs,
|
|
6160
|
+
payload: { modelUsed: targetModel, tokensIn, tokensOut, costUsd: cost, finishReason },
|
|
6161
|
+
});
|
|
6162
|
+
void tw.finalizeTrace(traceId, sessionId, { costUsd: cost, modelUsed: targetModel, durationMs });
|
|
6163
|
+
}
|
|
6164
|
+
}
|
|
5570
6165
|
// ── Post-request: budget spend + anomaly detection ──
|
|
5571
6166
|
try {
|
|
5572
6167
|
(0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
|