@relayplane/proxy 1.8.35 → 1.8.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -7
- package/dist/budget.d.ts +39 -0
- package/dist/budget.d.ts.map +1 -1
- package/dist/budget.js +173 -0
- package/dist/budget.js.map +1 -1
- package/dist/config.d.ts +21 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js.map +1 -1
- package/dist/episode-writer.d.ts +28 -0
- package/dist/episode-writer.d.ts.map +1 -0
- package/dist/episode-writer.js +263 -0
- package/dist/episode-writer.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -1
- package/dist/index.js.map +1 -1
- package/dist/osmosis-store.d.ts +9 -1
- package/dist/osmosis-store.d.ts.map +1 -1
- package/dist/osmosis-store.js +93 -4
- package/dist/osmosis-store.js.map +1 -1
- package/dist/session-tracker.d.ts +61 -0
- package/dist/session-tracker.d.ts.map +1 -0
- package/dist/session-tracker.js +268 -0
- package/dist/session-tracker.js.map +1 -0
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +813 -18
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/tool-router.d.ts +163 -0
- package/dist/tool-router.d.ts.map +1 -0
- package/dist/tool-router.js +528 -0
- package/dist/tool-router.js.map +1 -0
- package/dist/trace-writer.d.ts +155 -0
- package/dist/trace-writer.d.ts.map +1 -0
- package/dist/trace-writer.js +589 -0
- package/dist/trace-writer.js.map +1 -0
- package/package.json +1 -1
package/dist/standalone-proxy.js
CHANGED
|
@@ -94,6 +94,11 @@ const estimateRateMap = new Map();
|
|
|
94
94
|
// Without this, IPs that make one request and disappear stay in the map forever.
|
|
95
95
|
setInterval(() => (0, estimate_js_1.purgeExpiredRateLimitEntries)(estimateRateMap, Date.now()), 5 * 60 * 1000);
|
|
96
96
|
const osmosis_store_js_1 = require("./osmosis-store.js");
|
|
97
|
+
const episode_writer_js_1 = require("./episode-writer.js");
|
|
98
|
+
const session_tracker_js_1 = require("./session-tracker.js");
|
|
99
|
+
const trace_writer_js_1 = require("./trace-writer.js");
|
|
100
|
+
const tool_router_js_1 = require("./tool-router.js");
|
|
101
|
+
const node_crypto_1 = require("node:crypto");
|
|
97
102
|
const PROXY_VERSION = (() => {
|
|
98
103
|
try {
|
|
99
104
|
const pkgPath = path.join(__dirname, '..', 'package.json');
|
|
@@ -938,6 +943,18 @@ let _activeOllamaConfig;
|
|
|
938
943
|
function isContentLoggingEnabled() {
|
|
939
944
|
return _activeProxyConfig.dashboard?.showRequestContent !== false;
|
|
940
945
|
}
|
|
946
|
+
/**
 * Reports whether procedural knowledge hints should be injected into system prompts.
 *
 * Resolution order:
 *   1. Env override RELAYPLANE_PROCEDURAL_INJECTION — when the variable is defined
 *      at all it wins. Only the value "true" (case-insensitive, surrounding
 *      whitespace ignored) enables injection; any other value disables it.
 *   2. Config file key memory.proceduralInjectionEnabled — must be exactly `true`.
 *
 * Default: false — no system prompt modification occurs.
 *
 * @returns {boolean} true when injection is enabled.
 */
function isProceduralInjectionEnabled() {
    const envVal = process.env['RELAYPLANE_PROCEDURAL_INJECTION'];
    if (envVal !== undefined) {
        // Normalise first so "TRUE" or " true " are not silently treated as "off".
        return envVal.trim().toLowerCase() === 'true';
    }
    return _activeProxyConfig.memory?.proceduralInjectionEnabled === true;
}
|
|
941
958
|
function getProxyConfigPath() {
|
|
942
959
|
const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
|
|
943
960
|
if (customPath && customPath.trim())
|
|
@@ -2532,7 +2549,7 @@ th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:
|
|
|
2532
2549
|
td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
2533
2550
|
.section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
|
|
2534
2551
|
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.warn{background:#fbbf24}.dot.down{background:#ef4444}
|
|
2535
|
-
.section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none}
|
|
2552
|
+
.section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none!important}
|
|
2536
2553
|
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
|
|
2537
2554
|
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}.badge.err-auth{background:#2d0a0a;color:#ef4444}.badge.err-rate{background:#2d2a0a;color:#fbbf24}.badge.err-timeout{background:#2d1a0a;color:#fb923c}
|
|
2538
2555
|
.badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
|
|
@@ -2557,7 +2574,10 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
2557
2574
|
<div class="section collapsible collapsed"><h2>Agent Cost Breakdown</h2>
|
|
2558
2575
|
<table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
|
|
2559
2576
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
2560
|
-
<div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div
|
|
2577
|
+
<div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div></div></div>
|
|
2578
|
+
<div class="section collapsible collapsed" id="sessions-section"><h2>Sessions <span id="sessionsLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(last 7d)</span></h2>
|
|
2579
|
+
<table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Models</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
|
|
2580
|
+
</div>
|
|
2561
2581
|
<div class="section"><h2>Recent Runs <span id="historyLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(7d window, history-capped)</span></h2>
|
|
2562
2582
|
<table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
2563
2583
|
<script>
|
|
@@ -2566,6 +2586,28 @@ document.querySelectorAll('.section.collapsible h2').forEach(h2=>h2.addEventList
|
|
|
2566
2586
|
function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
|
|
2567
2587
|
function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
|
|
2568
2588
|
function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
|
|
2589
|
+
// Fetches the recent (7 day) and currently active sessions and renders them as
// rows of the #sessions table body. A failed fetch degrades to an empty list.
// NOTE(review): this script appears to be emitted from inside a JS template
// literal, so it deliberately avoids backslash escapes (they would be consumed
// before the browser sees them), backticks and dollar-brace sequences.
async function loadSessions(){
  try{
    const emptyList=()=>({sessions:[]});
    const [sessR,activeR]=await Promise.all([
      fetch('/v1/sessions?limit=20&days=7').then(r=>r.json()).catch(emptyList),
      fetch('/v1/sessions/active').then(r=>r.json()).catch(emptyList)
    ]);
    const el=$('sessions');
    if(!el)return;
    const activeIds=new Set(((activeR&&activeR.sessions)||[]).map(s=>s.id));
    const sessions=(sessR&&sessR.sessions)||[];
    if(!sessions.length){
      el.innerHTML='<tr><td colspan=10 style="color:#64748b">No sessions recorded yet</td></tr>';
      return;
    }
    el.innerHTML=sessions.map(s=>{
      const rawId=String(s.id);
      const isActive=activeIds.has(s.id)||s.active;
      // Named durationLabel so the page-level dur() helper is not shadowed.
      const durationLabel=s.duration_ms>0?Math.round(s.duration_ms/1000)+'s':'—';
      const badge=isActive?'<span class="badge ok" style="font-size:.7rem">LIVE</span>':'<span style="color:#64748b;font-size:.75rem">idle</span>';
      const srcBadge=s.session_source==='claude-code'?'<span style="color:#60a5fa;font-size:.75rem">claude-code</span>':'<span style="color:#94a3b8;font-size:.75rem">synthetic</span>';
      const shortId=rawId.length>20?rawId.slice(0,20)+'…':rawId;
      const mixEntries=s.model_mix?Object.entries(s.model_mix):[];
      const mix=mixEntries.length?mixEntries.map(([m,c])=>{
        // [0-9] instead of a backslash class: strips a trailing -YYYYMMDD suffix.
        const short=String(m).replace('claude-','').replace(/-[0-9]{8}$/,'').replace('sonnet','Sonnet').replace('opus','Opus').replace('haiku','Haiku');
        // Model names originate from request data, so escape before innerHTML.
        return '<span style="font-size:.72rem;color:#94a3b8">'+esc(short)+'<span style="color:#475569">×</span>'+esc(String(c))+'</span>';
      }).join(' '):'<span style="color:#475569;font-size:.72rem">—</span>';
      // Session ids are escaped in both the title attribute and the cell text.
      // fmtTime already builds a Date, so pass the timestamp straight through;
      // an invalid value then renders as text instead of throwing.
      return '<tr><td style="font-family:monospace;font-size:.8rem" title="'+esc(rawId)+'">'+esc(shortId)+'</td>'
        +'<td>'+srcBadge+'</td>'
        +'<td>'+fmtTime(s.started_at)+'</td>'
        +'<td>'+durationLabel+'</td>'
        +'<td>'+(Number(s.request_count)||0)+'</td>'
        +'<td>'+(Number(s.total_tokens_in)||0)+'</td>'
        +'<td>'+(Number(s.total_tokens_out)||0)+'</td>'
        +'<td>$'+fmt(s.total_cost_usd,4)+'</td>'
        +'<td>'+mix+'</td>'
        +'<td>'+badge+'</td></tr>';
    }).join('');
  }catch(e){console.error('sessions load error',e)}
}
|
|
2569
2611
|
async function load(){
|
|
2570
2612
|
try{
|
|
2571
2613
|
const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
|
|
@@ -2710,7 +2752,7 @@ async function loadLearning(){
|
|
|
2710
2752
|
}
|
|
2711
2753
|
}catch(e){console.error('learning load error',e)}
|
|
2712
2754
|
}
|
|
2713
|
-
load();loadLearning();setInterval(load,5000);setInterval(loadLearning,30000);
|
|
2755
|
+
load();loadLearning();loadSessions();setInterval(load,5000);setInterval(loadLearning,30000);setInterval(loadSessions,10000);
|
|
2714
2756
|
</script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
|
|
2715
2757
|
}
|
|
2716
2758
|
async function getKnowledgeStats() {
|
|
@@ -2873,12 +2915,20 @@ async function startProxy(config = {}) {
|
|
|
2873
2915
|
(0, agent_tracker_js_1.flushAgentRegistry)();
|
|
2874
2916
|
meshHandle.stop();
|
|
2875
2917
|
shutdownHistory();
|
|
2918
|
+
trace_writer_js_1.TraceWriter.getInstance().shutdown();
|
|
2876
2919
|
process.exit(0);
|
|
2877
2920
|
};
|
|
2878
2921
|
process.on('SIGINT', handleShutdown);
|
|
2879
2922
|
process.on('SIGTERM', handleShutdown);
|
|
2880
2923
|
const configPath = getProxyConfigPath();
|
|
2881
2924
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2925
|
+
// ── Deterministic Traces: initialise TraceWriter with loaded config ──
|
|
2926
|
+
trace_writer_js_1.TraceWriter.getInstance({
|
|
2927
|
+
...((0, trace_writer_js_1.defaultTracesConfig)()),
|
|
2928
|
+
...(proxyConfig.traces ?? {}),
|
|
2929
|
+
});
|
|
2930
|
+
// ── Tool Router: initialise deny-by-default tool authorization ──
|
|
2931
|
+
(0, tool_router_js_1.getToolRouter)();
|
|
2882
2932
|
// Auto-config on startup: detect available auth and set optimal routing.
|
|
2883
2933
|
//
|
|
2884
2934
|
// Bug fix: `configExists` was always true because loadConfig() (CLI init) creates
|
|
@@ -3243,6 +3293,12 @@ async function startProxy(config = {}) {
|
|
|
3243
3293
|
}
|
|
3244
3294
|
// === Control endpoints ===
|
|
3245
3295
|
if (pathname.startsWith('/control/')) {
|
|
3296
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
3297
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
3298
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
3299
|
+
res.end(JSON.stringify({ error: 'Control endpoints are localhost-only' }));
|
|
3300
|
+
return;
|
|
3301
|
+
}
|
|
3246
3302
|
if (req.method === 'POST' && pathname === '/control/enable') {
|
|
3247
3303
|
proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: true });
|
|
3248
3304
|
await saveProxyConfig(configPath, proxyConfig);
|
|
@@ -3306,6 +3362,224 @@ async function startProxy(config = {}) {
|
|
|
3306
3362
|
}
|
|
3307
3363
|
return;
|
|
3308
3364
|
}
|
|
3365
|
+
// === Budget endpoints ===
|
|
3366
|
+
if (req.method === 'GET' && pathname === '/control/budget') {
|
|
3367
|
+
const status = budgetManager.getStatus();
|
|
3368
|
+
const cfg = budgetManager.getConfig();
|
|
3369
|
+
const now = Date.now();
|
|
3370
|
+
const weekCutoff = now - 7 * 86400000;
|
|
3371
|
+
const monthCutoff = now - 30 * 86400000;
|
|
3372
|
+
const weekCost = requestHistory
|
|
3373
|
+
.filter(r => new Date(r.timestamp).getTime() >= weekCutoff)
|
|
3374
|
+
.reduce((s, r) => s + r.costUsd, 0);
|
|
3375
|
+
const monthCost = requestHistory
|
|
3376
|
+
.filter(r => new Date(r.timestamp).getTime() >= monthCutoff)
|
|
3377
|
+
.reduce((s, r) => s + r.costUsd, 0);
|
|
3378
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3379
|
+
res.end(JSON.stringify({
|
|
3380
|
+
today_usd: Math.round(status.dailySpend * 10000) / 10000,
|
|
3381
|
+
limit_usd: status.dailyLimit,
|
|
3382
|
+
pct_used: Math.round(status.dailyPercent * 10) / 10,
|
|
3383
|
+
remaining_usd: Math.max(0, Math.round((status.dailyLimit - status.dailySpend) * 10000) / 10000),
|
|
3384
|
+
this_week_usd: Math.round(weekCost * 10000) / 10000,
|
|
3385
|
+
this_month_usd: Math.round(monthCost * 10000) / 10000,
|
|
3386
|
+
enabled: cfg.enabled,
|
|
3387
|
+
on_breach: cfg.onBreach,
|
|
3388
|
+
alert_thresholds: cfg.alertThresholds,
|
|
3389
|
+
hourly_usd: Math.round(status.hourlySpend * 10000) / 10000,
|
|
3390
|
+
hourly_limit_usd: status.hourlyLimit,
|
|
3391
|
+
hourly_pct_used: Math.round(status.hourlyPercent * 10) / 10,
|
|
3392
|
+
breached: status.breached,
|
|
3393
|
+
breach_type: status.breachType,
|
|
3394
|
+
}));
|
|
3395
|
+
return;
|
|
3396
|
+
}
|
|
3397
|
+
if (req.method === 'POST' && pathname === '/control/budget/set') {
|
|
3398
|
+
try {
|
|
3399
|
+
const body = await readJsonBody(req);
|
|
3400
|
+
const amount = Number(body.dailyUsd);
|
|
3401
|
+
if (!body.dailyUsd || isNaN(amount) || amount <= 0) {
|
|
3402
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3403
|
+
res.end(JSON.stringify({ error: 'dailyUsd must be a positive number' }));
|
|
3404
|
+
return;
|
|
3405
|
+
}
|
|
3406
|
+
budgetManager.setLimits({ dailyUsd: amount });
|
|
3407
|
+
budgetManager.updateConfig({ enabled: true });
|
|
3408
|
+
proxyConfig = normalizeProxyConfig({
|
|
3409
|
+
...proxyConfig,
|
|
3410
|
+
budget: { ...proxyConfig.budget, dailyUsd: amount, enabled: true },
|
|
3411
|
+
});
|
|
3412
|
+
await saveProxyConfig(configPath, proxyConfig);
|
|
3413
|
+
startConfigWatcher();
|
|
3414
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3415
|
+
res.end(JSON.stringify({ ok: true, dailyUsd: amount }));
|
|
3416
|
+
}
|
|
3417
|
+
catch {
|
|
3418
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3419
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3420
|
+
}
|
|
3421
|
+
return;
|
|
3422
|
+
}
|
|
3423
|
+
if (req.method === 'POST' && pathname === '/control/budget/set-alert') {
|
|
3424
|
+
try {
|
|
3425
|
+
const body = await readJsonBody(req);
|
|
3426
|
+
const pct = Number(body.threshold);
|
|
3427
|
+
if (!body.threshold || isNaN(pct) || pct <= 0 || pct > 100) {
|
|
3428
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3429
|
+
res.end(JSON.stringify({ error: 'threshold must be 1-100' }));
|
|
3430
|
+
return;
|
|
3431
|
+
}
|
|
3432
|
+
const current = budgetManager.getConfig();
|
|
3433
|
+
const thresholds = [...new Set([...current.alertThresholds, pct])].sort((a, b) => a - b);
|
|
3434
|
+
budgetManager.updateConfig({ alertThresholds: thresholds });
|
|
3435
|
+
proxyConfig = normalizeProxyConfig({
|
|
3436
|
+
...proxyConfig,
|
|
3437
|
+
budget: { ...proxyConfig.budget, alertThresholds: thresholds },
|
|
3438
|
+
});
|
|
3439
|
+
await saveProxyConfig(configPath, proxyConfig);
|
|
3440
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3441
|
+
res.end(JSON.stringify({ ok: true, alertThresholds: thresholds }));
|
|
3442
|
+
}
|
|
3443
|
+
catch {
|
|
3444
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3445
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3446
|
+
}
|
|
3447
|
+
return;
|
|
3448
|
+
}
|
|
3449
|
+
if (req.method === 'POST' && pathname === '/control/budget/reset') {
|
|
3450
|
+
budgetManager.reset();
|
|
3451
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3452
|
+
res.end(JSON.stringify({ ok: true, message: 'Daily spend counter reset' }));
|
|
3453
|
+
return;
|
|
3454
|
+
}
|
|
3455
|
+
// === Session Budget endpoints ===
|
|
3456
|
+
if (req.method === 'GET' && pathname === '/control/session-budget') {
|
|
3457
|
+
const sbQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3458
|
+
const sbParams = new URLSearchParams(sbQs);
|
|
3459
|
+
const sessionId = sbParams.get('sessionId');
|
|
3460
|
+
if (!sessionId) {
|
|
3461
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3462
|
+
res.end(JSON.stringify({ error: 'sessionId query parameter required' }));
|
|
3463
|
+
return;
|
|
3464
|
+
}
|
|
3465
|
+
const record = budgetManager.getSessionBudget(sessionId);
|
|
3466
|
+
if (!record) {
|
|
3467
|
+
const cap = budgetManager.getConfig().sessionCapUsd;
|
|
3468
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3469
|
+
res.end(JSON.stringify({
|
|
3470
|
+
sessionId,
|
|
3471
|
+
capUsd: cap,
|
|
3472
|
+
spentUsd: 0,
|
|
3473
|
+
remainingUsd: cap,
|
|
3474
|
+
pctUsed: 0,
|
|
3475
|
+
modelUsed: '',
|
|
3476
|
+
status: 'ok',
|
|
3477
|
+
}));
|
|
3478
|
+
return;
|
|
3479
|
+
}
|
|
3480
|
+
const remaining = Math.max(0, record.capUsd - record.spentUsd);
|
|
3481
|
+
const pctUsed = record.capUsd > 0 ? (record.spentUsd / record.capUsd) * 100 : 0;
|
|
3482
|
+
const status = pctUsed >= 100 ? 'exceeded' : pctUsed >= 80 ? 'warning' : 'ok';
|
|
3483
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3484
|
+
res.end(JSON.stringify({
|
|
3485
|
+
sessionId: record.sessionId,
|
|
3486
|
+
capUsd: record.capUsd,
|
|
3487
|
+
spentUsd: record.spentUsd,
|
|
3488
|
+
remainingUsd: remaining,
|
|
3489
|
+
pctUsed: Math.round(pctUsed * 10) / 10,
|
|
3490
|
+
modelUsed: record.modelUsed,
|
|
3491
|
+
createdAt: new Date(record.createdAt).toISOString(),
|
|
3492
|
+
updatedAt: new Date(record.updatedAt).toISOString(),
|
|
3493
|
+
status,
|
|
3494
|
+
}));
|
|
3495
|
+
return;
|
|
3496
|
+
}
|
|
3497
|
+
if (req.method === 'POST' && pathname === '/control/session-budget/set') {
|
|
3498
|
+
try {
|
|
3499
|
+
const body = await readJsonBody(req);
|
|
3500
|
+
if (!body.sessionId || typeof body.capUsd !== 'number' || !isFinite(body.capUsd) || body.capUsd <= 0) {
|
|
3501
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3502
|
+
res.end(JSON.stringify({ error: 'sessionId (string) and capUsd (positive number) required' }));
|
|
3503
|
+
return;
|
|
3504
|
+
}
|
|
3505
|
+
budgetManager.setSessionCap(body.sessionId, body.capUsd);
|
|
3506
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3507
|
+
res.end(JSON.stringify({ ok: true, sessionId: body.sessionId, capUsd: body.capUsd }));
|
|
3508
|
+
}
|
|
3509
|
+
catch {
|
|
3510
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3511
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3512
|
+
}
|
|
3513
|
+
return;
|
|
3514
|
+
}
|
|
3515
|
+
if (req.method === 'GET' && pathname === '/control/session-budgets') {
|
|
3516
|
+
const slQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3517
|
+
const slParams = new URLSearchParams(slQs);
|
|
3518
|
+
const limitParam = slParams.get('limit');
|
|
3519
|
+
const limit = limitParam ? Math.min(50, Math.max(1, parseInt(limitParam, 10) || 50)) : 50;
|
|
3520
|
+
const records = budgetManager.listSessionBudgets(limit);
|
|
3521
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3522
|
+
res.end(JSON.stringify({
|
|
3523
|
+
sessions: records.map(r => ({
|
|
3524
|
+
sessionId: r.sessionId,
|
|
3525
|
+
capUsd: r.capUsd,
|
|
3526
|
+
spentUsd: r.spentUsd,
|
|
3527
|
+
remainingUsd: Math.max(0, r.capUsd - r.spentUsd),
|
|
3528
|
+
pctUsed: r.capUsd > 0 ? Math.round((r.spentUsd / r.capUsd) * 1000) / 10 : 0,
|
|
3529
|
+
modelUsed: r.modelUsed,
|
|
3530
|
+
createdAt: new Date(r.createdAt).toISOString(),
|
|
3531
|
+
updatedAt: new Date(r.updatedAt).toISOString(),
|
|
3532
|
+
status: r.spentUsd >= r.capUsd ? 'exceeded' : (r.spentUsd / r.capUsd) >= 0.8 ? 'warning' : 'ok',
|
|
3533
|
+
})),
|
|
3534
|
+
count: records.length,
|
|
3535
|
+
}));
|
|
3536
|
+
return;
|
|
3537
|
+
}
|
|
3538
|
+
if (req.method === 'POST' && pathname === '/control/model') {
|
|
3539
|
+
try {
|
|
3540
|
+
const body = await readJsonBody(req);
|
|
3541
|
+
if (!body.model) {
|
|
3542
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3543
|
+
res.end(JSON.stringify({ error: 'model required' }));
|
|
3544
|
+
return;
|
|
3545
|
+
}
|
|
3546
|
+
if (body.model.length > 128 || !/^[a-zA-Z0-9._:/-]+$/.test(body.model)) {
|
|
3547
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3548
|
+
res.end(JSON.stringify({ error: 'model must be ≤128 characters and contain only [a-zA-Z0-9._:/-]' }));
|
|
3549
|
+
return;
|
|
3550
|
+
}
|
|
3551
|
+
const previousRouting = proxyConfig.routing;
|
|
3552
|
+
const target = body.model;
|
|
3553
|
+
// Update all complexity tiers to the requested model
|
|
3554
|
+
proxyConfig = normalizeProxyConfig({
|
|
3555
|
+
...proxyConfig,
|
|
3556
|
+
routing: {
|
|
3557
|
+
...proxyConfig.routing,
|
|
3558
|
+
complexity: {
|
|
3559
|
+
enabled: proxyConfig.routing?.complexity?.enabled ?? true,
|
|
3560
|
+
simple: target,
|
|
3561
|
+
moderate: target,
|
|
3562
|
+
complex: target,
|
|
3563
|
+
},
|
|
3564
|
+
},
|
|
3565
|
+
});
|
|
3566
|
+
await saveProxyConfig(configPath, proxyConfig);
|
|
3567
|
+
startConfigWatcher();
|
|
3568
|
+
const prevModel = previousRouting?.complexity?.complex ?? previousRouting?.complexity?.moderate ?? 'unknown';
|
|
3569
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3570
|
+
res.end(JSON.stringify({
|
|
3571
|
+
switched: true,
|
|
3572
|
+
previous: prevModel,
|
|
3573
|
+
current: target,
|
|
3574
|
+
reason: body.reason ?? '',
|
|
3575
|
+
}));
|
|
3576
|
+
}
|
|
3577
|
+
catch {
|
|
3578
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
3579
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3580
|
+
}
|
|
3581
|
+
return;
|
|
3582
|
+
}
|
|
3309
3583
|
}
|
|
3310
3584
|
if (req.method === 'POST' && pathname === '/control/kill') {
|
|
3311
3585
|
try {
|
|
@@ -3677,6 +3951,228 @@ async function startProxy(config = {}) {
|
|
|
3677
3951
|
}
|
|
3678
3952
|
return;
|
|
3679
3953
|
}
|
|
3954
|
+
// === Session Intelligence endpoints ===
|
|
3955
|
+
if (req.method === 'GET' && (pathname === '/v1/sessions' || pathname === '/v1/sessions/active')) {
|
|
3956
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
3957
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
3958
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
3959
|
+
res.end(JSON.stringify({ error: 'Session endpoints are localhost-only' }));
|
|
3960
|
+
return;
|
|
3961
|
+
}
|
|
3962
|
+
}
|
|
3963
|
+
if (req.method === 'GET' && pathname === '/v1/sessions') {
|
|
3964
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
3965
|
+
const params = new URLSearchParams(queryString);
|
|
3966
|
+
const rawLimit = parseInt(params.get('limit') || '20', 10);
|
|
3967
|
+
const rawDays = parseInt(params.get('days') || '7', 10);
|
|
3968
|
+
const limit = Math.min(Number.isFinite(rawLimit) && rawLimit > 0 ? rawLimit : 20, 100);
|
|
3969
|
+
const days = Number.isFinite(rawDays) && rawDays > 0 ? rawDays : 7;
|
|
3970
|
+
const sessions = (0, session_tracker_js_1.getSessions)({ limit, days });
|
|
3971
|
+
const now = Date.now();
|
|
3972
|
+
const activeCutoff = now - 5 * 60 * 1000;
|
|
3973
|
+
// Build model_mix per session from episodic_events in osmosis.db
|
|
3974
|
+
const modelMixMap = new Map();
|
|
3975
|
+
try {
|
|
3976
|
+
const osmDb = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
3977
|
+
if (osmDb && sessions.length > 0) {
|
|
3978
|
+
const placeholders = sessions.map(() => '?').join(',');
|
|
3979
|
+
const sessionIds = sessions.map(s => s.id);
|
|
3980
|
+
const rows = osmDb.prepare(`SELECT session_id, model_used, COUNT(*) as cnt
|
|
3981
|
+
FROM episodic_events
|
|
3982
|
+
WHERE session_id IN (${placeholders})
|
|
3983
|
+
GROUP BY session_id, model_used`).all(...sessionIds);
|
|
3984
|
+
for (const row of rows) {
|
|
3985
|
+
if (!modelMixMap.has(row.session_id))
|
|
3986
|
+
modelMixMap.set(row.session_id, {});
|
|
3987
|
+
modelMixMap.get(row.session_id)[row.model_used] = row.cnt;
|
|
3988
|
+
}
|
|
3989
|
+
}
|
|
3990
|
+
}
|
|
3991
|
+
catch { /* best-effort */ }
|
|
3992
|
+
const result = sessions.map(s => ({
|
|
3993
|
+
...s,
|
|
3994
|
+
active: s.last_seen_at >= activeCutoff,
|
|
3995
|
+
duration_ms: s.last_seen_at - s.started_at,
|
|
3996
|
+
model_mix: modelMixMap.get(s.id) ?? {},
|
|
3997
|
+
}));
|
|
3998
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
3999
|
+
res.end(JSON.stringify({ sessions: result, total: result.length }));
|
|
4000
|
+
return;
|
|
4001
|
+
}
|
|
4002
|
+
if (req.method === 'GET' && pathname === '/v1/sessions/active') {
|
|
4003
|
+
const active = (0, session_tracker_js_1.getActiveSessions)();
|
|
4004
|
+
const now = Date.now();
|
|
4005
|
+
const result = active.map(s => ({
|
|
4006
|
+
...s,
|
|
4007
|
+
active: true,
|
|
4008
|
+
duration_ms: s.last_seen_at - s.started_at,
|
|
4009
|
+
}));
|
|
4010
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4011
|
+
res.end(JSON.stringify({ sessions: result, total: result.length }));
|
|
4012
|
+
return;
|
|
4013
|
+
}
|
|
4014
|
+
// === Trace endpoints (CAP 3) ===
|
|
4015
|
+
if ((req.method === 'GET' || req.method === 'POST') && (pathname === '/v1/traces' || (pathname ?? '').startsWith('/v1/traces/'))) {
|
|
4016
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
4017
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
4018
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
4019
|
+
res.end(JSON.stringify({ error: 'Trace endpoints are localhost-only' }));
|
|
4020
|
+
return;
|
|
4021
|
+
}
|
|
4022
|
+
}
|
|
4023
|
+
if (req.method === 'GET' && pathname === '/v1/traces') {
|
|
4024
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4025
|
+
const params = new URLSearchParams(queryString);
|
|
4026
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
|
|
4027
|
+
const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
|
|
4028
|
+
const traces = traceWriter.getRecentTraces(limit);
|
|
4029
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4030
|
+
res.end(JSON.stringify({ traces, total: traces.length }));
|
|
4031
|
+
return;
|
|
4032
|
+
}
|
|
4033
|
+
if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/graph$/)) {
|
|
4034
|
+
const sessionId = (pathname ?? '').split('/')[3] ?? '';
|
|
4035
|
+
const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
|
|
4036
|
+
const graph = traceWriter.getSessionGraph(sessionId);
|
|
4037
|
+
if (!graph) {
|
|
4038
|
+
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
4039
|
+
res.end(JSON.stringify({ error: `No graph found for session: ${sessionId}` }));
|
|
4040
|
+
return;
|
|
4041
|
+
}
|
|
4042
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4043
|
+
res.end(JSON.stringify(graph));
|
|
4044
|
+
return;
|
|
4045
|
+
}
|
|
4046
|
+
if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/events$/)) {
|
|
4047
|
+
const traceId = (pathname ?? '').split('/')[3] ?? '';
|
|
4048
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4049
|
+
const events = tw.getTraceEvents(traceId);
|
|
4050
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4051
|
+
res.end(JSON.stringify({ traceId, events, total: events.length }));
|
|
4052
|
+
return;
|
|
4053
|
+
}
|
|
4054
|
+
if (req.method === 'POST' && pathname === '/v1/traces/export') {
|
|
4055
|
+
try {
|
|
4056
|
+
const exportOptions = await readJsonBody(req);
|
|
4057
|
+
const format = exportOptions.format ?? 'jsonl';
|
|
4058
|
+
if (!['jsonl', 'csv', 'markdown', 'traceops'].includes(format)) {
|
|
4059
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
4060
|
+
res.end(JSON.stringify({ error: 'format must be one of: jsonl, csv, markdown, traceops' }));
|
|
4061
|
+
return;
|
|
4062
|
+
}
|
|
4063
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4064
|
+
const exported = await tw.export({
|
|
4065
|
+
format: format,
|
|
4066
|
+
sessionIds: exportOptions.sessionIds,
|
|
4067
|
+
fromTimestamp: exportOptions.fromTimestamp,
|
|
4068
|
+
toTimestamp: exportOptions.toTimestamp,
|
|
4069
|
+
includeToolInputs: exportOptions.includeToolInputs,
|
|
4070
|
+
});
|
|
4071
|
+
const contentType = format === 'markdown' ? 'text/markdown' : 'application/x-ndjson';
|
|
4072
|
+
res.writeHead(200, { 'Content-Type': contentType });
|
|
4073
|
+
res.end(exported);
|
|
4074
|
+
}
|
|
4075
|
+
catch {
|
|
4076
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
4077
|
+
res.end(JSON.stringify({ error: 'Invalid JSON body' }));
|
|
4078
|
+
}
|
|
4079
|
+
return;
|
|
4080
|
+
}
|
|
4081
|
+
// === Memory endpoints (Session 4 — localhost-only) ===
|
|
4082
|
+
if ((pathname ?? '').startsWith('/v1/memory')) {
|
|
4083
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
4084
|
+
if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
|
|
4085
|
+
res.writeHead(403, { 'Content-Type': 'application/json' });
|
|
4086
|
+
res.end(JSON.stringify({ error: 'Memory endpoints are localhost-only' }));
|
|
4087
|
+
return;
|
|
4088
|
+
}
|
|
4089
|
+
}
|
|
4090
|
+
if (req.method === 'GET' && pathname === '/v1/memory/semantic') {
|
|
4091
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4092
|
+
const params = new URLSearchParams(queryString);
|
|
4093
|
+
const sessionId = params.get('session_id') ?? undefined;
|
|
4094
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
|
|
4095
|
+
try {
|
|
4096
|
+
const db = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
4097
|
+
if (!db) {
|
|
4098
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4099
|
+
res.end(JSON.stringify({ atoms: [], total: 0 }));
|
|
4100
|
+
return;
|
|
4101
|
+
}
|
|
4102
|
+
let atoms;
|
|
4103
|
+
if (sessionId) {
|
|
4104
|
+
atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
|
|
4105
|
+
FROM knowledge_atoms WHERE session_id = ? ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(sessionId, limit);
|
|
4106
|
+
}
|
|
4107
|
+
else {
|
|
4108
|
+
atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
|
|
4109
|
+
FROM knowledge_atoms ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(limit);
|
|
4110
|
+
}
|
|
4111
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4112
|
+
res.end(JSON.stringify({ atoms, total: atoms.length }));
|
|
4113
|
+
}
|
|
4114
|
+
catch {
|
|
4115
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4116
|
+
res.end(JSON.stringify({ error: 'Memory query failed' }));
|
|
4117
|
+
}
|
|
4118
|
+
return;
|
|
4119
|
+
}
|
|
4120
|
+
if (req.method === 'GET' && pathname === '/v1/memory/episodic') {
|
|
4121
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4122
|
+
const params = new URLSearchParams(queryString);
|
|
4123
|
+
const sessionId = params.get('session_id') ?? undefined;
|
|
4124
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '50', 10) || 50, 200);
|
|
4125
|
+
try {
|
|
4126
|
+
const db = (0, osmosis_store_js_1.getOsmosisDb)();
|
|
4127
|
+
if (!db) {
|
|
4128
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4129
|
+
res.end(JSON.stringify({ events: [], total: 0 }));
|
|
4130
|
+
return;
|
|
4131
|
+
}
|
|
4132
|
+
let events;
|
|
4133
|
+
if (sessionId) {
|
|
4134
|
+
events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
|
|
4135
|
+
FROM episodic_events WHERE session_id = ? ORDER BY timestamp DESC LIMIT ?`).all(sessionId, limit);
|
|
4136
|
+
}
|
|
4137
|
+
else {
|
|
4138
|
+
events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
|
|
4139
|
+
FROM episodic_events ORDER BY timestamp DESC LIMIT ?`).all(limit);
|
|
4140
|
+
}
|
|
4141
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4142
|
+
res.end(JSON.stringify({ events, total: events.length }));
|
|
4143
|
+
}
|
|
4144
|
+
catch {
|
|
4145
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4146
|
+
res.end(JSON.stringify({ error: 'Memory query failed' }));
|
|
4147
|
+
}
|
|
4148
|
+
return;
|
|
4149
|
+
}
|
|
4150
|
+
if (req.method === 'GET' && pathname === '/v1/memory/procedural') {
|
|
4151
|
+
const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
|
|
4152
|
+
const params = new URLSearchParams(queryString);
|
|
4153
|
+
const limit = Math.min(parseInt(params.get('limit') ?? '10', 10) || 10, 50);
|
|
4154
|
+
try {
|
|
4155
|
+
// Read from mesh.db (procedural atom store)
|
|
4156
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
4157
|
+
const Database = require('better-sqlite3');
|
|
4158
|
+
const meshDbPath = require('node:path').join((0, osmosis_store_js_1.getRelayplaneDir)(), 'mesh.db');
|
|
4159
|
+
let atoms = [];
|
|
4160
|
+
try {
|
|
4161
|
+
const meshDb = new Database(meshDbPath, { readonly: true });
|
|
4162
|
+
atoms = meshDb.prepare(`SELECT id, type, observation, confidence, fitness_score, trust_tier, evidence_count, created_at, updated_at
|
|
4163
|
+
FROM atoms ORDER BY fitness_score DESC, confidence DESC LIMIT ?`).all(limit);
|
|
4164
|
+
meshDb.close();
|
|
4165
|
+
}
|
|
4166
|
+
catch { /* mesh.db may not exist yet */ }
|
|
4167
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
4168
|
+
res.end(JSON.stringify({ atoms, total: atoms.length }));
|
|
4169
|
+
}
|
|
4170
|
+
catch {
|
|
4171
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
4172
|
+
res.end(JSON.stringify({ error: 'Procedural memory query failed' }));
|
|
4173
|
+
}
|
|
4174
|
+
return;
|
|
4175
|
+
}
|
|
3680
4176
|
// Extract auth context from incoming request
|
|
3681
4177
|
const ctx = extractRequestContext(req);
|
|
3682
4178
|
const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
|
|
@@ -3716,6 +4212,8 @@ async function startProxy(config = {}) {
|
|
|
3716
4212
|
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
3717
4213
|
return;
|
|
3718
4214
|
}
|
|
4215
|
+
// Extract session ID (X-Claude-Code-Session-Id or synthetic)
|
|
4216
|
+
const { sessionId: nativeSessionId, sessionSource: nativeSessionSource } = (0, session_tracker_js_1.getSessionId)(req, requestBody['model']);
|
|
3719
4217
|
// Extract agent fingerprint and explicit agent ID
|
|
3720
4218
|
const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
|
|
3721
4219
|
const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
@@ -3732,6 +4230,29 @@ async function startProxy(config = {}) {
|
|
|
3732
4230
|
const parsedModel = parseModelSuffix(requestedModel);
|
|
3733
4231
|
let routingSuffix = parsedModel.suffix;
|
|
3734
4232
|
requestedModel = parsedModel.baseModel;
|
|
4233
|
+
// ── CAP 3: Deterministic Traces — allocate traceId + emit request.start ──
|
|
4234
|
+
const nativeTraceId = (0, node_crypto_1.randomUUID)();
|
|
4235
|
+
{
|
|
4236
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4237
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4238
|
+
const sysHash = nativeSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(nativeSystemPrompt) : undefined;
|
|
4239
|
+
const messages = requestBody['messages'];
|
|
4240
|
+
const reqTools = Array.isArray(requestBody['tools'])
|
|
4241
|
+
? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
|
|
4242
|
+
: undefined;
|
|
4243
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4244
|
+
eventType: 'request.start',
|
|
4245
|
+
parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
|
|
4246
|
+
agentId: getHeaderValue(req, 'x-agent-id') || nativeExplicitAgentId || undefined,
|
|
4247
|
+
payload: {
|
|
4248
|
+
model: requestedModel,
|
|
4249
|
+
messageCount: messages?.length,
|
|
4250
|
+
requestedTools: reqTools,
|
|
4251
|
+
systemPromptHash: sysHash,
|
|
4252
|
+
},
|
|
4253
|
+
});
|
|
4254
|
+
}
|
|
4255
|
+
}
|
|
3735
4256
|
if (relayplaneEnabled && !relayplaneBypass && requestedModel) {
|
|
3736
4257
|
const override = proxyConfig.modelOverrides?.[requestedModel];
|
|
3737
4258
|
if (override) {
|
|
@@ -3990,6 +4511,109 @@ async function startProxy(config = {}) {
|
|
|
3990
4511
|
Object.assign(budgetExtraHeaders, budgetCheck.headers);
|
|
3991
4512
|
}
|
|
3992
4513
|
// ── End budget check ──
|
|
4514
|
+
// ── Session budget check (only when X-Claude-Code-Session-Id is present) ──
|
|
4515
|
+
let nativeSessionBudgetResult = null;
|
|
4516
|
+
if (nativeSessionSource === 'claude-code') {
|
|
4517
|
+
nativeSessionBudgetResult = budgetManager.checkSessionBudget(nativeSessionId, targetModel || requestedModel);
|
|
4518
|
+
if (!nativeSessionBudgetResult.allowed) {
|
|
4519
|
+
res.writeHead(429, { 'Content-Type': 'application/json' });
|
|
4520
|
+
res.end(JSON.stringify({
|
|
4521
|
+
error: 'session_budget_exceeded',
|
|
4522
|
+
spent: nativeSessionBudgetResult.spent,
|
|
4523
|
+
cap: nativeSessionBudgetResult.cap,
|
|
4524
|
+
type: 'session_budget_exceeded',
|
|
4525
|
+
}));
|
|
4526
|
+
return;
|
|
4527
|
+
}
|
|
4528
|
+
if (nativeSessionBudgetResult.model !== (targetModel || requestedModel)) {
|
|
4529
|
+
log(`Session budget downgrade: ${targetModel || requestedModel} → ${nativeSessionBudgetResult.model}`);
|
|
4530
|
+
// CAP 3: emit model.switch before we overwrite targetModel
|
|
4531
|
+
{
|
|
4532
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4533
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4534
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4535
|
+
eventType: 'model.switch',
|
|
4536
|
+
payload: {
|
|
4537
|
+
fromModel: targetModel || requestedModel,
|
|
4538
|
+
toModel: nativeSessionBudgetResult.model,
|
|
4539
|
+
switchReason: 'session_budget',
|
|
4540
|
+
},
|
|
4541
|
+
});
|
|
4542
|
+
}
|
|
4543
|
+
}
|
|
4544
|
+
targetModel = nativeSessionBudgetResult.model;
|
|
4545
|
+
if (requestBody)
|
|
4546
|
+
requestBody['model'] = targetModel;
|
|
4547
|
+
}
|
|
4548
|
+
// CAP 3: emit budget.checkpoint after every budget check
|
|
4549
|
+
{
|
|
4550
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4551
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4552
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4553
|
+
eventType: 'budget.checkpoint',
|
|
4554
|
+
payload: {
|
|
4555
|
+
sessionCostUsd: nativeSessionBudgetResult.spent,
|
|
4556
|
+
sessionCapUsd: nativeSessionBudgetResult.cap,
|
|
4557
|
+
sessionPct: nativeSessionBudgetResult.cap > 0
|
|
4558
|
+
? nativeSessionBudgetResult.spent / nativeSessionBudgetResult.cap
|
|
4559
|
+
: 0,
|
|
4560
|
+
},
|
|
4561
|
+
});
|
|
4562
|
+
}
|
|
4563
|
+
}
|
|
4564
|
+
}
|
|
4565
|
+
// ── End session budget check ──
|
|
4566
|
+
// ── Tool authorization check (deny-by-default, after budget gate) ──
|
|
4567
|
+
{
|
|
4568
|
+
const reqTools = Array.isArray(requestBody?.['tools'])
|
|
4569
|
+
? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
|
|
4570
|
+
: [];
|
|
4571
|
+
if (reqTools.length > 0) {
|
|
4572
|
+
const tr = (0, tool_router_js_1.getToolRouter)();
|
|
4573
|
+
const toolCtx = (0, tool_router_js_1.extractToolContext)(req.headers, nativeSessionId, reqTools, tr);
|
|
4574
|
+
const authResult = tr.checkTools(toolCtx);
|
|
4575
|
+
if (authResult.denied.length > 0) {
|
|
4576
|
+
for (const toolName of authResult.denied) {
|
|
4577
|
+
tr.recordDenied(nativeSessionId, toolName, 'not_in_active_pack');
|
|
4578
|
+
}
|
|
4579
|
+
// Emit tool.denied trace event
|
|
4580
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4581
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4582
|
+
for (const toolName of authResult.denied) {
|
|
4583
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4584
|
+
eventType: 'tool.denied',
|
|
4585
|
+
payload: { toolName },
|
|
4586
|
+
});
|
|
4587
|
+
}
|
|
4588
|
+
}
|
|
4589
|
+
// If ALL requested tools are denied, block the request
|
|
4590
|
+
if (authResult.allowed.length === 0) {
|
|
4591
|
+
res.writeHead(403, {
|
|
4592
|
+
'Content-Type': 'application/json',
|
|
4593
|
+
'X-Relay-Tools-Denied': authResult.deniedHeader,
|
|
4594
|
+
});
|
|
4595
|
+
res.end(JSON.stringify({
|
|
4596
|
+
error: 'tool_not_authorized',
|
|
4597
|
+
denied: authResult.denied,
|
|
4598
|
+
message: 'All requested tools are denied by the active tool pack policy.',
|
|
4599
|
+
}));
|
|
4600
|
+
return;
|
|
4601
|
+
}
|
|
4602
|
+
}
|
|
4603
|
+
// Strip denied tools from the forwarded request body so the model
|
|
4604
|
+
// cannot call them even when only a partial set of tools was denied.
|
|
4605
|
+
if (Array.isArray(requestBody['tools'])) {
|
|
4606
|
+
const allowedSet = new Set(authResult.allowed);
|
|
4607
|
+
requestBody['tools'] = requestBody['tools'].filter(t => allowedSet.has(t?.name ?? ''));
|
|
4608
|
+
}
|
|
4609
|
+
// Set response header so callers know which tools were denied
|
|
4610
|
+
if (authResult.deniedHeader) {
|
|
4611
|
+
// Stash for later use when writing response headers
|
|
4612
|
+
budgetExtraHeaders['X-Relay-Tools-Denied'] = authResult.deniedHeader;
|
|
4613
|
+
}
|
|
4614
|
+
}
|
|
4615
|
+
}
|
|
4616
|
+
// ── End tool authorization check ──
|
|
3993
4617
|
// ── Rate limit check ──
|
|
3994
4618
|
const workspaceId = 'local'; // Local proxy uses single workspace
|
|
3995
4619
|
try {
|
|
@@ -4117,7 +4741,7 @@ async function startProxy(config = {}) {
|
|
|
4117
4741
|
// ── End cross-provider cascade ──
|
|
4118
4742
|
const durationMs = Date.now() - startTime;
|
|
4119
4743
|
const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
|
|
4120
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
4744
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, errMsg, providerResponse.status);
|
|
4121
4745
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
4122
4746
|
res.end(JSON.stringify(errorPayload));
|
|
4123
4747
|
return;
|
|
@@ -4132,6 +4756,8 @@ async function startProxy(config = {}) {
|
|
|
4132
4756
|
'Cache-Control': 'no-cache',
|
|
4133
4757
|
'Connection': 'keep-alive',
|
|
4134
4758
|
'X-RelayPlane-Cache': cacheBypass ? 'BYPASS' : 'MISS',
|
|
4759
|
+
'X-Relay-Trace-Id': nativeTraceId,
|
|
4760
|
+
'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)),
|
|
4135
4761
|
...nativeStreamRpHeaders,
|
|
4136
4762
|
});
|
|
4137
4763
|
const reader = providerResponse.body?.getReader();
|
|
@@ -4226,7 +4852,7 @@ async function startProxy(config = {}) {
|
|
|
4226
4852
|
});
|
|
4227
4853
|
log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
4228
4854
|
}
|
|
4229
|
-
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, ...nativeRpHeaders });
|
|
4855
|
+
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, 'X-Relay-Trace-Id': nativeTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)), ...nativeRpHeaders });
|
|
4230
4856
|
res.end(JSON.stringify(nativeResponseData));
|
|
4231
4857
|
}
|
|
4232
4858
|
}
|
|
@@ -4260,6 +4886,50 @@ async function startProxy(config = {}) {
|
|
|
4260
4886
|
if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
|
|
4261
4887
|
(0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
|
|
4262
4888
|
}
|
|
4889
|
+
// ── Session Intelligence: upsert session record ──
|
|
4890
|
+
(0, session_tracker_js_1.upsertSession)(nativeSessionId, nativeSessionSource, nativeCostUsd, nativeTokIn, nativeTokOut);
|
|
4891
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
4892
|
+
try {
|
|
4893
|
+
(0, episode_writer_js_1.writeEpisode)(nativeSessionId, {
|
|
4894
|
+
eventType: 'model-response',
|
|
4895
|
+
modelUsed: targetModel || requestedModel,
|
|
4896
|
+
tokensIn: nativeTokIn,
|
|
4897
|
+
tokensOut: nativeTokOut,
|
|
4898
|
+
costUsd: nativeCostUsd,
|
|
4899
|
+
outcome: 'success',
|
|
4900
|
+
traceId: nativeTraceId,
|
|
4901
|
+
durationMs,
|
|
4902
|
+
});
|
|
4903
|
+
}
|
|
4904
|
+
catch { /* never block hot path */ }
|
|
4905
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize ──
|
|
4906
|
+
{
|
|
4907
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4908
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4909
|
+
const finishReason = nativeResponseData?.['stop_reason']
|
|
4910
|
+
?? (nativeResponseData?.['choices']?.[0]?.['finish_reason']);
|
|
4911
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4912
|
+
eventType: 'request.end',
|
|
4913
|
+
durationMs,
|
|
4914
|
+
payload: {
|
|
4915
|
+
modelUsed: targetModel || requestedModel,
|
|
4916
|
+
tokensIn: nativeTokIn,
|
|
4917
|
+
tokensOut: nativeTokOut,
|
|
4918
|
+
costUsd: nativeCostUsd,
|
|
4919
|
+
finishReason,
|
|
4920
|
+
},
|
|
4921
|
+
});
|
|
4922
|
+
void tw.finalizeTrace(nativeTraceId, nativeSessionId, {
|
|
4923
|
+
costUsd: nativeCostUsd,
|
|
4924
|
+
modelUsed: targetModel || requestedModel,
|
|
4925
|
+
durationMs,
|
|
4926
|
+
});
|
|
4927
|
+
}
|
|
4928
|
+
}
|
|
4929
|
+
// ── Session budget: record spend (fire-and-forget, only for claude-code sessions) ──
|
|
4930
|
+
if (nativeSessionSource === 'claude-code') {
|
|
4931
|
+
budgetManager.updateSessionBudget(nativeSessionId, nativeCostUsd, targetModel || requestedModel);
|
|
4932
|
+
}
|
|
4263
4933
|
// ── Post-request: budget spend + anomaly detection ──
|
|
4264
4934
|
postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
4265
4935
|
if (recordTelemetry) {
|
|
@@ -4290,7 +4960,20 @@ async function startProxy(config = {}) {
|
|
|
4290
4960
|
catchErrMsg = err instanceof Error ? err.message : String(err);
|
|
4291
4961
|
catchErrStatus = 500;
|
|
4292
4962
|
}
|
|
4293
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
4963
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, catchErrMsg, catchErrStatus);
|
|
4964
|
+
// ── CAP 3: Deterministic Traces — emit request.end (error) + finalize ──
|
|
4965
|
+
{
|
|
4966
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
4967
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
4968
|
+
void tw.write(nativeSessionId, nativeTraceId, {
|
|
4969
|
+
eventType: 'request.end',
|
|
4970
|
+
durationMs,
|
|
4971
|
+
error: { code: String(catchErrStatus), message: catchErrMsg, retryable: catchErrStatus >= 500 },
|
|
4972
|
+
payload: { modelUsed: targetModel || requestedModel },
|
|
4973
|
+
});
|
|
4974
|
+
void tw.finalizeTrace(nativeTraceId, nativeSessionId, { durationMs, modelUsed: targetModel || requestedModel });
|
|
4975
|
+
}
|
|
4976
|
+
}
|
|
4294
4977
|
if (recordTelemetry) {
|
|
4295
4978
|
sendCloudTelemetry(taskType, targetModel || requestedModel, 0, 0, durationMs, false, 0, originalModel ?? undefined);
|
|
4296
4979
|
meshCapture(targetModel || requestedModel, targetProvider, taskType, 0, 0, 0, durationMs, false, catchErrMsg);
|
|
@@ -4404,6 +5087,8 @@ async function startProxy(config = {}) {
|
|
|
4404
5087
|
return;
|
|
4405
5088
|
}
|
|
4406
5089
|
const isStreaming = request.stream === true;
|
|
5090
|
+
// Extract session ID for chat/completions
|
|
5091
|
+
const { sessionId: chatSessionId, sessionSource: chatSessionSource } = (0, session_tracker_js_1.getSessionId)(req, request.model);
|
|
4407
5092
|
// Extract agent fingerprint for chat/completions
|
|
4408
5093
|
const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
|
|
4409
5094
|
const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
|
|
@@ -4412,6 +5097,24 @@ async function startProxy(config = {}) {
|
|
|
4412
5097
|
const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
|
|
4413
5098
|
chatAgentFingerprint = agentResult.fingerprint;
|
|
4414
5099
|
}
|
|
5100
|
+
// ── CAP 3: Deterministic Traces — allocate chatTraceId + emit request.start ──
|
|
5101
|
+
const chatTraceId = (0, node_crypto_1.randomUUID)();
|
|
5102
|
+
{
|
|
5103
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
5104
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
5105
|
+
const chatSysHash = chatSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(chatSystemPrompt) : undefined;
|
|
5106
|
+
void tw.write(chatSessionId, chatTraceId, {
|
|
5107
|
+
eventType: 'request.start',
|
|
5108
|
+
parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
|
|
5109
|
+
agentId: getHeaderValue(req, 'x-agent-id') || chatExplicitAgentId || undefined,
|
|
5110
|
+
payload: {
|
|
5111
|
+
model: request.model,
|
|
5112
|
+
messageCount: request.messages?.length,
|
|
5113
|
+
systemPromptHash: chatSysHash,
|
|
5114
|
+
},
|
|
5115
|
+
});
|
|
5116
|
+
}
|
|
5117
|
+
}
|
|
4415
5118
|
// ── Response Cache: check for cached response (chat/completions) ──
|
|
4416
5119
|
const chatCacheBypass = responseCache.shouldBypass(request);
|
|
4417
5120
|
let chatCacheHash;
|
|
@@ -4758,7 +5461,7 @@ async function startProxy(config = {}) {
|
|
|
4758
5461
|
const startTime = Date.now();
|
|
4759
5462
|
// Handle streaming vs non-streaming
|
|
4760
5463
|
if (isStreaming) {
|
|
4761
|
-
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
|
|
5464
|
+
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId, chatSessionId, chatSessionSource);
|
|
4762
5465
|
}
|
|
4763
5466
|
else {
|
|
4764
5467
|
if (useCascade && cascadeConfig) {
|
|
@@ -4801,6 +5504,43 @@ async function startProxy(config = {}) {
|
|
|
4801
5504
|
updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
|
|
4802
5505
|
if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
|
|
4803
5506
|
(0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
|
|
5507
|
+
(0, session_tracker_js_1.upsertSession)(chatSessionId, chatSessionSource, cascadeCost, cascadeTokensIn, cascadeTokensOut);
|
|
5508
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
5509
|
+
try {
|
|
5510
|
+
(0, episode_writer_js_1.writeEpisode)(chatSessionId, {
|
|
5511
|
+
eventType: 'routing-decision',
|
|
5512
|
+
modelUsed: cascadeResult.model,
|
|
5513
|
+
tokensIn: cascadeTokensIn,
|
|
5514
|
+
tokensOut: cascadeTokensOut,
|
|
5515
|
+
costUsd: cascadeCost,
|
|
5516
|
+
outcome: 'success',
|
|
5517
|
+
outcomeDetail: 'cascade',
|
|
5518
|
+
traceId: chatTraceId,
|
|
5519
|
+
durationMs,
|
|
5520
|
+
});
|
|
5521
|
+
}
|
|
5522
|
+
catch { /* never block hot path */ }
|
|
5523
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize (chat cascade) ──
|
|
5524
|
+
{
|
|
5525
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
5526
|
+
if (tw.isEnabled() && recordTelemetry) {
|
|
5527
|
+
void tw.write(chatSessionId, chatTraceId, {
|
|
5528
|
+
eventType: 'request.end',
|
|
5529
|
+
durationMs,
|
|
5530
|
+
payload: {
|
|
5531
|
+
modelUsed: cascadeResult.model,
|
|
5532
|
+
tokensIn: cascadeTokensIn,
|
|
5533
|
+
tokensOut: cascadeTokensOut,
|
|
5534
|
+
costUsd: cascadeCost,
|
|
5535
|
+
},
|
|
5536
|
+
});
|
|
5537
|
+
void tw.finalizeTrace(chatTraceId, chatSessionId, {
|
|
5538
|
+
costUsd: cascadeCost,
|
|
5539
|
+
modelUsed: cascadeResult.model,
|
|
5540
|
+
durationMs,
|
|
5541
|
+
});
|
|
5542
|
+
}
|
|
5543
|
+
}
|
|
4804
5544
|
if (recordTelemetry) {
|
|
4805
5545
|
try {
|
|
4806
5546
|
const runResult = await relay.run({
|
|
@@ -4828,7 +5568,7 @@ async function startProxy(config = {}) {
|
|
|
4828
5568
|
meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
|
|
4829
5569
|
}
|
|
4830
5570
|
const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
|
|
4831
|
-
res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
|
|
5571
|
+
res.writeHead(200, { 'Content-Type': 'application/json', 'X-Relay-Trace-Id': chatTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)), ...chatCascadeRpHeaders });
|
|
4832
5572
|
res.end(JSON.stringify(responseData));
|
|
4833
5573
|
}
|
|
4834
5574
|
catch (err) {
|
|
@@ -4843,7 +5583,7 @@ async function startProxy(config = {}) {
|
|
|
4843
5583
|
cascadeErrMsg = err instanceof Error ? err.message : String(err);
|
|
4844
5584
|
cascadeErrStatus = 500;
|
|
4845
5585
|
}
|
|
4846
|
-
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity,
|
|
5586
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, chatAgentFingerprint, chatExplicitAgentId, cascadeErrMsg, cascadeErrStatus);
|
|
4847
5587
|
if (recordTelemetry) {
|
|
4848
5588
|
sendCloudTelemetry(taskType, targetModel || 'unknown', 0, 0, durationMs, false, 0, originalRequestedModel ?? undefined);
|
|
4849
5589
|
meshCapture(targetModel || 'unknown', targetProvider, taskType, 0, 0, 0, durationMs, false, cascadeErrMsg);
|
|
@@ -4859,7 +5599,10 @@ async function startProxy(config = {}) {
|
|
|
4859
5599
|
}
|
|
4860
5600
|
}
|
|
4861
5601
|
else {
|
|
4862
|
-
|
|
5602
|
+
res.setHeader('X-Relay-Trace-Id', chatTraceId);
|
|
5603
|
+
res.setHeader('X-Relay-Memory-Hits', String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)));
|
|
5604
|
+
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey, // pass for cross-provider cascade API key resolution (GH #38)
|
|
5605
|
+
chatSessionId, chatSessionSource, chatTraceId);
|
|
4863
5606
|
}
|
|
4864
5607
|
}
|
|
4865
5608
|
});
|
|
@@ -5024,7 +5767,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
5024
5767
|
}
|
|
5025
5768
|
return { responseData, ok: true, status: 200 };
|
|
5026
5769
|
}
|
|
5027
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
|
|
5770
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId, sessionId, sessionSource) {
|
|
5028
5771
|
let providerResponse;
|
|
5029
5772
|
try {
|
|
5030
5773
|
switch (targetProvider) {
|
|
@@ -5055,7 +5798,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5055
5798
|
if (!ollamaStream.success || !ollamaStream.stream) {
|
|
5056
5799
|
const durationMs = Date.now() - startTime;
|
|
5057
5800
|
const errMsg = ollamaStream.error?.message ?? 'Ollama stream failed';
|
|
5058
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5801
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errMsg, ollamaStream.error?.status);
|
|
5059
5802
|
res.writeHead(ollamaStream.error?.status ?? 502, { 'Content-Type': 'application/json' });
|
|
5060
5803
|
res.end(JSON.stringify({ error: ollamaStream.error }));
|
|
5061
5804
|
return;
|
|
@@ -5091,7 +5834,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5091
5834
|
}
|
|
5092
5835
|
const durationMs = Date.now() - startTime;
|
|
5093
5836
|
const streamErrMsg = extractProviderErrorMessage(errorData, providerResponse.status);
|
|
5094
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5837
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, streamErrMsg, providerResponse.status);
|
|
5095
5838
|
if (recordTelemetry) {
|
|
5096
5839
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5097
5840
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, streamErrMsg);
|
|
@@ -5107,7 +5850,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5107
5850
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
5108
5851
|
}
|
|
5109
5852
|
const durationMs = Date.now() - startTime;
|
|
5110
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
5853
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
|
|
5111
5854
|
if (recordTelemetry) {
|
|
5112
5855
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5113
5856
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
|
|
@@ -5233,6 +5976,23 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5233
5976
|
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
|
|
5234
5977
|
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
5235
5978
|
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
|
|
5979
|
+
if (sessionId && sessionSource)
|
|
5980
|
+
(0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, streamCost, streamTokensIn, streamTokensOut);
|
|
5981
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
5982
|
+
if (sessionId) {
|
|
5983
|
+
try {
|
|
5984
|
+
(0, episode_writer_js_1.writeEpisode)(sessionId, {
|
|
5985
|
+
eventType: 'model-response',
|
|
5986
|
+
modelUsed: targetModel,
|
|
5987
|
+
tokensIn: streamTokensIn,
|
|
5988
|
+
tokensOut: streamTokensOut,
|
|
5989
|
+
costUsd: streamCost,
|
|
5990
|
+
outcome: 'success',
|
|
5991
|
+
durationMs,
|
|
5992
|
+
});
|
|
5993
|
+
}
|
|
5994
|
+
catch { /* never block hot path */ }
|
|
5995
|
+
}
|
|
5236
5996
|
// ── Post-request: budget spend + anomaly detection ──
|
|
5237
5997
|
try {
|
|
5238
5998
|
(0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
|
|
@@ -5270,7 +6030,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
5270
6030
|
*/
|
|
5271
6031
|
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
|
|
5272
6032
|
/** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
|
|
5273
|
-
anthropicEnvKeyForCascade
|
|
6033
|
+
anthropicEnvKeyForCascade, sessionId, sessionSource,
|
|
6034
|
+
/** CAP 3: trace ID for deterministic trace write */
|
|
6035
|
+
traceId) {
|
|
5274
6036
|
let responseData;
|
|
5275
6037
|
try {
|
|
5276
6038
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -5308,7 +6070,7 @@ anthropicEnvKeyForCascade) {
|
|
|
5308
6070
|
// All fallbacks exhausted — return the primary error
|
|
5309
6071
|
const durationMs = Date.now() - startTime;
|
|
5310
6072
|
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
5311
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity,
|
|
6073
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
|
|
5312
6074
|
if (recordTelemetry) {
|
|
5313
6075
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5314
6076
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
@@ -5322,7 +6084,7 @@ anthropicEnvKeyForCascade) {
|
|
|
5322
6084
|
// No cascade — return error as-is
|
|
5323
6085
|
const durationMs = Date.now() - startTime;
|
|
5324
6086
|
const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
|
|
5325
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
6087
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
|
|
5326
6088
|
if (recordTelemetry) {
|
|
5327
6089
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5328
6090
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
|
|
@@ -5340,7 +6102,7 @@ anthropicEnvKeyForCascade) {
|
|
|
5340
6102
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
5341
6103
|
}
|
|
5342
6104
|
const durationMs = Date.now() - startTime;
|
|
5343
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity,
|
|
6105
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
|
|
5344
6106
|
if (recordTelemetry) {
|
|
5345
6107
|
sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
|
|
5346
6108
|
meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
|
|
@@ -5367,6 +6129,39 @@ anthropicEnvKeyForCascade) {
|
|
|
5367
6129
|
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
|
|
5368
6130
|
if (agentFingerprint && agentFingerprint !== 'unknown')
|
|
5369
6131
|
(0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
|
|
6132
|
+
if (sessionId && sessionSource)
|
|
6133
|
+
(0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, cost, tokensIn, tokensOut);
|
|
6134
|
+
// ── Session 4: Episodic memory write (fire-and-forget) ──
|
|
6135
|
+
if (sessionId) {
|
|
6136
|
+
try {
|
|
6137
|
+
(0, episode_writer_js_1.writeEpisode)(sessionId, {
|
|
6138
|
+
eventType: 'model-response',
|
|
6139
|
+
modelUsed: targetModel,
|
|
6140
|
+
tokensIn,
|
|
6141
|
+
tokensOut,
|
|
6142
|
+
costUsd: cost,
|
|
6143
|
+
outcome: 'success',
|
|
6144
|
+
traceId: traceId ?? undefined,
|
|
6145
|
+
durationMs,
|
|
6146
|
+
});
|
|
6147
|
+
}
|
|
6148
|
+
catch { /* never block hot path */ }
|
|
6149
|
+
}
|
|
6150
|
+
// ── CAP 3: Deterministic Traces — emit request.end + finalize (chat non-streaming) ──
|
|
6151
|
+
if (traceId && sessionId && recordTelemetry) {
|
|
6152
|
+
const tw = trace_writer_js_1.TraceWriter.getInstance();
|
|
6153
|
+
if (tw.isEnabled()) {
|
|
6154
|
+
const finishReason = responseData?.['choices']
|
|
6155
|
+
? responseData['choices'][0]?.['finish_reason']
|
|
6156
|
+
: undefined;
|
|
6157
|
+
void tw.write(sessionId, traceId, {
|
|
6158
|
+
eventType: 'request.end',
|
|
6159
|
+
durationMs,
|
|
6160
|
+
payload: { modelUsed: targetModel, tokensIn, tokensOut, costUsd: cost, finishReason },
|
|
6161
|
+
});
|
|
6162
|
+
void tw.finalizeTrace(traceId, sessionId, { costUsd: cost, modelUsed: targetModel, durationMs });
|
|
6163
|
+
}
|
|
6164
|
+
}
|
|
5370
6165
|
// ── Post-request: budget spend + anomaly detection ──
|
|
5371
6166
|
try {
|
|
5372
6167
|
(0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
|