@relayplane/proxy 1.8.37 → 1.8.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,7 +94,11 @@ const estimateRateMap = new Map();
94
94
  // Without this, IPs that make one request and disappear stay in the map forever.
95
95
  setInterval(() => (0, estimate_js_1.purgeExpiredRateLimitEntries)(estimateRateMap, Date.now()), 5 * 60 * 1000);
96
96
  const osmosis_store_js_1 = require("./osmosis-store.js");
97
+ const episode_writer_js_1 = require("./episode-writer.js");
97
98
  const session_tracker_js_1 = require("./session-tracker.js");
99
+ const trace_writer_js_1 = require("./trace-writer.js");
100
+ const tool_router_js_1 = require("./tool-router.js");
101
+ const node_crypto_1 = require("node:crypto");
98
102
  const PROXY_VERSION = (() => {
99
103
  try {
100
104
  const pkgPath = path.join(__dirname, '..', 'package.json');
@@ -939,6 +943,18 @@ let _activeOllamaConfig;
939
943
  function isContentLoggingEnabled() {
940
944
  return _activeProxyConfig.dashboard?.showRequestContent !== false;
941
945
  }
946
+ /**
947
+ * Whether procedural knowledge hints should be injected into system prompts.
948
+ * Default: false (returned when the env var is unset and the config key is not exactly boolean true) — no system prompt modification occurs.
949
+ * Config file: memory.proceduralInjectionEnabled — consulted only when the env var is unset; must be strictly `true` to enable.
950
+ * Env override: RELAYPLANE_PROCEDURAL_INJECTION=true — when the variable is set at all it takes precedence over the config file; only the exact string 'true' enables, any other value (including an empty string) disables.
951
+ */
952
+ function isProceduralInjectionEnabled() {
953
+ const envVal = process.env['RELAYPLANE_PROCEDURAL_INJECTION'];
954
+ if (envVal !== undefined)
955
+ return envVal === 'true';
956
+ return _activeProxyConfig.memory?.proceduralInjectionEnabled === true;
957
+ }
942
958
  function getProxyConfigPath() {
943
959
  const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
944
960
  if (customPath && customPath.trim())
@@ -2533,7 +2549,7 @@ th{text-align:left;color:#64748b;font-weight:500;padding:8px 12px;border-bottom:
2533
2549
  td{padding:8px 12px;border-bottom:1px solid #111318}
2534
2550
  .section{margin-bottom:32px}.section h2{font-size:1rem;font-weight:600;margin-bottom:12px;color:#94a3b8}
2535
2551
  .dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.warn{background:#fbbf24}.dot.down{background:#ef4444}
2536
- .section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none}
2552
+ .section.collapsible h2{cursor:pointer;user-select:none;display:flex;align-items:center;gap:8px}.section.collapsible h2::after{content:'▾';font-size:.8rem;color:#475569;transition:transform .2s}.section.collapsed h2::after{transform:rotate(-90deg)}.section.collapsed>*:not(h2){display:none!important}
2537
2553
  .badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
2538
2554
  .badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}.badge.err-auth{background:#2d0a0a;color:#ef4444}.badge.err-rate{background:#2d2a0a;color:#fbbf24}.badge.err-timeout{background:#2d1a0a;color:#fb923c}
2539
2555
  .badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
@@ -2558,10 +2574,10 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
2558
2574
  <div class="section collapsible collapsed"><h2>Agent Cost Breakdown</h2>
2559
2575
  <table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
2560
2576
  <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
2561
- <div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div><div style="margin-top:8px;padding:10px 14px;background:#0f1720;border:1px solid #1e3a5f;border-radius:8px;font-size:.8rem;color:#60a5fa">Network: Join the network to share with 1,000+ agent installations &rarr; <a href="https://relayplane.com/pricing" style="color:#34d399">Upgrade</a></div></div></div>
2577
+ <div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div></div></div>
2562
2578
  <div class="section collapsible collapsed" id="sessions-section"><h2>Sessions <span id="sessionsLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(last 7d)</span></h2>
2563
- <table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
2564
- <div id="sessions-upsell" style="display:none;margin-top:12px;padding:10px 14px;background:#1a1020;border:1px solid #7c3aed55;border-radius:8px;font-size:.8rem;color:#a78bfa">3+ sessions totalling over $2 this week — <a href="https://relayplane.com/pricing" style="color:#34d399">Upgrade to Pro</a> for full session analytics &amp; budget controls.</div></div>
2579
+ <table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Models</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
2580
+ </div>
2565
2581
  <div class="section"><h2>Recent Runs <span id="historyLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(7d window, history-capped)</span></h2>
2566
2582
  <table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
2567
2583
  <script>
@@ -2586,12 +2602,10 @@ async function loadSessions(){
2586
2602
  const badge=isActive?'<span class="badge ok" style="font-size:.7rem">LIVE</span>':'<span style="color:#64748b;font-size:.75rem">idle</span>';
2587
2603
  const srcBadge=s.session_source==='claude-code'?'<span style="color:#60a5fa;font-size:.75rem">claude-code</span>':'<span style="color:#94a3b8;font-size:.75rem">synthetic</span>';
2588
2604
  const sid=s.id.length>20?s.id.slice(0,20)+'…':s.id;
2589
- return '<tr><td style="font-family:monospace;font-size:.8rem" title="'+s.id+'">'+sid+'</td><td>'+srcBadge+'</td><td>'+fmtTime(new Date(s.started_at).toISOString())+'</td><td>'+dur+'</td><td>'+s.request_count+'</td><td>'+(s.total_tokens_in||0)+'</td><td>'+(s.total_tokens_out||0)+'</td><td>$'+fmt(s.total_cost_usd,4)+'</td><td>'+badge+'</td></tr>';
2590
- }).join(''):'<tr><td colspan=9 style="color:#64748b">No sessions recorded yet</td></tr>';
2591
- // Pro upsell: 3+ sessions with total cost > $2 this week
2605
+ const mix=s.model_mix&&Object.keys(s.model_mix).length?Object.entries(s.model_mix).map(([m,c])=>{const short=m.replace('claude-','').replace(/-\d{8}$/,'').replace('sonnet','Sonnet').replace('opus','Opus').replace('haiku','Haiku');return '<span style="font-size:.72rem;color:#94a3b8">'+short+'<span style="color:#475569">×</span>'+c+'</span>';}).join(' '):'<span style="color:#475569;font-size:.72rem">—</span>';
2606
+ return '<tr><td style="font-family:monospace;font-size:.8rem" title="'+esc(s.id)+'">'+sid+'</td><td>'+srcBadge+'</td><td>'+fmtTime(new Date(s.started_at).toISOString())+'</td><td>'+dur+'</td><td>'+s.request_count+'</td><td>'+(s.total_tokens_in||0)+'</td><td>'+(s.total_tokens_out||0)+'</td><td>$'+fmt(s.total_cost_usd,4)+'</td><td>'+mix+'</td><td>'+badge+'</td></tr>';
2607
+ }).join(''):'<tr><td colspan=10 style="color:#64748b">No sessions recorded yet</td></tr>';
2592
2608
  const totalCost=sessions.reduce((s,r)=>s+(r.total_cost_usd||0),0);
2593
- const upsell=$('sessions-upsell');
2594
- if(upsell) upsell.style.display=(sessions.length>=3&&totalCost>2)?'block':'none';
2595
2609
  }catch(e){console.error('sessions load error',e)}
2596
2610
  }
2597
2611
  async function load(){
@@ -2901,12 +2915,20 @@ async function startProxy(config = {}) {
2901
2915
  (0, agent_tracker_js_1.flushAgentRegistry)();
2902
2916
  meshHandle.stop();
2903
2917
  shutdownHistory();
2918
+ trace_writer_js_1.TraceWriter.getInstance().shutdown();
2904
2919
  process.exit(0);
2905
2920
  };
2906
2921
  process.on('SIGINT', handleShutdown);
2907
2922
  process.on('SIGTERM', handleShutdown);
2908
2923
  const configPath = getProxyConfigPath();
2909
2924
  let proxyConfig = await loadProxyConfig(configPath, log);
2925
+ // ── Deterministic Traces: initialise TraceWriter with loaded config ──
2926
+ trace_writer_js_1.TraceWriter.getInstance({
2927
+ ...((0, trace_writer_js_1.defaultTracesConfig)()),
2928
+ ...(proxyConfig.traces ?? {}),
2929
+ });
2930
+ // ── Tool Router: initialise deny-by-default tool authorization ──
2931
+ (0, tool_router_js_1.getToolRouter)();
2910
2932
  // Auto-config on startup: detect available auth and set optimal routing.
2911
2933
  //
2912
2934
  // Bug fix: `configExists` was always true because loadConfig() (CLI init) creates
@@ -3271,6 +3293,12 @@ async function startProxy(config = {}) {
3271
3293
  }
3272
3294
  // === Control endpoints ===
3273
3295
  if (pathname.startsWith('/control/')) {
3296
+ const remoteAddr = req.socket.remoteAddress;
3297
+ if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
3298
+ res.writeHead(403, { 'Content-Type': 'application/json' });
3299
+ res.end(JSON.stringify({ error: 'Control endpoints are localhost-only' }));
3300
+ return;
3301
+ }
3274
3302
  if (req.method === 'POST' && pathname === '/control/enable') {
3275
3303
  proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: true });
3276
3304
  await saveProxyConfig(configPath, proxyConfig);
@@ -3424,6 +3452,89 @@ async function startProxy(config = {}) {
3424
3452
  res.end(JSON.stringify({ ok: true, message: 'Daily spend counter reset' }));
3425
3453
  return;
3426
3454
  }
3455
+ // === Session Budget endpoints ===
3456
+ if (req.method === 'GET' && pathname === '/control/session-budget') {
3457
+ const sbQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
3458
+ const sbParams = new URLSearchParams(sbQs);
3459
+ const sessionId = sbParams.get('sessionId');
3460
+ if (!sessionId) {
3461
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3462
+ res.end(JSON.stringify({ error: 'sessionId query parameter required' }));
3463
+ return;
3464
+ }
3465
+ const record = budgetManager.getSessionBudget(sessionId);
3466
+ if (!record) {
3467
+ const cap = budgetManager.getConfig().sessionCapUsd;
3468
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3469
+ res.end(JSON.stringify({
3470
+ sessionId,
3471
+ capUsd: cap,
3472
+ spentUsd: 0,
3473
+ remainingUsd: cap,
3474
+ pctUsed: 0,
3475
+ modelUsed: '',
3476
+ status: 'ok',
3477
+ }));
3478
+ return;
3479
+ }
3480
+ const remaining = Math.max(0, record.capUsd - record.spentUsd);
3481
+ const pctUsed = record.capUsd > 0 ? (record.spentUsd / record.capUsd) * 100 : 0;
3482
+ const status = pctUsed >= 100 ? 'exceeded' : pctUsed >= 80 ? 'warning' : 'ok';
3483
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3484
+ res.end(JSON.stringify({
3485
+ sessionId: record.sessionId,
3486
+ capUsd: record.capUsd,
3487
+ spentUsd: record.spentUsd,
3488
+ remainingUsd: remaining,
3489
+ pctUsed: Math.round(pctUsed * 10) / 10,
3490
+ modelUsed: record.modelUsed,
3491
+ createdAt: new Date(record.createdAt).toISOString(),
3492
+ updatedAt: new Date(record.updatedAt).toISOString(),
3493
+ status,
3494
+ }));
3495
+ return;
3496
+ }
3497
+ if (req.method === 'POST' && pathname === '/control/session-budget/set') {
3498
+ try {
3499
+ const body = await readJsonBody(req);
3500
+ if (!body.sessionId || typeof body.capUsd !== 'number' || !isFinite(body.capUsd) || body.capUsd <= 0) {
3501
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3502
+ res.end(JSON.stringify({ error: 'sessionId (string) and capUsd (positive number) required' }));
3503
+ return;
3504
+ }
3505
+ budgetManager.setSessionCap(body.sessionId, body.capUsd);
3506
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3507
+ res.end(JSON.stringify({ ok: true, sessionId: body.sessionId, capUsd: body.capUsd }));
3508
+ }
3509
+ catch {
3510
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3511
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
3512
+ }
3513
+ return;
3514
+ }
3515
+ if (req.method === 'GET' && pathname === '/control/session-budgets') {
3516
+ const slQs = url.includes('?') ? url.split('?')[1] ?? '' : '';
3517
+ const slParams = new URLSearchParams(slQs);
3518
+ const limitParam = slParams.get('limit');
3519
+ const limit = limitParam ? Math.min(50, Math.max(1, parseInt(limitParam, 10) || 50)) : 50;
3520
+ const records = budgetManager.listSessionBudgets(limit);
3521
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3522
+ res.end(JSON.stringify({
3523
+ sessions: records.map(r => ({
3524
+ sessionId: r.sessionId,
3525
+ capUsd: r.capUsd,
3526
+ spentUsd: r.spentUsd,
3527
+ remainingUsd: Math.max(0, r.capUsd - r.spentUsd),
3528
+ pctUsed: r.capUsd > 0 ? Math.round((r.spentUsd / r.capUsd) * 1000) / 10 : 0,
3529
+ modelUsed: r.modelUsed,
3530
+ createdAt: new Date(r.createdAt).toISOString(),
3531
+ updatedAt: new Date(r.updatedAt).toISOString(),
3532
+ status: r.spentUsd >= r.capUsd ? 'exceeded' : (r.spentUsd / r.capUsd) >= 0.8 ? 'warning' : 'ok',
3533
+ })),
3534
+ count: records.length,
3535
+ }));
3536
+ return;
3537
+ }
3427
3538
  if (req.method === 'POST' && pathname === '/control/model') {
3428
3539
  try {
3429
3540
  const body = await readJsonBody(req);
@@ -3432,6 +3543,11 @@ async function startProxy(config = {}) {
3432
3543
  res.end(JSON.stringify({ error: 'model required' }));
3433
3544
  return;
3434
3545
  }
3546
+ if (body.model.length > 128 || !/^[a-zA-Z0-9._:/-]+$/.test(body.model)) {
3547
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3548
+ res.end(JSON.stringify({ error: 'model must be ≤128 characters and contain only [a-zA-Z0-9._:/-]' }));
3549
+ return;
3550
+ }
3435
3551
  const previousRouting = proxyConfig.routing;
3436
3552
  const target = body.model;
3437
3553
  // Update all complexity tiers to the requested model
@@ -3836,18 +3952,48 @@ async function startProxy(config = {}) {
3836
3952
  return;
3837
3953
  }
3838
3954
  // === Session Intelligence endpoints ===
3955
+ if (req.method === 'GET' && (pathname === '/v1/sessions' || pathname === '/v1/sessions/active')) {
3956
+ const remoteAddr = req.socket.remoteAddress;
3957
+ if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
3958
+ res.writeHead(403, { 'Content-Type': 'application/json' });
3959
+ res.end(JSON.stringify({ error: 'Session endpoints are localhost-only' }));
3960
+ return;
3961
+ }
3962
+ }
3839
3963
  if (req.method === 'GET' && pathname === '/v1/sessions') {
3840
3964
  const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
3841
3965
  const params = new URLSearchParams(queryString);
3842
- const limit = Math.min(parseInt(params.get('limit') || '20', 10), 100);
3843
- const days = parseInt(params.get('days') || '7', 10);
3966
+ const rawLimit = parseInt(params.get('limit') || '20', 10);
3967
+ const rawDays = parseInt(params.get('days') || '7', 10);
3968
+ const limit = Math.min(Number.isFinite(rawLimit) && rawLimit > 0 ? rawLimit : 20, 100);
3969
+ const days = Number.isFinite(rawDays) && rawDays > 0 ? rawDays : 7;
3844
3970
  const sessions = (0, session_tracker_js_1.getSessions)({ limit, days });
3845
3971
  const now = Date.now();
3846
3972
  const activeCutoff = now - 5 * 60 * 1000;
3973
+ // Build model_mix per session from episodic_events in osmosis.db
3974
+ const modelMixMap = new Map();
3975
+ try {
3976
+ const osmDb = (0, osmosis_store_js_1.getOsmosisDb)();
3977
+ if (osmDb && sessions.length > 0) {
3978
+ const placeholders = sessions.map(() => '?').join(',');
3979
+ const sessionIds = sessions.map(s => s.id);
3980
+ const rows = osmDb.prepare(`SELECT session_id, model_used, COUNT(*) as cnt
3981
+ FROM episodic_events
3982
+ WHERE session_id IN (${placeholders})
3983
+ GROUP BY session_id, model_used`).all(...sessionIds);
3984
+ for (const row of rows) {
3985
+ if (!modelMixMap.has(row.session_id))
3986
+ modelMixMap.set(row.session_id, {});
3987
+ modelMixMap.get(row.session_id)[row.model_used] = row.cnt;
3988
+ }
3989
+ }
3990
+ }
3991
+ catch { /* best-effort */ }
3847
3992
  const result = sessions.map(s => ({
3848
3993
  ...s,
3849
3994
  active: s.last_seen_at >= activeCutoff,
3850
3995
  duration_ms: s.last_seen_at - s.started_at,
3996
+ model_mix: modelMixMap.get(s.id) ?? {},
3851
3997
  }));
3852
3998
  res.writeHead(200, { 'Content-Type': 'application/json' });
3853
3999
  res.end(JSON.stringify({ sessions: result, total: result.length }));
@@ -3865,6 +4011,168 @@ async function startProxy(config = {}) {
3865
4011
  res.end(JSON.stringify({ sessions: result, total: result.length }));
3866
4012
  return;
3867
4013
  }
4014
+ // === Trace endpoints (CAP 3) ===
4015
+ if ((req.method === 'GET' || req.method === 'POST') && (pathname === '/v1/traces' || (pathname ?? '').startsWith('/v1/traces/'))) {
4016
+ const remoteAddr = req.socket.remoteAddress;
4017
+ if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
4018
+ res.writeHead(403, { 'Content-Type': 'application/json' });
4019
+ res.end(JSON.stringify({ error: 'Trace endpoints are localhost-only' }));
4020
+ return;
4021
+ }
4022
+ }
4023
+ if (req.method === 'GET' && pathname === '/v1/traces') {
4024
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
4025
+ const params = new URLSearchParams(queryString);
4026
+ const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
4027
+ const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
4028
+ const traces = traceWriter.getRecentTraces(limit);
4029
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4030
+ res.end(JSON.stringify({ traces, total: traces.length }));
4031
+ return;
4032
+ }
4033
+ if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/graph$/)) {
4034
+ const sessionId = (pathname ?? '').split('/')[3] ?? '';
4035
+ const traceWriter = trace_writer_js_1.TraceWriter.getInstance();
4036
+ const graph = traceWriter.getSessionGraph(sessionId);
4037
+ if (!graph) {
4038
+ res.writeHead(404, { 'Content-Type': 'application/json' });
4039
+ res.end(JSON.stringify({ error: `No graph found for session: ${sessionId}` }));
4040
+ return;
4041
+ }
4042
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4043
+ res.end(JSON.stringify(graph));
4044
+ return;
4045
+ }
4046
+ if (req.method === 'GET' && (pathname ?? '').match(/^\/v1\/traces\/([^/]+)\/events$/)) {
4047
+ const traceId = (pathname ?? '').split('/')[3] ?? '';
4048
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4049
+ const events = tw.getTraceEvents(traceId);
4050
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4051
+ res.end(JSON.stringify({ traceId, events, total: events.length }));
4052
+ return;
4053
+ }
4054
+ if (req.method === 'POST' && pathname === '/v1/traces/export') {
4055
+ try {
4056
+ const exportOptions = await readJsonBody(req);
4057
+ const format = exportOptions.format ?? 'jsonl';
4058
+ if (!['jsonl', 'csv', 'markdown', 'traceops'].includes(format)) {
4059
+ res.writeHead(400, { 'Content-Type': 'application/json' });
4060
+ res.end(JSON.stringify({ error: 'format must be one of: jsonl, csv, markdown, traceops' }));
4061
+ return;
4062
+ }
4063
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4064
+ const exported = await tw.export({
4065
+ format: format,
4066
+ sessionIds: exportOptions.sessionIds,
4067
+ fromTimestamp: exportOptions.fromTimestamp,
4068
+ toTimestamp: exportOptions.toTimestamp,
4069
+ includeToolInputs: exportOptions.includeToolInputs,
4070
+ });
4071
+ const contentType = format === 'markdown' ? 'text/markdown' : 'application/x-ndjson';
4072
+ res.writeHead(200, { 'Content-Type': contentType });
4073
+ res.end(exported);
4074
+ }
4075
+ catch {
4076
+ res.writeHead(400, { 'Content-Type': 'application/json' });
4077
+ res.end(JSON.stringify({ error: 'Invalid JSON body' }));
4078
+ }
4079
+ return;
4080
+ }
4081
+ // === Memory endpoints (Session 4 — localhost-only) ===
4082
+ if ((pathname ?? '').startsWith('/v1/memory')) {
4083
+ const remoteAddr = req.socket.remoteAddress;
4084
+ if (remoteAddr !== '127.0.0.1' && remoteAddr !== '::1' && remoteAddr !== '::ffff:127.0.0.1') {
4085
+ res.writeHead(403, { 'Content-Type': 'application/json' });
4086
+ res.end(JSON.stringify({ error: 'Memory endpoints are localhost-only' }));
4087
+ return;
4088
+ }
4089
+ }
4090
+ if (req.method === 'GET' && pathname === '/v1/memory/semantic') {
4091
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
4092
+ const params = new URLSearchParams(queryString);
4093
+ const sessionId = params.get('session_id') ?? undefined;
4094
+ const limit = Math.min(parseInt(params.get('limit') ?? '20', 10) || 20, 100);
4095
+ try {
4096
+ const db = (0, osmosis_store_js_1.getOsmosisDb)();
4097
+ if (!db) {
4098
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4099
+ res.end(JSON.stringify({ atoms: [], total: 0 }));
4100
+ return;
4101
+ }
4102
+ let atoms;
4103
+ if (sessionId) {
4104
+ atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
4105
+ FROM knowledge_atoms WHERE session_id = ? ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(sessionId, limit);
4106
+ }
4107
+ else {
4108
+ atoms = db.prepare(`SELECT id, type, model, task_type, latency_ms, input_tokens, output_tokens, confidence, observation_count, timestamp, session_id
4109
+ FROM knowledge_atoms ORDER BY confidence DESC, timestamp DESC LIMIT ?`).all(limit);
4110
+ }
4111
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4112
+ res.end(JSON.stringify({ atoms, total: atoms.length }));
4113
+ }
4114
+ catch {
4115
+ res.writeHead(500, { 'Content-Type': 'application/json' });
4116
+ res.end(JSON.stringify({ error: 'Memory query failed' }));
4117
+ }
4118
+ return;
4119
+ }
4120
+ if (req.method === 'GET' && pathname === '/v1/memory/episodic') {
4121
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
4122
+ const params = new URLSearchParams(queryString);
4123
+ const sessionId = params.get('session_id') ?? undefined;
4124
+ const limit = Math.min(parseInt(params.get('limit') ?? '50', 10) || 50, 200);
4125
+ try {
4126
+ const db = (0, osmosis_store_js_1.getOsmosisDb)();
4127
+ if (!db) {
4128
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4129
+ res.end(JSON.stringify({ events: [], total: 0 }));
4130
+ return;
4131
+ }
4132
+ let events;
4133
+ if (sessionId) {
4134
+ events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
4135
+ FROM episodic_events WHERE session_id = ? ORDER BY timestamp DESC LIMIT ?`).all(sessionId, limit);
4136
+ }
4137
+ else {
4138
+ events = db.prepare(`SELECT id, session_id, event_type, timestamp, duration_ms, model_used, tokens_in, tokens_out, cost_usd, outcome, outcome_detail, trace_id
4139
+ FROM episodic_events ORDER BY timestamp DESC LIMIT ?`).all(limit);
4140
+ }
4141
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4142
+ res.end(JSON.stringify({ events, total: events.length }));
4143
+ }
4144
+ catch {
4145
+ res.writeHead(500, { 'Content-Type': 'application/json' });
4146
+ res.end(JSON.stringify({ error: 'Memory query failed' }));
4147
+ }
4148
+ return;
4149
+ }
4150
+ if (req.method === 'GET' && pathname === '/v1/memory/procedural') {
4151
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
4152
+ const params = new URLSearchParams(queryString);
4153
+ const limit = Math.min(parseInt(params.get('limit') ?? '10', 10) || 10, 50);
4154
+ try {
4155
+ // Read from mesh.db (procedural atom store)
4156
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
4157
+ const Database = require('better-sqlite3');
4158
+ const meshDbPath = require('node:path').join((0, osmosis_store_js_1.getRelayplaneDir)(), 'mesh.db');
4159
+ let atoms = [];
4160
+ try {
4161
+ const meshDb = new Database(meshDbPath, { readonly: true });
4162
+ atoms = meshDb.prepare(`SELECT id, type, observation, confidence, fitness_score, trust_tier, evidence_count, created_at, updated_at
4163
+ FROM atoms ORDER BY fitness_score DESC, confidence DESC LIMIT ?`).all(limit);
4164
+ meshDb.close();
4165
+ }
4166
+ catch { /* mesh.db may not exist yet */ }
4167
+ res.writeHead(200, { 'Content-Type': 'application/json' });
4168
+ res.end(JSON.stringify({ atoms, total: atoms.length }));
4169
+ }
4170
+ catch {
4171
+ res.writeHead(500, { 'Content-Type': 'application/json' });
4172
+ res.end(JSON.stringify({ error: 'Procedural memory query failed' }));
4173
+ }
4174
+ return;
4175
+ }
3868
4176
  // Extract auth context from incoming request
3869
4177
  const ctx = extractRequestContext(req);
3870
4178
  const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
@@ -3922,6 +4230,29 @@ async function startProxy(config = {}) {
3922
4230
  const parsedModel = parseModelSuffix(requestedModel);
3923
4231
  let routingSuffix = parsedModel.suffix;
3924
4232
  requestedModel = parsedModel.baseModel;
4233
+ // ── CAP 3: Deterministic Traces — allocate traceId + emit request.start ──
4234
+ const nativeTraceId = (0, node_crypto_1.randomUUID)();
4235
+ {
4236
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4237
+ if (tw.isEnabled() && recordTelemetry) {
4238
+ const sysHash = nativeSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(nativeSystemPrompt) : undefined;
4239
+ const messages = requestBody['messages'];
4240
+ const reqTools = Array.isArray(requestBody['tools'])
4241
+ ? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
4242
+ : undefined;
4243
+ void tw.write(nativeSessionId, nativeTraceId, {
4244
+ eventType: 'request.start',
4245
+ parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
4246
+ agentId: getHeaderValue(req, 'x-agent-id') || nativeExplicitAgentId || undefined,
4247
+ payload: {
4248
+ model: requestedModel,
4249
+ messageCount: messages?.length,
4250
+ requestedTools: reqTools,
4251
+ systemPromptHash: sysHash,
4252
+ },
4253
+ });
4254
+ }
4255
+ }
3925
4256
  if (relayplaneEnabled && !relayplaneBypass && requestedModel) {
3926
4257
  const override = proxyConfig.modelOverrides?.[requestedModel];
3927
4258
  if (override) {
@@ -4180,6 +4511,109 @@ async function startProxy(config = {}) {
4180
4511
  Object.assign(budgetExtraHeaders, budgetCheck.headers);
4181
4512
  }
4182
4513
  // ── End budget check ──
4514
+ // ── Session budget check (only when X-Claude-Code-Session-Id is present) ──
4515
+ let nativeSessionBudgetResult = null;
4516
+ if (nativeSessionSource === 'claude-code') {
4517
+ nativeSessionBudgetResult = budgetManager.checkSessionBudget(nativeSessionId, targetModel || requestedModel);
4518
+ if (!nativeSessionBudgetResult.allowed) {
4519
+ res.writeHead(429, { 'Content-Type': 'application/json' });
4520
+ res.end(JSON.stringify({
4521
+ error: 'session_budget_exceeded',
4522
+ spent: nativeSessionBudgetResult.spent,
4523
+ cap: nativeSessionBudgetResult.cap,
4524
+ type: 'session_budget_exceeded',
4525
+ }));
4526
+ return;
4527
+ }
4528
+ if (nativeSessionBudgetResult.model !== (targetModel || requestedModel)) {
4529
+ log(`Session budget downgrade: ${targetModel || requestedModel} → ${nativeSessionBudgetResult.model}`);
4530
+ // CAP 3: emit model.switch before we overwrite targetModel
4531
+ {
4532
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4533
+ if (tw.isEnabled() && recordTelemetry) {
4534
+ void tw.write(nativeSessionId, nativeTraceId, {
4535
+ eventType: 'model.switch',
4536
+ payload: {
4537
+ fromModel: targetModel || requestedModel,
4538
+ toModel: nativeSessionBudgetResult.model,
4539
+ switchReason: 'session_budget',
4540
+ },
4541
+ });
4542
+ }
4543
+ }
4544
+ targetModel = nativeSessionBudgetResult.model;
4545
+ if (requestBody)
4546
+ requestBody['model'] = targetModel;
4547
+ }
4548
+ // CAP 3: emit budget.checkpoint after every budget check
4549
+ {
4550
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4551
+ if (tw.isEnabled() && recordTelemetry) {
4552
+ void tw.write(nativeSessionId, nativeTraceId, {
4553
+ eventType: 'budget.checkpoint',
4554
+ payload: {
4555
+ sessionCostUsd: nativeSessionBudgetResult.spent,
4556
+ sessionCapUsd: nativeSessionBudgetResult.cap,
4557
+ sessionPct: nativeSessionBudgetResult.cap > 0
4558
+ ? nativeSessionBudgetResult.spent / nativeSessionBudgetResult.cap
4559
+ : 0,
4560
+ },
4561
+ });
4562
+ }
4563
+ }
4564
+ }
4565
+ // ── End session budget check ──
4566
+ // ── Tool authorization check (deny-by-default, after budget gate) ──
4567
+ {
4568
+ const reqTools = Array.isArray(requestBody?.['tools'])
4569
+ ? requestBody['tools'].map(t => t?.name ?? '').filter(Boolean)
4570
+ : [];
4571
+ if (reqTools.length > 0) {
4572
+ const tr = (0, tool_router_js_1.getToolRouter)();
4573
+ const toolCtx = (0, tool_router_js_1.extractToolContext)(req.headers, nativeSessionId, reqTools, tr);
4574
+ const authResult = tr.checkTools(toolCtx);
4575
+ if (authResult.denied.length > 0) {
4576
+ for (const toolName of authResult.denied) {
4577
+ tr.recordDenied(nativeSessionId, toolName, 'not_in_active_pack');
4578
+ }
4579
+ // Emit tool.denied trace event
4580
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4581
+ if (tw.isEnabled() && recordTelemetry) {
4582
+ for (const toolName of authResult.denied) {
4583
+ void tw.write(nativeSessionId, nativeTraceId, {
4584
+ eventType: 'tool.denied',
4585
+ payload: { toolName },
4586
+ });
4587
+ }
4588
+ }
4589
+ // If ALL requested tools are denied, block the request
4590
+ if (authResult.allowed.length === 0) {
4591
+ res.writeHead(403, {
4592
+ 'Content-Type': 'application/json',
4593
+ 'X-Relay-Tools-Denied': authResult.deniedHeader,
4594
+ });
4595
+ res.end(JSON.stringify({
4596
+ error: 'tool_not_authorized',
4597
+ denied: authResult.denied,
4598
+ message: 'All requested tools are denied by the active tool pack policy.',
4599
+ }));
4600
+ return;
4601
+ }
4602
+ }
4603
+ // Strip denied tools from the forwarded request body so the model
4604
+ // cannot call them even when only a partial set of tools was denied.
4605
+ if (Array.isArray(requestBody['tools'])) {
4606
+ const allowedSet = new Set(authResult.allowed);
4607
+ requestBody['tools'] = requestBody['tools'].filter(t => allowedSet.has(t?.name ?? ''));
4608
+ }
4609
+ // Set response header so callers know which tools were denied
4610
+ if (authResult.deniedHeader) {
4611
+ // Stash for later use when writing response headers
4612
+ budgetExtraHeaders['X-Relay-Tools-Denied'] = authResult.deniedHeader;
4613
+ }
4614
+ }
4615
+ }
4616
+ // ── End tool authorization check ──
4183
4617
  // ── Rate limit check ──
4184
4618
  const workspaceId = 'local'; // Local proxy uses single workspace
4185
4619
  try {
@@ -4307,7 +4741,7 @@ async function startProxy(config = {}) {
4307
4741
  // ── End cross-provider cascade ──
4308
4742
  const durationMs = Date.now() - startTime;
4309
4743
  const errMsg = extractProviderErrorMessage(errorPayload, providerResponse.status);
4310
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, providerResponse.status);
4744
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, errMsg, providerResponse.status);
4311
4745
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
4312
4746
  res.end(JSON.stringify(errorPayload));
4313
4747
  return;
@@ -4322,6 +4756,8 @@ async function startProxy(config = {}) {
4322
4756
  'Cache-Control': 'no-cache',
4323
4757
  'Connection': 'keep-alive',
4324
4758
  'X-RelayPlane-Cache': cacheBypass ? 'BYPASS' : 'MISS',
4759
+ 'X-Relay-Trace-Id': nativeTraceId,
4760
+ 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)),
4325
4761
  ...nativeStreamRpHeaders,
4326
4762
  });
4327
4763
  const reader = providerResponse.body?.getReader();
@@ -4416,7 +4852,7 @@ async function startProxy(config = {}) {
4416
4852
  });
4417
4853
  log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
4418
4854
  }
4419
- res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, ...nativeRpHeaders });
4855
+ res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, 'X-Relay-Trace-Id': nativeTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(nativeSessionId)), ...nativeRpHeaders });
4420
4856
  res.end(JSON.stringify(nativeResponseData));
4421
4857
  }
4422
4858
  }
@@ -4452,6 +4888,48 @@ async function startProxy(config = {}) {
4452
4888
  }
4453
4889
  // ── Session Intelligence: upsert session record ──
4454
4890
  (0, session_tracker_js_1.upsertSession)(nativeSessionId, nativeSessionSource, nativeCostUsd, nativeTokIn, nativeTokOut);
4891
+ // ── Session 4: Episodic memory write (fire-and-forget) ──
4892
+ try {
4893
+ (0, episode_writer_js_1.writeEpisode)(nativeSessionId, {
4894
+ eventType: 'model-response',
4895
+ modelUsed: targetModel || requestedModel,
4896
+ tokensIn: nativeTokIn,
4897
+ tokensOut: nativeTokOut,
4898
+ costUsd: nativeCostUsd,
4899
+ outcome: 'success',
4900
+ traceId: nativeTraceId,
4901
+ durationMs,
4902
+ });
4903
+ }
4904
+ catch { /* never block hot path */ }
4905
+ // ── CAP 3: Deterministic Traces — emit request.end + finalize ──
4906
+ {
4907
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4908
+ if (tw.isEnabled() && recordTelemetry) {
4909
+ const finishReason = nativeResponseData?.['stop_reason']
4910
+ ?? (nativeResponseData?.['choices']?.[0]?.['finish_reason']);
4911
+ void tw.write(nativeSessionId, nativeTraceId, {
4912
+ eventType: 'request.end',
4913
+ durationMs,
4914
+ payload: {
4915
+ modelUsed: targetModel || requestedModel,
4916
+ tokensIn: nativeTokIn,
4917
+ tokensOut: nativeTokOut,
4918
+ costUsd: nativeCostUsd,
4919
+ finishReason,
4920
+ },
4921
+ });
4922
+ void tw.finalizeTrace(nativeTraceId, nativeSessionId, {
4923
+ costUsd: nativeCostUsd,
4924
+ modelUsed: targetModel || requestedModel,
4925
+ durationMs,
4926
+ });
4927
+ }
4928
+ }
4929
+ // ── Session budget: record spend (fire-and-forget, only for claude-code sessions) ──
4930
+ if (nativeSessionSource === 'claude-code') {
4931
+ budgetManager.updateSessionBudget(nativeSessionId, nativeCostUsd, targetModel || requestedModel);
4932
+ }
4455
4933
  // ── Post-request: budget spend + anomaly detection ──
4456
4934
  postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
4457
4935
  if (recordTelemetry) {
@@ -4482,7 +4960,20 @@ async function startProxy(config = {}) {
4482
4960
  catchErrMsg = err instanceof Error ? err.message : String(err);
4483
4961
  catchErrStatus = 500;
4484
4962
  }
4485
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, catchErrMsg, catchErrStatus);
4963
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, nativeAgentFingerprint, nativeExplicitAgentId, catchErrMsg, catchErrStatus);
4964
+ // ── CAP 3: Deterministic Traces — emit request.end (error) + finalize ──
4965
+ {
4966
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
4967
+ if (tw.isEnabled() && recordTelemetry) {
4968
+ void tw.write(nativeSessionId, nativeTraceId, {
4969
+ eventType: 'request.end',
4970
+ durationMs,
4971
+ error: { code: String(catchErrStatus), message: catchErrMsg, retryable: catchErrStatus >= 500 },
4972
+ payload: { modelUsed: targetModel || requestedModel },
4973
+ });
4974
+ void tw.finalizeTrace(nativeTraceId, nativeSessionId, { durationMs, modelUsed: targetModel || requestedModel });
4975
+ }
4976
+ }
4486
4977
  if (recordTelemetry) {
4487
4978
  sendCloudTelemetry(taskType, targetModel || requestedModel, 0, 0, durationMs, false, 0, originalModel ?? undefined);
4488
4979
  meshCapture(targetModel || requestedModel, targetProvider, taskType, 0, 0, 0, durationMs, false, catchErrMsg);
@@ -4606,6 +5097,24 @@ async function startProxy(config = {}) {
4606
5097
  const agentResult = (0, agent_tracker_js_1.trackAgent)(chatSystemPrompt, 0, chatExplicitAgentId);
4607
5098
  chatAgentFingerprint = agentResult.fingerprint;
4608
5099
  }
5100
+ // ── CAP 3: Deterministic Traces — allocate chatTraceId + emit request.start ──
5101
+ const chatTraceId = (0, node_crypto_1.randomUUID)();
5102
+ {
5103
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
5104
+ if (tw.isEnabled() && recordTelemetry) {
5105
+ const chatSysHash = chatSystemPrompt ? (0, trace_writer_js_1.sha256Hex)(chatSystemPrompt) : undefined;
5106
+ void tw.write(chatSessionId, chatTraceId, {
5107
+ eventType: 'request.start',
5108
+ parentTraceId: getHeaderValue(req, 'x-parent-trace-id') || undefined,
5109
+ agentId: getHeaderValue(req, 'x-agent-id') || chatExplicitAgentId || undefined,
5110
+ payload: {
5111
+ model: request.model,
5112
+ messageCount: request.messages?.length,
5113
+ systemPromptHash: chatSysHash,
5114
+ },
5115
+ });
5116
+ }
5117
+ }
4609
5118
  // ── Response Cache: check for cached response (chat/completions) ──
4610
5119
  const chatCacheBypass = responseCache.shouldBypass(request);
4611
5120
  let chatCacheHash;
@@ -4996,6 +5505,42 @@ async function startProxy(config = {}) {
4996
5505
  if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
4997
5506
  (0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
4998
5507
  (0, session_tracker_js_1.upsertSession)(chatSessionId, chatSessionSource, cascadeCost, cascadeTokensIn, cascadeTokensOut);
5508
+ // ── Session 4: Episodic memory write (fire-and-forget) ──
5509
+ try {
5510
+ (0, episode_writer_js_1.writeEpisode)(chatSessionId, {
5511
+ eventType: 'routing-decision',
5512
+ modelUsed: cascadeResult.model,
5513
+ tokensIn: cascadeTokensIn,
5514
+ tokensOut: cascadeTokensOut,
5515
+ costUsd: cascadeCost,
5516
+ outcome: 'success',
5517
+ outcomeDetail: 'cascade',
5518
+ traceId: chatTraceId,
5519
+ durationMs,
5520
+ });
5521
+ }
5522
+ catch { /* never block hot path */ }
5523
+ // ── CAP 3: Deterministic Traces — emit request.end + finalize (chat cascade) ──
5524
+ {
5525
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
5526
+ if (tw.isEnabled() && recordTelemetry) {
5527
+ void tw.write(chatSessionId, chatTraceId, {
5528
+ eventType: 'request.end',
5529
+ durationMs,
5530
+ payload: {
5531
+ modelUsed: cascadeResult.model,
5532
+ tokensIn: cascadeTokensIn,
5533
+ tokensOut: cascadeTokensOut,
5534
+ costUsd: cascadeCost,
5535
+ },
5536
+ });
5537
+ void tw.finalizeTrace(chatTraceId, chatSessionId, {
5538
+ costUsd: cascadeCost,
5539
+ modelUsed: cascadeResult.model,
5540
+ durationMs,
5541
+ });
5542
+ }
5543
+ }
4999
5544
  if (recordTelemetry) {
5000
5545
  try {
5001
5546
  const runResult = await relay.run({
@@ -5023,7 +5568,7 @@ async function startProxy(config = {}) {
5023
5568
  meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
5024
5569
  }
5025
5570
  const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
5026
- res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
5571
+ res.writeHead(200, { 'Content-Type': 'application/json', 'X-Relay-Trace-Id': chatTraceId, 'X-Relay-Memory-Hits': String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)), ...chatCascadeRpHeaders });
5027
5572
  res.end(JSON.stringify(responseData));
5028
5573
  }
5029
5574
  catch (err) {
@@ -5038,7 +5583,7 @@ async function startProxy(config = {}) {
5038
5583
  cascadeErrMsg = err instanceof Error ? err.message : String(err);
5039
5584
  cascadeErrStatus = 500;
5040
5585
  }
5041
- logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, undefined, undefined, cascadeErrMsg, cascadeErrStatus);
5586
+ logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity, chatAgentFingerprint, chatExplicitAgentId, cascadeErrMsg, cascadeErrStatus);
5042
5587
  if (recordTelemetry) {
5043
5588
  sendCloudTelemetry(taskType, targetModel || 'unknown', 0, 0, durationMs, false, 0, originalRequestedModel ?? undefined);
5044
5589
  meshCapture(targetModel || 'unknown', targetProvider, taskType, 0, 0, 0, durationMs, false, cascadeErrMsg);
@@ -5054,8 +5599,10 @@ async function startProxy(config = {}) {
5054
5599
  }
5055
5600
  }
5056
5601
  else {
5602
+ res.setHeader('X-Relay-Trace-Id', chatTraceId);
5603
+ res.setHeader('X-Relay-Memory-Hits', String((0, osmosis_store_js_1.countAtomsForSession)(chatSessionId)));
5057
5604
  await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey, // pass for cross-provider cascade API key resolution (GH #38)
5058
- chatSessionId, chatSessionSource);
5605
+ chatSessionId, chatSessionSource, chatTraceId);
5059
5606
  }
5060
5607
  }
5061
5608
  });
@@ -5251,7 +5798,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5251
5798
  if (!ollamaStream.success || !ollamaStream.stream) {
5252
5799
  const durationMs = Date.now() - startTime;
5253
5800
  const errMsg = ollamaStream.error?.message ?? 'Ollama stream failed';
5254
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errMsg, ollamaStream.error?.status);
5801
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errMsg, ollamaStream.error?.status);
5255
5802
  res.writeHead(ollamaStream.error?.status ?? 502, { 'Content-Type': 'application/json' });
5256
5803
  res.end(JSON.stringify({ error: ollamaStream.error }));
5257
5804
  return;
@@ -5287,7 +5834,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5287
5834
  }
5288
5835
  const durationMs = Date.now() - startTime;
5289
5836
  const streamErrMsg = extractProviderErrorMessage(errorData, providerResponse.status);
5290
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, streamErrMsg, providerResponse.status);
5837
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, streamErrMsg, providerResponse.status);
5291
5838
  if (recordTelemetry) {
5292
5839
  sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
5293
5840
  meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, streamErrMsg);
@@ -5303,7 +5850,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5303
5850
  cooldownManager.recordFailure(targetProvider, errorMsg);
5304
5851
  }
5305
5852
  const durationMs = Date.now() - startTime;
5306
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errorMsg, 500);
5853
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
5307
5854
  if (recordTelemetry) {
5308
5855
  sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
5309
5856
  meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
@@ -5431,6 +5978,21 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5431
5978
  (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
5432
5979
  if (sessionId && sessionSource)
5433
5980
  (0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, streamCost, streamTokensIn, streamTokensOut);
5981
+ // ── Session 4: Episodic memory write (fire-and-forget) ──
5982
+ if (sessionId) {
5983
+ try {
5984
+ (0, episode_writer_js_1.writeEpisode)(sessionId, {
5985
+ eventType: 'model-response',
5986
+ modelUsed: targetModel,
5987
+ tokensIn: streamTokensIn,
5988
+ tokensOut: streamTokensOut,
5989
+ costUsd: streamCost,
5990
+ outcome: 'success',
5991
+ durationMs,
5992
+ });
5993
+ }
5994
+ catch { /* never block hot path */ }
5995
+ }
5434
5996
  // ── Post-request: budget spend + anomaly detection ──
5435
5997
  try {
5436
5998
  (0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
@@ -5468,7 +6030,9 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5468
6030
  */
5469
6031
  async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
5470
6032
  /** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
5471
- anthropicEnvKeyForCascade, sessionId, sessionSource) {
6033
+ anthropicEnvKeyForCascade, sessionId, sessionSource,
6034
+ /** CAP 3: trace ID for deterministic trace write */
6035
+ traceId) {
5472
6036
  let responseData;
5473
6037
  try {
5474
6038
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -5506,7 +6070,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
5506
6070
  // All fallbacks exhausted — return the primary error
5507
6071
  const durationMs = Date.now() - startTime;
5508
6072
  const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
5509
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
6073
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, `${routingMode}+cascade`, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
5510
6074
  if (recordTelemetry) {
5511
6075
  sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
5512
6076
  meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
@@ -5520,7 +6084,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
5520
6084
  // No cascade — return error as-is
5521
6085
  const durationMs = Date.now() - startTime;
5522
6086
  const nsErrMsg = extractProviderErrorMessage(responseData, result.status);
5523
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, nsErrMsg, result.status);
6087
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, nsErrMsg, result.status);
5524
6088
  if (recordTelemetry) {
5525
6089
  sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
5526
6090
  meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, nsErrMsg);
@@ -5538,7 +6102,7 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
5538
6102
  cooldownManager.recordFailure(targetProvider, errorMsg);
5539
6103
  }
5540
6104
  const durationMs = Date.now() - startTime;
5541
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, undefined, undefined, errorMsg, 500);
6105
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity, agentFingerprint, agentId, errorMsg, 500);
5542
6106
  if (recordTelemetry) {
5543
6107
  sendCloudTelemetry(taskType, targetModel, 0, 0, durationMs, false, 0, request.model ?? undefined);
5544
6108
  meshCapture(targetModel, targetProvider, taskType, 0, 0, 0, durationMs, false, errorMsg);
@@ -5567,6 +6131,37 @@ anthropicEnvKeyForCascade, sessionId, sessionSource) {
5567
6131
  (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
5568
6132
  if (sessionId && sessionSource)
5569
6133
  (0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, cost, tokensIn, tokensOut);
6134
+ // ── Session 4: Episodic memory write (fire-and-forget) ──
6135
+ if (sessionId) {
6136
+ try {
6137
+ (0, episode_writer_js_1.writeEpisode)(sessionId, {
6138
+ eventType: 'model-response',
6139
+ modelUsed: targetModel,
6140
+ tokensIn,
6141
+ tokensOut,
6142
+ costUsd: cost,
6143
+ outcome: 'success',
6144
+ traceId: traceId ?? undefined,
6145
+ durationMs,
6146
+ });
6147
+ }
6148
+ catch { /* never block hot path */ }
6149
+ }
6150
+ // ── CAP 3: Deterministic Traces — emit request.end + finalize (chat non-streaming) ──
6151
+ if (traceId && sessionId && recordTelemetry) {
6152
+ const tw = trace_writer_js_1.TraceWriter.getInstance();
6153
+ if (tw.isEnabled()) {
6154
+ const finishReason = responseData?.['choices']
6155
+ ? responseData['choices'][0]?.['finish_reason']
6156
+ : undefined;
6157
+ void tw.write(sessionId, traceId, {
6158
+ eventType: 'request.end',
6159
+ durationMs,
6160
+ payload: { modelUsed: targetModel, tokensIn, tokensOut, costUsd: cost, finishReason },
6161
+ });
6162
+ void tw.finalizeTrace(traceId, sessionId, { costUsd: cost, modelUsed: targetModel, durationMs });
6163
+ }
6164
+ }
5570
6165
  // ── Post-request: budget spend + anomaly detection ──
5571
6166
  try {
5572
6167
  (0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);