@relayplane/proxy 1.8.35 → 1.8.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,6 +94,7 @@ const estimateRateMap = new Map();
94
94
  // Without this, IPs that make one request and disappear stay in the map forever.
95
95
  setInterval(() => (0, estimate_js_1.purgeExpiredRateLimitEntries)(estimateRateMap, Date.now()), 5 * 60 * 1000);
96
96
  const osmosis_store_js_1 = require("./osmosis-store.js");
97
+ const session_tracker_js_1 = require("./session-tracker.js");
97
98
  const PROXY_VERSION = (() => {
98
99
  try {
99
100
  const pkgPath = path.join(__dirname, '..', 'package.json');
@@ -2558,6 +2559,9 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
2558
2559
  <table><thead><tr><th>Agent</th><th>Requests</th><th>Total Cost</th><th>Last Active</th><th></th></tr></thead><tbody id="agents"></tbody></table></div>
2559
2560
  <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
2560
2561
  <div class="section collapsible collapsed"><h2>Learning</h2><div id="learning-panel" style="display:flex;flex-direction:column;gap:12px"><div id="learning-stats" style="display:flex;gap:12px;flex-wrap:wrap"></div><div id="learning-recent"></div><div style="margin-top:8px;padding:10px 14px;background:#0f1720;border:1px solid #1e3a5f;border-radius:8px;font-size:.8rem;color:#60a5fa">Network: Join the network to share with 1,000+ agent installations &rarr; <a href="https://relayplane.com/pricing" style="color:#34d399">Upgrade</a></div></div></div>
2562
+ <div class="section collapsible collapsed" id="sessions-section"><h2>Sessions <span id="sessionsLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(last 7d)</span></h2>
2563
+ <table><thead><tr><th>Session ID</th><th>Source</th><th>Started</th><th>Duration</th><th>Requests</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Status</th></tr></thead><tbody id="sessions"></tbody></table>
2564
+ <div id="sessions-upsell" style="display:none;margin-top:12px;padding:10px 14px;background:#1a1020;border:1px solid #7c3aed55;border-radius:8px;font-size:.8rem;color:#a78bfa">3+ sessions totalling over $2 this week — <a href="https://relayplane.com/pricing" style="color:#34d399">Upgrade to Pro</a> for full session analytics &amp; budget controls.</div></div>
2561
2565
  <div class="section"><h2>Recent Runs <span id="historyLabel" style="font-size:.75rem;color:#64748b;font-weight:400">(7d window, history-capped)</span></h2>
2562
2566
  <table><thead><tr><th>Time</th><th>Agent</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th class="col-cache">Cache Create</th><th class="col-cache">Cache Read</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
2563
2567
  <script>
@@ -2566,6 +2570,30 @@ document.querySelectorAll('.section.collapsible h2').forEach(h2=>h2.addEventList
2566
2570
  function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
2567
2571
  function fmtTime(s){const d=new Date(s);return d.toLocaleTimeString()}
2568
2572
  function dur(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60);return h?h+'h '+m+'m':m+'m'}
2573
+ async function loadSessions(){
2574
+ try{
2575
+ const [sessR,activeR]=await Promise.all([
2576
+ fetch('/v1/sessions?limit=20&days=7').then(r=>r.json()).catch(()=>({sessions:[]})),
2577
+ fetch('/v1/sessions/active').then(r=>r.json()).catch(()=>({sessions:[]}))
2578
+ ]);
2579
+ const activeIds=new Set((activeR.sessions||[]).map(s=>s.id));
2580
+ const sessions=sessR.sessions||[];
2581
+ const el=$('sessions');
2582
+ if(!el)return;
2583
+ el.innerHTML=sessions.length?sessions.map(s=>{
2584
+ const isActive=activeIds.has(s.id)||s.active;
2585
+ const dur=s.duration_ms>0?Math.round(s.duration_ms/1000)+'s':'—';
2586
+ const badge=isActive?'<span class="badge ok" style="font-size:.7rem">LIVE</span>':'<span style="color:#64748b;font-size:.75rem">idle</span>';
2587
+ const srcBadge=s.session_source==='claude-code'?'<span style="color:#60a5fa;font-size:.75rem">claude-code</span>':'<span style="color:#94a3b8;font-size:.75rem">synthetic</span>';
2588
+ const sid=s.id.length>20?s.id.slice(0,20)+'…':s.id;
2589
+ return '<tr><td style="font-family:monospace;font-size:.8rem" title="'+s.id+'">'+sid+'</td><td>'+srcBadge+'</td><td>'+fmtTime(new Date(s.started_at).toISOString())+'</td><td>'+dur+'</td><td>'+s.request_count+'</td><td>'+(s.total_tokens_in||0)+'</td><td>'+(s.total_tokens_out||0)+'</td><td>$'+fmt(s.total_cost_usd,4)+'</td><td>'+badge+'</td></tr>';
2590
+ }).join(''):'<tr><td colspan=9 style="color:#64748b">No sessions recorded yet</td></tr>';
2591
+ // Pro upsell: 3+ sessions with total cost > $2 this week
2592
+ const totalCost=sessions.reduce((s,r)=>s+(r.total_cost_usd||0),0);
2593
+ const upsell=$('sessions-upsell');
2594
+ if(upsell) upsell.style.display=(sessions.length>=3&&totalCost>2)?'block':'none';
2595
+ }catch(e){console.error('sessions load error',e)}
2596
+ }
2569
2597
  async function load(){
2570
2598
  try{
2571
2599
  const [health,stats,runsR,sav,provH,agentsR]=await Promise.all([
@@ -2710,7 +2738,7 @@ async function loadLearning(){
2710
2738
  }
2711
2739
  }catch(e){console.error('learning load error',e)}
2712
2740
  }
2713
- load();loadLearning();setInterval(load,5000);setInterval(loadLearning,30000);
2741
+ load();loadLearning();loadSessions();setInterval(load,5000);setInterval(loadLearning,30000);setInterval(loadSessions,10000);
2714
2742
  </script><footer style="text-align:center;padding:20px 0;color:#475569;font-size:.75rem;border-top:1px solid #1e293b;margin-top:20px">🔒 Request content stays on your machine. Never sent to cloud.</footer></body></html>`;
2715
2743
  }
2716
2744
  async function getKnowledgeStats() {
@@ -3306,6 +3334,136 @@ async function startProxy(config = {}) {
3306
3334
  }
3307
3335
  return;
3308
3336
  }
3337
+ // === Budget endpoints ===
3338
+ if (req.method === 'GET' && pathname === '/control/budget') {
3339
+ const status = budgetManager.getStatus();
3340
+ const cfg = budgetManager.getConfig();
3341
+ const now = Date.now();
3342
+ const weekCutoff = now - 7 * 86400000;
3343
+ const monthCutoff = now - 30 * 86400000;
3344
+ const weekCost = requestHistory
3345
+ .filter(r => new Date(r.timestamp).getTime() >= weekCutoff)
3346
+ .reduce((s, r) => s + r.costUsd, 0);
3347
+ const monthCost = requestHistory
3348
+ .filter(r => new Date(r.timestamp).getTime() >= monthCutoff)
3349
+ .reduce((s, r) => s + r.costUsd, 0);
3350
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3351
+ res.end(JSON.stringify({
3352
+ today_usd: Math.round(status.dailySpend * 10000) / 10000,
3353
+ limit_usd: status.dailyLimit,
3354
+ pct_used: Math.round(status.dailyPercent * 10) / 10,
3355
+ remaining_usd: Math.max(0, Math.round((status.dailyLimit - status.dailySpend) * 10000) / 10000),
3356
+ this_week_usd: Math.round(weekCost * 10000) / 10000,
3357
+ this_month_usd: Math.round(monthCost * 10000) / 10000,
3358
+ enabled: cfg.enabled,
3359
+ on_breach: cfg.onBreach,
3360
+ alert_thresholds: cfg.alertThresholds,
3361
+ hourly_usd: Math.round(status.hourlySpend * 10000) / 10000,
3362
+ hourly_limit_usd: status.hourlyLimit,
3363
+ hourly_pct_used: Math.round(status.hourlyPercent * 10) / 10,
3364
+ breached: status.breached,
3365
+ breach_type: status.breachType,
3366
+ }));
3367
+ return;
3368
+ }
3369
+ if (req.method === 'POST' && pathname === '/control/budget/set') {
3370
+ try {
3371
+ const body = await readJsonBody(req);
3372
+ const amount = Number(body.dailyUsd);
3373
+ if (!body.dailyUsd || isNaN(amount) || amount <= 0) {
3374
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3375
+ res.end(JSON.stringify({ error: 'dailyUsd must be a positive number' }));
3376
+ return;
3377
+ }
3378
+ budgetManager.setLimits({ dailyUsd: amount });
3379
+ budgetManager.updateConfig({ enabled: true });
3380
+ proxyConfig = normalizeProxyConfig({
3381
+ ...proxyConfig,
3382
+ budget: { ...proxyConfig.budget, dailyUsd: amount, enabled: true },
3383
+ });
3384
+ await saveProxyConfig(configPath, proxyConfig);
3385
+ startConfigWatcher();
3386
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3387
+ res.end(JSON.stringify({ ok: true, dailyUsd: amount }));
3388
+ }
3389
+ catch {
3390
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3391
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
3392
+ }
3393
+ return;
3394
+ }
3395
+ if (req.method === 'POST' && pathname === '/control/budget/set-alert') {
3396
+ try {
3397
+ const body = await readJsonBody(req);
3398
+ const pct = Number(body.threshold);
3399
+ if (!body.threshold || isNaN(pct) || pct <= 0 || pct > 100) {
3400
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3401
+ res.end(JSON.stringify({ error: 'threshold must be 1-100' }));
3402
+ return;
3403
+ }
3404
+ const current = budgetManager.getConfig();
3405
+ const thresholds = [...new Set([...current.alertThresholds, pct])].sort((a, b) => a - b);
3406
+ budgetManager.updateConfig({ alertThresholds: thresholds });
3407
+ proxyConfig = normalizeProxyConfig({
3408
+ ...proxyConfig,
3409
+ budget: { ...proxyConfig.budget, alertThresholds: thresholds },
3410
+ });
3411
+ await saveProxyConfig(configPath, proxyConfig);
3412
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3413
+ res.end(JSON.stringify({ ok: true, alertThresholds: thresholds }));
3414
+ }
3415
+ catch {
3416
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3417
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
3418
+ }
3419
+ return;
3420
+ }
3421
+ if (req.method === 'POST' && pathname === '/control/budget/reset') {
3422
+ budgetManager.reset();
3423
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3424
+ res.end(JSON.stringify({ ok: true, message: 'Daily spend counter reset' }));
3425
+ return;
3426
+ }
3427
+ if (req.method === 'POST' && pathname === '/control/model') {
3428
+ try {
3429
+ const body = await readJsonBody(req);
3430
+ if (!body.model) {
3431
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3432
+ res.end(JSON.stringify({ error: 'model required' }));
3433
+ return;
3434
+ }
3435
+ const previousRouting = proxyConfig.routing;
3436
+ const target = body.model;
3437
+ // Update all complexity tiers to the requested model
3438
+ proxyConfig = normalizeProxyConfig({
3439
+ ...proxyConfig,
3440
+ routing: {
3441
+ ...proxyConfig.routing,
3442
+ complexity: {
3443
+ enabled: proxyConfig.routing?.complexity?.enabled ?? true,
3444
+ simple: target,
3445
+ moderate: target,
3446
+ complex: target,
3447
+ },
3448
+ },
3449
+ });
3450
+ await saveProxyConfig(configPath, proxyConfig);
3451
+ startConfigWatcher();
3452
+ const prevModel = previousRouting?.complexity?.complex ?? previousRouting?.complexity?.moderate ?? 'unknown';
3453
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3454
+ res.end(JSON.stringify({
3455
+ switched: true,
3456
+ previous: prevModel,
3457
+ current: target,
3458
+ reason: body.reason ?? '',
3459
+ }));
3460
+ }
3461
+ catch {
3462
+ res.writeHead(400, { 'Content-Type': 'application/json' });
3463
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
3464
+ }
3465
+ return;
3466
+ }
3309
3467
  }
3310
3468
  if (req.method === 'POST' && pathname === '/control/kill') {
3311
3469
  try {
@@ -3677,6 +3835,36 @@ async function startProxy(config = {}) {
3677
3835
  }
3678
3836
  return;
3679
3837
  }
3838
+ // === Session Intelligence endpoints ===
3839
+ if (req.method === 'GET' && pathname === '/v1/sessions') {
3840
+ const queryString = url.includes('?') ? url.split('?')[1] ?? '' : '';
3841
+ const params = new URLSearchParams(queryString);
3842
+ const limit = Math.min(parseInt(params.get('limit') || '20', 10), 100);
3843
+ const days = parseInt(params.get('days') || '7', 10);
3844
+ const sessions = (0, session_tracker_js_1.getSessions)({ limit, days });
3845
+ const now = Date.now();
3846
+ const activeCutoff = now - 5 * 60 * 1000;
3847
+ const result = sessions.map(s => ({
3848
+ ...s,
3849
+ active: s.last_seen_at >= activeCutoff,
3850
+ duration_ms: s.last_seen_at - s.started_at,
3851
+ }));
3852
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3853
+ res.end(JSON.stringify({ sessions: result, total: result.length }));
3854
+ return;
3855
+ }
3856
+ if (req.method === 'GET' && pathname === '/v1/sessions/active') {
3857
+ const active = (0, session_tracker_js_1.getActiveSessions)();
3858
+ const now = Date.now();
3859
+ const result = active.map(s => ({
3860
+ ...s,
3861
+ active: true,
3862
+ duration_ms: s.last_seen_at - s.started_at,
3863
+ }));
3864
+ res.writeHead(200, { 'Content-Type': 'application/json' });
3865
+ res.end(JSON.stringify({ sessions: result, total: result.length }));
3866
+ return;
3867
+ }
3680
3868
  // Extract auth context from incoming request
3681
3869
  const ctx = extractRequestContext(req);
3682
3870
  const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
@@ -3716,6 +3904,8 @@ async function startProxy(config = {}) {
3716
3904
  res.end(JSON.stringify({ error: 'Invalid JSON' }));
3717
3905
  return;
3718
3906
  }
3907
+ // Extract session ID (X-Claude-Code-Session-Id or synthetic)
3908
+ const { sessionId: nativeSessionId, sessionSource: nativeSessionSource } = (0, session_tracker_js_1.getSessionId)(req, requestBody['model']);
3719
3909
  // Extract agent fingerprint and explicit agent ID
3720
3910
  const nativeSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(requestBody);
3721
3911
  const nativeExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
@@ -4260,6 +4450,8 @@ async function startProxy(config = {}) {
4260
4450
  if (nativeAgentFingerprint && nativeAgentFingerprint !== 'unknown') {
4261
4451
  (0, agent_tracker_js_1.updateAgentCost)(nativeAgentFingerprint, nativeCostUsd);
4262
4452
  }
4453
+ // ── Session Intelligence: upsert session record ──
4454
+ (0, session_tracker_js_1.upsertSession)(nativeSessionId, nativeSessionSource, nativeCostUsd, nativeTokIn, nativeTokOut);
4263
4455
  // ── Post-request: budget spend + anomaly detection ──
4264
4456
  postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
4265
4457
  if (recordTelemetry) {
@@ -4404,6 +4596,8 @@ async function startProxy(config = {}) {
4404
4596
  return;
4405
4597
  }
4406
4598
  const isStreaming = request.stream === true;
4599
+ // Extract session ID for chat/completions
4600
+ const { sessionId: chatSessionId, sessionSource: chatSessionSource } = (0, session_tracker_js_1.getSessionId)(req, request.model);
4407
4601
  // Extract agent fingerprint for chat/completions
4408
4602
  const chatSystemPrompt = (0, agent_tracker_js_1.extractSystemPromptFromBody)(request);
4409
4603
  const chatExplicitAgentId = getHeaderValue(req, 'x-relayplane-agent') || undefined;
@@ -4758,7 +4952,7 @@ async function startProxy(config = {}) {
4758
4952
  const startTime = Date.now();
4759
4953
  // Handle streaming vs non-streaming
4760
4954
  if (isStreaming) {
4761
- await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId);
4955
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass, chatAgentFingerprint, chatExplicitAgentId, chatSessionId, chatSessionSource);
4762
4956
  }
4763
4957
  else {
4764
4958
  if (useCascade && cascadeConfig) {
@@ -4801,6 +4995,7 @@ async function startProxy(config = {}) {
4801
4995
  updateLastHistoryEntry(cascadeTokensIn, cascadeTokensOut, cascadeCost, chatCascadeRespModel, cascadeCacheCreation, cascadeCacheRead, chatAgentFingerprint, chatExplicitAgentId);
4802
4996
  if (chatAgentFingerprint && chatAgentFingerprint !== 'unknown')
4803
4997
  (0, agent_tracker_js_1.updateAgentCost)(chatAgentFingerprint, cascadeCost);
4998
+ (0, session_tracker_js_1.upsertSession)(chatSessionId, chatSessionSource, cascadeCost, cascadeTokensIn, cascadeTokensOut);
4804
4999
  if (recordTelemetry) {
4805
5000
  try {
4806
5001
  const runResult = await relay.run({
@@ -4859,7 +5054,8 @@ async function startProxy(config = {}) {
4859
5054
  }
4860
5055
  }
4861
5056
  else {
4862
- await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey);
5057
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatAgentFingerprint, chatExplicitAgentId, useAnthropicEnvKey, // pass for cross-provider cascade API key resolution (GH #38)
5058
+ chatSessionId, chatSessionSource);
4863
5059
  }
4864
5060
  }
4865
5061
  });
@@ -5024,7 +5220,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
5024
5220
  }
5025
5221
  return { responseData, ok: true, status: 200 };
5026
5222
  }
5027
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId) {
5223
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass, agentFingerprint, agentId, sessionId, sessionSource) {
5028
5224
  let providerResponse;
5029
5225
  try {
5030
5226
  switch (targetProvider) {
@@ -5233,6 +5429,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5233
5429
  updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost, undefined, streamCacheCreation || undefined, streamCacheRead || undefined, agentFingerprint, agentId);
5234
5430
  if (agentFingerprint && agentFingerprint !== 'unknown')
5235
5431
  (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, streamCost);
5432
+ if (sessionId && sessionSource)
5433
+ (0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, streamCost, streamTokensIn, streamTokensOut);
5236
5434
  // ── Post-request: budget spend + anomaly detection ──
5237
5435
  try {
5238
5436
  (0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
@@ -5270,7 +5468,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
5270
5468
  */
5271
5469
  async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', agentFingerprint, agentId,
5272
5470
  /** Anthropic env API key — required for cross-provider cascade API key resolution (GH #38) */
5273
- anthropicEnvKeyForCascade) {
5471
+ anthropicEnvKeyForCascade, sessionId, sessionSource) {
5274
5472
  let responseData;
5275
5473
  try {
5276
5474
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -5367,6 +5565,8 @@ anthropicEnvKeyForCascade) {
5367
5565
  updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel, cacheCreationTokens || undefined, cacheReadTokens || undefined, agentFingerprint, agentId);
5368
5566
  if (agentFingerprint && agentFingerprint !== 'unknown')
5369
5567
  (0, agent_tracker_js_1.updateAgentCost)(agentFingerprint, cost);
5568
+ if (sessionId && sessionSource)
5569
+ (0, session_tracker_js_1.upsertSession)(sessionId, sessionSource, cost, tokensIn, tokensOut);
5370
5570
  // ── Post-request: budget spend + anomaly detection ──
5371
5571
  try {
5372
5572
  (0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);