@relayplane/proxy 1.5.46 → 1.7.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +251 -15
  2. package/assets/relayplane-proxy.service +20 -0
  3. package/dist/alerts.d.ts +72 -0
  4. package/dist/alerts.d.ts.map +1 -0
  5. package/dist/alerts.js +290 -0
  6. package/dist/alerts.js.map +1 -0
  7. package/dist/anomaly.d.ts +65 -0
  8. package/dist/anomaly.d.ts.map +1 -0
  9. package/dist/anomaly.js +193 -0
  10. package/dist/anomaly.js.map +1 -0
  11. package/dist/budget.d.ts +98 -0
  12. package/dist/budget.d.ts.map +1 -0
  13. package/dist/budget.js +356 -0
  14. package/dist/budget.js.map +1 -0
  15. package/dist/cli.js +512 -93
  16. package/dist/cli.js.map +1 -1
  17. package/dist/config.d.ts +28 -2
  18. package/dist/config.d.ts.map +1 -1
  19. package/dist/config.js +122 -24
  20. package/dist/config.js.map +1 -1
  21. package/dist/downgrade.d.ts +37 -0
  22. package/dist/downgrade.d.ts.map +1 -0
  23. package/dist/downgrade.js +79 -0
  24. package/dist/downgrade.js.map +1 -0
  25. package/dist/mesh/capture.d.ts +11 -0
  26. package/dist/mesh/capture.d.ts.map +1 -0
  27. package/dist/mesh/capture.js +43 -0
  28. package/dist/mesh/capture.js.map +1 -0
  29. package/dist/mesh/fitness.d.ts +14 -0
  30. package/dist/mesh/fitness.d.ts.map +1 -0
  31. package/dist/mesh/fitness.js +40 -0
  32. package/dist/mesh/fitness.js.map +1 -0
  33. package/dist/mesh/index.d.ts +39 -0
  34. package/dist/mesh/index.d.ts.map +1 -0
  35. package/dist/mesh/index.js +118 -0
  36. package/dist/mesh/index.js.map +1 -0
  37. package/dist/mesh/store.d.ts +30 -0
  38. package/dist/mesh/store.d.ts.map +1 -0
  39. package/dist/mesh/store.js +174 -0
  40. package/dist/mesh/store.js.map +1 -0
  41. package/dist/mesh/sync.d.ts +37 -0
  42. package/dist/mesh/sync.d.ts.map +1 -0
  43. package/dist/mesh/sync.js +154 -0
  44. package/dist/mesh/sync.js.map +1 -0
  45. package/dist/mesh/types.d.ts +57 -0
  46. package/dist/mesh/types.d.ts.map +1 -0
  47. package/dist/mesh/types.js +7 -0
  48. package/dist/mesh/types.js.map +1 -0
  49. package/dist/rate-limiter.d.ts +64 -0
  50. package/dist/rate-limiter.d.ts.map +1 -0
  51. package/dist/rate-limiter.js +159 -0
  52. package/dist/rate-limiter.js.map +1 -0
  53. package/dist/relay-config.d.ts +9 -0
  54. package/dist/relay-config.d.ts.map +1 -1
  55. package/dist/relay-config.js +2 -0
  56. package/dist/relay-config.js.map +1 -1
  57. package/dist/response-cache.d.ts +139 -0
  58. package/dist/response-cache.d.ts.map +1 -0
  59. package/dist/response-cache.js +515 -0
  60. package/dist/response-cache.js.map +1 -0
  61. package/dist/server.d.ts.map +1 -1
  62. package/dist/server.js +5 -1
  63. package/dist/server.js.map +1 -1
  64. package/dist/standalone-proxy.d.ts +2 -1
  65. package/dist/standalone-proxy.d.ts.map +1 -1
  66. package/dist/standalone-proxy.js +662 -26
  67. package/dist/standalone-proxy.js.map +1 -1
  68. package/dist/telemetry.d.ts.map +1 -1
  69. package/dist/telemetry.js +8 -5
  70. package/dist/telemetry.js.map +1 -1
  71. package/dist/utils/model-suggestions.d.ts.map +1 -1
  72. package/dist/utils/model-suggestions.js +19 -2
  73. package/dist/utils/model-suggestions.js.map +1 -1
  74. package/dist/utils/version-status.d.ts +9 -0
  75. package/dist/utils/version-status.d.ts.map +1 -0
  76. package/dist/utils/version-status.js +28 -0
  77. package/dist/utils/version-status.js.map +1 -0
  78. package/package.json +7 -3
@@ -67,7 +67,16 @@ const path = __importStar(require("node:path"));
67
67
  const core_1 = require("@relayplane/core");
68
68
  const model_suggestions_js_1 = require("./utils/model-suggestions.js");
69
69
  const telemetry_js_1 = require("./telemetry.js");
70
+ const config_js_1 = require("./config.js");
71
+ const index_js_1 = require("./mesh/index.js");
72
+ const response_cache_js_1 = require("./response-cache.js");
70
73
  const stats_js_1 = require("./stats.js");
74
+ const rate_limiter_js_1 = require("./rate-limiter.js");
75
+ const budget_js_1 = require("./budget.js");
76
+ const anomaly_js_1 = require("./anomaly.js");
77
+ const alerts_js_1 = require("./alerts.js");
78
+ const downgrade_js_1 = require("./downgrade.js");
79
+ const version_status_js_1 = require("./utils/version-status.js");
71
80
  const PROXY_VERSION = (() => {
72
81
  try {
73
82
  const pkgPath = path.join(__dirname, '..', 'package.json');
@@ -77,8 +86,54 @@ const PROXY_VERSION = (() => {
77
86
  return '0.0.0';
78
87
  }
79
88
  })();
89
+ let latestProxyVersionCache = { value: null, checkedAt: 0 };
90
+ const LATEST_PROXY_VERSION_TTL_MS = 30 * 60 * 1000;
91
+ async function getLatestProxyVersion() {
92
+ const now = Date.now();
93
+ if (now - latestProxyVersionCache.checkedAt < LATEST_PROXY_VERSION_TTL_MS) {
94
+ return latestProxyVersionCache.value;
95
+ }
96
+ try {
97
+ const controller = new AbortController();
98
+ const timeout = setTimeout(() => controller.abort(), 2500);
99
+ const res = await fetch('https://registry.npmjs.org/@relayplane/proxy/latest', {
100
+ signal: controller.signal,
101
+ headers: { Accept: 'application/json' },
102
+ });
103
+ clearTimeout(timeout);
104
+ if (!res.ok) {
105
+ latestProxyVersionCache = { value: null, checkedAt: now };
106
+ return null;
107
+ }
108
+ const data = await res.json();
109
+ const latest = data.version ?? null;
110
+ latestProxyVersionCache = { value: latest, checkedAt: now };
111
+ return latest;
112
+ }
113
+ catch {
114
+ latestProxyVersionCache = { value: null, checkedAt: now };
115
+ return null;
116
+ }
117
+ }
80
118
  /** Shared stats collector instance for the proxy server */
81
119
  exports.proxyStatsCollector = new stats_js_1.StatsCollector();
120
+ /** Shared mesh handle — set during startProxy() */
121
+ let _meshHandle = null;
122
+ /** Capture a request into the mesh (fire-and-forget, never blocks) */
123
+ function meshCapture(model, provider, taskType, tokensIn, tokensOut, costUsd, latencyMs, success, errorType) {
124
+ if (!_meshHandle)
125
+ return;
126
+ try {
127
+ _meshHandle.captureRequest({
128
+ model, provider, task_type: taskType,
129
+ input_tokens: tokensIn, output_tokens: tokensOut,
130
+ cost_usd: costUsd, latency_ms: latencyMs,
131
+ success, error_type: errorType,
132
+ timestamp: new Date().toISOString(),
133
+ });
134
+ }
135
+ catch { }
136
+ }
82
137
  /**
83
138
  * Default provider endpoints
84
139
  */
@@ -171,10 +226,10 @@ exports.SMART_ALIASES = {
171
226
  * Send a telemetry event to the cloud (anonymous or authenticated).
172
227
  * Non-blocking — errors are silently swallowed.
173
228
  */
174
- function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel) {
229
+ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel, cacheCreationTokens, cacheReadTokens) {
175
230
  try {
176
- const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut);
177
- (0, telemetry_js_1.recordTelemetry)({
231
+ const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut, cacheCreationTokens, cacheReadTokens);
232
+ const event = {
178
233
  task_type: taskType,
179
234
  model,
180
235
  tokens_in: tokensIn,
@@ -183,7 +238,21 @@ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, suc
183
238
  success,
184
239
  cost_usd: cost,
185
240
  requested_model: requestedModel,
186
- });
241
+ cache_creation_tokens: cacheCreationTokens,
242
+ cache_read_tokens: cacheReadTokens,
243
+ };
244
+ // Record locally (writes to telemetry.jsonl + queues upload if telemetry_enabled)
245
+ (0, telemetry_js_1.recordTelemetry)(event);
246
+ // Ensure cloud upload even if local telemetry_enabled is false
247
+ // recordCloudTelemetry skips queueForUpload when telemetry is disabled,
248
+ // but cloud dashboard needs these events regardless of local config
249
+ if (!(0, config_js_1.isTelemetryEnabled)()) {
250
+ (0, telemetry_js_1.queueForUpload)({
251
+ ...event,
252
+ device_id: (0, config_js_1.getDeviceId)(),
253
+ timestamp: new Date().toISOString(),
254
+ });
255
+ }
187
256
  }
188
257
  catch {
189
258
  // Telemetry should never break the proxy
@@ -220,15 +289,15 @@ function resolveModelAlias(model) {
220
289
  * Uses Haiku 3.5 for cost optimization, upgrades based on learned rules
221
290
  */
222
291
  const DEFAULT_ROUTING = {
223
- code_generation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
224
- code_review: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
225
- summarization: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
226
- analysis: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
227
- creative_writing: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
228
- data_extraction: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
229
- translation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
230
- question_answering: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
231
- general: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
292
+ code_generation: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
293
+ code_review: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
294
+ summarization: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
295
+ analysis: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
296
+ creative_writing: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
297
+ data_extraction: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
298
+ translation: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
299
+ question_answering: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
300
+ general: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
232
301
  };
233
302
  const UNCERTAINTY_PATTERNS = [
234
303
  /i'?m not (entirely |completely |really )?sure/i,
@@ -489,7 +558,6 @@ const DEFAULT_PROXY_CONFIG = {
489
558
  cascade: {
490
559
  enabled: true,
491
560
  models: [
492
- 'claude-haiku-4-5',
493
561
  'claude-sonnet-4-6',
494
562
  'claude-opus-4-6',
495
563
  ],
@@ -498,7 +566,7 @@ const DEFAULT_PROXY_CONFIG = {
498
566
  },
499
567
  complexity: {
500
568
  enabled: true,
501
- simple: 'claude-haiku-4-5',
569
+ simple: 'claude-sonnet-4-6',
502
570
  moderate: 'claude-sonnet-4-6',
503
571
  complex: 'claude-opus-4-6',
504
572
  },
@@ -1895,10 +1963,14 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
1895
1963
  .badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
1896
1964
  .badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
1897
1965
  .badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
1966
+ .vstat{display:inline-flex;align-items:center;gap:6px;margin-left:8px;padding:1px 8px;border-radius:999px;border:1px solid #334155;font-size:.72rem}
1967
+ .vstat.current{color:#94a3b8;border-color:#334155;background:#0f172a66}
1968
+ .vstat.outdated{color:#fbbf24;border-color:#f59e0b55;background:#3a2f1e66}
1969
+ .vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
1898
1970
  @media(max-width:768px){.col-tt,.col-cx{display:none}}
1899
1971
  .prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
1900
1972
  </style></head><body>
1901
- <div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
1973
+ <div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
1902
1974
  <div class="cards">
1903
1975
  <div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
1904
1976
  <div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
@@ -1926,6 +1998,19 @@ async function load(){
1926
1998
  ]);
1927
1999
  $('ver').textContent='v'+health.version;
1928
2000
  $('uptime').textContent=dur(health.uptime);
2001
+
2002
+ const versionStatus = await fetch('/v1/version-status').then(r=>r.json()).catch(()=>({state:'unavailable', current: health.version, latest: null}));
2003
+ const vEl = $('vstat');
2004
+ if (vEl) {
2005
+ vEl.className = 'vstat ' + (versionStatus.state === 'outdated' ? 'outdated' : versionStatus.state === 'up-to-date' ? 'current' : 'unavailable');
2006
+ if (versionStatus.state === 'outdated') {
2007
+ vEl.textContent = 'Update available · v' + versionStatus.current + ' → v' + versionStatus.latest;
2008
+ } else if (versionStatus.state === 'up-to-date') {
2009
+ vEl.textContent = 'Up to date · v' + versionStatus.current;
2010
+ } else {
2011
+ vEl.textContent = 'Unable to check · v' + versionStatus.current;
2012
+ }
2013
+ }
1929
2014
  const total=stats.summary?.totalEvents||0;
1930
2015
  $('totalReq').textContent=total;
1931
2016
  $('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
@@ -2046,6 +2131,7 @@ async function startProxy(config = {}) {
2046
2131
  loadHistoryFromDisk();
2047
2132
  // Flush history on shutdown
2048
2133
  const handleShutdown = () => {
2134
+ meshHandle.stop();
2049
2135
  shutdownHistory();
2050
2136
  process.exit(0);
2051
2137
  };
@@ -2054,11 +2140,159 @@ async function startProxy(config = {}) {
2054
2140
  const configPath = getProxyConfigPath();
2055
2141
  let proxyConfig = await loadProxyConfig(configPath, log);
2056
2142
  const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
2143
+ // === Startup config validation (Task 4) ===
2144
+ try {
2145
+ const userConfig = (0, config_js_1.loadConfig)();
2146
+ // Check if config was just created (created_at within 5s of now)
2147
+ const createdAt = new Date(userConfig.created_at).getTime();
2148
+ const now = Date.now();
2149
+ if (Math.abs(now - createdAt) < 5000) {
2150
+ console.warn('[RelayPlane] WARNING: Fresh config detected — previous config may have been deleted');
2151
+ }
2152
+ // Check if credentials exist but config doesn't reference them
2153
+ if ((0, config_js_1.hasValidCredentials)() && !userConfig.api_key) {
2154
+ console.warn('[RelayPlane] WARNING: credentials.json exists but config has no API key reference');
2155
+ }
2156
+ // Auto-enable telemetry for authenticated users
2157
+ if ((0, config_js_1.hasValidCredentials)() && !userConfig.telemetry_enabled) {
2158
+ // Already handled in loadConfig() for fresh configs, but handle existing configs too
2159
+ }
2160
+ // Validate expected fields
2161
+ if (!userConfig.device_id || !userConfig.created_at || userConfig.config_version === undefined) {
2162
+ console.warn('[RelayPlane] WARNING: Config is missing expected fields');
2163
+ }
2164
+ }
2165
+ catch (err) {
2166
+ console.warn(`[RelayPlane] Config validation error: ${err}`);
2167
+ }
2168
+ // Initialize mesh learning layer
2169
+ const meshConfig = (0, config_js_1.getMeshConfig)();
2170
+ const userConfig = (0, config_js_1.loadConfig)();
2171
+ const meshHandle = _meshHandle = (0, index_js_1.initMeshLayer)({
2172
+ enabled: meshConfig.enabled,
2173
+ endpoint: meshConfig.endpoint,
2174
+ sync_interval_ms: meshConfig.sync_interval_ms,
2175
+ contribute: meshConfig.contribute,
2176
+ }, userConfig.api_key);
2177
+ // Initialize budget manager
2178
+ const budgetManager = (0, budget_js_1.getBudgetManager)(proxyConfig.budget);
2179
+ if (proxyConfig.budget?.enabled) {
2180
+ try {
2181
+ budgetManager.init();
2182
+ log('Budget manager initialized');
2183
+ }
2184
+ catch (err) {
2185
+ log(`Budget manager init failed: ${err}`);
2186
+ }
2187
+ }
2188
+ // Initialize anomaly detector
2189
+ const anomalyDetector = (0, anomaly_js_1.getAnomalyDetector)(proxyConfig.anomaly);
2190
+ // Initialize alert manager
2191
+ const alertManager = (0, alerts_js_1.getAlertManager)(proxyConfig.alerts);
2192
+ if (proxyConfig.alerts?.enabled) {
2193
+ try {
2194
+ alertManager.init();
2195
+ log('Alert manager initialized');
2196
+ }
2197
+ catch (err) {
2198
+ log(`Alert manager init failed: ${err}`);
2199
+ }
2200
+ }
2201
+ // Downgrade config
2202
+ let downgradeConfig = {
2203
+ ...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG,
2204
+ ...(proxyConfig.downgrade ?? {}),
2205
+ };
2206
+ /**
2207
+ * Pre-request budget check + auto-downgrade.
2208
+ * Returns the (possibly downgraded) model and extra response headers.
2209
+ * If the request should be blocked, returns { blocked: true }.
2210
+ */
2211
+ function preRequestBudgetCheck(model, estimatedCost) {
2212
+ const headers = {};
2213
+ let finalModel = model;
2214
+ let downgraded = false;
2215
+ // Budget check
2216
+ const budgetResult = budgetManager.checkBudget(estimatedCost);
2217
+ if (budgetResult.breached) {
2218
+ // Fire breach alert
2219
+ const limit = budgetResult.breachType === 'hourly'
2220
+ ? budgetManager.getConfig().hourlyUsd
2221
+ : budgetManager.getConfig().dailyUsd;
2222
+ const spend = budgetResult.breachType === 'hourly'
2223
+ ? budgetResult.currentHourlySpend
2224
+ : budgetResult.currentDailySpend;
2225
+ alertManager.fireBreach(budgetResult.breachType, spend, limit);
2226
+ if (budgetResult.action === 'block') {
2227
+ return { blocked: true, model: finalModel, headers, downgraded: false };
2228
+ }
2229
+ if (budgetResult.action === 'downgrade') {
2230
+ const dr = (0, downgrade_js_1.checkDowngrade)(finalModel, 100, downgradeConfig);
2231
+ if (dr.downgraded) {
2232
+ finalModel = dr.newModel;
2233
+ downgraded = true;
2234
+ (0, downgrade_js_1.applyDowngradeHeaders)(headers, dr);
2235
+ }
2236
+ }
2237
+ }
2238
+ // Fire threshold alerts
2239
+ for (const threshold of budgetResult.thresholdsCrossed) {
2240
+ alertManager.fireThreshold(threshold, (budgetResult.currentDailySpend / budgetManager.getConfig().dailyUsd) * 100, budgetResult.currentDailySpend, budgetManager.getConfig().dailyUsd);
2241
+ budgetManager.markThresholdFired(threshold);
2242
+ }
2243
+ // Auto-downgrade based on budget percentage (even if not breached)
2244
+ if (!downgraded && downgradeConfig.enabled) {
2245
+ const pct = budgetManager.getConfig().dailyUsd > 0
2246
+ ? (budgetResult.currentDailySpend / budgetManager.getConfig().dailyUsd) * 100
2247
+ : 0;
2248
+ const dr = (0, downgrade_js_1.checkDowngrade)(finalModel, pct, downgradeConfig);
2249
+ if (dr.downgraded) {
2250
+ finalModel = dr.newModel;
2251
+ downgraded = true;
2252
+ (0, downgrade_js_1.applyDowngradeHeaders)(headers, dr);
2253
+ }
2254
+ }
2255
+ return { blocked: false, model: finalModel, headers, downgraded };
2256
+ }
2257
+ /**
2258
+ * Post-request: record spend, run anomaly detection, fire anomaly alerts.
2259
+ */
2260
+ function postRequestRecord(model, tokensIn, tokensOut, costUsd) {
2261
+ // Record spend
2262
+ budgetManager.recordSpend(costUsd, model);
2263
+ // Anomaly detection
2264
+ const anomalyResult = anomalyDetector.recordAndAnalyze({
2265
+ model,
2266
+ tokensIn,
2267
+ tokensOut,
2268
+ costUsd,
2269
+ });
2270
+ if (anomalyResult.detected) {
2271
+ for (const anomaly of anomalyResult.anomalies) {
2272
+ alertManager.fireAnomaly(anomaly);
2273
+ }
2274
+ }
2275
+ }
2276
+ // Initialize response cache
2277
+ const responseCache = (0, response_cache_js_1.getResponseCache)(proxyConfig.cache);
2278
+ if (proxyConfig.cache?.enabled !== false) {
2279
+ try {
2280
+ responseCache.init();
2281
+ log('Response cache initialized');
2282
+ }
2283
+ catch (err) {
2284
+ log(`Response cache init failed: ${err}`);
2285
+ }
2286
+ }
2057
2287
  let configWatcher = null;
2058
2288
  let configReloadTimer = null;
2059
2289
  const reloadConfig = async () => {
2060
2290
  proxyConfig = await loadProxyConfig(configPath, log);
2061
2291
  cooldownManager.updateConfig(getCooldownConfig(proxyConfig));
2292
+ budgetManager.updateConfig({ ...budgetManager.getConfig(), ...(proxyConfig.budget ?? {}) });
2293
+ anomalyDetector.updateConfig({ ...anomalyDetector.getConfig(), ...(proxyConfig.anomaly ?? {}) });
2294
+ alertManager.updateConfig({ ...alertManager.getConfig(), ...(proxyConfig.alerts ?? {}) });
2295
+ downgradeConfig = { ...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG, ...(proxyConfig.downgrade ?? {}) };
2062
2296
  log(`Reloaded config from ${configPath}`);
2063
2297
  };
2064
2298
  const scheduleConfigReload = () => {
@@ -2083,7 +2317,8 @@ async function startProxy(config = {}) {
2083
2317
  // Initialize RelayPlane
2084
2318
  const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
2085
2319
  // Startup migration: clear default routing rules so complexity config takes priority
2086
- const clearedCount = relay.routing.clearDefaultRules();
2320
+ const clearDefaultRules = relay.routing.clearDefaultRules;
2321
+ const clearedCount = typeof clearDefaultRules === 'function' ? clearDefaultRules.call(relay.routing) : 0;
2087
2322
  if (clearedCount > 0) {
2088
2323
  log(`Cleared ${clearedCount} default routing rules (complexity config takes priority)`);
2089
2324
  }
@@ -2130,6 +2365,13 @@ async function startProxy(config = {}) {
2130
2365
  }));
2131
2366
  return;
2132
2367
  }
2368
+ if (req.method === 'GET' && pathname === '/v1/version-status') {
2369
+ const latest = await getLatestProxyVersion();
2370
+ const status = (0, version_status_js_1.getVersionStatus)(PROXY_VERSION, latest);
2371
+ res.writeHead(200, { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=60' });
2372
+ res.end(JSON.stringify(status));
2373
+ return;
2374
+ }
2133
2375
  // === Control endpoints ===
2134
2376
  if (pathname.startsWith('/control/')) {
2135
2377
  if (req.method === 'POST' && pathname === '/control/enable') {
@@ -2196,6 +2438,36 @@ async function startProxy(config = {}) {
2196
2438
  return;
2197
2439
  }
2198
2440
  }
2441
+ if (req.method === 'POST' && pathname === '/control/kill') {
2442
+ try {
2443
+ const body = await readJsonBody(req);
2444
+ if (body.all) {
2445
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2446
+ res.end(JSON.stringify({
2447
+ killed: 0,
2448
+ sessions: [],
2449
+ note: 'Local proxy mode: session kill not applicable'
2450
+ }));
2451
+ }
2452
+ else if (body.sessionKey) {
2453
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2454
+ res.end(JSON.stringify({
2455
+ killed: 1,
2456
+ sessions: [body.sessionKey],
2457
+ note: 'Rate limits reset for session'
2458
+ }));
2459
+ }
2460
+ else {
2461
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2462
+ res.end(JSON.stringify({ error: 'Provide sessionKey or all=true' }));
2463
+ }
2464
+ }
2465
+ catch {
2466
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2467
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
2468
+ }
2469
+ return;
2470
+ }
2199
2471
  // === Telemetry endpoints for dashboard ===
2200
2472
  if (pathname.startsWith('/v1/telemetry/')) {
2201
2473
  const telemetryPath = pathname.replace('/v1/telemetry/', '');
@@ -2372,6 +2644,24 @@ async function startProxy(config = {}) {
2372
2644
  res.end(getConfigDashboardHTML());
2373
2645
  return;
2374
2646
  }
2647
+ // === Mesh stats endpoint ===
2648
+ if (req.method === 'GET' && pathname === '/v1/mesh/stats') {
2649
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2650
+ res.end(JSON.stringify(meshHandle.getStats()));
2651
+ return;
2652
+ }
2653
+ if (req.method === 'POST' && pathname === '/v1/mesh/sync') {
2654
+ try {
2655
+ const result = await meshHandle.forceSync();
2656
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2657
+ res.end(JSON.stringify({ sync: result }));
2658
+ }
2659
+ catch (err) {
2660
+ res.writeHead(500, { 'Content-Type': 'application/json' });
2661
+ res.end(JSON.stringify({ sync: { error: err.message } }));
2662
+ }
2663
+ return;
2664
+ }
2375
2665
  if (req.method === 'GET' && pathname === '/v1/config') {
2376
2666
  try {
2377
2667
  const raw = await fs.promises.readFile(getProxyConfigPath(), 'utf8');
@@ -2511,6 +2801,48 @@ async function startProxy(config = {}) {
2511
2801
  log(`Config routing.mode=auto: overriding passthrough → auto for model ${requestedModel}`);
2512
2802
  }
2513
2803
  const isStreaming = requestBody['stream'] === true;
2804
+ // ── Response Cache: check for cached response ──
2805
+ const cacheBypass = responseCache.shouldBypass(requestBody);
2806
+ let cacheHash;
2807
+ if (!cacheBypass) {
2808
+ cacheHash = responseCache.computeKey(requestBody);
2809
+ const cached = responseCache.get(cacheHash);
2810
+ if (cached) {
2811
+ try {
2812
+ const cachedData = JSON.parse(cached);
2813
+ const cacheUsage = cachedData?.usage;
2814
+ const cacheCost = (0, telemetry_js_1.estimateCost)(requestBody['model'] ?? '', cacheUsage?.input_tokens ?? 0, cacheUsage?.output_tokens ?? 0);
2815
+ responseCache.recordHit(cacheCost, 0);
2816
+ // Replay cached streaming response as SSE
2817
+ if (isStreaming && cachedData._relayplaneStreamCache) {
2818
+ res.writeHead(200, {
2819
+ 'Content-Type': 'text/event-stream',
2820
+ 'Cache-Control': 'no-cache',
2821
+ 'Connection': 'keep-alive',
2822
+ 'X-RelayPlane-Cache': 'HIT',
2823
+ });
2824
+ res.end(cachedData.ssePayload);
2825
+ }
2826
+ else {
2827
+ res.writeHead(200, {
2828
+ 'Content-Type': 'application/json',
2829
+ 'X-RelayPlane-Cache': 'HIT',
2830
+ });
2831
+ res.end(cached);
2832
+ }
2833
+ log(`Cache HIT for ${requestBody['model']} (hash: ${cacheHash.slice(0, 8)})`);
2834
+ return;
2835
+ }
2836
+ catch {
2837
+ // Corrupt cache entry, continue to provider
2838
+ }
2839
+ }
2840
+ responseCache.recordMiss();
2841
+ }
2842
+ else {
2843
+ responseCache.recordBypass();
2844
+ }
2845
+ // ── End cache check ──
2514
2846
  const messages = Array.isArray(requestBody['messages'])
2515
2847
  ? requestBody['messages']
2516
2848
  : [];
@@ -2619,6 +2951,47 @@ async function startProxy(config = {}) {
2619
2951
  res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
2620
2952
  return;
2621
2953
  }
2954
+ // ── Budget check + auto-downgrade ──
2955
+ const budgetExtraHeaders = {};
2956
+ {
2957
+ const budgetCheck = preRequestBudgetCheck(targetModel || requestedModel);
2958
+ if (budgetCheck.blocked) {
2959
+ res.writeHead(429, { 'Content-Type': 'application/json' });
2960
+ res.end(JSON.stringify({
2961
+ error: 'Budget limit exceeded. Request blocked.',
2962
+ type: 'budget_exceeded',
2963
+ }));
2964
+ return;
2965
+ }
2966
+ if (budgetCheck.downgraded) {
2967
+ log(`Budget downgrade: ${targetModel || requestedModel} → ${budgetCheck.model}`);
2968
+ targetModel = budgetCheck.model;
2969
+ if (requestBody)
2970
+ requestBody['model'] = targetModel;
2971
+ }
2972
+ Object.assign(budgetExtraHeaders, budgetCheck.headers);
2973
+ }
2974
+ // ── End budget check ──
2975
+ // ── Rate limit check ──
2976
+ const workspaceId = 'local'; // Local proxy uses single workspace
2977
+ const rateLimit = (0, rate_limiter_js_1.checkLimit)(workspaceId, targetModel);
2978
+ if (!rateLimit.allowed) {
2979
+ console.error(`[RATE LIMIT] ${targetModel} limit reached for workspace: ${workspaceId}`);
2980
+ res.writeHead(429, {
2981
+ 'Content-Type': 'application/json',
2982
+ 'Retry-After': String(rateLimit.retryAfter || 60),
2983
+ 'X-RelayPlane-RateLimit-Limit': String(rateLimit.limit),
2984
+ 'X-RelayPlane-RateLimit-Remaining': '0',
2985
+ 'X-RelayPlane-RateLimit-Reset': String(Math.ceil(rateLimit.resetAt / 1000))
2986
+ });
2987
+ res.end(JSON.stringify({
2988
+ error: `Rate limit exceeded for ${targetModel}. Max ${rateLimit.limit} requests per minute.`,
2989
+ type: 'rate_limit_exceeded',
2990
+ retry_after: rateLimit.retryAfter || 60
2991
+ }));
2992
+ return;
2993
+ }
2994
+ // ── End rate limit check ──
2622
2995
  const startTime = Date.now();
2623
2996
  let nativeResponseData;
2624
2997
  try {
@@ -2688,11 +3061,16 @@ async function startProxy(config = {}) {
2688
3061
  'Content-Type': 'text/event-stream',
2689
3062
  'Cache-Control': 'no-cache',
2690
3063
  'Connection': 'keep-alive',
3064
+ 'X-RelayPlane-Cache': cacheBypass ? 'BYPASS' : 'MISS',
2691
3065
  ...nativeStreamRpHeaders,
2692
3066
  });
2693
3067
  const reader = providerResponse.body?.getReader();
2694
3068
  let streamTokensIn = 0;
2695
3069
  let streamTokensOut = 0;
3070
+ let streamCacheCreation = 0;
3071
+ let streamCacheRead = 0;
3072
+ // Buffer raw SSE chunks for cache storage
3073
+ const rawChunks = [];
2696
3074
  if (reader) {
2697
3075
  const decoder = new TextDecoder();
2698
3076
  let sseBuffer = '';
@@ -2703,6 +3081,8 @@ async function startProxy(config = {}) {
2703
3081
  break;
2704
3082
  const chunk = decoder.decode(value, { stream: true });
2705
3083
  res.write(chunk);
3084
+ if (cacheHash && !cacheBypass)
3085
+ rawChunks.push(chunk);
2706
3086
  // Parse SSE events to extract usage from message_delta / message_stop
2707
3087
  sseBuffer += chunk;
2708
3088
  const lines = sseBuffer.split('\n');
@@ -2715,9 +3095,11 @@ async function startProxy(config = {}) {
2715
3095
  if (evt.type === 'message_delta' && evt.usage) {
2716
3096
  streamTokensOut = evt.usage.output_tokens ?? streamTokensOut;
2717
3097
  }
2718
- // Anthropic: message_start has usage.input_tokens
3098
+ // Anthropic: message_start has usage.input_tokens + cache tokens
2719
3099
  if (evt.type === 'message_start' && evt.message?.usage) {
2720
3100
  streamTokensIn = evt.message.usage.input_tokens ?? streamTokensIn;
3101
+ streamCacheCreation = evt.message.usage.cache_creation_input_tokens ?? 0;
3102
+ streamCacheRead = evt.message.usage.cache_read_input_tokens ?? 0;
2721
3103
  }
2722
3104
  // OpenAI format: choices with usage
2723
3105
  if (evt.usage) {
@@ -2736,15 +3118,45 @@ async function startProxy(config = {}) {
2736
3118
  reader.releaseLock();
2737
3119
  }
2738
3120
  }
3121
+ // ── Cache: store streaming response as raw SSE payload ──
3122
+ if (cacheHash && !cacheBypass && rawChunks.length > 0) {
3123
+ const streamPayload = JSON.stringify({
3124
+ _relayplaneStreamCache: true,
3125
+ ssePayload: rawChunks.join(''),
3126
+ usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead },
3127
+ });
3128
+ responseCache.set(cacheHash, streamPayload, {
3129
+ model: targetModel || requestedModel,
3130
+ tokensIn: streamTokensIn,
3131
+ tokensOut: streamTokensOut,
3132
+ costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined),
3133
+ taskType,
3134
+ });
3135
+ log(`Cache STORE (stream) for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
3136
+ }
2739
3137
  // Store streaming token counts so telemetry can use them
2740
- nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut } };
3138
+ nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead } };
2741
3139
  res.end();
2742
3140
  }
2743
3141
  else {
2744
3142
  nativeResponseData = await providerResponse.json();
2745
3143
  const nativeRespModel = checkResponseModelMismatch(nativeResponseData, targetModel || requestedModel, targetProvider, log);
2746
3144
  const nativeRpHeaders = buildRelayPlaneResponseHeaders(targetModel || requestedModel, originalModel ?? 'unknown', complexity, targetProvider, routingMode);
2747
- res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', ...nativeRpHeaders });
3145
+ // ── Cache: store non-streaming response ──
3146
+ const nativeCacheHeader = cacheBypass ? 'BYPASS' : 'MISS';
3147
+ if (cacheHash && !cacheBypass) {
3148
+ const nativeRespJson = JSON.stringify(nativeResponseData);
3149
+ const nativeUsage = nativeResponseData?.usage;
3150
+ responseCache.set(cacheHash, nativeRespJson, {
3151
+ model: targetModel || requestedModel,
3152
+ tokensIn: nativeUsage?.input_tokens ?? 0,
3153
+ tokensOut: nativeUsage?.output_tokens ?? 0,
3154
+ costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeUsage?.input_tokens ?? 0, nativeUsage?.output_tokens ?? 0),
3155
+ taskType,
3156
+ });
3157
+ log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
3158
+ }
3159
+ res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, ...nativeRpHeaders });
2748
3160
  res.end(JSON.stringify(nativeResponseData));
2749
3161
  }
2750
3162
  }
@@ -2754,9 +3166,17 @@ async function startProxy(config = {}) {
2754
3166
  // nativeResponseData holds response JSON for non-streaming, or { usage: { input_tokens, output_tokens } }
2755
3167
  // synthesised from SSE events for streaming
2756
3168
  const nativeUsageData = nativeResponseData?.usage;
2757
- const nativeTokIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
3169
+ const nativeBaseTokIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
2758
3170
  const nativeTokOut = nativeUsageData?.output_tokens ?? nativeUsageData?.completion_tokens ?? 0;
2759
- updateLastHistoryEntry(nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut));
3171
+ const nativeCacheCreation = nativeUsageData?.cache_creation_input_tokens ?? 0;
3172
+ const nativeCacheRead = nativeUsageData?.cache_read_input_tokens ?? 0;
3173
+ // Include cache tokens in displayed/recorded token count
3174
+ const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
3175
+ // Cost calculation expects inputTokens to include cache tokens when cache params are provided
3176
+ const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3177
+ updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd);
3178
+ // ── Post-request: budget spend + anomaly detection ──
3179
+ postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
2760
3180
  if (recordTelemetry) {
2761
3181
  relay
2762
3182
  .run({
@@ -2765,7 +3185,8 @@ async function startProxy(config = {}) {
2765
3185
  model: `${targetProvider}:${targetModel || requestedModel}`,
2766
3186
  })
2767
3187
  .catch(() => { });
2768
- sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined);
3188
+ sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined);
3189
+ meshCapture(targetModel || requestedModel, targetProvider, taskType, nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined), durationMs, true);
2769
3190
  }
2770
3191
  }
2771
3192
  catch (err) {
@@ -2847,6 +3268,47 @@ async function startProxy(config = {}) {
2847
3268
  return;
2848
3269
  }
2849
3270
  const isStreaming = request.stream === true;
3271
+ // ── Response Cache: check for cached response (chat/completions) ──
3272
+ const chatCacheBypass = responseCache.shouldBypass(request);
3273
+ let chatCacheHash;
3274
+ if (!chatCacheBypass) {
3275
+ chatCacheHash = responseCache.computeKey(request);
3276
+ const chatCached = responseCache.get(chatCacheHash);
3277
+ if (chatCached) {
3278
+ try {
3279
+ const chatCachedData = JSON.parse(chatCached);
3280
+ const chatCacheUsage = chatCachedData?.usage;
3281
+ const chatCacheCost = (0, telemetry_js_1.estimateCost)(request.model ?? '', chatCacheUsage?.prompt_tokens ?? chatCacheUsage?.input_tokens ?? 0, chatCacheUsage?.completion_tokens ?? chatCacheUsage?.output_tokens ?? 0);
3282
+ responseCache.recordHit(chatCacheCost, 0);
3283
+ if (isStreaming && chatCachedData._relayplaneStreamCache) {
3284
+ res.writeHead(200, {
3285
+ 'Content-Type': 'text/event-stream',
3286
+ 'Cache-Control': 'no-cache',
3287
+ 'Connection': 'keep-alive',
3288
+ 'X-RelayPlane-Cache': 'HIT',
3289
+ });
3290
+ res.end(chatCachedData.ssePayload);
3291
+ }
3292
+ else {
3293
+ res.writeHead(200, {
3294
+ 'Content-Type': 'application/json',
3295
+ 'X-RelayPlane-Cache': 'HIT',
3296
+ });
3297
+ res.end(chatCached);
3298
+ }
3299
+ log(`Cache HIT for chat/completions ${request.model} (hash: ${chatCacheHash.slice(0, 8)})`);
3300
+ return;
3301
+ }
3302
+ catch {
3303
+ // Corrupt, continue
3304
+ }
3305
+ }
3306
+ responseCache.recordMiss();
3307
+ }
3308
+ else {
3309
+ responseCache.recordBypass();
3310
+ }
3311
+ // ── End cache check ──
2850
3312
  const bypassRouting = !relayplaneEnabled || relayplaneBypass;
2851
3313
  // Extract routing mode from model name
2852
3314
  const originalRequestedModel = request.model;
@@ -3065,10 +3527,48 @@ async function startProxy(config = {}) {
3065
3527
  }
3066
3528
  apiKey = apiKeyResult.apiKey;
3067
3529
  }
3530
+ // ── Budget check + auto-downgrade (chat/completions) ──
3531
+ {
3532
+ const chatBudgetCheck = preRequestBudgetCheck(targetModel);
3533
+ if (chatBudgetCheck.blocked) {
3534
+ res.writeHead(429, { 'Content-Type': 'application/json' });
3535
+ res.end(JSON.stringify({
3536
+ error: 'Budget limit exceeded. Request blocked.',
3537
+ type: 'budget_exceeded',
3538
+ }));
3539
+ return;
3540
+ }
3541
+ if (chatBudgetCheck.downgraded) {
3542
+ log(`Budget downgrade: ${targetModel} → ${chatBudgetCheck.model}`);
3543
+ targetModel = chatBudgetCheck.model;
3544
+ request.model = targetModel;
3545
+ }
3546
+ }
3547
+ // ── End budget check ──
3548
+ // ── Rate limit check ──
3549
+ const chatWorkspaceId = 'local'; // Local proxy uses single workspace
3550
+ const chatRateLimit = (0, rate_limiter_js_1.checkLimit)(chatWorkspaceId, targetModel);
3551
+ if (!chatRateLimit.allowed) {
3552
+ console.error(`[RATE LIMIT] ${targetModel} limit reached for workspace: ${chatWorkspaceId}`);
3553
+ res.writeHead(429, {
3554
+ 'Content-Type': 'application/json',
3555
+ 'Retry-After': String(chatRateLimit.retryAfter || 60),
3556
+ 'X-RelayPlane-RateLimit-Limit': String(chatRateLimit.limit),
3557
+ 'X-RelayPlane-RateLimit-Remaining': '0',
3558
+ 'X-RelayPlane-RateLimit-Reset': String(Math.ceil(chatRateLimit.resetAt / 1000))
3559
+ });
3560
+ res.end(JSON.stringify({
3561
+ error: `Rate limit exceeded for ${targetModel}. Max ${chatRateLimit.limit} requests per minute.`,
3562
+ type: 'rate_limit_exceeded',
3563
+ retry_after: chatRateLimit.retryAfter || 60
3564
+ }));
3565
+ return;
3566
+ }
3567
+ // ── End rate limit check ──
3068
3568
  const startTime = Date.now();
3069
3569
  // Handle streaming vs non-streaming
3070
3570
  if (isStreaming) {
3071
- await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
3571
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
3072
3572
  }
3073
3573
  else {
3074
3574
  if (useCascade && cascadeConfig) {
@@ -3129,6 +3629,7 @@ async function startProxy(config = {}) {
3129
3629
  log(`Failed to record run: ${err}`);
3130
3630
  }
3131
3631
  sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
3632
+ meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
3132
3633
  }
3133
3634
  const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
3134
3635
  res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
@@ -3152,6 +3653,74 @@ async function startProxy(config = {}) {
3152
3653
  }
3153
3654
  }
3154
3655
  });
3656
+ // ── Health Watchdog ──
3657
+ let watchdogFailures = 0;
3658
+ const WATCHDOG_MAX_FAILURES = 3;
3659
+ const WATCHDOG_INTERVAL_MS = 15_000; // Must be < WatchdogSec (30s) to avoid false kills
3660
+ let watchdogTimer = null;
3661
/**
 * sd_notify: report a state string (e.g. "READY=1", "WATCHDOG=1",
 * "STOPPING=1") to systemd when running under a notify-enabled unit.
 *
 * Does nothing when $NOTIFY_SOCKET is unset (i.e. not started by systemd).
 *
 * Node's dgram module only supports 'udp4' and 'udp6' sockets, so the
 * notification socket (an AF_UNIX datagram socket) cannot be written to
 * directly; dgram.createSocket('unix_dgram') throws. The message is therefore
 * delivered through the `systemd-notify` helper, which inherits
 * $NOTIFY_SOCKET from this process's environment.
 *
 * NOTE(review): systemd accepts messages from a helper process only when the
 * unit allows it (NotifyAccess=all or exec) — confirm against the shipped
 * service file.
 *
 * Best-effort: failures are logged and never thrown to the caller.
 *
 * @param {string} state - Notification assignment(s) understood by systemd.
 * @returns {void}
 */
function sdNotify(state) {
    const notifySocket = process.env['NOTIFY_SOCKET'];
    if (!notifySocket)
        return;
    try {
        const { execFile } = require('node:child_process');
        // A missing binary (ENOENT) or non-zero exit is reported through the
        // callback, not thrown; the timeout keeps a hung helper from lingering.
        execFile('systemd-notify', [state], { timeout: 5000 }, (err) => {
            if (err)
                log(`sd_notify error: ${err}`);
        });
    }
    catch (err) {
        log(`sd_notify error: ${err}`);
    }
}
3680
/**
 * Start the self health watchdog.
 *
 * Sends READY=1 via sdNotify, then every WATCHDOG_INTERVAL_MS issues a GET to
 * this server's own /health endpoint (5 s timeout). A 2xx response resets the
 * failure counter and sends WATCHDOG=1; any other status or a thrown error
 * increments it. Once WATCHDOG_MAX_FAILURES consecutive failures are reached,
 * the watchdog stops probing, sends STOPPING=1, closes the server and exits
 * with code 1 (force-exiting after 10 s if the close does not complete).
 *
 * The interval timer is unref'd so it does not keep the process alive.
 *
 * @returns {void}
 */
function startWatchdog() {
    // Notify systemd we're ready
    sdNotify('READY=1');
    // Build the probe URL once. Wildcard bind addresses are not connectable on
    // every platform, and IPv6 literals must be bracketed inside a URL;
    // otherwise every probe would fail and the watchdog would kill a healthy
    // proxy.
    let probeHost = String(host);
    if (probeHost === '0.0.0.0') {
        probeHost = '127.0.0.1';
    }
    else if (probeHost === '::') {
        probeHost = '::1';
    }
    if (probeHost.includes(':') && !probeHost.startsWith('[')) {
        probeHost = `[${probeHost}]`;
    }
    const healthUrl = `http://${probeHost}:${port}/health`;
    watchdogTimer = setInterval(async () => {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), 5000);
        try {
            const res = await fetch(healthUrl, { signal: controller.signal });
            if (res.ok) {
                watchdogFailures = 0;
                // Notify systemd watchdog we're alive
                sdNotify('WATCHDOG=1');
            }
            else {
                watchdogFailures++;
                console.error(`[RelayPlane] Watchdog: health check returned ${res.status} (failure ${watchdogFailures}/${WATCHDOG_MAX_FAILURES})`);
            }
        }
        catch (err) {
            watchdogFailures++;
            console.error(`[RelayPlane] Watchdog: health check failed (failure ${watchdogFailures}/${WATCHDOG_MAX_FAILURES}): ${err}`);
        }
        finally {
            // Always release the abort timer, including when fetch rejects.
            clearTimeout(timeout);
        }
        if (watchdogFailures >= WATCHDOG_MAX_FAILURES) {
            console.error(`[RelayPlane] CRITICAL: ${WATCHDOG_MAX_FAILURES} consecutive watchdog failures. Attempting graceful restart...`);
            // Stop probing so the shutdown sequence below runs exactly once.
            if (watchdogTimer) {
                clearInterval(watchdogTimer);
                watchdogTimer = null;
            }
            sdNotify('STOPPING=1');
            // Close server and exit — systemd Restart=always will restart us
            server.close(() => {
                process.exit(1);
            });
            // Force exit after 10s if graceful close hangs
            setTimeout(() => process.exit(1), 10_000).unref();
        }
    }, WATCHDOG_INTERVAL_MS);
    watchdogTimer.unref();
}
3716
// On SIGINT/SIGTERM: stop the watchdog interval (if one is running) and tell
// systemd the service is stopping.
// NOTE(review): this handler does not exit the process itself; presumably
// another signal handler elsewhere performs the actual shutdown — confirm.
const origHandleShutdown = () => {
    if (watchdogTimer) {
        clearInterval(watchdogTimer);
    }
    sdNotify('STOPPING=1');
};
for (const shutdownSignal of ['SIGINT', 'SIGTERM']) {
    process.on(shutdownSignal, origHandleShutdown);
}
3155
3724
  return new Promise((resolve, reject) => {
3156
3725
  server.on('error', reject);
3157
3726
  server.listen(port, host, () => {
@@ -3164,6 +3733,8 @@ async function startProxy(config = {}) {
3164
3733
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:fast, relayplane:quality`);
3165
3734
  console.log(` Auth: Passthrough for Anthropic, env vars for other providers`);
3166
3735
  console.log(` Streaming: ✅ Enabled`);
3736
+ startWatchdog();
3737
+ log('Health watchdog started (30s interval, sd_notify enabled)');
3167
3738
  resolve(server);
3168
3739
  });
3169
3740
  });
@@ -3221,7 +3792,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
3221
3792
  }
3222
3793
  return { responseData, ok: true, status: 200 };
3223
3794
  }
3224
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
3795
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
3225
3796
  let providerResponse;
3226
3797
  try {
3227
3798
  switch (targetProvider) {
@@ -3277,6 +3848,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3277
3848
  // Track token usage from streaming events
3278
3849
  let streamTokensIn = 0;
3279
3850
  let streamTokensOut = 0;
3851
+ const shouldCacheStream = !!(cacheHash && !cacheBypass);
3852
+ const rawChunks = [];
3280
3853
  try {
3281
3854
  // Stream the response based on provider format
3282
3855
  switch (targetProvider) {
@@ -3284,6 +3857,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3284
3857
  // Convert Anthropic stream to OpenAI format
3285
3858
  for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
3286
3859
  res.write(chunk);
3860
+ if (shouldCacheStream)
3861
+ rawChunks.push(chunk);
3287
3862
  // Parse OpenAI-format chunks for usage (emitted at end of stream)
3288
3863
  try {
3289
3864
  const lines = chunk.split('\n');
@@ -3304,6 +3879,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3304
3879
  // Convert Gemini stream to OpenAI format
3305
3880
  for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
3306
3881
  res.write(chunk);
3882
+ if (shouldCacheStream)
3883
+ rawChunks.push(chunk);
3307
3884
  try {
3308
3885
  const lines = chunk.split('\n');
3309
3886
  for (const line of lines) {
@@ -3323,6 +3900,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3323
3900
  // xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
3324
3901
  for await (const chunk of pipeOpenAIStream(providerResponse)) {
3325
3902
  res.write(chunk);
3903
+ if (shouldCacheStream)
3904
+ rawChunks.push(chunk);
3326
3905
  try {
3327
3906
  const lines = chunk.split('\n');
3328
3907
  for (const line of lines) {
@@ -3342,6 +3921,23 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3342
3921
  catch (err) {
3343
3922
  log(`Streaming error: ${err}`);
3344
3923
  }
3924
+ // ── Cache: store streaming response ──
3925
+ if (shouldCacheStream && cacheHash && rawChunks.length > 0) {
3926
+ const responseCache = (0, response_cache_js_1.getResponseCache)();
3927
+ const streamPayload = JSON.stringify({
3928
+ _relayplaneStreamCache: true,
3929
+ ssePayload: rawChunks.join(''),
3930
+ usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, prompt_tokens: streamTokensIn, completion_tokens: streamTokensOut },
3931
+ });
3932
+ responseCache.set(cacheHash, streamPayload, {
3933
+ model: targetModel,
3934
+ tokensIn: streamTokensIn,
3935
+ tokensOut: streamTokensOut,
3936
+ costUsd: (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut),
3937
+ taskType,
3938
+ });
3939
+ log(`Cache STORE (stream) for chat/completions ${targetModel} (hash: ${cacheHash.slice(0, 8)})`);
3940
+ }
3345
3941
  if (cooldownsEnabled) {
3346
3942
  cooldownManager.recordSuccess(targetProvider);
3347
3943
  }
@@ -3351,6 +3947,17 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3351
3947
  // Update token/cost info on the history entry
3352
3948
  const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
3353
3949
  updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
3950
+ // ── Post-request: budget spend + anomaly detection ──
3951
+ try {
3952
+ (0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
3953
+ const anomalyResult = (0, anomaly_js_1.getAnomalyDetector)().recordAndAnalyze({ model: targetModel, tokensIn: streamTokensIn, tokensOut: streamTokensOut, costUsd: streamCost });
3954
+ if (anomalyResult.detected) {
3955
+ for (const anomaly of anomalyResult.anomalies) {
3956
+ (0, alerts_js_1.getAlertManager)().fireAnomaly(anomaly);
3957
+ }
3958
+ }
3959
+ }
3960
+ catch { /* budget/anomaly should never block */ }
3354
3961
  if (recordTelemetry) {
3355
3962
  // Record the run (non-blocking)
3356
3963
  relay
@@ -3366,6 +3973,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3366
3973
  log(`Failed to record run: ${err}`);
3367
3974
  });
3368
3975
  sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined);
3976
+ meshCapture(targetModel, targetProvider, taskType, streamTokensIn, streamTokensOut, streamCost, durationMs, true);
3369
3977
  }
3370
3978
  res.end();
3371
3979
  }
@@ -3413,6 +4021,17 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
3413
4021
  const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
3414
4022
  const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
3415
4023
  updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel);
4024
+ // ── Post-request: budget spend + anomaly detection ──
4025
+ try {
4026
+ (0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
4027
+ const anomalyResult = (0, anomaly_js_1.getAnomalyDetector)().recordAndAnalyze({ model: targetModel, tokensIn, tokensOut, costUsd: cost });
4028
+ if (anomalyResult.detected) {
4029
+ for (const anomaly of anomalyResult.anomalies) {
4030
+ (0, alerts_js_1.getAlertManager)().fireAnomaly(anomaly);
4031
+ }
4032
+ }
4033
+ }
4034
+ catch { /* budget/anomaly should never block */ }
3416
4035
  if (recordTelemetry) {
3417
4036
  // Record the run in RelayPlane
3418
4037
  try {
@@ -3440,10 +4059,27 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
3440
4059
  const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
3441
4060
  const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
3442
4061
  sendCloudTelemetry(taskType, targetModel, tokensIn, tokensOut, durationMs, true);
4062
+ meshCapture(targetModel, targetProvider, taskType, tokensIn, tokensOut, cost, durationMs, true);
4063
+ }
4064
+ // ── Cache: store non-streaming chat/completions response ──
4065
+ const chatRespCache = (0, response_cache_js_1.getResponseCache)();
4066
+ const chatReqAsRecord = request;
4067
+ const chatCacheBypassLocal = chatRespCache.shouldBypass(chatReqAsRecord);
4068
+ let chatCacheHeaderVal = chatCacheBypassLocal ? 'BYPASS' : 'MISS';
4069
+ if (!chatCacheBypassLocal) {
4070
+ const chatHashLocal = chatRespCache.computeKey(chatReqAsRecord);
4071
+ chatRespCache.set(chatHashLocal, JSON.stringify(responseData), {
4072
+ model: targetModel,
4073
+ tokensIn: tokensIn,
4074
+ tokensOut: tokensOut,
4075
+ costUsd: cost,
4076
+ taskType,
4077
+ });
4078
+ log(`Cache STORE for chat/completions ${targetModel} (hash: ${chatHashLocal.slice(0, 8)})`);
3443
4079
  }
3444
4080
  // Send response with RelayPlane routing headers
3445
4081
  const nonStreamRpHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model ?? 'unknown', complexity, targetProvider, routingMode);
3446
- res.writeHead(200, { 'Content-Type': 'application/json', ...nonStreamRpHeaders });
4082
+ res.writeHead(200, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': chatCacheHeaderVal, ...nonStreamRpHeaders });
3447
4083
  res.end(JSON.stringify(responseData));
3448
4084
  }
3449
4085
  // Note: CLI entry point is in cli.ts