@relayplane/proxy 1.5.46 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +251 -15
- package/assets/relayplane-proxy.service +20 -0
- package/dist/alerts.d.ts +72 -0
- package/dist/alerts.d.ts.map +1 -0
- package/dist/alerts.js +290 -0
- package/dist/alerts.js.map +1 -0
- package/dist/anomaly.d.ts +65 -0
- package/dist/anomaly.d.ts.map +1 -0
- package/dist/anomaly.js +193 -0
- package/dist/anomaly.js.map +1 -0
- package/dist/budget.d.ts +98 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +356 -0
- package/dist/budget.js.map +1 -0
- package/dist/cli.js +512 -93
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +122 -24
- package/dist/config.js.map +1 -1
- package/dist/downgrade.d.ts +37 -0
- package/dist/downgrade.d.ts.map +1 -0
- package/dist/downgrade.js +79 -0
- package/dist/downgrade.js.map +1 -0
- package/dist/mesh/capture.d.ts +11 -0
- package/dist/mesh/capture.d.ts.map +1 -0
- package/dist/mesh/capture.js +43 -0
- package/dist/mesh/capture.js.map +1 -0
- package/dist/mesh/fitness.d.ts +14 -0
- package/dist/mesh/fitness.d.ts.map +1 -0
- package/dist/mesh/fitness.js +40 -0
- package/dist/mesh/fitness.js.map +1 -0
- package/dist/mesh/index.d.ts +39 -0
- package/dist/mesh/index.d.ts.map +1 -0
- package/dist/mesh/index.js +118 -0
- package/dist/mesh/index.js.map +1 -0
- package/dist/mesh/store.d.ts +30 -0
- package/dist/mesh/store.d.ts.map +1 -0
- package/dist/mesh/store.js +174 -0
- package/dist/mesh/store.js.map +1 -0
- package/dist/mesh/sync.d.ts +37 -0
- package/dist/mesh/sync.d.ts.map +1 -0
- package/dist/mesh/sync.js +154 -0
- package/dist/mesh/sync.js.map +1 -0
- package/dist/mesh/types.d.ts +57 -0
- package/dist/mesh/types.d.ts.map +1 -0
- package/dist/mesh/types.js +7 -0
- package/dist/mesh/types.js.map +1 -0
- package/dist/rate-limiter.d.ts +64 -0
- package/dist/rate-limiter.d.ts.map +1 -0
- package/dist/rate-limiter.js +159 -0
- package/dist/rate-limiter.js.map +1 -0
- package/dist/relay-config.d.ts +9 -0
- package/dist/relay-config.d.ts.map +1 -1
- package/dist/relay-config.js +2 -0
- package/dist/relay-config.js.map +1 -1
- package/dist/response-cache.d.ts +139 -0
- package/dist/response-cache.d.ts.map +1 -0
- package/dist/response-cache.js +515 -0
- package/dist/response-cache.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +5 -1
- package/dist/server.js.map +1 -1
- package/dist/standalone-proxy.d.ts +2 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +662 -26
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/telemetry.d.ts.map +1 -1
- package/dist/telemetry.js +8 -5
- package/dist/telemetry.js.map +1 -1
- package/dist/utils/model-suggestions.d.ts.map +1 -1
- package/dist/utils/model-suggestions.js +19 -2
- package/dist/utils/model-suggestions.js.map +1 -1
- package/dist/utils/version-status.d.ts +9 -0
- package/dist/utils/version-status.d.ts.map +1 -0
- package/dist/utils/version-status.js +28 -0
- package/dist/utils/version-status.js.map +1 -0
- package/package.json +7 -3
package/dist/standalone-proxy.js
CHANGED
|
@@ -67,7 +67,16 @@ const path = __importStar(require("node:path"));
|
|
|
67
67
|
const core_1 = require("@relayplane/core");
|
|
68
68
|
const model_suggestions_js_1 = require("./utils/model-suggestions.js");
|
|
69
69
|
const telemetry_js_1 = require("./telemetry.js");
|
|
70
|
+
const config_js_1 = require("./config.js");
|
|
71
|
+
const index_js_1 = require("./mesh/index.js");
|
|
72
|
+
const response_cache_js_1 = require("./response-cache.js");
|
|
70
73
|
const stats_js_1 = require("./stats.js");
|
|
74
|
+
const rate_limiter_js_1 = require("./rate-limiter.js");
|
|
75
|
+
const budget_js_1 = require("./budget.js");
|
|
76
|
+
const anomaly_js_1 = require("./anomaly.js");
|
|
77
|
+
const alerts_js_1 = require("./alerts.js");
|
|
78
|
+
const downgrade_js_1 = require("./downgrade.js");
|
|
79
|
+
const version_status_js_1 = require("./utils/version-status.js");
|
|
71
80
|
const PROXY_VERSION = (() => {
|
|
72
81
|
try {
|
|
73
82
|
const pkgPath = path.join(__dirname, '..', 'package.json');
|
|
@@ -77,8 +86,54 @@ const PROXY_VERSION = (() => {
|
|
|
77
86
|
return '0.0.0';
|
|
78
87
|
}
|
|
79
88
|
})();
|
|
89
|
+
/** Result of the most recent registry lookup; `checkedAt` is a Date.now() value (0 = never checked). */
let latestProxyVersionCache = { value: null, checkedAt: 0 };
/** How long a lookup result (success or failure) is reused before the registry is queried again. */
const LATEST_PROXY_VERSION_TTL_MS = 30 * 60 * 1000;
/**
 * Look up the latest published @relayplane/proxy version on the npm registry.
 *
 * The outcome is cached for LATEST_PROXY_VERSION_TTL_MS, including failures,
 * which are cached as `null`. The lookup is best-effort: network errors,
 * timeouts, non-2xx responses and malformed payloads all resolve to `null`
 * instead of throwing.
 *
 * @returns {Promise<string|null>} The latest version string, or `null` when it
 *   could not be determined.
 */
async function getLatestProxyVersion() {
    const REQUEST_TIMEOUT_MS = 2500;
    const now = Date.now();
    if (now - latestProxyVersionCache.checkedAt < LATEST_PROXY_VERSION_TTL_MS) {
        return latestProxyVersionCache.value;
    }
    let latest = null;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    try {
        const res = await fetch('https://registry.npmjs.org/@relayplane/proxy/latest', {
            signal: controller.signal,
            headers: { Accept: 'application/json' },
        });
        if (res.ok) {
            // The abort timer is still armed here so a stalled body read is bounded too.
            const data = await res.json();
            // Only accept a string; anything else is treated as "unknown".
            latest = typeof data?.version === 'string' ? data.version : null;
        }
    }
    catch {
        // Best-effort lookup: any failure simply means "unable to check".
        latest = null;
    }
    finally {
        // Always release the timer, including when fetch() rejects.
        clearTimeout(timeout);
    }
    latestProxyVersionCache = { value: latest, checkedAt: now };
    return latest;
}
|
|
80
118
|
/** Shared stats collector instance for the proxy server */
|
|
81
119
|
exports.proxyStatsCollector = new stats_js_1.StatsCollector();
|
|
120
|
+
/** Shared mesh handle — set during startProxy() */
|
|
121
|
+
let _meshHandle = null;
|
|
122
|
+
/** Capture a request into the mesh (fire-and-forget, never blocks) */
|
|
123
|
+
function meshCapture(model, provider, taskType, tokensIn, tokensOut, costUsd, latencyMs, success, errorType) {
    // No-op while no mesh handle has been installed.
    if (_meshHandle) {
        try {
            // Build the snake_case record the mesh handle receives, stamped with the current time.
            const record = {
                model,
                provider,
                task_type: taskType,
                input_tokens: tokensIn,
                output_tokens: tokensOut,
                cost_usd: costUsd,
                latency_ms: latencyMs,
                success,
                error_type: errorType,
                timestamp: new Date().toISOString(),
            };
            _meshHandle.captureRequest(record);
        }
        catch {
            // Intentionally ignored: mesh capture must never affect the request path.
        }
    }
}
|
|
82
137
|
/**
|
|
83
138
|
* Default provider endpoints
|
|
84
139
|
*/
|
|
@@ -171,10 +226,10 @@ exports.SMART_ALIASES = {
|
|
|
171
226
|
* Send a telemetry event to the cloud (anonymous or authenticated).
|
|
172
227
|
* Non-blocking — errors are silently swallowed.
|
|
173
228
|
*/
|
|
174
|
-
function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel) {
|
|
229
|
+
function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, success, costUsd, requestedModel, cacheCreationTokens, cacheReadTokens) {
|
|
175
230
|
try {
|
|
176
|
-
const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut);
|
|
177
|
-
|
|
231
|
+
const cost = costUsd ?? (0, telemetry_js_1.estimateCost)(model, tokensIn, tokensOut, cacheCreationTokens, cacheReadTokens);
|
|
232
|
+
const event = {
|
|
178
233
|
task_type: taskType,
|
|
179
234
|
model,
|
|
180
235
|
tokens_in: tokensIn,
|
|
@@ -183,7 +238,21 @@ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, suc
|
|
|
183
238
|
success,
|
|
184
239
|
cost_usd: cost,
|
|
185
240
|
requested_model: requestedModel,
|
|
186
|
-
|
|
241
|
+
cache_creation_tokens: cacheCreationTokens,
|
|
242
|
+
cache_read_tokens: cacheReadTokens,
|
|
243
|
+
};
|
|
244
|
+
// Record locally (writes to telemetry.jsonl + queues upload if telemetry_enabled)
|
|
245
|
+
(0, telemetry_js_1.recordTelemetry)(event);
|
|
246
|
+
// Ensure cloud upload even if local telemetry_enabled is false
|
|
247
|
+
// recordCloudTelemetry skips queueForUpload when telemetry is disabled,
|
|
248
|
+
// but cloud dashboard needs these events regardless of local config
|
|
249
|
+
if (!(0, config_js_1.isTelemetryEnabled)()) {
|
|
250
|
+
(0, telemetry_js_1.queueForUpload)({
|
|
251
|
+
...event,
|
|
252
|
+
device_id: (0, config_js_1.getDeviceId)(),
|
|
253
|
+
timestamp: new Date().toISOString(),
|
|
254
|
+
});
|
|
255
|
+
}
|
|
187
256
|
}
|
|
188
257
|
catch {
|
|
189
258
|
// Telemetry should never break the proxy
|
|
@@ -220,15 +289,15 @@ function resolveModelAlias(model) {
|
|
|
220
289
|
* Routes every task type to Claude Sonnet 4.6 by default, upgrades based on learned rules
|
|
221
290
|
*/
|
|
222
291
|
const DEFAULT_ROUTING = {
|
|
223
|
-
code_generation: { provider: 'anthropic', model: 'claude-
|
|
224
|
-
code_review: { provider: 'anthropic', model: 'claude-
|
|
225
|
-
summarization: { provider: 'anthropic', model: 'claude-
|
|
226
|
-
analysis: { provider: 'anthropic', model: 'claude-
|
|
227
|
-
creative_writing: { provider: 'anthropic', model: 'claude-
|
|
228
|
-
data_extraction: { provider: 'anthropic', model: 'claude-
|
|
229
|
-
translation: { provider: 'anthropic', model: 'claude-
|
|
230
|
-
question_answering: { provider: 'anthropic', model: 'claude-
|
|
231
|
-
general: { provider: 'anthropic', model: 'claude-
|
|
292
|
+
code_generation: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
293
|
+
code_review: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
294
|
+
summarization: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
295
|
+
analysis: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
296
|
+
creative_writing: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
297
|
+
data_extraction: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
298
|
+
translation: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
299
|
+
question_answering: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
300
|
+
general: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
|
|
232
301
|
};
|
|
233
302
|
const UNCERTAINTY_PATTERNS = [
|
|
234
303
|
/i'?m not (entirely |completely |really )?sure/i,
|
|
@@ -489,7 +558,6 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
489
558
|
cascade: {
|
|
490
559
|
enabled: true,
|
|
491
560
|
models: [
|
|
492
|
-
'claude-haiku-4-5',
|
|
493
561
|
'claude-sonnet-4-6',
|
|
494
562
|
'claude-opus-4-6',
|
|
495
563
|
],
|
|
@@ -498,7 +566,7 @@ const DEFAULT_PROXY_CONFIG = {
|
|
|
498
566
|
},
|
|
499
567
|
complexity: {
|
|
500
568
|
enabled: true,
|
|
501
|
-
simple: 'claude-
|
|
569
|
+
simple: 'claude-sonnet-4-6',
|
|
502
570
|
moderate: 'claude-sonnet-4-6',
|
|
503
571
|
complex: 'claude-opus-4-6',
|
|
504
572
|
},
|
|
@@ -1895,10 +1963,14 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1895
1963
|
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
|
|
1896
1964
|
.badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
|
|
1897
1965
|
.badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
|
|
1966
|
+
.vstat{display:inline-flex;align-items:center;gap:6px;margin-left:8px;padding:1px 8px;border-radius:999px;border:1px solid #334155;font-size:.72rem}
|
|
1967
|
+
.vstat.current{color:#94a3b8;border-color:#334155;background:#0f172a66}
|
|
1968
|
+
.vstat.outdated{color:#fbbf24;border-color:#f59e0b55;background:#3a2f1e66}
|
|
1969
|
+
.vstat.unavailable{color:#a3a3a3;border-color:#52525b66;background:#18181b66}
|
|
1898
1970
|
@media(max-width:768px){.col-tt,.col-cx{display:none}}
|
|
1899
1971
|
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
|
|
1900
1972
|
</style></head><body>
|
|
1901
|
-
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
|
|
1973
|
+
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><a href="/dashboard/config">Config</a> · <span id="ver"></span><span id="vstat" class="vstat unavailable">Unable to check</span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
|
|
1902
1974
|
<div class="cards">
|
|
1903
1975
|
<div class="card"><div class="label">Total Requests</div><div class="value" id="totalReq">—</div></div>
|
|
1904
1976
|
<div class="card"><div class="label">Total Cost</div><div class="value" id="totalCost">—</div></div>
|
|
@@ -1926,6 +1998,19 @@ async function load(){
|
|
|
1926
1998
|
]);
|
|
1927
1999
|
$('ver').textContent='v'+health.version;
|
|
1928
2000
|
$('uptime').textContent=dur(health.uptime);
|
|
2001
|
+
|
|
2002
|
+
const versionStatus = await fetch('/v1/version-status').then(r=>r.json()).catch(()=>({state:'unavailable', current: health.version, latest: null}));
|
|
2003
|
+
const vEl = $('vstat');
|
|
2004
|
+
if (vEl) {
|
|
2005
|
+
vEl.className = 'vstat ' + (versionStatus.state === 'outdated' ? 'outdated' : versionStatus.state === 'up-to-date' ? 'current' : 'unavailable');
|
|
2006
|
+
if (versionStatus.state === 'outdated') {
|
|
2007
|
+
vEl.textContent = 'Update available · v' + versionStatus.current + ' → v' + versionStatus.latest;
|
|
2008
|
+
} else if (versionStatus.state === 'up-to-date') {
|
|
2009
|
+
vEl.textContent = 'Up to date · v' + versionStatus.current;
|
|
2010
|
+
} else {
|
|
2011
|
+
vEl.textContent = 'Unable to check · v' + versionStatus.current;
|
|
2012
|
+
}
|
|
2013
|
+
}
|
|
1929
2014
|
const total=stats.summary?.totalEvents||0;
|
|
1930
2015
|
$('totalReq').textContent=total;
|
|
1931
2016
|
$('totalCost').textContent='$'+fmt(stats.summary?.totalCostUsd??0,4);
|
|
@@ -2046,6 +2131,7 @@ async function startProxy(config = {}) {
|
|
|
2046
2131
|
loadHistoryFromDisk();
|
|
2047
2132
|
// Flush history on shutdown
|
|
2048
2133
|
const handleShutdown = () => {
    // Go through the module-level handle rather than the `meshHandle` const that
    // startProxy() declares later: if shutdown is triggered before mesh
    // initialisation has run, `_meshHandle` is still null (skipped below),
    // whereas reading the not-yet-initialised const would throw.
    try {
        _meshHandle?.stop();
    }
    catch (err) {
        // A failing mesh shutdown must not prevent history from being flushed.
        console.warn(`[RelayPlane] Mesh shutdown failed: ${err}`);
    }
    shutdownHistory();
    process.exit(0);
};
|
|
@@ -2054,11 +2140,159 @@ async function startProxy(config = {}) {
|
|
|
2054
2140
|
const configPath = getProxyConfigPath();
|
|
2055
2141
|
let proxyConfig = await loadProxyConfig(configPath, log);
|
|
2056
2142
|
const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
|
|
2143
|
+
// === Startup config validation (Task 4) ===
|
|
2144
|
+
try {
|
|
2145
|
+
const userConfig = (0, config_js_1.loadConfig)();
|
|
2146
|
+
// Check if config was just created (created_at within 5s of now)
|
|
2147
|
+
const createdAt = new Date(userConfig.created_at).getTime();
|
|
2148
|
+
const now = Date.now();
|
|
2149
|
+
if (Math.abs(now - createdAt) < 5000) {
|
|
2150
|
+
console.warn('[RelayPlane] WARNING: Fresh config detected — previous config may have been deleted');
|
|
2151
|
+
}
|
|
2152
|
+
// Check if credentials exist but config doesn't reference them
|
|
2153
|
+
if ((0, config_js_1.hasValidCredentials)() && !userConfig.api_key) {
|
|
2154
|
+
console.warn('[RelayPlane] WARNING: credentials.json exists but config has no API key reference');
|
|
2155
|
+
}
|
|
2156
|
+
// Auto-enable telemetry for authenticated users
|
|
2157
|
+
if ((0, config_js_1.hasValidCredentials)() && !userConfig.telemetry_enabled) {
|
|
2158
|
+
// Already handled in loadConfig() for fresh configs, but handle existing configs too
|
|
2159
|
+
}
|
|
2160
|
+
// Validate expected fields
|
|
2161
|
+
if (!userConfig.device_id || !userConfig.created_at || userConfig.config_version === undefined) {
|
|
2162
|
+
console.warn('[RelayPlane] WARNING: Config is missing expected fields');
|
|
2163
|
+
}
|
|
2164
|
+
}
|
|
2165
|
+
catch (err) {
|
|
2166
|
+
console.warn(`[RelayPlane] Config validation error: ${err}`);
|
|
2167
|
+
}
|
|
2168
|
+
// Initialize mesh learning layer
|
|
2169
|
+
const meshConfig = (0, config_js_1.getMeshConfig)();
|
|
2170
|
+
const userConfig = (0, config_js_1.loadConfig)();
|
|
2171
|
+
const meshHandle = _meshHandle = (0, index_js_1.initMeshLayer)({
|
|
2172
|
+
enabled: meshConfig.enabled,
|
|
2173
|
+
endpoint: meshConfig.endpoint,
|
|
2174
|
+
sync_interval_ms: meshConfig.sync_interval_ms,
|
|
2175
|
+
contribute: meshConfig.contribute,
|
|
2176
|
+
}, userConfig.api_key);
|
|
2177
|
+
// Initialize budget manager
|
|
2178
|
+
const budgetManager = (0, budget_js_1.getBudgetManager)(proxyConfig.budget);
|
|
2179
|
+
if (proxyConfig.budget?.enabled) {
|
|
2180
|
+
try {
|
|
2181
|
+
budgetManager.init();
|
|
2182
|
+
log('Budget manager initialized');
|
|
2183
|
+
}
|
|
2184
|
+
catch (err) {
|
|
2185
|
+
log(`Budget manager init failed: ${err}`);
|
|
2186
|
+
}
|
|
2187
|
+
}
|
|
2188
|
+
// Initialize anomaly detector
|
|
2189
|
+
const anomalyDetector = (0, anomaly_js_1.getAnomalyDetector)(proxyConfig.anomaly);
|
|
2190
|
+
// Initialize alert manager
|
|
2191
|
+
const alertManager = (0, alerts_js_1.getAlertManager)(proxyConfig.alerts);
|
|
2192
|
+
if (proxyConfig.alerts?.enabled) {
|
|
2193
|
+
try {
|
|
2194
|
+
alertManager.init();
|
|
2195
|
+
log('Alert manager initialized');
|
|
2196
|
+
}
|
|
2197
|
+
catch (err) {
|
|
2198
|
+
log(`Alert manager init failed: ${err}`);
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
// Downgrade config
|
|
2202
|
+
let downgradeConfig = {
|
|
2203
|
+
...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG,
|
|
2204
|
+
...(proxyConfig.downgrade ?? {}),
|
|
2205
|
+
};
|
|
2206
|
+
/**
 * Pre-request budget check + auto-downgrade.
 *
 * Asks the budget manager for the current budget state, fires breach and
 * threshold alerts, and decides whether the request is blocked, downgraded to
 * another model, or passed through unchanged.
 *
 * @param {string} model Model the request is about to be sent to.
 * @param {number} [estimatedCost] Forwarded unchanged to budgetManager.checkBudget().
 *   NOTE(review): the visible call site passes only the model, so this is
 *   undefined there. Confirm checkBudget() tolerates that.
 * @returns {{blocked: boolean, model: string, headers: Object, downgraded: boolean}}
 *   `blocked` is true when the budget action is 'block'; otherwise `model` is
 *   the (possibly downgraded) model and `headers` holds any downgrade headers.
 */
function preRequestBudgetCheck(model, estimatedCost) {
    const headers = {};
    let finalModel = model;
    let downgraded = false;
    const budgetResult = budgetManager.checkBudget(estimatedCost);
    const { hourlyUsd, dailyUsd } = budgetManager.getConfig();
    // Share of the daily budget already spent. Guarded so a missing or zero
    // daily limit yields 0 instead of NaN/Infinity in alerts and downgrade checks.
    const dailyPct = dailyUsd > 0
        ? (budgetResult.currentDailySpend / dailyUsd) * 100
        : 0;
    // Runs the downgrade check for the given budget percentage and applies the outcome.
    const tryDowngrade = (budgetPct) => {
        const dr = (0, downgrade_js_1.checkDowngrade)(finalModel, budgetPct, downgradeConfig);
        if (dr.downgraded) {
            finalModel = dr.newModel;
            downgraded = true;
            (0, downgrade_js_1.applyDowngradeHeaders)(headers, dr);
        }
    };
    if (budgetResult.breached) {
        // Fire breach alert with the spend/limit pair matching the breached window.
        const isHourly = budgetResult.breachType === 'hourly';
        const limit = isHourly ? hourlyUsd : dailyUsd;
        const spend = isHourly
            ? budgetResult.currentHourlySpend
            : budgetResult.currentDailySpend;
        alertManager.fireBreach(budgetResult.breachType, spend, limit);
        if (budgetResult.action === 'block') {
            return { blocked: true, model: finalModel, headers, downgraded: false };
        }
        if (budgetResult.action === 'downgrade') {
            // A breach counts as 100% of budget for the downgrade check.
            tryDowngrade(100);
        }
    }
    // Fire threshold alerts
    for (const threshold of budgetResult.thresholdsCrossed) {
        alertManager.fireThreshold(threshold, dailyPct, budgetResult.currentDailySpend, dailyUsd);
        budgetManager.markThresholdFired(threshold);
    }
    // Auto-downgrade based on budget percentage (even if not breached)
    if (!downgraded && downgradeConfig.enabled) {
        tryDowngrade(dailyPct);
    }
    return { blocked: false, model: finalModel, headers, downgraded };
}
|
|
2257
|
+
/**
 * Post-request bookkeeping: books the request's cost against the budget, hands
 * the request metrics to the anomaly detector, and raises one alert per anomaly
 * the detector reports.
 */
function postRequestRecord(model, tokensIn, tokensOut, costUsd) {
    budgetManager.recordSpend(costUsd, model);
    const sample = { model, tokensIn, tokensOut, costUsd };
    const analysis = anomalyDetector.recordAndAnalyze(sample);
    if (!analysis.detected) {
        return;
    }
    for (const finding of analysis.anomalies) {
        alertManager.fireAnomaly(finding);
    }
}
|
|
2276
|
+
// Initialize response cache
|
|
2277
|
+
const responseCache = (0, response_cache_js_1.getResponseCache)(proxyConfig.cache);
|
|
2278
|
+
if (proxyConfig.cache?.enabled !== false) {
|
|
2279
|
+
try {
|
|
2280
|
+
responseCache.init();
|
|
2281
|
+
log('Response cache initialized');
|
|
2282
|
+
}
|
|
2283
|
+
catch (err) {
|
|
2284
|
+
log(`Response cache init failed: ${err}`);
|
|
2285
|
+
}
|
|
2286
|
+
}
|
|
2057
2287
|
let configWatcher = null;
|
|
2058
2288
|
let configReloadTimer = null;
|
|
2059
2289
|
const reloadConfig = async () => {
    // Re-read the config file, then push the fresh values into every live component.
    proxyConfig = await loadProxyConfig(configPath, log);
    cooldownManager.updateConfig(getCooldownConfig(proxyConfig));
    // Each manager keeps its current settings; keys present in the file override them.
    const liveManagers = [
        [budgetManager, proxyConfig.budget],
        [anomalyDetector, proxyConfig.anomaly],
        [alertManager, proxyConfig.alerts],
    ];
    for (const [manager, overrides] of liveManagers) {
        // Spreading null/undefined contributes nothing, so a missing section is a no-op.
        manager.updateConfig({ ...manager.getConfig(), ...overrides });
    }
    // Downgrade settings are rebuilt from the defaults rather than merged into the previous value.
    downgradeConfig = { ...downgrade_js_1.DEFAULT_DOWNGRADE_CONFIG, ...proxyConfig.downgrade };
    log(`Reloaded config from ${configPath}`);
};
|
|
2064
2298
|
const scheduleConfigReload = () => {
|
|
@@ -2083,7 +2317,8 @@ async function startProxy(config = {}) {
|
|
|
2083
2317
|
// Initialize RelayPlane
|
|
2084
2318
|
const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
|
|
2085
2319
|
// Startup migration: clear default routing rules so complexity config takes priority
|
|
2086
|
-
const
|
|
2320
|
+
const clearDefaultRules = relay.routing.clearDefaultRules;
|
|
2321
|
+
const clearedCount = typeof clearDefaultRules === 'function' ? clearDefaultRules.call(relay.routing) : 0;
|
|
2087
2322
|
if (clearedCount > 0) {
|
|
2088
2323
|
log(`Cleared ${clearedCount} default routing rules (complexity config takes priority)`);
|
|
2089
2324
|
}
|
|
@@ -2130,6 +2365,13 @@ async function startProxy(config = {}) {
|
|
|
2130
2365
|
}));
|
|
2131
2366
|
return;
|
|
2132
2367
|
}
|
|
2368
|
+
if (req.method === 'GET' && pathname === '/v1/version-status') {
|
|
2369
|
+
const latest = await getLatestProxyVersion();
|
|
2370
|
+
const status = (0, version_status_js_1.getVersionStatus)(PROXY_VERSION, latest);
|
|
2371
|
+
res.writeHead(200, { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=60' });
|
|
2372
|
+
res.end(JSON.stringify(status));
|
|
2373
|
+
return;
|
|
2374
|
+
}
|
|
2133
2375
|
// === Control endpoints ===
|
|
2134
2376
|
if (pathname.startsWith('/control/')) {
|
|
2135
2377
|
if (req.method === 'POST' && pathname === '/control/enable') {
|
|
@@ -2196,6 +2438,36 @@ async function startProxy(config = {}) {
|
|
|
2196
2438
|
return;
|
|
2197
2439
|
}
|
|
2198
2440
|
}
|
|
2441
|
+
if (req.method === 'POST' && pathname === '/control/kill') {
|
|
2442
|
+
try {
|
|
2443
|
+
const body = await readJsonBody(req);
|
|
2444
|
+
if (body.all) {
|
|
2445
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2446
|
+
res.end(JSON.stringify({
|
|
2447
|
+
killed: 0,
|
|
2448
|
+
sessions: [],
|
|
2449
|
+
note: 'Local proxy mode: session kill not applicable'
|
|
2450
|
+
}));
|
|
2451
|
+
}
|
|
2452
|
+
else if (body.sessionKey) {
|
|
2453
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2454
|
+
res.end(JSON.stringify({
|
|
2455
|
+
killed: 1,
|
|
2456
|
+
sessions: [body.sessionKey],
|
|
2457
|
+
note: 'Rate limits reset for session'
|
|
2458
|
+
}));
|
|
2459
|
+
}
|
|
2460
|
+
else {
|
|
2461
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2462
|
+
res.end(JSON.stringify({ error: 'Provide sessionKey or all=true' }));
|
|
2463
|
+
}
|
|
2464
|
+
}
|
|
2465
|
+
catch {
|
|
2466
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
2467
|
+
res.end(JSON.stringify({ error: 'Invalid JSON' }));
|
|
2468
|
+
}
|
|
2469
|
+
return;
|
|
2470
|
+
}
|
|
2199
2471
|
// === Telemetry endpoints for dashboard ===
|
|
2200
2472
|
if (pathname.startsWith('/v1/telemetry/')) {
|
|
2201
2473
|
const telemetryPath = pathname.replace('/v1/telemetry/', '');
|
|
@@ -2372,6 +2644,24 @@ async function startProxy(config = {}) {
|
|
|
2372
2644
|
res.end(getConfigDashboardHTML());
|
|
2373
2645
|
return;
|
|
2374
2646
|
}
|
|
2647
|
+
// === Mesh stats endpoint ===
|
|
2648
|
+
if (req.method === 'GET' && pathname === '/v1/mesh/stats') {
|
|
2649
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2650
|
+
res.end(JSON.stringify(meshHandle.getStats()));
|
|
2651
|
+
return;
|
|
2652
|
+
}
|
|
2653
|
+
if (req.method === 'POST' && pathname === '/v1/mesh/sync') {
|
|
2654
|
+
try {
|
|
2655
|
+
const result = await meshHandle.forceSync();
|
|
2656
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
2657
|
+
res.end(JSON.stringify({ sync: result }));
|
|
2658
|
+
}
|
|
2659
|
+
catch (err) {
|
|
2660
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2661
|
+
res.end(JSON.stringify({ sync: { error: err.message } }));
|
|
2662
|
+
}
|
|
2663
|
+
return;
|
|
2664
|
+
}
|
|
2375
2665
|
if (req.method === 'GET' && pathname === '/v1/config') {
|
|
2376
2666
|
try {
|
|
2377
2667
|
const raw = await fs.promises.readFile(getProxyConfigPath(), 'utf8');
|
|
@@ -2511,6 +2801,48 @@ async function startProxy(config = {}) {
|
|
|
2511
2801
|
log(`Config routing.mode=auto: overriding passthrough → auto for model ${requestedModel}`);
|
|
2512
2802
|
}
|
|
2513
2803
|
const isStreaming = requestBody['stream'] === true;
|
|
2804
|
+
// ── Response Cache: check for cached response ──
|
|
2805
|
+
const cacheBypass = responseCache.shouldBypass(requestBody);
|
|
2806
|
+
let cacheHash;
|
|
2807
|
+
if (!cacheBypass) {
|
|
2808
|
+
cacheHash = responseCache.computeKey(requestBody);
|
|
2809
|
+
const cached = responseCache.get(cacheHash);
|
|
2810
|
+
if (cached) {
|
|
2811
|
+
try {
|
|
2812
|
+
const cachedData = JSON.parse(cached);
|
|
2813
|
+
const cacheUsage = cachedData?.usage;
|
|
2814
|
+
const cacheCost = (0, telemetry_js_1.estimateCost)(requestBody['model'] ?? '', cacheUsage?.input_tokens ?? 0, cacheUsage?.output_tokens ?? 0);
|
|
2815
|
+
responseCache.recordHit(cacheCost, 0);
|
|
2816
|
+
// Replay cached streaming response as SSE
|
|
2817
|
+
if (isStreaming && cachedData._relayplaneStreamCache) {
|
|
2818
|
+
res.writeHead(200, {
|
|
2819
|
+
'Content-Type': 'text/event-stream',
|
|
2820
|
+
'Cache-Control': 'no-cache',
|
|
2821
|
+
'Connection': 'keep-alive',
|
|
2822
|
+
'X-RelayPlane-Cache': 'HIT',
|
|
2823
|
+
});
|
|
2824
|
+
res.end(cachedData.ssePayload);
|
|
2825
|
+
}
|
|
2826
|
+
else {
|
|
2827
|
+
res.writeHead(200, {
|
|
2828
|
+
'Content-Type': 'application/json',
|
|
2829
|
+
'X-RelayPlane-Cache': 'HIT',
|
|
2830
|
+
});
|
|
2831
|
+
res.end(cached);
|
|
2832
|
+
}
|
|
2833
|
+
log(`Cache HIT for ${requestBody['model']} (hash: ${cacheHash.slice(0, 8)})`);
|
|
2834
|
+
return;
|
|
2835
|
+
}
|
|
2836
|
+
catch {
|
|
2837
|
+
// Corrupt cache entry, continue to provider
|
|
2838
|
+
}
|
|
2839
|
+
}
|
|
2840
|
+
responseCache.recordMiss();
|
|
2841
|
+
}
|
|
2842
|
+
else {
|
|
2843
|
+
responseCache.recordBypass();
|
|
2844
|
+
}
|
|
2845
|
+
// ── End cache check ──
|
|
2514
2846
|
const messages = Array.isArray(requestBody['messages'])
|
|
2515
2847
|
? requestBody['messages']
|
|
2516
2848
|
: [];
|
|
@@ -2619,6 +2951,47 @@ async function startProxy(config = {}) {
|
|
|
2619
2951
|
res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
|
|
2620
2952
|
return;
|
|
2621
2953
|
}
|
|
2954
|
+
// ── Budget check + auto-downgrade ──
|
|
2955
|
+
const budgetExtraHeaders = {};
|
|
2956
|
+
{
|
|
2957
|
+
const budgetCheck = preRequestBudgetCheck(targetModel || requestedModel);
|
|
2958
|
+
if (budgetCheck.blocked) {
|
|
2959
|
+
res.writeHead(429, { 'Content-Type': 'application/json' });
|
|
2960
|
+
res.end(JSON.stringify({
|
|
2961
|
+
error: 'Budget limit exceeded. Request blocked.',
|
|
2962
|
+
type: 'budget_exceeded',
|
|
2963
|
+
}));
|
|
2964
|
+
return;
|
|
2965
|
+
}
|
|
2966
|
+
if (budgetCheck.downgraded) {
|
|
2967
|
+
log(`Budget downgrade: ${targetModel || requestedModel} → ${budgetCheck.model}`);
|
|
2968
|
+
targetModel = budgetCheck.model;
|
|
2969
|
+
if (requestBody)
|
|
2970
|
+
requestBody['model'] = targetModel;
|
|
2971
|
+
}
|
|
2972
|
+
Object.assign(budgetExtraHeaders, budgetCheck.headers);
|
|
2973
|
+
}
|
|
2974
|
+
// ── End budget check ──
|
|
2975
|
+
// ── Rate limit check ──
|
|
2976
|
+
const workspaceId = 'local'; // Local proxy uses single workspace
|
|
2977
|
+
const rateLimit = (0, rate_limiter_js_1.checkLimit)(workspaceId, targetModel);
|
|
2978
|
+
if (!rateLimit.allowed) {
|
|
2979
|
+
console.error(`[RATE LIMIT] ${targetModel} limit reached for workspace: ${workspaceId}`);
|
|
2980
|
+
res.writeHead(429, {
|
|
2981
|
+
'Content-Type': 'application/json',
|
|
2982
|
+
'Retry-After': String(rateLimit.retryAfter || 60),
|
|
2983
|
+
'X-RelayPlane-RateLimit-Limit': String(rateLimit.limit),
|
|
2984
|
+
'X-RelayPlane-RateLimit-Remaining': '0',
|
|
2985
|
+
'X-RelayPlane-RateLimit-Reset': String(Math.ceil(rateLimit.resetAt / 1000))
|
|
2986
|
+
});
|
|
2987
|
+
res.end(JSON.stringify({
|
|
2988
|
+
error: `Rate limit exceeded for ${targetModel}. Max ${rateLimit.limit} requests per minute.`,
|
|
2989
|
+
type: 'rate_limit_exceeded',
|
|
2990
|
+
retry_after: rateLimit.retryAfter || 60
|
|
2991
|
+
}));
|
|
2992
|
+
return;
|
|
2993
|
+
}
|
|
2994
|
+
// ── End rate limit check ──
|
|
2622
2995
|
const startTime = Date.now();
|
|
2623
2996
|
let nativeResponseData;
|
|
2624
2997
|
try {
|
|
@@ -2688,11 +3061,16 @@ async function startProxy(config = {}) {
|
|
|
2688
3061
|
'Content-Type': 'text/event-stream',
|
|
2689
3062
|
'Cache-Control': 'no-cache',
|
|
2690
3063
|
'Connection': 'keep-alive',
|
|
3064
|
+
'X-RelayPlane-Cache': cacheBypass ? 'BYPASS' : 'MISS',
|
|
2691
3065
|
...nativeStreamRpHeaders,
|
|
2692
3066
|
});
|
|
2693
3067
|
const reader = providerResponse.body?.getReader();
|
|
2694
3068
|
let streamTokensIn = 0;
|
|
2695
3069
|
let streamTokensOut = 0;
|
|
3070
|
+
let streamCacheCreation = 0;
|
|
3071
|
+
let streamCacheRead = 0;
|
|
3072
|
+
// Buffer raw SSE chunks for cache storage
|
|
3073
|
+
const rawChunks = [];
|
|
2696
3074
|
if (reader) {
|
|
2697
3075
|
const decoder = new TextDecoder();
|
|
2698
3076
|
let sseBuffer = '';
|
|
@@ -2703,6 +3081,8 @@ async function startProxy(config = {}) {
|
|
|
2703
3081
|
break;
|
|
2704
3082
|
const chunk = decoder.decode(value, { stream: true });
|
|
2705
3083
|
res.write(chunk);
|
|
3084
|
+
if (cacheHash && !cacheBypass)
|
|
3085
|
+
rawChunks.push(chunk);
|
|
2706
3086
|
// Parse SSE events to extract usage from message_delta / message_stop
|
|
2707
3087
|
sseBuffer += chunk;
|
|
2708
3088
|
const lines = sseBuffer.split('\n');
|
|
@@ -2715,9 +3095,11 @@ async function startProxy(config = {}) {
|
|
|
2715
3095
|
if (evt.type === 'message_delta' && evt.usage) {
|
|
2716
3096
|
streamTokensOut = evt.usage.output_tokens ?? streamTokensOut;
|
|
2717
3097
|
}
|
|
2718
|
-
// Anthropic: message_start has usage.input_tokens
|
|
3098
|
+
// Anthropic: message_start has usage.input_tokens + cache tokens
|
|
2719
3099
|
if (evt.type === 'message_start' && evt.message?.usage) {
|
|
2720
3100
|
streamTokensIn = evt.message.usage.input_tokens ?? streamTokensIn;
|
|
3101
|
+
streamCacheCreation = evt.message.usage.cache_creation_input_tokens ?? 0;
|
|
3102
|
+
streamCacheRead = evt.message.usage.cache_read_input_tokens ?? 0;
|
|
2721
3103
|
}
|
|
2722
3104
|
// OpenAI format: choices with usage
|
|
2723
3105
|
if (evt.usage) {
|
|
@@ -2736,15 +3118,45 @@ async function startProxy(config = {}) {
|
|
|
2736
3118
|
reader.releaseLock();
|
|
2737
3119
|
}
|
|
2738
3120
|
}
|
|
3121
|
+
// ── Cache: store streaming response as raw SSE payload ──
|
|
3122
|
+
if (cacheHash && !cacheBypass && rawChunks.length > 0) {
|
|
3123
|
+
const streamPayload = JSON.stringify({
|
|
3124
|
+
_relayplaneStreamCache: true,
|
|
3125
|
+
ssePayload: rawChunks.join(''),
|
|
3126
|
+
usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead },
|
|
3127
|
+
});
|
|
3128
|
+
responseCache.set(cacheHash, streamPayload, {
|
|
3129
|
+
model: targetModel || requestedModel,
|
|
3130
|
+
tokensIn: streamTokensIn,
|
|
3131
|
+
tokensOut: streamTokensOut,
|
|
3132
|
+
costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, streamTokensIn, streamTokensOut, streamCacheCreation || undefined, streamCacheRead || undefined),
|
|
3133
|
+
taskType,
|
|
3134
|
+
});
|
|
3135
|
+
log(`Cache STORE (stream) for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
3136
|
+
}
|
|
2739
3137
|
// Store streaming token counts so telemetry can use them
|
|
2740
|
-
nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut } };
|
|
3138
|
+
nativeResponseData = { usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, cache_creation_input_tokens: streamCacheCreation, cache_read_input_tokens: streamCacheRead } };
|
|
2741
3139
|
res.end();
|
|
2742
3140
|
}
|
|
2743
3141
|
else {
|
|
2744
3142
|
nativeResponseData = await providerResponse.json();
|
|
2745
3143
|
const nativeRespModel = checkResponseModelMismatch(nativeResponseData, targetModel || requestedModel, targetProvider, log);
|
|
2746
3144
|
const nativeRpHeaders = buildRelayPlaneResponseHeaders(targetModel || requestedModel, originalModel ?? 'unknown', complexity, targetProvider, routingMode);
|
|
2747
|
-
|
|
3145
|
+
// ── Cache: store non-streaming response ──
|
|
3146
|
+
const nativeCacheHeader = cacheBypass ? 'BYPASS' : 'MISS';
|
|
3147
|
+
if (cacheHash && !cacheBypass) {
|
|
3148
|
+
const nativeRespJson = JSON.stringify(nativeResponseData);
|
|
3149
|
+
const nativeUsage = nativeResponseData?.usage;
|
|
3150
|
+
responseCache.set(cacheHash, nativeRespJson, {
|
|
3151
|
+
model: targetModel || requestedModel,
|
|
3152
|
+
tokensIn: nativeUsage?.input_tokens ?? 0,
|
|
3153
|
+
tokensOut: nativeUsage?.output_tokens ?? 0,
|
|
3154
|
+
costUsd: (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeUsage?.input_tokens ?? 0, nativeUsage?.output_tokens ?? 0),
|
|
3155
|
+
taskType,
|
|
3156
|
+
});
|
|
3157
|
+
log(`Cache STORE for ${targetModel || requestedModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
3158
|
+
}
|
|
3159
|
+
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': nativeCacheHeader, ...nativeRpHeaders });
|
|
2748
3160
|
res.end(JSON.stringify(nativeResponseData));
|
|
2749
3161
|
}
|
|
2750
3162
|
}
|
|
@@ -2754,9 +3166,17 @@ async function startProxy(config = {}) {
|
|
|
2754
3166
|
// nativeResponseData holds response JSON for non-streaming, or { usage: { input_tokens, output_tokens } }
|
|
2755
3167
|
// synthesised from SSE events for streaming
|
|
2756
3168
|
const nativeUsageData = nativeResponseData?.usage;
|
|
2757
|
-
const
|
|
3169
|
+
const nativeBaseTokIn = nativeUsageData?.input_tokens ?? nativeUsageData?.prompt_tokens ?? 0;
|
|
2758
3170
|
const nativeTokOut = nativeUsageData?.output_tokens ?? nativeUsageData?.completion_tokens ?? 0;
|
|
2759
|
-
|
|
3171
|
+
const nativeCacheCreation = nativeUsageData?.cache_creation_input_tokens ?? 0;
|
|
3172
|
+
const nativeCacheRead = nativeUsageData?.cache_read_input_tokens ?? 0;
|
|
3173
|
+
// Include cache tokens in displayed/recorded token count
|
|
3174
|
+
const nativeTokIn = nativeBaseTokIn + nativeCacheCreation + nativeCacheRead;
|
|
3175
|
+
// Cost calculation expects inputTokens to include cache tokens when cache params are provided
|
|
3176
|
+
const nativeCostUsd = (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined);
|
|
3177
|
+
updateLastHistoryEntry(nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
3178
|
+
// ── Post-request: budget spend + anomaly detection ──
|
|
3179
|
+
postRequestRecord(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCostUsd);
|
|
2760
3180
|
if (recordTelemetry) {
|
|
2761
3181
|
relay
|
|
2762
3182
|
.run({
|
|
@@ -2765,7 +3185,8 @@ async function startProxy(config = {}) {
|
|
|
2765
3185
|
model: `${targetProvider}:${targetModel || requestedModel}`,
|
|
2766
3186
|
})
|
|
2767
3187
|
.catch(() => { });
|
|
2768
|
-
sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined);
|
|
3188
|
+
sendCloudTelemetry(taskType, targetModel || requestedModel, nativeTokIn, nativeTokOut, durationMs, true, undefined, originalModel ?? undefined, nativeCacheCreation || undefined, nativeCacheRead || undefined);
|
|
3189
|
+
meshCapture(targetModel || requestedModel, targetProvider, taskType, nativeTokIn, nativeTokOut, (0, telemetry_js_1.estimateCost)(targetModel || requestedModel, nativeTokIn, nativeTokOut, nativeCacheCreation || undefined, nativeCacheRead || undefined), durationMs, true);
|
|
2769
3190
|
}
|
|
2770
3191
|
}
|
|
2771
3192
|
catch (err) {
|
|
@@ -2847,6 +3268,47 @@ async function startProxy(config = {}) {
|
|
|
2847
3268
|
return;
|
|
2848
3269
|
}
|
|
2849
3270
|
const isStreaming = request.stream === true;
|
|
3271
|
+
// ── Response Cache: check for cached response (chat/completions) ──
|
|
3272
|
+
const chatCacheBypass = responseCache.shouldBypass(request);
|
|
3273
|
+
let chatCacheHash;
|
|
3274
|
+
if (!chatCacheBypass) {
|
|
3275
|
+
chatCacheHash = responseCache.computeKey(request);
|
|
3276
|
+
const chatCached = responseCache.get(chatCacheHash);
|
|
3277
|
+
if (chatCached) {
|
|
3278
|
+
try {
|
|
3279
|
+
const chatCachedData = JSON.parse(chatCached);
|
|
3280
|
+
const chatCacheUsage = chatCachedData?.usage;
|
|
3281
|
+
const chatCacheCost = (0, telemetry_js_1.estimateCost)(request.model ?? '', chatCacheUsage?.prompt_tokens ?? chatCacheUsage?.input_tokens ?? 0, chatCacheUsage?.completion_tokens ?? chatCacheUsage?.output_tokens ?? 0);
|
|
3282
|
+
responseCache.recordHit(chatCacheCost, 0);
|
|
3283
|
+
if (isStreaming && chatCachedData._relayplaneStreamCache) {
|
|
3284
|
+
res.writeHead(200, {
|
|
3285
|
+
'Content-Type': 'text/event-stream',
|
|
3286
|
+
'Cache-Control': 'no-cache',
|
|
3287
|
+
'Connection': 'keep-alive',
|
|
3288
|
+
'X-RelayPlane-Cache': 'HIT',
|
|
3289
|
+
});
|
|
3290
|
+
res.end(chatCachedData.ssePayload);
|
|
3291
|
+
}
|
|
3292
|
+
else {
|
|
3293
|
+
res.writeHead(200, {
|
|
3294
|
+
'Content-Type': 'application/json',
|
|
3295
|
+
'X-RelayPlane-Cache': 'HIT',
|
|
3296
|
+
});
|
|
3297
|
+
res.end(chatCached);
|
|
3298
|
+
}
|
|
3299
|
+
log(`Cache HIT for chat/completions ${request.model} (hash: ${chatCacheHash.slice(0, 8)})`);
|
|
3300
|
+
return;
|
|
3301
|
+
}
|
|
3302
|
+
catch {
|
|
3303
|
+
// Corrupt, continue
|
|
3304
|
+
}
|
|
3305
|
+
}
|
|
3306
|
+
responseCache.recordMiss();
|
|
3307
|
+
}
|
|
3308
|
+
else {
|
|
3309
|
+
responseCache.recordBypass();
|
|
3310
|
+
}
|
|
3311
|
+
// ── End cache check ──
|
|
2850
3312
|
const bypassRouting = !relayplaneEnabled || relayplaneBypass;
|
|
2851
3313
|
// Extract routing mode from model name
|
|
2852
3314
|
const originalRequestedModel = request.model;
|
|
@@ -3065,10 +3527,48 @@ async function startProxy(config = {}) {
|
|
|
3065
3527
|
}
|
|
3066
3528
|
apiKey = apiKeyResult.apiKey;
|
|
3067
3529
|
}
|
|
3530
|
+
// ── Budget check + auto-downgrade (chat/completions) ──
|
|
3531
|
+
{
|
|
3532
|
+
const chatBudgetCheck = preRequestBudgetCheck(targetModel);
|
|
3533
|
+
if (chatBudgetCheck.blocked) {
|
|
3534
|
+
res.writeHead(429, { 'Content-Type': 'application/json' });
|
|
3535
|
+
res.end(JSON.stringify({
|
|
3536
|
+
error: 'Budget limit exceeded. Request blocked.',
|
|
3537
|
+
type: 'budget_exceeded',
|
|
3538
|
+
}));
|
|
3539
|
+
return;
|
|
3540
|
+
}
|
|
3541
|
+
if (chatBudgetCheck.downgraded) {
|
|
3542
|
+
log(`Budget downgrade: ${targetModel} → ${chatBudgetCheck.model}`);
|
|
3543
|
+
targetModel = chatBudgetCheck.model;
|
|
3544
|
+
request.model = targetModel;
|
|
3545
|
+
}
|
|
3546
|
+
}
|
|
3547
|
+
// ── End budget check ──
|
|
3548
|
+
// ── Rate limit check ──
|
|
3549
|
+
const chatWorkspaceId = 'local'; // Local proxy uses single workspace
|
|
3550
|
+
const chatRateLimit = (0, rate_limiter_js_1.checkLimit)(chatWorkspaceId, targetModel);
|
|
3551
|
+
if (!chatRateLimit.allowed) {
|
|
3552
|
+
console.error(`[RATE LIMIT] ${targetModel} limit reached for workspace: ${chatWorkspaceId}`);
|
|
3553
|
+
res.writeHead(429, {
|
|
3554
|
+
'Content-Type': 'application/json',
|
|
3555
|
+
'Retry-After': String(chatRateLimit.retryAfter || 60),
|
|
3556
|
+
'X-RelayPlane-RateLimit-Limit': String(chatRateLimit.limit),
|
|
3557
|
+
'X-RelayPlane-RateLimit-Remaining': '0',
|
|
3558
|
+
'X-RelayPlane-RateLimit-Reset': String(Math.ceil(chatRateLimit.resetAt / 1000))
|
|
3559
|
+
});
|
|
3560
|
+
res.end(JSON.stringify({
|
|
3561
|
+
error: `Rate limit exceeded for ${targetModel}. Max ${chatRateLimit.limit} requests per minute.`,
|
|
3562
|
+
type: 'rate_limit_exceeded',
|
|
3563
|
+
retry_after: chatRateLimit.retryAfter || 60
|
|
3564
|
+
}));
|
|
3565
|
+
return;
|
|
3566
|
+
}
|
|
3567
|
+
// ── End rate limit check ──
|
|
3068
3568
|
const startTime = Date.now();
|
|
3069
3569
|
// Handle streaming vs non-streaming
|
|
3070
3570
|
if (isStreaming) {
|
|
3071
|
-
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
|
|
3571
|
+
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity, chatCacheHash, chatCacheBypass);
|
|
3072
3572
|
}
|
|
3073
3573
|
else {
|
|
3074
3574
|
if (useCascade && cascadeConfig) {
|
|
@@ -3129,6 +3629,7 @@ async function startProxy(config = {}) {
|
|
|
3129
3629
|
log(`Failed to record run: ${err}`);
|
|
3130
3630
|
}
|
|
3131
3631
|
sendCloudTelemetry(taskType, cascadeResult.model, cascadeTokensIn, cascadeTokensOut, durationMs, true, undefined, originalRequestedModel ?? undefined);
|
|
3632
|
+
meshCapture(cascadeResult.model, cascadeResult.provider, taskType, cascadeTokensIn, cascadeTokensOut, cascadeCost, durationMs, true);
|
|
3132
3633
|
}
|
|
3133
3634
|
const chatCascadeRpHeaders = buildRelayPlaneResponseHeaders(cascadeResult.model, originalRequestedModel ?? 'unknown', complexity, cascadeResult.provider, 'cascade');
|
|
3134
3635
|
res.writeHead(200, { 'Content-Type': 'application/json', ...chatCascadeRpHeaders });
|
|
@@ -3152,6 +3653,74 @@ async function startProxy(config = {}) {
|
|
|
3152
3653
|
}
|
|
3153
3654
|
}
|
|
3154
3655
|
});
|
|
3656
|
+
// ── Health Watchdog ──
|
|
3657
|
+
// Tuning knobs for the health watchdog.
// The probe interval must stay below systemd's WatchdogSec (30s) to avoid false kills.
const WATCHDOG_INTERVAL_MS = 15 * 1000;
// Number of consecutive failed probes tolerated before a restart is attempted.
const WATCHDOG_MAX_FAILURES = 3;
// Mutable watchdog state: handle of the running interval (null while stopped)
// and the current count of consecutive failed probes.
let watchdogTimer = null;
let watchdogFailures = 0;
|
|
3661
|
+
/**
 * sd_notify: write a state string to $NOTIFY_SOCKET for systemd watchdog integration.
 *
 * Does nothing when NOTIFY_SOCKET is not set (i.e. not running under a
 * notify-aware supervisor). Never throws: every failure is reported through
 * `log` so that notification problems cannot take the proxy down.
 *
 * @param {string} state - sd_notify payload, e.g. 'READY=1', 'WATCHDOG=1', 'STOPPING=1'.
 * @returns {void}
 */
function sdNotify(state) {
    const notifySocket = process.env['NOTIFY_SOCKET'];
    if (!notifySocket)
        return;
    let client;
    try {
        const dgram = require('node:dgram');
        // NOTE(review): Node's documented dgram socket types are 'udp4' and 'udp6'.
        // Confirm that 'unix_dgram' is accepted by the target runtime; if createSocket
        // rejects it, every call lands in the catch below and systemd never receives
        // READY=1 / WATCHDOG=1.
        client = dgram.createSocket('unix_dgram');
        const socket = client;
        const buf = Buffer.from(state);
        socket.send(buf, 0, buf.length, notifySocket, (sendErr) => {
            // Surface delivery failures instead of dropping them silently.
            if (sendErr)
                log(`sd_notify error: ${sendErr}`);
            socket.close();
        });
    }
    catch (err) {
        log(`sd_notify error: ${err}`);
        // If the socket was created but send() threw synchronously, release it.
        try {
            client?.close();
        }
        catch {
            // Socket was never opened or is already closed; nothing to release.
        }
    }
}
|
|
3680
|
+
/**
 * Start the in-process health watchdog.
 *
 * Sends READY=1 via sdNotify, then probes the proxy's own /health endpoint
 * every WATCHDOG_INTERVAL_MS. A successful probe resets the failure counter
 * and sends WATCHDOG=1. After WATCHDOG_MAX_FAILURES consecutive failures the
 * server is closed and the process exits with code 1 so the supervisor can
 * restart it.
 *
 * @returns {void}
 */
function startWatchdog() {
    // Notify systemd we're ready
    sdNotify('READY=1');
    watchdogTimer = setInterval(async () => {
        const controller = new AbortController();
        // Abort the probe if /health does not answer within 5 seconds.
        const timeout = setTimeout(() => controller.abort(), 5000);
        try {
            const res = await fetch(`http://${host}:${port}/health`, { signal: controller.signal });
            if (res.ok) {
                watchdogFailures = 0;
                // Notify systemd watchdog we're alive
                sdNotify('WATCHDOG=1');
            }
            else {
                watchdogFailures++;
                console.error(`[RelayPlane] Watchdog: health check returned ${res.status} (failure ${watchdogFailures}/${WATCHDOG_MAX_FAILURES})`);
            }
        }
        catch (err) {
            watchdogFailures++;
            console.error(`[RelayPlane] Watchdog: health check failed (failure ${watchdogFailures}/${WATCHDOG_MAX_FAILURES}): ${err}`);
        }
        finally {
            // Always release the abort timer, including when fetch rejects.
            clearTimeout(timeout);
        }
        if (watchdogFailures >= WATCHDOG_MAX_FAILURES) {
            // Stop probing: the restart sequence below must only run once.
            if (watchdogTimer) {
                clearInterval(watchdogTimer);
                watchdogTimer = null;
            }
            console.error(`[RelayPlane] CRITICAL: ${WATCHDOG_MAX_FAILURES} consecutive watchdog failures. Attempting graceful restart...`);
            sdNotify('STOPPING=1');
            // Close server and exit — systemd Restart=always will restart us
            server.close(() => {
                process.exit(1);
            });
            // Force exit after 10s if graceful close hangs
            setTimeout(() => process.exit(1), 10_000).unref();
        }
    }, WATCHDOG_INTERVAL_MS);
    // Do not let the watchdog interval alone keep the event loop alive.
    watchdogTimer.unref();
}
|
|
3716
|
+
// Clean up watchdog on shutdown
|
|
3717
|
+
// On SIGINT/SIGTERM: stop the watchdog interval (if one is running) and
// send STOPPING=1 through sdNotify.
const origHandleShutdown = () => {
    if (watchdogTimer) {
        clearInterval(watchdogTimer);
    }
    sdNotify('STOPPING=1');
};
for (const shutdownSignal of ['SIGINT', 'SIGTERM']) {
    process.on(shutdownSignal, origHandleShutdown);
}
|
|
3155
3724
|
return new Promise((resolve, reject) => {
|
|
3156
3725
|
server.on('error', reject);
|
|
3157
3726
|
server.listen(port, host, () => {
|
|
@@ -3164,6 +3733,8 @@ async function startProxy(config = {}) {
|
|
|
3164
3733
|
console.log(` Models: relayplane:auto, relayplane:cost, relayplane:fast, relayplane:quality`);
|
|
3165
3734
|
console.log(` Auth: Passthrough for Anthropic, env vars for other providers`);
|
|
3166
3735
|
console.log(` Streaming: ✅ Enabled`);
|
|
3736
|
+
startWatchdog();
|
|
3737
|
+
log('Health watchdog started (30s interval, sd_notify enabled)');
|
|
3167
3738
|
resolve(server);
|
|
3168
3739
|
});
|
|
3169
3740
|
});
|
|
@@ -3221,7 +3792,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
3221
3792
|
}
|
|
3222
3793
|
return { responseData, ok: true, status: 200 };
|
|
3223
3794
|
}
|
|
3224
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
|
|
3795
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple', cacheHash, cacheBypass) {
|
|
3225
3796
|
let providerResponse;
|
|
3226
3797
|
try {
|
|
3227
3798
|
switch (targetProvider) {
|
|
@@ -3277,6 +3848,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3277
3848
|
// Track token usage from streaming events
|
|
3278
3849
|
let streamTokensIn = 0;
|
|
3279
3850
|
let streamTokensOut = 0;
|
|
3851
|
+
const shouldCacheStream = !!(cacheHash && !cacheBypass);
|
|
3852
|
+
const rawChunks = [];
|
|
3280
3853
|
try {
|
|
3281
3854
|
// Stream the response based on provider format
|
|
3282
3855
|
switch (targetProvider) {
|
|
@@ -3284,6 +3857,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3284
3857
|
// Convert Anthropic stream to OpenAI format
|
|
3285
3858
|
for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
|
|
3286
3859
|
res.write(chunk);
|
|
3860
|
+
if (shouldCacheStream)
|
|
3861
|
+
rawChunks.push(chunk);
|
|
3287
3862
|
// Parse OpenAI-format chunks for usage (emitted at end of stream)
|
|
3288
3863
|
try {
|
|
3289
3864
|
const lines = chunk.split('\n');
|
|
@@ -3304,6 +3879,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3304
3879
|
// Convert Gemini stream to OpenAI format
|
|
3305
3880
|
for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
|
|
3306
3881
|
res.write(chunk);
|
|
3882
|
+
if (shouldCacheStream)
|
|
3883
|
+
rawChunks.push(chunk);
|
|
3307
3884
|
try {
|
|
3308
3885
|
const lines = chunk.split('\n');
|
|
3309
3886
|
for (const line of lines) {
|
|
@@ -3323,6 +3900,8 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3323
3900
|
// xAI, OpenRouter, DeepSeek, Groq, OpenAI all use OpenAI-compatible streaming format
|
|
3324
3901
|
for await (const chunk of pipeOpenAIStream(providerResponse)) {
|
|
3325
3902
|
res.write(chunk);
|
|
3903
|
+
if (shouldCacheStream)
|
|
3904
|
+
rawChunks.push(chunk);
|
|
3326
3905
|
try {
|
|
3327
3906
|
const lines = chunk.split('\n');
|
|
3328
3907
|
for (const line of lines) {
|
|
@@ -3342,6 +3921,23 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3342
3921
|
catch (err) {
|
|
3343
3922
|
log(`Streaming error: ${err}`);
|
|
3344
3923
|
}
|
|
3924
|
+
// ── Cache: store streaming response ──
|
|
3925
|
+
if (shouldCacheStream && cacheHash && rawChunks.length > 0) {
|
|
3926
|
+
const responseCache = (0, response_cache_js_1.getResponseCache)();
|
|
3927
|
+
const streamPayload = JSON.stringify({
|
|
3928
|
+
_relayplaneStreamCache: true,
|
|
3929
|
+
ssePayload: rawChunks.join(''),
|
|
3930
|
+
usage: { input_tokens: streamTokensIn, output_tokens: streamTokensOut, prompt_tokens: streamTokensIn, completion_tokens: streamTokensOut },
|
|
3931
|
+
});
|
|
3932
|
+
responseCache.set(cacheHash, streamPayload, {
|
|
3933
|
+
model: targetModel,
|
|
3934
|
+
tokensIn: streamTokensIn,
|
|
3935
|
+
tokensOut: streamTokensOut,
|
|
3936
|
+
costUsd: (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut),
|
|
3937
|
+
taskType,
|
|
3938
|
+
});
|
|
3939
|
+
log(`Cache STORE (stream) for chat/completions ${targetModel} (hash: ${cacheHash.slice(0, 8)})`);
|
|
3940
|
+
}
|
|
3345
3941
|
if (cooldownsEnabled) {
|
|
3346
3942
|
cooldownManager.recordSuccess(targetProvider);
|
|
3347
3943
|
}
|
|
@@ -3351,6 +3947,17 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3351
3947
|
// Update token/cost info on the history entry
|
|
3352
3948
|
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3353
3949
|
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
3950
|
+
// ── Post-request: budget spend + anomaly detection ──
|
|
3951
|
+
try {
|
|
3952
|
+
(0, budget_js_1.getBudgetManager)().recordSpend(streamCost, targetModel);
|
|
3953
|
+
const anomalyResult = (0, anomaly_js_1.getAnomalyDetector)().recordAndAnalyze({ model: targetModel, tokensIn: streamTokensIn, tokensOut: streamTokensOut, costUsd: streamCost });
|
|
3954
|
+
if (anomalyResult.detected) {
|
|
3955
|
+
for (const anomaly of anomalyResult.anomalies) {
|
|
3956
|
+
(0, alerts_js_1.getAlertManager)().fireAnomaly(anomaly);
|
|
3957
|
+
}
|
|
3958
|
+
}
|
|
3959
|
+
}
|
|
3960
|
+
catch { /* budget/anomaly should never block */ }
|
|
3354
3961
|
if (recordTelemetry) {
|
|
3355
3962
|
// Record the run (non-blocking)
|
|
3356
3963
|
relay
|
|
@@ -3366,6 +3973,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3366
3973
|
log(`Failed to record run: ${err}`);
|
|
3367
3974
|
});
|
|
3368
3975
|
sendCloudTelemetry(taskType, targetModel, streamTokensIn, streamTokensOut, durationMs, true, undefined, request.model ?? undefined);
|
|
3976
|
+
meshCapture(targetModel, targetProvider, taskType, streamTokensIn, streamTokensOut, streamCost, durationMs, true);
|
|
3369
3977
|
}
|
|
3370
3978
|
res.end();
|
|
3371
3979
|
}
|
|
@@ -3413,6 +4021,17 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3413
4021
|
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3414
4022
|
const cost = (0, telemetry_js_1.estimateCost)(targetModel, tokensIn, tokensOut);
|
|
3415
4023
|
updateLastHistoryEntry(tokensIn, tokensOut, cost, nonStreamRespModel);
|
|
4024
|
+
// ── Post-request: budget spend + anomaly detection ──
|
|
4025
|
+
try {
|
|
4026
|
+
(0, budget_js_1.getBudgetManager)().recordSpend(cost, targetModel);
|
|
4027
|
+
const anomalyResult = (0, anomaly_js_1.getAnomalyDetector)().recordAndAnalyze({ model: targetModel, tokensIn, tokensOut, costUsd: cost });
|
|
4028
|
+
if (anomalyResult.detected) {
|
|
4029
|
+
for (const anomaly of anomalyResult.anomalies) {
|
|
4030
|
+
(0, alerts_js_1.getAlertManager)().fireAnomaly(anomaly);
|
|
4031
|
+
}
|
|
4032
|
+
}
|
|
4033
|
+
}
|
|
4034
|
+
catch { /* budget/anomaly should never block */ }
|
|
3416
4035
|
if (recordTelemetry) {
|
|
3417
4036
|
// Record the run in RelayPlane
|
|
3418
4037
|
try {
|
|
@@ -3440,10 +4059,27 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3440
4059
|
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|
|
3441
4060
|
const tokensOut = usage?.output_tokens ?? usage?.completion_tokens ?? 0;
|
|
3442
4061
|
sendCloudTelemetry(taskType, targetModel, tokensIn, tokensOut, durationMs, true);
|
|
4062
|
+
meshCapture(targetModel, targetProvider, taskType, tokensIn, tokensOut, cost, durationMs, true);
|
|
4063
|
+
}
|
|
4064
|
+
// ── Cache: store non-streaming chat/completions response ──
|
|
4065
|
+
const chatRespCache = (0, response_cache_js_1.getResponseCache)();
|
|
4066
|
+
const chatReqAsRecord = request;
|
|
4067
|
+
const chatCacheBypassLocal = chatRespCache.shouldBypass(chatReqAsRecord);
|
|
4068
|
+
let chatCacheHeaderVal = chatCacheBypassLocal ? 'BYPASS' : 'MISS';
|
|
4069
|
+
if (!chatCacheBypassLocal) {
|
|
4070
|
+
const chatHashLocal = chatRespCache.computeKey(chatReqAsRecord);
|
|
4071
|
+
chatRespCache.set(chatHashLocal, JSON.stringify(responseData), {
|
|
4072
|
+
model: targetModel,
|
|
4073
|
+
tokensIn: tokensIn,
|
|
4074
|
+
tokensOut: tokensOut,
|
|
4075
|
+
costUsd: cost,
|
|
4076
|
+
taskType,
|
|
4077
|
+
});
|
|
4078
|
+
log(`Cache STORE for chat/completions ${targetModel} (hash: ${chatHashLocal.slice(0, 8)})`);
|
|
3443
4079
|
}
|
|
3444
4080
|
// Send response with RelayPlane routing headers
|
|
3445
4081
|
const nonStreamRpHeaders = buildRelayPlaneResponseHeaders(targetModel, request.model ?? 'unknown', complexity, targetProvider, routingMode);
|
|
3446
|
-
res.writeHead(200, { 'Content-Type': 'application/json', ...nonStreamRpHeaders });
|
|
4082
|
+
res.writeHead(200, { 'Content-Type': 'application/json', 'X-RelayPlane-Cache': chatCacheHeaderVal, ...nonStreamRpHeaders });
|
|
3447
4083
|
res.end(JSON.stringify(responseData));
|
|
3448
4084
|
}
|
|
3449
4085
|
// Note: CLI entry point is in cli.ts
|