@relayplane/proxy 1.5.2 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +255 -21
- package/dist/cli.js +257 -2
- package/dist/cli.js.map +1 -1
- package/dist/server.js +2 -2
- package/dist/server.js.map +1 -1
- package/dist/standalone-proxy.d.ts.map +1 -1
- package/dist/standalone-proxy.js +86 -31
- package/dist/standalone-proxy.js.map +1 -1
- package/dist/telemetry.d.ts.map +1 -1
- package/dist/telemetry.js +11 -0
- package/dist/telemetry.js.map +1 -1
- package/package.json +3 -6
- package/dist/__tests__/model-suggestions.test.d.ts +0 -2
- package/dist/__tests__/model-suggestions.test.d.ts.map +0 -1
- package/dist/__tests__/model-suggestions.test.js +0 -67
- package/dist/__tests__/model-suggestions.test.js.map +0 -1
- package/dist/__tests__/routing-aliases.test.d.ts +0 -2
- package/dist/__tests__/routing-aliases.test.d.ts.map +0 -1
- package/dist/__tests__/routing-aliases.test.js +0 -81
- package/dist/__tests__/routing-aliases.test.js.map +0 -1
package/dist/standalone-proxy.js
CHANGED
|
@@ -418,7 +418,7 @@ function shutdownHistory() {
|
|
|
418
418
|
}
|
|
419
419
|
flushHistoryBuffer();
|
|
420
420
|
}
|
|
421
|
-
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
|
|
421
|
+
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
|
|
422
422
|
const timestamp = new Date().toISOString();
|
|
423
423
|
const status = success ? '✓' : '✗';
|
|
424
424
|
const escalateTag = escalated ? ' [ESCALATED]' : '';
|
|
@@ -459,6 +459,8 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
|
|
|
459
459
|
tokensIn: 0,
|
|
460
460
|
tokensOut: 0,
|
|
461
461
|
costUsd: 0,
|
|
462
|
+
taskType: taskType || 'general',
|
|
463
|
+
complexity: complexity || 'simple',
|
|
462
464
|
};
|
|
463
465
|
requestHistory.push(entry);
|
|
464
466
|
if (requestHistory.length > MAX_HISTORY) {
|
|
@@ -1111,6 +1113,15 @@ async function forwardToGemini(request, targetModel, apiKey) {
|
|
|
1111
1113
|
parts: [{ text: systemMessage.content }],
|
|
1112
1114
|
};
|
|
1113
1115
|
}
|
|
1116
|
+
if (request.tools && request.tools.length > 0) {
|
|
1117
|
+
geminiBody["tools"] = [{
|
|
1118
|
+
functionDeclarations: request.tools.map((t) => ({
|
|
1119
|
+
name: t.function.name,
|
|
1120
|
+
description: t.function.description || "",
|
|
1121
|
+
parameters: t.function.parameters || {}
|
|
1122
|
+
}))
|
|
1123
|
+
}];
|
|
1124
|
+
}
|
|
1114
1125
|
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`, {
|
|
1115
1126
|
method: 'POST',
|
|
1116
1127
|
headers: {
|
|
@@ -1140,6 +1151,15 @@ async function forwardToGeminiStream(request, targetModel, apiKey) {
|
|
|
1140
1151
|
parts: [{ text: systemMessage.content }],
|
|
1141
1152
|
};
|
|
1142
1153
|
}
|
|
1154
|
+
if (request.tools && request.tools.length > 0) {
|
|
1155
|
+
geminiBody["tools"] = [{
|
|
1156
|
+
functionDeclarations: request.tools.map((t) => ({
|
|
1157
|
+
name: t.function.name,
|
|
1158
|
+
description: t.function.description || "",
|
|
1159
|
+
parameters: t.function.parameters || {}
|
|
1160
|
+
}))
|
|
1161
|
+
}];
|
|
1162
|
+
}
|
|
1143
1163
|
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`, {
|
|
1144
1164
|
method: 'POST',
|
|
1145
1165
|
headers: {
|
|
@@ -1154,14 +1174,30 @@ async function forwardToGeminiStream(request, targetModel, apiKey) {
|
|
|
1154
1174
|
*/
|
|
1155
1175
|
function convertGeminiResponse(geminiData, model) {
|
|
1156
1176
|
const candidate = geminiData.candidates?.[0];
|
|
1157
|
-
const
|
|
1177
|
+
const parts = candidate?.content?.parts ?? [];
|
|
1178
|
+
const text = parts.map((p) => p.text ?? '').join('');
|
|
1179
|
+
const functionCalls = parts.filter((p) => p.functionCall);
|
|
1158
1180
|
let finishReason = 'stop';
|
|
1159
|
-
if (
|
|
1181
|
+
if (functionCalls.length > 0) {
|
|
1182
|
+
finishReason = 'tool_calls';
|
|
1183
|
+
}
|
|
1184
|
+
else if (candidate?.finishReason === 'MAX_TOKENS') {
|
|
1160
1185
|
finishReason = 'length';
|
|
1161
1186
|
}
|
|
1162
1187
|
else if (candidate?.finishReason === 'SAFETY') {
|
|
1163
1188
|
finishReason = 'content_filter';
|
|
1164
1189
|
}
|
|
1190
|
+
const message = { role: 'assistant', content: text || null };
|
|
1191
|
+
if (functionCalls.length > 0) {
|
|
1192
|
+
message['tool_calls'] = functionCalls.map((p, i) => ({
|
|
1193
|
+
id: `call_${Date.now()}_${i}`,
|
|
1194
|
+
type: 'function',
|
|
1195
|
+
function: {
|
|
1196
|
+
name: p.functionCall.name,
|
|
1197
|
+
arguments: JSON.stringify(p.functionCall.args || {})
|
|
1198
|
+
}
|
|
1199
|
+
}));
|
|
1200
|
+
}
|
|
1165
1201
|
return {
|
|
1166
1202
|
id: `chatcmpl-${Date.now()}`,
|
|
1167
1203
|
object: 'chat.completion',
|
|
@@ -1170,10 +1206,7 @@ function convertGeminiResponse(geminiData, model) {
|
|
|
1170
1206
|
choices: [
|
|
1171
1207
|
{
|
|
1172
1208
|
index: 0,
|
|
1173
|
-
message
|
|
1174
|
-
role: 'assistant',
|
|
1175
|
-
content: text,
|
|
1176
|
-
},
|
|
1209
|
+
message,
|
|
1177
1210
|
finish_reason: finishReason,
|
|
1178
1211
|
},
|
|
1179
1212
|
],
|
|
@@ -1190,20 +1223,36 @@ function convertGeminiResponse(geminiData, model) {
|
|
|
1190
1223
|
*/
|
|
1191
1224
|
function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
|
|
1192
1225
|
const candidate = eventData.candidates?.[0];
|
|
1193
|
-
const
|
|
1226
|
+
const parts = candidate?.content?.parts ?? [];
|
|
1227
|
+
const text = parts.map((p) => p.text ?? '').join('');
|
|
1228
|
+
const functionCalls = parts.filter((p) => p.functionCall);
|
|
1194
1229
|
const choice = {
|
|
1195
1230
|
index: 0,
|
|
1196
1231
|
delta: {},
|
|
1197
1232
|
finish_reason: null,
|
|
1198
1233
|
};
|
|
1234
|
+
const delta = {};
|
|
1199
1235
|
if (isFirst) {
|
|
1200
|
-
|
|
1236
|
+
delta['role'] = 'assistant';
|
|
1237
|
+
}
|
|
1238
|
+
if (text) {
|
|
1239
|
+
delta['content'] = text;
|
|
1201
1240
|
}
|
|
1202
|
-
|
|
1203
|
-
|
|
1241
|
+
if (functionCalls.length > 0) {
|
|
1242
|
+
delta['tool_calls'] = functionCalls.map((p, i) => ({
|
|
1243
|
+
index: i,
|
|
1244
|
+
id: `call_${messageId}_${i}`,
|
|
1245
|
+
type: 'function',
|
|
1246
|
+
function: {
|
|
1247
|
+
name: p.functionCall.name,
|
|
1248
|
+
arguments: JSON.stringify(p.functionCall.args || {})
|
|
1249
|
+
}
|
|
1250
|
+
}));
|
|
1251
|
+
choice['finish_reason'] = 'tool_calls';
|
|
1204
1252
|
}
|
|
1253
|
+
choice['delta'] = delta;
|
|
1205
1254
|
// Check for finish
|
|
1206
|
-
if (candidate?.finishReason) {
|
|
1255
|
+
if (candidate?.finishReason && choice['finish_reason'] === null) {
|
|
1207
1256
|
let finishReason = 'stop';
|
|
1208
1257
|
if (candidate.finishReason === 'MAX_TOKENS') {
|
|
1209
1258
|
finishReason = 'length';
|
|
@@ -1773,6 +1822,9 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1773
1822
|
.dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
|
|
1774
1823
|
.badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
|
|
1775
1824
|
.badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
|
|
1825
|
+
.badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
|
|
1826
|
+
.badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
|
|
1827
|
+
@media(max-width:768px){.col-tt,.col-cx{display:none}}
|
|
1776
1828
|
.prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
|
|
1777
1829
|
</style></head><body>
|
|
1778
1830
|
<div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
|
|
@@ -1786,7 +1838,7 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
|
|
|
1786
1838
|
<table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
|
|
1787
1839
|
<div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
|
|
1788
1840
|
<div class="section"><h2>Recent Runs</h2>
|
|
1789
|
-
<table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
1841
|
+
<table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
|
|
1790
1842
|
<script>
|
|
1791
1843
|
/* Shorthand for document.getElementById; the dashboard script uses it to reach its table bodies and containers by id. */
const $ = id => document.getElementById(id);
|
|
1792
1844
|
/* Render n with d fixed decimals (default 2); any value whose typeof is not 'number' renders as '-'. */
function fmt(n,d=2){
  if(typeof n!=='number'){return '-'}
  return n.toFixed(d)
}
|
|
@@ -1811,9 +1863,11 @@ async function load(){
|
|
|
1811
1863
|
$('models').innerHTML=(stats.byModel||[]).map(m=>
|
|
1812
1864
|
'<tr><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
|
|
1813
1865
|
).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
|
|
1866
|
+
/* Map a run's task type to its badge CSS class; unknown or missing task types fall back to 'tt-general'. */
function ttCls(t){
  const m={code_generation:'tt-code',analysis:'tt-analysis',summarization:'tt-summarization',question_answering:'tt-qa'};
  /* Own-property check: a plain m[t] lookup would also match inherited keys such as 'constructor' and leak a non-string into the class attribute. */
  return Object.prototype.hasOwnProperty.call(m,t)?m[t]:'tt-general'
}
|
|
1867
|
+
/* Map a run's complexity label to its badge CSS class; unknown or missing labels fall back to 'cx-simple'. */
function cxCls(c){
  const m={simple:'cx-simple',moderate:'cx-moderate',complex:'cx-complex'};
  /* Own-property check: a plain m[c] lookup would also match inherited keys such as 'constructor' and leak a non-string into the class attribute. */
  return Object.prototype.hasOwnProperty.call(m,c)?m[c]:'cx-simple'
}
|
|
1814
1868
|
$('runs').innerHTML=(runsR.runs||[]).map(r=>
|
|
1815
|
-
'<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
|
|
1816
|
-
).join('')||'<tr><td colspan=
|
|
1869
|
+
'<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
|
|
1870
|
+
).join('')||'<tr><td colspan=9 style="color:#64748b">No runs yet</td></tr>';
|
|
1817
1871
|
$('providers').innerHTML=(provH.providers||[]).map(p=>
|
|
1818
1872
|
'<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+p.provider+'</div>'
|
|
1819
1873
|
).join('');
|
|
@@ -2036,7 +2090,8 @@ async function startProxy(config = {}) {
|
|
|
2036
2090
|
started_at: r.timestamp,
|
|
2037
2091
|
model: r.targetModel,
|
|
2038
2092
|
routed_to: `${r.provider}/${r.targetModel}`,
|
|
2039
|
-
taskType: r.
|
|
2093
|
+
taskType: r.taskType || 'general',
|
|
2094
|
+
complexity: r.complexity || 'simple',
|
|
2040
2095
|
costUsd: r.costUsd,
|
|
2041
2096
|
latencyMs: r.latencyMs,
|
|
2042
2097
|
tokensIn: r.tokensIn,
|
|
@@ -2366,7 +2421,7 @@ async function startProxy(config = {}) {
|
|
|
2366
2421
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
|
|
2367
2422
|
}
|
|
2368
2423
|
const durationMs = Date.now() - startTime;
|
|
2369
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
|
|
2424
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
2370
2425
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
2371
2426
|
res.end(JSON.stringify(errorPayload));
|
|
2372
2427
|
return;
|
|
@@ -2437,7 +2492,7 @@ async function startProxy(config = {}) {
|
|
|
2437
2492
|
}
|
|
2438
2493
|
}
|
|
2439
2494
|
const durationMs = Date.now() - startTime;
|
|
2440
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false);
|
|
2495
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false, taskType, complexity);
|
|
2441
2496
|
if (recordTelemetry) {
|
|
2442
2497
|
relay
|
|
2443
2498
|
.run({
|
|
@@ -2454,7 +2509,7 @@ async function startProxy(config = {}) {
|
|
|
2454
2509
|
}
|
|
2455
2510
|
catch (err) {
|
|
2456
2511
|
const durationMs = Date.now() - startTime;
|
|
2457
|
-
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
|
|
2512
|
+
logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
2458
2513
|
if (err instanceof ProviderResponseError) {
|
|
2459
2514
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
2460
2515
|
res.end(JSON.stringify(err.payload));
|
|
@@ -2738,7 +2793,7 @@ async function startProxy(config = {}) {
|
|
|
2738
2793
|
const startTime = Date.now();
|
|
2739
2794
|
// Handle streaming vs non-streaming
|
|
2740
2795
|
if (isStreaming) {
|
|
2741
|
-
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
|
|
2796
|
+
await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
|
|
2742
2797
|
}
|
|
2743
2798
|
else {
|
|
2744
2799
|
if (useCascade && cascadeConfig) {
|
|
@@ -2770,7 +2825,7 @@ async function startProxy(config = {}) {
|
|
|
2770
2825
|
const durationMs = Date.now() - startTime;
|
|
2771
2826
|
let responseData = cascadeResult.responseData;
|
|
2772
2827
|
// Log cascade request for stats tracking
|
|
2773
|
-
logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
|
|
2828
|
+
logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0, taskType, complexity);
|
|
2774
2829
|
const cascadeUsage = responseData?.usage;
|
|
2775
2830
|
const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
|
|
2776
2831
|
const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
|
|
@@ -2804,7 +2859,7 @@ async function startProxy(config = {}) {
|
|
|
2804
2859
|
}
|
|
2805
2860
|
catch (err) {
|
|
2806
2861
|
const durationMs = Date.now() - startTime;
|
|
2807
|
-
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
|
|
2862
|
+
logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity);
|
|
2808
2863
|
if (err instanceof ProviderResponseError) {
|
|
2809
2864
|
res.writeHead(err.status, { 'Content-Type': 'application/json' });
|
|
2810
2865
|
res.end(JSON.stringify(err.payload));
|
|
@@ -2816,7 +2871,7 @@ async function startProxy(config = {}) {
|
|
|
2816
2871
|
}
|
|
2817
2872
|
}
|
|
2818
2873
|
else {
|
|
2819
|
-
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
|
|
2874
|
+
await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
|
|
2820
2875
|
}
|
|
2821
2876
|
}
|
|
2822
2877
|
});
|
|
@@ -2889,7 +2944,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
|
|
|
2889
2944
|
}
|
|
2890
2945
|
return { responseData, ok: true, status: 200 };
|
|
2891
2946
|
}
|
|
2892
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
|
|
2947
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
|
|
2893
2948
|
let providerResponse;
|
|
2894
2949
|
try {
|
|
2895
2950
|
switch (targetProvider) {
|
|
@@ -2917,7 +2972,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2917
2972
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
|
|
2918
2973
|
}
|
|
2919
2974
|
const durationMs = Date.now() - startTime;
|
|
2920
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2975
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
2921
2976
|
res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
|
|
2922
2977
|
res.end(JSON.stringify(errorData));
|
|
2923
2978
|
return;
|
|
@@ -2929,7 +2984,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2929
2984
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
2930
2985
|
}
|
|
2931
2986
|
const durationMs = Date.now() - startTime;
|
|
2932
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
2987
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
2933
2988
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
2934
2989
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
2935
2990
|
return;
|
|
@@ -3013,7 +3068,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3013
3068
|
}
|
|
3014
3069
|
const durationMs = Date.now() - startTime;
|
|
3015
3070
|
// Always log the request for stats/telemetry tracking
|
|
3016
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3071
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3017
3072
|
// Update token/cost info on the history entry
|
|
3018
3073
|
const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
|
|
3019
3074
|
updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
|
|
@@ -3038,7 +3093,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
3038
3093
|
/**
|
|
3039
3094
|
* Handle non-streaming request
|
|
3040
3095
|
*/
|
|
3041
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
|
|
3096
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
|
|
3042
3097
|
let responseData;
|
|
3043
3098
|
try {
|
|
3044
3099
|
const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
|
|
@@ -3048,7 +3103,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3048
3103
|
cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
|
|
3049
3104
|
}
|
|
3050
3105
|
const durationMs = Date.now() - startTime;
|
|
3051
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
3106
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
3052
3107
|
res.writeHead(result.status, { 'Content-Type': 'application/json' });
|
|
3053
3108
|
res.end(JSON.stringify(responseData));
|
|
3054
3109
|
return;
|
|
@@ -3060,7 +3115,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3060
3115
|
cooldownManager.recordFailure(targetProvider, errorMsg);
|
|
3061
3116
|
}
|
|
3062
3117
|
const durationMs = Date.now() - startTime;
|
|
3063
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
|
|
3118
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
|
|
3064
3119
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
3065
3120
|
res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
|
|
3066
3121
|
return;
|
|
@@ -3070,7 +3125,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
3070
3125
|
}
|
|
3071
3126
|
const durationMs = Date.now() - startTime;
|
|
3072
3127
|
// Log the successful request
|
|
3073
|
-
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
|
|
3128
|
+
logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
|
|
3074
3129
|
// Update token/cost info
|
|
3075
3130
|
const usage = responseData?.usage;
|
|
3076
3131
|
const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;
|