@relayplane/proxy 1.5.3 → 1.5.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -418,7 +418,7 @@ function shutdownHistory() {
418
418
  }
419
419
  flushHistoryBuffer();
420
420
  }
421
- function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
421
+ function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated, taskType, complexity) {
422
422
  const timestamp = new Date().toISOString();
423
423
  const status = success ? '✓' : '✗';
424
424
  const escalateTag = escalated ? ' [ESCALATED]' : '';
@@ -459,6 +459,8 @@ function logRequest(originalModel, targetModel, provider, latencyMs, success, mo
459
459
  tokensIn: 0,
460
460
  tokensOut: 0,
461
461
  costUsd: 0,
462
+ taskType: taskType || 'general',
463
+ complexity: complexity || 'simple',
462
464
  };
463
465
  requestHistory.push(entry);
464
466
  if (requestHistory.length > MAX_HISTORY) {
@@ -1111,6 +1113,15 @@ async function forwardToGemini(request, targetModel, apiKey) {
1111
1113
  parts: [{ text: systemMessage.content }],
1112
1114
  };
1113
1115
  }
1116
+ if (request.tools && request.tools.length > 0) {
1117
+ geminiBody["tools"] = [{
1118
+ functionDeclarations: request.tools.map((t) => ({
1119
+ name: t.function.name,
1120
+ description: t.function.description || "",
1121
+ parameters: t.function.parameters || {}
1122
+ }))
1123
+ }];
1124
+ }
1114
1125
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`, {
1115
1126
  method: 'POST',
1116
1127
  headers: {
@@ -1140,6 +1151,15 @@ async function forwardToGeminiStream(request, targetModel, apiKey) {
1140
1151
  parts: [{ text: systemMessage.content }],
1141
1152
  };
1142
1153
  }
1154
+ if (request.tools && request.tools.length > 0) {
1155
+ geminiBody["tools"] = [{
1156
+ functionDeclarations: request.tools.map((t) => ({
1157
+ name: t.function.name,
1158
+ description: t.function.description || "",
1159
+ parameters: t.function.parameters || {}
1160
+ }))
1161
+ }];
1162
+ }
1143
1163
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`, {
1144
1164
  method: 'POST',
1145
1165
  headers: {
@@ -1154,14 +1174,30 @@ async function forwardToGeminiStream(request, targetModel, apiKey) {
1154
1174
  */
1155
1175
  function convertGeminiResponse(geminiData, model) {
1156
1176
  const candidate = geminiData.candidates?.[0];
1157
- const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
1177
+ const parts = candidate?.content?.parts ?? [];
1178
+ const text = parts.map((p) => p.text ?? '').join('');
1179
+ const functionCalls = parts.filter((p) => p.functionCall);
1158
1180
  let finishReason = 'stop';
1159
- if (candidate?.finishReason === 'MAX_TOKENS') {
1181
+ if (functionCalls.length > 0) {
1182
+ finishReason = 'tool_calls';
1183
+ }
1184
+ else if (candidate?.finishReason === 'MAX_TOKENS') {
1160
1185
  finishReason = 'length';
1161
1186
  }
1162
1187
  else if (candidate?.finishReason === 'SAFETY') {
1163
1188
  finishReason = 'content_filter';
1164
1189
  }
1190
+ const message = { role: 'assistant', content: text || null };
1191
+ if (functionCalls.length > 0) {
1192
+ message['tool_calls'] = functionCalls.map((p, i) => ({
1193
+ id: `call_${Date.now()}_${i}`,
1194
+ type: 'function',
1195
+ function: {
1196
+ name: p.functionCall.name,
1197
+ arguments: JSON.stringify(p.functionCall.args || {})
1198
+ }
1199
+ }));
1200
+ }
1165
1201
  return {
1166
1202
  id: `chatcmpl-${Date.now()}`,
1167
1203
  object: 'chat.completion',
@@ -1170,10 +1206,7 @@ function convertGeminiResponse(geminiData, model) {
1170
1206
  choices: [
1171
1207
  {
1172
1208
  index: 0,
1173
- message: {
1174
- role: 'assistant',
1175
- content: text,
1176
- },
1209
+ message,
1177
1210
  finish_reason: finishReason,
1178
1211
  },
1179
1212
  ],
@@ -1190,20 +1223,36 @@ function convertGeminiResponse(geminiData, model) {
1190
1223
  */
1191
1224
  function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
1192
1225
  const candidate = eventData.candidates?.[0];
1193
- const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
1226
+ const parts = candidate?.content?.parts ?? [];
1227
+ const text = parts.map((p) => p.text ?? '').join('');
1228
+ const functionCalls = parts.filter((p) => p.functionCall);
1194
1229
  const choice = {
1195
1230
  index: 0,
1196
1231
  delta: {},
1197
1232
  finish_reason: null,
1198
1233
  };
1234
+ const delta = {};
1199
1235
  if (isFirst) {
1200
- choice['delta'] = { role: 'assistant', content: text };
1236
+ delta['role'] = 'assistant';
1237
+ }
1238
+ if (text) {
1239
+ delta['content'] = text;
1201
1240
  }
1202
- else if (text) {
1203
- choice['delta'] = { content: text };
1241
+ if (functionCalls.length > 0) {
1242
+ delta['tool_calls'] = functionCalls.map((p, i) => ({
1243
+ index: i,
1244
+ id: `call_${messageId}_${i}`,
1245
+ type: 'function',
1246
+ function: {
1247
+ name: p.functionCall.name,
1248
+ arguments: JSON.stringify(p.functionCall.args || {})
1249
+ }
1250
+ }));
1251
+ choice['finish_reason'] = 'tool_calls';
1204
1252
  }
1253
+ choice['delta'] = delta;
1205
1254
  // Check for finish
1206
- if (candidate?.finishReason) {
1255
+ if (candidate?.finishReason && choice['finish_reason'] === null) {
1207
1256
  let finishReason = 'stop';
1208
1257
  if (candidate.finishReason === 'MAX_TOKENS') {
1209
1258
  finishReason = 'length';
@@ -1773,6 +1822,9 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
1773
1822
  .dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px}.dot.up{background:#34d399}.dot.down{background:#ef4444}
1774
1823
  .badge{display:inline-block;padding:2px 8px;border-radius:6px;font-size:.75rem;font-weight:500}
1775
1824
  .badge.ok{background:#052e1633;color:#34d399}.badge.err{background:#2d0a0a;color:#ef4444}
1825
+ .badge.tt-code{background:#1e3a5f;color:#60a5fa}.badge.tt-analysis{background:#3b1f6e;color:#a78bfa}.badge.tt-summarization{background:#1a3a2a;color:#6ee7b7}.badge.tt-qa{background:#3a2f1e;color:#fbbf24}.badge.tt-general{background:#1e293b;color:#94a3b8}
1826
+ .badge.cx-simple{background:#052e1633;color:#34d399}.badge.cx-moderate{background:#2d2a0a;color:#fbbf24}.badge.cx-complex{background:#2d0a0a;color:#ef4444}
1827
+ @media(max-width:768px){.col-tt,.col-cx{display:none}}
1776
1828
  .prov{display:flex;gap:16px;flex-wrap:wrap}.prov-item{display:flex;align-items:center;font-size:.85rem;background:#111318;padding:8px 14px;border-radius:8px;border:1px solid #1e293b}
1777
1829
  </style></head><body>
1778
1830
  <div class="header"><div><h1>⚡ RelayPlane Dashboard</h1></div><div class="meta"><span id="ver"></span> · up <span id="uptime"></span> · refreshes every 5s</div></div>
@@ -1786,7 +1838,7 @@ td{padding:8px 12px;border-bottom:1px solid #111318}
1786
1838
  <table><thead><tr><th>Model</th><th>Requests</th><th>Cost</th><th>% of Total</th></tr></thead><tbody id="models"></tbody></table></div>
1787
1839
  <div class="section"><h2>Provider Status</h2><div class="prov" id="providers"></div></div>
1788
1840
  <div class="section"><h2>Recent Runs</h2>
1789
- <table><thead><tr><th>Time</th><th>Model</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
1841
+ <table><thead><tr><th>Time</th><th>Model</th><th class="col-tt">Task Type</th><th class="col-cx">Complexity</th><th>Tokens In</th><th>Tokens Out</th><th>Cost</th><th>Latency</th><th>Status</th></tr></thead><tbody id="runs"></tbody></table></div>
1790
1842
  <script>
1791
1843
  const $ = id => document.getElementById(id);
1792
1844
  function fmt(n,d=2){return typeof n==='number'?n.toFixed(d):'-'}
@@ -1811,9 +1863,11 @@ async function load(){
1811
1863
  $('models').innerHTML=(stats.byModel||[]).map(m=>
1812
1864
  '<tr><td>'+m.model+'</td><td>'+m.count+'</td><td>$'+fmt(m.costUsd,4)+'</td><td>'+fmt(m.count/total*100,1)+'%</td></tr>'
1813
1865
  ).join('')||'<tr><td colspan=4 style="color:#64748b">No data yet</td></tr>';
1866
+ function ttCls(t){const m={code_generation:'tt-code',analysis:'tt-analysis',summarization:'tt-summarization',question_answering:'tt-qa'};return m[t]||'tt-general'}
1867
+ function cxCls(c){const m={simple:'cx-simple',moderate:'cx-moderate',complex:'cx-complex'};return m[c]||'cx-simple'}
1814
1868
  $('runs').innerHTML=(runsR.runs||[]).map(r=>
1815
- '<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
1816
- ).join('')||'<tr><td colspan=7 style="color:#64748b">No runs yet</td></tr>';
1869
+ '<tr><td>'+fmtTime(r.started_at)+'</td><td>'+r.model+'</td><td class="col-tt"><span class="badge '+ttCls(r.taskType)+'">'+(r.taskType||'general').replace(/_/g,' ')+'</span></td><td class="col-cx"><span class="badge '+cxCls(r.complexity)+'">'+(r.complexity||'simple')+'</span></td><td>'+(r.tokensIn||0)+'</td><td>'+(r.tokensOut||0)+'</td><td>$'+fmt(r.costUsd,4)+'</td><td>'+r.latencyMs+'ms</td><td><span class="badge '+(r.status==='success'?'ok':'err')+'">'+r.status+'</span></td></tr>'
1870
+ ).join('')||'<tr><td colspan=9 style="color:#64748b">No runs yet</td></tr>';
1817
1871
  $('providers').innerHTML=(provH.providers||[]).map(p=>
1818
1872
  '<div class="prov-item"><span class="dot '+(p.status==='healthy'?'up':'down')+'"></span>'+p.provider+'</div>'
1819
1873
  ).join('');
@@ -2036,7 +2090,8 @@ async function startProxy(config = {}) {
2036
2090
  started_at: r.timestamp,
2037
2091
  model: r.targetModel,
2038
2092
  routed_to: `${r.provider}/${r.targetModel}`,
2039
- taskType: r.mode,
2093
+ taskType: r.taskType || 'general',
2094
+ complexity: r.complexity || 'simple',
2040
2095
  costUsd: r.costUsd,
2041
2096
  latencyMs: r.latencyMs,
2042
2097
  tokensIn: r.tokensIn,
@@ -2366,7 +2421,7 @@ async function startProxy(config = {}) {
2366
2421
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
2367
2422
  }
2368
2423
  const durationMs = Date.now() - startTime;
2369
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
2424
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
2370
2425
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
2371
2426
  res.end(JSON.stringify(errorPayload));
2372
2427
  return;
@@ -2437,7 +2492,7 @@ async function startProxy(config = {}) {
2437
2492
  }
2438
2493
  }
2439
2494
  const durationMs = Date.now() - startTime;
2440
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false);
2495
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false, taskType, complexity);
2441
2496
  if (recordTelemetry) {
2442
2497
  relay
2443
2498
  .run({
@@ -2454,7 +2509,7 @@ async function startProxy(config = {}) {
2454
2509
  }
2455
2510
  catch (err) {
2456
2511
  const durationMs = Date.now() - startTime;
2457
- logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
2512
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
2458
2513
  if (err instanceof ProviderResponseError) {
2459
2514
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
2460
2515
  res.end(JSON.stringify(err.payload));
@@ -2738,7 +2793,7 @@ async function startProxy(config = {}) {
2738
2793
  const startTime = Date.now();
2739
2794
  // Handle streaming vs non-streaming
2740
2795
  if (isStreaming) {
2741
- await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
2796
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
2742
2797
  }
2743
2798
  else {
2744
2799
  if (useCascade && cascadeConfig) {
@@ -2770,7 +2825,7 @@ async function startProxy(config = {}) {
2770
2825
  const durationMs = Date.now() - startTime;
2771
2826
  let responseData = cascadeResult.responseData;
2772
2827
  // Log cascade request for stats tracking
2773
- logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0);
2828
+ logRequest(originalRequestedModel ?? 'unknown', cascadeResult.model, cascadeResult.provider, durationMs, true, 'cascade', cascadeResult.escalations > 0, taskType, complexity);
2774
2829
  const cascadeUsage = responseData?.usage;
2775
2830
  const cascadeTokensIn = cascadeUsage?.input_tokens ?? cascadeUsage?.prompt_tokens ?? 0;
2776
2831
  const cascadeTokensOut = cascadeUsage?.output_tokens ?? cascadeUsage?.completion_tokens ?? 0;
@@ -2804,7 +2859,7 @@ async function startProxy(config = {}) {
2804
2859
  }
2805
2860
  catch (err) {
2806
2861
  const durationMs = Date.now() - startTime;
2807
- logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade');
2862
+ logRequest(originalRequestedModel ?? 'unknown', targetModel || 'unknown', targetProvider, durationMs, false, 'cascade', undefined, taskType, complexity);
2808
2863
  if (err instanceof ProviderResponseError) {
2809
2864
  res.writeHead(err.status, { 'Content-Type': 'application/json' });
2810
2865
  res.end(JSON.stringify(err.payload));
@@ -2816,7 +2871,7 @@ async function startProxy(config = {}) {
2816
2871
  }
2817
2872
  }
2818
2873
  else {
2819
- await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
2874
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity);
2820
2875
  }
2821
2876
  }
2822
2877
  });
@@ -2889,7 +2944,7 @@ async function executeNonStreamingProviderRequest(request, targetProvider, targe
2889
2944
  }
2890
2945
  return { responseData, ok: true, status: 200 };
2891
2946
  }
2892
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
2947
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
2893
2948
  let providerResponse;
2894
2949
  try {
2895
2950
  switch (targetProvider) {
@@ -2917,7 +2972,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2917
2972
  cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
2918
2973
  }
2919
2974
  const durationMs = Date.now() - startTime;
2920
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2975
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
2921
2976
  res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
2922
2977
  res.end(JSON.stringify(errorData));
2923
2978
  return;
@@ -2929,7 +2984,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2929
2984
  cooldownManager.recordFailure(targetProvider, errorMsg);
2930
2985
  }
2931
2986
  const durationMs = Date.now() - startTime;
2932
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
2987
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
2933
2988
  res.writeHead(500, { 'Content-Type': 'application/json' });
2934
2989
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2935
2990
  return;
@@ -3013,7 +3068,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3013
3068
  }
3014
3069
  const durationMs = Date.now() - startTime;
3015
3070
  // Always log the request for stats/telemetry tracking
3016
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3071
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
3017
3072
  // Update token/cost info on the history entry
3018
3073
  const streamCost = (0, telemetry_js_1.estimateCost)(targetModel, streamTokensIn, streamTokensOut);
3019
3074
  updateLastHistoryEntry(streamTokensIn, streamTokensOut, streamCost);
@@ -3038,7 +3093,7 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
3038
3093
  /**
3039
3094
  * Handle non-streaming request
3040
3095
  */
3041
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
3096
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled, complexity = 'simple') {
3042
3097
  let responseData;
3043
3098
  try {
3044
3099
  const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
@@ -3048,7 +3103,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
3048
3103
  cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
3049
3104
  }
3050
3105
  const durationMs = Date.now() - startTime;
3051
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
3106
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
3052
3107
  res.writeHead(result.status, { 'Content-Type': 'application/json' });
3053
3108
  res.end(JSON.stringify(responseData));
3054
3109
  return;
@@ -3060,7 +3115,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
3060
3115
  cooldownManager.recordFailure(targetProvider, errorMsg);
3061
3116
  }
3062
3117
  const durationMs = Date.now() - startTime;
3063
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode);
3118
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, false, routingMode, undefined, taskType, complexity);
3064
3119
  res.writeHead(500, { 'Content-Type': 'application/json' });
3065
3120
  res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
3066
3121
  return;
@@ -3070,7 +3125,7 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
3070
3125
  }
3071
3126
  const durationMs = Date.now() - startTime;
3072
3127
  // Log the successful request
3073
- logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode);
3128
+ logRequest(request.model ?? 'unknown', targetModel, targetProvider, durationMs, true, routingMode, undefined, taskType, complexity);
3074
3129
  // Update token/cost info
3075
3130
  const usage = responseData?.usage;
3076
3131
  const tokensIn = usage?.input_tokens ?? usage?.prompt_tokens ?? 0;