@khanglvm/llm-router 2.6.1 → 2.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.6.1",
3
+ "version": "2.6.2",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -66,6 +66,7 @@ import {
66
66
  sanitizeConfigForDisplay,
67
67
  validateRuntimeConfig
68
68
  } from "../runtime/config.js";
69
+ import { normalizeQuotaProbeConfig } from "../runtime/quota-probe.js";
69
70
  import {
70
71
  CODEX_SUBSCRIPTION_MODELS,
71
72
  CLAUDE_CODE_SUBSCRIPTION_MODELS
@@ -8041,6 +8042,234 @@ async function doSetProviderRateLimits(context) {
8041
8042
  };
8042
8043
  }
8043
8044
 
8045
/**
 * Decode a raw probe-headers value into a header list.
 *
 * Accepts either a JSON array (returned exactly as parsed) or a JSON object,
 * whose entries become `{ key, value }` pairs with each value stringified.
 *
 * @param {unknown} raw - Raw value; anything falsy or blank counts as absent.
 * @returns {Array|undefined} The header list, or `undefined` when the input is
 *   absent, is not valid JSON, or is JSON of any other shape.
 */
function parseProbeHeaders(raw) {
  const text = raw ? String(raw).trim() : "";
  if (text === "") return undefined;

  let decoded;
  try {
    decoded = JSON.parse(text);
  } catch {
    // Not JSON: reported to the caller as "nothing usable".
    return undefined;
  }

  if (Array.isArray(decoded)) return decoded;
  if (decoded === null || typeof decoded !== "object") return undefined;

  const headers = [];
  for (const [key, value] of Object.entries(decoded)) {
    headers.push({ key, value: String(value) });
  }
  return headers;
}
8060
+
8061
/**
 * Decode a raw probe-mapping value into a mapping object.
 *
 * Only a plain JSON object is accepted. Valid JSON of any other shape (number,
 * string, boolean, array, null) is rejected, so the caller's falsy check turns
 * it into a validation error instead of storing it as the mapping.
 *
 * @param {unknown} raw - Raw value; anything falsy or blank counts as absent.
 * @returns {object|undefined} The parsed object, or `undefined` when the input
 *   is absent, is not valid JSON, or is not a plain JSON object.
 */
function parseProbeMapping(raw) {
  if (!raw) return undefined;
  const str = String(raw).trim();
  if (!str) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(str);
  } catch {
    return undefined;
  }

  const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
  return isPlainObject ? parsed : undefined;
}
8071
+
8072
/**
 * Build one mapping field descriptor from a path string.
 *
 * @param {unknown} pathStr - Path value; a falsy value yields no entry.
 * @param {string} [coerceAs] - Coercion type; falls back to "number" when falsy.
 * @returns {{path: string, as: string}|undefined} The descriptor with the path
 *   stringified and trimmed, or `undefined` when `pathStr` is falsy.
 */
function buildMappingFieldEntry(pathStr, coerceAs) {
  if (pathStr) {
    const path = String(pathStr).trim();
    const as = coerceAs ? coerceAs : "number";
    return { path, as };
  }
  return undefined;
}
8076
+
8077
/**
 * Set `quotaProbe` on one provider inside a deep copy of `config`.
 *
 * The input config is never mutated; all edits happen on a `structuredClone`.
 *
 * @param {object} config - Current config; providers are read from `config.providers`.
 * @param {object} options
 * @param {string} options.providerId - Id of the provider to update (trimmed before lookup).
 * @param {*} options.quotaProbe - Value assigned to the provider's `quotaProbe` field.
 * @returns {object} `{ config, changed, reason }`, plus `providerId` and
 *   `quotaProbe` on success. A non-empty `reason` means the update was
 *   rejected: missing id, unknown provider, or newly introduced validation
 *   errors (in which case the original `config` object is returned).
 */
export function setProviderQuotaProbeInConfig(config, { providerId, quotaProbe }) {
  const draft = structuredClone(config);
  const targetId = String(providerId || "").trim();
  const rejected = (reason) => ({ config: draft, changed: false, reason });

  if (targetId === "") return rejected("provider-id is required.");

  const candidates = draft.providers || [];
  const target = candidates.find((entry) => entry.id === targetId);
  if (!target) return rejected(`Provider '${targetId}' not found.`);

  // Remember the prior value (null when absent) so `changed` can be computed below.
  const before = target.quotaProbe || null;
  target.quotaProbe = quotaProbe;

  const introducedErrors = findIntroducedConfigValidationErrors(config, draft);
  if (introducedErrors.length > 0) {
    return { config, changed: false, reason: formatConfigValidationError(introducedErrors) };
  }

  const changed = serializeStable(before) !== serializeStable(target.quotaProbe);
  return {
    config: draft,
    changed,
    reason: "",
    providerId: targetId,
    quotaProbe: target.quotaProbe
  };
}
8101
+
8102
/**
 * Render a multi-line, human-readable summary of a provider's quota probe.
 *
 * @param {string} providerId - Provider id shown in the heading line.
 * @param {object|null|undefined} probe - Probe config; a falsy value yields the
 *   single-line "disabled" message.
 * @returns {string} Report lines joined with "\n".
 */
function buildQuotaProbeReport(providerId, probe) {
  if (!probe) {
    return `Provider '${providerId}': quota probe disabled.`;
  }

  const report = [
    `Provider '${providerId}': quota probe configured.`,
    ` enabled: ${probe.enabled}`,
    ` mode: ${probe.mode}`,
    ` capKind: ${probe.capKind}`,
    ` combinator: ${probe.combinator}`,
    ` enforce: ${probe.enforce}`
  ];

  const margin = probe.safetyMargin;
  if (margin) {
    report.push(` margin: $${margin.dollars} or ${margin.percent}%`);
  }

  // Mode-specific details; the two modes are mutually exclusive.
  if (probe.mode === "http" && probe.http) {
    const http = probe.http;
    report.push(` url: ${http.method} ${http.url}`);
    report.push(` timeout: ${http.timeoutMs}ms`);
    if (http.headers?.length) {
      // Only header names are listed; values are not printed.
      const headerNames = http.headers.map((header) => header.key);
      report.push(` headers: ${headerNames.join(", ")}`);
    }
  } else if (probe.mode === "custom" && probe.custom) {
    const custom = probe.custom;
    report.push(` timeout: ${custom.timeoutMs}ms`);
    report.push(` source: ${custom.source.length} chars`);
  }

  return report.join("\n");
}
8126
+
8127
/**
 * CLI handler that configures or disables a provider's quota probe.
 *
 * Reads the config file, builds a quotaProbe object either from
 * `--quota-probe-json` or from the individual flags (merged over the
 * provider's existing probe), checks it with `normalizeQuotaProbeConfig`, and
 * writes the updated config back when something changed.
 *
 * @param {object} context - Action context; reads `context.args` and `context.mode`.
 * @returns {Promise<object>} `{ ok, mode, exitCode, data }` on success, or
 *   `{ ok: false, mode, exitCode: EXIT_VALIDATION, errorMessage }` on a
 *   validation failure.
 */
async function doSetQuotaProbe(context) {
  const args = context.args || {};
  const configPath = readArg(args, ["config", "configPath"], getDefaultConfigPath());
  const config = await readConfigFile(configPath);
  const providerId = String(readArg(args, ["provider-id", "providerId"], "") || "").trim();

  // Every outcome shares the same envelope; only the payload differs.
  const fail = (errorMessage) => ({ ok: false, mode: context.mode, exitCode: EXIT_VALIDATION, errorMessage });
  const succeed = (data) => ({ ok: true, mode: context.mode, exitCode: EXIT_SUCCESS, data });

  if (!providerId) {
    return fail("provider-id is required.");
  }

  // Tolerate a config without a providers list, matching setProviderQuotaProbeInConfig.
  const provider = (config.providers || []).find((item) => item.id === providerId);
  if (!provider) {
    return fail(`Provider '${providerId}' not found.`);
  }

  const disableProbe = toBoolean(readArg(args, ["disable-quota-probe", "disableQuotaProbe"], false), false);
  if (disableProbe) {
    const result = setProviderQuotaProbeInConfig(config, { providerId, quotaProbe: null });
    if (!result.changed && result.reason) {
      return fail(result.reason);
    }
    await writeConfigFile(result.config, configPath);
    return succeed(`Provider '${providerId}': quota probe disabled.`);
  }

  const quotaProbeJsonRaw = readArg(args, ["quota-probe-json", "quotaProbeJson"], undefined);
  let probeConfig;

  if (quotaProbeJsonRaw) {
    // Full-JSON path: the object is taken as-is; only `enabled` is defaulted.
    try {
      probeConfig = JSON.parse(String(quotaProbeJsonRaw));
    } catch {
      return fail("quota-probe-json must be valid JSON.");
    }
    // Arrays are typeof "object" too, so exclude them explicitly.
    if (typeof probeConfig !== "object" || probeConfig === null || Array.isArray(probeConfig)) {
      return fail("quota-probe-json must be a JSON object.");
    }
    if (!("enabled" in probeConfig)) probeConfig.enabled = true;
  } else {
    // Flag path: each flag falls back to the provider's existing probe value.
    const existing = provider.quotaProbe || {};
    const mode = String(readArg(args, ["probe-mode", "probeMode"], existing.mode || "http") || "http").trim();
    const capKind = String(readArg(args, ["cap-kind", "capKind"], existing.capKind || "") || "").trim();
    const combinator = String(readArg(args, ["combinator"], existing.combinator || "AND") || "AND").trim().toUpperCase();
    const enforce = String(readArg(args, ["enforce"], existing.enforce || "gate") || "gate").trim();
    const marginDollars = toNumber(readArg(args, ["safety-margin-dollars", "safetyMarginDollars"], existing.safetyMargin?.dollars), 0);
    const marginPercent = toNumber(readArg(args, ["safety-margin-percent", "safetyMarginPercent"], existing.safetyMargin?.percent), 0);

    if (!capKind) {
      return fail("cap-kind is required (dollars | tokens | requests).");
    }

    probeConfig = {
      enabled: true,
      capKind,
      combinator,
      enforce,
      mode,
      safetyMargin: { dollars: marginDollars, percent: marginPercent }
    };

    if (mode === "http") {
      const existingHttp = existing.http || {};
      const url = String(readArg(args, ["probe-url", "probeUrl"], existingHttp.url || "") || "").trim();
      const method = String(readArg(args, ["probe-method", "probeMethod"], existingHttp.method || "GET") || "GET").trim().toUpperCase();
      const timeoutMs = toNumber(readArg(args, ["probe-timeout", "probeTimeout"], existingHttp.timeoutMs), undefined);
      const headersRaw = readArg(args, ["probe-headers", "probeHeaders"], undefined);
      const bodyRaw = readArg(args, ["probe-body", "probeBody"], undefined);

      if (!url) {
        return fail("probe-url is required for HTTP mode.");
      }

      // A blank/absent flag keeps the existing headers. A non-blank value that
      // cannot be parsed is an error: silently falling back would save a probe
      // without the headers the user just tried to set.
      let headers = existingHttp.headers ?? [];
      const headersText = headersRaw ? String(headersRaw).trim() : "";
      if (headersText) {
        const parsedHeaders = parseProbeHeaders(headersText);
        if (!parsedHeaders) {
          return fail("probe-headers must be a JSON array of {key,value} objects or a JSON object.");
        }
        headers = parsedHeaders;
      }
      const body = bodyRaw !== undefined ? String(bodyRaw) : existingHttp.body;

      const mappingJsonRaw = readArg(args, ["probe-mapping", "probeMapping"], undefined);
      let mapping;
      if (mappingJsonRaw) {
        mapping = parseProbeMapping(mappingJsonRaw);
        if (!mapping) {
          return fail("probe-mapping must be valid JSON.");
        }
      } else {
        // Individual --probe-mapping-* flags override single fields of the existing mapping.
        const existingMapping = existingHttp.mapping || {};
        const usedPath = readArg(args, ["probe-mapping-used", "probeMappingUsed"], undefined);
        const limitPath = readArg(args, ["probe-mapping-limit", "probeMappingLimit"], undefined);
        const remainingPath = readArg(args, ["probe-mapping-remaining", "probeMappingRemaining"], undefined);
        const resetAtPath = readArg(args, ["probe-mapping-reset-at", "probeMappingResetAt"], undefined);
        const isUnlimitedPath = readArg(args, ["probe-mapping-is-unlimited", "probeMappingIsUnlimited"], undefined);
        mapping = { ...existingMapping };
        if (usedPath) mapping.used = buildMappingFieldEntry(usedPath, "number");
        if (limitPath) mapping.limit = buildMappingFieldEntry(limitPath, "number");
        if (remainingPath) mapping.remaining = buildMappingFieldEntry(remainingPath, "number");
        if (resetAtPath) mapping.resetAt = buildMappingFieldEntry(resetAtPath, "datetime");
        if (isUnlimitedPath) mapping.isUnlimited = buildMappingFieldEntry(isUnlimitedPath, "boolean");
      }

      probeConfig.http = { method, url, headers, timeoutMs, mapping };
      if (body !== undefined) probeConfig.http.body = body;
    } else if (mode === "custom") {
      const existingCustom = existing.custom || {};
      const source = readArg(args, ["custom-source", "customSource"], existingCustom.source || "");
      const timeoutMs = toNumber(readArg(args, ["probe-timeout", "probeTimeout"], existingCustom.timeoutMs), undefined);
      if (!source) {
        return fail("custom-source is required for custom mode.");
      }
      probeConfig.custom = { source: String(source), timeoutMs };
    }

    const refreshOnUiOpen = toBoolean(readArg(args, ["refresh-on-ui-open", "refreshOnUiOpen"], undefined), undefined);
    const refreshOnResetAt = toBoolean(readArg(args, ["refresh-on-reset-at", "refreshOnResetAt"], undefined), undefined);
    const refreshOnErrorRaw = readArg(args, ["refresh-on-upstream-error", "refreshOnUpstreamError"], undefined);

    if (refreshOnUiOpen !== undefined || refreshOnResetAt !== undefined || refreshOnErrorRaw !== undefined) {
      const existingTriggers = existing.refreshTriggers || {};
      probeConfig.refreshTriggers = {
        onUiOpen: refreshOnUiOpen !== undefined ? refreshOnUiOpen : !!existingTriggers.onUiOpen,
        onManual: true,
        onResetAt: refreshOnResetAt !== undefined ? refreshOnResetAt : !!existingTriggers.onResetAt,
        onUpstreamError: null
      };
      if (refreshOnErrorRaw) {
        // Drop empty tokens first: Number("") is 0, so "429," would otherwise
        // register a bogus status code 0.
        const codes = String(refreshOnErrorRaw)
          .split(",")
          .map((token) => token.trim())
          .filter((token) => token !== "")
          .map((token) => Number(token))
          .filter((code) => Number.isFinite(code));
        if (codes.length > 0) probeConfig.refreshTriggers.onUpstreamError = { statusCodes: codes };
      } else if (existingTriggers.onUpstreamError) {
        probeConfig.refreshTriggers.onUpstreamError = existingTriggers.onUpstreamError;
      }
    } else if (existing.refreshTriggers) {
      // No refresh flag given: keep the stored triggers, like every other
      // field that is merged from the existing probe.
      probeConfig.refreshTriggers = existing.refreshTriggers;
    }
  }

  // The normalized form is used for validation and reporting; the
  // un-normalized probeConfig is what gets stored.
  const normalized = normalizeQuotaProbeConfig(probeConfig);
  if (!normalized) {
    return fail("Invalid quota probe config. Ensure enabled=true and capKind is one of: dollars, tokens, requests.");
  }

  const result = setProviderQuotaProbeInConfig(config, { providerId, quotaProbe: probeConfig });
  if (!result.changed && result.reason) {
    return fail(result.reason);
  }
  if (!result.changed) {
    return succeed(buildQuotaProbeReport(providerId, normalized) + "\n(no changes)");
  }
  await writeConfigFile(result.config, configPath);
  return succeed(buildQuotaProbeReport(providerId, normalized));
}
8272
+
8044
8273
  async function doSetMasterKey(context) {
8045
8274
  const args = context.args || {};
8046
8275
  const configPath = readArg(args, ["config", "configPath"], getDefaultConfigPath());
@@ -8554,6 +8783,9 @@ async function runConfigAction(context) {
8554
8783
  case "set-model-fallbacks":
8555
8784
  case "set-model-fallback":
8556
8785
  return doSetModelFallbacks(context);
8786
+ case "set-quota-probe":
8787
+ case "set-provider-quota-probe":
8788
+ return doSetQuotaProbe(context);
8557
8789
  case "set-master-key":
8558
8790
  return doSetMasterKey(context);
8559
8791
  case "set-amp-config":
@@ -10575,7 +10807,7 @@ const routerModule = {
10575
10807
  },
10576
10808
  {
10577
10809
  actionId: "config",
10578
- description: "Config manager for providers, diagnostics, coding-tool routing, AMP, and startup service.",
10810
+ description: "Config manager for providers, diagnostics, coding-tool routing, AMP, quota probes, and startup service.",
10579
10811
  tui: { steps: ["cli-only"] },
10580
10812
  commandline: {
10581
10813
  requiredArgs: [],
@@ -10610,6 +10842,29 @@ const routerModule = {
10610
10842
  "rate-limits",
10611
10843
  "remove-bucket",
10612
10844
  "replace-rate-limits",
10845
+ "probe-mode",
10846
+ "probe-url",
10847
+ "probe-method",
10848
+ "probe-headers",
10849
+ "probe-body",
10850
+ "probe-timeout",
10851
+ "probe-mapping",
10852
+ "probe-mapping-used",
10853
+ "probe-mapping-limit",
10854
+ "probe-mapping-remaining",
10855
+ "probe-mapping-reset-at",
10856
+ "probe-mapping-is-unlimited",
10857
+ "cap-kind",
10858
+ "combinator",
10859
+ "enforce",
10860
+ "safety-margin-dollars",
10861
+ "safety-margin-percent",
10862
+ "custom-source",
10863
+ "quota-probe-json",
10864
+ "disable-quota-probe",
10865
+ "refresh-on-ui-open",
10866
+ "refresh-on-reset-at",
10867
+ "refresh-on-upstream-error",
10613
10868
  "alias-id",
10614
10869
  "alias",
10615
10870
  "targets",
@@ -10681,7 +10936,7 @@ const routerModule = {
10681
10936
  ]
10682
10937
  },
10683
10938
  help: {
10684
- summary: `Manage providers, diagnostics, config validation, coding-tool routing, model aliases, rate-limit buckets, AMP proxy settings, master key, and OS startup. \`${CLI_COMMAND} config\` opens the web console by default; use \`--operation\` for direct CLI actions.`,
10939
+ summary: `Manage providers, diagnostics, config validation, coding-tool routing, model aliases, rate-limit buckets, quota probes (external provider budget monitoring), AMP proxy settings, master key, and OS startup. \`${CLI_COMMAND} config\` opens the web console by default; use \`--operation\` for direct CLI actions.`,
10685
10940
  args: [
10686
10941
  { name: "operation", required: false, description: "Config operation (optional; defaults to a config summary when omitted in direct CLI mode).", example: "--operation=upsert-provider" },
10687
10942
  { name: "provider-id", required: false, description: "Provider id (lowercase letters/numbers/dashes).", example: "--provider-id=openrouter-primary" },
@@ -10718,6 +10973,29 @@ const routerModule = {
10718
10973
  { name: "remove-bucket", required: false, description: "Remove bucket by --bucket-id in set-provider-rate-limits.", example: "--remove-bucket=true" },
10719
10974
  { name: "replace-rate-limits", required: false, description: "Replace all provider buckets with provided entries.", example: "--replace-rate-limits=true" },
10720
10975
  { name: "rate-limits", required: false, description: "Rate-limit bucket JSON object/array for bulk update.", example: "--rate-limits='[{\"id\":\"or-month\",\"models\":[\"all\"],\"requests\":20000,\"window\":{\"unit\":\"month\",\"size\":1}}]'" },
10976
+ { name: "probe-mode", required: false, description: "For set-quota-probe: probe execution mode. 'http' sends an HTTP request to a provider quota/usage endpoint and maps the JSON response to a normalized snapshot. 'custom' runs a sandboxed JS function. Default: http.", example: "--probe-mode=http" },
10977
+ { name: "probe-url", required: false, description: "For set-quota-probe (HTTP mode): the full URL of the provider's quota/usage/subscription API endpoint that returns JSON with usage data. Supports {{providerApiKey}}, {{providerBaseUrl}}, {{providerId}}, and {{env.VAR_NAME}} shortcodes for secret interpolation.", example: "--probe-url=https://ramclouds.me/api/subscription/self" },
10978
+ { name: "probe-method", required: false, description: "For set-quota-probe (HTTP mode): HTTP method. Default: GET.", example: "--probe-method=GET" },
10979
+ { name: "probe-headers", required: false, description: "For set-quota-probe (HTTP mode): request headers as a JSON array of {key,value} objects or a JSON object {key:value}. Use {{providerApiKey}} to interpolate the provider's API key, or {{env.VAR_NAME}} to interpolate environment variables for secrets that differ from the provider API key (e.g. a separate system token).", example: "--probe-headers='[{\"key\":\"Authorization\",\"value\":\"Bearer {{env.RC_TOKEN}}\"},{\"key\":\"New-Api-User\",\"value\":\"{{env.RC_USER}}\"}]'" },
10980
+ { name: "probe-body", required: false, description: "For set-quota-probe (HTTP mode, POST only): request body string. Supports the same {{shortcode}} interpolation as probe-headers.", example: "--probe-body='{\"action\":\"get_usage\"}'" },
10981
+ { name: "probe-timeout", required: false, description: "For set-quota-probe: request timeout in milliseconds. HTTP mode default: 5000 (max 15000). Custom mode default: 2000 (max 10000).", example: "--probe-timeout=10000" },
10982
+ { name: "probe-mapping", required: false, description: "For set-quota-probe (HTTP mode): full JSON mapping object that maps provider API response JSON paths to normalized snapshot fields. Each field has {path, as} where 'path' is a dot-path like '$.data.used_quota' and 'as' is the coercion type. Coercion types: 'number' (numeric), 'dollars-from-cents' (divides by 100), 'boolean', 'datetime' (ISO-8601/epoch/duration), 'raw'. Alternative to individual --probe-mapping-* flags.", example: "--probe-mapping='{\"used\":{\"path\":\"$.data.used\",\"as\":\"number\"},\"limit\":{\"path\":\"$.data.limit\",\"as\":\"number\"}}'" },
10983
+ { name: "probe-mapping-used", required: false, description: "For set-quota-probe (HTTP mode): JSON dot-path in the provider API response to the 'used' quota value (how much has been consumed). Coerced as number. At least 2 of {used, limit, remaining} are required; the third is auto-derived.", example: "--probe-mapping-used=$.data.used_quota" },
10984
+ { name: "probe-mapping-limit", required: false, description: "For set-quota-probe (HTTP mode): JSON dot-path to the 'limit' value (total quota cap). Coerced as number.", example: "--probe-mapping-limit=$.data.quota_limit" },
10985
+ { name: "probe-mapping-remaining", required: false, description: "For set-quota-probe (HTTP mode): JSON dot-path to the 'remaining' value (quota left). Coerced as number.", example: "--probe-mapping-remaining=$.data.remaining_quota" },
10986
+ { name: "probe-mapping-reset-at", required: false, description: "For set-quota-probe (HTTP mode): JSON dot-path to the reset timestamp (when quota resets). Coerced as datetime (auto-detects ISO-8601, epoch seconds, epoch milliseconds, or duration strings like '2h' or 'PT30M').", example: "--probe-mapping-reset-at=$.data.reset_at" },
10987
+ { name: "probe-mapping-is-unlimited", required: false, description: "For set-quota-probe (HTTP mode): JSON dot-path to a boolean indicating unlimited quota. When true, the probe always reports 'available' regardless of used/limit values.", example: "--probe-mapping-is-unlimited=$.data.is_unlimited" },
10988
+ { name: "cap-kind", required: false, description: "For set-quota-probe: the unit of the quota cap reported by the provider. Determines how used/limit/remaining values are interpreted. Values: 'dollars' (monetary budget), 'tokens' (token count), 'requests' (request count).", example: "--cap-kind=dollars" },
10989
+ { name: "combinator", required: false, description: "For set-quota-probe: how the quota probe verdict combines with local rate-limit verdict to decide if a provider is eligible. 'AND' = both must pass (default, safest). 'OR' = either can pass (lenient). 'REPLACE' = probe verdict replaces rate-limit entirely.", example: "--combinator=AND" },
10990
+ { name: "enforce", required: false, description: "For set-quota-probe: enforcement mode. 'gate' = blocks routing when quota exhausted (production use). 'observe' = logs verdict but never blocks (dry-run/testing). Default: gate.", example: "--enforce=gate" },
10991
+ { name: "safety-margin-dollars", required: false, description: "For set-quota-probe: dollar-based safety margin. Provider is considered exhausted when remaining ≤ this value. Applied as max(dollars, limit×percent/100). Default: 0.", example: "--safety-margin-dollars=1" },
10992
+ { name: "safety-margin-percent", required: false, description: "For set-quota-probe: percentage-based safety margin. Provider is considered exhausted when remaining ≤ limit×percent/100. Applied as max(dollars, limit×percent/100). Default: 0.", example: "--safety-margin-percent=2" },
10993
+ { name: "custom-source", required: false, description: "For set-quota-probe (custom mode): JavaScript async function source that runs in a sandboxed VM. Receives ctx object with {fetch, providerApiKey, providerBaseUrl, providerId}. Must return {capKind, used, limit} or {capKind, remaining, limit}. No access to process, require, or globalThis.", example: "--custom-source='export default async function(ctx) { const r = await ctx.fetch(\"https://api.example.com/usage\", {headers:{\"Authorization\":\"Bearer \"+ctx.providerApiKey}}); const d = await r.json(); return {capKind:\"dollars\",used:d.used,limit:d.limit}; }'" },
10994
+ { name: "quota-probe-json", required: false, description: "For set-quota-probe: provide the full quotaProbe config as a single JSON object. Overrides all other probe flags. Useful when the config is complex or pre-built. The object is written directly to the provider's quotaProbe field.", example: "--quota-probe-json='{\"enabled\":true,\"capKind\":\"dollars\",\"mode\":\"http\",\"combinator\":\"AND\",\"enforce\":\"gate\",\"http\":{\"method\":\"GET\",\"url\":\"https://example.com/api/usage\",\"headers\":[{\"key\":\"Authorization\",\"value\":\"Bearer {{providerApiKey}}\"}],\"mapping\":{\"used\":{\"path\":\"$.used\",\"as\":\"number\"},\"limit\":{\"path\":\"$.limit\",\"as\":\"number\"}}}}'" },
10995
+ { name: "disable-quota-probe", required: false, description: "For set-quota-probe: set to true to disable and remove the quota probe config from the provider. The provider will no longer be gated by external quota checks.", example: "--disable-quota-probe=true" },
10996
+ { name: "refresh-on-ui-open", required: false, description: "For set-quota-probe: auto-refresh the quota snapshot when the web console UI is opened. Default: false.", example: "--refresh-on-ui-open=true" },
10997
+ { name: "refresh-on-reset-at", required: false, description: "For set-quota-probe: schedule an automatic refresh at the resetAt timestamp returned by the probe. Useful when the provider reports when the quota window rolls over. Default: false.", example: "--refresh-on-reset-at=true" },
10998
+ { name: "refresh-on-upstream-error", required: false, description: "For set-quota-probe: comma-separated HTTP status codes from upstream provider errors that should trigger a quota probe refresh. Common: 429 (rate limited), 402 (payment required).", example: "--refresh-on-upstream-error=429,402" },
10721
10999
  { name: "format", required: false, description: "Manual format if probe is skipped.", example: "--format=openai" },
10722
11000
  { name: "headers", required: false, description: "Custom provider headers as JSON object (default User-Agent applied when omitted).", example: "--headers={\"User-Agent\":\"Mozilla/5.0\"}" },
10723
11001
  { name: "skip-probe", required: false, description: "Skip live endpoint/model probe.", example: "--skip-probe=true" },
@@ -10795,6 +11073,10 @@ const routerModule = {
10795
11073
  `${CLI_COMMAND} config --operation=set-provider-rate-limits --provider-id=openrouter --bucket-id=openrouter-all-month --bucket-models=all --bucket-requests=20000 --bucket-window=month:1`,
10796
11074
  `${CLI_COMMAND} config --operation=set-provider-rate-limits --provider-id=openrouter --bucket-name="6-hours cap" --bucket-models=all --bucket-requests=600 --bucket-window=hour:6`,
10797
11075
  `${CLI_COMMAND} config --operation=migrate-config --target-version=2 --create-backup=true`,
11076
+ `${CLI_COMMAND} config --operation=set-quota-probe --provider-id=ramclouds --cap-kind=dollars --probe-url=https://ramclouds.me/api/subscription/self --probe-headers='[{"key":"Authorization","value":"Bearer {{env.RC_TOKEN}}"},{"key":"New-Api-User","value":"{{env.RC_USER}}"}]' --probe-mapping-used=$.data.used_quota --probe-mapping-limit=$.data.quota_limit --safety-margin-dollars=1 --combinator=AND --enforce=gate`,
11077
+ `${CLI_COMMAND} config --operation=set-quota-probe --provider-id=openrouter --cap-kind=dollars --probe-url=https://openrouter.ai/api/v1/auth/key --probe-headers='{"Authorization":"Bearer {{providerApiKey}}"}' --probe-mapping-used=$.data.usage --probe-mapping-limit=$.data.limit --refresh-on-upstream-error=429,402`,
11078
+ `${CLI_COMMAND} config --operation=set-quota-probe --provider-id=myapi --cap-kind=tokens --probe-url=https://api.example.com/usage --probe-method=GET --probe-headers='{"Authorization":"Bearer {{providerApiKey}}"}' --probe-mapping-remaining=$.remaining --probe-mapping-limit=$.total --enforce=observe`,
11079
+ `${CLI_COMMAND} config --operation=set-quota-probe --provider-id=ramclouds --disable-quota-probe=true`,
10798
11080
  `${CLI_COMMAND} config --operation=set-model-fallbacks --provider-id=openrouter --model=gpt-4o --fallback-models=anthropic/claude-3-7-sonnet,openrouter/gpt-4.1-mini`,
10799
11081
  `${CLI_COMMAND} config --operation=remove-model --provider-id=openrouter --model=gpt-4o`,
10800
11082
  `${CLI_COMMAND} config --operation=set-amp-config --patch-amp-client-config=true --amp-client-settings-scope=workspace --amp-client-url=${LOCAL_ROUTER_ORIGIN}`,
@@ -126,6 +126,13 @@ export function claudeToOpenAIRequest(model, body, stream) {
126
126
  // Fix missing tool responses
127
127
  fixMissingToolResponses(result.messages);
128
128
 
129
+ // Strip trailing empty assistant message (prefill).
130
+ // Claude Code sends {"role":"assistant","content":[]} to prefill responses, but OpenAI-
131
+ // compatible providers backed by Claude reject it with "This model does not support
132
+ // assistant message prefill." An empty trailing assistant adds no semantic value in
133
+ // OpenAI format, so we drop it.
134
+ stripTrailingEmptyAssistant(result.messages);
135
+
129
136
  // Tools
130
137
  if (body.tools && Array.isArray(body.tools)) {
131
138
  result.tools = body.tools.map(tool => {
@@ -164,6 +171,27 @@ export function claudeToOpenAIRequest(model, body, stream) {
164
171
  return result;
165
172
  }
166
173
 
174
/**
 * Decide whether an assistant message's `content` is empty.
 *
 * Empty means: null/undefined, a blank (whitespace-only) string, or an array
 * whose parts are all `type: "text"` with a missing or blank `text`. An empty
 * array counts as empty; any other value does not.
 *
 * @param {*} content - The message `content` field.
 * @returns {boolean} `true` when the content is empty.
 */
function isEmptyAssistantContent(content) {
  if (content == null) return true;
  if (typeof content === "string") return content.trim() === "";
  if (!Array.isArray(content)) return false;

  // A single non-text part (e.g. a tool or image part) makes the content non-empty.
  return content.every((part) => {
    if (part?.type !== "text") return false;
    return typeof part.text !== "string" || part.text.trim() === "";
  });
}
184
+
185
/**
 * Remove empty assistant messages from the end of `messages`, in place.
 *
 * Stops at the first trailing message that is not an assistant message, has
 * tool calls, or has non-empty content.
 *
 * @param {Array<object>} messages - Message list; mutated via `pop()`.
 * @returns {void}
 */
function stripTrailingEmptyAssistant(messages) {
  const isDroppable = (message) =>
    message.role === "assistant"
    && !message.tool_calls?.length
    && isEmptyAssistantContent(message.content);

  while (messages.length > 0 && isDroppable(messages[messages.length - 1])) {
    messages.pop();
  }
}
194
+
167
195
  /**
168
196
  * Fix missing tool responses
169
197
  */