@tokenbuddy/tokenbuddy 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/src/daemon.d.ts +11 -4
  2. package/dist/src/daemon.d.ts.map +1 -1
  3. package/dist/src/daemon.js +130 -42
  4. package/dist/src/daemon.js.map +1 -1
  5. package/dist/src/doctor-diagnostics.d.ts.map +1 -1
  6. package/dist/src/doctor-diagnostics.js +7 -1
  7. package/dist/src/doctor-diagnostics.js.map +1 -1
  8. package/dist/src/prewarm-cache.d.ts +4 -0
  9. package/dist/src/prewarm-cache.d.ts.map +1 -1
  10. package/dist/src/prewarm-cache.js +1 -0
  11. package/dist/src/prewarm-cache.js.map +1 -1
  12. package/dist/src/prewarm-scheduler.d.ts +2 -0
  13. package/dist/src/prewarm-scheduler.d.ts.map +1 -1
  14. package/dist/src/prewarm-scheduler.js +4 -1
  15. package/dist/src/prewarm-scheduler.js.map +1 -1
  16. package/dist/src/provider-install.d.ts.map +1 -1
  17. package/dist/src/provider-install.js +196 -18
  18. package/dist/src/provider-install.js.map +1 -1
  19. package/dist/src/seller-catalog.d.ts +4 -0
  20. package/dist/src/seller-catalog.d.ts.map +1 -1
  21. package/dist/src/seller-catalog.js.map +1 -1
  22. package/dist/src/seller-pool.d.ts +13 -0
  23. package/dist/src/seller-pool.d.ts.map +1 -1
  24. package/dist/src/seller-pool.js +43 -2
  25. package/dist/src/seller-pool.js.map +1 -1
  26. package/dist/src/seller-route-planner.d.ts +9 -0
  27. package/dist/src/seller-route-planner.d.ts.map +1 -1
  28. package/dist/src/seller-route-planner.js +39 -15
  29. package/dist/src/seller-route-planner.js.map +1 -1
  30. package/dist/src/seller-routing-strategy.d.ts +6 -4
  31. package/dist/src/seller-routing-strategy.d.ts.map +1 -1
  32. package/dist/src/seller-routing-strategy.js +15 -12
  33. package/dist/src/seller-routing-strategy.js.map +1 -1
  34. package/dist/src/terminal-detect.d.ts +5 -5
  35. package/dist/src/terminal-detect.d.ts.map +1 -1
  36. package/dist/src/terminal-detect.js +79 -26
  37. package/dist/src/terminal-detect.js.map +1 -1
  38. package/package.json +1 -1
  39. package/src/daemon.ts +168 -46
  40. package/src/doctor-diagnostics.ts +5 -1
  41. package/src/prewarm-cache.ts +5 -0
  42. package/src/prewarm-scheduler.ts +6 -1
  43. package/src/provider-install.ts +203 -18
  44. package/src/seller-catalog.ts +4 -0
  45. package/src/seller-pool.ts +68 -2
  46. package/src/seller-route-planner.ts +61 -15
  47. package/src/seller-routing-strategy.ts +21 -16
  48. package/src/terminal-detect.ts +81 -24
  49. package/static/ui/assets/index-DEDEl8o2.js +236 -0
  50. package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
  51. package/static/ui/index.html +1 -1
  52. package/tests/control-plane-ui-endpoints.test.ts +73 -0
  53. package/tests/seller-pool.test.ts +55 -0
  54. package/tests/seller-route-planner.test.ts +45 -1
  55. package/tests/seller-routing-strategy.test.ts +6 -5
  56. package/tests/tokenbuddy.test.ts +346 -38
  57. package/static/ui/assets/index-UAfOhbwC.js +0 -236
@@ -27,7 +27,11 @@ import {
27
27
  previewProviderInstall,
28
28
  rollbackProviderInstall
29
29
  } from "../src/provider-install.js";
30
- import { detectTerminals } from "../src/terminal-detect.js";
30
+ import {
31
+ detectTerminals,
32
+ rewriteHermes,
33
+ rewriteOpenclaw,
34
+ } from "../src/terminal-detect.js";
31
35
  import {
32
36
  buildInitSuccessMessage,
33
37
  buildInitTerminalSelectionState,
@@ -1915,7 +1919,7 @@ describe("TokenBuddy JSON inspection commands", () => {
1915
1919
  expect.objectContaining({
1916
1920
  id: "gpt-4",
1917
1921
  sellerCount: 1,
1918
- discountRange: "0.25",
1922
+ discountRange: "2.5折",
1919
1923
  priceRange: "in $1 / out $3"
1920
1924
  })
1921
1925
  ]);
@@ -1938,7 +1942,7 @@ describe("TokenBuddy JSON inspection commands", () => {
1938
1942
  expect(joined).toContain("Discount Range");
1939
1943
  expect(joined).toContain("Price Range");
1940
1944
  expect(joined).toContain("gpt-4");
1941
- expect(joined).toContain("0.25");
1945
+ expect(joined).toContain("2.5折");
1942
1946
  expect(joined).toContain("$1");
1943
1947
  expect(joined).toContain("$3");
1944
1948
  });
@@ -1975,7 +1979,22 @@ describe("Provider install planning", () => {
1975
1979
  process.env.PATH = `${PROVIDER_BIN_ROOT}${path.delimiter}${previousPath || ""}`;
1976
1980
  fs.writeFileSync(path.join(PROVIDER_HOME, ".codex", "config.toml"), "approval_policy = \"never\"\n", "utf8");
1977
1981
  fs.writeFileSync(path.join(PROVIDER_HOME, ".claude", "settings.json"), JSON.stringify({ theme: "dark" }, null, 2), "utf8");
1978
- fs.writeFileSync(path.join(PROVIDER_HOME, ".openclaw", "config.json"), JSON.stringify({ keep: "field" }, null, 2), "utf8");
1982
+ fs.writeFileSync(path.join(PROVIDER_HOME, ".openclaw", "openclaw.json"), JSON.stringify({
1983
+ keep: "field",
1984
+ models: {
1985
+ providers: {
1986
+ existing: {
1987
+ baseUrl: "https://example.invalid/v1",
1988
+ models: [{ id: "existing-model", name: "existing-model" }],
1989
+ },
1990
+ },
1991
+ },
1992
+ agents: {
1993
+ defaults: {
1994
+ model: "existing/existing-model",
1995
+ },
1996
+ },
1997
+ }, null, 2), "utf8");
1979
1998
  fs.writeFileSync(path.join(PROVIDER_HOME, ".config", "opencode", "opencode.json"), JSON.stringify({ share: "disabled" }, null, 2), "utf8");
1980
1999
  });
1981
2000
 
@@ -1996,7 +2015,7 @@ describe("Provider install planning", () => {
1996
2015
  expect(providers).toEqual(expect.arrayContaining([
1997
2016
  expect.objectContaining({ id: "codex", status: "configured", configured: true }),
1998
2017
  expect.objectContaining({ id: "claude-code", status: "configured", configured: true }),
1999
- expect.objectContaining({ id: "openclaw", status: "configured", configured: true }),
2018
+ expect.objectContaining({ id: "openclaw", status: "installed", configured: false }),
2000
2019
  expect.objectContaining({ id: "hermes", status: "installed", configured: false })
2001
2020
  ]));
2002
2021
 
@@ -2043,10 +2062,10 @@ describe("Provider install planning", () => {
2043
2062
  });
2044
2063
 
2045
2064
  test("reports installed-only providers when executable or native config hints exist", () => {
2046
- fs.rmSync(path.join(PROVIDER_HOME, ".openclaw", "config.json"), { force: true });
2047
- fs.writeFileSync(path.join(PROVIDER_HOME, ".openclaw", "openclaw.json"), JSON.stringify({ profile: "default" }, null, 2), "utf8");
2065
+ fs.rmSync(path.join(PROVIDER_HOME, ".openclaw", "openclaw.json"), { force: true });
2066
+ fs.writeFileSync(path.join(PROVIDER_HOME, ".openclaw", "config.json"), JSON.stringify({ profile: "default" }, null, 2), "utf8");
2048
2067
  fs.mkdirSync(path.join(PROVIDER_HOME, ".hermes"), { recursive: true });
2049
- fs.writeFileSync(path.join(PROVIDER_HOME, ".hermes", "config.yaml"), "model: gpt-4\n", "utf8");
2068
+ fs.writeFileSync(path.join(PROVIDER_HOME, ".hermes", "settings.json"), JSON.stringify({ openai: { model: "gpt-4" } }, null, 2), "utf8");
2050
2069
 
2051
2070
  const providers = detectProviders({ home: PROVIDER_HOME });
2052
2071
  expect(providers).toEqual(expect.arrayContaining([
@@ -2055,14 +2074,14 @@ describe("Provider install planning", () => {
2055
2074
  status: "installed",
2056
2075
  configured: false,
2057
2076
  executablePath: expect.stringContaining(path.join("provider-bin", "openclaw")),
2058
- observedPaths: expect.arrayContaining([path.join(PROVIDER_HOME, ".openclaw", "openclaw.json")]),
2077
+ observedPaths: expect.arrayContaining([path.join(PROVIDER_HOME, ".openclaw", "config.json")]),
2059
2078
  }),
2060
2079
  expect.objectContaining({
2061
2080
  id: "hermes",
2062
2081
  status: "installed",
2063
2082
  configured: false,
2064
2083
  executablePath: expect.stringContaining(path.join("provider-bin", "hermes")),
2065
- observedPaths: expect.arrayContaining([path.join(PROVIDER_HOME, ".hermes", "config.yaml")]),
2084
+ observedPaths: expect.arrayContaining([path.join(PROVIDER_HOME, ".hermes", "settings.json")]),
2066
2085
  }),
2067
2086
  ]));
2068
2087
  });
@@ -2153,9 +2172,15 @@ describe("Provider install planning", () => {
2153
2172
  expect(store.getDaemonRuntimeConfig("routing")).toBeUndefined();
2154
2173
  expect(store.getProviderRuntimeConfig("opencode")?.config).not.toHaveProperty("sellerId");
2155
2174
 
2156
- const openclaw = JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".openclaw", "config.json"), "utf8"));
2175
+ const openclaw = JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".openclaw", "openclaw.json"), "utf8"));
2157
2176
  expect(openclaw.keep).toBe("field");
2158
- expect(openclaw.api_url).toBe(proxyUrl);
2177
+ expect(openclaw.models.providers.existing.baseUrl).toBe("https://example.invalid/v1");
2178
+ expect(openclaw.models.providers.tokenbuddy.baseUrl).toBe(`${proxyUrl}/v1`);
2179
+ expect(openclaw.models.providers.tokenbuddy.apiKey).toBe("TOKENBUDDY_PROXY");
2180
+ expect(openclaw.models.providers.tokenbuddy.models).toEqual(expect.arrayContaining([
2181
+ expect.objectContaining({ id: "gpt-4", name: "gpt-4", api: "openai-completions" }),
2182
+ ]));
2183
+ expect(openclaw.agents.defaults.model).toBe("tokenbuddy/gpt-4");
2159
2184
  const opencode = JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".config", "opencode", "opencode.json"), "utf8"));
2160
2185
  expect(opencode.share).toBe("disabled");
2161
2186
  expect(JSON.stringify(opencode)).not.toContain("sellerId");
@@ -2168,7 +2193,13 @@ describe("Provider install planning", () => {
2168
2193
  configured: true,
2169
2194
  }),
2170
2195
  ]));
2171
- expect(fs.existsSync(path.join(PROVIDER_HOME, ".hermes", "settings.json"))).toBe(true);
2196
+ const hermesConfig = fs.readFileSync(path.join(PROVIDER_HOME, ".hermes", "config.yaml"), "utf8");
2197
+ expect(hermesConfig).toContain("model:");
2198
+ expect(hermesConfig).toContain("default: gpt-4");
2199
+ expect(hermesConfig).toContain("provider: custom");
2200
+ expect(hermesConfig).toContain(`base_url: "${proxyUrl}/v1"`);
2201
+ expect(hermesConfig).toContain("api_key: TOKENBUDDY_PROXY");
2202
+ expect(hermesConfig).toContain("api_mode: chat_completions");
2172
2203
  expect(store.getProviderInstallSnapshot("codex")).toBeDefined();
2173
2204
 
2174
2205
  const rolledBack = rollbackProviderInstall({
@@ -2181,9 +2212,16 @@ describe("Provider install planning", () => {
2181
2212
  expect.objectContaining({ providerId: "hermes", action: "removed" })
2182
2213
  ]));
2183
2214
  expect(fs.readFileSync(path.join(PROVIDER_HOME, ".codex", "config.toml"), "utf8")).toBe("approval_policy = \"never\"\n");
2184
- expect(JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".openclaw", "config.json"), "utf8"))).toEqual({ keep: "field" });
2215
+ expect(JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".openclaw", "openclaw.json"), "utf8"))).toMatchObject({
2216
+ keep: "field",
2217
+ agents: {
2218
+ defaults: {
2219
+ model: "existing/existing-model",
2220
+ },
2221
+ },
2222
+ });
2185
2223
  expect(JSON.parse(fs.readFileSync(path.join(PROVIDER_HOME, ".config", "opencode", "opencode.json"), "utf8"))).toEqual({ share: "disabled" });
2186
- expect(fs.existsSync(path.join(PROVIDER_HOME, ".hermes", "settings.json"))).toBe(false);
2224
+ expect(fs.existsSync(path.join(PROVIDER_HOME, ".hermes", "config.yaml"))).toBe(false);
2187
2225
  expect(store.getProviderInstallSnapshot("codex")).toBeUndefined();
2188
2226
  expect(store.getProviderRuntimeConfig("claude-code")).toBeUndefined();
2189
2227
  } finally {
@@ -2211,6 +2249,97 @@ describe("Provider install planning", () => {
2211
2249
  expect(parsed.model).toBe("tokenbuddy/gpt-5.4");
2212
2250
  expect(parsed.provider.tokenbuddy.options.baseURL).toBe("http://127.0.0.1:17821/v1");
2213
2251
  });
2252
+
2253
+ test("hermes install preserves existing config and writes active config.yaml model section", () => {
2254
+ fs.mkdirSync(path.join(PROVIDER_HOME, ".hermes"), { recursive: true });
2255
+ fs.writeFileSync(path.join(PROVIDER_HOME, ".hermes", "config.yaml"), [
2256
+ "display:",
2257
+ " compact: false",
2258
+ "model:",
2259
+ " default: existing-model",
2260
+ " provider: existing-provider",
2261
+ " base_url: https://existing.invalid/v1",
2262
+ "fallback_providers: []",
2263
+ "",
2264
+ ].join("\n"), "utf8");
2265
+
2266
+ const changes = previewProviderInstall({
2267
+ providers: ["hermes"],
2268
+ proxyUrl,
2269
+ providerSelections: {
2270
+ hermes: {
2271
+ selectionKind: "single-model",
2272
+ protocolPreference: "chat_completions",
2273
+ defaultModel: "gpt-5.4",
2274
+ },
2275
+ },
2276
+ home: PROVIDER_HOME,
2277
+ });
2278
+
2279
+ const parsed = changes.find((change) => change.providerId === "hermes")?.content || "";
2280
+ expect(parsed).toContain("display:");
2281
+ expect(parsed).toContain("compact: false");
2282
+ expect(parsed).toContain("fallback_providers:");
2283
+ expect(parsed).toContain("default: gpt-5.4");
2284
+ expect(parsed).toContain("provider: custom");
2285
+ expect(parsed).toContain(`base_url: "${proxyUrl}/v1"`);
2286
+ expect(parsed).toContain("api_key: TOKENBUDDY_PROXY");
2287
+ expect(parsed).toContain("api_mode: chat_completions");
2288
+ });
2289
+
2290
+ test("legacy terminal rewrite helpers use active OpenClaw and Hermes config files", () => {
2291
+ const helperHome = path.join(PROVIDER_HOME, "terminal-helper-home");
2292
+ const openclawPath = path.join(helperHome, ".openclaw", "openclaw.json");
2293
+ const hermesPath = path.join(helperHome, ".hermes", "config.yaml");
2294
+ fs.mkdirSync(path.dirname(openclawPath), { recursive: true });
2295
+ fs.mkdirSync(path.dirname(hermesPath), { recursive: true });
2296
+ fs.writeFileSync(openclawPath, JSON.stringify({
2297
+ models: {
2298
+ providers: {
2299
+ existing: {
2300
+ baseUrl: "https://example.invalid/v1",
2301
+ models: [{ id: "existing-model", name: "existing-model" }],
2302
+ },
2303
+ },
2304
+ },
2305
+ agents: {
2306
+ defaults: {
2307
+ model: "existing/existing-model",
2308
+ },
2309
+ },
2310
+ }, null, 2), "utf8");
2311
+ fs.writeFileSync(hermesPath, [
2312
+ "display:",
2313
+ " compact: false",
2314
+ "model:",
2315
+ " default: existing-model",
2316
+ " provider: existing-provider",
2317
+ "fallback_providers: []",
2318
+ "",
2319
+ ].join("\n"), "utf8");
2320
+
2321
+ rewriteOpenclaw(openclawPath, proxyUrl, "gpt-5.4");
2322
+ rewriteHermes(hermesPath, proxyUrl, "gpt-5.4");
2323
+
2324
+ const openclaw = JSON.parse(fs.readFileSync(openclawPath, "utf8"));
2325
+ expect(openclaw.models.providers.existing.baseUrl).toBe("https://example.invalid/v1");
2326
+ expect(openclaw.models.providers.tokenbuddy.baseUrl).toBe(`${proxyUrl}/v1`);
2327
+ expect(openclaw.models.providers.tokenbuddy.apiKey).toBe("TOKENBUDDY_PROXY");
2328
+ expect(openclaw.models.providers.tokenbuddy.models).toEqual(expect.arrayContaining([
2329
+ expect.objectContaining({ id: "gpt-5.4", name: "gpt-5.4", api: "openai-completions" }),
2330
+ ]));
2331
+ expect(openclaw.agents.defaults.model).toBe("tokenbuddy/gpt-5.4");
2332
+
2333
+ const hermes = fs.readFileSync(hermesPath, "utf8");
2334
+ expect(hermes).toContain("display:");
2335
+ expect(hermes).toContain("compact: false");
2336
+ expect(hermes).toContain("fallback_providers:");
2337
+ expect(hermes).toContain("default: gpt-5.4");
2338
+ expect(hermes).toContain("provider: custom");
2339
+ expect(hermes).toContain(`base_url: "${proxyUrl}/v1"`);
2340
+ expect(hermes).toContain("api_key: TOKENBUDDY_PROXY");
2341
+ expect(hermes).toContain("api_mode: chat_completions");
2342
+ });
2214
2343
  });
2215
2344
 
2216
2345
  describe("TokenBuddy CLI and Daemon Integration Tests", () => {
@@ -2987,14 +3116,18 @@ describe("TokenBuddy seller routing strategies", () => {
2987
3116
  let daemon: TokenbuddyDaemon;
2988
3117
  let daemonProxyPort: number;
2989
3118
  let daemonControlPort: number;
2990
- const events: Array<{ seller: string; url?: string }> = [];
3119
+ const events: Array<{ seller: string; url?: string; body?: any; idempotencyKey?: string }> = [];
2991
3120
  let primaryPurchaseSucceeds = false;
2992
3121
  let primaryInferenceFails = false;
3122
+ let primaryInferenceFailsOnceWithIdempotencyConflict = false;
3123
+ const primaryInferenceSeenRequestIds = new Set<string>();
2993
3124
  let primaryInferenceBusy = false;
2994
3125
  let primaryInferenceDelayMs = 0;
2995
3126
  const dbPath = path.resolve(__dirname, "../../data-test/manual-routing-test.db");
2996
3127
  const routeEvents = (): Array<{ seller: string; url?: string }> => events
2997
- .filter((event) => event.url !== "/primary/health" && event.url !== "/backup/health");
3128
+ .filter((event) => event.url !== "/primary/health" && event.url !== "/backup/health")
3129
+ .filter((event) => event.url !== "/primary/manifest" && event.url !== "/backup/manifest")
3130
+ .map((event) => ({ seller: event.seller, url: event.url }));
2998
3131
 
2999
3132
  const readJsonBody = (req: http.IncomingMessage): Promise<any> => new Promise((resolve) => {
3000
3133
  let body = "";
@@ -3018,7 +3151,7 @@ describe("TokenBuddy seller routing strategies", () => {
3018
3151
  id: "primary-seller",
3019
3152
  name: "Primary Seller",
3020
3153
  url: `http://127.0.0.1:${sellerPort}/primary`,
3021
- supportedProtocols: ["chat_completions"],
3154
+ supportedProtocols: ["chat_completions", "responses"],
3022
3155
  paymentMethods: ["mock"],
3023
3156
  models: ["gpt-manual"]
3024
3157
  },
@@ -3026,7 +3159,7 @@ describe("TokenBuddy seller routing strategies", () => {
3026
3159
  id: "backup-seller",
3027
3160
  name: "Backup Seller",
3028
3161
  url: `http://127.0.0.1:${sellerPort}/backup`,
3029
- supportedProtocols: ["chat_completions"],
3162
+ supportedProtocols: ["chat_completions", "responses"],
3030
3163
  paymentMethods: ["mock"],
3031
3164
  models: ["gpt-manual"]
3032
3165
  }
@@ -3039,8 +3172,9 @@ describe("TokenBuddy seller routing strategies", () => {
3039
3172
  events.push({ seller: "primary-seller", url: req.url });
3040
3173
  res.end(JSON.stringify({
3041
3174
  sellerId: "primary-seller",
3042
- supportedProtocols: ["chat_completions"],
3175
+ supportedProtocols: ["chat_completions", "responses"],
3043
3176
  paymentMethods: ["mock"],
3177
+ selection: { discountRatio: 1 },
3044
3178
  models: [{ id: "gpt-manual" }]
3045
3179
  }));
3046
3180
  return;
@@ -3050,8 +3184,9 @@ describe("TokenBuddy seller routing strategies", () => {
3050
3184
  events.push({ seller: "backup-seller", url: req.url });
3051
3185
  res.end(JSON.stringify({
3052
3186
  sellerId: "backup-seller",
3053
- supportedProtocols: ["chat_completions"],
3187
+ supportedProtocols: ["chat_completions", "responses"],
3054
3188
  paymentMethods: ["mock"],
3189
+ selection: { discountRatio: 0.01 },
3055
3190
  models: [{ id: "gpt-manual" }]
3056
3191
  }));
3057
3192
  return;
@@ -3089,8 +3224,13 @@ describe("TokenBuddy seller routing strategies", () => {
3089
3224
  return;
3090
3225
  }
3091
3226
 
3092
- if (req.url === "/primary/v1/chat/completions") {
3093
- events.push({ seller: "primary-seller", url: req.url });
3227
+ if (req.url === "/primary/v1/chat/completions" || req.url === "/primary/v1/responses") {
3228
+ events.push({
3229
+ seller: "primary-seller",
3230
+ url: req.url,
3231
+ body,
3232
+ idempotencyKey: req.headers["idempotency-key"] as string | undefined
3233
+ });
3094
3234
  if (primaryInferenceDelayMs > 0) {
3095
3235
  await new Promise((resolve) => setTimeout(resolve, primaryInferenceDelayMs));
3096
3236
  }
@@ -3104,10 +3244,35 @@ describe("TokenBuddy seller routing strategies", () => {
3104
3244
  res.end(JSON.stringify({ error: { code: "upstream_failed", message: "primary seller failed" } }));
3105
3245
  return;
3106
3246
  }
3107
- res.end(JSON.stringify({
3108
- id: "primary-chat",
3109
- usage: { prompt_tokens: 4, completion_tokens: 5 }
3110
- }));
3247
+ if (primaryInferenceFailsOnceWithIdempotencyConflict) {
3248
+ if (primaryInferenceSeenRequestIds.has(body.requestId)) {
3249
+ res.statusCode = 409;
3250
+ res.end(JSON.stringify({
3251
+ error: {
3252
+ code: "idempotency_conflict",
3253
+ message: "Idempotency key already belongs to an existing request."
3254
+ }
3255
+ }));
3256
+ return;
3257
+ }
3258
+ primaryInferenceSeenRequestIds.add(body.requestId);
3259
+ if (primaryInferenceSeenRequestIds.size === 1) {
3260
+ res.statusCode = 502;
3261
+ res.end(JSON.stringify({ error: { code: "upstream_failed", message: "primary seller failed once" } }));
3262
+ return;
3263
+ }
3264
+ }
3265
+ if (req.url === "/primary/v1/responses") {
3266
+ res.end(JSON.stringify({
3267
+ id: "primary-response",
3268
+ usage: { input_tokens: 4, output_tokens: 5 }
3269
+ }));
3270
+ } else {
3271
+ res.end(JSON.stringify({
3272
+ id: "primary-chat",
3273
+ usage: { prompt_tokens: 4, completion_tokens: 5 }
3274
+ }));
3275
+ }
3111
3276
  return;
3112
3277
  }
3113
3278
 
@@ -3137,12 +3302,19 @@ describe("TokenBuddy seller routing strategies", () => {
3137
3302
  return;
3138
3303
  }
3139
3304
 
3140
- if (req.url === "/backup/v1/chat/completions") {
3305
+ if (req.url === "/backup/v1/chat/completions" || req.url === "/backup/v1/responses") {
3141
3306
  events.push({ seller: "backup-seller", url: req.url });
3142
- res.end(JSON.stringify({
3143
- id: "backup-chat",
3144
- usage: { prompt_tokens: 4, completion_tokens: 5 }
3145
- }));
3307
+ if (req.url === "/backup/v1/responses") {
3308
+ res.end(JSON.stringify({
3309
+ id: "backup-response",
3310
+ usage: { input_tokens: 4, output_tokens: 5 }
3311
+ }));
3312
+ } else {
3313
+ res.end(JSON.stringify({
3314
+ id: "backup-chat",
3315
+ usage: { prompt_tokens: 4, completion_tokens: 5 }
3316
+ }));
3317
+ }
3146
3318
  return;
3147
3319
  }
3148
3320
 
@@ -3170,6 +3342,8 @@ describe("TokenBuddy seller routing strategies", () => {
3170
3342
  events.length = 0;
3171
3343
  primaryPurchaseSucceeds = false;
3172
3344
  primaryInferenceFails = false;
3345
+ primaryInferenceFailsOnceWithIdempotencyConflict = false;
3346
+ primaryInferenceSeenRequestIds.clear();
3173
3347
  primaryInferenceBusy = false;
3174
3348
  primaryInferenceDelayMs = 0;
3175
3349
  rmSqliteFiles(dbPath);
@@ -3371,7 +3545,6 @@ describe("TokenBuddy seller routing strategies", () => {
3371
3545
  expect(response.ok).toBe(true);
3372
3546
  expect((await response.json() as any).id).toBe("backup-chat");
3373
3547
  expect(routeEvents()).toEqual([
3374
- { seller: "primary-seller", url: "/primary/purchase/create" },
3375
3548
  { seller: "backup-seller", url: "/backup/purchase/create" },
3376
3549
  { seller: "backup-seller", url: "/backup/purchase/complete" },
3377
3550
  { seller: "backup-seller", url: "/backup/v1/chat/completions" }
@@ -3380,6 +3553,23 @@ describe("TokenBuddy seller routing strategies", () => {
3380
3553
  expect(prewarmAfterRequest.scheduler.totalScheduled).toBeGreaterThan(scheduledBeforeRequest);
3381
3554
  });
3382
3555
 
3556
+ test("routing preview uses seller manifest discount metadata", async () => {
3557
+ const response = await fetch(
3558
+ `http://127.0.0.1:${daemonControlPort}/routing/preview?modelId=gpt-manual&protocol=chat_completions&paymentMethod=mock&mode=fullAuto&scorer=discount`
3559
+ );
3560
+
3561
+ expect(response.ok).toBe(true);
3562
+ const preview = await response.json() as any;
3563
+ expect(preview.plan.reason).toBe("fullAuto:discount:routes_2");
3564
+ expect(preview.plan.routes.map((route: any) => route.seller.id)).toEqual(["backup-seller", "primary-seller"]);
3565
+ expect(preview.plan.routes[0].metrics.discountRatio).toBe(0.01);
3566
+ expect(preview.plan.routes[1].metrics.discountRatio).toBe(1);
3567
+ expect(events).toEqual(expect.arrayContaining([
3568
+ { seller: "primary-seller", url: "/primary/manifest" },
3569
+ { seller: "backup-seller", url: "/backup/manifest" }
3570
+ ]));
3571
+ });
3572
+
3383
3573
  test("fixedSet routing only uses sellers in the configured pool", async () => {
3384
3574
  daemon.stop();
3385
3575
  events.length = 0;
@@ -3434,7 +3624,7 @@ describe("TokenBuddy seller routing strategies", () => {
3434
3624
  sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3435
3625
  sellerRouting: {
3436
3626
  mode: "fullAuto",
3437
- scorer: "balanced"
3627
+ scorer: "speed"
3438
3628
  }
3439
3629
  });
3440
3630
  daemon.start();
@@ -3481,7 +3671,7 @@ describe("TokenBuddy seller routing strategies", () => {
3481
3671
  expect(requestLogs).toContain("event=route.candidates.prewarmed");
3482
3672
  expect(requestLogs).toContain("event=route.selected");
3483
3673
  expect(requestLogs).toContain("routePlanSource=registry_fallback");
3484
- expect(requestLogs).toContain("routePlanReason=fullAuto:balanced:routes_2");
3674
+ expect(requestLogs).toContain("routePlanReason=fullAuto:speed:routes_2");
3485
3675
  expect(requestLogs).toContain("candidateDiagnostics=");
3486
3676
  expect(requestLogs).toContain("hasNextRoute=true");
3487
3677
  expect(requestLogs).toContain("attemptNumber=");
@@ -3493,6 +3683,124 @@ describe("TokenBuddy seller routing strategies", () => {
3493
3683
  expect(logs).not.toContain(rawPrompt);
3494
3684
  });
3495
3685
 
3686
+ test("soft failure retry uses a fresh seller attempt id after upstream failure", async () => {
3687
+ daemon.stop();
3688
+ events.length = 0;
3689
+ primaryPurchaseSucceeds = true;
3690
+ primaryInferenceFailsOnceWithIdempotencyConflict = true;
3691
+ const requestId = "auto_retry_fresh_seller_attempt_id";
3692
+ daemon = new TokenbuddyDaemon({
3693
+ controlPort: 0,
3694
+ proxyPort: 0,
3695
+ dbPath,
3696
+ sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3697
+ sellerRouting: {
3698
+ mode: "fixed",
3699
+ sellerId: "primary-seller",
3700
+ scorer: "balanced"
3701
+ }
3702
+ });
3703
+ daemon.start();
3704
+ daemonControlPort = ((daemon as any).controlServer.address() as AddressInfo).port;
3705
+ daemonProxyPort = ((daemon as any).proxyServer.address() as AddressInfo).port;
3706
+
3707
+ const response = await fetch(`http://127.0.0.1:${daemonProxyPort}/v1/chat/completions`, {
3708
+ method: "POST",
3709
+ headers: {
3710
+ "Content-Type": "application/json",
3711
+ "Idempotency-Key": "idem-fresh-seller-attempt"
3712
+ },
3713
+ body: JSON.stringify({
3714
+ model: "gpt-manual",
3715
+ messages: [{ role: "user", content: "retry should not reuse seller request id" }],
3716
+ requestId
3717
+ })
3718
+ });
3719
+
3720
+ expect(response.ok).toBe(true);
3721
+ expect((await response.json() as any).id).toBe("primary-chat");
3722
+ const primaryInferenceCalls = events.filter((event) => event.url === "/primary/v1/chat/completions");
3723
+ expect(primaryInferenceCalls).toHaveLength(2);
3724
+ expect(primaryInferenceCalls.map((event) => event.body?.requestId)).toEqual([
3725
+ requestId,
3726
+ `${requestId}_r0_a1_n0`
3727
+ ]);
3728
+ expect(primaryInferenceCalls.map((event) => event.idempotencyKey)).toEqual([
3729
+ "idem-fresh-seller-attempt",
3730
+ "idem-fresh-seller-attempt_r0_a1_n0"
3731
+ ]);
3732
+
3733
+ const inferences = await (await fetch(`http://127.0.0.1:${daemonControlPort}/ledger/inferences`)).json() as any;
3734
+ expect(inferences.inferences).toEqual(expect.arrayContaining([
3735
+ expect.objectContaining({
3736
+ requestId,
3737
+ sellerKey: "primary-seller",
3738
+ endpoint: "/v1/chat/completions",
3739
+ status: "estimated"
3740
+ })
3741
+ ]));
3742
+ expect(JSON.stringify(inferences)).not.toContain(`${requestId}_r0_a1_n0`);
3743
+ });
3744
+
3745
+ test("responses retry uses a fresh seller attempt id after upstream failure", async () => {
3746
+ daemon.stop();
3747
+ events.length = 0;
3748
+ primaryPurchaseSucceeds = true;
3749
+ primaryInferenceFailsOnceWithIdempotencyConflict = true;
3750
+ const requestId = "responses_retry_fresh_seller_attempt_id";
3751
+ daemon = new TokenbuddyDaemon({
3752
+ controlPort: 0,
3753
+ proxyPort: 0,
3754
+ dbPath,
3755
+ sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3756
+ sellerRouting: {
3757
+ mode: "fixed",
3758
+ sellerId: "primary-seller",
3759
+ scorer: "balanced"
3760
+ }
3761
+ });
3762
+ daemon.start();
3763
+ daemonControlPort = ((daemon as any).controlServer.address() as AddressInfo).port;
3764
+ daemonProxyPort = ((daemon as any).proxyServer.address() as AddressInfo).port;
3765
+
3766
+ const response = await fetch(`http://127.0.0.1:${daemonProxyPort}/v1/responses`, {
3767
+ method: "POST",
3768
+ headers: {
3769
+ "Content-Type": "application/json",
3770
+ "Idempotency-Key": "idem-responses-fresh-seller-attempt"
3771
+ },
3772
+ body: JSON.stringify({
3773
+ model: "gpt-manual",
3774
+ input: "retry should not reuse seller request id",
3775
+ requestId
3776
+ })
3777
+ });
3778
+
3779
+ expect(response.ok).toBe(true);
3780
+ expect((await response.json() as any).id).toBe("primary-response");
3781
+ const primaryInferenceCalls = events.filter((event) => event.url === "/primary/v1/responses");
3782
+ expect(primaryInferenceCalls).toHaveLength(2);
3783
+ expect(primaryInferenceCalls.map((event) => event.body?.requestId)).toEqual([
3784
+ requestId,
3785
+ `${requestId}_r0_a1_n0`
3786
+ ]);
3787
+ expect(primaryInferenceCalls.map((event) => event.idempotencyKey)).toEqual([
3788
+ "idem-responses-fresh-seller-attempt",
3789
+ "idem-responses-fresh-seller-attempt_r0_a1_n0"
3790
+ ]);
3791
+
3792
+ const inferences = await (await fetch(`http://127.0.0.1:${daemonControlPort}/ledger/inferences`)).json() as any;
3793
+ expect(inferences.inferences).toEqual(expect.arrayContaining([
3794
+ expect.objectContaining({
3795
+ requestId,
3796
+ sellerKey: "primary-seller",
3797
+ endpoint: "/v1/responses",
3798
+ status: "estimated"
3799
+ })
3800
+ ]));
3801
+ expect(JSON.stringify(inferences)).not.toContain(`${requestId}_r0_a1_n0`);
3802
+ });
3803
+
3496
3804
  test("fullAuto routing treats busy_capacity as a capacity block and starts the next request on backup", async () => {
3497
3805
  daemon.stop();
3498
3806
  events.length = 0;
@@ -3505,7 +3813,7 @@ describe("TokenBuddy seller routing strategies", () => {
3505
3813
  sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3506
3814
  sellerRouting: {
3507
3815
  mode: "fullAuto",
3508
- scorer: "balanced"
3816
+ scorer: "speed"
3509
3817
  }
3510
3818
  });
3511
3819
  daemon.start();
@@ -3561,7 +3869,7 @@ describe("TokenBuddy seller routing strategies", () => {
3561
3869
  sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3562
3870
  sellerRouting: {
3563
3871
  mode: "fullAuto",
3564
- scorer: "balanced"
3872
+ scorer: "speed"
3565
3873
  },
3566
3874
  sellerConcurrency: {
3567
3875
  enabled: true,
@@ -3666,7 +3974,7 @@ describe("TokenBuddy seller routing strategies", () => {
3666
3974
  sellerRegistryUrl: `http://127.0.0.1:${sellerPort}/registry/sellers`,
3667
3975
  sellerRouting: {
3668
3976
  mode: "fullAuto",
3669
- scorer: "balanced"
3977
+ scorer: "speed"
3670
3978
  }
3671
3979
  });
3672
3980
  daemon.start();