npm - @tokenbuddy/tokenbuddy - Versions diffs - 1.0.9 → 1.0.11 - Mend

@tokenbuddy/tokenbuddy 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/dist/src/buyer-store.d.ts +13 -0
package/dist/src/buyer-store.d.ts.map +1 -1
package/dist/src/buyer-store.js +21 -2
package/dist/src/buyer-store.js.map +1 -1
package/dist/src/cli.d.ts.map +1 -1
package/dist/src/cli.js +54 -0
package/dist/src/cli.js.map +1 -1
package/dist/src/credit-tracker.d.ts +118 -0
package/dist/src/credit-tracker.d.ts.map +1 -0
package/dist/src/credit-tracker.js +220 -0
package/dist/src/credit-tracker.js.map +1 -0
package/dist/src/daemon.d.ts +49 -4
package/dist/src/daemon.d.ts.map +1 -1
package/dist/src/daemon.js +541 -405
package/dist/src/daemon.js.map +1 -1
package/dist/src/model-index.d.ts +86 -0
package/dist/src/model-index.d.ts.map +1 -0
package/dist/src/model-index.js +214 -0
package/dist/src/model-index.js.map +1 -0
package/dist/src/prewarm-cache.d.ts +149 -0
package/dist/src/prewarm-cache.d.ts.map +1 -0
package/dist/src/prewarm-cache.js +288 -0
package/dist/src/prewarm-cache.js.map +1 -0
package/dist/src/prewarm-scheduler.d.ts +150 -0
package/dist/src/prewarm-scheduler.d.ts.map +1 -0
package/dist/src/prewarm-scheduler.js +484 -0
package/dist/src/prewarm-scheduler.js.map +1 -0
package/dist/src/provider-install.d.ts.map +1 -1
package/dist/src/provider-install.js +9 -1
package/dist/src/provider-install.js.map +1 -1
package/dist/src/route-failover.d.ts +96 -0
package/dist/src/route-failover.d.ts.map +1 -0
package/dist/src/route-failover.js +177 -0
package/dist/src/route-failover.js.map +1 -0
package/dist/src/seller-catalog.d.ts +26 -0
package/dist/src/seller-catalog.d.ts.map +1 -1
package/dist/src/seller-catalog.js +40 -0
package/dist/src/seller-catalog.js.map +1 -1
package/dist/src/seller-pool.d.ts +127 -0
package/dist/src/seller-pool.d.ts.map +1 -0
package/dist/src/seller-pool.js +243 -0
package/dist/src/seller-pool.js.map +1 -0
package/dist/src/stream-failover.d.ts +78 -0
package/dist/src/stream-failover.d.ts.map +1 -0
package/dist/src/stream-failover.js +93 -0
package/dist/src/stream-failover.js.map +1 -0
package/package.json +1 -1
package/src/buyer-store.ts +32 -2
package/src/cli.ts +61 -0
package/src/credit-tracker.test.ts +165 -0
package/src/credit-tracker.ts +269 -0
package/src/daemon.ts +569 -445
package/src/model-index.test.ts +184 -0
package/src/model-index.ts +266 -0
package/src/prewarm-cache.test.ts +281 -0
package/src/prewarm-cache.ts +373 -0
package/src/prewarm-scheduler.test.ts +367 -0
package/src/prewarm-scheduler.ts +581 -0
package/src/provider-install.ts +9 -1
package/src/route-failover.test.ts +193 -0
package/src/route-failover.ts +233 -0
package/src/seller-catalog-413.test.ts +61 -0
package/src/seller-catalog.ts +47 -0
package/src/seller-pool.test.ts +231 -0
package/src/seller-pool.ts +333 -0
package/src/stream-failover.test.ts +52 -0
package/src/stream-failover.ts +129 -0
package/src/thousand-seller.test.ts +151 -0
package/tests/daemon-413-fallback.test.ts +92 -0
package/tests/e2e.test.ts +3 -2
package/tests/tokenbuddy.test.ts +68 -11

package/src/stream-failover.ts ADDED Viewed

@@ -0,0 +1,129 @@
+import { createModuleLogger } from "@tokenbuddy/logging";
+const logger = createModuleLogger("tb-proxyd:stream-failover");
+/**
+ * v1.2 §6 / §18.10: stream-failover policy. The buyer honors the
+ * "abort + client retry" contract: once the first SSE byte has been
+ * written to the client, an upstream stream failure is surfaced as an
+ * abrupt close plus a `X-TokenBuddy-Retry-Hint: 1` trailer. The client
+ * (OpenAI / Anthropic SDK or any consumer honoring the OpenAI retry
+ * contract) re-issues the request and the buyer serves it from a
+ * healthy seller.
+ *
+ * The decisions in this module are intentionally one-way: the buyer
+ * never tries to splice two streams together (option B in the design
+ * doc) because that would double-charge and would require non-trivial
+ * idempotency re-design. v1.2 = abort + retry; v2 may revisit.
+ */
+export interface StreamFailoverOptions { // Optional constructor settings for `StreamFailover`.
+  retryHintHeader?: string; // Name reported by `headerName`; defaults to "X-TokenBuddy-Retry-Hint".
+  now?: () => number; // Clock override, defaults to `Date.now`; stored by the class but not read by any method in this file.
+}
+export interface StreamFailoverDecision { // Result returned by `StreamFailover.decideOnStreamAbort`.
+  action: "abort_with_retry_hint" | "let_stream_complete"; // "abort_with_retry_hint" once the first chunk was committed; "let_stream_complete" otherwise.
+  reason: string; // Caller-supplied reason on the abort path; the fixed string "no_chunks_yet_committed" otherwise.
+  retryHintValue: string; // "1" on the abort path, "0" otherwise.
+  firstChunkCommitted: boolean; // Whether the first chunk had been marked committed when the decision was made.
+  bytesFlushed: number; // Recorded byte total on the abort path; always 0 in the pre-commit decision.
+}
+/**
+ * Tracks, for one response stream, whether the first chunk has been
+ * committed to the client and how many bytes were recorded, and turns
+ * an upstream failure into a `StreamFailoverDecision`.
+ *
+ * State is mutable and per-instance: call `reset()` before reusing an
+ * instance for another request.
+ */
+export class StreamFailover {
+  private readonly retryHintHeader: string;
+  // Injected clock (defaults to `Date.now`). It is stored so callers can
+  // supply one, but no method of this class reads it at present.
+  private readonly now: () => number;
+  private firstChunkCommitted = false;
+  private bytesFlushed = 0;
+  /**
+   * @param options Optional overrides; every field falls back to a
+   *   default when omitted.
+   */
+  constructor(options: StreamFailoverOptions = {}) {
+    this.retryHintHeader = options.retryHintHeader ?? "X-TokenBuddy-Retry-Hint";
+    this.now = options.now ?? Date.now;
+  }
+  /**
+   * Record that the buyer's response stream has written its first chunk
+   * to the client. From this point on, the route-failover controller
+   * cannot switch sellers without the client's knowledge; failures
+   * must abort the stream and rely on the client to retry.
+   *
+   * Idempotent: calling it again leaves the flag set.
+   */
+  markFirstChunkCommitted(): void {
+    this.firstChunkCommitted = true;
+  }
+  /**
+   * Track total bytes written to the client. Used by `tb doctor` and
+   * the inference ledger to attribute partial-stream usage.
+   *
+   * Non-finite or negative values are ignored (and logged) rather than
+   * added: a single NaN would otherwise turn the running total into NaN
+   * until the next `reset()`.
+   *
+   * @param bytes Number of bytes just written; expected to be >= 0.
+   */
+  recordBytesWritten(bytes: number): void {
+    if (!Number.isFinite(bytes) || bytes < 0) {
+      logger.warn("stream.failover.invalid_byte_count", "ignoring invalid byte count; running total left unchanged", {
+        bytes: String(bytes),
+        bytesFlushed: this.bytesFlushed
+      });
+      return;
+    }
+    this.bytesFlushed += bytes;
+  }
+  /**
+   * Decide what to do when the upstream stream breaks. If the first
+   * chunk has already been written, the only option is to abort and
+   * surface the retry hint. Otherwise the controller is free to fail
+   * over to the next seller.
+   *
+   * @param reason Caller-supplied failure description. It is logged and
+   *   echoed back only on the abort path; the pre-commit decision
+   *   carries the fixed reason "no_chunks_yet_committed" instead.
+   * @returns The decision, including the retry-hint value ("1" on
+   *   abort, "0" otherwise).
+   */
+  decideOnStreamAbort(reason: string): StreamFailoverDecision {
+    if (!this.firstChunkCommitted) {
+      // Nothing has been committed to the client yet, so no retry hint
+      // is needed and the byte count is reported as zero.
+      return {
+        action: "let_stream_complete",
+        reason: "no_chunks_yet_committed",
+        retryHintValue: "0",
+        firstChunkCommitted: false,
+        bytesFlushed: 0
+      };
+    }
+    logger.warn("stream.failover.aborted", "upstream stream broke after first chunk; aborting client with retry hint", {
+      reason,
+      bytesFlushed: this.bytesFlushed
+    });
+    return {
+      action: "abort_with_retry_hint",
+      reason,
+      retryHintValue: "1",
+      firstChunkCommitted: true,
+      bytesFlushed: this.bytesFlushed
+    };
+  }
+  /**
+   * Read-only snapshot of the current stream state. The route-failover
+   * controller calls this to decide whether the next chunk is the first
+   * one (failover still possible) or a follow-up (abort required).
+   *
+   * @returns A fresh object; mutating it does not affect this instance.
+   */
+  snapshot(): { firstChunkCommitted: boolean; bytesFlushed: number } {
+    return {
+      firstChunkCommitted: this.firstChunkCommitted,
+      bytesFlushed: this.bytesFlushed
+    };
+  }
+  /**
+   * Reset the failover state when a brand-new request starts. The
+   * `forwardProxyRequest` controller calls this before each new
+   * inference request. Clears both the committed flag and the byte
+   * total; the configured field name and clock are kept.
+   */
+  reset(): void {
+    this.firstChunkCommitted = false;
+    this.bytesFlushed = 0;
+  }
+  /**
+   * Name of the retry-hint field the client should look for. Exposed so
+   * the controller and the test fixtures can refer to the same constant.
+   *
+   * Once the first body byte has been written, ordinary response
+   * headers can no longer be sent, so on an already-streaming response
+   * this name can only be delivered as an HTTP trailer field.
+   */
+  get headerName(): string {
+    return this.retryHintHeader;
+  }
+}
+/**
+ * Retry-hint value ("1") that marks a stream-abort response as
+ * retryable. Exposed so callers can refer to the same value in tests.
+ * It equals the `retryHintValue` that `decideOnStreamAbort` returns on
+ * its abort path, which is written there as a literal rather than via
+ * this constant, so the two have to be kept in sync by hand.
+ */
+export const STREAM_FAILOVER_RETRY_HINT = "1";

package/src/thousand-seller.test.ts ADDED Viewed

@@ -0,0 +1,151 @@
+import { ModelIndex } from "../src/model-index.js";
+import { PrewarmCache, prewarmKey } from "../src/prewarm-cache.js";
+import { CreditTracker } from "../src/credit-tracker.js";
+import { SellerPool } from "../src/seller-pool.js";
+import { RouteFailover } from "../src/route-failover.js";
+import { PrewarmScheduler, type ProbeResult, type SellerProber } from "../src/prewarm-scheduler.js";
+import type { RegistrySeller } from "../src/seller-catalog.js";
+/**
+ * v1.2 §18.15: "thousand-seller" integration smoke. Validates the
+ * end-to-end pipeline at a scale that simulates a real public registry:
+ *   - the model-index build stays cheap (asserted below as < 500 ms
+ *     for 1k sellers)
+ *   - the prewarm scheduler, under a lowered per-minute cap, reports
+ *     every request as either succeeded or rate-limited
+ *   - the route-failover controller still returns a clean decision when
+ *     one seller fails
+ *
+ * The test does not exercise live HTTP traffic; it uses stub probers and
+ * pre-populated registries so it can run as a fast unit test on every
+ * change.
+ */
+describe("v1.2 thousand-seller integration smoke", () => {
+  /**
+   * Build `size` synthetic sellers with ids `seller-0000`, `seller-0001`,
+   * ... that all serve `focusModel` over chat_completions / clawtip.
+   */
+  function buildLargeRegistry(size: number, focusModel: string): RegistrySeller[] {
+    const sellers: RegistrySeller[] = [];
+    for (let i = 0; i < size; i += 1) {
+      sellers.push({
+        id: `seller-${i.toString().padStart(4, "0")}`,
+        name: `Seller ${i}`,
+        url: `https://seller-${i}.example.com`,
+        supportedProtocols: ["chat_completions"],
+        paymentMethods: ["clawtip"],
+        // ~1/3 of the sellers serve BOTH models, 2/3 serve only
+        // `focusModel`. This simulates a realistic registry mix.
+        models: i % 3 === 0 ? [focusModel, "secondary-model"] : [focusModel]
+      });
+    }
+    return sellers;
+  }
+  test("model-index builds in well under a second for 1000 sellers", () => {
+    const index = new ModelIndex();
+    const sellers = buildLargeRegistry(1000, "gpt-4o");
+    // Wall-clock measurement of the rebuild only; registry construction
+    // above is deliberately excluded.
+    const started = Date.now();
+    index.rebuild(sellers, { registryVersion: 1, defaultSellerId: "seller-0000" });
+    const elapsed = Date.now() - started;
+    expect(elapsed).toBeLessThan(500);
+    expect(index.stats().sellerCount).toBe(1000);
+    // `gpt-4o` plus `secondary-model`.
+    expect(index.stats().modelCount).toBe(2);
+  });
+  test("picking the focus model returns the configured candidate set", () => {
+    const index = new ModelIndex();
+    index.rebuild(buildLargeRegistry(1000, "gpt-4o"), { registryVersion: 1 });
+    const candidates = index.sellersFor("gpt-4o", { protocol: "chat_completions", paymentMethod: "clawtip" });
+    // Every seller in the registry serves `gpt-4o` (either alone or
+    // alongside `secondary-model`), so all 1000 are eligible.
+    expect(candidates.length).toBe(1000);
+  });
+  test("prewarm scheduler enforces the global per-minute cap across many tasks", async () => {
+    const index = new ModelIndex();
+    const sellers = buildLargeRegistry(50, "gpt-4o");
+    index.rebuild(sellers, { registryVersion: 1 });
+    const cache = new PrewarmCache();
+    // Prober resolves immediately. The scheduler should still cap the
+    // number of actual probe calls per minute.
+    const prober: SellerProber = async (): Promise<ProbeResult> => ({ ok: true, latencyMs: 1, httpStatus: 200 });
+    const scheduler = new PrewarmScheduler({
+      modelIndex: index,
+      cache,
+      prober,
+      // Lower the caps so the test runs in a few ms.
+      maxPrewarmPerMinute: 5,
+      concurrency: 1,
+      // This stub never resolves, so any scheduler path that awaits it
+      // would hang the test. NOTE(review): presumably a capped request
+      // is reported as `rate_limited` instead of sleeping — confirm
+      // against the scheduler implementation.
+      sleep: () => new Promise(() => undefined)
+    });
+    // Issue three identical `gpt-4o` prewarm requests concurrently. The
+    // assertions only require that at least one succeeds, that every
+    // request ends as `succeeded` or `rate_limited`, and that the
+    // scheduler's `totalRateLimited` stat matches the observed count.
+    const tasks = await Promise.all([
+      scheduler.schedulePrewarm({ modelId: "gpt-4o", reason: "lazy" }),
+      scheduler.schedulePrewarm({ modelId: "gpt-4o", reason: "lazy" }),
+      scheduler.schedulePrewarm({ modelId: "gpt-4o", reason: "lazy" })
+    ]);
+    const succeeded = tasks.filter((t) => t.status === "succeeded").length;
+    const rateLimited = tasks.filter((t) => t.status === "rate_limited").length;
+    expect(succeeded).toBeGreaterThan(0);
+    expect(rateLimited + succeeded).toBe(3);
+    expect(scheduler.stats().totalRateLimited).toBe(rateLimited);
+  });
+  test("seller-pool + route-failover pipeline still produces a clean decision under a thousand sellers", () => {
+    const index = new ModelIndex();
+    index.rebuild(buildLargeRegistry(1000, "gpt-4o"), { registryVersion: 1 });
+    const cache = new PrewarmCache();
+    const credit = new CreditTracker();
+    const pool = new SellerPool({ modelIndex: index, cache, creditTracker: credit });
+    pool.sync();
+    const failover = new RouteFailover({ pool, creditTracker: credit });
+    // 1k sellers all serve gpt-4o. Warm the cache with the first 100
+    // eligible sellers (all committed with healthScore 80), then verify
+    // that a hard 4xx on the picked seller yields a `failover_next`
+    // decision with a non-zero wasted-credit amount.
+    const eligible = index.sellersFor("gpt-4o", { protocol: "chat_completions", paymentMethod: "clawtip" });
+    const subset = eligible.slice(0, 100);
+    cache.commitWarm({
+      modelId: "gpt-4o",
+      protocol: "chat_completions",
+      paymentMethod: "clawtip",
+      candidates: subset.map((seller) => ({ sellerId: seller.id, url: seller.url, healthScore: 80 }))
+    });
+    // Second sync picks up the entries committed just above.
+    pool.sync();
+    // pool size matches the deduped seller count in the cache (each
+    // seller appears exactly once even if listed by multiple registry
+    // entries).
+    expect(pool.size()).toBe(subset.length);
+    const first = failover.pickNext("gpt-4o", "chat_completions", "clawtip");
+    expect(first).toBeDefined();
+    // Narrow once here instead of repeating non-null assertions below.
+    if (first == null) {
+      throw new Error("pickNext returned no candidate for gpt-4o");
+    }
+    credit.recordPurchase(first.sellerId, 1_000_000, 1_000_000);
+    const decision = failover.decide(
+      { sellerId: first.sellerId, status: 404, errorKind: "hard_4xx", attempt: 0 },
+      100
+    );
+    expect(decision.action).toBe("failover_next");
+    expect(decision.wastedCreditMicros).toBeGreaterThan(0);
+  });
+  test("prewarm-key collisions are impossible across the (model, protocol, payment) space", () => {
+    // Commit 1000 entries, each with its own modelId and a rotating
+    // protocol / payment method, then assert that the count of unique
+    // keys equals the count of committed entries. Because every modelId
+    // is distinct, this checks that `prewarmKey` keeps distinct triples
+    // apart for this data set.
+    const cache = new PrewarmCache();
+    for (let i = 0; i < 1000; i += 1) {
+      const protocol = i % 2 === 0 ? "chat_completions" : "responses";
+      const payment = i % 3 === 0 ? "clawtip" : "mock";
+      cache.commitWarm({
+        modelId: `m-${i}`,
+        protocol,
+        paymentMethod: payment,
+        candidates: [{ sellerId: `s-${i}`, url: "https://x", healthScore: 80 }]
+      });
+    }
+    const keys = new Set<string>();
+    for (const entry of cache.snapshot()) {
+      keys.add(prewarmKey(entry.modelId, entry.protocol, entry.paymentMethod));
+    }
+    expect(keys.size).toBe(1000);
+  });
+});

package/tests/daemon-413-fallback.test.ts ADDED Viewed

@@ -0,0 +1,92 @@
+import * as http from "http";
+import * as fs from "fs";
+import * as path from "path";
+import { AddressInfo } from "net";
+import { TokenbuddyDaemon } from "../src/daemon.js";
+/**
+ * v1.2 §18.9: when the bootstrap returns 413 (registry over 1MB), the
+ * daemon must fall back to the last successfully fetched snapshot
+ * instead of failing every request. This is the buyer-side counterpart
+ * to the bootstrap's hard 1MB cap.
+ *
+ * Scope note: the stub bootstrap in this suite always answers 200, so
+ * only the successful-boot path is exercised here. The 413 handling in
+ * `fetchRegistry` is covered by `seller-catalog-413.test.ts`.
+ */
+describe("TokenbuddyDaemon registry 413 stale-fallback", () => {
+  const TEMP_DB = path.resolve(__dirname, "../../data-test/413-fallback-test.db");
+  let bootstrapServer: http.Server;
+  let bootstrapPort: number;
+  let daemon: TokenbuddyDaemon;
+  let daemonProxyPort: number;
+  /**
+   * Delete the test database and its `-wal` / `-shm` companion files.
+   * `force: true` makes a missing file a no-op, which avoids the
+   * check-then-delete race of `existsSync` followed by `unlinkSync`.
+   */
+  function rmDb(): void {
+    for (const suffix of ["", "-wal", "-shm"]) {
+      fs.rmSync(TEMP_DB + suffix, { force: true });
+    }
+  }
+  beforeAll((done) => {
+    bootstrapServer = http.createServer((_req, res) => {
+      res.setHeader("Content-Type", "application/json");
+      // Always 200 for the boot fetch. Tests that want 413 do not
+      // need to flip this server; they instead verify the catch-block
+      // in `fetchRegistry` via the dedicated unit test in
+      // `seller-catalog-413.test.ts`.
+      res.statusCode = 200;
+      res.end(JSON.stringify({
+        version: 1,
+        defaultSeller: "primary-seller",
+        sellers: [
+          {
+            id: "primary-seller",
+            url: "https://primary.example.com",
+            supportedProtocols: ["chat_completions"],
+            paymentMethods: ["mock"],
+            models: ["gpt-4o"]
+          }
+        ]
+      }));
+    });
+    // Port 0 lets the OS pick a free port; it is read back once the
+    // server is listening.
+    bootstrapServer.listen(0, "127.0.0.1", () => {
+      bootstrapPort = (bootstrapServer.address() as AddressInfo).port;
+      done();
+    });
+  });
+  afterAll((done) => {
+    bootstrapServer.close(() => done());
+  });
+  beforeEach(() => {
+    rmDb();
+    daemon = new TokenbuddyDaemon({
+      controlPort: 0,
+      proxyPort: 0,
+      dbPath: TEMP_DB,
+      sellerRegistryUrl: `http://127.0.0.1:${bootstrapPort}/registry/sellers`
+    });
+    daemon.start();
+    // `proxyServer` is not part of the daemon's public type, hence the
+    // cast through `unknown` to read the OS-assigned port.
+    const proxyServer = (daemon as unknown as { proxyServer: { address(): AddressInfo } }).proxyServer;
+    daemonProxyPort = proxyServer.address().port;
+  });
+  afterEach(async () => {
+    daemon.stop();
+    // Drain any in-flight prewarm scheduler work to avoid jest
+    // open-handle warnings. The daemon's stop() is fire-and-forget.
+    await new Promise<void>((resolve) => setTimeout(resolve, 50));
+    rmDb();
+  });
+  test("daemon stays alive after a successful boot against the bootstrap", async () => {
+    // Happy path only: the bootstrap answered 200, so this asserts that
+    // the control plane reports healthy and that the proxy port was
+    // assigned. The typed-error / stale-snapshot fallback for a 413 is
+    // covered by `seller-catalog-413.test.ts`, not here.
+    const controlPort = (daemon as unknown as { controlServer: { address(): AddressInfo } }).controlServer.address().port;
+    const health = await (await fetch(`http://127.0.0.1:${controlPort}/health`)).json() as { status: string };
+    expect(health.status).toBe("ok");
+    expect(typeof daemonProxyPort).toBe("number");
+  });
+});

package/tests/e2e.test.ts CHANGED Viewed

@@ -172,8 +172,9 @@ describe("TokenBuddy Full End-to-End Integration Flow Tests", () => {
           id: "seller-e2e-node",
           name: "Seller E2E Node",
           url: `http://127.0.0.1:${sellerPort}`,
-          supportedProtocols: ["openai"],
-          paymentMethods: ["mock"]
+          supportedProtocols: ["chat_completions"],
+          paymentMethods: ["mock"],
+          models: ["gpt-4", "gpt-4o"]
         }
       ]
     };

package/tests/tokenbuddy.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { TokenbuddyDaemon } from "../src/daemon.js";
 import { BuyerStore, resolveBuyerStorePath, type PaymentConfig } from "../src/buyer-store.js";
+import type { ProviderRuntimeConfig } from "../src/provider-install.js";
 import {
   buildCli,
   fetchClawtipBootstrap,
@@ -165,6 +166,22 @@ describe("BuyerStore safe SQLite persistence", () => {
     });
   });
+  test("getToken surfaces expiresAt so the daemon can reject stale tokens", () => {
+    const futureIso = "2030-01-01T00:00:00.000Z";
+    store.saveToken("seller-exp", "raw-token-secret", "model:gpt-4", 1_000_000, futureIso);
+    expect(store.getToken("seller-exp")?.expiresAt).toBe(futureIso);
+    // v1.2 PR-fix: `saveToken` persists the `expiresAt` passed as its
+    // last argument; the daemon reads it back via `getToken().expiresAt`
+    // to refuse cached tokens inside the safety margin. The assertions
+    // pin the `token` and `expiresAt` field names so a future rename
+    // can't silently drop the buyer-side expiry check.
+    expect(store.getToken("seller-exp")).toMatchObject({
+      token: "raw-token-secret",
+      expiresAt: futureIso
+    });
+  });
   test("returns stable empty state for payments, pending purchases, and ledgers", () => {
     expect(store.listPayments()).toEqual([]);
     expect(store.listPendingPurchases()).toEqual([]);
@@ -1583,6 +1600,32 @@ describe("Provider install planning", () => {
       store.close();
     }
   });
+  test("opencode provider install uses @ai-sdk/openai-responses by default (Responses API)", () => {
+    // Pin the invariant: from v1.0.10 on, `tb-proxy install opencode` must default to the Responses API
+    // protocol rather than chat completions. Reason: upstreams such as code.shoestravel.xin natively emit a
+    // Responses-API-style SSE event chain (response.created / response.output_text.delta / response.completed),
+    // from which the buyer-side SseUsageExtractor parses the usage field more reliably. Before switching back
+    // to @ai-sdk/openai, first evaluate whether the 5-seller architecture still works end to end.
+    const config: ProviderRuntimeConfig = {
+      selectionKind: "single-model",
+      protocolPreference: "responses", // NOTE(review): set explicitly here, so an unset default is not what this exercises — confirm intent.
+      defaultModel: "gpt-5.4",
+    };
+    const changes = previewProviderInstall({
+      providers: ["opencode"],
+      proxyUrl: "http://127.0.0.1:17821",
+      providerSelections: { opencode: config },
+      home: PROVIDER_HOME,
+    });
+    const change = changes.find((c) => c.providerId === "opencode");
+    expect(change).toBeDefined();
+    expect(change?.content).toBeDefined();
+    const parsed = JSON.parse(change!.content!);
+    expect(parsed.provider.tokenbuddy.npm).toBe("@ai-sdk/openai-responses");
+    expect(parsed.model).toBe("tokenbuddy/gpt-5.4");
+    expect(parsed.provider.tokenbuddy.options.baseURL).toBe("http://127.0.0.1:17821/v1");
+  });
 });
 describe("TokenBuddy CLI and Daemon Integration Tests", () => {
@@ -1646,14 +1689,16 @@ describe("TokenBuddy CLI and Daemon Integration Tests", () => {
               name: "Incompatible Seller",
               url: `http://127.0.0.1:${mockSellerPort}/incompatible`,
               supportedProtocols: ["chat_completions"],
-              paymentMethods: ["mock"]
+              paymentMethods: ["mock"],
+              models: ["incompatible-only"]
             },
             {
               id: "mock-seller",
               name: "Mock Seller",
               url: `http://127.0.0.1:${mockSellerPort}`,
               supportedProtocols: ["chat_completions", "responses", "messages"],
-              paymentMethods: ["mock"]
+              paymentMethods: ["mock"],
+              models: ["gpt-4", "gpt-4.1-mini", "claude-3-5-sonnet"]
             }
           ]
         }));
@@ -2257,7 +2302,7 @@ describe("TokenBuddy CLI and Daemon Integration Tests", () => {
     expect(publicOutput).not.toContain("chatcmpl-stream");
   });
-  test("normalizes responses SSE shape for OpenCode-compatible consumers", async () => {
+  test("passes through responses SSE bytes unchanged for OpenAI Responses API clients", async () => {
     const response = await fetch(`http://127.0.0.1:${daemonProxyPort}/v1/responses`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
@@ -2272,11 +2317,17 @@ describe("TokenBuddy CLI and Daemon Integration Tests", () => {
     expect(response.ok).toBe(true);
     expect(response.headers.get("content-type")).toContain("text/event-stream");
     const body = await response.text();
-    expect(body).toContain("response.content_part.added");
-    expect(body).toContain("response.content_part.done");
+    // The seller's raw events are forwarded as-is; content_part.added / content_part.done are no longer injected.
+    expect(body).toContain("event: response.created");
+    expect(body).toContain("event: response.output_item.added");
+    expect(body).toContain("event: response.output_text.delta");
+    expect(body).toContain("event: response.output_text.done");
+    expect(body).toContain("event: response.output_item.done");
+    expect(body).toContain("event: response.completed");
     expect(body).toContain("\"item_id\":\"item_stream_shape\"");
-    expect(body).toContain("\"output_text\":\"hello\"");
-    expect(body).toContain("\"content\":[{\"type\":\"output_text\",\"text\":\"hello\",\"annotations\":[]}]");
+    expect(body).toContain("\"delta\":\"hello\"");
+    // Internal accounting events must not leak to the client.
+    expect(body).not.toContain("tokenbuddy.settlement");
   });
   test("fails closed when no compatible seller can serve the requested model", async () => {
@@ -2326,14 +2377,16 @@ describe("TokenBuddy manual routing mode", () => {
               name: "Primary Seller",
               url: `http://127.0.0.1:${sellerPort}/primary`,
               supportedProtocols: ["chat_completions"],
-              paymentMethods: ["mock"]
+              paymentMethods: ["mock"],
+              models: ["gpt-manual"]
             },
             {
               id: "backup-seller",
               name: "Backup Seller",
               url: `http://127.0.0.1:${sellerPort}/backup`,
               supportedProtocols: ["chat_completions"],
-              paymentMethods: ["mock"]
+              paymentMethods: ["mock"],
+              models: ["gpt-manual"]
             }
           ]
         }));
@@ -2471,8 +2524,10 @@ describe("TokenBuddy manual routing mode", () => {
     expect(response.status).toBe(502);
     const output = await response.json() as any;
     expect(output.error.message).toContain("purchase/create failed");
+    // v1.2: the buyer no longer fetches the seller manifest per request.
+    // The registry's `models` field is the source of truth. Auto-purchase
+    // is still attempted once before failing over.
     expect(events).toEqual([
-      { seller: "primary-seller", url: "/primary/manifest" },
       { seller: "primary-seller", url: "/primary/purchase/create" }
     ]);
@@ -2511,8 +2566,10 @@ describe("TokenBuddy manual routing mode", () => {
     });
     expect(response.ok).toBe(true);
+    // v1.2: the buyer no longer fetches the seller manifest per request.
+    // The backup-seller is selected via `selectedSellerId`; the manifest
+    // is sourced from the registry's `models` field.
     expect(events).toEqual([
-      { seller: "backup-seller", url: "/backup/manifest" },
       { seller: "backup-seller", url: "/backup/purchase/create" },
       { seller: "backup-seller", url: "/backup/purchase/complete" },
       { seller: "backup-seller", url: "/backup/v1/chat/completions" }