@tokenbuddy/tokenbuddy 1.0.28 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/daemon.d.ts +11 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +130 -42
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/doctor-diagnostics.d.ts.map +1 -1
- package/dist/src/doctor-diagnostics.js +7 -1
- package/dist/src/doctor-diagnostics.js.map +1 -1
- package/dist/src/prewarm-cache.d.ts +4 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -1
- package/dist/src/prewarm-cache.js +1 -0
- package/dist/src/prewarm-cache.js.map +1 -1
- package/dist/src/prewarm-scheduler.d.ts +2 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -1
- package/dist/src/prewarm-scheduler.js +4 -1
- package/dist/src/prewarm-scheduler.js.map +1 -1
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +196 -18
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +4 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +13 -0
- package/dist/src/seller-pool.d.ts.map +1 -1
- package/dist/src/seller-pool.js +43 -2
- package/dist/src/seller-pool.js.map +1 -1
- package/dist/src/seller-route-planner.d.ts +9 -0
- package/dist/src/seller-route-planner.d.ts.map +1 -1
- package/dist/src/seller-route-planner.js +39 -15
- package/dist/src/seller-route-planner.js.map +1 -1
- package/dist/src/seller-routing-strategy.d.ts +6 -4
- package/dist/src/seller-routing-strategy.d.ts.map +1 -1
- package/dist/src/seller-routing-strategy.js +15 -12
- package/dist/src/seller-routing-strategy.js.map +1 -1
- package/dist/src/terminal-detect.d.ts +5 -5
- package/dist/src/terminal-detect.d.ts.map +1 -1
- package/dist/src/terminal-detect.js +79 -26
- package/dist/src/terminal-detect.js.map +1 -1
- package/package.json +1 -1
- package/src/daemon.ts +168 -46
- package/src/doctor-diagnostics.ts +5 -1
- package/src/prewarm-cache.ts +5 -0
- package/src/prewarm-scheduler.ts +6 -1
- package/src/provider-install.ts +203 -18
- package/src/seller-catalog.ts +4 -0
- package/src/seller-pool.ts +68 -2
- package/src/seller-route-planner.ts +61 -15
- package/src/seller-routing-strategy.ts +21 -16
- package/src/terminal-detect.ts +81 -24
- package/static/ui/assets/index-DEDEl8o2.js +236 -0
- package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
- package/static/ui/index.html +1 -1
- package/tests/control-plane-ui-endpoints.test.ts +73 -0
- package/tests/seller-pool.test.ts +55 -0
- package/tests/seller-route-planner.test.ts +45 -1
- package/tests/seller-routing-strategy.test.ts +6 -5
- package/tests/tokenbuddy.test.ts +346 -38
- package/static/ui/assets/index-UAfOhbwC.js +0 -236
package/dist/src/daemon.d.ts
CHANGED
|
@@ -77,6 +77,7 @@ export declare class TokenbuddyDaemon {
|
|
|
77
77
|
private activePurchases;
|
|
78
78
|
private readonly modelIndex;
|
|
79
79
|
private readonly prewarmCache;
|
|
80
|
+
private readonly sellerMetadataCache;
|
|
80
81
|
private readonly creditTracker;
|
|
81
82
|
private readonly sellerPool;
|
|
82
83
|
private readonly routeFailover;
|
|
@@ -95,6 +96,7 @@ export declare class TokenbuddyDaemon {
|
|
|
95
96
|
private scheduleClawtipActivationWait;
|
|
96
97
|
private clawtipRechargeQr;
|
|
97
98
|
private lastRegistrySnapshot;
|
|
99
|
+
private forceRegistrySnapshotForTest;
|
|
98
100
|
private fetchRegistry;
|
|
99
101
|
private saveTrustedRegistryCache;
|
|
100
102
|
private loadTrustedRegistryCache;
|
|
@@ -105,6 +107,7 @@ export declare class TokenbuddyDaemon {
|
|
|
105
107
|
private initStateSnapshot;
|
|
106
108
|
private buildInitDoctorReport;
|
|
107
109
|
private initDoctorCatalogSnapshot;
|
|
110
|
+
private refreshSellerRuntimeMetrics;
|
|
108
111
|
private endpointProtocol;
|
|
109
112
|
private extractModelId;
|
|
110
113
|
private stripLocalClaudeOneMMarker;
|
|
@@ -138,6 +141,9 @@ export declare class TokenbuddyDaemon {
|
|
|
138
141
|
private logPurchaseLedgerRecorded;
|
|
139
142
|
private logTokenBalanceReconciled;
|
|
140
143
|
private listSellerBackedModels;
|
|
144
|
+
private sellerCatalogWithRuntimeMetrics;
|
|
145
|
+
private refreshSellerRouteMetadata;
|
|
146
|
+
private routeMetricFromPoolEntry;
|
|
141
147
|
private readUsage;
|
|
142
148
|
private parseSellerSettlementSummary;
|
|
143
149
|
private recordReconciledInference;
|
|
@@ -188,26 +194,27 @@ export declare class TokenbuddyDaemon {
|
|
|
188
194
|
/**
|
|
189
195
|
* tb-ui v1 `GET /routing/preview` 和 `PUT /routing/strategy` 复用的 preview 计算。
|
|
190
196
|
* 接受任意 routing 覆盖(来自 request body)算「假如改成这个,路由会是啥」。
|
|
191
|
-
*
|
|
197
|
+
* 不修改 routing state;registry / seller metadata 可按需刷新,保证 preview
|
|
198
|
+
* 使用的候选和折扣信息与真实请求路径一致。
|
|
192
199
|
*/
|
|
193
200
|
buildRoutingPreview(input: {
|
|
194
201
|
modelId?: string;
|
|
195
202
|
protocol?: string;
|
|
196
203
|
paymentMethod?: string;
|
|
197
204
|
routing?: Partial<BuyerSellerRoutingConfig>;
|
|
198
|
-
}): {
|
|
205
|
+
}): Promise<{
|
|
199
206
|
modelId: string;
|
|
200
207
|
protocol: string;
|
|
201
208
|
paymentMethod: string;
|
|
202
209
|
plan: SellerRoutePlan | {
|
|
203
210
|
error: string;
|
|
204
211
|
};
|
|
205
|
-
}
|
|
212
|
+
}>;
|
|
206
213
|
private runStartupPrewarmSweep;
|
|
207
214
|
private resolvePrewarmProtocol;
|
|
208
215
|
stop(): void;
|
|
209
216
|
/**
|
|
210
|
-
* @internal
|
|
217
|
+
* @internal - test-only hook to inject a registry snapshot without
|
|
211
218
|
* hitting the network. Used by `tests/control-plane-ui-endpoints.test.ts`
|
|
212
219
|
* to drive `buildRoutingPreview` deterministically. Production code
|
|
213
220
|
* must NOT call this; the real `fetchRegistry()` populates the snapshot.
|
package/dist/src/daemon.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"daemon.d.ts","sourceRoot":"","sources":["../../src/daemon.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAIvE,OAAO,EAGL,KAAK,uBAAuB,EAC5B,KAAK,mBAAmB,EACxB,KAAK,+BAA+B,EACrC,MAAM,8BAA8B,CAAC;AAStC,OAAO,
|
|
1
|
+
{"version":3,"file":"daemon.d.ts","sourceRoot":"","sources":["../../src/daemon.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAIvE,OAAO,EAGL,KAAK,uBAAuB,EAC5B,KAAK,mBAAmB,EACxB,KAAK,+BAA+B,EACrC,MAAM,8BAA8B,CAAC;AAStC,OAAO,EAaL,KAAK,sBAAsB,EAE5B,MAAM,qBAAqB,CAAC;AAQ7B,OAAO,EAA4B,KAAK,+BAA+B,EAAE,MAAM,iCAAiC,CAAC;AAIjH,OAAO,KAAK,EAAqB,eAAe,EAAE,MAAM,2BAA2B,CAAC;AACpF,OAAO,EAML,KAAK,wBAAwB,EAC9B,MAAM,4BAA4B,CAAC;AA0JpC;;;GAGG;AACH,MAAM,WAAW,YAAY;IAC3B,+BAA+B;IAC/B,WAAW,EAAE,MAAM,CAAC;IACpB,oDAAoD;IACpD,SAAS,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,MAAM,EAAE,MAAM,CAAC;IACf,mEAAmE;IACnE,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6CAA6C;IAC7C,aAAa,CAAC,EAAE,wBAAwB,CAAC;IACzC,8DAA8D;IAC9D,uBAAuB,CAAC,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;IACtF,6EAA6E;IAC7E,6BAA6B,CAAC,EAAE,CAC9B,OAAO,EAAE,uBAAuB,EAChC,OAAO,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,KACxB,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,mBAAmB,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/F,wFAAwF;IACxF,uBAAuB,CAAC,EAAE,CAAC,OAAO,CAAC,EAAE,+BAA+B,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1F,2FAA2F;IAC3F,uBAAuB,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;IACzC,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,2EAA2E;IAC3E,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,8FAA8F;IAC9F,qBAAqB,CAAC,EAAE,MAAM,EAAE,CAAC;IACjC,oBAAoB;IACpB,yBAAyB,CAAC,EAAE,MAAM,CAAC;IACnC,iBAAiB;IACjB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,yCAAyC;IACzC,iBAAiB,CAAC,EAAE,+BAA+B,CAAC;CACrD;AA6SD;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,aAAa,CAAC,CAAM;IAC5B,OAAO,CAAC,WAAW,CAAC,CAAM;IAC1B,OAAO,CAAC,aAAa,CAAoB;IACzC,OAAO,CAAC,gBAAgB,CAAC,CAAS;IAClC,OAAO,CAAC,aAAa,CAA2B;IAChD,OAAO,CAAC,qBAAqB,CAAC,CAAS;IACvC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAqB;IACrD,OAAO,CAAC,qBAAqB,CAAC,CAAgB;IAC9C,OAAO,CAAC,gCAAgC,CAAC,CAAyB;IAClE;;;OAGG;IACH,OAAO,CAAC,eAAe,CAAyB;IAEhD,OAAO,CAAC,eAAe,CAAsC;IAK7D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAoB;IAC/C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IACnD,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAA6B;IACjE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAuB;IACrD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAIxB;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG3B;IAIH,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAoB;IACrD,OAAO,CAAC,QAAQ,CAAC,wBAAwB,CAA2B;gBAExD,MAAM,EAAE,YAAY;IA+BhC,OAAO,CAAC,iBAAiB;IA6CzB,OAAO,CAAC,iBAAiB;IAKzB,OAAO,CAAC,eAAe;IAKvB,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,uBAAuB;IAgB/B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,yBAAyB;IAcjC,OAAO,CAAC,qBAAqB;YAiBf,wBAAwB;IA4DtC,OAAO,CAAC,6BAA6B;IAuDrC,OAAO,CAAC,iBAAiB;IA8BzB,OAAO,CAAC,oBAAoB,CAAuC;IACnE,OAAO,CAAC,4BAA4B,CAAS;YAE/B,aAAa;IA4C3B,OAAO,CAAC,wBAAwB;IAqBhC,OAAO,CAAC,wBAAwB;IAgEhC,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,kBAAkB;IAmB1B,OAAO,CAAC,gBAAgB;IA8BxB,OAAO,CAAC,iBAAiB;YAyCX,qBAAqB;YAmGrB,yBAAyB;YA8BzB,2BAA2B;IAoCzC,OAAO,CAAC,gBAAgB;IAaxB,OAAO,CAAC,cAAc;IAYtB,OAAO,CAAC,0BAA0B;IAOlC,OAAO,CAAC,sBAAsB;IAoB9B,OAAO,CAAC,mBAAmB;IAiB3B,OAAO,CAAC,wBAAwB;IAehC,OAAO,CAAC,oBAAoB;YAKd,kBAAkB;IA6EhC,OAAO,CAAC,0BAA0B;YA0BpB,sBAAsB;IAwCpC,OAAO,CAAC,2BAA2B;IA0BnC,OAAO,CAAC,uBAAuB;IAuB/B,OAAO,CAAC,oBAAoB;IAI5B;;;;;;;;OAQG;IACH,OAAO,CAAC,qBAAqB;IAgB7B;;;;OAIG;IACH,OAAO,CAAC,sBAAsB;IA0E9B,OAAO,CAAC,uBAAuB;IAW/B,OAAO,CAAC,yBAAyB;IAwBjC,OAAO,CAAC,yBAAyB;YAoBnB,sBAAsB;IA+BpC,OAAO,CAAC,+BAA+B;YAYzB,0BAA0B;IAUxC,OAAO,CAAC,wBAAwB;IAmBhC,OAAO,CAAC,SAAS;IA8BjB,OAAO,CAAC,4BAA4B;IAQpC,OAAO,CAAC,yBAAyB;YAmFnB,oBAAoB;IA0ClC,OAAO,CAAC,2BAA2B;YAcrB,4BAA4B;IAmB1C,OAAO,CAAC,kBAAkB;IAQ1B,OAAO,CAAC,2BAA2B;IASnC,OAAO,CAAC,0BAA0B;IASlC;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IAYzB;;;;;OAKG;IACH,OAAO,CAAC,yBAAyB;YAYnB,kBAAkB;YA6NlB,mBAAmB;IAgCjC,OAAO,CAAC,sBAAsB;IAqF9B,OAAO,CAAC,qBAAqB;IAS7B,OAAO,CAAC,mBAAmB;YAUb,mBAAmB;IAoc1B,KAAK;IA4oBZ;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAgBvB;;;;OAIG;IACI,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,GAAG;QAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAAC,MAAM,EAAE,UAAU,GAAG,KAAK,GAAG,YAAY,GAAG,OAAO,CAAA;KAAE;IA8B1H;;;;;OAKG;IACU,mBAAmB,CAAC,KAAK,EAAE;QACtC,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,CAAC,EAAE,OAAO,CAAC,wBAAwB,CAAC,CAAC;KAC7C,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,eAAe,GAAG;YAAE,KAAK,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAC;YAqCtG,sBAAsB;IAyBpC,OAAO,CAAC,sBAAsB;IASvB,IAAI;IAUX;;;;;OAKG;IACI,8BAA8B,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI,GAAG,IAAI;CAIrF"}
|
package/dist/src/daemon.js
CHANGED
|
@@ -19,6 +19,7 @@ import { SellerPool } from "./seller-pool.js";
|
|
|
19
19
|
import { RouteFailover } from "./route-failover.js";
|
|
20
20
|
import { PrewarmScheduler } from "./prewarm-scheduler.js";
|
|
21
21
|
import { SellerConcurrencyLimiter } from "./seller-concurrency-limiter.js";
|
|
22
|
+
import { SellerMetadataCache } from "./seller-metadata-cache.js";
|
|
22
23
|
import { planSellerRouteSet } from "./seller-route-planner.js";
|
|
23
24
|
import { assertSellerRoutingConfig, mergeSellerRoutingConfig, normalizeSellerRoutingConfig, parseSellerIdList, ROUTING_CONFIG_KEY } from "./seller-routing-config.js";
|
|
24
25
|
import { assertInitSetupSteps, buildCompletedInitSetupMarker, INIT_SETUP_CONFIG_KEY, INIT_SETUP_STEPS, isFreshInitMachine, normalizeInitSetupMarker, resolveInitRecommendedModels, } from "./init-setup.js";
|
|
@@ -186,6 +187,16 @@ function parseSellerSettlementObject(raw) {
|
|
|
186
187
|
return undefined;
|
|
187
188
|
}
|
|
188
189
|
}
|
|
190
|
+
function sellerAttemptRequestContext(requestId, idempotencyKey, routeIndex, attempt, retryOrdinal) {
|
|
191
|
+
if (routeIndex === 0 && attempt === 0 && retryOrdinal === 0) {
|
|
192
|
+
return { requestId, idempotencyKey };
|
|
193
|
+
}
|
|
194
|
+
const suffix = `r${routeIndex}_a${attempt}_n${retryOrdinal}`;
|
|
195
|
+
return {
|
|
196
|
+
requestId: `${requestId}_${suffix}`,
|
|
197
|
+
idempotencyKey: `${idempotencyKey}_${suffix}`
|
|
198
|
+
};
|
|
199
|
+
}
|
|
189
200
|
function arrayLength(value) {
|
|
190
201
|
return Array.isArray(value) ? value.length : undefined;
|
|
191
202
|
}
|
|
@@ -296,6 +307,7 @@ export class TokenbuddyDaemon {
|
|
|
296
307
|
// "fetchRegistry + manifest per request" path.
|
|
297
308
|
modelIndex = new ModelIndex();
|
|
298
309
|
prewarmCache = new PrewarmCache();
|
|
310
|
+
sellerMetadataCache = new SellerMetadataCache();
|
|
299
311
|
creditTracker = new CreditTracker();
|
|
300
312
|
sellerPool = new SellerPool({
|
|
301
313
|
modelIndex: this.modelIndex,
|
|
@@ -360,11 +372,15 @@ export class TokenbuddyDaemon {
|
|
|
360
372
|
const now = Date.now();
|
|
361
373
|
const body = await res.json();
|
|
362
374
|
const upstream = body.upstream;
|
|
375
|
+
const latency = body.latency;
|
|
363
376
|
const upstreamErrorClass = upstream?.lastErrorClass ?? upstream?.last_error_class;
|
|
364
377
|
return {
|
|
365
378
|
ok: true,
|
|
366
379
|
latencyMs: now - startedAt,
|
|
367
380
|
httpStatus: res.status,
|
|
381
|
+
ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
|
|
382
|
+
avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
|
|
383
|
+
avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second),
|
|
368
384
|
upstreamStatus: typeof upstream?.status === "string"
|
|
369
385
|
? upstream.status
|
|
370
386
|
: undefined,
|
|
@@ -581,6 +597,7 @@ export class TokenbuddyDaemon {
|
|
|
581
597
|
// trades freshness for availability: requests still route, but the
|
|
582
598
|
// model set is whatever was cached before the registry outgrew 1MB.
|
|
583
599
|
lastRegistrySnapshot = null;
|
|
600
|
+
forceRegistrySnapshotForTest = false;
|
|
584
601
|
async fetchRegistry() {
|
|
585
602
|
try {
|
|
586
603
|
const fetched = await fetchSellerRegistryWithTrust(this.config.sellerRegistryUrl);
|
|
@@ -817,7 +834,7 @@ export class TokenbuddyDaemon {
|
|
|
817
834
|
const payments = this.livePayments().filter((payment) => payment.enabled);
|
|
818
835
|
const clients = this.clientToolsSummary();
|
|
819
836
|
const routeModelId = this.resolveFocusSet()[0] || catalog.models[0]?.id;
|
|
820
|
-
const routingPreview = routeModelId ? this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
|
|
837
|
+
const routingPreview = routeModelId ? await this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
|
|
821
838
|
const checks = [
|
|
822
839
|
{
|
|
823
840
|
id: "local_service",
|
|
@@ -939,6 +956,43 @@ export class TokenbuddyDaemon {
|
|
|
939
956
|
};
|
|
940
957
|
}
|
|
941
958
|
}
|
|
959
|
+
async refreshSellerRuntimeMetrics(route, requestId) {
|
|
960
|
+
let timer;
|
|
961
|
+
try {
|
|
962
|
+
const ac = new AbortController();
|
|
963
|
+
timer = setTimeout(() => ac.abort(new Error("health timeout")), this.config.warmupProbeTimeoutMs ?? 3000);
|
|
964
|
+
const startedAt = Date.now();
|
|
965
|
+
const res = await fetch(`${route.seller.url.replace(/\/+$/, "")}/health`, { signal: ac.signal });
|
|
966
|
+
if (!res.ok) {
|
|
967
|
+
logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
|
|
968
|
+
requestId,
|
|
969
|
+
sellerId: route.seller.id,
|
|
970
|
+
status: res.status,
|
|
971
|
+
durationMs: Date.now() - startedAt
|
|
972
|
+
});
|
|
973
|
+
return;
|
|
974
|
+
}
|
|
975
|
+
const body = await res.json();
|
|
976
|
+
const latency = body.latency;
|
|
977
|
+
this.sellerPool.recordRuntimeMetrics(route.seller.id, {
|
|
978
|
+
ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
|
|
979
|
+
avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
|
|
980
|
+
avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second)
|
|
981
|
+
});
|
|
982
|
+
}
|
|
983
|
+
catch (error) {
|
|
984
|
+
logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
|
|
985
|
+
requestId,
|
|
986
|
+
sellerId: route.seller.id,
|
|
987
|
+
errorMessage: error instanceof Error ? error.message : String(error)
|
|
988
|
+
});
|
|
989
|
+
}
|
|
990
|
+
finally {
|
|
991
|
+
if (timer) {
|
|
992
|
+
clearTimeout(timer);
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
}
|
|
942
996
|
endpointProtocol(endpoint) {
|
|
943
997
|
if (endpoint === "/v1/chat/completions") {
|
|
944
998
|
return "chat_completions";
|
|
@@ -1033,6 +1087,7 @@ export class TokenbuddyDaemon {
|
|
|
1033
1087
|
const registry = await this.fetchRegistry();
|
|
1034
1088
|
const routing = resolveSellerRoutingForModel(this.refreshSellerRoutingConfig(), modelId);
|
|
1035
1089
|
const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
|
|
1090
|
+
await this.refreshSellerRouteMetadata(registrySellers);
|
|
1036
1091
|
this.sellerPool.ensureRegistrySellers(registrySellers);
|
|
1037
1092
|
this.scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod);
|
|
1038
1093
|
this.sellerPool.recycleOpenCircuits();
|
|
@@ -1046,21 +1101,12 @@ export class TokenbuddyDaemon {
|
|
|
1046
1101
|
registrySellers,
|
|
1047
1102
|
routing,
|
|
1048
1103
|
prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
|
|
1049
|
-
sellerMetrics: Array.from(poolById.values()).map((entry) => (
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
circuit: entry.circuit,
|
|
1056
|
-
capacityBlockedUntil: entry.capacityBlockedUntil,
|
|
1057
|
-
...(concurrencySnapshot.enabled
|
|
1058
|
-
? {
|
|
1059
|
-
localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
|
|
1060
|
-
localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
|
|
1061
|
-
}
|
|
1062
|
-
: {})
|
|
1063
|
-
})),
|
|
1104
|
+
sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry, concurrencySnapshot.enabled
|
|
1105
|
+
? {
|
|
1106
|
+
localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
|
|
1107
|
+
localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
|
|
1108
|
+
}
|
|
1109
|
+
: undefined)),
|
|
1064
1110
|
now: Date.now()
|
|
1065
1111
|
});
|
|
1066
1112
|
logger.info("route.candidates.prewarmed", "seller route candidates prewarmed", {
|
|
@@ -1314,7 +1360,8 @@ export class TokenbuddyDaemon {
|
|
|
1314
1360
|
}
|
|
1315
1361
|
logTokenBalanceReconciled(route, requestId, settlement) {
|
|
1316
1362
|
logger.info("token.balance.reconciled", "seller token balance reconciled from settlement", {
|
|
1317
|
-
requestId
|
|
1363
|
+
requestId,
|
|
1364
|
+
sellerRequestId: settlement.requestId !== requestId ? settlement.requestId : undefined,
|
|
1318
1365
|
sellerKey: route.seller.id,
|
|
1319
1366
|
model: route.modelId,
|
|
1320
1367
|
remainingCreditMicros: settlement.remainingCreditMicros,
|
|
@@ -1339,7 +1386,7 @@ export class TokenbuddyDaemon {
|
|
|
1339
1386
|
}
|
|
1340
1387
|
return {
|
|
1341
1388
|
models: catalog.models,
|
|
1342
|
-
sellers: catalog.sellers
|
|
1389
|
+
sellers: this.sellerCatalogWithRuntimeMetrics(catalog.sellers)
|
|
1343
1390
|
};
|
|
1344
1391
|
}
|
|
1345
1392
|
catch (error) {
|
|
@@ -1350,10 +1397,46 @@ export class TokenbuddyDaemon {
|
|
|
1350
1397
|
const snapshot = catalogSnapshotFromRegistry(cached);
|
|
1351
1398
|
return {
|
|
1352
1399
|
models: snapshot.models,
|
|
1353
|
-
sellers: snapshot.sellers
|
|
1400
|
+
sellers: this.sellerCatalogWithRuntimeMetrics(snapshot.sellers)
|
|
1354
1401
|
};
|
|
1355
1402
|
}
|
|
1356
1403
|
}
|
|
1404
|
+
sellerCatalogWithRuntimeMetrics(sellers) {
|
|
1405
|
+
const runtimeBySellerId = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
1406
|
+
return sellers.map((seller) => {
|
|
1407
|
+
const runtime = runtimeBySellerId.get(seller.id);
|
|
1408
|
+
return {
|
|
1409
|
+
...seller,
|
|
1410
|
+
ttftMs: runtime?.ttftMs ?? seller.ttftMs,
|
|
1411
|
+
avgTokensPerSecond: runtime?.avgTokensPerSecond ?? seller.avgTokensPerSecond ?? 0
|
|
1412
|
+
};
|
|
1413
|
+
});
|
|
1414
|
+
}
|
|
1415
|
+
async refreshSellerRouteMetadata(sellers) {
|
|
1416
|
+
try {
|
|
1417
|
+
await this.sellerMetadataCache.refreshIfStale(sellers.filter(isBuyerVisibleRegistrySeller));
|
|
1418
|
+
}
|
|
1419
|
+
catch (error) {
|
|
1420
|
+
logger.warn("route.metadata.refresh_failed", "seller route metadata refresh failed", {
|
|
1421
|
+
errorMessage: error instanceof Error ? error.message : String(error)
|
|
1422
|
+
});
|
|
1423
|
+
}
|
|
1424
|
+
}
|
|
1425
|
+
routeMetricFromPoolEntry(entry, concurrency) {
|
|
1426
|
+
const metadata = this.sellerMetadataCache.snapshot().find((item) => item.sellerId === entry.sellerId);
|
|
1427
|
+
return {
|
|
1428
|
+
sellerId: entry.sellerId,
|
|
1429
|
+
healthScore: entry.healthScore,
|
|
1430
|
+
avgLatencyMs: entry.avgLatencyMs,
|
|
1431
|
+
ttftMs: entry.ttftMs,
|
|
1432
|
+
avgInferenceMs: entry.avgInferenceMs,
|
|
1433
|
+
avgTokensPerSecond: entry.avgTokensPerSecond,
|
|
1434
|
+
discountRatio: metadata?.discountRatio,
|
|
1435
|
+
circuit: entry.circuit,
|
|
1436
|
+
capacityBlockedUntil: entry.capacityBlockedUntil,
|
|
1437
|
+
...(concurrency ?? {})
|
|
1438
|
+
};
|
|
1439
|
+
}
|
|
1357
1440
|
readUsage(bodyText) {
|
|
1358
1441
|
const fallback = {
|
|
1359
1442
|
promptTokens: 0,
|
|
@@ -1396,8 +1479,11 @@ export class TokenbuddyDaemon {
|
|
|
1396
1479
|
this.logTokenBalanceReconciled(route, requestId, settlement);
|
|
1397
1480
|
}
|
|
1398
1481
|
const settledMicros = settlement?.settledMicros;
|
|
1482
|
+
const sellerRequestId = settlement?.requestId && settlement.requestId !== requestId
|
|
1483
|
+
? settlement.requestId
|
|
1484
|
+
: undefined;
|
|
1399
1485
|
this.tokenStore.recordInferenceLedger({
|
|
1400
|
-
requestId
|
|
1486
|
+
requestId,
|
|
1401
1487
|
sellerKey: route.seller.id,
|
|
1402
1488
|
modelId: route.modelId,
|
|
1403
1489
|
endpoint,
|
|
@@ -1422,7 +1508,7 @@ export class TokenbuddyDaemon {
|
|
|
1422
1508
|
paymentMethod: extras?.paymentMethod
|
|
1423
1509
|
});
|
|
1424
1510
|
logger.info("inference.ledger.recorded", "safe inference ledger recorded", {
|
|
1425
|
-
requestId
|
|
1511
|
+
requestId,
|
|
1426
1512
|
sellerKey: route.seller.id,
|
|
1427
1513
|
model: route.modelId,
|
|
1428
1514
|
endpoint,
|
|
@@ -1435,6 +1521,7 @@ export class TokenbuddyDaemon {
|
|
|
1435
1521
|
completionTokens: usage.completionTokens,
|
|
1436
1522
|
balanceSnapshotMicros: settlement?.remainingCreditMicros,
|
|
1437
1523
|
balanceSource: settlement ? "seller_authoritative" : "estimated",
|
|
1524
|
+
sellerRequestId,
|
|
1438
1525
|
ttftMs: extras?.ttftMs,
|
|
1439
1526
|
fallbackCount: extras?.fallbackCount,
|
|
1440
1527
|
routeReason: extras?.routeReason,
|
|
@@ -2069,21 +2156,25 @@ export class TokenbuddyDaemon {
|
|
|
2069
2156
|
// the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
|
|
2070
2157
|
// it to their own upstream fetch via the same signal.
|
|
2071
2158
|
const deadlineMs = this.requestDeadlineMs();
|
|
2072
|
-
const sendSellerRequest = async (token) => {
|
|
2159
|
+
const sendSellerRequest = async (token, retryOrdinal = 0) => {
|
|
2160
|
+
const attemptContext = sellerAttemptRequestContext(requestId, idempotencyKey, routeIndex, attempt, retryOrdinal);
|
|
2073
2161
|
const requestAc = new AbortController();
|
|
2074
2162
|
const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
|
|
2075
2163
|
const headers = {
|
|
2076
2164
|
"Content-Type": "application/json",
|
|
2077
2165
|
"Authorization": `Bearer ${token}`,
|
|
2078
|
-
"X-Request-Id": requestId,
|
|
2079
|
-
"Idempotency-Key": idempotencyKey
|
|
2166
|
+
"X-Request-Id": attemptContext.requestId,
|
|
2167
|
+
"Idempotency-Key": attemptContext.idempotencyKey
|
|
2080
2168
|
};
|
|
2081
2169
|
headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
|
|
2082
2170
|
try {
|
|
2083
2171
|
return await fetch(`${sellerUrl}${endpoint}`, {
|
|
2084
2172
|
method: "POST",
|
|
2085
2173
|
headers,
|
|
2086
|
-
body: JSON.stringify(
|
|
2174
|
+
body: JSON.stringify({
|
|
2175
|
+
...upstreamBody,
|
|
2176
|
+
requestId: attemptContext.requestId
|
|
2177
|
+
}),
|
|
2087
2178
|
signal: requestAc.signal
|
|
2088
2179
|
});
|
|
2089
2180
|
}
|
|
@@ -2227,6 +2318,7 @@ export class TokenbuddyDaemon {
|
|
|
2227
2318
|
res.write(settlementTrailing.downstream);
|
|
2228
2319
|
}
|
|
2229
2320
|
res.end();
|
|
2321
|
+
void this.refreshSellerRuntimeMetrics(route, requestId);
|
|
2230
2322
|
this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body), undefined, {
|
|
2231
2323
|
ttftMs: firstByteAt ? firstByteAt - startedAt : undefined,
|
|
2232
2324
|
fallbackCount: routeIndex,
|
|
@@ -2243,6 +2335,7 @@ export class TokenbuddyDaemon {
|
|
|
2243
2335
|
markFirstByte();
|
|
2244
2336
|
res.send(responseBody);
|
|
2245
2337
|
const usage = this.readUsage(responseBody);
|
|
2338
|
+
void this.refreshSellerRuntimeMetrics(route, requestId);
|
|
2246
2339
|
this.recordReconciledInference(route, endpoint, requestId, usage, this.parseSellerSettlementSummary(upstreamResponse.headers), this.inferPromptForHash(body), responseBody, {
|
|
2247
2340
|
ttftMs: firstByteAt ? firstByteAt - startedAt : undefined,
|
|
2248
2341
|
fallbackCount: routeIndex,
|
|
@@ -2757,10 +2850,10 @@ export class TokenbuddyDaemon {
|
|
|
2757
2850
|
});
|
|
2758
2851
|
// 2) GET /routing/preview — 算「假如改完会怎样」,不改 state
|
|
2759
2852
|
// query: modelId? protocol? paymentMethod? mode? scorer? sellerId? sellerIds?(逗号分隔)
|
|
2760
|
-
controlApp.get("/routing/preview", (req, res) => {
|
|
2853
|
+
controlApp.get("/routing/preview", async (req, res) => {
|
|
2761
2854
|
try {
|
|
2762
2855
|
const override = buildRoutingConfigFromQuery(req.query);
|
|
2763
|
-
const result = this.buildRoutingPreview({
|
|
2856
|
+
const result = await this.buildRoutingPreview({
|
|
2764
2857
|
modelId: typeof req.query.modelId === "string" ? req.query.modelId : undefined,
|
|
2765
2858
|
protocol: typeof req.query.protocol === "string" ? req.query.protocol : undefined,
|
|
2766
2859
|
paymentMethod: typeof req.query.paymentMethod === "string" ? req.query.paymentMethod : undefined,
|
|
@@ -2789,7 +2882,7 @@ export class TokenbuddyDaemon {
|
|
|
2789
2882
|
}
|
|
2790
2883
|
});
|
|
2791
2884
|
// 3) PUT /routing/strategy — 写策略 + 热更新 + 返回 preview
|
|
2792
|
-
controlApp.put("/routing/strategy", (req, res) => {
|
|
2885
|
+
controlApp.put("/routing/strategy", async (req, res) => {
|
|
2793
2886
|
try {
|
|
2794
2887
|
const body = (req.body ?? {});
|
|
2795
2888
|
const normalized = normalizeSellerRoutingConfig(body);
|
|
@@ -2803,7 +2896,7 @@ export class TokenbuddyDaemon {
|
|
|
2803
2896
|
sellerId: current.sellerId,
|
|
2804
2897
|
sellerIds: current.sellerIds
|
|
2805
2898
|
});
|
|
2806
|
-
const preview = this.buildRoutingPreview({ routing: current });
|
|
2899
|
+
const preview = await this.buildRoutingPreview({ routing: current });
|
|
2807
2900
|
const previewPayload = "error" in preview.plan
|
|
2808
2901
|
? { error: preview.plan.error }
|
|
2809
2902
|
: {
|
|
@@ -3012,10 +3105,11 @@ export class TokenbuddyDaemon {
|
|
|
3012
3105
|
/**
|
|
3013
3106
|
* tb-ui v1 `GET /routing/preview` 和 `PUT /routing/strategy` 复用的 preview 计算。
|
|
3014
3107
|
* 接受任意 routing 覆盖(来自 request body)算「假如改成这个,路由会是啥」。
|
|
3015
|
-
*
|
|
3108
|
+
* 不修改 routing state;registry / seller metadata 可按需刷新,保证 preview
|
|
3109
|
+
* 使用的候选和折扣信息与真实请求路径一致。
|
|
3016
3110
|
*/
|
|
3017
|
-
buildRoutingPreview(input) {
|
|
3018
|
-
const registry = this.lastRegistrySnapshot;
|
|
3111
|
+
async buildRoutingPreview(input) {
|
|
3112
|
+
const registry = this.lastRegistrySnapshot ?? (this.forceRegistrySnapshotForTest ? null : await this.fetchRegistry());
|
|
3019
3113
|
const focusFirst = this.resolveFocusSet()[0];
|
|
3020
3114
|
const registryFirst = registry?.sellers[0]?.models?.[0];
|
|
3021
3115
|
const modelId = input.modelId?.trim() || focusFirst || registryFirst || "";
|
|
@@ -3033,6 +3127,7 @@ export class TokenbuddyDaemon {
|
|
|
3033
3127
|
: current;
|
|
3034
3128
|
const resolvedRouting = resolveSellerRoutingForModel(routing, modelId);
|
|
3035
3129
|
const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
|
|
3130
|
+
await this.refreshSellerRouteMetadata(registrySellers);
|
|
3036
3131
|
this.sellerPool.ensureRegistrySellers(registrySellers);
|
|
3037
3132
|
const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
3038
3133
|
const plan = planSellerRouteSet({
|
|
@@ -3042,15 +3137,7 @@ export class TokenbuddyDaemon {
|
|
|
3042
3137
|
registrySellers,
|
|
3043
3138
|
routing: resolvedRouting,
|
|
3044
3139
|
prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
|
|
3045
|
-
sellerMetrics: Array.from(poolById.values()).map((entry) => (
|
|
3046
|
-
sellerId: entry.sellerId,
|
|
3047
|
-
healthScore: entry.healthScore,
|
|
3048
|
-
avgLatencyMs: entry.avgLatencyMs,
|
|
3049
|
-
ttftMs: entry.ttftMs,
|
|
3050
|
-
avgInferenceMs: entry.avgInferenceMs,
|
|
3051
|
-
circuit: entry.circuit,
|
|
3052
|
-
capacityBlockedUntil: entry.capacityBlockedUntil
|
|
3053
|
-
})),
|
|
3140
|
+
sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry)),
|
|
3054
3141
|
now: Date.now()
|
|
3055
3142
|
});
|
|
3056
3143
|
return { modelId, protocol, paymentMethod, plan };
|
|
@@ -3098,12 +3185,13 @@ export class TokenbuddyDaemon {
|
|
|
3098
3185
|
this.tokenStore.close();
|
|
3099
3186
|
}
|
|
3100
3187
|
/**
|
|
3101
|
-
* @internal
|
|
3188
|
+
* @internal - test-only hook to inject a registry snapshot without
|
|
3102
3189
|
* hitting the network. Used by `tests/control-plane-ui-endpoints.test.ts`
|
|
3103
3190
|
* to drive `buildRoutingPreview` deterministically. Production code
|
|
3104
3191
|
* must NOT call this; the real `fetchRegistry()` populates the snapshot.
|
|
3105
3192
|
*/
|
|
3106
3193
|
setLastRegistrySnapshotForTest(snapshot) {
|
|
3194
|
+
this.forceRegistrySnapshotForTest = true;
|
|
3107
3195
|
this.lastRegistrySnapshot = snapshot;
|
|
3108
3196
|
}
|
|
3109
3197
|
}
|