@tokenbuddy/tokenbuddy 1.0.29 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/daemon.d.ts +11 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +130 -42
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/doctor-diagnostics.d.ts.map +1 -1
- package/dist/src/doctor-diagnostics.js +7 -1
- package/dist/src/doctor-diagnostics.js.map +1 -1
- package/dist/src/prewarm-cache.d.ts +4 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -1
- package/dist/src/prewarm-cache.js +1 -0
- package/dist/src/prewarm-cache.js.map +1 -1
- package/dist/src/prewarm-scheduler.d.ts +2 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -1
- package/dist/src/prewarm-scheduler.js +4 -1
- package/dist/src/prewarm-scheduler.js.map +1 -1
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +196 -18
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +4 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +13 -0
- package/dist/src/seller-pool.d.ts.map +1 -1
- package/dist/src/seller-pool.js +43 -2
- package/dist/src/seller-pool.js.map +1 -1
- package/dist/src/seller-route-planner.d.ts +9 -0
- package/dist/src/seller-route-planner.d.ts.map +1 -1
- package/dist/src/seller-route-planner.js +39 -15
- package/dist/src/seller-route-planner.js.map +1 -1
- package/dist/src/seller-routing-strategy.d.ts +6 -4
- package/dist/src/seller-routing-strategy.d.ts.map +1 -1
- package/dist/src/seller-routing-strategy.js +15 -12
- package/dist/src/seller-routing-strategy.js.map +1 -1
- package/dist/src/terminal-detect.d.ts +5 -5
- package/dist/src/terminal-detect.d.ts.map +1 -1
- package/dist/src/terminal-detect.js +79 -26
- package/dist/src/terminal-detect.js.map +1 -1
- package/package.json +1 -1
- package/src/daemon.ts +168 -46
- package/src/doctor-diagnostics.ts +5 -1
- package/src/prewarm-cache.ts +5 -0
- package/src/prewarm-scheduler.ts +6 -1
- package/src/provider-install.ts +203 -18
- package/src/seller-catalog.ts +4 -0
- package/src/seller-pool.ts +68 -2
- package/src/seller-route-planner.ts +61 -15
- package/src/seller-routing-strategy.ts +21 -16
- package/src/terminal-detect.ts +81 -24
- package/static/ui/assets/index-DEDEl8o2.js +236 -0
- package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
- package/static/ui/index.html +1 -1
- package/tests/control-plane-ui-endpoints.test.ts +73 -0
- package/tests/seller-pool.test.ts +55 -0
- package/tests/seller-route-planner.test.ts +45 -1
- package/tests/seller-routing-strategy.test.ts +6 -5
- package/tests/tokenbuddy.test.ts +346 -38
- package/static/ui/assets/index-UAfOhbwC.js +0 -236
package/src/daemon.ts
CHANGED
|
@@ -36,7 +36,9 @@ import {
|
|
|
36
36
|
isBuyerVisibleRegistrySeller,
|
|
37
37
|
normalizeSellerUrl,
|
|
38
38
|
RegistryTooLargeError,
|
|
39
|
+
type ModelCatalogEntry,
|
|
39
40
|
type RegistrySeller,
|
|
41
|
+
type SellerCatalogEntry,
|
|
40
42
|
type SellerManifest,
|
|
41
43
|
type SellerRegistryDocument,
|
|
42
44
|
type SellerRegistryTrustMetadata,
|
|
@@ -49,9 +51,10 @@ import { SellerPool, type FailureKind } from "./seller-pool.js";
|
|
|
49
51
|
import { RouteFailover, type FailoverDecision, type RouteCandidate } from "./route-failover.js";
|
|
50
52
|
import { PrewarmScheduler, type PrewarmReason, type SellerProber } from "./prewarm-scheduler.js";
|
|
51
53
|
import { SellerConcurrencyLimiter, type SellerConcurrencyLimiterOptions } from "./seller-concurrency-limiter.js";
|
|
54
|
+
import { SellerMetadataCache } from "./seller-metadata-cache.js";
|
|
52
55
|
import type { PoolEntry } from "./seller-pool.js";
|
|
53
56
|
import { planSellerRouteSet } from "./seller-route-planner.js";
|
|
54
|
-
import type { SellerRoutePlan } from "./seller-route-planner.js";
|
|
57
|
+
import type { SellerRouteMetric, SellerRoutePlan } from "./seller-route-planner.js";
|
|
55
58
|
import {
|
|
56
59
|
assertSellerRoutingConfig,
|
|
57
60
|
mergeSellerRoutingConfig,
|
|
@@ -294,6 +297,14 @@ interface SellerHealthBody {
|
|
|
294
297
|
lastErrorClass?: unknown;
|
|
295
298
|
last_error_class?: unknown;
|
|
296
299
|
};
|
|
300
|
+
latency?: {
|
|
301
|
+
ttftMs?: unknown;
|
|
302
|
+
ttft_ms?: unknown;
|
|
303
|
+
avgInferenceMs?: unknown;
|
|
304
|
+
avg_inference_ms?: unknown;
|
|
305
|
+
avgTokensPerSecond?: unknown;
|
|
306
|
+
avg_tokens_per_second?: unknown;
|
|
307
|
+
};
|
|
297
308
|
capacity?: {
|
|
298
309
|
activeConnections?: unknown;
|
|
299
310
|
active_connections?: unknown;
|
|
@@ -316,6 +327,11 @@ interface SellerSettlementSummary {
|
|
|
316
327
|
priceVersion?: string;
|
|
317
328
|
}
|
|
318
329
|
|
|
330
|
+
interface SellerAttemptRequestContext {
|
|
331
|
+
requestId: string;
|
|
332
|
+
idempotencyKey: string;
|
|
333
|
+
}
|
|
334
|
+
|
|
319
335
|
interface SellerBalanceSnapshot {
|
|
320
336
|
creditMicros: number;
|
|
321
337
|
reservedMicros: number;
|
|
@@ -413,6 +429,23 @@ function parseSellerSettlementObject(raw: string): SellerSettlementSummary | und
|
|
|
413
429
|
}
|
|
414
430
|
}
|
|
415
431
|
|
|
432
|
+
function sellerAttemptRequestContext(
|
|
433
|
+
requestId: string,
|
|
434
|
+
idempotencyKey: string,
|
|
435
|
+
routeIndex: number,
|
|
436
|
+
attempt: number,
|
|
437
|
+
retryOrdinal: number
|
|
438
|
+
): SellerAttemptRequestContext {
|
|
439
|
+
if (routeIndex === 0 && attempt === 0 && retryOrdinal === 0) {
|
|
440
|
+
return { requestId, idempotencyKey };
|
|
441
|
+
}
|
|
442
|
+
const suffix = `r${routeIndex}_a${attempt}_n${retryOrdinal}`;
|
|
443
|
+
return {
|
|
444
|
+
requestId: `${requestId}_${suffix}`,
|
|
445
|
+
idempotencyKey: `${idempotencyKey}_${suffix}`
|
|
446
|
+
};
|
|
447
|
+
}
|
|
448
|
+
|
|
416
449
|
function arrayLength(value: unknown): number | undefined {
|
|
417
450
|
return Array.isArray(value) ? value.length : undefined;
|
|
418
451
|
}
|
|
@@ -558,6 +591,7 @@ export class TokenbuddyDaemon {
|
|
|
558
591
|
// "fetchRegistry + manifest per request" path.
|
|
559
592
|
private readonly modelIndex = new ModelIndex();
|
|
560
593
|
private readonly prewarmCache = new PrewarmCache();
|
|
594
|
+
private readonly sellerMetadataCache = new SellerMetadataCache();
|
|
561
595
|
private readonly creditTracker = new CreditTracker();
|
|
562
596
|
private readonly sellerPool = new SellerPool({
|
|
563
597
|
modelIndex: this.modelIndex,
|
|
@@ -626,11 +660,15 @@ export class TokenbuddyDaemon {
|
|
|
626
660
|
const now = Date.now();
|
|
627
661
|
const body = await res.json() as SellerHealthBody;
|
|
628
662
|
const upstream = body.upstream;
|
|
663
|
+
const latency = body.latency;
|
|
629
664
|
const upstreamErrorClass = upstream?.lastErrorClass ?? upstream?.last_error_class;
|
|
630
665
|
return {
|
|
631
666
|
ok: true,
|
|
632
667
|
latencyMs: now - startedAt,
|
|
633
668
|
httpStatus: res.status,
|
|
669
|
+
ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
|
|
670
|
+
avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
|
|
671
|
+
avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second),
|
|
634
672
|
upstreamStatus: typeof upstream?.status === "string"
|
|
635
673
|
? upstream.status as "healthy" | "degraded" | "unhealthy" | "unknown"
|
|
636
674
|
: undefined,
|
|
@@ -857,6 +895,7 @@ export class TokenbuddyDaemon {
|
|
|
857
895
|
// trades freshness for availability: requests still route, but the
|
|
858
896
|
// model set is whatever was cached before the registry outgrew 1MB.
|
|
859
897
|
private lastRegistrySnapshot: SellerRegistryDocument | null = null;
|
|
898
|
+
private forceRegistrySnapshotForTest = false;
|
|
860
899
|
|
|
861
900
|
private async fetchRegistry(): Promise<SellerRegistryDocument> {
|
|
862
901
|
try {
|
|
@@ -1106,7 +1145,7 @@ export class TokenbuddyDaemon {
|
|
|
1106
1145
|
const payments = this.livePayments().filter((payment) => payment.enabled);
|
|
1107
1146
|
const clients = this.clientToolsSummary();
|
|
1108
1147
|
const routeModelId = this.resolveFocusSet()[0] || catalog.models[0]?.id;
|
|
1109
|
-
const routingPreview = routeModelId ? this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
|
|
1148
|
+
const routingPreview = routeModelId ? await this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
|
|
1110
1149
|
const checks: InitDoctorCheck[] = [
|
|
1111
1150
|
{
|
|
1112
1151
|
id: "local_service",
|
|
@@ -1229,6 +1268,42 @@ export class TokenbuddyDaemon {
|
|
|
1229
1268
|
}
|
|
1230
1269
|
}
|
|
1231
1270
|
|
|
1271
|
+
private async refreshSellerRuntimeMetrics(route: SellerRoute, requestId: string | undefined): Promise<void> {
|
|
1272
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
1273
|
+
try {
|
|
1274
|
+
const ac = new AbortController();
|
|
1275
|
+
timer = setTimeout(() => ac.abort(new Error("health timeout")), this.config.warmupProbeTimeoutMs ?? 3000);
|
|
1276
|
+
const startedAt = Date.now();
|
|
1277
|
+
const res = await fetch(`${route.seller.url.replace(/\/+$/, "")}/health`, { signal: ac.signal });
|
|
1278
|
+
if (!res.ok) {
|
|
1279
|
+
logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
|
|
1280
|
+
requestId,
|
|
1281
|
+
sellerId: route.seller.id,
|
|
1282
|
+
status: res.status,
|
|
1283
|
+
durationMs: Date.now() - startedAt
|
|
1284
|
+
});
|
|
1285
|
+
return;
|
|
1286
|
+
}
|
|
1287
|
+
const body = await res.json() as SellerHealthBody;
|
|
1288
|
+
const latency = body.latency;
|
|
1289
|
+
this.sellerPool.recordRuntimeMetrics(route.seller.id, {
|
|
1290
|
+
ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
|
|
1291
|
+
avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
|
|
1292
|
+
avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second)
|
|
1293
|
+
});
|
|
1294
|
+
} catch (error: unknown) {
|
|
1295
|
+
logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
|
|
1296
|
+
requestId,
|
|
1297
|
+
sellerId: route.seller.id,
|
|
1298
|
+
errorMessage: error instanceof Error ? error.message : String(error)
|
|
1299
|
+
});
|
|
1300
|
+
} finally {
|
|
1301
|
+
if (timer) {
|
|
1302
|
+
clearTimeout(timer);
|
|
1303
|
+
}
|
|
1304
|
+
}
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1232
1307
|
private endpointProtocol(endpoint: string): string | undefined {
|
|
1233
1308
|
if (endpoint === "/v1/chat/completions") {
|
|
1234
1309
|
return "chat_completions";
|
|
@@ -1337,6 +1412,7 @@ export class TokenbuddyDaemon {
|
|
|
1337
1412
|
|
|
1338
1413
|
const routing = resolveSellerRoutingForModel(this.refreshSellerRoutingConfig(), modelId);
|
|
1339
1414
|
const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
|
|
1415
|
+
await this.refreshSellerRouteMetadata(registrySellers);
|
|
1340
1416
|
this.sellerPool.ensureRegistrySellers(registrySellers);
|
|
1341
1417
|
this.scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod);
|
|
1342
1418
|
this.sellerPool.recycleOpenCircuits();
|
|
@@ -1350,21 +1426,12 @@ export class TokenbuddyDaemon {
|
|
|
1350
1426
|
registrySellers,
|
|
1351
1427
|
routing,
|
|
1352
1428
|
prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
|
|
1353
|
-
sellerMetrics: Array.from(poolById.values()).map((entry) => (
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
circuit: entry.circuit,
|
|
1360
|
-
capacityBlockedUntil: entry.capacityBlockedUntil,
|
|
1361
|
-
...(concurrencySnapshot.enabled
|
|
1362
|
-
? {
|
|
1363
|
-
localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
|
|
1364
|
-
localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
|
|
1365
|
-
}
|
|
1366
|
-
: {})
|
|
1367
|
-
})),
|
|
1429
|
+
sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry, concurrencySnapshot.enabled
|
|
1430
|
+
? {
|
|
1431
|
+
localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
|
|
1432
|
+
localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
|
|
1433
|
+
}
|
|
1434
|
+
: undefined)),
|
|
1368
1435
|
now: Date.now()
|
|
1369
1436
|
});
|
|
1370
1437
|
|
|
@@ -1667,7 +1734,8 @@ export class TokenbuddyDaemon {
|
|
|
1667
1734
|
settlement: SellerSettlementSummary
|
|
1668
1735
|
): void {
|
|
1669
1736
|
logger.info("token.balance.reconciled", "seller token balance reconciled from settlement", {
|
|
1670
|
-
requestId
|
|
1737
|
+
requestId,
|
|
1738
|
+
sellerRequestId: settlement.requestId !== requestId ? settlement.requestId : undefined,
|
|
1671
1739
|
sellerKey: route.seller.id,
|
|
1672
1740
|
model: route.modelId,
|
|
1673
1741
|
remainingCreditMicros: settlement.remainingCreditMicros,
|
|
@@ -1681,8 +1749,8 @@ export class TokenbuddyDaemon {
|
|
|
1681
1749
|
}
|
|
1682
1750
|
|
|
1683
1751
|
private async listSellerBackedModels(): Promise<{
|
|
1684
|
-
models:
|
|
1685
|
-
sellers:
|
|
1752
|
+
models: ModelCatalogEntry[];
|
|
1753
|
+
sellers: SellerCatalogEntry[];
|
|
1686
1754
|
}> {
|
|
1687
1755
|
try {
|
|
1688
1756
|
const catalog = await discoverSellerBackedModels(this.config.sellerRegistryUrl);
|
|
@@ -1696,7 +1764,7 @@ export class TokenbuddyDaemon {
|
|
|
1696
1764
|
}
|
|
1697
1765
|
return {
|
|
1698
1766
|
models: catalog.models,
|
|
1699
|
-
sellers: catalog.sellers
|
|
1767
|
+
sellers: this.sellerCatalogWithRuntimeMetrics(catalog.sellers)
|
|
1700
1768
|
};
|
|
1701
1769
|
} catch (error) {
|
|
1702
1770
|
const cached = this.loadTrustedRegistryCache(error);
|
|
@@ -1706,11 +1774,52 @@ export class TokenbuddyDaemon {
|
|
|
1706
1774
|
const snapshot = catalogSnapshotFromRegistry(cached);
|
|
1707
1775
|
return {
|
|
1708
1776
|
models: snapshot.models,
|
|
1709
|
-
sellers: snapshot.sellers
|
|
1777
|
+
sellers: this.sellerCatalogWithRuntimeMetrics(snapshot.sellers)
|
|
1778
|
+
};
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
private sellerCatalogWithRuntimeMetrics(sellers: SellerCatalogEntry[]): SellerCatalogEntry[] {
|
|
1783
|
+
const runtimeBySellerId = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
1784
|
+
return sellers.map((seller) => {
|
|
1785
|
+
const runtime = runtimeBySellerId.get(seller.id);
|
|
1786
|
+
return {
|
|
1787
|
+
...seller,
|
|
1788
|
+
ttftMs: runtime?.ttftMs ?? seller.ttftMs,
|
|
1789
|
+
avgTokensPerSecond: runtime?.avgTokensPerSecond ?? seller.avgTokensPerSecond ?? 0
|
|
1710
1790
|
};
|
|
1791
|
+
});
|
|
1792
|
+
}
|
|
1793
|
+
|
|
1794
|
+
private async refreshSellerRouteMetadata(sellers: RegistrySeller[]): Promise<void> {
|
|
1795
|
+
try {
|
|
1796
|
+
await this.sellerMetadataCache.refreshIfStale(sellers.filter(isBuyerVisibleRegistrySeller));
|
|
1797
|
+
} catch (error: unknown) {
|
|
1798
|
+
logger.warn("route.metadata.refresh_failed", "seller route metadata refresh failed", {
|
|
1799
|
+
errorMessage: error instanceof Error ? error.message : String(error)
|
|
1800
|
+
});
|
|
1711
1801
|
}
|
|
1712
1802
|
}
|
|
1713
1803
|
|
|
1804
|
+
private routeMetricFromPoolEntry(
|
|
1805
|
+
entry: PoolEntry,
|
|
1806
|
+
concurrency?: { localConcurrencyActive: number; localConcurrencyLimit: number }
|
|
1807
|
+
): SellerRouteMetric {
|
|
1808
|
+
const metadata = this.sellerMetadataCache.snapshot().find((item) => item.sellerId === entry.sellerId);
|
|
1809
|
+
return {
|
|
1810
|
+
sellerId: entry.sellerId,
|
|
1811
|
+
healthScore: entry.healthScore,
|
|
1812
|
+
avgLatencyMs: entry.avgLatencyMs,
|
|
1813
|
+
ttftMs: entry.ttftMs,
|
|
1814
|
+
avgInferenceMs: entry.avgInferenceMs,
|
|
1815
|
+
avgTokensPerSecond: entry.avgTokensPerSecond,
|
|
1816
|
+
discountRatio: metadata?.discountRatio,
|
|
1817
|
+
circuit: entry.circuit,
|
|
1818
|
+
capacityBlockedUntil: entry.capacityBlockedUntil,
|
|
1819
|
+
...(concurrency ?? {})
|
|
1820
|
+
};
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1714
1823
|
private readUsage(bodyText: string): UsageSummary {
|
|
1715
1824
|
const fallback: UsageSummary = {
|
|
1716
1825
|
promptTokens: 0,
|
|
@@ -1779,8 +1888,11 @@ export class TokenbuddyDaemon {
|
|
|
1779
1888
|
}
|
|
1780
1889
|
|
|
1781
1890
|
const settledMicros = settlement?.settledMicros;
|
|
1891
|
+
const sellerRequestId = settlement?.requestId && settlement.requestId !== requestId
|
|
1892
|
+
? settlement.requestId
|
|
1893
|
+
: undefined;
|
|
1782
1894
|
this.tokenStore.recordInferenceLedger({
|
|
1783
|
-
requestId
|
|
1895
|
+
requestId,
|
|
1784
1896
|
sellerKey: route.seller.id,
|
|
1785
1897
|
modelId: route.modelId,
|
|
1786
1898
|
endpoint,
|
|
@@ -1805,7 +1917,7 @@ export class TokenbuddyDaemon {
|
|
|
1805
1917
|
paymentMethod: extras?.paymentMethod
|
|
1806
1918
|
});
|
|
1807
1919
|
logger.info("inference.ledger.recorded", "safe inference ledger recorded", {
|
|
1808
|
-
requestId
|
|
1920
|
+
requestId,
|
|
1809
1921
|
sellerKey: route.seller.id,
|
|
1810
1922
|
model: route.modelId,
|
|
1811
1923
|
endpoint,
|
|
@@ -1818,6 +1930,7 @@ export class TokenbuddyDaemon {
|
|
|
1818
1930
|
completionTokens: usage.completionTokens,
|
|
1819
1931
|
balanceSnapshotMicros: settlement?.remainingCreditMicros,
|
|
1820
1932
|
balanceSource: settlement ? "seller_authoritative" : "estimated",
|
|
1933
|
+
sellerRequestId,
|
|
1821
1934
|
ttftMs: extras?.ttftMs,
|
|
1822
1935
|
fallbackCount: extras?.fallbackCount,
|
|
1823
1936
|
routeReason: extras?.routeReason,
|
|
@@ -2488,21 +2601,31 @@ export class TokenbuddyDaemon {
|
|
|
2488
2601
|
// the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
|
|
2489
2602
|
// it to their own upstream fetch via the same signal.
|
|
2490
2603
|
const deadlineMs = this.requestDeadlineMs();
|
|
2491
|
-
const sendSellerRequest = async (token: string) => {
|
|
2604
|
+
const sendSellerRequest = async (token: string, retryOrdinal = 0) => {
|
|
2605
|
+
const attemptContext = sellerAttemptRequestContext(
|
|
2606
|
+
requestId,
|
|
2607
|
+
idempotencyKey,
|
|
2608
|
+
routeIndex,
|
|
2609
|
+
attempt,
|
|
2610
|
+
retryOrdinal
|
|
2611
|
+
);
|
|
2492
2612
|
const requestAc = new AbortController();
|
|
2493
2613
|
const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
|
|
2494
2614
|
const headers: Record<string, string> = {
|
|
2495
2615
|
"Content-Type": "application/json",
|
|
2496
2616
|
"Authorization": `Bearer ${token}`,
|
|
2497
|
-
"X-Request-Id": requestId,
|
|
2498
|
-
"Idempotency-Key": idempotencyKey
|
|
2617
|
+
"X-Request-Id": attemptContext.requestId,
|
|
2618
|
+
"Idempotency-Key": attemptContext.idempotencyKey
|
|
2499
2619
|
};
|
|
2500
2620
|
headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
|
|
2501
2621
|
try {
|
|
2502
2622
|
return await fetch(`${sellerUrl}${endpoint}`, {
|
|
2503
2623
|
method: "POST",
|
|
2504
2624
|
headers,
|
|
2505
|
-
body: JSON.stringify(
|
|
2625
|
+
body: JSON.stringify({
|
|
2626
|
+
...upstreamBody,
|
|
2627
|
+
requestId: attemptContext.requestId
|
|
2628
|
+
}),
|
|
2506
2629
|
signal: requestAc.signal
|
|
2507
2630
|
});
|
|
2508
2631
|
} finally {
|
|
@@ -2649,6 +2772,7 @@ export class TokenbuddyDaemon {
|
|
|
2649
2772
|
res.write(settlementTrailing.downstream);
|
|
2650
2773
|
}
|
|
2651
2774
|
res.end();
|
|
2775
|
+
void this.refreshSellerRuntimeMetrics(route, requestId);
|
|
2652
2776
|
this.recordReconciledInference(
|
|
2653
2777
|
route,
|
|
2654
2778
|
endpoint,
|
|
@@ -2675,6 +2799,7 @@ export class TokenbuddyDaemon {
|
|
|
2675
2799
|
markFirstByte();
|
|
2676
2800
|
res.send(responseBody);
|
|
2677
2801
|
const usage = this.readUsage(responseBody);
|
|
2802
|
+
void this.refreshSellerRuntimeMetrics(route, requestId);
|
|
2678
2803
|
this.recordReconciledInference(
|
|
2679
2804
|
route,
|
|
2680
2805
|
endpoint,
|
|
@@ -3212,10 +3337,10 @@ export class TokenbuddyDaemon {
|
|
|
3212
3337
|
|
|
3213
3338
|
// 2) GET /routing/preview — 算「假如改完会怎样」,不改 state
|
|
3214
3339
|
// query: modelId? protocol? paymentMethod? mode? scorer? sellerId? sellerIds?(逗号分隔)
|
|
3215
|
-
controlApp.get("/routing/preview", (req, res) => {
|
|
3340
|
+
controlApp.get("/routing/preview", async (req, res) => {
|
|
3216
3341
|
try {
|
|
3217
3342
|
const override = buildRoutingConfigFromQuery(req.query);
|
|
3218
|
-
const result = this.buildRoutingPreview({
|
|
3343
|
+
const result = await this.buildRoutingPreview({
|
|
3219
3344
|
modelId: typeof req.query.modelId === "string" ? req.query.modelId : undefined,
|
|
3220
3345
|
protocol: typeof req.query.protocol === "string" ? req.query.protocol : undefined,
|
|
3221
3346
|
paymentMethod: typeof req.query.paymentMethod === "string" ? req.query.paymentMethod : undefined,
|
|
@@ -3244,7 +3369,7 @@ export class TokenbuddyDaemon {
|
|
|
3244
3369
|
});
|
|
3245
3370
|
|
|
3246
3371
|
// 3) PUT /routing/strategy — 写策略 + 热更新 + 返回 preview
|
|
3247
|
-
controlApp.put("/routing/strategy", (req, res) => {
|
|
3372
|
+
controlApp.put("/routing/strategy", async (req, res) => {
|
|
3248
3373
|
try {
|
|
3249
3374
|
const body = (req.body ?? {}) as Record<string, unknown>;
|
|
3250
3375
|
const normalized = normalizeSellerRoutingConfig(body);
|
|
@@ -3258,7 +3383,7 @@ export class TokenbuddyDaemon {
|
|
|
3258
3383
|
sellerId: current.sellerId,
|
|
3259
3384
|
sellerIds: current.sellerIds
|
|
3260
3385
|
});
|
|
3261
|
-
const preview = this.buildRoutingPreview({ routing: current });
|
|
3386
|
+
const preview = await this.buildRoutingPreview({ routing: current });
|
|
3262
3387
|
const previewPayload = "error" in preview.plan
|
|
3263
3388
|
? { error: preview.plan.error }
|
|
3264
3389
|
: {
|
|
@@ -3475,15 +3600,18 @@ export class TokenbuddyDaemon {
|
|
|
3475
3600
|
/**
|
|
3476
3601
|
* tb-ui v1 `GET /routing/preview` 和 `PUT /routing/strategy` 复用的 preview 计算。
|
|
3477
3602
|
* 接受任意 routing 覆盖(来自 request body)算「假如改成这个,路由会是啥」。
|
|
3478
|
-
*
|
|
3603
|
+
* 不修改 routing state;registry / seller metadata 可按需刷新,保证 preview
|
|
3604
|
+
* 使用的候选和折扣信息与真实请求路径一致。
|
|
3479
3605
|
*/
|
|
3480
|
-
public buildRoutingPreview(input: {
|
|
3606
|
+
public async buildRoutingPreview(input: {
|
|
3481
3607
|
modelId?: string;
|
|
3482
3608
|
protocol?: string;
|
|
3483
3609
|
paymentMethod?: string;
|
|
3484
3610
|
routing?: Partial<BuyerSellerRoutingConfig>;
|
|
3485
|
-
}): { modelId: string; protocol: string; paymentMethod: string; plan: SellerRoutePlan | { error: string } } {
|
|
3486
|
-
const registry = this.lastRegistrySnapshot
|
|
3611
|
+
}): Promise<{ modelId: string; protocol: string; paymentMethod: string; plan: SellerRoutePlan | { error: string } }> {
|
|
3612
|
+
const registry = this.lastRegistrySnapshot ?? (
|
|
3613
|
+
this.forceRegistrySnapshotForTest ? null : await this.fetchRegistry()
|
|
3614
|
+
);
|
|
3487
3615
|
const focusFirst = this.resolveFocusSet()[0];
|
|
3488
3616
|
const registryFirst = registry?.sellers[0]?.models?.[0];
|
|
3489
3617
|
const modelId = input.modelId?.trim() || focusFirst || registryFirst || "";
|
|
@@ -3501,6 +3629,7 @@ export class TokenbuddyDaemon {
|
|
|
3501
3629
|
: current;
|
|
3502
3630
|
const resolvedRouting = resolveSellerRoutingForModel(routing, modelId);
|
|
3503
3631
|
const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
|
|
3632
|
+
await this.refreshSellerRouteMetadata(registrySellers);
|
|
3504
3633
|
this.sellerPool.ensureRegistrySellers(registrySellers);
|
|
3505
3634
|
const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
3506
3635
|
const plan = planSellerRouteSet({
|
|
@@ -3510,15 +3639,7 @@ export class TokenbuddyDaemon {
|
|
|
3510
3639
|
registrySellers,
|
|
3511
3640
|
routing: resolvedRouting,
|
|
3512
3641
|
prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
|
|
3513
|
-
sellerMetrics: Array.from(poolById.values()).map((entry) => (
|
|
3514
|
-
sellerId: entry.sellerId,
|
|
3515
|
-
healthScore: entry.healthScore,
|
|
3516
|
-
avgLatencyMs: entry.avgLatencyMs,
|
|
3517
|
-
ttftMs: entry.ttftMs,
|
|
3518
|
-
avgInferenceMs: entry.avgInferenceMs,
|
|
3519
|
-
circuit: entry.circuit,
|
|
3520
|
-
capacityBlockedUntil: entry.capacityBlockedUntil
|
|
3521
|
-
})),
|
|
3642
|
+
sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry)),
|
|
3522
3643
|
now: Date.now()
|
|
3523
3644
|
});
|
|
3524
3645
|
return { modelId, protocol, paymentMethod, plan };
|
|
@@ -3569,12 +3690,13 @@ export class TokenbuddyDaemon {
|
|
|
3569
3690
|
}
|
|
3570
3691
|
|
|
3571
3692
|
/**
|
|
3572
|
-
* @internal
|
|
3693
|
+
* @internal - test-only hook to inject a registry snapshot without
|
|
3573
3694
|
* hitting the network. Used by `tests/control-plane-ui-endpoints.test.ts`
|
|
3574
3695
|
* to drive `buildRoutingPreview` deterministically. Production code
|
|
3575
3696
|
* must NOT call this; the real `fetchRegistry()` populates the snapshot.
|
|
3576
3697
|
*/
|
|
3577
3698
|
public setLastRegistrySnapshotForTest(snapshot: SellerRegistryDocument | null): void {
|
|
3699
|
+
this.forceRegistrySnapshotForTest = true;
|
|
3578
3700
|
this.lastRegistrySnapshot = snapshot;
|
|
3579
3701
|
}
|
|
3580
3702
|
}
|
package/src/doctor-diagnostics.ts
CHANGED
|
@@ -341,7 +341,11 @@ function discountRatioFromSeller(seller: DoctorSellerEntry): number | undefined
|
|
|
341
341
|
}
|
|
342
342
|
|
|
343
343
|
function formatDiscountRatio(value: number): string {
|
|
344
|
-
|
|
344
|
+
const ratio = Math.max(0, value);
|
|
345
|
+
if (ratio === 0) return "免费";
|
|
346
|
+
if (Math.abs(ratio - 1) < 0.0001) return "原价";
|
|
347
|
+
const folded = Math.round(ratio * 100) / 10;
|
|
348
|
+
return `${Number.isInteger(folded) ? String(folded) : folded.toFixed(1)}折`;
|
|
345
349
|
}
|
|
346
350
|
|
|
347
351
|
function formatUsdPer1m(microsPer1m: number): string {
|
package/src/prewarm-cache.ts
CHANGED
|
@@ -42,6 +42,8 @@ export interface PrewarmCandidate {
|
|
|
42
42
|
ttftMs?: number;
|
|
43
43
|
/** 平均推理延迟(毫秒),可选 */
|
|
44
44
|
avgInferenceMs?: number;
|
|
45
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
46
|
+
avgTokensPerSecond?: number;
|
|
45
47
|
/** 上游状态(与 seller 上报的语义对齐) */
|
|
46
48
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
47
49
|
/** 上游错误类名(HTTP status / 错误码),仅在失败时存在 */
|
|
@@ -98,6 +100,8 @@ export interface PrewarmCandidateInput {
|
|
|
98
100
|
ttftMs?: number;
|
|
99
101
|
/** 平均推理延迟(毫秒),可选 */
|
|
100
102
|
avgInferenceMs?: number;
|
|
103
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
104
|
+
avgTokensPerSecond?: number;
|
|
101
105
|
/** 上游状态,可选 */
|
|
102
106
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
103
107
|
/** 上游错误类名,可选 */
|
|
@@ -452,6 +456,7 @@ function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
|
|
|
452
456
|
healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
|
|
453
457
|
ttftMs: finiteNonNegative(input.ttftMs),
|
|
454
458
|
avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
|
|
459
|
+
avgTokensPerSecond: finiteNonNegative(input.avgTokensPerSecond),
|
|
455
460
|
upstreamStatus: input.upstreamStatus,
|
|
456
461
|
upstreamErrorClass: input.upstreamErrorClass,
|
|
457
462
|
capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
|
package/src/prewarm-scheduler.ts
CHANGED
|
@@ -35,6 +35,8 @@ export interface ProbeResult {
|
|
|
35
35
|
ttftMs?: number;
|
|
36
36
|
/** 平均推理延迟(毫秒),可选 */
|
|
37
37
|
avgInferenceMs?: number;
|
|
38
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
39
|
+
avgTokensPerSecond?: number;
|
|
38
40
|
/** 临时容量避让截止时间;大于当前时间时不参与路由 */
|
|
39
41
|
capacityBlockedUntil?: number;
|
|
40
42
|
}
|
|
@@ -499,6 +501,7 @@ export class PrewarmScheduler {
|
|
|
499
501
|
healthProbeLatencyMs: result.latencyMs,
|
|
500
502
|
ttftMs: result.ttftMs,
|
|
501
503
|
avgInferenceMs: result.avgInferenceMs,
|
|
504
|
+
avgTokensPerSecond: result.avgTokensPerSecond,
|
|
502
505
|
upstreamStatus: result.upstreamStatus,
|
|
503
506
|
upstreamErrorClass: result.upstreamErrorClass,
|
|
504
507
|
capacityBlockedUntil: result.capacityBlockedUntil
|
|
@@ -513,7 +516,8 @@ export class PrewarmScheduler {
|
|
|
513
516
|
upstreamStatus: result.upstreamStatus,
|
|
514
517
|
upstreamErrorClass: result.upstreamErrorClass,
|
|
515
518
|
ttftMs: result.ttftMs,
|
|
516
|
-
avgInferenceMs: result.avgInferenceMs
|
|
519
|
+
avgInferenceMs: result.avgInferenceMs,
|
|
520
|
+
avgTokensPerSecond: result.avgTokensPerSecond
|
|
517
521
|
});
|
|
518
522
|
} else {
|
|
519
523
|
candidates.push({
|
|
@@ -526,6 +530,7 @@ export class PrewarmScheduler {
|
|
|
526
530
|
healthProbeLatencyMs: result.latencyMs,
|
|
527
531
|
ttftMs: result.ttftMs,
|
|
528
532
|
avgInferenceMs: result.avgInferenceMs,
|
|
533
|
+
avgTokensPerSecond: result.avgTokensPerSecond,
|
|
529
534
|
upstreamStatus: result.upstreamStatus,
|
|
530
535
|
upstreamErrorClass: result.upstreamErrorClass,
|
|
531
536
|
capacityBlockedUntil: result.capacityBlockedUntil
|