@tokenbuddy/tokenbuddy 1.0.29 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/daemon.d.ts +11 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +130 -42
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/doctor-diagnostics.d.ts.map +1 -1
- package/dist/src/doctor-diagnostics.js +7 -1
- package/dist/src/doctor-diagnostics.js.map +1 -1
- package/dist/src/prewarm-cache.d.ts +4 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -1
- package/dist/src/prewarm-cache.js +1 -0
- package/dist/src/prewarm-cache.js.map +1 -1
- package/dist/src/prewarm-scheduler.d.ts +2 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -1
- package/dist/src/prewarm-scheduler.js +4 -1
- package/dist/src/prewarm-scheduler.js.map +1 -1
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +196 -18
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +4 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +13 -0
- package/dist/src/seller-pool.d.ts.map +1 -1
- package/dist/src/seller-pool.js +43 -2
- package/dist/src/seller-pool.js.map +1 -1
- package/dist/src/seller-route-planner.d.ts +9 -0
- package/dist/src/seller-route-planner.d.ts.map +1 -1
- package/dist/src/seller-route-planner.js +39 -15
- package/dist/src/seller-route-planner.js.map +1 -1
- package/dist/src/seller-routing-strategy.d.ts +6 -4
- package/dist/src/seller-routing-strategy.d.ts.map +1 -1
- package/dist/src/seller-routing-strategy.js +15 -12
- package/dist/src/seller-routing-strategy.js.map +1 -1
- package/dist/src/terminal-detect.d.ts +5 -5
- package/dist/src/terminal-detect.d.ts.map +1 -1
- package/dist/src/terminal-detect.js +79 -26
- package/dist/src/terminal-detect.js.map +1 -1
- package/package.json +1 -1
- package/src/daemon.ts +168 -46
- package/src/doctor-diagnostics.ts +5 -1
- package/src/prewarm-cache.ts +5 -0
- package/src/prewarm-scheduler.ts +6 -1
- package/src/provider-install.ts +203 -18
- package/src/seller-catalog.ts +4 -0
- package/src/seller-pool.ts +68 -2
- package/src/seller-route-planner.ts +61 -15
- package/src/seller-routing-strategy.ts +21 -16
- package/src/terminal-detect.ts +81 -24
- package/static/ui/assets/index-DEDEl8o2.js +236 -0
- package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
- package/static/ui/index.html +1 -1
- package/tests/control-plane-ui-endpoints.test.ts +73 -0
- package/tests/seller-pool.test.ts +55 -0
- package/tests/seller-route-planner.test.ts +45 -1
- package/tests/seller-routing-strategy.test.ts +6 -5
- package/tests/tokenbuddy.test.ts +346 -38
- package/static/ui/assets/index-UAfOhbwC.js +0 -236
package/src/provider-install.ts
CHANGED
|
@@ -163,6 +163,10 @@ interface ProviderDefinition {
|
|
|
163
163
|
protocolPreference?: ProtocolPreference;
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
+
/**
 * Type guard for record-like values.
 *
 * Passes for any non-null value whose `typeof` is `"object"` and that is not
 * an array. Primitives, `null`, `undefined`, functions and arrays are
 * rejected. No prototype check is made, so class instances also pass.
 */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
|
|
169
|
+
|
|
166
170
|
/**
 * Chooses the home directory to operate on.
 *
 * @param home Optional override. It is returned verbatim (not trimmed) when it
 *   contains at least one non-whitespace character.
 * @returns `home` when usable, otherwise `os.homedir()`.
 */
function resolveHome(home?: string): string {
  if (!home || home.trim().length === 0) {
    return os.homedir();
  }
  return home;
}
|
|
@@ -209,6 +213,124 @@ function readJsonObject(filePath: string): Record<string, unknown> {
|
|
|
209
213
|
}
|
|
210
214
|
}
|
|
211
215
|
|
|
216
|
+
/**
 * Converts the text that follows `key:` on a simple YAML mapping line into a
 * JavaScript value.
 *
 * Recognised forms, checked in this order:
 * - empty / whitespace only      -> `""`
 * - `true`, `false`, `null`      -> the matching literal (exact, lowercase)
 * - `"double quoted"`            -> contents with escape sequences decoded
 * - `'single quoted'`            -> contents with `''` decoded to `'`
 * - `-?digits[.digits]`          -> `number`
 * - anything else                -> the trimmed text
 *
 * @param value Raw scalar text; surrounding whitespace is ignored.
 * @returns The decoded value.
 */
function parseYamlScalar(value: string): unknown {
  const trimmed = value.trim();
  if (!trimmed) {
    return "";
  }
  if (trimmed === "true") {
    return true;
  }
  if (trimmed === "false") {
    return false;
  }
  if (trimmed === "null") {
    return null;
  }
  // A quoted scalar needs an opening AND a closing quote; a lone quote
  // character is ordinary text, not an empty string.
  if (trimmed.length >= 2) {
    if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
      // Strings written with JSON.stringify carry escapes (\" \\ \n ...).
      // Decode them so quoted values round-trip instead of keeping the
      // backslashes literally.
      try {
        const decoded: unknown = JSON.parse(trimmed);
        if (typeof decoded === "string") {
          return decoded;
        }
      } catch {
        // Not JSON-compatible (e.g. an escape JSON does not know): fall back
        // to stripping the quotes and keeping the inner text as-is.
      }
      return trimmed.slice(1, -1);
    }
    if (trimmed.startsWith("'") && trimmed.endsWith("'")) {
      // Inside single quotes the only escape is a doubled quote.
      return trimmed.slice(1, -1).replace(/''/g, "'");
    }
  }
  if (/^-?\d+(?:\.\d+)?$/.test(trimmed)) {
    const numeric = Number(trimmed);
    if (Number.isFinite(numeric)) {
      return numeric;
    }
  }
  return trimmed;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Parses a minimal subset of YAML: nested block mappings made of
 * `key: scalar` and `key:` lines.
 *
 * Nesting is derived from the number of leading spaces only. Blank lines,
 * lines whose first non-space character is `#`, and lines without a `key:`
 * prefix are skipped. Sequences, flow collections, multi-line scalars, quoted
 * keys and inline comments are not interpreted.
 *
 * Keys are always stored as own properties. Without that, a `__proto__:` line
 * would resolve to `Object.prototype`, and the keys nested beneath it would
 * be written onto the global prototype.
 *
 * @param text YAML document text.
 * @returns The parsed mapping; scalars are decoded by `parseYamlScalar`.
 */
function parseSimpleYamlObject(text: string): Record<string, unknown> {
  const root: Record<string, unknown> = {};
  const stack: Array<{ indent: number; value: Record<string, unknown> }> = [{ indent: -1, value: root }];
  const hasOwn = Object.prototype.hasOwnProperty;
  // defineProperty creates an own data property even for names such as
  // `__proto__`, which plain assignment would route to the prototype setter.
  const assignOwn = (target: Record<string, unknown>, key: string, entry: unknown): void => {
    Object.defineProperty(target, key, {
      value: entry,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  };
  for (const rawLine of text.split(/\r?\n/)) {
    if (!rawLine.trim() || rawLine.trimStart().startsWith("#")) {
      continue;
    }
    const indent = rawLine.match(/^ */)?.[0].length ?? 0;
    const trimmed = rawLine.trim();
    const separatorIndex = trimmed.indexOf(":");
    if (separatorIndex <= 0) {
      continue;
    }
    const key = trimmed.slice(0, separatorIndex).trim();
    const rest = trimmed.slice(separatorIndex + 1).trim();
    // Leave every mapping that is at the same or a deeper indentation level.
    while (stack.length > 1 && indent <= stack[stack.length - 1].indent) {
      stack.pop();
    }
    const parent = stack[stack.length - 1].value;
    if (!rest) {
      // Only reuse a mapping that this document created itself; inherited
      // members (Object.prototype and friends) must never become a target.
      const existing: unknown = hasOwn.call(parent, key) ? parent[key] : undefined;
      const child: Record<string, unknown> = isPlainRecord(existing) ? existing : {};
      assignOwn(parent, key, child);
      stack.push({ indent, value: child });
    } else {
      assignOwn(parent, key, parseYamlScalar(rest));
    }
  }
  return root;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Loads a YAML file as a plain object using `parseSimpleYamlObject`.
 *
 * @param filePath File to read through `readText`.
 * @returns The parsed mapping, or `{}` when `readText` yields a falsy value
 *   (presumably a missing or empty file; confirm against `readText`).
 */
function readYamlObject(filePath: string): Record<string, unknown> {
  const contents = readText(filePath);
  return contents ? parseSimpleYamlObject(contents) : {};
}
|
|
277
|
+
|
|
278
|
+
/**
 * Renders a single value as the scalar part of a `key: value` YAML line.
 *
 * Numbers and booleans are written via `String`, `null` as `null`. Everything
 * else is stringified (`undefined` becomes the empty string) and written as a
 * plain scalar unless that could be misread, in which case it is emitted as a
 * double-quoted string via `JSON.stringify`.
 *
 * A string is quoted when it:
 * - is empty, or has leading/trailing whitespace;
 * - contains `:`, `#`, a newline, carriage return or tab;
 * - starts with a quote or another YAML indicator character, or with the
 *   `- ` / `? ` entry markers;
 * - spells a boolean/null keyword (`true`, `false`, `null`, `yes`, `no`,
 *   `on`, `off`, `~`; case-insensitive);
 * - looks like a number (decimal, exponent, hex/octal, `.inf`, `.nan`).
 *
 * @param value Value to render.
 * @returns Scalar text that is safe to place after `key: `.
 */
function yamlScalarContent(value: unknown): string {
  if (typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }
  if (value === null) {
    return "null";
  }
  const text = String(value ?? "");
  const needsQuoting =
    text === "" ||
    /[:#\n\r\t]/.test(text) ||
    /^\s|\s$/.test(text) ||
    // Leading indicator characters start flow collections, anchors, aliases,
    // tags, block scalars, quoted scalars or directives.
    /^[\[\]{},&*!|>'"%@`]/.test(text) ||
    // "- " / "? " (or a bare "-" / "?") introduce sequence / mapping entries.
    /^[-?](?:\s|$)/.test(text) ||
    // Keywords that YAML loaders resolve to booleans or null.
    /^(?:true|false|null|yes|no|on|off|~)$/i.test(text) ||
    // Text that would be resolved to a number instead of a string.
    /^[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?$/.test(text) ||
    /^[-+]?0[xo][0-9a-f]+$/i.test(text) ||
    /^[-+]?\.(?:inf|nan)$/i.test(text);
  return needsQuoting ? JSON.stringify(text) : text;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Renders a nested record as block-style YAML, indenting two spaces per
 * nesting level.
 *
 * Values accepted by `isPlainRecord` become nested mappings; every other
 * value is written on its key's line through `yamlScalarContent`. Keys are
 * written verbatim, in `Object.entries` order.
 *
 * @param value Record to serialise.
 * @returns YAML text terminated by a single newline.
 */
function yamlContent(value: Record<string, unknown>): string {
  const render = (record: Record<string, unknown>, depth: number): string[] => {
    const pad = " ".repeat(depth);
    return Object.entries(record).flatMap(([name, item]) =>
      isPlainRecord(item)
        ? [`${pad}${name}:`, ...render(item, depth + 2)]
        : [`${pad}${name}: ${yamlScalarContent(item)}`]
    );
  };
  return `${render(value, 0).join("\n")}\n`;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Replaces (or appends) one top-level section of a YAML document while
 * leaving every other line untouched.
 *
 * The section starts at the first line that is exactly `<sectionName>:` or
 * begins with `<sectionName>: `. It extends over all following lines that are
 * blank or start with a space or tab. Blank lines that directly precede the
 * next top-level line are treated as a separator and kept, so the document's
 * visual grouping survives the rewrite.
 *
 * @param existing Current document text (may be empty).
 * @param sectionName Top-level key to replace.
 * @param sectionBody Un-indented body of the section; every line is indented
 *   by two spaces under the section header.
 * @returns The updated document, newline-joined with `\n` and ending in
 *   exactly one newline.
 */
function replaceTopLevelYamlSection(existing: string, sectionName: string, sectionBody: string): string {
  const lines = existing.split(/\r?\n/);
  const header = `${sectionName}:`;
  const sectionStart = lines.findIndex((line) => line === header || line.startsWith(`${header} `));
  const bodyLines = [header, ...sectionBody.trimEnd().split(/\r?\n/).map((line) => `  ${line}`)];
  if (sectionStart < 0) {
    // Section not present yet: append it after the existing content.
    const prefix = existing.trimEnd();
    return `${prefix}${prefix ? "\n" : ""}${bodyLines.join("\n")}\n`;
  }
  // Find the first line after the header that belongs to the next top-level
  // item (non-blank and not indented).
  let sectionEnd = sectionStart + 1;
  while (sectionEnd < lines.length) {
    const line = lines[sectionEnd];
    if (line.trim() && !line.startsWith(" ") && !line.startsWith("\t")) {
      break;
    }
    sectionEnd += 1;
  }
  // Blank lines right before the next top-level line separate the sections;
  // hand them back instead of deleting them together with the old body.
  let keepFrom = sectionEnd;
  if (sectionEnd < lines.length) {
    while (keepFrom > sectionStart + 1 && !lines[keepFrom - 1].trim()) {
      keepFrom -= 1;
    }
  }
  const merged = [
    ...lines.slice(0, sectionStart),
    ...bodyLines,
    ...lines.slice(keepFrom),
  ].join("\n");
  return `${merged.replace(/\n*$/, "")}\n`;
}
|
|
333
|
+
|
|
212
334
|
function readObjectField(value: unknown, key: string): Record<string, unknown> | undefined {
|
|
213
335
|
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
214
336
|
return undefined;
|
|
@@ -493,14 +615,57 @@ function claudeDesktopConfig(home: string, proxyUrl: string, config: ProviderRun
|
|
|
493
615
|
|
|
494
616
|
/**
 * Builds the file change that points OpenClaw at the local proxy.
 *
 * Reads `<home>/.openclaw/openclaw.json`, then:
 * - sets `models.providers.tokenbuddy` to the proxy base URL, the proxy
 *   access-token placeholder and the `openai-completions` API, keeping any
 *   other keys already stored on that provider;
 * - keeps previously listed provider models except the one whose `id` equals
 *   the configured model, which is re-added at the end of the list;
 * - sets `agents.defaults.model` to `tokenbuddy/<model>`.
 * Sections that are missing or not plain records are replaced by fresh ones.
 *
 * @param home Home directory that contains `.openclaw`.
 * @param proxyUrl Proxy URL, normalised through `openAiBaseUrl`.
 * @param config Runtime configuration the model is picked from.
 * @returns A single change carrying the updated JSON content.
 */
function openclawConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
  const asRecord = (candidate: unknown): Record<string, unknown> => (isPlainRecord(candidate) ? candidate : {});

  const model = pickConfiguredModel(config);
  const configPath = path.join(home, ".openclaw", "openclaw.json");
  const current = readJsonObject(configPath);

  // Provider registration under models.providers.tokenbuddy.
  const models = asRecord(current.models);
  const providers = asRecord(models.providers);
  const previousProvider = asRecord(providers.tokenbuddy);
  const previousModels = Array.isArray(previousProvider.models) ? previousProvider.models : [];
  const keptModels = previousModels.filter((entry) => !isPlainRecord(entry) || entry.id !== model);
  const selectedModel = {
    id: model,
    name: model,
    api: "openai-completions",
    input: ["text", "image"],
  };
  providers.tokenbuddy = {
    ...previousProvider,
    baseUrl: openAiBaseUrl(proxyUrl),
    apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
    auth: "api-key",
    api: "openai-completions",
    models: [...keptModels, selectedModel],
  };
  models.providers = providers;
  current.models = models;

  // Default agent model.
  const agents = asRecord(current.agents);
  const defaults = asRecord(agents.defaults);
  defaults.model = `tokenbuddy/${model}`;
  agents.defaults = defaults;
  current.agents = agents;

  return [makeChange("openclaw", configPath, "configure OpenClaw proxy settings", jsonContent(current))];
}
|
|
503
652
|
|
|
653
|
+
/**
 * Reports whether an OpenClaw JSON config already routes through TokenBuddy.
 *
 * All of the following must hold:
 * - `models.providers.tokenbuddy` and `agents.defaults` are both present;
 * - the provider's `apiKey` equals the proxy access-token placeholder;
 * - the provider's `baseUrl` is a string containing `127.0.0.1` and ending
 *   in `/v1`;
 * - `agents.defaults.model` is a string starting with `tokenbuddy/`.
 *
 * @param filePath Path of the OpenClaw config file.
 */
function isOpenclawTokenBuddyConfigured(filePath: string): boolean {
  const current = readJsonObject(filePath);
  const modelsSection = readObjectField(current, "models");
  const providersSection = readObjectField(modelsSection, "providers");
  const tokenbuddy = readObjectField(providersSection, "tokenbuddy");
  const defaults = readObjectField(readObjectField(current, "agents"), "defaults");
  if (!tokenbuddy || !defaults) {
    return false;
  }

  const { apiKey, baseUrl } = tokenbuddy;
  if (apiKey !== PROXY_ACCESS_TOKEN_PLACEHOLDER) {
    return false;
  }
  if (typeof baseUrl !== "string" || !baseUrl.includes("127.0.0.1") || !baseUrl.endsWith("/v1")) {
    return false;
  }

  const defaultModel = defaults.model;
  return typeof defaultModel === "string" && defaultModel.startsWith("tokenbuddy/");
}
|
|
668
|
+
|
|
504
669
|
function openAiBaseUrl(proxyUrl: string): string {
|
|
505
670
|
const normalized = proxyUrl.replace(/\/+$/, "");
|
|
506
671
|
return normalized.endsWith("/v1") ? normalized : `${normalized}/v1`;
|
|
@@ -556,18 +721,36 @@ function isOpencodeTokenBuddyConfigured(filePath: string): boolean {
|
|
|
556
721
|
|
|
557
722
|
/**
 * Builds the file change that points Hermes at the local proxy.
 *
 * Reads `<home>/.hermes/config.yaml` and rewrites only its top-level `model`
 * section. Keys already present in that section are kept, while `default`,
 * `provider`, `base_url`, `api_key` and `api_mode` are overwritten.
 *
 * NOTE(review): the existing section is read with `parseSimpleYamlObject`,
 * which skips lines that have no `key:` form, so list items inside `model:`
 * look like they would not survive the rewrite. Confirm whether Hermes
 * configs can contain such entries.
 *
 * @param home Home directory that contains `.hermes`.
 * @param proxyUrl Proxy URL, normalised through `openAiBaseUrl`.
 * @param config Runtime configuration the model is picked from.
 * @returns A single change carrying the updated YAML content.
 */
function hermesConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
  const model = pickConfiguredModel(config);
  const configPath = path.join(home, ".hermes", "config.yaml");
  const existing = readText(configPath) || "";

  const parsed = parseSimpleYamlObject(existing);
  const previousModelSection = isPlainRecord(parsed.model) ? parsed.model : {};
  const proxyOverrides = {
    default: model,
    provider: "custom",
    base_url: openAiBaseUrl(proxyUrl),
    api_key: PROXY_ACCESS_TOKEN_PLACEHOLDER,
    api_mode: "chat_completions",
  };
  const nextModelSection = { ...previousModelSection, ...proxyOverrides };

  const content = replaceTopLevelYamlSection(existing, "model", yamlContent(nextModelSection));
  return [makeChange("hermes", configPath, "configure Hermes OpenAI proxy settings", content)];
}
|
|
739
|
+
|
|
740
|
+
/**
 * Reports whether a Hermes YAML config already routes through TokenBuddy.
 *
 * The top-level `model` section must exist and satisfy all of:
 * - `provider` is `custom`;
 * - `api_key` equals the proxy access-token placeholder;
 * - `api_mode` is `chat_completions`;
 * - `base_url` is a string containing `127.0.0.1` and ending in `/v1`;
 * - `default` is a non-empty string.
 *
 * @param filePath Path of the Hermes config file.
 */
function isHermesTokenBuddyConfigured(filePath: string): boolean {
  const modelConfig = readObjectField(readYamlObject(filePath), "model");
  if (!modelConfig) {
    return false;
  }

  const {
    provider,
    api_key: apiKey,
    api_mode: apiMode,
    base_url: baseUrl,
    default: defaultModel,
  } = modelConfig;

  if (provider !== "custom" || apiKey !== PROXY_ACCESS_TOKEN_PLACEHOLDER || apiMode !== "chat_completions") {
    return false;
  }
  if (typeof baseUrl !== "string" || !baseUrl.includes("127.0.0.1") || !baseUrl.endsWith("/v1")) {
    return false;
  }
  return typeof defaultModel === "string" && defaultModel.length > 0;
}
|
|
572
755
|
|
|
573
756
|
const PROVIDERS: ProviderDefinition[] = [
|
|
@@ -601,10 +784,11 @@ const PROVIDERS: ProviderDefinition[] = [
|
|
|
601
784
|
id: "openclaw",
|
|
602
785
|
name: "OpenClaw Agent",
|
|
603
786
|
commandName: "openclaw",
|
|
604
|
-
configPath: (home) => path.join(home, ".openclaw", "
|
|
787
|
+
configPath: (home) => path.join(home, ".openclaw", "openclaw.json"),
|
|
788
|
+
isConfigured: isOpenclawTokenBuddyConfigured,
|
|
605
789
|
observedPaths: (home) => [
|
|
606
|
-
path.join(home, ".openclaw", "openclaw.json"),
|
|
607
790
|
path.join(home, ".openclaw", "configs"),
|
|
791
|
+
path.join(home, ".openclaw", "config.json"),
|
|
608
792
|
],
|
|
609
793
|
changes: openclawConfig,
|
|
610
794
|
modelSelectionKind: "single-model",
|
|
@@ -624,9 +808,10 @@ const PROVIDERS: ProviderDefinition[] = [
|
|
|
624
808
|
id: "hermes",
|
|
625
809
|
name: "Hermes Terminal",
|
|
626
810
|
commandName: "hermes",
|
|
627
|
-
configPath: (home) => path.join(home, ".hermes", "
|
|
811
|
+
configPath: (home) => path.join(home, ".hermes", "config.yaml"),
|
|
812
|
+
isConfigured: isHermesTokenBuddyConfigured,
|
|
628
813
|
observedPaths: (home) => [
|
|
629
|
-
path.join(home, ".hermes", "
|
|
814
|
+
path.join(home, ".hermes", "settings.json"),
|
|
630
815
|
path.join(home, ".hermes", "auth.json"),
|
|
631
816
|
],
|
|
632
817
|
changes: hermesConfig,
|
package/src/seller-catalog.ts
CHANGED
|
@@ -174,6 +174,10 @@ export interface SellerCatalogEntry {
|
|
|
174
174
|
discountRatio?: number;
|
|
175
175
|
/** 服务手续费系数(来自 manifest.selection) */
|
|
176
176
|
serviceFeeRatio?: number;
|
|
177
|
+
/** 最近一次 TTFT(毫秒),来自本地 seller pool 运行时指标 */
|
|
178
|
+
ttftMs?: number;
|
|
179
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),来自本地 seller pool 运行时指标 */
|
|
180
|
+
avgTokensPerSecond?: number;
|
|
177
181
|
/** 模型数(来自 manifest) */
|
|
178
182
|
modelCount?: number;
|
|
179
183
|
/** seller 支持的协议(manifest > registry fallback) */
|
package/src/seller-pool.ts
CHANGED
|
@@ -65,6 +65,10 @@ export interface PoolEntry {
|
|
|
65
65
|
ttftMs?: number;
|
|
66
66
|
/** 平均推理延迟(毫秒),可选 */
|
|
67
67
|
avgInferenceMs?: number;
|
|
68
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
69
|
+
avgTokensPerSecond?: number;
|
|
70
|
+
/** 最近一次 runtime speed 指标观测时间;用于避免旧 prewarm 覆盖 live inference 指标 */
|
|
71
|
+
runtimeMetricsObservedAt?: number;
|
|
68
72
|
/** 上游状态,可选 */
|
|
69
73
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
70
74
|
/** 上游错误类名,可选 */
|
|
@@ -73,6 +77,15 @@ export interface PoolEntry {
|
|
|
73
77
|
capacityBlockedUntil?: number;
|
|
74
78
|
}
|
|
75
79
|
|
|
80
|
+
/**
 * Partial set of runtime speed metrics for one seller. Every field is
 * optional; omit a field to leave it out of the update.
 */
export interface SellerRuntimeMetricsUpdate {
  /** TTFT (milliseconds), optional. */
  ttftMs?: number;
  /** Average inference latency (milliseconds), optional. */
  avgInferenceMs?: number;
  /** Output throughput (tokens/s), optional. */
  avgTokensPerSecond?: number;
}
|
|
88
|
+
|
|
76
89
|
/**
|
|
77
90
|
* `SellerPool.pick()` 的入参:标识一次路由请求 + 可选的时间/数量约束。
|
|
78
91
|
*/
|
|
@@ -214,8 +227,10 @@ export class SellerPool {
|
|
|
214
227
|
healthScore: candidate.healthScore,
|
|
215
228
|
avgLatencyMs: candidate.avgLatencyMs,
|
|
216
229
|
healthProbeLatencyMs: candidate.healthProbeLatencyMs,
|
|
217
|
-
ttftMs: candidate.ttftMs,
|
|
218
|
-
avgInferenceMs: candidate.avgInferenceMs,
|
|
230
|
+
ttftMs: preferRuntimeMetric(candidate.ttftMs, candidate.lastSuccessAt, previous?.ttftMs, previous?.runtimeMetricsObservedAt),
|
|
231
|
+
avgInferenceMs: preferRuntimeMetric(candidate.avgInferenceMs, candidate.lastSuccessAt, previous?.avgInferenceMs, previous?.runtimeMetricsObservedAt),
|
|
232
|
+
avgTokensPerSecond: preferRuntimeMetric(candidate.avgTokensPerSecond, candidate.lastSuccessAt, previous?.avgTokensPerSecond, previous?.runtimeMetricsObservedAt),
|
|
233
|
+
runtimeMetricsObservedAt: Math.max(previous?.runtimeMetricsObservedAt ?? 0, candidate.lastSuccessAt || 0) || undefined,
|
|
219
234
|
upstreamStatus: candidate.upstreamStatus,
|
|
220
235
|
upstreamErrorClass: candidate.upstreamErrorClass,
|
|
221
236
|
capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
|
|
@@ -340,6 +355,38 @@ export class SellerPool {
|
|
|
340
355
|
return next;
|
|
341
356
|
}
|
|
342
357
|
|
|
358
|
+
/**
 * Applies runtime speed metrics to an existing pool entry.
 *
 * The stored entry is replaced by a copy in which:
 * - `lastSuccessAt` is set to `now`;
 * - `healthScore` is raised to at least 60 and capped at 100;
 * - `ttftMs`, `avgInferenceMs` and `avgTokensPerSecond` take the supplied
 *   value when `finiteNonNegative` accepts it, otherwise keep the old value;
 * - `avgLatencyMs` follows `avgInferenceMs` when that metric was supplied;
 * - `runtimeMetricsObservedAt` moves forward to `now`, never backwards.
 * An info log `pool.runtime_metrics.recorded` is written afterwards.
 *
 * @param sellerId Seller whose entry is updated.
 * @param metrics Metrics to apply.
 * @param now Observation timestamp; defaults to `this.now()`.
 * @returns The new entry, or `undefined` when the seller has no entry.
 */
recordRuntimeMetrics(
  sellerId: string,
  metrics: SellerRuntimeMetricsUpdate,
  now: number = this.now()
): PoolEntry | undefined {
  const current = this.entries.get(sellerId);
  if (!current) {
    return undefined;
  }

  const observedTtftMs = finiteNonNegative(metrics.ttftMs);
  const observedInferenceMs = finiteNonNegative(metrics.avgInferenceMs);
  const observedTokensPerSecond = finiteNonNegative(metrics.avgTokensPerSecond);
  const raisedHealthScore = Math.min(100, Math.max(current.healthScore, 60));
  const observedAt = Math.max(current.runtimeMetricsObservedAt ?? 0, now);

  const updated: PoolEntry = {
    ...current,
    lastSuccessAt: now,
    healthScore: raisedHealthScore,
    avgLatencyMs: observedInferenceMs ?? current.avgLatencyMs,
    ttftMs: observedTtftMs ?? current.ttftMs,
    avgInferenceMs: observedInferenceMs ?? current.avgInferenceMs,
    avgTokensPerSecond: observedTokensPerSecond ?? current.avgTokensPerSecond,
    runtimeMetricsObservedAt: observedAt
  };
  this.entries.set(sellerId, updated);

  logger.info("pool.runtime_metrics.recorded", "seller pool runtime metrics updated", {
    sellerId,
    ttftMs: updated.ttftMs,
    avgInferenceMs: updated.avgInferenceMs,
    avgTokensPerSecond: updated.avgTokensPerSecond
  });
  return updated;
}
|
|
389
|
+
|
|
343
390
|
/**
|
|
344
391
|
* Record a failure against `sellerId`. Returns the new PoolEntry. The
|
|
345
392
|
* caller (route-failover) uses the returned `entry.circuit` and the
|
|
@@ -475,6 +522,25 @@ export class SellerPool {
|
|
|
475
522
|
}
|
|
476
523
|
}
|
|
477
524
|
|
|
525
|
+
/**
 * Sanitises an optional metric value.
 *
 * @param value Candidate number.
 * @returns `undefined` for missing or non-finite input (`NaN`, `±Infinity`);
 *   otherwise the value with negatives clamped to `0`.
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, value);
}
|
|
528
|
+
|
|
529
|
+
/**
 * Chooses between a metric value coming from prewarm data and the value
 * already stored on the pool entry.
 *
 * - Without a prewarm value the stored value is kept.
 * - A stored value wins only when it exists and was observed strictly later
 *   than the prewarm value (missing timestamps count as `0`).
 * - In every other case the prewarm value is used.
 *
 * @param prewarmValue Value from the prewarm candidate.
 * @param prewarmObservedAt Observation time of the prewarm value.
 * @param previousValue Value currently stored.
 * @param previousObservedAt Observation time of the stored value.
 */
function preferRuntimeMetric(
  prewarmValue: number | undefined,
  prewarmObservedAt: number | undefined,
  previousValue: number | undefined,
  previousObservedAt: number | undefined
): number | undefined {
  if (prewarmValue === undefined) {
    return previousValue;
  }
  const storedIsNewer = (previousObservedAt ?? 0) > (prewarmObservedAt ?? 0);
  return previousValue !== undefined && storedIsNewer ? previousValue : prewarmValue;
}
|
|
543
|
+
|
|
478
544
|
/**
 * Tells whether a pool entry is still capacity-blocked at `now`.
 *
 * @returns `true` only when `capacityBlockedUntil` is a finite number that is
 *   strictly greater than `now`.
 */
function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
  const blockedUntil = entry.capacityBlockedUntil;
  if (typeof blockedUntil !== "number" || !Number.isFinite(blockedUntil)) {
    return false;
  }
  return blockedUntil > now;
}
|
|
@@ -28,6 +28,8 @@ export interface SellerRouteMetric {
|
|
|
28
28
|
ttftMs?: number;
|
|
29
29
|
/** 平均推理延迟(毫秒),可选 */
|
|
30
30
|
avgInferenceMs?: number;
|
|
31
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
32
|
+
avgTokensPerSecond?: number;
|
|
31
33
|
/** 折扣系数(0-1),可选;缺省时 scoring 视为"无折扣信息" */
|
|
32
34
|
discountRatio?: number;
|
|
33
35
|
/** 当前熔断状态,可选;`open` 的 seller 直接被剔除候选 */
|
|
@@ -52,6 +54,12 @@ export interface SellerRoutePrewarmCandidate {
|
|
|
52
54
|
healthScore?: number;
|
|
53
55
|
/** 平均延迟(毫秒),可选 */
|
|
54
56
|
avgLatencyMs?: number;
|
|
57
|
+
/** TTFT(毫秒),可选 */
|
|
58
|
+
ttftMs?: number;
|
|
59
|
+
/** 平均推理延迟(毫秒),可选 */
|
|
60
|
+
avgInferenceMs?: number;
|
|
61
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
62
|
+
avgTokensPerSecond?: number;
|
|
55
63
|
}
|
|
56
64
|
|
|
57
65
|
/**
|
|
@@ -103,6 +111,7 @@ export interface PlannedSellerRoute {
|
|
|
103
111
|
avgLatencyMs?: number;
|
|
104
112
|
ttftMs?: number;
|
|
105
113
|
avgInferenceMs?: number;
|
|
114
|
+
avgTokensPerSecond?: number;
|
|
106
115
|
discountRatio?: number;
|
|
107
116
|
/** 在 registry 里的声明顺序(0-based,tie-breaker) */
|
|
108
117
|
registryOrder: number;
|
|
@@ -205,6 +214,7 @@ export function planSellerRouteSet(input: SellerRoutePlannerInput): SellerRouteP
|
|
|
205
214
|
avgLatencyMs: candidate.avgLatencyMs,
|
|
206
215
|
ttftMs: candidate.ttftMs,
|
|
207
216
|
avgInferenceMs: candidate.avgInferenceMs,
|
|
217
|
+
avgTokensPerSecond: candidate.avgTokensPerSecond,
|
|
208
218
|
discountRatio: candidate.discountRatio,
|
|
209
219
|
registryOrder: candidate.registryOrder
|
|
210
220
|
}
|
|
@@ -230,6 +240,7 @@ function chooseCandidateSource(
|
|
|
230
240
|
): CandidateSourceResult {
|
|
231
241
|
const prewarm = input.prewarmCandidates ?? [];
|
|
232
242
|
let prewarmDiagnostics: PrewarmSourceDiagnostics = emptyPrewarmDiagnostics();
|
|
243
|
+
const prewarmBySellerId = new Map(prewarm.map((candidate) => [candidate.sellerId, candidate]));
|
|
233
244
|
if (prewarm.length > 0) {
|
|
234
245
|
const missingSellerIds: string[] = [];
|
|
235
246
|
const blockedSellerIds: string[] = [];
|
|
@@ -263,26 +274,28 @@ function chooseCandidateSource(
|
|
|
263
274
|
};
|
|
264
275
|
|
|
265
276
|
if (prewarmCandidates.length > 0) {
|
|
277
|
+
const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
|
|
278
|
+
input,
|
|
279
|
+
indexed,
|
|
280
|
+
metrics,
|
|
281
|
+
prewarmBySellerId
|
|
282
|
+
});
|
|
266
283
|
return {
|
|
267
284
|
source: "prewarm_cache",
|
|
268
|
-
sourceReason: "
|
|
269
|
-
candidates:
|
|
270
|
-
incompatibleSellerIds:
|
|
285
|
+
sourceReason: "prewarm_metrics_merged_with_registry",
|
|
286
|
+
candidates: registryCandidatesBeforeCompatibility.filter(isSelectableCandidate),
|
|
287
|
+
incompatibleSellerIds: incompatibleSellerIds(registryCandidatesBeforeCompatibility),
|
|
271
288
|
prewarmDiagnostics
|
|
272
289
|
};
|
|
273
290
|
}
|
|
274
291
|
}
|
|
275
292
|
|
|
276
|
-
const registryCandidatesBeforeCompatibility =
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
protocol: input.protocol,
|
|
283
|
-
paymentMethod: input.paymentMethod,
|
|
284
|
-
metric: metrics.bySellerId.get(entry.seller.id)
|
|
285
|
-
}));
|
|
293
|
+
const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
|
|
294
|
+
input,
|
|
295
|
+
indexed,
|
|
296
|
+
metrics,
|
|
297
|
+
prewarmBySellerId
|
|
298
|
+
});
|
|
286
299
|
|
|
287
300
|
return {
|
|
288
301
|
source: "registry_fallback",
|
|
@@ -293,6 +306,27 @@ function chooseCandidateSource(
|
|
|
293
306
|
};
|
|
294
307
|
}
|
|
295
308
|
|
|
309
|
+
/**
 * Turns the ordered registry sellers into routing candidates.
 *
 * Sellers listed in `metrics.blockedSellerIds` are left out. For every
 * remaining seller a candidate is built from the request's model, protocol
 * and payment method, using the seller's metric merged with its prewarm
 * candidate (when one exists) through `mergeOptionalMetric`.
 *
 * @param input Planner input, indexed registry, metric index and prewarm
 *   candidates keyed by seller id.
 * @returns Candidates in registry order.
 */
function buildRegistryCandidates(input: {
  input: SellerRoutePlannerInput;
  indexed: ReturnType<typeof indexRegistrySellers>;
  metrics: MetricIndex;
  prewarmBySellerId: Map<string, SellerRoutePrewarmCandidate>;
}): RoutingCandidate[] {
  const { input: request, indexed, metrics, prewarmBySellerId } = input;
  const candidates: RoutingCandidate[] = [];
  for (const entry of indexed.ordered) {
    const sellerId = entry.seller.id;
    if (metrics.blockedSellerIds.has(sellerId)) {
      continue;
    }
    const metric = mergeOptionalMetric(metrics.bySellerId.get(sellerId), prewarmBySellerId.get(sellerId));
    candidates.push(buildCandidate({
      seller: entry.seller,
      registryOrder: entry.registryOrder,
      modelId: request.modelId,
      protocol: request.protocol,
      paymentMethod: request.paymentMethod,
      metric
    }));
  }
  return candidates;
}
|
|
329
|
+
|
|
296
330
|
function buildCandidate(input: {
|
|
297
331
|
seller: RegistrySeller;
|
|
298
332
|
registryOrder: number;
|
|
@@ -311,6 +345,7 @@ function buildCandidate(input: {
|
|
|
311
345
|
avgLatencyMs: input.metric?.avgLatencyMs,
|
|
312
346
|
ttftMs: input.metric?.ttftMs,
|
|
313
347
|
avgInferenceMs: input.metric?.avgInferenceMs,
|
|
348
|
+
avgTokensPerSecond: input.metric?.avgTokensPerSecond,
|
|
314
349
|
discountRatio: input.metric?.discountRatio,
|
|
315
350
|
registryOrder: input.registryOrder
|
|
316
351
|
};
|
|
@@ -417,8 +452,9 @@ function mergeMetric(
|
|
|
417
452
|
sellerId: prewarm.sellerId,
|
|
418
453
|
healthScore: prewarm.healthScore ?? metric?.healthScore,
|
|
419
454
|
avgLatencyMs: prewarm.avgLatencyMs ?? metric?.avgLatencyMs,
|
|
420
|
-
ttftMs: metric?.ttftMs,
|
|
421
|
-
avgInferenceMs: metric?.avgInferenceMs,
|
|
455
|
+
ttftMs: metric?.ttftMs ?? prewarm.ttftMs,
|
|
456
|
+
avgInferenceMs: metric?.avgInferenceMs ?? prewarm.avgInferenceMs,
|
|
457
|
+
avgTokensPerSecond: metric?.avgTokensPerSecond ?? prewarm.avgTokensPerSecond,
|
|
422
458
|
discountRatio: metric?.discountRatio,
|
|
423
459
|
circuit: metric?.circuit,
|
|
424
460
|
capacityBlockedUntil: metric?.capacityBlockedUntil,
|
|
@@ -427,6 +463,16 @@ function mergeMetric(
|
|
|
427
463
|
};
|
|
428
464
|
}
|
|
429
465
|
|
|
466
|
+
/**
 * Merges a seller metric with its prewarm candidate when one is available.
 *
 * @param metric Stored metric for the seller, if any.
 * @param prewarm Prewarm candidate for the same seller, if any.
 * @returns `metric` unchanged when there is no prewarm candidate, otherwise
 *   the result of `mergeMetric(metric, prewarm)`.
 */
function mergeOptionalMetric(
  metric: SellerRouteMetric | undefined,
  prewarm: SellerRoutePrewarmCandidate | undefined
): SellerRouteMetric | undefined {
  return prewarm ? mergeMetric(metric, prewarm) : metric;
}
|
|
475
|
+
|
|
430
476
|
/**
 * Tells whether a seller metric is still capacity-blocked at `now`.
 *
 * @returns `true` only when `capacityBlockedUntil` is a finite number that is
 *   strictly greater than `now`.
 */
function isCapacityBlocked(metric: SellerRouteMetric, now: number): boolean {
  const blockedUntil = metric.capacityBlockedUntil;
  if (typeof blockedUntil !== "number" || !Number.isFinite(blockedUntil)) {
    return false;
  }
  return blockedUntil > now;
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
export type SellerRoutingMode = "fixed" | "fixedSet" | "fullAuto";
|
|
8
8
|
/**
|
|
9
9
|
* 评分器:决定如何把候选的健康/延迟/折扣分折算成总分。
|
|
10
|
-
* - `speed`:TTFT /
|
|
10
|
+
* - `speed`:TTFT / 输出吞吐优先
|
|
11
11
|
* - `discount`:折扣系数优先
|
|
12
12
|
* - `balanced`:三方面加权均衡
|
|
13
13
|
*/
|
|
@@ -55,6 +55,8 @@ export interface RoutingCandidate {
|
|
|
55
55
|
ttftMs?: number;
|
|
56
56
|
/** 平均推理延迟(毫秒),可选 */
|
|
57
57
|
avgInferenceMs?: number;
|
|
58
|
+
/** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
|
|
59
|
+
avgTokensPerSecond?: number;
|
|
58
60
|
/** 折扣系数 0-1,可选;缺省视为"无折扣信息" */
|
|
59
61
|
discountRatio?: number;
|
|
60
62
|
/** 上游状态,可选 */
|
|
@@ -92,12 +94,12 @@ export interface CandidateScoreBreakdown {
|
|
|
92
94
|
healthComponent?: number;
|
|
93
95
|
/** TTFT 分量(仅 `speed` / `balanced` 有意义) */
|
|
94
96
|
ttftComponent?: number;
|
|
95
|
-
/**
|
|
96
|
-
|
|
97
|
+
/** 输出吞吐分量(仅 `speed` / `balanced` 有意义) */
|
|
98
|
+
avgTokensPerSecondComponent?: number;
|
|
97
99
|
/** 折扣分量(仅 `discount` / `balanced` 有意义) */
|
|
98
100
|
discountComponent?: number;
|
|
99
101
|
/** 打分时缺失的输入项;缺越多则越说明"无依据" */
|
|
100
|
-
missingInputs: Array<"healthScore" | "ttftMs" | "
|
|
102
|
+
missingInputs: Array<"healthScore" | "ttftMs" | "avgTokensPerSecond" | "discountRatio">;
|
|
101
103
|
}
|
|
102
104
|
|
|
103
105
|
type SortableCandidate = RoutingCandidate & { score: number };
|
|
@@ -201,7 +203,7 @@ function compareCandidates(a: SortableCandidate, b: SortableCandidate, scorer: S
|
|
|
201
203
|
|
|
202
204
|
if (scorer === "speed") {
|
|
203
205
|
return compareFiniteAsc(effectiveTtftMs(a), effectiveTtftMs(b))
|
|
204
|
-
||
|
|
206
|
+
|| compareFiniteDesc(a.avgTokensPerSecond, b.avgTokensPerSecond)
|
|
205
207
|
|| compareFiniteDesc(a.healthScore, b.healthScore)
|
|
206
208
|
|| compareRegistryOrder(a, b);
|
|
207
209
|
}
|
|
@@ -227,14 +229,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
|
|
|
227
229
|
const missingInputs = missingScoreInputs(candidate);
|
|
228
230
|
if (scorer === "speed") {
|
|
229
231
|
const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.65;
|
|
230
|
-
const
|
|
232
|
+
const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.25;
|
|
231
233
|
const healthComponent = finiteOr(candidate.healthScore, 0) * 0.1;
|
|
232
234
|
return {
|
|
233
235
|
scorer,
|
|
234
|
-
totalScore: ttftComponent +
|
|
236
|
+
totalScore: ttftComponent + avgTokensPerSecondComponent + healthComponent,
|
|
235
237
|
healthComponent,
|
|
236
238
|
ttftComponent,
|
|
237
|
-
|
|
239
|
+
avgTokensPerSecondComponent,
|
|
238
240
|
missingInputs
|
|
239
241
|
};
|
|
240
242
|
}
|
|
@@ -251,14 +253,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
|
|
|
251
253
|
|
|
252
254
|
const healthComponent = finiteOr(candidate.healthScore, 0) * 0.35;
|
|
253
255
|
const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.2;
|
|
254
|
-
const
|
|
256
|
+
const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.2;
|
|
255
257
|
const discountComponent = discountScore(candidate.discountRatio) * 0.25;
|
|
256
258
|
return {
|
|
257
259
|
scorer,
|
|
258
|
-
totalScore: healthComponent + ttftComponent +
|
|
260
|
+
totalScore: healthComponent + ttftComponent + avgTokensPerSecondComponent + discountComponent,
|
|
259
261
|
healthComponent,
|
|
260
262
|
ttftComponent,
|
|
261
|
-
|
|
263
|
+
avgTokensPerSecondComponent,
|
|
262
264
|
discountComponent,
|
|
263
265
|
missingInputs
|
|
264
266
|
};
|
|
@@ -271,6 +273,13 @@ function latencyScore(latencyMs: number | undefined): number {
|
|
|
271
273
|
return Math.max(0, 100 - Math.max(0, latencyMs as number) / 10);
|
|
272
274
|
}
|
|
273
275
|
|
|
276
|
+
/**
 * Maps an output throughput (tokens/s) onto a 0-100 score.
 *
 * @param value Throughput, possibly missing.
 * @returns `0` for missing or non-finite input; otherwise the value itself,
 *   clamped to the range 0..100.
 */
function tokensPerSecondScore(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  if (value <= 0) {
    return 0;
  }
  return value >= 100 ? 100 : value;
}
|
|
282
|
+
|
|
274
283
|
function discountScore(discountRatio: number | undefined): number {
|
|
275
284
|
if (!Number.isFinite(discountRatio)) {
|
|
276
285
|
return 0;
|
|
@@ -294,10 +303,6 @@ function effectiveTtftMs(candidate: RoutingCandidate): number | undefined {
|
|
|
294
303
|
return candidate.ttftMs ?? candidate.healthProbeLatencyMs ?? candidate.avgLatencyMs;
|
|
295
304
|
}
|
|
296
305
|
|
|
297
|
-
function effectiveAvgInferenceMs(candidate: RoutingCandidate): number | undefined {
|
|
298
|
-
return candidate.avgInferenceMs ?? candidate.avgLatencyMs ?? candidate.healthProbeLatencyMs;
|
|
299
|
-
}
|
|
300
|
-
|
|
301
306
|
/**
 * Comparator on `registryOrder`: negative when `a` has the smaller order,
 * positive when `b` does, `0` when both are equal.
 */
function compareRegistryOrder(a: RoutingCandidate, b: RoutingCandidate): number {
  const { registryOrder: left } = a;
  const { registryOrder: right } = b;
  return left - right;
}
|
|
@@ -310,7 +315,7 @@ function missingScoreInputs(candidate: RoutingCandidate): CandidateScoreBreakdow
|
|
|
310
315
|
const missing: CandidateScoreBreakdown["missingInputs"] = [];
|
|
311
316
|
if (!Number.isFinite(candidate.healthScore)) missing.push("healthScore");
|
|
312
317
|
if (!Number.isFinite(candidate.ttftMs)) missing.push("ttftMs");
|
|
313
|
-
if (!Number.isFinite(candidate.
|
|
318
|
+
if (!Number.isFinite(candidate.avgTokensPerSecond)) missing.push("avgTokensPerSecond");
|
|
314
319
|
if (!Number.isFinite(candidate.discountRatio)) missing.push("discountRatio");
|
|
315
320
|
return missing;
|
|
316
321
|
}
|