omp-cache-optimizer 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +197 -299
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -1068,18 +1068,18 @@ function isRuntimeOptimizerEnabled(): boolean {
|
|
|
1068
1068
|
}
|
|
1069
1069
|
|
|
1070
1070
|
function getOptimizerRuntimeModeLines(): string[] {
|
|
1071
|
-
const state = runtimeOptimizerEnabled ? "
|
|
1071
|
+
const state = runtimeOptimizerEnabled ? "已启用" : "已关闭";
|
|
1072
1072
|
const lines: string[] = [];
|
|
1073
|
-
lines.push(
|
|
1074
|
-
lines.push(`• Prompt
|
|
1075
|
-
lines.push(`• OpenAI prompt_cache_key
|
|
1076
|
-
lines.push(`• Footer
|
|
1077
|
-
lines.push(`• Compat
|
|
1078
|
-
lines.push(`• ${PI_CACHE_RETENTION_ENV}
|
|
1073
|
+
lines.push(`运行状态:${state}`);
|
|
1074
|
+
lines.push(`• Prompt 重写:${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "开启" : "关闭"}`);
|
|
1075
|
+
lines.push(`• OpenAI prompt_cache_key 回退:${shouldInjectOpenAIPromptCacheKey() ? "开启" : "关闭"}`);
|
|
1076
|
+
lines.push(`• Footer 缓存统计:开启${runtimeOptimizerEnabled ? "" : "(对比模式)"}`);
|
|
1077
|
+
lines.push(`• Compat 提示:${runtimeOptimizerEnabled ? "开启" : "关闭"}`);
|
|
1078
|
+
lines.push(`• ${PI_CACHE_RETENTION_ENV}:${process.env[PI_CACHE_RETENTION_ENV] ?? "(未设置)"}`);
|
|
1079
1079
|
if (!runtimeOptimizerEnabled) {
|
|
1080
|
-
lines.push("
|
|
1080
|
+
lines.push("这是当前进程内开关。运行 /reload 或重启 OMP 可恢复到启动时行为。");
|
|
1081
1081
|
} else if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) || !shouldInjectOpenAIPromptCacheKey()) {
|
|
1082
|
-
lines.push("
|
|
1082
|
+
lines.push("仍有部分能力被环境变量关闭。");
|
|
1083
1083
|
}
|
|
1084
1084
|
return lines;
|
|
1085
1085
|
}
|
|
@@ -1214,9 +1214,9 @@ function buildAdaptiveThinkingCompatSuggestion(_missing: string[]): Record<strin
|
|
|
1214
1214
|
}
|
|
1215
1215
|
|
|
1216
1216
|
function appendAdaptiveThinkingCompatAdviceLines(lines: string[], _missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
1217
|
-
lines.push("-
|
|
1218
|
-
lines.push("
|
|
1219
|
-
lines.push("
|
|
1217
|
+
lines.push("- 自适应思考:OMP 内置模型目录会为官方 Claude 模型自动设置。");
|
|
1218
|
+
lines.push(" 自定义 Anthropic 渠道应依赖内置 catalog 元数据;");
|
|
1219
|
+
lines.push(" 如果上游拒绝 adaptive thinking,请确认模型 id 是否匹配官方发布版本。");
|
|
1220
1220
|
appendCredentialSafeProviderGuidance(lines, placement, {});
|
|
1221
1221
|
}
|
|
1222
1222
|
|
|
@@ -1226,10 +1226,10 @@ function buildAdaptiveThinkingCompatWarningText(key: string, _missing: string[])
|
|
|
1226
1226
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1227
1227
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1228
1228
|
const lines: string[] = [
|
|
1229
|
-
`ℹ️ omp-cache-optimizer
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1229
|
+
`ℹ️ omp-cache-optimizer:${key} 是支持自适应生成的 Claude 模型。`,
|
|
1230
|
+
"OMP 内置 catalog 会自动处理自适应思考;官方模型不需要额外的 models.yml compat 键。",
|
|
1231
|
+
"如果是转发 Anthropic 的自定义渠道,可能仍需要显式 catalog 元数据。",
|
|
1232
|
+
`可参考 ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? '<id>'}"。`,
|
|
1233
1233
|
"",
|
|
1234
1234
|
];
|
|
1235
1235
|
appendAdaptiveThinkingCompatAdviceLines(lines, [], { providerLabel, modelId });
|
|
@@ -2103,7 +2103,7 @@ function buildSafeOpenAIProxyCompatSuggestion(_missing: string[]): Record<string
|
|
|
2103
2103
|
}
|
|
2104
2104
|
|
|
2105
2105
|
function getPromptCacheRetentionUnsupportedHint(): string {
|
|
2106
|
-
return "
|
|
2106
|
+
return "如果这个渠道返回 `400 Unsupported parameter: prompt_cache_retention`,请移除或避免 `supportsLongPromptCacheRetention`;扩展本身不会直接写这个字段,但当 compat 声明支持长缓存保留时,OMP 可能会发送它。";
|
|
2107
2107
|
}
|
|
2108
2108
|
|
|
2109
2109
|
function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
|
|
@@ -2157,20 +2157,20 @@ function appendCredentialSafeProviderGuidance(lines: string[], placement: Compat
|
|
|
2157
2157
|
if (!providerLabel) return;
|
|
2158
2158
|
|
|
2159
2159
|
lines.push("");
|
|
2160
|
-
lines.push("
|
|
2161
|
-
lines.push("-
|
|
2162
|
-
lines.push(`-
|
|
2160
|
+
lines.push("如果这个渠道在 models.yml 里还没有 provider 配置:");
|
|
2161
|
+
lines.push("- 保留现有认证方式;不要复制 credential、token 或 API key。");
|
|
2162
|
+
lines.push(`- 只在 ${getModelsJsonDisplayPath()} 里添加缓存/路由 compat 覆盖。`);
|
|
2163
2163
|
|
|
2164
2164
|
if (Object.keys(compatSuggestion).length === 0) {
|
|
2165
|
-
lines.push("-
|
|
2165
|
+
lines.push("- 上面这些缺失项目前没有安全可复制的 override。");
|
|
2166
2166
|
return;
|
|
2167
2167
|
}
|
|
2168
2168
|
|
|
2169
|
-
lines.push("Provider
|
|
2169
|
+
lines.push("Provider 级最小覆盖:");
|
|
2170
2170
|
lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
|
|
2171
2171
|
|
|
2172
2172
|
if (placement.modelId) {
|
|
2173
|
-
lines.push("
|
|
2173
|
+
lines.push("单模型 override(只想影响当前模型时使用):");
|
|
2174
2174
|
lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
|
|
2175
2175
|
}
|
|
2176
2176
|
}
|
|
@@ -2181,21 +2181,19 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
|
|
|
2181
2181
|
|
|
2182
2182
|
if (hasSafeSuggestion) {
|
|
2183
2183
|
if (options.includeJsonIntro !== false) {
|
|
2184
|
-
lines.push("
|
|
2184
|
+
lines.push("安全默认建议:");
|
|
2185
2185
|
}
|
|
2186
2186
|
lines.push(JSON.stringify(suggestion, null, 2));
|
|
2187
2187
|
}
|
|
2188
2188
|
|
|
2189
|
-
// OMP divergence: session affinity is handled by multi-credential auth, not compat.
|
|
2190
|
-
// No per-flag advice lines remain; only the optional long-retention guidance below.
|
|
2191
2189
|
appendCredentialSafeProviderGuidance(lines, options, suggestion);
|
|
2192
2190
|
}
|
|
2193
2191
|
|
|
2194
2192
|
function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: string[]): void {
|
|
2195
2193
|
if (!optional.includes("supportsLongPromptCacheRetention")) return;
|
|
2196
2194
|
lines.push("");
|
|
2197
|
-
lines.push("
|
|
2198
|
-
lines.push("- supportsLongPromptCacheRetention
|
|
2195
|
+
lines.push("可选项(非必需,不会自动修复):");
|
|
2196
|
+
lines.push("- supportsLongPromptCacheRetention:仅当 endpoint / proxy 明确支持 OpenAI long prompt cache retention 时再开启。");
|
|
2199
2197
|
lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
2200
2198
|
}
|
|
2201
2199
|
|
|
@@ -2212,17 +2210,15 @@ function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: s
|
|
|
2212
2210
|
* exercise it via __internals_for_tests.
|
|
2213
2211
|
*/
|
|
2214
2212
|
function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
|
|
2215
|
-
// Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
|
|
2216
|
-
// If no slash is found, fall back to the key itself.
|
|
2217
2213
|
const slashIdx = key.indexOf("/");
|
|
2218
2214
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
2219
2215
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
2220
2216
|
|
|
2221
2217
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
2222
2218
|
const lines: string[] = [
|
|
2223
|
-
`💡 omp-cache-optimizer
|
|
2224
|
-
|
|
2225
|
-
|
|
2219
|
+
`💡 omp-cache-optimizer:${key} 是第三方 GPT/OpenAI 兼容代理,但合并后的 compat 缺少 ${missing.join(" 和 ")}。`,
|
|
2220
|
+
`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
|
|
2221
|
+
"",
|
|
2226
2222
|
];
|
|
2227
2223
|
|
|
2228
2224
|
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
@@ -2281,20 +2277,16 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
|
|
|
2281
2277
|
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
2282
2278
|
const suggestion = buildDeepSeekCompatSuggestion(missing);
|
|
2283
2279
|
if (Object.keys(suggestion).length > 0) {
|
|
2284
|
-
lines.push("
|
|
2280
|
+
lines.push("推荐的 DeepSeek compat 片段:");
|
|
2285
2281
|
lines.push(JSON.stringify(suggestion, null, 2));
|
|
2286
2282
|
}
|
|
2287
2283
|
|
|
2288
2284
|
if (missing.includes("requiresReasoningContentForToolCalls")) {
|
|
2289
|
-
lines.push("- requiresReasoningContentForToolCalls
|
|
2285
|
+
lines.push("- requiresReasoningContentForToolCalls:保持带工具调用的 assistant 重放与 DeepSeek 的 reasoning_content 要求兼容。");
|
|
2290
2286
|
}
|
|
2291
2287
|
if (missing.includes("supportsLongPromptCacheRetention")) {
|
|
2292
|
-
lines.push("- supportsLongPromptCacheRetention
|
|
2288
|
+
lines.push("- supportsLongPromptCacheRetention:仅当 DeepSeek 兼容 endpoint 支持长缓存保留时再开启。");
|
|
2293
2289
|
}
|
|
2294
|
-
// OMP divergence: thinkingFormat is no longer flagged. DeepSeek reasoning format
|
|
2295
|
-
// is auto-detected by OMP's openai-completions transport; the "deepseek" value
|
|
2296
|
-
// is not a valid OMP thinkingFormat (OMP uses openai|openrouter|zai|qwen|...).
|
|
2297
|
-
// Session affinity is handled by OMP multi-credential auth, not compat keys.
|
|
2298
2290
|
|
|
2299
2291
|
appendCredentialSafeProviderGuidance(lines, placement, suggestion);
|
|
2300
2292
|
}
|
|
@@ -2305,8 +2297,8 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
|
|
|
2305
2297
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
2306
2298
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
2307
2299
|
const lines: string[] = [
|
|
2308
|
-
`💡 omp-cache-optimizer
|
|
2309
|
-
|
|
2300
|
+
`💡 omp-cache-optimizer:${key} 看起来是 DeepSeek 风格模型,但合并后的 compat 缺少 ${missing.join(" 和 ")}。`,
|
|
2301
|
+
`这可能让代理降低或隐藏缓存命中。编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
|
|
2310
2302
|
"",
|
|
2311
2303
|
];
|
|
2312
2304
|
|
|
@@ -2354,8 +2346,8 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
2354
2346
|
if (getCompat(model).cacheControlFormat === "anthropic") return undefined;
|
|
2355
2347
|
|
|
2356
2348
|
return (
|
|
2357
|
-
`💡
|
|
2358
|
-
"
|
|
2349
|
+
`💡 omp-cache-optimizer:${modelKey(model)} 看起来是 Claude/Anthropic 风格模型,但 OpenAI 兼容 compat 缺少 cacheControlFormat: "anthropic"。` +
|
|
2350
|
+
"只有当 endpoint 支持并启用了这个 compat 字段时,OMP 才能放置 Anthropic 的 cache_control 断点。"
|
|
2359
2351
|
);
|
|
2360
2352
|
},
|
|
2361
2353
|
},
|
|
@@ -3352,30 +3344,26 @@ function formatTokenCount(value: number): string {
|
|
|
3352
3344
|
return `${millions.toFixed(2)}M`;
|
|
3353
3345
|
}
|
|
3354
3346
|
|
|
3347
|
+
function localizeAdapterLabel(label: string): string {
|
|
3348
|
+
return label.endsWith(" cache") ? `${label.slice(0, -6)} 缓存` : label;
|
|
3349
|
+
}
|
|
3350
|
+
|
|
3355
3351
|
function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
|
|
3356
3352
|
const percent = stats.totalInputTokens > 0
|
|
3357
3353
|
? ` (${Math.round((stats.cachedInputTokens / stats.totalInputTokens) * 100)}%)`
|
|
3358
3354
|
: "";
|
|
3359
3355
|
const writeText = adapter.showCacheWrite && stats.cacheWriteInputTokens > 0
|
|
3360
|
-
? ` ·
|
|
3356
|
+
? ` · 写入 ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
|
|
3361
3357
|
: "";
|
|
3362
3358
|
|
|
3363
|
-
return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
3359
|
+
return `${localizeAdapterLabel(adapter.label)} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
3364
3360
|
}
|
|
3365
3361
|
|
|
3366
|
-
/**
|
|
3367
|
-
* Compute a hit-ratio percentage string for a value between 0 and 1.
|
|
3368
|
-
* Returns e.g. "75%", "0%", "100%", or "N/A" for zero total.
|
|
3369
|
-
*/
|
|
3370
3362
|
function formatHitRatio(hits: number, total: number): string {
|
|
3371
|
-
if (total <= 0) return "
|
|
3363
|
+
if (total <= 0) return "无数据";
|
|
3372
3364
|
return `${Math.round((hits / total) * 100)}%`;
|
|
3373
3365
|
}
|
|
3374
3366
|
|
|
3375
|
-
/**
|
|
3376
|
-
* Format a token-to-M abbreviation for stats output.
|
|
3377
|
-
* Example: 1500000 → "1.50M"
|
|
3378
|
-
*/
|
|
3379
3367
|
function formatTokenM(value: number): string {
|
|
3380
3368
|
const millions = Math.max(0, Math.round(value)) / 1_000_000;
|
|
3381
3369
|
if (millions === 0) return "0";
|
|
@@ -3384,27 +3372,18 @@ function formatTokenM(value: number): string {
|
|
|
3384
3372
|
return millions.toFixed(2);
|
|
3385
3373
|
}
|
|
3386
3374
|
|
|
3387
|
-
/**
|
|
3388
|
-
* Check if an assistant message's usage fields appear to be missing or empty.
|
|
3389
|
-
* Returns true when normalized fields (input, cacheRead, cacheWrite) are all
|
|
3390
|
-
* absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
|
|
3391
|
-
* for the given adapter.
|
|
3392
|
-
*/
|
|
3393
3375
|
function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
|
|
3394
3376
|
const usage = usageRecordFromAssistant(message);
|
|
3395
3377
|
if (!usage) return true;
|
|
3396
3378
|
|
|
3397
|
-
// Check normalized fields
|
|
3398
3379
|
const input = getNonNegativeNumber(usage, "input");
|
|
3399
3380
|
const cacheRead = getNonNegativeNumber(usage, "cacheRead");
|
|
3400
3381
|
const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
|
|
3401
3382
|
|
|
3402
|
-
// If normalized fields exist with non-zero values, usage is present
|
|
3403
3383
|
if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
|
|
3404
3384
|
return false;
|
|
3405
3385
|
}
|
|
3406
3386
|
|
|
3407
|
-
// Check raw usage for the adapter's provider family
|
|
3408
3387
|
const rawUsage = adapter.normalizeUsage(message);
|
|
3409
3388
|
if (!rawUsage || (rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0)) {
|
|
3410
3389
|
return true;
|
|
@@ -3413,64 +3392,55 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
|
|
|
3413
3392
|
return false;
|
|
3414
3393
|
}
|
|
3415
3394
|
|
|
3416
|
-
/**
|
|
3417
|
-
* Build a summary string for the recent trend (last N samples).
|
|
3418
|
-
* Example: "Recent 10: 7/10 hits · 65% tok cached · no missing usage"
|
|
3419
|
-
*/
|
|
3420
3395
|
function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
|
|
3421
3396
|
const recent = samples.slice(-maxCount);
|
|
3422
|
-
if (recent.length === 0) return
|
|
3397
|
+
if (recent.length === 0) return `最近 ${maxCount} 次:暂无样本`;
|
|
3423
3398
|
|
|
3424
3399
|
const hits = recent.filter((s) => s.hit).length;
|
|
3425
3400
|
const totalCached = recent.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
3426
3401
|
const totalInput = recent.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
3427
3402
|
const missingCount = recent.filter((s) => s.missingUsageFields).length;
|
|
3428
3403
|
|
|
3429
|
-
const
|
|
3430
|
-
const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "N/A";
|
|
3404
|
+
const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "无数据";
|
|
3431
3405
|
|
|
3432
|
-
let result =
|
|
3406
|
+
let result = `最近 ${recent.length}/${maxCount} 次:${hits}/${recent.length} 次命中 · ${tokenRatio} tok 已缓存`;
|
|
3433
3407
|
if (missingCount > 0) {
|
|
3434
|
-
result += ` · ${missingCount}
|
|
3408
|
+
result += ` · ${missingCount} 条 usage 缺失`;
|
|
3435
3409
|
}
|
|
3436
3410
|
return result;
|
|
3437
3411
|
}
|
|
3438
3412
|
|
|
3439
|
-
/**
|
|
3440
|
-
* Build the output for `/cache-optimizer stats`.
|
|
3441
|
-
*/
|
|
3442
3413
|
function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
|
|
3443
3414
|
const lines: string[] = [];
|
|
3444
3415
|
|
|
3445
3416
|
if (!model || !adapter) {
|
|
3446
|
-
lines.push("ℹ️
|
|
3417
|
+
lines.push("ℹ️ 当前活动模型未匹配到缓存适配器。请选择可识别模型家族后再查看统计。");
|
|
3447
3418
|
return lines.join("\n");
|
|
3448
3419
|
}
|
|
3449
3420
|
|
|
3450
3421
|
const key = modelKey(model);
|
|
3451
3422
|
const currentStats = stats ?? emptyCacheStats();
|
|
3452
3423
|
|
|
3453
|
-
lines.push(
|
|
3454
|
-
lines.push(
|
|
3424
|
+
lines.push(`模型键:${key}`);
|
|
3425
|
+
lines.push(`适配器:${localizeAdapterLabel(adapter.label)}`);
|
|
3455
3426
|
lines.push("");
|
|
3456
|
-
lines.push("──
|
|
3457
|
-
lines.push(
|
|
3458
|
-
lines.push(
|
|
3427
|
+
lines.push("── 今日 ──");
|
|
3428
|
+
lines.push(`请求数:${currentStats.hitRequests} 次命中 / ${currentStats.totalRequests} 次总计 · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
|
|
3429
|
+
lines.push(`缓存 tokens:${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M 输入 · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "无数据"}`);
|
|
3459
3430
|
if (currentStats.cacheWriteInputTokens > 0) {
|
|
3460
|
-
lines.push(
|
|
3431
|
+
lines.push(`缓存写入:${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
|
|
3461
3432
|
}
|
|
3462
3433
|
|
|
3463
3434
|
lines.push("");
|
|
3464
|
-
lines.push("──
|
|
3435
|
+
lines.push("── 近期趋势 ──");
|
|
3465
3436
|
lines.push(formatRecentTrendSummary(recentSamples, 10));
|
|
3466
3437
|
lines.push(formatRecentTrendSummary(recentSamples, 30));
|
|
3467
3438
|
|
|
3468
|
-
// Check if any sample has missingUsageFields flagged
|
|
3469
3439
|
const missingAny = recentSamples.some((s) => s.missingUsageFields);
|
|
3470
3440
|
if (missingAny) {
|
|
3471
3441
|
lines.push("");
|
|
3472
|
-
lines.push("⚠️
|
|
3473
|
-
lines.push("
|
|
3442
|
+
lines.push("⚠️ 近期有响应缺少或返回了空的缓存 usage 字段,footer 命中率可能偏低。");
|
|
3443
|
+
lines.push(" 代理可能没有返回 prompt_cache_hit_tokens,或没有返回 usage.input/cacheRead 等字段。");
|
|
3474
3444
|
}
|
|
3475
3445
|
|
|
3476
3446
|
return lines.join("\n");
|
|
@@ -3911,19 +3881,14 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3911
3881
|
const baseUrl = lower(model.baseUrl || "");
|
|
3912
3882
|
const provider = lower(model.provider);
|
|
3913
3883
|
|
|
3914
|
-
// Router/channel diagnostics only apply to OpenAI-compatible proxy APIs.
|
|
3915
|
-
// Native APIs like mistral-conversations, azure-openai-responses,
|
|
3916
|
-
// anthropic-messages, or bedrock-converse-stream are intentionally excluded.
|
|
3917
3884
|
if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
|
|
3918
3885
|
return notes;
|
|
3919
3886
|
}
|
|
3920
3887
|
|
|
3921
|
-
// Official OpenAI bypass — no notes needed.
|
|
3922
3888
|
if (isOfficialOpenAIBaseUrl(model)) {
|
|
3923
3889
|
return notes;
|
|
3924
3890
|
}
|
|
3925
3891
|
|
|
3926
|
-
// ── 1. OpenRouter ────────────────────────────────────────────────
|
|
3927
3892
|
if (
|
|
3928
3893
|
baseUrl.includes("openrouter.ai") ||
|
|
3929
3894
|
baseUrl.includes("openrouter") ||
|
|
@@ -3935,32 +3900,28 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3935
3900
|
const hasOrder = !!routing?.order;
|
|
3936
3901
|
|
|
3937
3902
|
notes.push(
|
|
3938
|
-
"🔀
|
|
3939
|
-
"low cache hit rates are common when each turn lands on a different upstream provider.",
|
|
3903
|
+
"🔀 路由/渠道:检测到 OpenRouter。OpenRouter 是多上游路由器;如果每一轮落到不同上游,缓存命中率偏低很常见。",
|
|
3940
3904
|
);
|
|
3941
3905
|
|
|
3942
3906
|
if (!hasOnly && !hasOrder) {
|
|
3943
3907
|
notes.push(
|
|
3944
|
-
|
|
3945
|
-
"Example for models.yml -> providers[\"<providerId>\"] -> compat:",
|
|
3908
|
+
' 建议:添加 openRouterRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
|
|
3946
3909
|
);
|
|
3947
3910
|
notes.push(
|
|
3948
3911
|
` { "supportsLongPromptCacheRetention": true, ` +
|
|
3949
3912
|
`"openRouterRouting": { "only": ["<provider-slug>"] } }`,
|
|
3950
3913
|
);
|
|
3951
3914
|
notes.push(
|
|
3952
|
-
'
|
|
3915
|
+
' 把 <provider-slug> 替换成真实的 OpenRouter provider slug(如 "openai"、"anthropic")。',
|
|
3953
3916
|
);
|
|
3954
3917
|
notes.push(
|
|
3955
|
-
|
|
3956
|
-
"Only set supportsLongPromptCacheRetention if your upstream supports long cache retention.",
|
|
3918
|
+
' 也可以用 openRouterRouting.order: ["<provider-slug>", "..."] 作为回退顺序。只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。',
|
|
3957
3919
|
);
|
|
3958
3920
|
}
|
|
3959
3921
|
|
|
3960
3922
|
return notes;
|
|
3961
3923
|
}
|
|
3962
3924
|
|
|
3963
|
-
// ── 2. Vercel AI Gateway ─────────────────────────────────────────
|
|
3964
3925
|
if (
|
|
3965
3926
|
baseUrl.includes("ai-gateway.vercel.sh") ||
|
|
3966
3927
|
provider.includes("vercel") ||
|
|
@@ -3972,81 +3933,54 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3972
3933
|
const hasOrder = !!routing?.order;
|
|
3973
3934
|
|
|
3974
3935
|
notes.push(
|
|
3975
|
-
"🔀
|
|
3976
|
-
"provider endpoints per request, reducing cache locality.",
|
|
3936
|
+
"🔀 路由/渠道:检测到 Vercel AI Gateway。这个网关可能把不同请求分发到不同 provider endpoint,降低缓存局部性。",
|
|
3977
3937
|
);
|
|
3978
3938
|
|
|
3979
3939
|
if (!hasOnly && !hasOrder) {
|
|
3980
3940
|
notes.push(
|
|
3981
|
-
|
|
3982
|
-
"Example for models.yml -> providers[\"<providerId>\"] -> compat:",
|
|
3941
|
+
' 建议:添加 vercelGatewayRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
|
|
3983
3942
|
);
|
|
3984
3943
|
notes.push(
|
|
3985
3944
|
` { "supportsLongPromptCacheRetention": true, ` +
|
|
3986
3945
|
`"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
|
|
3987
3946
|
);
|
|
3988
3947
|
notes.push(
|
|
3989
|
-
|
|
3948
|
+
' 把 <provider-id> 替换成真实的 Vercel provider ID(如 "openai")。',
|
|
3990
3949
|
);
|
|
3991
3950
|
notes.push(
|
|
3992
|
-
"
|
|
3951
|
+
" 只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。",
|
|
3993
3952
|
);
|
|
3994
3953
|
}
|
|
3995
3954
|
|
|
3996
3955
|
return notes;
|
|
3997
3956
|
}
|
|
3998
3957
|
|
|
3999
|
-
// ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
|
|
4000
3958
|
const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
|
|
4001
3959
|
if (
|
|
4002
3960
|
aggregationPatterns.some((p) => baseUrl.includes(p)) ||
|
|
4003
3961
|
aggregationPatterns.some((p) => provider.includes(p))
|
|
4004
3962
|
) {
|
|
4005
3963
|
notes.push(
|
|
4006
|
-
"🔀
|
|
4007
|
-
"These proxies route to multiple upstream accounts or instances, which can split the cache.",
|
|
4008
|
-
);
|
|
4009
|
-
notes.push(
|
|
4010
|
-
" Suggestions:",
|
|
4011
|
-
);
|
|
4012
|
-
notes.push(
|
|
4013
|
-
" • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
|
|
4014
|
-
);
|
|
4015
|
-
notes.push(
|
|
4016
|
-
" • Forward prompt_cache_key and session-affinity headers to the upstream.",
|
|
4017
|
-
);
|
|
4018
|
-
notes.push(
|
|
4019
|
-
" • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
|
|
4020
|
-
);
|
|
4021
|
-
notes.push(
|
|
4022
|
-
` Safe compat default: { "supportsLongPromptCacheRetention": true }`,
|
|
4023
|
-
);
|
|
4024
|
-
notes.push(
|
|
4025
|
-
` Add supportsLongPromptCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
|
|
3964
|
+
"🔀 路由/渠道:检测到自建聚合代理(LiteLLM / OneAPI / NewAPI / VoAPI)。这类代理常把请求分到多个上游账号或实例,导致缓存被拆散。",
|
|
4026
3965
|
);
|
|
3966
|
+
notes.push(" 建议:");
|
|
3967
|
+
notes.push(" • 确保代理能按 session 固定到单一上游(session_id affinity)。");
|
|
3968
|
+
notes.push(" • 向上游透传 prompt_cache_key 与会话亲和性相关 header。");
|
|
3969
|
+
notes.push(" • 在响应里返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
|
|
3970
|
+
notes.push(` 可作为起点的 compat:{ "supportsLongPromptCacheRetention": true }`);
|
|
3971
|
+
notes.push(" 只有在代理明确支持 prompt_cache_retention 时才加 supportsLongPromptCacheRetention。");
|
|
4027
3972
|
|
|
4028
3973
|
return notes;
|
|
4029
3974
|
}
|
|
4030
3975
|
|
|
4031
|
-
// ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
|
|
4032
3976
|
if (api === "openai-completions" && baseUrl) {
|
|
4033
3977
|
const missing = describeMissingCacheCompatForModel(model);
|
|
4034
|
-
notes.push(
|
|
4035
|
-
|
|
4036
|
-
);
|
|
4037
|
-
notes.push(
|
|
4038
|
-
" • Verify the proxy routes to the same upstream account/instance per session.",
|
|
4039
|
-
);
|
|
4040
|
-
notes.push(
|
|
4041
|
-
" • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
|
|
4042
|
-
);
|
|
4043
|
-
notes.push(
|
|
4044
|
-
" • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
|
|
4045
|
-
);
|
|
3978
|
+
notes.push("🔀 路由/渠道:第三方 OpenAI 兼容代理。如果缓存命中率偏低:");
|
|
3979
|
+
notes.push(" • 确认代理会把同一 session 路由到同一个上游账号/实例。");
|
|
3980
|
+
notes.push(" • 确认代理会透传 prompt_cache_key,并发送会话亲和性相关 header。");
|
|
3981
|
+
notes.push(" • 确认代理会返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
|
|
4046
3982
|
if (missing.length > 0) {
|
|
4047
|
-
notes.push(
|
|
4048
|
-
` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
|
|
4049
|
-
);
|
|
3983
|
+
notes.push(` • 上面这些 compat 字段(${missing.join(", ")})有助于提升缓存稳定性。`);
|
|
4050
3984
|
}
|
|
4051
3985
|
|
|
4052
3986
|
return notes;
|
|
@@ -4060,38 +3994,38 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
|
|
|
4060
3994
|
|
|
4061
3995
|
if (isMistralConversationsApi(api)) {
|
|
4062
3996
|
return [
|
|
4063
|
-
"ℹ️
|
|
4064
|
-
"
|
|
3997
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
3998
|
+
" 原生 Mistral `mistral-conversations` 使用 provider 原生传输;OpenAI 兼容代理 compat 不适用。",
|
|
4065
3999
|
];
|
|
4066
4000
|
}
|
|
4067
4001
|
|
|
4068
4002
|
if (api === "azure-openai-responses") {
|
|
4069
4003
|
return [
|
|
4070
|
-
"ℹ️
|
|
4071
|
-
"
|
|
4004
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
4005
|
+
" 原生 Azure OpenAI Responses 使用 Responses 传输;OpenAI 兼容代理 compat 不适用。",
|
|
4072
4006
|
];
|
|
4073
4007
|
}
|
|
4074
4008
|
|
|
4075
4009
|
if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
|
|
4076
4010
|
return [
|
|
4077
|
-
"ℹ️
|
|
4078
|
-
"
|
|
4011
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
4012
|
+
" 原生 Responses 传输已经使用运行时核心请求链路;OpenAI 兼容代理 compat 不适用。",
|
|
4079
4013
|
];
|
|
4080
4014
|
}
|
|
4081
4015
|
|
|
4082
|
-
return ["ℹ️
|
|
4016
|
+
return ["ℹ️ 当前模型不适用 compat 检查。"];
|
|
4083
4017
|
}
|
|
4084
4018
|
|
|
4085
4019
|
function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
|
|
4086
4020
|
const lines: string[] = [];
|
|
4087
|
-
lines.push(
|
|
4088
|
-
lines.push(
|
|
4089
|
-
if (model.name && model.name !== model.id) lines.push(
|
|
4090
|
-
lines.push(`API
|
|
4091
|
-
lines.push(`Base URL
|
|
4021
|
+
lines.push(`提供方:${model.provider}`);
|
|
4022
|
+
lines.push(`模型: ${model.id}`);
|
|
4023
|
+
if (model.name && model.name !== model.id) lines.push(`名称: ${model.name}`);
|
|
4024
|
+
lines.push(`API: ${model.api}`);
|
|
4025
|
+
lines.push(`Base URL: ${model.baseUrl || "(默认)"}`);
|
|
4092
4026
|
|
|
4093
4027
|
const compat = getCompat(model);
|
|
4094
|
-
lines.push(`Compat
|
|
4028
|
+
lines.push(`Compat: ${JSON.stringify(compat)}`);
|
|
4095
4029
|
|
|
4096
4030
|
const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
|
|
4097
4031
|
const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
|
|
@@ -4104,10 +4038,10 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4104
4038
|
const advisoryMissing = missing.filter(m => !safeFixableMissing.includes(m));
|
|
4105
4039
|
|
|
4106
4040
|
if (safeFixableMissing.length > 0) {
|
|
4107
|
-
lines.push(`⚠️
|
|
4041
|
+
lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissing.join(", ")}`);
|
|
4108
4042
|
}
|
|
4109
4043
|
if (advisoryMissing.length > 0) {
|
|
4110
|
-
lines.push(`ℹ️
|
|
4044
|
+
lines.push(`ℹ️ 可选项:${advisoryMissing.join(", ")}(仅在确认支持时启用)`);
|
|
4111
4045
|
}
|
|
4112
4046
|
|
|
4113
4047
|
if (missing.length > 0) {
|
|
@@ -4115,7 +4049,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4115
4049
|
const slashIdx = key.indexOf("/");
|
|
4116
4050
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
4117
4051
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
4118
|
-
lines.push(
|
|
4052
|
+
lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`);
|
|
4119
4053
|
if (adaptiveThinkingApplicable) {
|
|
4120
4054
|
appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
4121
4055
|
} else if (deepSeekCompatApplicable) {
|
|
@@ -4125,7 +4059,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4125
4059
|
appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
|
|
4126
4060
|
}
|
|
4127
4061
|
} else if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
4128
|
-
lines.push("✅
|
|
4062
|
+
lines.push("✅ compat 配置完整。");
|
|
4129
4063
|
appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
|
|
4130
4064
|
} else {
|
|
4131
4065
|
lines.push(...getCompatCheckNotApplicableLines(model));
|
|
@@ -4134,14 +4068,13 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4134
4068
|
if (isPromptCacheRetention400Applicable(model)) {
|
|
4135
4069
|
lines.push("");
|
|
4136
4070
|
if (options.promptCacheRetention400) {
|
|
4137
|
-
lines.push("⚠️
|
|
4071
|
+
lines.push("⚠️ 在启用 supportsLongPromptCacheRetention 时观测到一次 400 响应。");
|
|
4138
4072
|
lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
4139
4073
|
} else {
|
|
4140
|
-
lines.push(`ℹ️
|
|
4074
|
+
lines.push(`ℹ️ 已启用长缓存保留。${getPromptCacheRetentionUnsupportedHint()}`);
|
|
4141
4075
|
}
|
|
4142
4076
|
}
|
|
4143
4077
|
|
|
4144
|
-
// ── Router/channel diagnostics ──
|
|
4145
4078
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
4146
4079
|
if (routerNotes.length > 0) {
|
|
4147
4080
|
lines.push("");
|
|
@@ -4150,31 +4083,24 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4150
4083
|
}
|
|
4151
4084
|
}
|
|
4152
4085
|
|
|
4153
|
-
// ── Integrity diagnostics ──
|
|
4154
4086
|
if (lastPromptIntegrityWarningAt > 0) {
|
|
4155
4087
|
const ago = Date.now() - lastPromptIntegrityWarningAt;
|
|
4156
4088
|
const mins = Math.floor(ago / 60000);
|
|
4157
4089
|
if (mins < 5) {
|
|
4158
4090
|
lines.push("");
|
|
4159
|
-
lines.push("⚠️
|
|
4160
|
-
lines.push(`
|
|
4161
|
-
lines.push(
|
|
4162
|
-
lines.push(
|
|
4163
|
-
lines.push(
|
|
4164
|
-
lines.push(
|
|
4165
|
-
lines.push(
|
|
4166
|
-
lines.push(` 3. If persistent, file an issue with this doctor output.`);
|
|
4091
|
+
lines.push("⚠️ 最近检测到 prompt 完整性问题:");
|
|
4092
|
+
lines.push(` 最近一次检测于 ${mins > 0 ? `${mins} 分钟` : `${Math.floor(ago / 1000)} 秒`}前;该轮已跳过 prompt 重排以保留结构标记。`);
|
|
4093
|
+
lines.push(" 常见原因:扩展的 system prompt 格式变化,或子串碰撞。");
|
|
4094
|
+
lines.push(" 建议步骤:");
|
|
4095
|
+
lines.push(" 1. 运行 /reload 重置(可清除瞬态问题)。");
|
|
4096
|
+
lines.push(" 2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。");
|
|
4097
|
+
lines.push(" 3. 若持续复现,请带 doctor 输出提 issue。");
|
|
4167
4098
|
}
|
|
4168
4099
|
}
|
|
4169
4100
|
|
|
4170
4101
|
return lines.join("\n");
|
|
4171
4102
|
}
|
|
4172
4103
|
|
|
4173
|
-
/**
|
|
4174
|
-
* Build a "Cache diagnosis" section for low-hit causes, appended to doctor output.
|
|
4175
|
-
* This is a separate function because it depends on per-session state (recent samples,
|
|
4176
|
-
* per-model stats) that is not available at the module level.
|
|
4177
|
-
*/
|
|
4178
4104
|
function buildLowHitDiagnosis(
|
|
4179
4105
|
model: PiModel,
|
|
4180
4106
|
adapter: CacheProviderAdapter | undefined,
|
|
@@ -4183,101 +4109,74 @@ function buildLowHitDiagnosis(
|
|
|
4183
4109
|
): string[] {
|
|
4184
4110
|
const lines: string[] = [];
|
|
4185
4111
|
|
|
4186
|
-
// 1. Missing compat flags (adapter-aware: DeepSeek has extra reasoning compat)
|
|
4187
4112
|
const fixSugLHD = buildFixSuggestion(model);
|
|
4188
4113
|
const safeFixableMissingLHD = fixSugLHD ? Object.keys(fixSugLHD.compatKeys) : [];
|
|
4189
|
-
|
|
4190
|
-
// 2. Router/channel risk (reuse existing check)
|
|
4191
4114
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
4192
|
-
|
|
4193
|
-
// 3. Recent samples missing usage fields
|
|
4194
4115
|
const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;
|
|
4195
|
-
|
|
4196
|
-
// 4. Recent trend analysis
|
|
4197
4116
|
const recent10 = samples.slice(-10);
|
|
4198
4117
|
const recent10Hits = recent10.filter((s) => s.hit).length;
|
|
4199
4118
|
const recent10Total = recent10.length;
|
|
4200
4119
|
const recent10Cached = recent10.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
4201
4120
|
const recent10Input = recent10.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
4202
|
-
|
|
4203
|
-
// 5. Today's overall trend from persisted stats
|
|
4204
4121
|
const todayStats = stats ?? emptyCacheStats();
|
|
4205
4122
|
|
|
4206
4123
|
const hasMissingCompat = safeFixableMissingLHD.length > 0;
|
|
4207
4124
|
const hasRouterRisk = routerNotes.length > 0;
|
|
4208
4125
|
const hasUsageMissing = missingUsageSamples > 0;
|
|
4209
|
-
|
|
4210
|
-
// Today's cached-token ratio is used both inside and outside the recent-sample
|
|
4211
|
-
// branch. Keep it block-external so doctor/stats never throw for low-hit
|
|
4212
|
-
// models that have persisted counters but no recent in-memory samples.
|
|
4213
4126
|
const todayHitRatio = todayStats.totalInputTokens > 0
|
|
4214
4127
|
? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
|
|
4215
4128
|
: 0;
|
|
4216
4129
|
|
|
4217
|
-
// Determine if there are actual issues worth flagging
|
|
4218
4130
|
const hasActualIssues = hasMissingCompat || hasUsageMissing ||
|
|
4219
|
-
// Low hit trend (today total > 3 and hit ratio < 30%)
|
|
4220
4131
|
(todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
|
|
4221
4132
|
(todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
|
|
4222
|
-
// Low hit rate in recent samples (recent10Total >= 3 and all misses)
|
|
4223
4133
|
(recent10Total >= 3 && recent10Hits === 0);
|
|
4224
4134
|
|
|
4225
|
-
// Skip section if no issues
|
|
4226
4135
|
if (!hasActualIssues && !(hasRouterRisk && (hasMissingCompat || hasUsageMissing))) {
|
|
4227
4136
|
return lines;
|
|
4228
4137
|
}
|
|
4229
4138
|
|
|
4230
4139
|
lines.push("");
|
|
4231
|
-
lines.push("──
|
|
4140
|
+
lines.push("── 缓存诊断 ──");
|
|
4232
4141
|
|
|
4233
|
-
// Priority 1: missing compat flags
|
|
4234
4142
|
if (hasMissingCompat) {
|
|
4235
|
-
lines.push(`⚠️
|
|
4236
|
-
lines.push("
|
|
4237
|
-
lines.push("
|
|
4143
|
+
lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissingLHD.join(", ")}`);
|
|
4144
|
+
lines.push(" 这些字段有助于稳定 prompt 缓存与上游路由粘性。");
|
|
4145
|
+
lines.push(" 可运行 /cache-optimizer compat 查看编辑建议。");
|
|
4238
4146
|
}
|
|
4239
4147
|
|
|
4240
|
-
// Priority 2: router/channel risk (only flag when there are other issues)
|
|
4241
|
-
// Router notes are already shown in the main doctor output, so we only
|
|
4242
|
-
// mention them in the diagnosis section when they compound a problem.
|
|
4243
4148
|
if (hasRouterRisk && (hasMissingCompat || hasUsageMissing || hasActualIssues)) {
|
|
4244
|
-
lines.push("🔀
|
|
4149
|
+
lines.push("🔀 检测到路由/代理风险 —— 详见上方路由诊断。");
|
|
4245
4150
|
}
|
|
4246
4151
|
|
|
4247
|
-
// Priority 3: usage fields missing
|
|
4248
4152
|
if (hasUsageMissing) {
|
|
4249
|
-
lines.push(`⚠️
|
|
4250
|
-
lines.push(" Footer
|
|
4251
|
-
lines.push("
|
|
4153
|
+
lines.push(`⚠️ 最近 ${samples.length} 条样本里有 ${missingUsageSamples} 条缺少或返回了空的 usage 字段。`);
|
|
4154
|
+
lines.push(" Footer 命中率可能会被低估。");
|
|
4155
|
+
lines.push(" 请确认代理会返回 prompt 级 usage(如 prompt_tokens、input_tokens_details)。");
|
|
4252
4156
|
}
|
|
4253
4157
|
|
|
4254
|
-
// Priority 4: recent trend low
|
|
4255
4158
|
if (recent10Total > 0) {
|
|
4256
|
-
const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
|
|
4257
4159
|
if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
4258
|
-
lines.push(`📉
|
|
4259
|
-
lines.push("
|
|
4260
|
-
lines.push("
|
|
4261
|
-
lines.push(" Verify upstream routing stickiness and supportsLongPromptCacheRetention compat.");
|
|
4160
|
+
lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(最近 ${recent10Total} 条样本)。`);
|
|
4161
|
+
lines.push(" 常见原因:代理把请求路由到不同后端,或 prompt 前缀在各轮之间变化。");
|
|
4162
|
+
lines.push(" 请检查上游路由粘性,以及 supportsLongPromptCacheRetention 配置是否正确。");
|
|
4262
4163
|
} else if (todayHitRatio < 30 && todayStats.totalRequests > 3) {
|
|
4263
|
-
lines.push(`📉
|
|
4264
|
-
lines.push("
|
|
4164
|
+
lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(共 ${todayStats.totalRequests} 次请求)。`);
|
|
4165
|
+
lines.push(" 请检查 compat 配置与代理上游路由。");
|
|
4265
4166
|
}
|
|
4266
4167
|
|
|
4267
|
-
// Show brief trend summary if there are enough samples
|
|
4268
4168
|
if (recent10Total >= 3) {
|
|
4269
4169
|
const trend = formatRecentTrendSummary(samples, 10);
|
|
4270
4170
|
lines.push(`📊 ${trend}`);
|
|
4271
4171
|
}
|
|
4272
4172
|
}
|
|
4273
4173
|
|
|
4274
|
-
// For fully configured but low hit models, emphasize sticky routing
|
|
4275
4174
|
if (!hasMissingCompat && !hasRouterRisk && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
4276
|
-
lines.push("💡
|
|
4277
|
-
lines.push("
|
|
4278
|
-
lines.push(" •
|
|
4279
|
-
lines.push(" •
|
|
4280
|
-
lines.push(" •
|
|
4175
|
+
lines.push("💡 compat 已配置完整,但缓存命中率仍然偏低。");
|
|
4176
|
+
lines.push(" 可能原因:");
|
|
4177
|
+
lines.push(" • 代理仍把请求分发到多个后端 —— 请检查代理侧的会话粘性。");
|
|
4178
|
+
lines.push(" • prompt 前缀每轮都在变化 —— 请检查 system prompt 中的动态上下文。");
|
|
4179
|
+
lines.push(" • provider 没有返回缓存 usage 字段 —— footer 无法准确测量命中。");
|
|
4281
4180
|
}
|
|
4282
4181
|
|
|
4283
4182
|
return lines;
|
|
@@ -4304,16 +4203,16 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
4304
4203
|
const slashIdx = key.indexOf("/");
|
|
4305
4204
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
4306
4205
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
4307
|
-
lines.push(
|
|
4206
|
+
lines.push(`当前模型:${key}`);
|
|
4308
4207
|
if (safeFixableMissingC.length > 0) {
|
|
4309
|
-
lines.push(
|
|
4208
|
+
lines.push(`可安全修复:${safeFixableMissingC.join(", ")}`);
|
|
4310
4209
|
}
|
|
4311
4210
|
if (advisoryMissingC.length > 0) {
|
|
4312
|
-
lines.push(
|
|
4211
|
+
lines.push(`可选项:${advisoryMissingC.join(", ")}(仅在确认支持时启用)`);
|
|
4313
4212
|
}
|
|
4314
4213
|
lines.push("");
|
|
4315
|
-
lines.push(
|
|
4316
|
-
lines.push(
|
|
4214
|
+
lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
4215
|
+
lines.push("(与 baseUrl/api/apiKey/models 同级)。");
|
|
4317
4216
|
if (adaptiveThinkingApplicable) {
|
|
4318
4217
|
appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
4319
4218
|
} else if (deepSeekCompatApplicable) {
|
|
@@ -4324,10 +4223,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
4324
4223
|
}
|
|
4325
4224
|
}
|
|
4326
4225
|
|
|
4327
|
-
// When compat is fully configured but router/optional notes exist, prefix the status.
|
|
4328
4226
|
if ((routerNotes.length > 0 || optionalOpenAIProxyCompat.length > 0) && missing.length === 0) {
|
|
4329
4227
|
if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
4330
|
-
lines.push("✅
|
|
4228
|
+
lines.push("✅ compat 配置完整。");
|
|
4331
4229
|
if (isPromptCacheRetention400Applicable(model)) {
|
|
4332
4230
|
lines.push(getPromptCacheRetentionUnsupportedHint());
|
|
4333
4231
|
}
|
|
@@ -5848,7 +5746,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5848
5746
|
const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
|
|
5849
5747
|
statusText = runtimeOptimizerEnabled
|
|
5850
5748
|
? statsText
|
|
5851
|
-
:
|
|
5749
|
+
: `缓存优化已关闭 · ${statsText}`;
|
|
5852
5750
|
}
|
|
5853
5751
|
}
|
|
5854
5752
|
|
|
@@ -5859,7 +5757,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5859
5757
|
const sk = displayModel ? sessionModelKey(displayModel) : undefined;
|
|
5860
5758
|
const stats = sk ? cacheStatsByModel[sk] : undefined;
|
|
5861
5759
|
const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
5862
|
-
statusText = runtimeOptimizerEnabled ? statsText :
|
|
5760
|
+
statusText = runtimeOptimizerEnabled ? statsText : `缓存优化已关闭 · ${statsText}`;
|
|
5863
5761
|
}
|
|
5864
5762
|
|
|
5865
5763
|
// If optimizeSystemPrompt detected structural truncation on this or
|
|
@@ -5867,7 +5765,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5867
5765
|
// /reload before continuing. The flag resets after emission so a
|
|
5868
5766
|
// single-turn glitch does not permanently taint the footer.
|
|
5869
5767
|
if (promptTruncationDetected && statusText !== undefined) {
|
|
5870
|
-
statusText = statusText + " ⚠️
|
|
5768
|
+
statusText = statusText + " ⚠️ 完整性";
|
|
5871
5769
|
promptTruncationDetected = false;
|
|
5872
5770
|
lastPromptIntegrityWarningAt = Date.now();
|
|
5873
5771
|
|
|
@@ -5875,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
5875
5773
|
if (!integrityNotificationShown) {
|
|
5876
5774
|
integrityNotificationShown = true;
|
|
5877
5775
|
ctx.ui.notify(
|
|
5878
|
-
`⚠️ ${LOG_PREFIX}
|
|
5879
|
-
|
|
5880
|
-
|
|
5881
|
-
`1.
|
|
5882
|
-
`2.
|
|
5883
|
-
`3.
|
|
5776
|
+
`⚠️ ${LOG_PREFIX}:本轮重排导致一个 prompt 结构标记丢失。` +
|
|
5777
|
+
`为保证完整性,已回退到原始 prompt。\n\n` +
|
|
5778
|
+
`恢复步骤:\n` +
|
|
5779
|
+
`1. 运行 /reload 重置(可清除瞬态问题)。\n` +
|
|
5780
|
+
`2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。\n` +
|
|
5781
|
+
`3. 若持续复现,请运行 /cache-optimizer doctor 并提 issue(不要包含 API key / prompt)。`,
|
|
5884
5782
|
"warning",
|
|
5885
5783
|
);
|
|
5886
5784
|
}
|
|
@@ -5899,7 +5797,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5899
5797
|
// OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
|
|
5900
5798
|
// commands still mention them as optional guidance.
|
|
5901
5799
|
if (buildFixSuggestion(displayModel) !== undefined) {
|
|
5902
|
-
statusText = statusText + " ⚠️
|
|
5800
|
+
statusText = statusText + " ⚠️ 配置";
|
|
5903
5801
|
}
|
|
5904
5802
|
}
|
|
5905
5803
|
|
|
@@ -6049,9 +5947,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
6049
5947
|
if (warnedPromptCacheRetention400Models.has(key)) return;
|
|
6050
5948
|
warnedPromptCacheRetention400Models.add(key);
|
|
6051
5949
|
ctx.ui.notify(
|
|
6052
|
-
`⚠️ ${LOG_PREFIX}
|
|
5950
|
+
`⚠️ ${LOG_PREFIX}:${key} 在启用 supportsLongPromptCacheRetention 时返回了 HTTP 400。` +
|
|
6053
5951
|
getPromptCacheRetentionUnsupportedHint() +
|
|
6054
|
-
`
|
|
5952
|
+
` 可运行 /cache-optimizer doctor 查看精确编辑位置。`,
|
|
6055
5953
|
"warning",
|
|
6056
5954
|
);
|
|
6057
5955
|
});
|
|
@@ -6140,16 +6038,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6140
6038
|
resetCurrentSessionStats();
|
|
6141
6039
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6142
6040
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6143
|
-
cmdCtx.ui.notify(`✅
|
|
6041
|
+
cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6144
6042
|
} else if (subcommand === "disable") {
|
|
6145
6043
|
setRuntimeOptimizerEnabled(false);
|
|
6146
6044
|
resetCurrentSessionStats();
|
|
6147
6045
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6148
6046
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6149
|
-
cmdCtx.ui.notify(`⏸️
|
|
6047
|
+
cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6150
6048
|
} else if (subcommand === "doctor") {
|
|
6151
6049
|
if (!model) {
|
|
6152
|
-
cmdCtx.ui.notify("
|
|
6050
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6153
6051
|
return;
|
|
6154
6052
|
}
|
|
6155
6053
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
@@ -6164,7 +6062,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6164
6062
|
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
6165
6063
|
} else if (subcommand === "stats") {
|
|
6166
6064
|
if (!model) {
|
|
6167
|
-
cmdCtx.ui.notify("
|
|
6065
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6168
6066
|
return;
|
|
6169
6067
|
}
|
|
6170
6068
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6175,7 +6073,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6175
6073
|
cmdCtx.ui.notify(output, "info");
|
|
6176
6074
|
} else if (subcommand === "compat") {
|
|
6177
6075
|
if (!model) {
|
|
6178
|
-
cmdCtx.ui.notify("
|
|
6076
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6179
6077
|
return;
|
|
6180
6078
|
}
|
|
6181
6079
|
const compatResult = buildCompatDiagnosis(model);
|
|
@@ -6184,19 +6082,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
6184
6082
|
} else {
|
|
6185
6083
|
cmdCtx.ui.notify(
|
|
6186
6084
|
isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
6187
|
-
? "✅
|
|
6085
|
+
? "✅ compat 配置完整。"
|
|
6188
6086
|
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
6189
6087
|
"info",
|
|
6190
6088
|
);
|
|
6191
6089
|
}
|
|
6192
6090
|
} else if (subcommand === "reset") {
|
|
6193
6091
|
if (!model) {
|
|
6194
|
-
cmdCtx.ui.notify("
|
|
6092
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6195
6093
|
return;
|
|
6196
6094
|
}
|
|
6197
6095
|
const adapter = selectAdapterForModel(model);
|
|
6198
6096
|
if (!adapter) {
|
|
6199
|
-
cmdCtx.ui.notify("ℹ️
|
|
6097
|
+
cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
|
|
6200
6098
|
return;
|
|
6201
6099
|
}
|
|
6202
6100
|
|
|
@@ -6214,21 +6112,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
6214
6112
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6215
6113
|
|
|
6216
6114
|
cmdCtx.ui.notify(
|
|
6217
|
-
`✅
|
|
6218
|
-
"
|
|
6219
|
-
"
|
|
6115
|
+
`✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
|
|
6116
|
+
"上游 provider 的 prompt cache 未被修改。" +
|
|
6117
|
+
"后续请求会为当前 OMP session 开始新的统计桶。",
|
|
6220
6118
|
"info",
|
|
6221
6119
|
);
|
|
6222
6120
|
} else if (subcommand === "fix") {
|
|
6223
6121
|
if (!model) {
|
|
6224
|
-
cmdCtx.ui.notify("
|
|
6122
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6225
6123
|
return;
|
|
6226
6124
|
}
|
|
6227
6125
|
|
|
6228
6126
|
const suggestion = buildFixSuggestion(model);
|
|
6229
6127
|
if (!suggestion) {
|
|
6230
6128
|
const key = modelKey(model);
|
|
6231
|
-
cmdCtx.ui.notify(`✅
|
|
6129
|
+
cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
|
|
6232
6130
|
return;
|
|
6233
6131
|
}
|
|
6234
6132
|
|
|
@@ -6239,14 +6137,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
6239
6137
|
const compatResult = buildCompatDiagnosis(model);
|
|
6240
6138
|
const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
|
|
6241
6139
|
cmdCtx.ui.notify(
|
|
6242
|
-
`📝
|
|
6243
|
-
|
|
6244
|
-
|
|
6245
|
-
|
|
6140
|
+
`📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
|
|
6141
|
+
`提供方:${suggestion.providerLabel}\n` +
|
|
6142
|
+
`模型:${suggestion.modelId}\n\n` +
|
|
6143
|
+
`在模型级 compat(模型条目下)添加这些键:\n\n` +
|
|
6246
6144
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6247
|
-
|
|
6145
|
+
`或放到 provider 级(providers["${suggestion.providerLabel}"] 下):\n\n` +
|
|
6248
6146
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6249
|
-
|
|
6147
|
+
`编辑后运行 /reload。\n` +
|
|
6250
6148
|
(compatResult ? `\n${compatResult}` : ""),
|
|
6251
6149
|
"info",
|
|
6252
6150
|
);
|
|
@@ -6254,31 +6152,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
6254
6152
|
// Try interactive selection menu when UI supports it
|
|
6255
6153
|
if (cmdCtx.hasUI) {
|
|
6256
6154
|
const menuOptions = [
|
|
6257
|
-
"
|
|
6258
|
-
"
|
|
6259
|
-
"
|
|
6260
|
-
"
|
|
6261
|
-
"
|
|
6262
|
-
"
|
|
6263
|
-
"
|
|
6264
|
-
"
|
|
6155
|
+
"启用 —— 打开运行时优化",
|
|
6156
|
+
"关闭 —— 关闭运行时优化",
|
|
6157
|
+
"诊断 —— 查看缓存配置",
|
|
6158
|
+
"统计 —— 查看缓存统计与趋势",
|
|
6159
|
+
"兼容 —— 查看 compat 建议",
|
|
6160
|
+
"修复 —— 查看 compat 修复建议(会写 models.yml 时另行提示)",
|
|
6161
|
+
"重置 —— 重置本地 session 统计",
|
|
6162
|
+
"取消",
|
|
6265
6163
|
];
|
|
6266
|
-
const choice = await cmdCtx.ui.select("
|
|
6164
|
+
const choice = await cmdCtx.ui.select("缓存优化器", menuOptions);
|
|
6267
6165
|
if (choice === menuOptions[0]) {
|
|
6268
6166
|
setRuntimeOptimizerEnabled(true);
|
|
6269
6167
|
resetCurrentSessionStats();
|
|
6270
6168
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6271
6169
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6272
|
-
cmdCtx.ui.notify(`✅
|
|
6170
|
+
cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6273
6171
|
} else if (choice === menuOptions[1]) {
|
|
6274
6172
|
setRuntimeOptimizerEnabled(false);
|
|
6275
6173
|
resetCurrentSessionStats();
|
|
6276
6174
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6277
6175
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6278
|
-
cmdCtx.ui.notify(`⏸️
|
|
6176
|
+
cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6279
6177
|
} else if (choice === menuOptions[2]) {
|
|
6280
6178
|
if (!model) {
|
|
6281
|
-
cmdCtx.ui.notify("
|
|
6179
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6282
6180
|
} else {
|
|
6283
6181
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
6284
6182
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6293,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6293
6191
|
}
|
|
6294
6192
|
} else if (choice === menuOptions[3]) {
|
|
6295
6193
|
if (!model) {
|
|
6296
|
-
cmdCtx.ui.notify("
|
|
6194
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6297
6195
|
} else {
|
|
6298
6196
|
const adapter = selectAdapterForModel(model);
|
|
6299
6197
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
@@ -6304,7 +6202,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6304
6202
|
}
|
|
6305
6203
|
} else if (choice === menuOptions[4]) {
|
|
6306
6204
|
if (!model) {
|
|
6307
|
-
cmdCtx.ui.notify("
|
|
6205
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6308
6206
|
} else {
|
|
6309
6207
|
const compatResult = buildCompatDiagnosis(model);
|
|
6310
6208
|
if (compatResult) {
|
|
@@ -6312,7 +6210,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6312
6210
|
} else {
|
|
6313
6211
|
cmdCtx.ui.notify(
|
|
6314
6212
|
isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
6315
|
-
? "✅
|
|
6213
|
+
? "✅ compat 配置完整。"
|
|
6316
6214
|
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
6317
6215
|
"info",
|
|
6318
6216
|
);
|
|
@@ -6321,13 +6219,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
6321
6219
|
} else if (choice === menuOptions[5]) {
|
|
6322
6220
|
// Fix — auto-fix compat issues
|
|
6323
6221
|
if (!model) {
|
|
6324
|
-
cmdCtx.ui.notify("
|
|
6222
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6325
6223
|
return;
|
|
6326
6224
|
}
|
|
6327
6225
|
const suggestion = buildFixSuggestion(model);
|
|
6328
6226
|
if (!suggestion) {
|
|
6329
6227
|
const key = modelKey(model);
|
|
6330
|
-
cmdCtx.ui.notify(`✅
|
|
6228
|
+
cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
|
|
6331
6229
|
return;
|
|
6332
6230
|
}
|
|
6333
6231
|
|
|
@@ -6335,30 +6233,30 @@ export default function (pi: ExtensionAPI) {
|
|
|
6335
6233
|
const compatResult = buildCompatDiagnosis(model);
|
|
6336
6234
|
const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
|
|
6337
6235
|
cmdCtx.ui.notify(
|
|
6338
|
-
`📝
|
|
6339
|
-
|
|
6340
|
-
|
|
6341
|
-
|
|
6236
|
+
`📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
|
|
6237
|
+
`提供方:${suggestion.providerLabel}\n` +
|
|
6238
|
+
`模型:${suggestion.modelId}\n\n` +
|
|
6239
|
+
`添加这些 compat 键:\n\n` +
|
|
6342
6240
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6343
|
-
|
|
6241
|
+
`编辑后运行 /reload。\n` +
|
|
6344
6242
|
(compatResult ? `\n${compatResult}` : ""),
|
|
6345
6243
|
"info",
|
|
6346
6244
|
);
|
|
6347
6245
|
} else if (choice === menuOptions[6]) {
|
|
6348
6246
|
if (!model) {
|
|
6349
|
-
cmdCtx.ui.notify("
|
|
6247
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6350
6248
|
} else {
|
|
6351
6249
|
const adapter = selectAdapterForModel(model);
|
|
6352
6250
|
if (!adapter) {
|
|
6353
|
-
cmdCtx.ui.notify("ℹ️
|
|
6251
|
+
cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
|
|
6354
6252
|
} else {
|
|
6355
6253
|
const displayKey = modelKey(model);
|
|
6356
6254
|
resetStatsForModel(model);
|
|
6357
6255
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6358
6256
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6359
6257
|
cmdCtx.ui.notify(
|
|
6360
|
-
`✅
|
|
6361
|
-
"
|
|
6258
|
+
`✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
|
|
6259
|
+
"上游 provider 的 prompt cache 未被修改。",
|
|
6362
6260
|
"info",
|
|
6363
6261
|
);
|
|
6364
6262
|
}
|
|
@@ -6370,14 +6268,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
6370
6268
|
|
|
6371
6269
|
// Fallback: text help when no interactive UI
|
|
6372
6270
|
const diagnosis: string[] = [];
|
|
6373
|
-
diagnosis.push("📋 /cache-optimizer
|
|
6374
|
-
diagnosis.push(" enable
|
|
6375
|
-
diagnosis.push(" disable
|
|
6376
|
-
diagnosis.push(" doctor
|
|
6377
|
-
diagnosis.push(" stats
|
|
6378
|
-
diagnosis.push(" compat
|
|
6379
|
-
diagnosis.push(" fix
|
|
6380
|
-
diagnosis.push(" reset
|
|
6271
|
+
diagnosis.push("📋 /cache-optimizer 命令:");
|
|
6272
|
+
diagnosis.push(" enable —— 为当前 OMP 进程开启 prompt/cache 优化");
|
|
6273
|
+
diagnosis.push(" disable —— 为当前 OMP 进程关闭 prompt/cache 优化");
|
|
6274
|
+
diagnosis.push(" doctor —— 查看当前模型/provider/api/baseUrl/compat 与低命中诊断");
|
|
6275
|
+
diagnosis.push(" stats —— 查看当前活动模型的统计桶与近期趋势");
|
|
6276
|
+
diagnosis.push(" compat —— 查看 compat 建议与编辑位置");
|
|
6277
|
+
diagnosis.push(" fix —— 查看 compat 修复建议(需要 UI 时另有提示)");
|
|
6278
|
+
diagnosis.push(" reset —— 重置当前模型的本地 session 统计(不影响上游)");
|
|
6381
6279
|
diagnosis.push("");
|
|
6382
6280
|
diagnosis.push(formatOptimizerRuntimeMode());
|
|
6383
6281
|
diagnosis.push("");
|
|
@@ -6385,17 +6283,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
6385
6283
|
const displayKey = modelKey(model);
|
|
6386
6284
|
const missing = describeMissingCacheCompatForModel(model);
|
|
6387
6285
|
if (missing.length > 0) {
|
|
6388
|
-
diagnosis.push(`⚠️
|
|
6389
|
-
diagnosis.push('
|
|
6286
|
+
diagnosis.push(`⚠️ 当前模型 "${displayKey}" 缺少 compat:${missing.join(", ")}`);
|
|
6287
|
+
diagnosis.push('可运行 "/cache-optimizer compat" 查看编辑建议。');
|
|
6390
6288
|
} else if (isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)) {
|
|
6391
|
-
diagnosis.push(`✅
|
|
6289
|
+
diagnosis.push(`✅ 当前模型 "${displayKey}":compat 配置完整。`);
|
|
6392
6290
|
} else {
|
|
6393
|
-
diagnosis.push(`ℹ️
|
|
6291
|
+
diagnosis.push(`ℹ️ 当前模型 "${displayKey}":不适用 compat 检查。`);
|
|
6394
6292
|
const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
|
|
6395
6293
|
for (const line of detailLines) diagnosis.push(line);
|
|
6396
6294
|
}
|
|
6397
6295
|
} else {
|
|
6398
|
-
diagnosis.push("
|
|
6296
|
+
diagnosis.push("当前没有活动模型。");
|
|
6399
6297
|
}
|
|
6400
6298
|
cmdCtx.ui.notify(diagnosis.join("\n"), "info");
|
|
6401
6299
|
}
|
package/package.json
CHANGED