omp-cache-optimizer 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.ts +197 -299
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -1068,18 +1068,18 @@ function isRuntimeOptimizerEnabled(): boolean {
1068
1068
  }
1069
1069
 
1070
1070
  function getOptimizerRuntimeModeLines(): string[] {
1071
- const state = runtimeOptimizerEnabled ? "enabled" : "disabled";
1071
+ const state = runtimeOptimizerEnabled ? "已启用" : "已关闭";
1072
1072
  const lines: string[] = [];
1073
- lines.push(`Runtime state: ${state}`);
1074
- lines.push(`• Prompt rewrite: ${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "on" : "off"}`);
1075
- lines.push(`• OpenAI prompt_cache_key fallback: ${shouldInjectOpenAIPromptCacheKey() ? "on" : "off"}`);
1076
- lines.push(`• Footer cache stats: on${runtimeOptimizerEnabled ? "" : " (comparison mode)"}`);
1077
- lines.push(`• Compat warnings: ${runtimeOptimizerEnabled ? "on" : "off"}`);
1078
- lines.push(`• ${PI_CACHE_RETENTION_ENV}: ${process.env[PI_CACHE_RETENTION_ENV] ?? "(unset)"}`);
1073
+ lines.push(`运行状态:${state}`);
1074
+ lines.push(`• Prompt 重写:${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "开启" : "关闭"}`);
1075
+ lines.push(`• OpenAI prompt_cache_key 回退:${shouldInjectOpenAIPromptCacheKey() ? "开启" : "关闭"}`);
1076
+ lines.push(`• Footer 缓存统计:开启${runtimeOptimizerEnabled ? "" : "(对比模式)"}`);
1077
+ lines.push(`• Compat 提示:${runtimeOptimizerEnabled ? "开启" : "关闭"}`);
1078
+ lines.push(`• ${PI_CACHE_RETENTION_ENV}:${process.env[PI_CACHE_RETENTION_ENV] ?? "(未设置)"}`);
1079
1079
  if (!runtimeOptimizerEnabled) {
1080
- lines.push("This is a current-process switch. Run /reload or restart OMP to return to startup behavior.");
1080
+ lines.push("这是当前进程内开关。运行 /reload 或重启 OMP 可恢复到启动时行为。");
1081
1081
  } else if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) || !shouldInjectOpenAIPromptCacheKey()) {
1082
- lines.push("Some features are still disabled by environment variables.");
1082
+ lines.push("仍有部分能力被环境变量关闭。");
1083
1083
  }
1084
1084
  return lines;
1085
1085
  }
@@ -1214,9 +1214,9 @@ function buildAdaptiveThinkingCompatSuggestion(_missing: string[]): Record<strin
1214
1214
  }
1215
1215
 
1216
1216
  function appendAdaptiveThinkingCompatAdviceLines(lines: string[], _missing: string[], placement: CompatAdvicePlacement = {}): void {
1217
- lines.push("- Adaptive thinking: OMP's built-in model catalog sets this automatically for official Claude models.");
1218
- lines.push(" Custom channels fronting Anthropic should rely on the bundled catalog metadata;");
1219
- lines.push(" if the upstream rejects adaptive thinking, verify the model id matches an official release.");
1217
+ lines.push("- 自适应思考:OMP 内置模型目录会为官方 Claude 模型自动设置。");
1218
+ lines.push(" 自定义 Anthropic 渠道应依赖内置 catalog 元数据;");
1219
+ lines.push(" 如果上游拒绝 adaptive thinking,请确认模型 id 是否匹配官方发布版本。");
1220
1220
  appendCredentialSafeProviderGuidance(lines, placement, {});
1221
1221
  }
1222
1222
 
@@ -1226,10 +1226,10 @@ function buildAdaptiveThinkingCompatWarningText(key: string, _missing: string[])
1226
1226
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1227
1227
  const modelsJsonPath = getModelsJsonDisplayPath();
1228
1228
  const lines: string[] = [
1229
- `ℹ️ omp-cache-optimizer: ${key} is an adaptive-generation Claude model.`,
1230
- `OMP's built-in catalog handles adaptive thinking automatically; no models.yml compat key is needed`,
1231
- `for official models. Custom channels fronting Anthropic may need explicit catalog metadata.`,
1232
- `See ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? "<id>"}".`,
1229
+ `ℹ️ omp-cache-optimizer:${key} 是支持自适应生成的 Claude 模型。`,
1230
+ "OMP 内置 catalog 会自动处理自适应思考;官方模型不需要额外的 models.yml compat 键。",
1231
+ "如果是转发 Anthropic 的自定义渠道,可能仍需要显式 catalog 元数据。",
1232
+ `可参考 ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? '<id>'}"。`,
1233
1233
  "",
1234
1234
  ];
1235
1235
  appendAdaptiveThinkingCompatAdviceLines(lines, [], { providerLabel, modelId });
@@ -2103,7 +2103,7 @@ function buildSafeOpenAIProxyCompatSuggestion(_missing: string[]): Record<string
2103
2103
  }
2104
2104
 
2105
2105
  function getPromptCacheRetentionUnsupportedHint(): string {
2106
- return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongPromptCacheRetention`; this extension does not write that field directly, but OMP may send it when long retention is requested and compat says the proxy supports it.";
2106
+ return "如果这个渠道返回 `400 Unsupported parameter: prompt_cache_retention`,请移除或避免 `supportsLongPromptCacheRetention`;扩展本身不会直接写这个字段,但当 compat 声明支持长缓存保留时,OMP 可能会发送它。";
2107
2107
  }
2108
2108
 
2109
2109
  function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
@@ -2157,20 +2157,20 @@ function appendCredentialSafeProviderGuidance(lines: string[], placement: Compat
2157
2157
  if (!providerLabel) return;
2158
2158
 
2159
2159
  lines.push("");
2160
- lines.push("If this channel has no models.yml provider entry yet:");
2161
- lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
2162
- lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
2160
+ lines.push("如果这个渠道在 models.yml 里还没有 provider 配置:");
2161
+ lines.push("- 保留现有认证方式;不要复制 credential、token 或 API key。");
2162
+ lines.push(`- 只在 ${getModelsJsonDisplayPath()} 里添加缓存/路由 compat 覆盖。`);
2163
2163
 
2164
2164
  if (Object.keys(compatSuggestion).length === 0) {
2165
- lines.push("- No safe copyable override is available for the missing flags shown above.");
2165
+ lines.push("- 上面这些缺失项目前没有安全可复制的 override。");
2166
2166
  return;
2167
2167
  }
2168
2168
 
2169
- lines.push("Provider-level minimal override:");
2169
+ lines.push("Provider 级最小覆盖:");
2170
2170
  lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
2171
2171
 
2172
2172
  if (placement.modelId) {
2173
- lines.push("Single-model override (use this if only this model should change):");
2173
+ lines.push("单模型 override(只想影响当前模型时使用):");
2174
2174
  lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
2175
2175
  }
2176
2176
  }
@@ -2181,21 +2181,19 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
2181
2181
 
2182
2182
  if (hasSafeSuggestion) {
2183
2183
  if (options.includeJsonIntro !== false) {
2184
- lines.push("Safe default suggestion:");
2184
+ lines.push("安全默认建议:");
2185
2185
  }
2186
2186
  lines.push(JSON.stringify(suggestion, null, 2));
2187
2187
  }
2188
2188
 
2189
- // OMP divergence: session affinity is handled by multi-credential auth, not compat.
2190
- // No per-flag advice lines remain; only the optional long-retention guidance below.
2191
2189
  appendCredentialSafeProviderGuidance(lines, options, suggestion);
2192
2190
  }
2193
2191
 
2194
2192
  function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: string[]): void {
2195
2193
  if (!optional.includes("supportsLongPromptCacheRetention")) return;
2196
2194
  lines.push("");
2197
- lines.push("Optional (not required, not auto-fixed):");
2198
- lines.push("- supportsLongPromptCacheRetention: enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
2195
+ lines.push("可选项(非必需,不会自动修复):");
2196
+ lines.push("- supportsLongPromptCacheRetention:仅当 endpoint / proxy 明确支持 OpenAI long prompt cache retention 时再开启。");
2199
2197
  lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
2200
2198
  }
2201
2199
 
@@ -2212,17 +2210,15 @@ function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: s
2212
2210
  * exercise it via __internals_for_tests.
2213
2211
  */
2214
2212
  function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
2215
- // Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
2216
- // If no slash is found, fall back to the key itself.
2217
2213
  const slashIdx = key.indexOf("/");
2218
2214
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
2219
2215
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
2220
2216
 
2221
2217
  const modelsJsonPath = getModelsJsonDisplayPath();
2222
2218
  const lines: string[] = [
2223
- `💡 omp-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
2224
- `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
2225
- ``,
2219
+ `💡 omp-cache-optimizer:${key} 是第三方 GPT/OpenAI 兼容代理,但合并后的 compat 缺少 ${missing.join("、")}。`,
2220
+ `编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
2221
+ "",
2226
2222
  ];
2227
2223
 
2228
2224
  appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
@@ -2281,20 +2277,16 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
2281
2277
  function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
2282
2278
  const suggestion = buildDeepSeekCompatSuggestion(missing);
2283
2279
  if (Object.keys(suggestion).length > 0) {
2284
- lines.push("Recommended DeepSeek compat snippet:");
2280
+ lines.push("推荐的 DeepSeek compat 片段:");
2285
2281
  lines.push(JSON.stringify(suggestion, null, 2));
2286
2282
  }
2287
2283
 
2288
2284
  if (missing.includes("requiresReasoningContentForToolCalls")) {
2289
- lines.push("- requiresReasoningContentForToolCalls: true keeps replayed assistant tool-call turns compatible with DeepSeek reasoning_content requirements.");
2285
+ lines.push("- requiresReasoningContentForToolCalls:保持带工具调用的 assistant 重放与 DeepSeek reasoning_content 要求兼容。");
2290
2286
  }
2291
2287
  if (missing.includes("supportsLongPromptCacheRetention")) {
2292
- lines.push("- supportsLongPromptCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
2288
+ lines.push("- supportsLongPromptCacheRetention:仅当 DeepSeek 兼容 endpoint 支持长缓存保留时再开启。");
2293
2289
  }
2294
- // OMP divergence: thinkingFormat is no longer flagged. DeepSeek reasoning format
2295
- // is auto-detected by OMP's openai-completions transport; the "deepseek" value
2296
- // is not a valid OMP thinkingFormat (OMP uses openai|openrouter|zai|qwen|...).
2297
- // Session affinity is handled by OMP multi-credential auth, not compat keys.
2298
2290
 
2299
2291
  appendCredentialSafeProviderGuidance(lines, placement, suggestion);
2300
2292
  }
@@ -2305,8 +2297,8 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
2305
2297
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
2306
2298
  const modelsJsonPath = getModelsJsonDisplayPath();
2307
2299
  const lines: string[] = [
2308
- `💡 omp-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
2309
- `Proxies may reduce or hide cache hits. Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
2300
+ `💡 omp-cache-optimizer:${key} 看起来是 DeepSeek 风格模型,但合并后的 compat 缺少 ${missing.join("、")}。`,
2301
+ `这可能让代理降低或隐藏缓存命中。编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
2310
2302
  "",
2311
2303
  ];
2312
2304
 
@@ -2354,8 +2346,8 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
2354
2346
  if (getCompat(model).cacheControlFormat === "anthropic") return undefined;
2355
2347
 
2356
2348
  return (
2357
- `💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
2358
- "OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2349
+ `💡 omp-cache-optimizer:${modelKey(model)} 看起来是 Claude/Anthropic 风格模型,但 OpenAI 兼容 compat 缺少 cacheControlFormat: "anthropic"。` +
2350
+ "只有当 endpoint 支持并启用了这个 compat 字段时,OMP 才能放置 Anthropic cache_control 断点。"
2359
2351
  );
2360
2352
  },
2361
2353
  },
@@ -3352,30 +3344,26 @@ function formatTokenCount(value: number): string {
3352
3344
  return `${millions.toFixed(2)}M`;
3353
3345
  }
3354
3346
 
3347
+ function localizeAdapterLabel(label: string): string {
3348
+ return label.endsWith(" cache") ? `${label.slice(0, -6)} 缓存` : label;
3349
+ }
3350
+
3355
3351
  function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
3356
3352
  const percent = stats.totalInputTokens > 0
3357
3353
  ? ` (${Math.round((stats.cachedInputTokens / stats.totalInputTokens) * 100)}%)`
3358
3354
  : "";
3359
3355
  const writeText = adapter.showCacheWrite && stats.cacheWriteInputTokens > 0
3360
- ? ` · write ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
3356
+ ? ` · 写入 ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
3361
3357
  : "";
3362
3358
 
3363
- return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
3359
+ return `${localizeAdapterLabel(adapter.label)} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
3364
3360
  }
3365
3361
 
3366
- /**
3367
- * Compute a hit-ratio percentage string for a value between 0 and 1.
3368
- * Returns e.g. "75%", "0%", "100%", or "N/A" for zero total.
3369
- */
3370
3362
  function formatHitRatio(hits: number, total: number): string {
3371
- if (total <= 0) return "N/A";
3363
+ if (total <= 0) return "无数据";
3372
3364
  return `${Math.round((hits / total) * 100)}%`;
3373
3365
  }
3374
3366
 
3375
- /**
3376
- * Format a token-to-M abbreviation for stats output.
3377
- * Example: 1500000 → "1.50M"
3378
- */
3379
3367
  function formatTokenM(value: number): string {
3380
3368
  const millions = Math.max(0, Math.round(value)) / 1_000_000;
3381
3369
  if (millions === 0) return "0";
@@ -3384,27 +3372,18 @@ function formatTokenM(value: number): string {
3384
3372
  return millions.toFixed(2);
3385
3373
  }
3386
3374
 
3387
- /**
3388
- * Check if an assistant message's usage fields appear to be missing or empty.
3389
- * Returns true when normalized fields (input, cacheRead, cacheWrite) are all
3390
- * absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
3391
- * for the given adapter.
3392
- */
3393
3375
  function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
3394
3376
  const usage = usageRecordFromAssistant(message);
3395
3377
  if (!usage) return true;
3396
3378
 
3397
- // Check normalized fields
3398
3379
  const input = getNonNegativeNumber(usage, "input");
3399
3380
  const cacheRead = getNonNegativeNumber(usage, "cacheRead");
3400
3381
  const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
3401
3382
 
3402
- // If normalized fields exist with non-zero values, usage is present
3403
3383
  if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
3404
3384
  return false;
3405
3385
  }
3406
3386
 
3407
- // Check raw usage for the adapter's provider family
3408
3387
  const rawUsage = adapter.normalizeUsage(message);
3409
3388
  if (!rawUsage || (rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0)) {
3410
3389
  return true;
@@ -3413,64 +3392,55 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
3413
3392
  return false;
3414
3393
  }
3415
3394
 
3416
- /**
3417
- * Build a summary string for the recent trend (last N samples).
3418
- * Example: "Recent 10: 7/10 hits · 65% tok cached · no missing usage"
3419
- */
3420
3395
  function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
3421
3396
  const recent = samples.slice(-maxCount);
3422
- if (recent.length === 0) return `Recent ${maxCount}: no samples yet`;
3397
+ if (recent.length === 0) return `最近 ${maxCount} 次:暂无样本`;
3423
3398
 
3424
3399
  const hits = recent.filter((s) => s.hit).length;
3425
3400
  const totalCached = recent.reduce((sum, s) => sum + s.cachedInputTokens, 0);
3426
3401
  const totalInput = recent.reduce((sum, s) => sum + s.totalInputTokens, 0);
3427
3402
  const missingCount = recent.filter((s) => s.missingUsageFields).length;
3428
3403
 
3429
- const hitRatio = formatHitRatio(hits, recent.length);
3430
- const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "N/A";
3404
+ const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "无数据";
3431
3405
 
3432
- let result = `Recent ${recent.length}/${maxCount}: ${hits}/${recent.length} hits · ${tokenRatio} tok cached`;
3406
+ let result = `最近 ${recent.length}/${maxCount} 次:${hits}/${recent.length} 次命中 · ${tokenRatio} tok 已缓存`;
3433
3407
  if (missingCount > 0) {
3434
- result += ` · ${missingCount} missing usage`;
3408
+ result += ` · ${missingCount} usage 缺失`;
3435
3409
  }
3436
3410
  return result;
3437
3411
  }
3438
3412
 
3439
- /**
3440
- * Build the output for `/cache-optimizer stats`.
3441
- */
3442
3413
  function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
3443
3414
  const lines: string[] = [];
3444
3415
 
3445
3416
  if (!model || !adapter) {
3446
- lines.push("ℹ️ No cache-adapter-matched model active. Select a model with a recognized provider family.");
3417
+ lines.push("ℹ️ 当前活动模型未匹配到缓存适配器。请选择可识别模型家族后再查看统计。");
3447
3418
  return lines.join("\n");
3448
3419
  }
3449
3420
 
3450
3421
  const key = modelKey(model);
3451
3422
  const currentStats = stats ?? emptyCacheStats();
3452
3423
 
3453
- lines.push(`Model key: ${key}`);
3454
- lines.push(`Adapter: ${adapter.label}`);
3424
+ lines.push(`模型键:${key}`);
3425
+ lines.push(`适配器:${localizeAdapterLabel(adapter.label)}`);
3455
3426
  lines.push("");
3456
- lines.push("── Today ──");
3457
- lines.push(`Requests: ${currentStats.hitRequests} hit / ${currentStats.totalRequests} total · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
3458
- lines.push(`Cached tokens: ${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M input · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "N/A"}`);
3427
+ lines.push("── 今日 ──");
3428
+ lines.push(`请求数:${currentStats.hitRequests} 次命中 / ${currentStats.totalRequests} 次总计 · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
3429
+ lines.push(`缓存 tokens:${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M 输入 · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "无数据"}`);
3459
3430
  if (currentStats.cacheWriteInputTokens > 0) {
3460
- lines.push(`Cache write: ${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
3431
+ lines.push(`缓存写入:${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
3461
3432
  }
3462
3433
 
3463
3434
  lines.push("");
3464
- lines.push("── Recent trend ──");
3435
+ lines.push("── 近期趋势 ──");
3465
3436
  lines.push(formatRecentTrendSummary(recentSamples, 10));
3466
3437
  lines.push(formatRecentTrendSummary(recentSamples, 30));
3467
3438
 
3468
- // Check if any sample has missingUsageFields flagged
3469
3439
  const missingAny = recentSamples.some((s) => s.missingUsageFields);
3470
3440
  if (missingAny) {
3471
3441
  lines.push("");
3472
- lines.push("⚠️ Some recent responses had missing or empty cache usage fields. Footer may under-report hits.");
3473
- lines.push(" The proxy may not return prompt_cache_hit_tokens or usage.input/cacheRead in responses.");
3442
+ lines.push("⚠️ 近期有响应缺少或返回了空的缓存 usage 字段,footer 命中率可能偏低。");
3443
+ lines.push(" 代理可能没有返回 prompt_cache_hit_tokens,或没有返回 usage.input/cacheRead 等字段。");
3474
3444
  }
3475
3445
 
3476
3446
  return lines.join("\n");
@@ -3911,19 +3881,14 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3911
3881
  const baseUrl = lower(model.baseUrl || "");
3912
3882
  const provider = lower(model.provider);
3913
3883
 
3914
- // Router/channel diagnostics only apply to OpenAI-compatible proxy APIs.
3915
- // Native APIs like mistral-conversations, azure-openai-responses,
3916
- // anthropic-messages, or bedrock-converse-stream are intentionally excluded.
3917
3884
  if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
3918
3885
  return notes;
3919
3886
  }
3920
3887
 
3921
- // Official OpenAI bypass — no notes needed.
3922
3888
  if (isOfficialOpenAIBaseUrl(model)) {
3923
3889
  return notes;
3924
3890
  }
3925
3891
 
3926
- // ── 1. OpenRouter ────────────────────────────────────────────────
3927
3892
  if (
3928
3893
  baseUrl.includes("openrouter.ai") ||
3929
3894
  baseUrl.includes("openrouter") ||
@@ -3935,32 +3900,28 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3935
3900
  const hasOrder = !!routing?.order;
3936
3901
 
3937
3902
  notes.push(
3938
- "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
3939
- "low cache hit rates are common when each turn lands on a different upstream provider.",
3903
+ "🔀 路由/渠道:检测到 OpenRouter。OpenRouter 是多上游路由器;如果每一轮落到不同上游,缓存命中率偏低很常见。",
3940
3904
  );
3941
3905
 
3942
3906
  if (!hasOnly && !hasOrder) {
3943
3907
  notes.push(
3944
- " Suggestion: Add an openRouterRouting config to fix the upstream provider. " +
3945
- "Example for models.yml -> providers[\"<providerId>\"] -> compat:",
3908
+ ' 建议:添加 openRouterRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
3946
3909
  );
3947
3910
  notes.push(
3948
3911
  ` { "supportsLongPromptCacheRetention": true, ` +
3949
3912
  `"openRouterRouting": { "only": ["<provider-slug>"] } }`,
3950
3913
  );
3951
3914
  notes.push(
3952
- ' Replace <provider-slug> with the actual OpenRouter provider slug (e.g. "openai", "anthropic").',
3915
+ ' 把 <provider-slug> 替换成真实的 OpenRouter provider slug(如 "openai"、"anthropic")。',
3953
3916
  );
3954
3917
  notes.push(
3955
- " Alternatively, use openRouterRouting.order: [\"<provider-slug>\", \"...\"] for fallback order. " +
3956
- "Only set supportsLongPromptCacheRetention if your upstream supports long cache retention.",
3918
+ ' 也可以用 openRouterRouting.order: ["<provider-slug>", "..."] 作为回退顺序。只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。',
3957
3919
  );
3958
3920
  }
3959
3921
 
3960
3922
  return notes;
3961
3923
  }
3962
3924
 
3963
- // ── 2. Vercel AI Gateway ─────────────────────────────────────────
3964
3925
  if (
3965
3926
  baseUrl.includes("ai-gateway.vercel.sh") ||
3966
3927
  provider.includes("vercel") ||
@@ -3972,81 +3933,54 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3972
3933
  const hasOrder = !!routing?.order;
3973
3934
 
3974
3935
  notes.push(
3975
- "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
3976
- "provider endpoints per request, reducing cache locality.",
3936
+ "🔀 路由/渠道:检测到 Vercel AI Gateway。这个网关可能把不同请求分发到不同 provider endpoint,降低缓存局部性。",
3977
3937
  );
3978
3938
 
3979
3939
  if (!hasOnly && !hasOrder) {
3980
3940
  notes.push(
3981
- " Suggestion: Add a vercelGatewayRouting config to fix the upstream. " +
3982
- "Example for models.yml -> providers[\"<providerId>\"] -> compat:",
3941
+ ' 建议:添加 vercelGatewayRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
3983
3942
  );
3984
3943
  notes.push(
3985
3944
  ` { "supportsLongPromptCacheRetention": true, ` +
3986
3945
  `"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
3987
3946
  );
3988
3947
  notes.push(
3989
- " Replace <provider-id> with the actual Vercel provider ID (e.g. \"openai\").",
3948
+ ' 把 <provider-id> 替换成真实的 Vercel provider ID(如 "openai")。',
3990
3949
  );
3991
3950
  notes.push(
3992
- " Only set supportsLongPromptCacheRetention if your upstream supports it.",
3951
+ " 只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。",
3993
3952
  );
3994
3953
  }
3995
3954
 
3996
3955
  return notes;
3997
3956
  }
3998
3957
 
3999
- // ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
4000
3958
  const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
4001
3959
  if (
4002
3960
  aggregationPatterns.some((p) => baseUrl.includes(p)) ||
4003
3961
  aggregationPatterns.some((p) => provider.includes(p))
4004
3962
  ) {
4005
3963
  notes.push(
4006
- "🔀 Router/channel: Self-hosted aggregation proxy detected (LiteLLM / OneAPI / NewAPI / VoAPI). " +
4007
- "These proxies route to multiple upstream accounts or instances, which can split the cache.",
4008
- );
4009
- notes.push(
4010
- " Suggestions:",
4011
- );
4012
- notes.push(
4013
- " • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
4014
- );
4015
- notes.push(
4016
- " • Forward prompt_cache_key and session-affinity headers to the upstream.",
4017
- );
4018
- notes.push(
4019
- " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
4020
- );
4021
- notes.push(
4022
- ` Safe compat default: { "supportsLongPromptCacheRetention": true }`,
4023
- );
4024
- notes.push(
4025
- ` Add supportsLongPromptCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
3964
+ "🔀 路由/渠道:检测到自建聚合代理(LiteLLM / OneAPI / NewAPI / VoAPI)。这类代理常把请求分到多个上游账号或实例,导致缓存被拆散。",
4026
3965
  );
3966
+ notes.push(" 建议:");
3967
+ notes.push(" • 确保代理能按 session 固定到单一上游(session_id affinity)。");
3968
+ notes.push(" • 向上游透传 prompt_cache_key 与会话亲和性相关 header。");
3969
+ notes.push(" • 在响应里返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
3970
+ notes.push(` 可作为起点的 compat:{ "supportsLongPromptCacheRetention": true }`);
3971
+ notes.push(" 只有在代理明确支持 prompt_cache_retention 时才加 supportsLongPromptCacheRetention。");
4027
3972
 
4028
3973
  return notes;
4029
3974
  }
4030
3975
 
4031
- // ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
4032
3976
  if (api === "openai-completions" && baseUrl) {
4033
3977
  const missing = describeMissingCacheCompatForModel(model);
4034
- notes.push(
4035
- "🔀 Router/channel: Third-party OpenAI-compatible proxy. If cache hit rates are low:",
4036
- );
4037
- notes.push(
4038
- " • Verify the proxy routes to the same upstream account/instance per session.",
4039
- );
4040
- notes.push(
4041
- " • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
4042
- );
4043
- notes.push(
4044
- " • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
4045
- );
3978
+ notes.push("🔀 路由/渠道:第三方 OpenAI 兼容代理。如果缓存命中率偏低:");
3979
+ notes.push(" • 确认代理会把同一 session 路由到同一个上游账号/实例。");
3980
+ notes.push(" • 确认代理会透传 prompt_cache_key,并发送会话亲和性相关 header。");
3981
+ notes.push(" • 确认代理会返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
4046
3982
  if (missing.length > 0) {
4047
- notes.push(
4048
- ` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
4049
- );
3983
+ notes.push(` • 上面这些 compat 字段(${missing.join(", ")})有助于提升缓存稳定性。`);
4050
3984
  }
4051
3985
 
4052
3986
  return notes;
@@ -4060,38 +3994,38 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
4060
3994
 
4061
3995
  if (isMistralConversationsApi(api)) {
4062
3996
  return [
4063
- "ℹ️ Compat check not applicable for this model.",
4064
- " Native Mistral `mistral-conversations` uses provider-native transport; OpenAI-compatible proxy compat flags do not apply.",
3997
+ "ℹ️ 当前模型不适用 compat 检查。",
3998
+ " 原生 Mistral `mistral-conversations` 使用 provider 原生传输;OpenAI 兼容代理 compat 不适用。",
4065
3999
  ];
4066
4000
  }
4067
4001
 
4068
4002
  if (api === "azure-openai-responses") {
4069
4003
  return [
4070
- "ℹ️ Compat check not applicable for this model.",
4071
- " Native Azure OpenAI Responses uses the Responses transport; OpenAI-compatible proxy compat flags do not apply.",
4004
+ "ℹ️ 当前模型不适用 compat 检查。",
4005
+ " 原生 Azure OpenAI Responses 使用 Responses 传输;OpenAI 兼容代理 compat 不适用。",
4072
4006
  ];
4073
4007
  }
4074
4008
 
4075
4009
  if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
4076
4010
  return [
4077
- "ℹ️ Compat check not applicable for this model.",
4078
- " Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
4011
+ "ℹ️ 当前模型不适用 compat 检查。",
4012
+ " 原生 Responses 传输已经使用运行时核心请求链路;OpenAI 兼容代理 compat 不适用。",
4079
4013
  ];
4080
4014
  }
4081
4015
 
4082
- return ["ℹ️ Compat check not applicable for this model."];
4016
+ return ["ℹ️ 当前模型不适用 compat 检查。"];
4083
4017
  }
4084
4018
 
4085
4019
  function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
4086
4020
  const lines: string[] = [];
4087
- lines.push(`Provider: ${model.provider}`);
4088
- lines.push(`Model: ${model.id}`);
4089
- if (model.name && model.name !== model.id) lines.push(`Name: ${model.name}`);
4090
- lines.push(`API: ${model.api}`);
4091
- lines.push(`Base URL: ${model.baseUrl || "(default)"}`);
4021
+ lines.push(`提供方:${model.provider}`);
4022
+ lines.push(`模型:${model.id}`);
4023
+ if (model.name && model.name !== model.id) lines.push(`名称:${model.name}`);
4024
+ lines.push(`API:${model.api}`);
4025
+ lines.push(`Base URL:${model.baseUrl || "(默认)"}`);
4092
4026
 
4093
4027
  const compat = getCompat(model);
4094
- lines.push(`Compat: ${JSON.stringify(compat)}`);
4028
+ lines.push(`Compat:${JSON.stringify(compat)}`);
4095
4029
 
4096
4030
  const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
4097
4031
  const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
@@ -4104,10 +4038,10 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4104
4038
  const advisoryMissing = missing.filter(m => !safeFixableMissing.includes(m));
4105
4039
 
4106
4040
  if (safeFixableMissing.length > 0) {
4107
- lines.push(`⚠️ Missing compat flags: ${safeFixableMissing.join(", ")}`);
4041
+ lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissing.join(", ")}`);
4108
4042
  }
4109
4043
  if (advisoryMissing.length > 0) {
4110
- lines.push(`ℹ️ Optional: ${advisoryMissing.join(", ")} (enable only if needed)`);
4044
+ lines.push(`ℹ️ 可选项:${advisoryMissing.join(", ")}(仅在确认支持时启用)`);
4111
4045
  }
4112
4046
 
4113
4047
  if (missing.length > 0) {
@@ -4115,7 +4049,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4115
4049
  const slashIdx = key.indexOf("/");
4116
4050
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
4117
4051
  const modelsJsonPath = getModelsJsonDisplayPath();
4118
- lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
4052
+ lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`);
4119
4053
  if (adaptiveThinkingApplicable) {
4120
4054
  appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4121
4055
  } else if (deepSeekCompatApplicable) {
@@ -4125,7 +4059,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4125
4059
  appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
4126
4060
  }
4127
4061
  } else if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
4128
- lines.push("✅ Compat fully configured.");
4062
+ lines.push("✅ compat 配置完整。");
4129
4063
  appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
4130
4064
  } else {
4131
4065
  lines.push(...getCompatCheckNotApplicableLines(model));
@@ -4134,14 +4068,13 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4134
4068
  if (isPromptCacheRetention400Applicable(model)) {
4135
4069
  lines.push("");
4136
4070
  if (options.promptCacheRetention400) {
4137
- lines.push("⚠️ A 400 response was observed while supportsLongPromptCacheRetention is enabled.");
4071
+ lines.push("⚠️ 在启用 supportsLongPromptCacheRetention 时观测到一次 400 响应。");
4138
4072
  lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
4139
4073
  } else {
4140
- lines.push(`ℹ️ Long retention is enabled. ${getPromptCacheRetentionUnsupportedHint()}`);
4074
+ lines.push(`ℹ️ 已启用长缓存保留。${getPromptCacheRetentionUnsupportedHint()}`);
4141
4075
  }
4142
4076
  }
4143
4077
 
4144
- // ── Router/channel diagnostics ──
4145
4078
  const routerNotes = describeRouterChannelDiagnostics(model);
4146
4079
  if (routerNotes.length > 0) {
4147
4080
  lines.push("");
@@ -4150,31 +4083,24 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4150
4083
  }
4151
4084
  }
4152
4085
 
4153
- // ── Integrity diagnostics ──
4154
4086
  if (lastPromptIntegrityWarningAt > 0) {
4155
4087
  const ago = Date.now() - lastPromptIntegrityWarningAt;
4156
4088
  const mins = Math.floor(ago / 60000);
4157
4089
  if (mins < 5) {
4158
4090
  lines.push("");
4159
- lines.push("⚠️ Recent prompt integrity issue detected:");
4160
- lines.push(` Last detected ${mins > 0 ? `${mins} min` : `${Math.floor(ago / 1000)}s`} ago. The prompt reorder was`);
4161
- lines.push(` skipped on that turn to preserve structural markers.`);
4162
- lines.push(` Common causes: extension system prompt format change, substring collision.`);
4163
- lines.push(` Steps:`);
4164
- lines.push(` 1. Run /reload to reset (may clear transient issues).`);
4165
- lines.push(` 2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 & /reload to disable reorder.`);
4166
- lines.push(` 3. If persistent, file an issue with this doctor output.`);
4091
+ lines.push("⚠️ 最近检测到 prompt 完整性问题:");
4092
+ lines.push(` 最近一次检测于 ${mins > 0 ? `${mins} 分钟` : `${Math.floor(ago / 1000)} 秒`}前;该轮已跳过 prompt 重排以保留结构标记。`);
4093
+ lines.push(" 常见原因:扩展的 system prompt 格式变化,或子串碰撞。");
4094
+ lines.push(" 建议步骤:");
4095
+ lines.push(" 1. 运行 /reload 重置(可清除瞬态问题)。");
4096
+ lines.push(" 2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。");
4097
+ lines.push(" 3. 若持续复现,请带 doctor 输出提 issue。");
4167
4098
  }
4168
4099
  }
4169
4100
 
4170
4101
  return lines.join("\n");
4171
4102
  }
4172
4103
 
4173
- /**
4174
- * Build a "Cache diagnosis" section for low-hit causes, appended to doctor output.
4175
- * This is a separate function because it depends on per-session state (recent samples,
4176
- * per-model stats) that is not available at the module level.
4177
- */
4178
4104
  function buildLowHitDiagnosis(
4179
4105
  model: PiModel,
4180
4106
  adapter: CacheProviderAdapter | undefined,
@@ -4183,101 +4109,74 @@ function buildLowHitDiagnosis(
4183
4109
  ): string[] {
4184
4110
  const lines: string[] = [];
4185
4111
 
4186
- // 1. Missing compat flags (adapter-aware: DeepSeek has extra reasoning compat)
4187
4112
  const fixSugLHD = buildFixSuggestion(model);
4188
4113
  const safeFixableMissingLHD = fixSugLHD ? Object.keys(fixSugLHD.compatKeys) : [];
4189
-
4190
- // 2. Router/channel risk (reuse existing check)
4191
4114
  const routerNotes = describeRouterChannelDiagnostics(model);
4192
-
4193
- // 3. Recent samples missing usage fields
4194
4115
  const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;
4195
-
4196
- // 4. Recent trend analysis
4197
4116
  const recent10 = samples.slice(-10);
4198
4117
  const recent10Hits = recent10.filter((s) => s.hit).length;
4199
4118
  const recent10Total = recent10.length;
4200
4119
  const recent10Cached = recent10.reduce((sum, s) => sum + s.cachedInputTokens, 0);
4201
4120
  const recent10Input = recent10.reduce((sum, s) => sum + s.totalInputTokens, 0);
4202
-
4203
- // 5. Today's overall trend from persisted stats
4204
4121
  const todayStats = stats ?? emptyCacheStats();
4205
4122
 
4206
4123
  const hasMissingCompat = safeFixableMissingLHD.length > 0;
4207
4124
  const hasRouterRisk = routerNotes.length > 0;
4208
4125
  const hasUsageMissing = missingUsageSamples > 0;
4209
-
4210
- // Today's cached-token ratio is used both inside and outside the recent-sample
4211
- // branch. Keep it block-external so doctor/stats never throw for low-hit
4212
- // models that have persisted counters but no recent in-memory samples.
4213
4126
  const todayHitRatio = todayStats.totalInputTokens > 0
4214
4127
  ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
4215
4128
  : 0;
4216
4129
 
4217
- // Determine if there are actual issues worth flagging
4218
4130
  const hasActualIssues = hasMissingCompat || hasUsageMissing ||
4219
- // Low hit trend (today total > 3 and hit ratio < 30%)
4220
4131
  (todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
4221
4132
  (todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
4222
- // Low hit rate in recent samples (recent10Total >= 3 and all misses)
4223
4133
  (recent10Total >= 3 && recent10Hits === 0);
4224
4134
 
4225
- // Skip section if no issues
4226
4135
  if (!hasActualIssues && !(hasRouterRisk && (hasMissingCompat || hasUsageMissing))) {
4227
4136
  return lines;
4228
4137
  }
4229
4138
 
4230
4139
  lines.push("");
4231
- lines.push("── Cache diagnosis ──");
4140
+ lines.push("── 缓存诊断 ──");
4232
4141
 
4233
- // Priority 1: missing compat flags
4234
4142
  if (hasMissingCompat) {
4235
- lines.push(`⚠️ Missing compat flags: ${safeFixableMissingLHD.join(", ")}`);
4236
- lines.push(" These flags enable prompt caching and session-affinity routing.");
4237
- lines.push(" Run /cache-optimizer compat for edit instructions.");
4143
+ lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissingLHD.join(", ")}`);
4144
+ lines.push(" 这些字段有助于稳定 prompt 缓存与上游路由粘性。");
4145
+ lines.push(" 可运行 /cache-optimizer compat 查看编辑建议。");
4238
4146
  }
4239
4147
 
4240
- // Priority 2: router/channel risk (only flag when there are other issues)
4241
- // Router notes are already shown in the main doctor output, so we only
4242
- // mention them in the diagnosis section when they compound a problem.
4243
4148
  if (hasRouterRisk && (hasMissingCompat || hasUsageMissing || hasActualIssues)) {
4244
- lines.push("🔀 Router/channel proxy detected — see routing notes above.");
4149
+ lines.push("🔀 检测到路由/代理风险 —— 详见上方路由诊断。");
4245
4150
  }
4246
4151
 
4247
- // Priority 3: usage fields missing
4248
4152
  if (hasUsageMissing) {
4249
- lines.push(`⚠️ ${missingUsageSamples}/${samples.length} recent responses had missing/empty usage fields.`);
4250
- lines.push(" Footer may under-report cache hit rate.");
4251
- lines.push(" Verify the proxy returns prompt-level usage (prompt_tokens, input_tokens_details).");
4153
+ lines.push(`⚠️ 最近 ${samples.length} 条样本里有 ${missingUsageSamples} 条缺少或返回了空的 usage 字段。`);
4154
+ lines.push(" Footer 命中率可能会被低估。");
4155
+ lines.push(" 请确认代理会返回 prompt usage(如 prompt_tokensinput_tokens_details)。");
4252
4156
  }
4253
4157
 
4254
- // Priority 4: recent trend low
4255
4158
  if (recent10Total > 0) {
4256
- const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
4257
4159
  if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
4258
- lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
4259
- lines.push(" Likely causes: proxy routing to different backends per request,");
4260
- lines.push(" or prompt prefix changes across turns.");
4261
- lines.push(" Verify upstream routing stickiness and supportsLongPromptCacheRetention compat.");
4160
+ lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(最近 ${recent10Total} 条样本)。`);
4161
+ lines.push(" 常见原因:代理把请求路由到不同后端,或 prompt 前缀在各轮之间变化。");
4162
+ lines.push(" 请检查上游路由粘性,以及 supportsLongPromptCacheRetention 配置是否正确。");
4262
4163
  } else if (todayHitRatio < 30 && todayStats.totalRequests > 3) {
4263
- lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${todayStats.totalRequests} total requests).`);
4264
- lines.push(" Check compat flags and proxy upstream routing.");
4164
+ lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(共 ${todayStats.totalRequests} 次请求)。`);
4165
+ lines.push(" 请检查 compat 配置与代理上游路由。");
4265
4166
  }
4266
4167
 
4267
- // Show brief trend summary if there are enough samples
4268
4168
  if (recent10Total >= 3) {
4269
4169
  const trend = formatRecentTrendSummary(samples, 10);
4270
4170
  lines.push(`📊 ${trend}`);
4271
4171
  }
4272
4172
  }
4273
4173
 
4274
- // For fully configured but low hit models, emphasize sticky routing
4275
4174
  if (!hasMissingCompat && !hasRouterRisk && todayStats.totalRequests > 3 && todayHitRatio < 30) {
4276
- lines.push("💡 Compat is configured but cache hit rate remains low.");
4277
- lines.push(" Possible causes:");
4278
- lines.push(" • Proxy still routes to multiple backends — check session affinity on the proxy side.");
4279
- lines.push(" • Prompt prefix varies per turn — check dynamic context in system prompt.");
4280
- lines.push(" • Provider does not return cache usage fields footer can't measure hits.");
4175
+ lines.push("💡 compat 已配置完整,但缓存命中率仍然偏低。");
4176
+ lines.push(" 可能原因:");
4177
+ lines.push(" • 代理仍把请求分发到多个后端 —— 请检查代理侧的会话粘性。");
4178
+ lines.push(" • prompt 前缀每轮都在变化 —— 请检查 system prompt 中的动态上下文。");
4179
+ lines.push(" • provider 没有返回缓存 usage 字段 —— footer 无法准确测量命中。");
4281
4180
  }
4282
4181
 
4283
4182
  return lines;
@@ -4304,16 +4203,16 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
4304
4203
  const slashIdx = key.indexOf("/");
4305
4204
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
4306
4205
  const modelsJsonPath = getModelsJsonDisplayPath();
4307
- lines.push(`Active model: ${key}`);
4206
+ lines.push(`当前模型:${key}`);
4308
4207
  if (safeFixableMissingC.length > 0) {
4309
- lines.push(`Safe-fixable: ${safeFixableMissingC.join(", ")}`);
4208
+ lines.push(`可安全修复:${safeFixableMissingC.join(", ")}`);
4310
4209
  }
4311
4210
  if (advisoryMissingC.length > 0) {
4312
- lines.push(`Optional: ${advisoryMissingC.join(", ")} (enable only if needed)`);
4211
+ lines.push(`可选项:${advisoryMissingC.join(", ")}(仅在确认支持时启用)`);
4313
4212
  }
4314
4213
  lines.push("");
4315
- lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
4316
- lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
4214
+ lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
4215
+ lines.push("(与 baseUrl/api/apiKey/models 同级)。");
4317
4216
  if (adaptiveThinkingApplicable) {
4318
4217
  appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4319
4218
  } else if (deepSeekCompatApplicable) {
@@ -4324,10 +4223,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
4324
4223
  }
4325
4224
  }
4326
4225
 
4327
- // When compat is fully configured but router/optional notes exist, prefix the status.
4328
4226
  if ((routerNotes.length > 0 || optionalOpenAIProxyCompat.length > 0) && missing.length === 0) {
4329
4227
  if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
4330
- lines.push("✅ Compat fully configured.");
4228
+ lines.push("✅ compat 配置完整。");
4331
4229
  if (isPromptCacheRetention400Applicable(model)) {
4332
4230
  lines.push(getPromptCacheRetentionUnsupportedHint());
4333
4231
  }
@@ -5848,7 +5746,7 @@ export default function (pi: ExtensionAPI) {
5848
5746
  const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
5849
5747
  statusText = runtimeOptimizerEnabled
5850
5748
  ? statsText
5851
- : `Cache Optimizer disabled · ${statsText}`;
5749
+ : `缓存优化已关闭 · ${statsText}`;
5852
5750
  }
5853
5751
  }
5854
5752
 
@@ -5859,7 +5757,7 @@ export default function (pi: ExtensionAPI) {
5859
5757
  const sk = displayModel ? sessionModelKey(displayModel) : undefined;
5860
5758
  const stats = sk ? cacheStatsByModel[sk] : undefined;
5861
5759
  const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
5862
- statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
5760
+ statusText = runtimeOptimizerEnabled ? statsText : `缓存优化已关闭 · ${statsText}`;
5863
5761
  }
5864
5762
 
5865
5763
  // If optimizeSystemPrompt detected structural truncation on this or
@@ -5867,7 +5765,7 @@ export default function (pi: ExtensionAPI) {
5867
5765
  // /reload before continuing. The flag resets after emission so a
5868
5766
  // single-turn glitch does not permanently taint the footer.
5869
5767
  if (promptTruncationDetected && statusText !== undefined) {
5870
- statusText = statusText + " ⚠️ integrity";
5768
+ statusText = statusText + " ⚠️ 完整性";
5871
5769
  promptTruncationDetected = false;
5872
5770
  lastPromptIntegrityWarningAt = Date.now();
5873
5771
 
@@ -5875,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
5875
5773
  if (!integrityNotificationShown) {
5876
5774
  integrityNotificationShown = true;
5877
5775
  ctx.ui.notify(
5878
- `⚠️ ${LOG_PREFIX}: A prompt structural marker was lost during reorder on this turn. ` +
5879
- `The original prompt was used instead to preserve integrity.\n\n` +
5880
- `Recovery steps:\n` +
5881
- `1. Run /reload to reset (may clear transient issues).\n` +
5882
- `2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 and /reload to disable reorder.\n` +
5883
- `3. If persistent, run /cache-optimizer doctor and file an issue (no API keys/prompts).`,
5776
+ `⚠️ ${LOG_PREFIX}:本轮重排导致一个 prompt 结构标记丢失。` +
5777
+ `为保证完整性,已回退到原始 prompt。\n\n` +
5778
+ `恢复步骤:\n` +
5779
+ `1. 运行 /reload 重置(可清除瞬态问题)。\n` +
5780
+ `2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 /reload,禁用重排。\n` +
5781
+ `3. 若持续复现,请运行 /cache-optimizer doctor 并提 issue(不要包含 API key / prompt)。`,
5884
5782
  "warning",
5885
5783
  );
5886
5784
  }
@@ -5899,7 +5797,7 @@ export default function (pi: ExtensionAPI) {
5899
5797
  // OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
5900
5798
  // commands still mention them as optional guidance.
5901
5799
  if (buildFixSuggestion(displayModel) !== undefined) {
5902
- statusText = statusText + " ⚠️ compat";
5800
+ statusText = statusText + " ⚠️ 配置";
5903
5801
  }
5904
5802
  }
5905
5803
 
@@ -6049,9 +5947,9 @@ export default function (pi: ExtensionAPI) {
6049
5947
  if (warnedPromptCacheRetention400Models.has(key)) return;
6050
5948
  warnedPromptCacheRetention400Models.add(key);
6051
5949
  ctx.ui.notify(
6052
- `⚠️ ${LOG_PREFIX}: ${key} returned HTTP 400 while supportsLongPromptCacheRetention is enabled. ` +
5950
+ `⚠️ ${LOG_PREFIX}:${key} 在启用 supportsLongPromptCacheRetention 时返回了 HTTP 400。` +
6053
5951
  getPromptCacheRetentionUnsupportedHint() +
6054
- ` Run /cache-optimizer doctor for the exact edit location.`,
5952
+ ` 可运行 /cache-optimizer doctor 查看精确编辑位置。`,
6055
5953
  "warning",
6056
5954
  );
6057
5955
  });
@@ -6140,16 +6038,16 @@ export default function (pi: ExtensionAPI) {
6140
6038
  resetCurrentSessionStats();
6141
6039
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6142
6040
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6143
- cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6041
+ cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
6144
6042
  } else if (subcommand === "disable") {
6145
6043
  setRuntimeOptimizerEnabled(false);
6146
6044
  resetCurrentSessionStats();
6147
6045
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6148
6046
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6149
- cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6047
+ cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
6150
6048
  } else if (subcommand === "doctor") {
6151
6049
  if (!model) {
6152
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6050
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6153
6051
  return;
6154
6052
  }
6155
6053
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
@@ -6164,7 +6062,7 @@ export default function (pi: ExtensionAPI) {
6164
6062
  cmdCtx.ui.notify(fullDiagnosis, "info");
6165
6063
  } else if (subcommand === "stats") {
6166
6064
  if (!model) {
6167
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6065
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6168
6066
  return;
6169
6067
  }
6170
6068
  const adapter = selectAdapterForModel(model);
@@ -6175,7 +6073,7 @@ export default function (pi: ExtensionAPI) {
6175
6073
  cmdCtx.ui.notify(output, "info");
6176
6074
  } else if (subcommand === "compat") {
6177
6075
  if (!model) {
6178
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6076
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6179
6077
  return;
6180
6078
  }
6181
6079
  const compatResult = buildCompatDiagnosis(model);
@@ -6184,19 +6082,19 @@ export default function (pi: ExtensionAPI) {
6184
6082
  } else {
6185
6083
  cmdCtx.ui.notify(
6186
6084
  isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
6187
- ? "✅ Compat fully configured."
6085
+ ? "✅ compat 配置完整。"
6188
6086
  : getCompatCheckNotApplicableLines(model).join("\n"),
6189
6087
  "info",
6190
6088
  );
6191
6089
  }
6192
6090
  } else if (subcommand === "reset") {
6193
6091
  if (!model) {
6194
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6092
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6195
6093
  return;
6196
6094
  }
6197
6095
  const adapter = selectAdapterForModel(model);
6198
6096
  if (!adapter) {
6199
- cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6097
+ cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
6200
6098
  return;
6201
6099
  }
6202
6100
 
@@ -6214,21 +6112,21 @@ export default function (pi: ExtensionAPI) {
6214
6112
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6215
6113
 
6216
6114
  cmdCtx.ui.notify(
6217
- `✅ Reset local session cache stats for "${displayKey}". ` +
6218
- "Upstream provider prompt cache was not modified. " +
6219
- "New requests will start a fresh stats bucket for this OMP session.",
6115
+ `✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
6116
+ "上游 provider prompt cache 未被修改。" +
6117
+ "后续请求会为当前 OMP session 开始新的统计桶。",
6220
6118
  "info",
6221
6119
  );
6222
6120
  } else if (subcommand === "fix") {
6223
6121
  if (!model) {
6224
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6122
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6225
6123
  return;
6226
6124
  }
6227
6125
 
6228
6126
  const suggestion = buildFixSuggestion(model);
6229
6127
  if (!suggestion) {
6230
6128
  const key = modelKey(model);
6231
- cmdCtx.ui.notify(`✅ Nothing to fix for "${key}". Compat already configured.`, "info");
6129
+ cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
6232
6130
  return;
6233
6131
  }
6234
6132
 
@@ -6239,14 +6137,14 @@ export default function (pi: ExtensionAPI) {
6239
6137
  const compatResult = buildCompatDiagnosis(model);
6240
6138
  const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
6241
6139
  cmdCtx.ui.notify(
6242
- `📝 Manual fix for ${getModelsJsonDisplayPath()}:\n\n` +
6243
- `Provider: ${suggestion.providerLabel}\n` +
6244
- `Model: ${suggestion.modelId}\n\n` +
6245
- `Add these compat keys (model level, under the model entry):\n\n` +
6140
+ `📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
6141
+ `提供方:${suggestion.providerLabel}\n` +
6142
+ `模型:${suggestion.modelId}\n\n` +
6143
+ `在模型级 compat(模型条目下)添加这些键:\n\n` +
6246
6144
  `compat:\n${yamlSnippet}\n\n` +
6247
- `Or at provider level (under providers["${suggestion.providerLabel}"]):\n\n` +
6145
+ `或放到 provider 级(providers["${suggestion.providerLabel}"] 下):\n\n` +
6248
6146
  `compat:\n${yamlSnippet}\n\n` +
6249
- `After editing, run /reload.\n` +
6147
+ `编辑后运行 /reload。\n` +
6250
6148
  (compatResult ? `\n${compatResult}` : ""),
6251
6149
  "info",
6252
6150
  );
@@ -6254,31 +6152,31 @@ export default function (pi: ExtensionAPI) {
6254
6152
  // Try interactive selection menu when UI supports it
6255
6153
  if (cmdCtx.hasUI) {
6256
6154
  const menuOptions = [
6257
- "Enable Turn on runtime optimizations",
6258
- "Disable Turn off runtime optimizations",
6259
- "Doctor Show cache configuration",
6260
- "Stats Show cache stats and trend",
6261
- "Compat Show compat suggestion",
6262
- "Fix Auto-fix compat issues (writes models.yml)",
6263
- "Reset Reset local session stats",
6264
- "Cancel",
6155
+ "启用 —— 打开运行时优化",
6156
+ "关闭 —— 关闭运行时优化",
6157
+ "诊断 —— 查看缓存配置",
6158
+ "统计 —— 查看缓存统计与趋势",
6159
+ "兼容 —— 查看 compat 建议",
6160
+ "修复 —— 查看 compat 修复建议(会写 models.yml 时另行提示)",
6161
+ "重置 —— 重置本地 session 统计",
6162
+ "取消",
6265
6163
  ];
6266
- const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
6164
+ const choice = await cmdCtx.ui.select("缓存优化器", menuOptions);
6267
6165
  if (choice === menuOptions[0]) {
6268
6166
  setRuntimeOptimizerEnabled(true);
6269
6167
  resetCurrentSessionStats();
6270
6168
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6271
6169
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6272
- cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6170
+ cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
6273
6171
  } else if (choice === menuOptions[1]) {
6274
6172
  setRuntimeOptimizerEnabled(false);
6275
6173
  resetCurrentSessionStats();
6276
6174
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6277
6175
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6278
- cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6176
+ cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
6279
6177
  } else if (choice === menuOptions[2]) {
6280
6178
  if (!model) {
6281
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6179
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6282
6180
  } else {
6283
6181
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
6284
6182
  const adapter = selectAdapterForModel(model);
@@ -6293,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
6293
6191
  }
6294
6192
  } else if (choice === menuOptions[3]) {
6295
6193
  if (!model) {
6296
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6194
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6297
6195
  } else {
6298
6196
  const adapter = selectAdapterForModel(model);
6299
6197
  const sk = model ? sessionModelKey(model) : undefined;
@@ -6304,7 +6202,7 @@ export default function (pi: ExtensionAPI) {
6304
6202
  }
6305
6203
  } else if (choice === menuOptions[4]) {
6306
6204
  if (!model) {
6307
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6205
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6308
6206
  } else {
6309
6207
  const compatResult = buildCompatDiagnosis(model);
6310
6208
  if (compatResult) {
@@ -6312,7 +6210,7 @@ export default function (pi: ExtensionAPI) {
6312
6210
  } else {
6313
6211
  cmdCtx.ui.notify(
6314
6212
  isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
6315
- ? "✅ Compat fully configured."
6213
+ ? "✅ compat 配置完整。"
6316
6214
  : getCompatCheckNotApplicableLines(model).join("\n"),
6317
6215
  "info",
6318
6216
  );
@@ -6321,13 +6219,13 @@ export default function (pi: ExtensionAPI) {
6321
6219
  } else if (choice === menuOptions[5]) {
6322
6220
  // Fix — auto-fix compat issues
6323
6221
  if (!model) {
6324
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6222
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6325
6223
  return;
6326
6224
  }
6327
6225
  const suggestion = buildFixSuggestion(model);
6328
6226
  if (!suggestion) {
6329
6227
  const key = modelKey(model);
6330
- cmdCtx.ui.notify(`✅ Nothing to fix for "${key}". Compat already configured.`, "info");
6228
+ cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
6331
6229
  return;
6332
6230
  }
6333
6231
 
@@ -6335,30 +6233,30 @@ export default function (pi: ExtensionAPI) {
6335
6233
  const compatResult = buildCompatDiagnosis(model);
6336
6234
  const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
6337
6235
  cmdCtx.ui.notify(
6338
- `📝 Manual fix for ${getModelsJsonDisplayPath()}:\n\n` +
6339
- `Provider: ${suggestion.providerLabel}\n` +
6340
- `Model: ${suggestion.modelId}\n\n` +
6341
- `Add these compat keys:\n\n` +
6236
+ `📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
6237
+ `提供方:${suggestion.providerLabel}\n` +
6238
+ `模型:${suggestion.modelId}\n\n` +
6239
+ `添加这些 compat 键:\n\n` +
6342
6240
  `compat:\n${yamlSnippet}\n\n` +
6343
- `After editing, run /reload.\n` +
6241
+ `编辑后运行 /reload。\n` +
6344
6242
  (compatResult ? `\n${compatResult}` : ""),
6345
6243
  "info",
6346
6244
  );
6347
6245
  } else if (choice === menuOptions[6]) {
6348
6246
  if (!model) {
6349
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6247
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6350
6248
  } else {
6351
6249
  const adapter = selectAdapterForModel(model);
6352
6250
  if (!adapter) {
6353
- cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6251
+ cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
6354
6252
  } else {
6355
6253
  const displayKey = modelKey(model);
6356
6254
  resetStatsForModel(model);
6357
6255
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6358
6256
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6359
6257
  cmdCtx.ui.notify(
6360
- `✅ Reset local session cache stats for "${displayKey}". ` +
6361
- "Upstream provider prompt cache was not modified.",
6258
+ `✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
6259
+ "上游 provider prompt cache 未被修改。",
6362
6260
  "info",
6363
6261
  );
6364
6262
  }
@@ -6370,14 +6268,14 @@ export default function (pi: ExtensionAPI) {
6370
6268
 
6371
6269
  // Fallback: text help when no interactive UI
6372
6270
  const diagnosis: string[] = [];
6373
- diagnosis.push("📋 /cache-optimizer commands:");
6374
- diagnosis.push(" enable Enable prompt/cache optimizations for this OMP process");
6375
- diagnosis.push(" disable Disable prompt/cache optimizations for this OMP process");
6376
- diagnosis.push(" doctor Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
6377
- diagnosis.push(" stats Show active model stats bucket and recent trend");
6378
- diagnosis.push(" compat Show compat suggestion with edit location");
6379
- diagnosis.push(" fix Auto-fix compat issues (writes models.yml, requires UI)");
6380
- diagnosis.push(" reset Reset local session stats for current model (does not affect upstream)");
6271
+ diagnosis.push("📋 /cache-optimizer 命令:");
6272
+ diagnosis.push(" enable —— 为当前 OMP 进程开启 prompt/cache 优化");
6273
+ diagnosis.push(" disable —— 为当前 OMP 进程关闭 prompt/cache 优化");
6274
+ diagnosis.push(" doctor —— 查看当前模型/provider/api/baseUrl/compat 与低命中诊断");
6275
+ diagnosis.push(" stats —— 查看当前活动模型的统计桶与近期趋势");
6276
+ diagnosis.push(" compat —— 查看 compat 建议与编辑位置");
6277
+ diagnosis.push(" fix —— 查看 compat 修复建议(需要 UI 时另有提示)");
6278
+ diagnosis.push(" reset —— 重置当前模型的本地 session 统计(不影响上游)");
6381
6279
  diagnosis.push("");
6382
6280
  diagnosis.push(formatOptimizerRuntimeMode());
6383
6281
  diagnosis.push("");
@@ -6385,17 +6283,17 @@ export default function (pi: ExtensionAPI) {
6385
6283
  const displayKey = modelKey(model);
6386
6284
  const missing = describeMissingCacheCompatForModel(model);
6387
6285
  if (missing.length > 0) {
6388
- diagnosis.push(`⚠️ Active model "${displayKey}" missing compat: ${missing.join(", ")}`);
6389
- diagnosis.push('Run "/cache-optimizer compat" for edit instructions.');
6286
+ diagnosis.push(`⚠️ 当前模型 "${displayKey}" 缺少 compat:${missing.join(", ")}`);
6287
+ diagnosis.push('可运行 "/cache-optimizer compat" 查看编辑建议。');
6390
6288
  } else if (isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)) {
6391
- diagnosis.push(`✅ Active model "${displayKey}": compat fully configured.`);
6289
+ diagnosis.push(`✅ 当前模型 "${displayKey}"compat 配置完整。`);
6392
6290
  } else {
6393
- diagnosis.push(`ℹ️ Active model "${displayKey}": compat check not applicable.`);
6291
+ diagnosis.push(`ℹ️ 当前模型 "${displayKey}":不适用 compat 检查。`);
6394
6292
  const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
6395
6293
  for (const line of detailLines) diagnosis.push(line);
6396
6294
  }
6397
6295
  } else {
6398
- diagnosis.push("No active model selected.");
6296
+ diagnosis.push("当前没有活动模型。");
6399
6297
  }
6400
6298
  cmdCtx.ui.notify(diagnosis.join("\n"), "info");
6401
6299
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omp-cache-optimizer",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "Improve OMP prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "omp-package",