omp-cache-optimizer 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +6 -6
  2. package/index.ts +62 -61
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  用于提升 OMP 中 provider 侧 KV Cache / Prompt Cache 命中率的扩展:把稳定 prompt 内容前置,给 OpenAI-compatible 请求补保守的 `prompt_cache_key`,提示代理渠道常见缓存路由兼容问题,并在底部显示只读缓存统计。
10
10
 
11
- > 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
11
+ > 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从旧状态目录 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
12
12
 
13
13
  ## 与原项目的关键差异
14
14
 
@@ -63,7 +63,7 @@
63
63
  omp install npm:omp-cache-optimizer
64
64
  ```
65
65
 
66
- 如果之前安装过 Pi 版本:
66
+ 如果之前安装过原版本:
67
67
 
68
68
  ```bash
69
69
  omp remove npm:pi-cache-optimizer && omp install npm:omp-cache-optimizer
@@ -102,7 +102,7 @@ OMP 0.79.7 及之后,`omp update` 默认只更新 OMP 本体。若要更新已
102
102
 
103
103
  LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completions` 代理,常会把同一个 session 分散到多个上游后端,导致 provider 侧 prompt cache 被拆散。
104
104
 
105
- **OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(Pi 时代的字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
105
+ **OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(原项目中的旧字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
106
106
 
107
107
  `models.yml` 示例:
108
108
 
@@ -129,13 +129,13 @@ providers:
129
129
 
130
130
  ## Anthropic adaptive thinking 模型
131
131
 
132
- **OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与 Pi 的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
132
+ **OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与原项目中的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
133
133
 
134
134
  `/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测 adaptive thinking 模型并显示信息性说明。自定义渠道 fronting Anthropic 时,请确保模型 id 匹配官方发布版本,以便 OMP catalog 正确识别。
135
135
 
136
136
  ## 使用 `/cache-optimizer fix` 手动修复
137
137
 
138
- **OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原 Pi 版本的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
138
+ **OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原项目中的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
139
139
 
140
140
  当前行为:
141
141
 
@@ -274,7 +274,7 @@ registry?.registerRouter({
274
274
  });
275
275
  ```
276
276
 
277
- cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与 Pi 版本一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
277
+ cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与原项目一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
278
278
 
279
279
  ## 卸载
280
280
 
package/index.ts CHANGED
@@ -64,7 +64,7 @@ const LOG_PREFIX = "omp-cache-optimizer";
64
64
  const STATUS_KEY = "omp-cache-stats";
65
65
  const STATE_DIR = join(homedir(), ".omp", "agent");
66
66
  const STATE_FILE_PATH = join(STATE_DIR, "omp-cache-optimizer-stats.json");
67
- // Legacy Pi-era state file path: read for one-way migration only, never written.
67
+ // Legacy source-project state file path: read for one-way migration only, never written.
68
68
  const LEGACY_PI_STATE_FILE_PATH = join(homedir(), ".pi", "agent", "pi-cache-optimizer-stats.json");
69
69
  const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
70
70
  const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
@@ -77,7 +77,7 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
77
77
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
78
78
  const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
79
79
  // Inter-extension protocol symbols are versioned under the omp.* namespace. The v1
80
- // shape is identical to the legacy pi.* symbols; router/hints integrators on OMP
80
+ // shape is identical to the legacy symbols; router/hints integrators on OMP
81
81
  // should register under omp.routing.registry.v1 / omp.cache.hints.v1.
82
82
  const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("omp.routing.registry.v1");
83
83
  const PI_CACHE_HINTS_SYMBOL = Symbol.for("omp.cache.hints.v1");
@@ -104,7 +104,7 @@ function getLastPromptIntegrityWarningAt(): number {
104
104
  }
105
105
 
106
106
  // Minimum count of skills before compression is worth applying.
107
- // Below this, pi's verbose XML block is small enough that the overhead of
107
+ // Below this, the runtime's verbose XML block is small enough that the overhead of
108
108
  // an additional one-line index isn't worth the loss of per-skill
109
109
  // description hints. The 31-skill snapshot in this repo was 13.3 KB; one
110
110
  // or two skills is well under 1 KB and not worth touching.
@@ -122,7 +122,7 @@ const SKILL_COMPRESSION_MIN_COUNT = 4;
122
122
  // The threshold also caps the upstream string-vs-array regression we saw with
123
123
  // trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
124
124
  // `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
125
- // pi then iterates char-by-char). Even if a similar bug recurs upstream, this
125
+ // the runtime then iterates char-by-char). Even if a similar bug recurs upstream, this
126
126
  // extension will not lift its single-character byproducts into the stable
127
127
  // prefix candidate list.
128
128
  //
@@ -268,7 +268,7 @@ type PersistedCacheStatsV3 = {
268
268
 
269
269
  /**
270
270
  * V4 format: session-scoped stats buckets.
271
- * Each Pi process/session gets its own stats isolated by a hashed session id.
271
+ * Each session in the host runtime gets its own stats isolated by a hashed session id.
272
272
  *
273
273
  * sessions: sessionHash → modelKey (provider/id) → CacheStats
274
274
  * legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
@@ -375,7 +375,7 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
375
375
  /**
376
376
  * Compressed alternative to `formatSkillsForPrompt`.
377
377
  *
378
- * Pi emits a four-line XML block per skill (`<name>`, `<description>`,
378
+ * The host runtime emits a four-line XML block per skill (`<name>`, `<description>`,
379
379
  * `<location>`) plus a three-sentence preamble. With 31 skills active in
380
380
  * this repo that block measured 13.3 KB — 61.5 % of the total system
381
381
  * prompt. The full description text matters when the model has to decide
@@ -454,7 +454,7 @@ function formatSkillsForPromptCompressed(
454
454
  }
455
455
 
456
456
  /**
457
- * Replace pi's verbose `<available_skills>` block in `prompt` with the
457
+ * Replace the runtime's verbose `<available_skills>` block in `prompt` with the
458
458
  * compressed one-index form. Idempotent: if the verbose form is not
459
459
  * present (compression already applied, or skill count below threshold),
460
460
  * the prompt is returned unchanged.
@@ -465,7 +465,7 @@ function formatSkillsForPromptCompressed(
465
465
  * - opts.skills present and visible-skill count >= SKILL_COMPRESSION_MIN_COUNT
466
466
  * - Verbose block (built from the same `opts.skills`) is found in
467
467
  * `prompt` (substring match, no regex). This anchors the substitution
468
- * to pi's own emitter; if pi changes the format, we no-op rather
468
+ * to the runtime's own emitter; if the format changes, we no-op rather
469
469
  * than mangle.
470
470
  */
471
471
  function compressSkillsInSystemPrompt(
@@ -589,14 +589,14 @@ function stripSessionOverviewChurn(prompt: string): string {
589
589
  * prompt rather than ship a corrupted one.
590
590
  *
591
591
  * Three marker categories are recognized (covers ~99% of real-world
592
- * extension injection patterns in the pi ecosystem):
592
+ * extension injection patterns in the host runtime ecosystem):
593
593
  *
594
594
  * 1. XML-style opening tags `<tagname>` (lowercase, alpha-num + `_`/`-`)
595
595
  * 2. XML-style closing tags `</tagname>`
596
596
  * 3. HTML comment START/END `<!-- NAME:START -->` / `<!-- NAME:END -->`
597
597
  *
598
598
  * Tags with attributes (e.g., `<task id="42">`) are not currently emitted
599
- * by any pi extension we know of and are skipped to keep the regex tight.
599
+ * by any runtime extension we know of and are skipped to keep the regex tight.
600
600
  * Markdown headers, horizontal rules, and timestamp patterns are not
601
601
  * usable as guards because they have no closing form to verify.
602
602
  *
@@ -676,7 +676,7 @@ function optimizeSystemPrompt(
676
676
  // protected without code changes when new extensions ship.
677
677
  //
678
678
  // Our skills compression runs BEFORE optimizeSystemPrompt and replaces
679
- // pi's verbose `<available_skills>` block with a compressed text
679
+ // the runtime's verbose `<available_skills>` block with a compressed text
680
680
  // section that has no XML tag. So `original` here (post-compression)
681
681
  // does not contain `<available_skills>` and the result doesn't either
682
682
  // — no false positive.
@@ -969,12 +969,13 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
969
969
  function getCompat(model: PiModel | undefined): CacheCompat {
970
970
  if (!model) return {} as CacheCompat;
971
971
 
972
- // Pi merges provider.compat with model.compat (model wins on conflicts)
973
- // We approximate this by reading from ctx.model which should already have merged compat
974
- // However, for safety, we check both levels if available
972
+ // The host runtime merges provider.compat with model.compat (model wins on conflicts).
973
+ // We approximate this by reading from ctx.model which should already have merged compat.
974
+ // Only model.compat is read here; provider-level compat is not looked up separately.
975
975
  const modelCompat = (model.compat ?? {}) as CacheCompat;
976
-
977
- // Note: ctx.model from Pi should already contain merged compat,
976
+
977
+ // Note: ctx.model from the host runtime should already contain merged compat,
978
+ // but we document the two-level structure for clarity.
978
979
  // but we document the two-level structure for clarity
979
980
  return modelCompat;
980
981
  }
@@ -1141,7 +1142,7 @@ function isGeminiLikeAssistantMessage(message: unknown, model: PiModel | undefin
1141
1142
  * Check whether the model id uses Anthropic's adaptive generation (thinking)
1142
1143
  * that required the legacy `forceAdaptiveThinking: true` compat flag (no longer used on OMP).
1143
1144
  *
1144
- * Adaptive-generation models (from pi-ai built-in catalog) include:
1145
+ * Adaptive-generation models (from the bundled model catalog) include:
1145
1146
  * claude-opus-4-6, claude-opus-4-7, claude-opus-4-8 (also dotted 4.6/4.7/4.8)
1146
1147
  * claude-sonnet-4-6
1147
1148
  * claude-fable-5
@@ -1162,7 +1163,7 @@ function isAdaptiveGenerationModel(model: PiModel | undefined): boolean {
1162
1163
  // OMP divergence: adaptive thinking is set automatically by the OMP built-in model
1163
1164
  // catalog (via disableAdaptiveThinking, with reversed semantics) and is NOT
1164
1165
  // user-configurable from models.yml (see omp models.md §Anthropic compatibility).
1165
- // The Pi-era forceAdaptiveThinking flag no longer exists. We keep model detection
1166
+ // The legacy `forceAdaptiveThinking` flag no longer exists. We keep model detection
1166
1167
  // (isAdaptiveGenerationModel) for informational doctor output, but drop the fixable
1167
1168
  // compat-suggestion path entirely.
1168
1169
  function isAdaptiveThinkingCompatApplicable(_model: PiModel): boolean {
@@ -1734,14 +1735,14 @@ function readCacheWriteFromDetails(details: UnknownRecord | undefined): number |
1734
1735
  return getFirstNonNegativeNumber(details?.cache_write_tokens, details?.cacheWriteTokens);
1735
1736
  }
1736
1737
 
1737
- // Pi normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
1738
+ // The host runtime normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
1738
1739
  // cache_read_input_tokens, etc.) into a common shape:
1739
1740
  // input = uncached prompt portion (total prompt minus cacheRead minus cacheWrite)
1740
1741
  // cacheRead = tokens read from a previously-cached prefix
1741
1742
  // cacheWrite= tokens newly written into cache in this request
1742
1743
  //
1743
1744
  // We reconstruct the total prompt-token count as input + cacheRead + cacheWrite.
1744
- // Pi guarantees that input, cacheRead, and cacheWrite are always present on
1745
+ // The host runtime guarantees that input, cacheRead, and cacheWrite are always present on
1745
1746
  // assistant messages processed through its provider pipeline (at least as zero).
1746
1747
  //
1747
1748
  // Only DeepSeek sets allowInputOnly=true so that a cache miss (cacheRead=0) still
@@ -1757,10 +1758,10 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
1757
1758
 
1758
1759
  if (!hasCacheSignal && (input === undefined || !allowInputOnly)) return undefined;
1759
1760
 
1760
- // Under healthy Pi normalization input is the uncached portion, so
1761
+ // Under healthy runtime normalization input is the uncached portion, so
1761
1762
  // totalInput = input + cacheRead + cacheWrite gives the full prompt token count.
1762
1763
  // Guard against degenerate reads where a broken proxy omits prompt_tokens and
1763
- // Pi's input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
1764
+ // normalized input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
1764
1765
  const computed = (input ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
1765
1766
  const floor = (cacheRead ?? 0) + (cacheWrite ?? 0);
1766
1767
  return {
@@ -1771,8 +1772,8 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
1771
1772
  }
1772
1773
 
1773
1774
  // Raw fallback for DeepSeek responses that still carry their native usage fields.
1774
- // In practice Pi normalizes usage before message_end fires, so this path is only
1775
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1775
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1776
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1776
1777
  function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
1777
1778
  const usage = usageRecordFromAssistant(message);
1778
1779
  if (!usage) return undefined;
@@ -1789,8 +1790,8 @@ function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
1789
1790
  }
1790
1791
 
1791
1792
  // Raw fallback for OpenAI-family responses that still carry their native usage fields.
1792
- // In practice Pi normalizes usage before message_end fires, so this path is only
1793
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1793
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1794
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1794
1795
  function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
1795
1796
  const usage = usageRecordFromAssistant(message);
1796
1797
  if (!usage) return undefined;
@@ -1812,8 +1813,8 @@ function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
1812
1813
  }
1813
1814
 
1814
1815
  // Raw fallback for Anthropic/Claude responses that still carry their native usage fields.
1815
- // In practice Pi normalizes usage before message_end fires, so this path is only
1816
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1816
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1817
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1817
1818
  function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
1818
1819
  const usage = usageRecordFromAssistant(message);
1819
1820
  if (!usage) return undefined;
@@ -1832,8 +1833,8 @@ function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
1832
1833
  }
1833
1834
 
1834
1835
  // Raw fallback for Gemini/Vertex responses that still carry their native usage fields.
1835
- // In practice Pi normalizes usage before message_end fires, so this path is only
1836
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1836
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1837
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1837
1838
  function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
1838
1839
  const record = getAssistantRecord(message);
1839
1840
  if (!record) return undefined;
@@ -1867,8 +1868,8 @@ function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
1867
1868
  return { cacheRead, cacheWrite: 0, totalInput };
1868
1869
  }
1869
1870
 
1870
- // Try Pi-normalized usage first (always present for messages that went through Pi's
1871
- // provider pipeline). Fall back to provider-specific raw-field readers when Pi-normalized
1871
+ // Try normalized usage first (always present for messages that went through the runtime's
1872
+ // provider pipeline). Fall back to provider-specific raw-field readers when normalized
1872
1873
  // fields are absent (e.g. messages from custom/foreign providers whose raw usage shape
1873
1874
  // matches the official API).
1874
1875
  function normalizeWithFallback(
@@ -2038,7 +2039,7 @@ function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
2038
2039
  }
2039
2040
 
2040
2041
  function describeMissingOpenAIFamilyProxyCompat(_model: PiModel): string[] {
2041
- // OMP divergence: Pi's sendSessionAffinityHeaders has no compat equivalent.
2042
+ // OMP divergence: the legacy `sendSessionAffinityHeaders` flag has no compat equivalent.
2042
2043
  // OMP achieves upstream stickiness via multi-credential auth + session affinity
2043
2044
  // in agent.db (see omp models.md §Auth). There is no required compat key for
2044
2045
  // OpenAI-family proxies on OMP, so this returns an empty list. Optional long
@@ -2332,7 +2333,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
2332
2333
 
2333
2334
  return (
2334
2335
  `💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
2335
- "Pi may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2336
+ "OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2336
2337
  );
2337
2338
  },
2338
2339
  },
@@ -3363,7 +3364,7 @@ function formatTokenM(value: number): string {
3363
3364
 
3364
3365
  /**
3365
3366
  * Check if an assistant message's usage fields appear to be missing or empty.
3366
- * Returns true when Pi-normalized fields (input, cacheRead, cacheWrite) are all
3367
+ * Returns true when normalized fields (input, cacheRead, cacheWrite) are all
3367
3368
  * absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
3368
3369
  * for the given adapter.
3369
3370
  */
@@ -3371,12 +3372,12 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
3371
3372
  const usage = usageRecordFromAssistant(message);
3372
3373
  if (!usage) return true;
3373
3374
 
3374
- // Check Pi-normalized fields
3375
+ // Check normalized fields
3375
3376
  const input = getNonNegativeNumber(usage, "input");
3376
3377
  const cacheRead = getNonNegativeNumber(usage, "cacheRead");
3377
3378
  const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
3378
3379
 
3379
- // If Pi-normalized fields exist with non-zero values, usage is present
3380
+ // If normalized fields exist with non-zero values, usage is present
3380
3381
  if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
3381
3382
  return false;
3382
3383
  }
@@ -4052,7 +4053,7 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
4052
4053
  if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
4053
4054
  return [
4054
4055
  "ℹ️ Compat check not applicable for this model.",
4055
- " Native Responses transports already use Pi core request handling; OpenAI-compatible proxy compat flags do not apply.",
4056
+ " Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
4056
4057
  ];
4057
4058
  }
4058
4059
 
@@ -4934,9 +4935,9 @@ function chooseFixPlacement(
4934
4935
  Object.keys(compatKeys),
4935
4936
  );
4936
4937
 
4937
- // Provider-level writes cannot override a model-level compat key because Pi's
4938
+ // Provider-level writes cannot override a model-level compat key because the runtime's
4938
4939
  // merge order is provider.compat then model.compat. If the active model already
4939
- // has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
4940
+ // has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
4940
4941
  // at model level even when the key would otherwise be provider-safe.
4941
4942
  if (decision.placement === "provider" && existingModelKeys.length > 0) {
4942
4943
  return {
@@ -5103,7 +5104,7 @@ function selfCheckFix(
5103
5104
  }
5104
5105
 
5105
5106
  // Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
5106
- // mirroring Pi's mergeCompat behavior (model wins on conflicts). The fix may
5107
+ // mirroring the runtime's mergeCompat behavior (model wins on conflicts). The fix may
5107
5108
  // have written either level, so validation must check the merged result.
5108
5109
  const provCompatRaw = (provider as Record<string, unknown>).compat;
5109
5110
  const provCompat = (provCompatRaw && typeof provCompatRaw === 'object' && !Array.isArray(provCompatRaw))
@@ -5226,7 +5227,7 @@ function backupTimestamp(): string {
5226
5227
 
5227
5228
  // Internal helpers exported only so the task verification script
5228
5229
  // (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
5229
- // extension's public API; pi only invokes the default export below.
5230
+ // extension's public API; the host runtime only invokes the default export below.
5230
5231
  export const __internals_for_tests = {
5231
5232
  buildStableCandidates,
5232
5233
  optimizeSystemPrompt,
@@ -5709,7 +5710,7 @@ export default function (pi: ExtensionAPI) {
5709
5710
 
5710
5711
  if (reason === "reload") {
5711
5712
  // /reload: preserve session-scoped stats (same session hash).
5712
- // Pi extension reload creates a fresh closure, so cacheStatsByModel
5713
+ // OMP extension reload creates a fresh closure, so cacheStatsByModel
5713
5714
  // starts empty. Read persisted data and filter for current session.
5714
5715
  lastStatusText = undefined;
5715
5716
  lastPromptIntegrityWarningAt = 0;
@@ -6098,7 +6099,7 @@ export default function (pi: ExtensionAPI) {
6098
6099
  // (no args) — interactive menu (with UI) or help summary
6099
6100
  // ────────────────────────────────────────────────────────────────
6100
6101
  pi.registerCommand("cache-optimizer", {
6101
- description: "Diagnose Pi cache configuration",
6102
+ description: "Diagnose OMP cache configuration",
6102
6103
  handler: async (args: string, cmdCtx) => {
6103
6104
  syncSessionHash(cmdCtx);
6104
6105
  const selectedModel = cmdCtx.model;
@@ -6110,16 +6111,16 @@ export default function (pi: ExtensionAPI) {
6110
6111
  resetCurrentSessionStats();
6111
6112
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6112
6113
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6113
- cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6114
+ cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6114
6115
  } else if (subcommand === "disable") {
6115
6116
  setRuntimeOptimizerEnabled(false);
6116
6117
  resetCurrentSessionStats();
6117
6118
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6118
6119
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6119
- cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6120
+ cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6120
6121
  } else if (subcommand === "doctor") {
6121
6122
  if (!model) {
6122
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6123
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6123
6124
  return;
6124
6125
  }
6125
6126
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
@@ -6134,7 +6135,7 @@ export default function (pi: ExtensionAPI) {
6134
6135
  cmdCtx.ui.notify(fullDiagnosis, "info");
6135
6136
  } else if (subcommand === "stats") {
6136
6137
  if (!model) {
6137
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6138
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6138
6139
  return;
6139
6140
  }
6140
6141
  const adapter = selectAdapterForModel(model);
@@ -6145,7 +6146,7 @@ export default function (pi: ExtensionAPI) {
6145
6146
  cmdCtx.ui.notify(output, "info");
6146
6147
  } else if (subcommand === "compat") {
6147
6148
  if (!model) {
6148
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6149
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6149
6150
  return;
6150
6151
  }
6151
6152
  const compatResult = buildCompatDiagnosis(model);
@@ -6161,7 +6162,7 @@ export default function (pi: ExtensionAPI) {
6161
6162
  }
6162
6163
  } else if (subcommand === "reset") {
6163
6164
  if (!model) {
6164
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6165
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6165
6166
  return;
6166
6167
  }
6167
6168
  const adapter = selectAdapterForModel(model);
@@ -6186,12 +6187,12 @@ export default function (pi: ExtensionAPI) {
6186
6187
  cmdCtx.ui.notify(
6187
6188
  `✅ Reset local session cache stats for "${displayKey}". ` +
6188
6189
  "Upstream provider prompt cache was not modified. " +
6189
- "New requests will start a fresh stats bucket for this Pi session.",
6190
+ "New requests will start a fresh stats bucket for this OMP session.",
6190
6191
  "info",
6191
6192
  );
6192
6193
  } else if (subcommand === "fix") {
6193
6194
  if (!model) {
6194
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6195
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6195
6196
  return;
6196
6197
  }
6197
6198
 
@@ -6239,16 +6240,16 @@ export default function (pi: ExtensionAPI) {
6239
6240
  resetCurrentSessionStats();
6240
6241
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6241
6242
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6242
- cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6243
+ cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6243
6244
  } else if (choice === menuOptions[1]) {
6244
6245
  setRuntimeOptimizerEnabled(false);
6245
6246
  resetCurrentSessionStats();
6246
6247
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6247
6248
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6248
- cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6249
+ cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6249
6250
  } else if (choice === menuOptions[2]) {
6250
6251
  if (!model) {
6251
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6252
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6252
6253
  } else {
6253
6254
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
6254
6255
  const adapter = selectAdapterForModel(model);
@@ -6263,7 +6264,7 @@ export default function (pi: ExtensionAPI) {
6263
6264
  }
6264
6265
  } else if (choice === menuOptions[3]) {
6265
6266
  if (!model) {
6266
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6267
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6267
6268
  } else {
6268
6269
  const adapter = selectAdapterForModel(model);
6269
6270
  const sk = model ? sessionModelKey(model) : undefined;
@@ -6274,7 +6275,7 @@ export default function (pi: ExtensionAPI) {
6274
6275
  }
6275
6276
  } else if (choice === menuOptions[4]) {
6276
6277
  if (!model) {
6277
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6278
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6278
6279
  } else {
6279
6280
  const compatResult = buildCompatDiagnosis(model);
6280
6281
  if (compatResult) {
@@ -6291,7 +6292,7 @@ export default function (pi: ExtensionAPI) {
6291
6292
  } else if (choice === menuOptions[5]) {
6292
6293
  // Fix — auto-fix compat issues
6293
6294
  if (!model) {
6294
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6295
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6295
6296
  return;
6296
6297
  }
6297
6298
  const suggestion = buildFixSuggestion(model);
@@ -6316,7 +6317,7 @@ export default function (pi: ExtensionAPI) {
6316
6317
  );
6317
6318
  } else if (choice === menuOptions[6]) {
6318
6319
  if (!model) {
6319
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6320
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6320
6321
  } else {
6321
6322
  const adapter = selectAdapterForModel(model);
6322
6323
  if (!adapter) {
@@ -6341,8 +6342,8 @@ export default function (pi: ExtensionAPI) {
6341
6342
  // Fallback: text help when no interactive UI
6342
6343
  const diagnosis: string[] = [];
6343
6344
  diagnosis.push("📋 /cache-optimizer commands:");
6344
- diagnosis.push(" enable — Enable prompt/cache optimizations for this Pi process");
6345
- diagnosis.push(" disable — Disable prompt/cache optimizations for this Pi process");
6345
+ diagnosis.push(" enable — Enable prompt/cache optimizations for this OMP process");
6346
+ diagnosis.push(" disable — Disable prompt/cache optimizations for this OMP process");
6346
6347
  diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
6347
6348
  diagnosis.push(" stats — Show active model stats bucket and recent trend");
6348
6349
  diagnosis.push(" compat — Show compat suggestion with edit location");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omp-cache-optimizer",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "Improve OMP prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "omp-package",