omp-cache-optimizer 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +6 -6
  2. package/index.ts +133 -103
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  用于提升 OMP 中 provider 侧 KV Cache / Prompt Cache 命中率的扩展:把稳定 prompt 内容前置,给 OpenAI-compatible 请求补保守的 `prompt_cache_key`,提示代理渠道常见缓存路由兼容问题,并在底部显示只读缓存统计。
10
10
 
11
- > 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
11
+ > 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从旧状态目录 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
12
12
 
13
13
  ## 与原项目的关键差异
14
14
 
@@ -63,7 +63,7 @@
63
63
  omp install npm:omp-cache-optimizer
64
64
  ```
65
65
 
66
- 如果之前安装过 Pi 版本:
66
+ 如果之前安装过原版本:
67
67
 
68
68
  ```bash
69
69
  omp remove npm:pi-cache-optimizer && omp install npm:omp-cache-optimizer
@@ -102,7 +102,7 @@ OMP 0.79.7 及之后,`omp update` 默认只更新 OMP 本体。若要更新已
102
102
 
103
103
  LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completions` 代理,常会把同一个 session 分散到多个上游后端,导致 provider 侧 prompt cache 被拆散。
104
104
 
105
- **OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(Pi 时代的字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
105
+ **OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(原项目中的旧字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
106
106
 
107
107
  `models.yml` 示例:
108
108
 
@@ -129,13 +129,13 @@ providers:
129
129
 
130
130
  ## Anthropic adaptive thinking 模型
131
131
 
132
- **OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与 Pi 的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
132
+ **OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与原项目中的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
133
133
 
134
134
  `/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测 adaptive thinking 模型并显示信息性说明。自定义渠道 fronting Anthropic 时,请确保模型 id 匹配官方发布版本,以便 OMP catalog 正确识别。
135
135
 
136
136
  ## 使用 `/cache-optimizer fix` 手动修复
137
137
 
138
- **OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原 Pi 版本的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
138
+ **OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原项目中的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
139
139
 
140
140
  当前行为:
141
141
 
@@ -274,7 +274,7 @@ registry?.registerRouter({
274
274
  });
275
275
  ```
276
276
 
277
- cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与 Pi 版本一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
277
+ cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与原项目一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
278
278
 
279
279
  ## 卸载
280
280
 
package/index.ts CHANGED
@@ -64,7 +64,7 @@ const LOG_PREFIX = "omp-cache-optimizer";
64
64
  const STATUS_KEY = "omp-cache-stats";
65
65
  const STATE_DIR = join(homedir(), ".omp", "agent");
66
66
  const STATE_FILE_PATH = join(STATE_DIR, "omp-cache-optimizer-stats.json");
67
- // Legacy Pi-era state file path: read for one-way migration only, never written.
67
+ // Legacy source-project state file path: read for one-way migration only, never written.
68
68
  const LEGACY_PI_STATE_FILE_PATH = join(homedir(), ".pi", "agent", "pi-cache-optimizer-stats.json");
69
69
  const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
70
70
  const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
@@ -77,7 +77,7 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
77
77
  const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
78
78
  const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
79
79
  // Inter-extension protocol symbols are versioned under the omp.* namespace. The v1
80
- // shape is identical to the legacy pi.* symbols; router/hints integrators on OMP
80
+ // shape is identical to the legacy symbols; router/hints integrators on OMP
81
81
  // should register under omp.routing.registry.v1 / omp.cache.hints.v1.
82
82
  const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("omp.routing.registry.v1");
83
83
  const PI_CACHE_HINTS_SYMBOL = Symbol.for("omp.cache.hints.v1");
@@ -104,7 +104,7 @@ function getLastPromptIntegrityWarningAt(): number {
104
104
  }
105
105
 
106
106
  // Minimum count of skills before compression is worth applying.
107
- // Below this, pi's verbose XML block is small enough that the overhead of
107
+ // Below this, the runtime's verbose XML block is small enough that the overhead of
108
108
  // an additional one-line index isn't worth the loss of per-skill
109
109
  // description hints. The 31-skill snapshot in this repo was 13.3 KB; one
110
110
  // or two skills is well under 1 KB and not worth touching.
@@ -122,7 +122,7 @@ const SKILL_COMPRESSION_MIN_COUNT = 4;
122
122
  // The threshold also caps the upstream string-vs-array regression we saw with
123
123
  // trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
124
124
  // `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
125
- // pi then iterates char-by-char). Even if a similar bug recurs upstream, this
125
+ // the runtime then iterates char-by-char). Even if a similar bug recurs upstream, this
126
126
  // extension will not lift its single-character byproducts into the stable
127
127
  // prefix candidate list.
128
128
  //
@@ -268,7 +268,7 @@ type PersistedCacheStatsV3 = {
268
268
 
269
269
  /**
270
270
  * V4 format: session-scoped stats buckets.
271
- * Each Pi process/session gets its own stats isolated by a hashed session id.
271
+ * Each session in the host runtime gets its own stats isolated by a hashed session id.
272
272
  *
273
273
  * sessions: sessionHash → modelKey (provider/id) → CacheStats
274
274
  * legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
@@ -312,6 +312,15 @@ type CacheUsageSample = {
312
312
  missingUsageFields: boolean;
313
313
  };
314
314
 
315
+ type PromptRewriteContext = {
316
+ options?: BuildSystemPromptOptions;
317
+ routeSnapshot?: PiRouteSnapshot;
318
+ routedModel?: PiModel;
319
+ timestamp: number;
320
+ };
321
+
322
+ const PROMPT_REWRITE_CONTEXT_TTL_MS = 10_000;
323
+
315
324
  /** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
316
325
  const MAX_RECENT_SAMPLES = 50;
317
326
 
@@ -375,7 +384,7 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
375
384
  /**
376
385
  * Compressed alternative to `formatSkillsForPrompt`.
377
386
  *
378
- * Pi emits a four-line XML block per skill (`<name>`, `<description>`,
387
+ * The host runtime emits a four-line XML block per skill (`<name>`, `<description>`,
379
388
  * `<location>`) plus a three-sentence preamble. With 31 skills active in
380
389
  * this repo that block measured 13.3 KB — 61.5 % of the total system
381
390
  * prompt. The full description text matters when the model has to decide
@@ -454,7 +463,7 @@ function formatSkillsForPromptCompressed(
454
463
  }
455
464
 
456
465
  /**
457
- * Replace pi's verbose `<available_skills>` block in `prompt` with the
466
+ * Replace the runtime's verbose `<available_skills>` block in `prompt` with the
458
467
  * compressed one-index form. Idempotent: if the verbose form is not
459
468
  * present (compression already applied, or skill count below threshold),
460
469
  * the prompt is returned unchanged.
@@ -465,7 +474,7 @@ function formatSkillsForPromptCompressed(
465
474
  * - opts.skills present and visible-skill count >= SKILL_COMPRESSION_MIN_COUNT
466
475
  * - Verbose block (built from the same `opts.skills`) is found in
467
476
  * `prompt` (substring match, no regex). This anchors the substitution
468
- * to pi's own emitter; if pi changes the format, we no-op rather
477
+ * to the runtime's own emitter; if the format changes, we no-op rather
469
478
  * than mangle.
470
479
  */
471
480
  function compressSkillsInSystemPrompt(
@@ -589,14 +598,14 @@ function stripSessionOverviewChurn(prompt: string): string {
589
598
  * prompt rather than ship a corrupted one.
590
599
  *
591
600
  * Three marker categories are recognized (covers ~99% of real-world
592
- * extension injection patterns in the pi ecosystem):
601
+ * extension injection patterns in the host runtime ecosystem):
593
602
  *
594
603
  * 1. XML-style opening tags `<tagname>` (lowercase, alpha-num + `_`/`-`)
595
604
  * 2. XML-style closing tags `</tagname>`
596
605
  * 3. HTML comment START/END `<!-- NAME:START -->` / `<!-- NAME:END -->`
597
606
  *
598
607
  * Tags with attributes (e.g., `<task id="42">`) are not currently emitted
599
- * by any pi extension we know of and are skipped to keep the regex tight.
608
+ * by any runtime extension we know of and are skipped to keep the regex tight.
600
609
  * Markdown headers, horizontal rules, and timestamp patterns are not
601
610
  * usable as guards because they have no closing form to verify.
602
611
  *
@@ -676,7 +685,7 @@ function optimizeSystemPrompt(
676
685
  // protected without code changes when new extensions ship.
677
686
  //
678
687
  // Our skills compression runs BEFORE optimizeSystemPrompt and replaces
679
- // pi's verbose `<available_skills>` block with a compressed text
688
+ // the runtime's verbose `<available_skills>` block with a compressed text
680
689
  // section that has no XML tag. So `original` here (post-compression)
681
690
  // does not contain `<available_skills>` and the result doesn't either
682
691
  // — no false positive.
@@ -968,15 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
968
977
  */
969
978
  function getCompat(model: PiModel | undefined): CacheCompat {
970
979
  if (!model) return {} as CacheCompat;
971
-
972
- // Pi merges provider.compat with model.compat (model wins on conflicts)
973
- // We approximate this by reading from ctx.model which should already have merged compat
974
- // However, for safety, we check both levels if available
975
- const modelCompat = (model.compat ?? {}) as CacheCompat;
976
-
977
- // Note: ctx.model from Pi should already contain merged compat,
978
- // but we document the two-level structure for clarity
979
- return modelCompat;
980
+
981
+ const record = model as PiModel & { compatConfig?: Record<string, unknown> };
982
+ return {
983
+ ...((record.compatConfig ?? {}) as CacheCompat),
984
+ ...((record.compat ?? {}) as CacheCompat),
985
+ };
986
+ }
987
+
988
+ function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
989
+ if (!sessionHash || !model) return undefined;
990
+ return `${sessionHash}:${modelKey(model)}`;
991
+ }
992
+
993
+ function rememberPromptRewriteContext(
994
+ contexts: Map<string, PromptRewriteContext>,
995
+ key: string | undefined,
996
+ context: PromptRewriteContext,
997
+ ): void {
998
+ if (!key) return;
999
+ contexts.set(key, context);
1000
+ }
1001
+
1002
+ function getPromptRewriteContext(
1003
+ contexts: Map<string, PromptRewriteContext>,
1004
+ key: string | undefined,
1005
+ now = Date.now(),
1006
+ ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
1007
+ ): PromptRewriteContext | undefined {
1008
+ if (!key) return undefined;
1009
+ const context = contexts.get(key);
1010
+ if (!context) return undefined;
1011
+ if (now - context.timestamp > ttlMs) {
1012
+ contexts.delete(key);
1013
+ return undefined;
1014
+ }
1015
+ return context;
980
1016
  }
981
1017
 
982
1018
  /**
@@ -1141,7 +1177,7 @@ function isGeminiLikeAssistantMessage(message: unknown, model: PiModel | undefin
1141
1177
  * Check whether the model id uses Anthropic's adaptive generation (thinking)
1142
1178
  * that requires `forceAdaptiveThinking: true` in compat.
1143
1179
  *
1144
- * Adaptive-generation models (from pi-ai built-in catalog) include:
1180
+ * Adaptive-generation models (from the bundled model catalog) include:
1145
1181
  * claude-opus-4-6, claude-opus-4-7, claude-opus-4-8 (also dotted 4.6/4.7/4.8)
1146
1182
  * claude-sonnet-4-6
1147
1183
  * claude-fable-5
@@ -1162,7 +1198,7 @@ function isAdaptiveGenerationModel(model: PiModel | undefined): boolean {
1162
1198
  // OMP divergence: adaptive thinking is set automatically by the OMP built-in model
1163
1199
  // catalog (via disableAdaptiveThinking, with reversed semantics) and is NOT
1164
1200
  // user-configurable from models.yml (see omp models.md §Anthropic compatibility).
1165
- // The Pi-era forceAdaptiveThinking flag no longer exists. We keep model detection
1201
+ // The legacy `forceAdaptiveThinking` flag no longer exists. We keep model detection
1166
1202
  // (isAdaptiveGenerationModel) for informational doctor output, but drop the fixable
1167
1203
  // compat-suggestion path entirely.
1168
1204
  function isAdaptiveThinkingCompatApplicable(_model: PiModel): boolean {
@@ -1734,14 +1770,14 @@ function readCacheWriteFromDetails(details: UnknownRecord | undefined): number |
1734
1770
  return getFirstNonNegativeNumber(details?.cache_write_tokens, details?.cacheWriteTokens);
1735
1771
  }
1736
1772
 
1737
- // Pi normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
1773
+ // The host runtime normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
1738
1774
  // cache_read_input_tokens, etc.) into a common shape:
1739
1775
  // input = uncached prompt portion (total prompt minus cacheRead minus cacheWrite)
1740
1776
  // cacheRead = tokens read from a previously-cached prefix
1741
1777
  // cacheWrite= tokens newly written into cache in this request
1742
1778
  //
1743
1779
  // We reconstruct the total prompt-token count as input + cacheRead + cacheWrite.
1744
- // Pi guarantees that input, cacheRead, and cacheWrite are always present on
1780
+ // The host runtime guarantees that input, cacheRead, and cacheWrite are always present on
1745
1781
  // assistant messages processed through its provider pipeline (at least as zero).
1746
1782
  //
1747
1783
  // Only DeepSeek sets allowInputOnly=true so that a cache miss (cacheRead=0) still
@@ -1757,10 +1793,10 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
1757
1793
 
1758
1794
  if (!hasCacheSignal && (input === undefined || !allowInputOnly)) return undefined;
1759
1795
 
1760
- // Under healthy Pi normalization input is the uncached portion, so
1796
+ // Under healthy runtime normalization input is the uncached portion, so
1761
1797
  // totalInput = input + cacheRead + cacheWrite gives the full prompt token count.
1762
1798
  // Guard against degenerate reads where a broken proxy omits prompt_tokens and
1763
- // Pi's input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
1799
+ // normalized input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
1764
1800
  const computed = (input ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
1765
1801
  const floor = (cacheRead ?? 0) + (cacheWrite ?? 0);
1766
1802
  return {
@@ -1771,8 +1807,8 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
1771
1807
  }
1772
1808
 
1773
1809
  // Raw fallback for DeepSeek responses that still carry their native usage fields.
1774
- // In practice Pi normalizes usage before message_end fires, so this path is only
1775
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1810
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1811
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1776
1812
  function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
1777
1813
  const usage = usageRecordFromAssistant(message);
1778
1814
  if (!usage) return undefined;
@@ -1789,8 +1825,8 @@ function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
1789
1825
  }
1790
1826
 
1791
1827
  // Raw fallback for OpenAI-family responses that still carry their native usage fields.
1792
- // In practice Pi normalizes usage before message_end fires, so this path is only
1793
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1828
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1829
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1794
1830
  function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
1795
1831
  const usage = usageRecordFromAssistant(message);
1796
1832
  if (!usage) return undefined;
@@ -1812,8 +1848,8 @@ function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
1812
1848
  }
1813
1849
 
1814
1850
  // Raw fallback for Anthropic/Claude responses that still carry their native usage fields.
1815
- // In practice Pi normalizes usage before message_end fires, so this path is only
1816
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1851
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1852
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1817
1853
  function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
1818
1854
  const usage = usageRecordFromAssistant(message);
1819
1855
  if (!usage) return undefined;
@@ -1832,8 +1868,8 @@ function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
1832
1868
  }
1833
1869
 
1834
1870
  // Raw fallback for Gemini/Vertex responses that still carry their native usage fields.
1835
- // In practice Pi normalizes usage before message_end fires, so this path is only
1836
- // reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
1871
+ // In practice the runtime normalizes usage before message_end fires, so this path is only
1872
+ // reached when normalized fields are absent (e.g. custom/foreign providers).
1837
1873
  function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
1838
1874
  const record = getAssistantRecord(message);
1839
1875
  if (!record) return undefined;
@@ -1867,8 +1903,8 @@ function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
1867
1903
  return { cacheRead, cacheWrite: 0, totalInput };
1868
1904
  }
1869
1905
 
1870
- // Try Pi-normalized usage first (always present for messages that went through Pi's
1871
- // provider pipeline). Fall back to provider-specific raw-field readers when Pi-normalized
1906
+ // Try normalized usage first (always present for messages that went through the runtime's
1907
+ // provider pipeline). Fall back to provider-specific raw-field readers when normalized
1872
1908
  // fields are absent (e.g. messages from custom/foreign providers whose raw usage shape
1873
1909
  // matches the official API).
1874
1910
  function normalizeWithFallback(
@@ -1971,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1971
2007
  return true;
1972
2008
  }
1973
2009
  if (Array.isArray(record.system) && record.system.length > 0) {
1974
- // Replace first text block, keep structure
1975
- const first = asRecord(record.system[0]);
1976
- if (first && typeof first.text === "string") {
1977
- first.text = text;
1978
- return true;
1979
- }
1980
- // Fallback: convert to single-block string form
1981
2010
  record.system = [{ type: "text", text }];
1982
2011
  return true;
1983
2012
  }
@@ -1985,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1985
2014
  // google-generative-ai: payload.systemInstruction
1986
2015
  const systemInstruction = asRecord(record.systemInstruction);
1987
2016
  if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
1988
- const firstPart = asRecord(systemInstruction.parts[0]);
1989
- if (firstPart && typeof firstPart.text === "string") {
1990
- firstPart.text = text;
1991
- return true;
1992
- }
2017
+ systemInstruction.parts = [{ text }];
2018
+ return true;
1993
2019
  }
1994
2020
 
1995
2021
  // openai-completions / openai-responses: payload.messages[] first system/developer message
@@ -2004,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
2004
2030
  return true;
2005
2031
  }
2006
2032
  if (Array.isArray(r.content) && r.content.length > 0) {
2007
- const first = asRecord(r.content[0]);
2008
- if (first && typeof first.text === "string") {
2009
- first.text = text;
2010
- return true;
2011
- }
2033
+ r.content = text;
2034
+ return true;
2012
2035
  }
2013
2036
  }
2014
2037
  }
@@ -2038,7 +2061,7 @@ function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
2038
2061
  }
2039
2062
 
2040
2063
  function describeMissingOpenAIFamilyProxyCompat(_model: PiModel): string[] {
2041
- // OMP divergence: Pi's sendSessionAffinityHeaders has no compat equivalent.
2064
+ // OMP divergence: the legacy `sendSessionAffinityHeaders` flag has no compat equivalent.
2042
2065
  // OMP achieves upstream stickiness via multi-credential auth + session affinity
2043
2066
  // in agent.db (see omp models.md §Auth). There is no required compat key for
2044
2067
  // OpenAI-family proxies on OMP, so this returns an empty list. Optional long
@@ -2332,7 +2355,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
2332
2355
 
2333
2356
  return (
2334
2357
  `💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
2335
- "Pi may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2358
+ "OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2336
2359
  );
2337
2360
  },
2338
2361
  },
@@ -3363,7 +3386,7 @@ function formatTokenM(value: number): string {
3363
3386
 
3364
3387
  /**
3365
3388
  * Check if an assistant message's usage fields appear to be missing or empty.
3366
- * Returns true when Pi-normalized fields (input, cacheRead, cacheWrite) are all
3389
+ * Returns true when normalized fields (input, cacheRead, cacheWrite) are all
3367
3390
  * absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
3368
3391
  * for the given adapter.
3369
3392
  */
@@ -3371,12 +3394,12 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
3371
3394
  const usage = usageRecordFromAssistant(message);
3372
3395
  if (!usage) return true;
3373
3396
 
3374
- // Check Pi-normalized fields
3397
+ // Check normalized fields
3375
3398
  const input = getNonNegativeNumber(usage, "input");
3376
3399
  const cacheRead = getNonNegativeNumber(usage, "cacheRead");
3377
3400
  const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
3378
3401
 
3379
- // If Pi-normalized fields exist with non-zero values, usage is present
3402
+ // If normalized fields exist with non-zero values, usage is present
3380
3403
  if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
3381
3404
  return false;
3382
3405
  }
@@ -4052,7 +4075,7 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
4052
4075
  if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
4053
4076
  return [
4054
4077
  "ℹ️ Compat check not applicable for this model.",
4055
- " Native Responses transports already use Pi core request handling; OpenAI-compatible proxy compat flags do not apply.",
4078
+ " Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
4056
4079
  ];
4057
4080
  }
4058
4081
 
@@ -4934,9 +4957,9 @@ function chooseFixPlacement(
4934
4957
  Object.keys(compatKeys),
4935
4958
  );
4936
4959
 
4937
- // Provider-level writes cannot override a model-level compat key because Pi's
4960
+ // Provider-level writes cannot override a model-level compat key because the runtime's
4938
4961
  // merge order is provider.compat then model.compat. If the active model already
4939
- // has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
4962
+ // has one of the keys we need to repair (e.g. thinkingFormat: \"legacy\"), write
4940
4963
  // at model level even when the key would otherwise be provider-safe.
4941
4964
  if (decision.placement === "provider" && existingModelKeys.length > 0) {
4942
4965
  return {
@@ -5103,7 +5126,7 @@ function selfCheckFix(
5103
5126
  }
5104
5127
 
5105
5128
  // Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
5106
- // mirroring Pi's mergeCompat behavior (model wins on conflicts). The fix may
5129
+ // mirroring the runtime's mergeCompat behavior (model wins on conflicts). The fix may
5107
5130
  // have written either level, so validation must check the merged result.
5108
5131
  const provCompatRaw = (provider as Record<string, unknown>).compat;
5109
5132
  const provCompat = (provCompatRaw && typeof provCompatRaw === 'object' && !Array.isArray(provCompatRaw))
@@ -5226,7 +5249,7 @@ function backupTimestamp(): string {
5226
5249
 
5227
5250
  // Internal helpers exported only so the task verification script
5228
5251
  // (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
5229
- // extension's public API; pi only invokes the default export below.
5252
+ // extension's public API; the host runtime only invokes the default export below.
5230
5253
  export const __internals_for_tests = {
5231
5254
  buildStableCandidates,
5232
5255
  optimizeSystemPrompt,
@@ -5421,6 +5444,10 @@ export const __internals_for_tests = {
5421
5444
  hashSessionId,
5422
5445
  makeSessionModelKey,
5423
5446
  modelKeyFromSessionKey,
5447
+ makePromptRewriteContextKey,
5448
+ rememberPromptRewriteContext,
5449
+ getPromptRewriteContext,
5450
+ PROMPT_REWRITE_CONTEXT_TTL_MS,
5424
5451
  filterRestorableStatsForSession,
5425
5452
  parsePersistedRoutedModelRef,
5426
5453
  routedModelRefToPiModel,
@@ -5494,11 +5521,10 @@ export default function (pi: ExtensionAPI) {
5494
5521
  let latestCacheHint: PiCacheHintSnapshot | undefined;
5495
5522
  // OMP divergence: prompt rewriting moved from before_agent_start to
5496
5523
  // before_provider_request (OMP's before_agent_start can only inject messages,
5497
- // not mutate systemPrompt). We cache systemPromptOptions + route snapshot here
5498
- // so before_provider_request can apply the 3-step pipeline to the payload.
5499
- let pendingPromptOptions: BuildSystemPromptOptions | undefined;
5500
- let pendingRouteSnapshot: PiRouteSnapshot | undefined;
5501
- let pendingRoutedModel: PiModel | undefined;
5524
+ // not mutate systemPrompt). Store prompt options per session/model so an
5525
+ // overlapping turn or sub-agent cannot overwrite another request's rewrite
5526
+ // context before before_provider_request fires.
5527
+ const promptRewriteContexts = new Map<string, PromptRewriteContext>();
5502
5528
  const PERSIST_DEBOUNCE_MS = 2000;
5503
5529
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
5504
5530
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5709,7 +5735,7 @@ export default function (pi: ExtensionAPI) {
5709
5735
 
5710
5736
  if (reason === "reload") {
5711
5737
  // /reload: preserve session-scoped stats (same session hash).
5712
- // Pi extension reload creates a fresh closure, so cacheStatsByModel
5738
+ // OMP extension reload creates a fresh closure, so cacheStatsByModel
5713
5739
  // starts empty. Read persisted data and filter for current session.
5714
5740
  lastStatusText = undefined;
5715
5741
  lastPromptIntegrityWarningAt = 0;
@@ -5912,17 +5938,18 @@ export default function (pi: ExtensionAPI) {
5912
5938
  ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
5913
5939
  : undefined;
5914
5940
 
5915
- // OMP divergence: before_agent_start in OMP can only inject messages (return
5916
- // { message }), NOT mutate systemPrompt. We cache the prompt options + route
5917
- // snapshot here so before_provider_request can apply the 3-step pipeline to
5918
- // the provider payload. If OMP does not supply systemPromptOptions, skill
5919
- // compression and stable-prefix reorder are skipped (only churn strip runs).
5920
5941
  const eventRecord = asRecord(event);
5921
- pendingPromptOptions = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5922
- pendingRouteSnapshot = routeSnapshot;
5923
- pendingRoutedModel = routedModel ?? _ctx.model;
5924
-
5942
+ const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5925
5943
  const model = routedModel ?? _ctx.model;
5944
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
5945
+ rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
5946
+ options,
5947
+ routeSnapshot,
5948
+ routedModel: model,
5949
+ timestamp: Date.now(),
5950
+ });
5951
+
5952
+ const modelForHint = model;
5926
5953
  const promptCacheKey = getSessionPromptCacheKey(_ctx);
5927
5954
  const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
5928
5955
  const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
@@ -5930,9 +5957,9 @@ export default function (pi: ExtensionAPI) {
5930
5957
  sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
5931
5958
  virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
5932
5959
  virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
5933
- upstreamProvider: routeSnapshot?.provider ?? model?.provider,
5934
- upstreamModelId: routeSnapshot?.modelId ?? model?.id,
5935
- api: model?.api,
5960
+ upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
5961
+ upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
5962
+ api: modelForHint?.api,
5936
5963
  systemPrompt: rawSystemPrompt,
5937
5964
  promptCacheKey,
5938
5965
  cacheRetention,
@@ -5960,21 +5987,24 @@ export default function (pi: ExtensionAPI) {
5960
5987
  requestModel &&
5961
5988
  !isResponsesPromptRewriteBypassApi(requestModel.api)
5962
5989
  ) {
5990
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
5991
+ const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
5992
+ const promptOptions = rewriteContext?.options;
5963
5993
  const original = extractSystemPrompt(resultPayload);
5964
5994
  if (original && original.trim().length > 0) {
5965
5995
  // Step 1: strip per-turn churn from <session-overview>.
5966
5996
  const stripped = stripSessionOverviewChurn(original);
5967
5997
 
5968
5998
  // Step 2: compress skills XML → one-line index (requires cached options).
5969
- const compressed = pendingPromptOptions
5970
- ? compressSkillsInSystemPrompt(stripped, pendingPromptOptions)
5999
+ const compressed = promptOptions
6000
+ ? compressSkillsInSystemPrompt(stripped, promptOptions)
5971
6001
  : stripped;
5972
6002
 
5973
6003
  // Step 3: lift stable content above dynamic content (requires cached options).
5974
6004
  let finalPrompt = compressed;
5975
6005
  let changed = false;
5976
- if (pendingPromptOptions) {
5977
- const optimized = optimizeSystemPrompt(compressed, pendingPromptOptions);
6006
+ if (promptOptions) {
6007
+ const optimized = optimizeSystemPrompt(compressed, promptOptions);
5978
6008
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
5979
6009
  finalPrompt = optimized.systemPrompt;
5980
6010
  changed = true;
@@ -6098,7 +6128,7 @@ export default function (pi: ExtensionAPI) {
6098
6128
  // (no args) — interactive menu (with UI) or help summary
6099
6129
  // ────────────────────────────────────────────────────────────────
6100
6130
  pi.registerCommand("cache-optimizer", {
6101
- description: "Diagnose Pi cache configuration",
6131
+ description: "Diagnose OMP cache configuration",
6102
6132
  handler: async (args: string, cmdCtx) => {
6103
6133
  syncSessionHash(cmdCtx);
6104
6134
  const selectedModel = cmdCtx.model;
@@ -6110,16 +6140,16 @@ export default function (pi: ExtensionAPI) {
6110
6140
  resetCurrentSessionStats();
6111
6141
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6112
6142
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6113
- cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6143
+ cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6114
6144
  } else if (subcommand === "disable") {
6115
6145
  setRuntimeOptimizerEnabled(false);
6116
6146
  resetCurrentSessionStats();
6117
6147
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6118
6148
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6119
- cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6149
+ cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6120
6150
  } else if (subcommand === "doctor") {
6121
6151
  if (!model) {
6122
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6152
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6123
6153
  return;
6124
6154
  }
6125
6155
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
@@ -6134,7 +6164,7 @@ export default function (pi: ExtensionAPI) {
6134
6164
  cmdCtx.ui.notify(fullDiagnosis, "info");
6135
6165
  } else if (subcommand === "stats") {
6136
6166
  if (!model) {
6137
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6167
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6138
6168
  return;
6139
6169
  }
6140
6170
  const adapter = selectAdapterForModel(model);
@@ -6145,7 +6175,7 @@ export default function (pi: ExtensionAPI) {
6145
6175
  cmdCtx.ui.notify(output, "info");
6146
6176
  } else if (subcommand === "compat") {
6147
6177
  if (!model) {
6148
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6178
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6149
6179
  return;
6150
6180
  }
6151
6181
  const compatResult = buildCompatDiagnosis(model);
@@ -6161,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
6161
6191
  }
6162
6192
  } else if (subcommand === "reset") {
6163
6193
  if (!model) {
6164
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6194
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6165
6195
  return;
6166
6196
  }
6167
6197
  const adapter = selectAdapterForModel(model);
@@ -6186,12 +6216,12 @@ export default function (pi: ExtensionAPI) {
6186
6216
  cmdCtx.ui.notify(
6187
6217
  `✅ Reset local session cache stats for "${displayKey}". ` +
6188
6218
  "Upstream provider prompt cache was not modified. " +
6189
- "New requests will start a fresh stats bucket for this Pi session.",
6219
+ "New requests will start a fresh stats bucket for this OMP session.",
6190
6220
  "info",
6191
6221
  );
6192
6222
  } else if (subcommand === "fix") {
6193
6223
  if (!model) {
6194
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6224
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6195
6225
  return;
6196
6226
  }
6197
6227
 
@@ -6239,16 +6269,16 @@ export default function (pi: ExtensionAPI) {
6239
6269
  resetCurrentSessionStats();
6240
6270
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6241
6271
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6242
- cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6272
+ cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6243
6273
  } else if (choice === menuOptions[1]) {
6244
6274
  setRuntimeOptimizerEnabled(false);
6245
6275
  resetCurrentSessionStats();
6246
6276
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6247
6277
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6248
- cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6278
+ cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6249
6279
  } else if (choice === menuOptions[2]) {
6250
6280
  if (!model) {
6251
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6281
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6252
6282
  } else {
6253
6283
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
6254
6284
  const adapter = selectAdapterForModel(model);
@@ -6263,7 +6293,7 @@ export default function (pi: ExtensionAPI) {
6263
6293
  }
6264
6294
  } else if (choice === menuOptions[3]) {
6265
6295
  if (!model) {
6266
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6296
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6267
6297
  } else {
6268
6298
  const adapter = selectAdapterForModel(model);
6269
6299
  const sk = model ? sessionModelKey(model) : undefined;
@@ -6274,7 +6304,7 @@ export default function (pi: ExtensionAPI) {
6274
6304
  }
6275
6305
  } else if (choice === menuOptions[4]) {
6276
6306
  if (!model) {
6277
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6307
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6278
6308
  } else {
6279
6309
  const compatResult = buildCompatDiagnosis(model);
6280
6310
  if (compatResult) {
@@ -6291,7 +6321,7 @@ export default function (pi: ExtensionAPI) {
6291
6321
  } else if (choice === menuOptions[5]) {
6292
6322
  // Fix — auto-fix compat issues
6293
6323
  if (!model) {
6294
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6324
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6295
6325
  return;
6296
6326
  }
6297
6327
  const suggestion = buildFixSuggestion(model);
@@ -6316,7 +6346,7 @@ export default function (pi: ExtensionAPI) {
6316
6346
  );
6317
6347
  } else if (choice === menuOptions[6]) {
6318
6348
  if (!model) {
6319
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
6349
+ cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6320
6350
  } else {
6321
6351
  const adapter = selectAdapterForModel(model);
6322
6352
  if (!adapter) {
@@ -6341,8 +6371,8 @@ export default function (pi: ExtensionAPI) {
6341
6371
  // Fallback: text help when no interactive UI
6342
6372
  const diagnosis: string[] = [];
6343
6373
  diagnosis.push("📋 /cache-optimizer commands:");
6344
- diagnosis.push(" enable — Enable prompt/cache optimizations for this Pi process");
6345
- diagnosis.push(" disable — Disable prompt/cache optimizations for this Pi process");
6374
+ diagnosis.push(" enable — Enable prompt/cache optimizations for this OMP process");
6375
+ diagnosis.push(" disable — Disable prompt/cache optimizations for this OMP process");
6346
6376
  diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
6347
6377
  diagnosis.push(" stats — Show active model stats bucket and recent trend");
6348
6378
  diagnosis.push(" compat — Show compat suggestion with edit location");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omp-cache-optimizer",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "Improve OMP prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "omp-package",