npm - omp-cache-optimizer - Versions diffs - 1.0.0 → 1.0.2 - Mend

omp-cache-optimizer 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED

@@ -8,7 +8,7 @@
 用于提升 OMP 中 provider 侧 KV Cache / Prompt Cache 命中率的扩展：把稳定 prompt 内容前置，给 OpenAI-compatible 请求补保守的 `prompt_cache_key`，提示代理渠道常见缓存路由兼容问题，并在底部显示只读缓存统计。
-> 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`；`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑（自动写入的外科 YAML 编辑器计划在后续版本实现）。
+> 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从旧状态目录 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`；`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑（自动写入的外科 YAML 编辑器计划在后续版本实现）。
 ## 与原项目的关键差异
@@ -63,7 +63,7 @@
 omp install npm:omp-cache-optimizer
 ```
-如果之前安装过 Pi 版本：
+如果之前安装过原版本：
 ```bash
 omp remove npm:pi-cache-optimizer && omp install npm:omp-cache-optimizer
@@ -102,7 +102,7 @@ OMP 0.79.7 及之后，`omp update` 默认只更新 OMP 本体。若要更新已
 LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completions` 代理，常会把同一个 session 分散到多个上游后端，导致 provider 侧 prompt cache 被拆散。
-**OMP 差异**：OMP 不再使用 `sendSessionAffinityHeaders` compat 字段（Pi 时代的字段），而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
+**OMP 差异**：OMP 不再使用 `sendSessionAffinityHeaders` compat 字段（原项目中的旧字段），而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
 `models.yml` 示例：
@@ -129,13 +129,13 @@ providers:
 ## Anthropic adaptive thinking 模型
-**OMP 差异**：OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking（通过 `disableAdaptiveThinking` 字段，语义与 Pi 的 `forceAdaptiveThinking` 相反），且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示，不再提供自动修复。
+**OMP 差异**：OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking（通过 `disableAdaptiveThinking` 字段，语义与原项目中的 `forceAdaptiveThinking` 相反），且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示，不再提供自动修复。
 `/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测 adaptive thinking 模型并显示信息性说明。自定义渠道 fronting Anthropic 时，请确保模型 id 匹配官方发布版本，以便 OMP catalog 正确识别。
 ## 使用 `/cache-optimizer fix` 手动修复
-**OMP 差异**：当前 `/cache-optimizer fix` 降级为手动建议模式。原 Pi 版本的自动写入安全协议（backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚）将在后续 PR 中为 YAML 重新实现。
+**OMP 差异**：当前 `/cache-optimizer fix` 降级为手动建议模式。原项目中的自动写入安全协议（backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚）将在后续 PR 中为 YAML 重新实现。
 当前行为：
@@ -274,7 +274,7 @@ registry?.registerRouter({
 });
 ```
-cache hints 协议（`Symbol.for("omp.cache.hints.v1")`）形状与 Pi 版本一致，用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
+cache hints 协议（`Symbol.for("omp.cache.hints.v1")`）形状与原项目一致，用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
 ## 卸载

package/index.ts CHANGED

@@ -64,7 +64,7 @@ const LOG_PREFIX = "omp-cache-optimizer";
 const STATUS_KEY = "omp-cache-stats";
 const STATE_DIR = join(homedir(), ".omp", "agent");
 const STATE_FILE_PATH = join(STATE_DIR, "omp-cache-optimizer-stats.json");
-// Legacy Pi-era state file path: read for one-way migration only, never written.
+// Legacy source-project state file path: read for one-way migration only, never written.
 const LEGACY_PI_STATE_FILE_PATH = join(homedir(), ".pi", "agent", "pi-cache-optimizer-stats.json");
 const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
 const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
@@ -77,7 +77,7 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
 const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
 const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
 // Inter-extension protocol symbols are versioned under the omp.* namespace. The v1
-// shape is identical to the legacy pi.* symbols; router/hints integrators on OMP
+// shape is identical to the legacy symbols; router/hints integrators on OMP
 // should register under omp.routing.registry.v1 / omp.cache.hints.v1.
 const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("omp.routing.registry.v1");
 const PI_CACHE_HINTS_SYMBOL = Symbol.for("omp.cache.hints.v1");
@@ -104,7 +104,7 @@ function getLastPromptIntegrityWarningAt(): number {
 }
 // Minimum count of skills before compression is worth applying.
-// Below this, pi's verbose XML block is small enough that the overhead of
+// Below this, the runtime's verbose XML block is small enough that the overhead of
 // an additional one-line index isn't worth the loss of per-skill
 // description hints. The 31-skill snapshot in this repo was 13.3 KB; one
 // or two skills is well under 1 KB and not worth touching.
@@ -122,7 +122,7 @@ const SKILL_COMPRESSION_MIN_COUNT = 4;
 // The threshold also caps the upstream string-vs-array regression we saw with
 // trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
 // `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
-// pi then iterates char-by-char). Even if a similar bug recurs upstream, this
+// the runtime then iterates char-by-char). Even if a similar bug recurs upstream, this
 // extension will not lift its single-character byproducts into the stable
 // prefix candidate list.
 //
@@ -268,7 +268,7 @@ type PersistedCacheStatsV3 = {
 /**
  * V4 format: session-scoped stats buckets.
- * Each Pi process/session gets its own stats isolated by a hashed session id.
+ * Each session in the host runtime gets its own stats isolated by a hashed session id.
  *
  * sessions: sessionHash → modelKey (provider/id) → CacheStats
  * legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
@@ -312,6 +312,15 @@ type CacheUsageSample = {
   missingUsageFields: boolean;
 };
+type PromptRewriteContext = {
+  options?: BuildSystemPromptOptions;
+  routeSnapshot?: PiRouteSnapshot;
+  routedModel?: PiModel;
+  timestamp: number;
+};
+const PROMPT_REWRITE_CONTEXT_TTL_MS = 10_000;
 /** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
 const MAX_RECENT_SAMPLES = 50;
@@ -375,7 +384,7 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
 /**
  * Compressed alternative to `formatSkillsForPrompt`.
  *
- * Pi emits a four-line XML block per skill (`<name>`, `<description>`,
+ * The host runtime emits a four-line XML block per skill (`<name>`, `<description>`,
  * `<location>`) plus a three-sentence preamble. With 31 skills active in
  * this repo that block measured 13.3 KB — 61.5 % of the total system
  * prompt. The full description text matters when the model has to decide
@@ -454,7 +463,7 @@ function formatSkillsForPromptCompressed(
 }
 /**
- * Replace pi's verbose `<available_skills>` block in `prompt` with the
+ * Replace the runtime's verbose `<available_skills>` block in `prompt` with the
  * compressed one-index form. Idempotent: if the verbose form is not
  * present (compression already applied, or skill count below threshold),
  * the prompt is returned unchanged.
@@ -465,7 +474,7 @@ function formatSkillsForPromptCompressed(
  *   - opts.skills present and visible-skill count >= SKILL_COMPRESSION_MIN_COUNT
  *   - Verbose block (built from the same `opts.skills`) is found in
  *     `prompt` (substring match, no regex). This anchors the substitution
- *     to pi's own emitter; if pi changes the format, we no-op rather
+ *     to the runtime's own emitter; if the format changes, we no-op rather
  *     than mangle.
  */
 function compressSkillsInSystemPrompt(
@@ -589,14 +598,14 @@ function stripSessionOverviewChurn(prompt: string): string {
  * prompt rather than ship a corrupted one.
  *
  * Three marker categories are recognized (covers ~99% of real-world
- * extension injection patterns in the pi ecosystem):
+ * extension injection patterns in the host runtime ecosystem):
  *
  *   1. XML-style opening tags  `<tagname>` (lowercase, alpha-num + `_`/`-`)
  *   2. XML-style closing tags  `</tagname>`
  *   3. HTML comment START/END  `<!-- NAME:START -->` / `<!-- NAME:END -->`
  *
  * Tags with attributes (e.g., `<task id="42">`) are not currently emitted
- * by any pi extension we know of and are skipped to keep the regex tight.
+ * by any runtime extension we know of and are skipped to keep the regex tight.
  * Markdown headers, horizontal rules, and timestamp patterns are not
  * usable as guards because they have no closing form to verify.
  *
@@ -676,7 +685,7 @@ function optimizeSystemPrompt(
   // protected without code changes when new extensions ship.
   //
   // Our skills compression runs BEFORE optimizeSystemPrompt and replaces
-  // pi's verbose `<available_skills>` block with a compressed text
+  // the runtime's verbose `<available_skills>` block with a compressed text
   // section that has no XML tag. So `original` here (post-compression)
   // does not contain `<available_skills>` and the result doesn't either
   // — no false positive.
@@ -968,15 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
  */
 function getCompat(model: PiModel | undefined): CacheCompat {
   if (!model) return {} as CacheCompat;
-  // Pi merges provider.compat with model.compat (model wins on conflicts)
-  // We approximate this by reading from ctx.model which should already have merged compat
-  // However, for safety, we check both levels if available
-  const modelCompat = (model.compat ?? {}) as CacheCompat;
-  // Note: ctx.model from Pi should already contain merged compat,
-  // but we document the two-level structure for clarity
-  return modelCompat;
+  const record = model as PiModel & { compatConfig?: Record<string, unknown> };
+  return {
+    ...((record.compatConfig ?? {}) as CacheCompat),
+    ...((record.compat ?? {}) as CacheCompat),
+  };
+}
+function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
+  if (!sessionHash || !model) return undefined;
+  return `${sessionHash}:${modelKey(model)}`;
+}
+function rememberPromptRewriteContext(
+  contexts: Map<string, PromptRewriteContext>,
+  key: string | undefined,
+  context: PromptRewriteContext,
+): void {
+  if (!key) return;
+  contexts.set(key, context);
+}
+function getPromptRewriteContext(
+  contexts: Map<string, PromptRewriteContext>,
+  key: string | undefined,
+  now = Date.now(),
+  ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
+): PromptRewriteContext | undefined {
+  if (!key) return undefined;
+  const context = contexts.get(key);
+  if (!context) return undefined;
+  if (now - context.timestamp > ttlMs) {
+    contexts.delete(key);
+    return undefined;
+  }
+  return context;
 }
 /**
@@ -1141,7 +1177,7 @@ function isGeminiLikeAssistantMessage(message: unknown, model: PiModel | undefin
  * Check whether the model id uses Anthropic's adaptive generation (thinking)
  * that requires `forceAdaptiveThinking: true` in compat.
  *
- * Adaptive-generation models (from pi-ai built-in catalog) include:
+ * Adaptive-generation models (from the bundled model catalog) include:
  *   claude-opus-4-6, claude-opus-4-7, claude-opus-4-8 (also dotted 4.6/4.7/4.8)
  *   claude-sonnet-4-6
  *   claude-fable-5
@@ -1162,7 +1198,7 @@ function isAdaptiveGenerationModel(model: PiModel | undefined): boolean {
 // OMP divergence: adaptive thinking is set automatically by the OMP built-in model
 // catalog (via disableAdaptiveThinking, with reversed semantics) and is NOT
 // user-configurable from models.yml (see omp models.md §Anthropic compatibility).
-// The Pi-era forceAdaptiveThinking flag no longer exists. We keep model detection
+// The legacy `forceAdaptiveThinking` flag no longer exists. We keep model detection
 // (isAdaptiveGenerationModel) for informational doctor output, but drop the fixable
 // compat-suggestion path entirely.
 function isAdaptiveThinkingCompatApplicable(_model: PiModel): boolean {
@@ -1734,14 +1770,14 @@ function readCacheWriteFromDetails(details: UnknownRecord | undefined): number |
   return getFirstNonNegativeNumber(details?.cache_write_tokens, details?.cacheWriteTokens);
 }
-// Pi normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
+// The host runtime normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
 // cache_read_input_tokens, etc.) into a common shape:
 //   input     = uncached prompt portion (total prompt minus cacheRead minus cacheWrite)
 //   cacheRead = tokens read from a previously-cached prefix
 //   cacheWrite= tokens newly written into cache in this request
 //
 // We reconstruct the total prompt-token count as input + cacheRead + cacheWrite.
-// Pi guarantees that input, cacheRead, and cacheWrite are always present on
+// The host runtime guarantees that input, cacheRead, and cacheWrite are always present on
 // assistant messages processed through its provider pipeline (at least as zero).
 //
 // Only DeepSeek sets allowInputOnly=true so that a cache miss (cacheRead=0) still
@@ -1757,10 +1793,10 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
   if (!hasCacheSignal && (input === undefined || !allowInputOnly)) return undefined;
-  // Under healthy Pi normalization input is the uncached portion, so
+  // Under healthy runtime normalization input is the uncached portion, so
   // totalInput = input + cacheRead + cacheWrite gives the full prompt token count.
   // Guard against degenerate reads where a broken proxy omits prompt_tokens and
-  // Pi's input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
+  // normalized input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
   const computed = (input ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
   const floor = (cacheRead ?? 0) + (cacheWrite ?? 0);
   return {
@@ -1771,8 +1807,8 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
 }
 // Raw fallback for DeepSeek responses that still carry their native usage fields.
-// In practice Pi normalizes usage before message_end fires, so this path is only
-// reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
+// In practice the runtime normalizes usage before message_end fires, so this path is only
+// reached when normalized fields are absent (e.g. custom/foreign providers).
 function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
   const usage = usageRecordFromAssistant(message);
   if (!usage) return undefined;
@@ -1789,8 +1825,8 @@ function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
 }
 // Raw fallback for OpenAI-family responses that still carry their native usage fields.
-// In practice Pi normalizes usage before message_end fires, so this path is only
-// reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
+// In practice the runtime normalizes usage before message_end fires, so this path is only
+// reached when normalized fields are absent (e.g. custom/foreign providers).
 function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
   const usage = usageRecordFromAssistant(message);
   if (!usage) return undefined;
@@ -1812,8 +1848,8 @@ function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
 }
 // Raw fallback for Anthropic/Claude responses that still carry their native usage fields.
-// In practice Pi normalizes usage before message_end fires, so this path is only
-// reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
+// In practice the runtime normalizes usage before message_end fires, so this path is only
+// reached when normalized fields are absent (e.g. custom/foreign providers).
 function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
   const usage = usageRecordFromAssistant(message);
   if (!usage) return undefined;
@@ -1832,8 +1868,8 @@ function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
 }
 // Raw fallback for Gemini/Vertex responses that still carry their native usage fields.
-// In practice Pi normalizes usage before message_end fires, so this path is only
-// reached when Pi-normalized fields are absent (e.g. custom/foreign providers).
+// In practice the runtime normalizes usage before message_end fires, so this path is only
+// reached when normalized fields are absent (e.g. custom/foreign providers).
 function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
   const record = getAssistantRecord(message);
   if (!record) return undefined;
@@ -1867,8 +1903,8 @@ function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
   return { cacheRead, cacheWrite: 0, totalInput };
 }
-// Try Pi-normalized usage first (always present for messages that went through Pi's
-// provider pipeline). Fall back to provider-specific raw-field readers when Pi-normalized
+// Try normalized usage first (always present for messages that went through the runtime's
+// provider pipeline). Fall back to provider-specific raw-field readers when normalized
 // fields are absent (e.g. messages from custom/foreign providers whose raw usage shape
 // matches the official API).
 function normalizeWithFallback(
@@ -1971,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
     return true;
   }
   if (Array.isArray(record.system) && record.system.length > 0) {
-    // Replace first text block, keep structure
-    const first = asRecord(record.system[0]);
-    if (first && typeof first.text === "string") {
-      first.text = text;
-      return true;
-    }
-    // Fallback: convert to single-block string form
     record.system = [{ type: "text", text }];
     return true;
   }
@@ -1985,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
   // google-generative-ai: payload.systemInstruction
   const systemInstruction = asRecord(record.systemInstruction);
   if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
-    const firstPart = asRecord(systemInstruction.parts[0]);
-    if (firstPart && typeof firstPart.text === "string") {
-      firstPart.text = text;
-      return true;
-    }
+    systemInstruction.parts = [{ text }];
+    return true;
   }
   // openai-completions / openai-responses: payload.messages[] first system/developer message
@@ -2004,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
           return true;
         }
         if (Array.isArray(r.content) && r.content.length > 0) {
-          const first = asRecord(r.content[0]);
-          if (first && typeof first.text === "string") {
-            first.text = text;
-            return true;
-          }
+          r.content = text;
+          return true;
         }
       }
     }
@@ -2038,7 +2061,7 @@ function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
 }
 function describeMissingOpenAIFamilyProxyCompat(_model: PiModel): string[] {
-  // OMP divergence: Pi's sendSessionAffinityHeaders has no compat equivalent.
+  // OMP divergence: the legacy `sendSessionAffinityHeaders` flag has no compat equivalent.
   // OMP achieves upstream stickiness via multi-credential auth + session affinity
   // in agent.db (see omp models.md §Auth). There is no required compat key for
   // OpenAI-family proxies on OMP, so this returns an empty list. Optional long
@@ -2332,7 +2355,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
       return (
         `💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
-        "Pi may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
+        "OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
       );
     },
   },
@@ -3363,7 +3386,7 @@ function formatTokenM(value: number): string {
 /**
  * Check if an assistant message's usage fields appear to be missing or empty.
- * Returns true when Pi-normalized fields (input, cacheRead, cacheWrite) are all
+ * Returns true when normalized fields (input, cacheRead, cacheWrite) are all
  * absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
  * for the given adapter.
  */
@@ -3371,12 +3394,12 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
   const usage = usageRecordFromAssistant(message);
   if (!usage) return true;
-  // Check Pi-normalized fields
+  // Check normalized fields
   const input = getNonNegativeNumber(usage, "input");
   const cacheRead = getNonNegativeNumber(usage, "cacheRead");
   const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
-  // If Pi-normalized fields exist with non-zero values, usage is present
+  // If normalized fields exist with non-zero values, usage is present
   if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
     return false;
   }
@@ -4052,7 +4075,7 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
   if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
     return [
       "ℹ️ Compat check not applicable for this model.",
-      "   Native Responses transports already use Pi core request handling; OpenAI-compatible proxy compat flags do not apply.",
+      "   Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
     ];
   }
@@ -4934,9 +4957,9 @@ function chooseFixPlacement(
     Object.keys(compatKeys),
   );
-  // Provider-level writes cannot override a model-level compat key because Pi's
+  // Provider-level writes cannot override a model-level compat key because the runtime's
   // merge order is provider.compat then model.compat. If the active model already
-  // has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
+  // has one of the keys we need to repair (e.g. thinkingFormat: \"legacy\"), write
   // at model level even when the key would otherwise be provider-safe.
   if (decision.placement === "provider" && existingModelKeys.length > 0) {
     return {
@@ -5103,7 +5126,7 @@ function selfCheckFix(
     }
     // Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
-    // mirroring Pi's mergeCompat behavior (model wins on conflicts). The fix may
+    // mirroring the runtime's mergeCompat behavior (model wins on conflicts). The fix may
     // have written either level, so validation must check the merged result.
     const provCompatRaw = (provider as Record<string, unknown>).compat;
     const provCompat = (provCompatRaw && typeof provCompatRaw === 'object' && !Array.isArray(provCompatRaw))
@@ -5226,7 +5249,7 @@ function backupTimestamp(): string {
 // Internal helpers exported only so the task verification script
 // (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
-// extension's public API; pi only invokes the default export below.
+// extension's public API; the host runtime only invokes the default export below.
 export const __internals_for_tests = {
   buildStableCandidates,
   optimizeSystemPrompt,
@@ -5421,6 +5444,10 @@ export const __internals_for_tests = {
   hashSessionId,
   makeSessionModelKey,
   modelKeyFromSessionKey,
+  makePromptRewriteContextKey,
+  rememberPromptRewriteContext,
+  getPromptRewriteContext,
+  PROMPT_REWRITE_CONTEXT_TTL_MS,
   filterRestorableStatsForSession,
   parsePersistedRoutedModelRef,
   routedModelRefToPiModel,
@@ -5494,11 +5521,10 @@ export default function (pi: ExtensionAPI) {
   let latestCacheHint: PiCacheHintSnapshot | undefined;
   // OMP divergence: prompt rewriting moved from before_agent_start to
   // before_provider_request (OMP's before_agent_start can only inject messages,
-  // not mutate systemPrompt). We cache systemPromptOptions + route snapshot here
-  // so before_provider_request can apply the 3-step pipeline to the payload.
-  let pendingPromptOptions: BuildSystemPromptOptions | undefined;
-  let pendingRouteSnapshot: PiRouteSnapshot | undefined;
-  let pendingRoutedModel: PiModel | undefined;
+  // not mutate systemPrompt). Store prompt options per session/model so an
+  // overlapping turn or sub-agent cannot overwrite another request's rewrite
+  // context before before_provider_request fires.
+  const promptRewriteContexts = new Map<string, PromptRewriteContext>();
   const PERSIST_DEBOUNCE_MS = 2000;
   /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
   const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5709,7 +5735,7 @@ export default function (pi: ExtensionAPI) {
     if (reason === "reload") {
       // /reload: preserve session-scoped stats (same session hash).
-      // Pi extension reload creates a fresh closure, so cacheStatsByModel
+      // OMP extension reload creates a fresh closure, so cacheStatsByModel
       // starts empty. Read persisted data and filter for current session.
       lastStatusText = undefined;
       lastPromptIntegrityWarningAt = 0;
@@ -5912,17 +5938,18 @@ export default function (pi: ExtensionAPI) {
       ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
       : undefined;
-    // OMP divergence: before_agent_start in OMP can only inject messages (return
-    // { message }), NOT mutate systemPrompt. We cache the prompt options + route
-    // snapshot here so before_provider_request can apply the 3-step pipeline to
-    // the provider payload. If OMP does not supply systemPromptOptions, skill
-    // compression and stable-prefix reorder are skipped (only churn strip runs).
     const eventRecord = asRecord(event);
-    pendingPromptOptions = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
-    pendingRouteSnapshot = routeSnapshot;
-    pendingRoutedModel = routedModel ?? _ctx.model;
+    const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
     const model = routedModel ?? _ctx.model;
+    const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
+    rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
+      options,
+      routeSnapshot,
+      routedModel: model,
+      timestamp: Date.now(),
+    });
+    const modelForHint = model;
     const promptCacheKey = getSessionPromptCacheKey(_ctx);
     const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
     const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
@@ -5930,9 +5957,9 @@ export default function (pi: ExtensionAPI) {
       sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
       virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
       virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
-      upstreamProvider: routeSnapshot?.provider ?? model?.provider,
-      upstreamModelId: routeSnapshot?.modelId ?? model?.id,
-      api: model?.api,
+      upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
+      upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
+      api: modelForHint?.api,
       systemPrompt: rawSystemPrompt,
       promptCacheKey,
       cacheRetention,
@@ -5960,21 +5987,24 @@ export default function (pi: ExtensionAPI) {
       requestModel &&
       !isResponsesPromptRewriteBypassApi(requestModel.api)
     ) {
+      const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
+      const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
+      const promptOptions = rewriteContext?.options;
       const original = extractSystemPrompt(resultPayload);
       if (original && original.trim().length > 0) {
         // Step 1: strip per-turn churn from <session-overview>.
         const stripped = stripSessionOverviewChurn(original);
         // Step 2: compress skills XML → one-line index (requires cached options).
-        const compressed = pendingPromptOptions
-          ? compressSkillsInSystemPrompt(stripped, pendingPromptOptions)
+        const compressed = promptOptions
+          ? compressSkillsInSystemPrompt(stripped, promptOptions)
           : stripped;
         // Step 3: lift stable content above dynamic content (requires cached options).
         let finalPrompt = compressed;
         let changed = false;
-        if (pendingPromptOptions) {
-          const optimized = optimizeSystemPrompt(compressed, pendingPromptOptions);
+        if (promptOptions) {
+          const optimized = optimizeSystemPrompt(compressed, promptOptions);
           if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
             finalPrompt = optimized.systemPrompt;
             changed = true;
@@ -6098,7 +6128,7 @@ export default function (pi: ExtensionAPI) {
   //   (no args) — interactive menu (with UI) or help summary
   // ────────────────────────────────────────────────────────────────
   pi.registerCommand("cache-optimizer", {
-    description: "Diagnose Pi cache configuration",
+    description: "Diagnose OMP cache configuration",
     handler: async (args: string, cmdCtx) => {
       syncSessionHash(cmdCtx);
       const selectedModel = cmdCtx.model;
@@ -6110,16 +6140,16 @@ export default function (pi: ExtensionAPI) {
         resetCurrentSessionStats();
         await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
         await publishStatus(cmdCtx as unknown as ExtensionContext, model);
-        cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
+        cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
       } else if (subcommand === "disable") {
         setRuntimeOptimizerEnabled(false);
         resetCurrentSessionStats();
         await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
         await publishStatus(cmdCtx as unknown as ExtensionContext, model);
-        cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
+        cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
       } else if (subcommand === "doctor") {
         if (!model) {
-          cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+          cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
           return;
         }
         const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
@@ -6134,7 +6164,7 @@ export default function (pi: ExtensionAPI) {
         cmdCtx.ui.notify(fullDiagnosis, "info");
       } else if (subcommand === "stats") {
         if (!model) {
-          cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+          cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
           return;
         }
         const adapter = selectAdapterForModel(model);
@@ -6145,7 +6175,7 @@ export default function (pi: ExtensionAPI) {
         cmdCtx.ui.notify(output, "info");
       } else if (subcommand === "compat") {
         if (!model) {
-          cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+          cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
           return;
         }
         const compatResult = buildCompatDiagnosis(model);
@@ -6161,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
         }
       } else if (subcommand === "reset") {
         if (!model) {
-          cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+          cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
           return;
         }
         const adapter = selectAdapterForModel(model);
@@ -6186,12 +6216,12 @@ export default function (pi: ExtensionAPI) {
         cmdCtx.ui.notify(
           `✅ Reset local session cache stats for "${displayKey}". ` +
           "Upstream provider prompt cache was not modified. " +
-          "New requests will start a fresh stats bucket for this Pi session.",
+          "New requests will start a fresh stats bucket for this OMP session.",
           "info",
         );
       } else if (subcommand === "fix") {
         if (!model) {
-          cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+          cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
           return;
         }
@@ -6239,16 +6269,16 @@ export default function (pi: ExtensionAPI) {
             resetCurrentSessionStats();
             await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
             await publishStatus(cmdCtx as unknown as ExtensionContext, model);
-            cmdCtx.ui.notify(`✅ Pi Cache Optimizer enabled for this Pi process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
+            cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
           } else if (choice === menuOptions[1]) {
             setRuntimeOptimizerEnabled(false);
             resetCurrentSessionStats();
             await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
             await publishStatus(cmdCtx as unknown as ExtensionContext, model);
-            cmdCtx.ui.notify(`⏸️ Pi Cache Optimizer disabled for this Pi process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
+            cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
           } else if (choice === menuOptions[2]) {
             if (!model) {
-              cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+              cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
             } else {
               const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
               const adapter = selectAdapterForModel(model);
@@ -6263,7 +6293,7 @@ export default function (pi: ExtensionAPI) {
             }
           } else if (choice === menuOptions[3]) {
             if (!model) {
-              cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+              cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
             } else {
               const adapter = selectAdapterForModel(model);
               const sk = model ? sessionModelKey(model) : undefined;
@@ -6274,7 +6304,7 @@ export default function (pi: ExtensionAPI) {
             }
           } else if (choice === menuOptions[4]) {
             if (!model) {
-              cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+              cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
             } else {
               const compatResult = buildCompatDiagnosis(model);
               if (compatResult) {
@@ -6291,7 +6321,7 @@ export default function (pi: ExtensionAPI) {
           } else if (choice === menuOptions[5]) {
             // Fix — auto-fix compat issues
             if (!model) {
-              cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+              cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
               return;
             }
             const suggestion = buildFixSuggestion(model);
@@ -6316,7 +6346,7 @@ export default function (pi: ExtensionAPI) {
             );
           } else if (choice === menuOptions[6]) {
             if (!model) {
-              cmdCtx.ui.notify("No active model selected. Select a model first with /model or pi --model.", "warning");
+              cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
             } else {
               const adapter = selectAdapterForModel(model);
               if (!adapter) {
@@ -6341,8 +6371,8 @@ export default function (pi: ExtensionAPI) {
         // Fallback: text help when no interactive UI
         const diagnosis: string[] = [];
         diagnosis.push("📋 /cache-optimizer commands:");
-        diagnosis.push("  enable  — Enable prompt/cache optimizations for this Pi process");
-        diagnosis.push("  disable — Disable prompt/cache optimizations for this Pi process");
+        diagnosis.push("  enable  — Enable prompt/cache optimizations for this OMP process");
+        diagnosis.push("  disable — Disable prompt/cache optimizations for this OMP process");
         diagnosis.push("  doctor  — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
         diagnosis.push("  stats   — Show active model stats bucket and recent trend");
         diagnosis.push("  compat  — Show compat suggestion with edit location");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omp-cache-optimizer",
-  "version": "1.0.0",
+  "version": "1.0.2",
   "description": "Improve OMP prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
   "keywords": [
     "omp-package",