omp-cache-optimizer 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.ts +76 -47
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -312,6 +312,15 @@ type CacheUsageSample = {
312
312
  missingUsageFields: boolean;
313
313
  };
314
314
 
315
/**
 * Prompt-rewrite inputs stored under a session/model key so that a later
 * lookup with the same key can retrieve them.
 */
type PromptRewriteContext = {
  /** System prompt options recorded with the context, when any were supplied. */
  options?: BuildSystemPromptOptions;
  /** Route snapshot recorded with the context, when one was available. */
  routeSnapshot?: PiRouteSnapshot;
  /** Model recorded with the context. */
  routedModel?: PiModel;
  /** Capture time in milliseconds; lookups subtract it from `now` and compare the result to the TTL. */
  timestamp: number;
};

/** Maximum age, in milliseconds, a stored prompt-rewrite context may reach before a lookup discards it. */
const PROMPT_REWRITE_CONTEXT_TTL_MS = 10_000;
323
+
315
324
  /** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
316
325
  const MAX_RECENT_SAMPLES = 50;
317
326
 
@@ -968,16 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
968
977
  */
969
978
  function getCompat(model: PiModel | undefined): CacheCompat {
970
979
  if (!model) return {} as CacheCompat;
971
-
972
- // The host runtime merges provider.compat with model.compat (model wins on conflicts).
973
- // We approximate this by reading from ctx.model which should already have merged compat.
974
- // However, for safety, we check both levels if available.
975
- const modelCompat = (model.compat ?? {}) as CacheCompat;
976
980
 
977
- // Note: ctx.model from the host runtime should already contain merged compat,
978
- // but we document the two-level structure for clarity.
979
- // but we document the two-level structure for clarity
980
- return modelCompat;
981
+ const record = model as PiModel & { compatConfig?: Record<string, unknown> };
982
+ return {
983
+ ...((record.compatConfig ?? {}) as CacheCompat),
984
+ ...((record.compat ?? {}) as CacheCompat),
985
+ };
986
+ }
987
+
988
/**
 * Builds the map key under which a prompt-rewrite context is stored.
 *
 * @param sessionHash - Session hash; `undefined` or an empty string yields no key.
 * @param model - Model whose `modelKey(model)` forms the second half of the key.
 * @returns `<sessionHash>:<modelKey(model)>`, or `undefined` when either part is missing.
 */
function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
  if (sessionHash && model) {
    const modelPart = modelKey(model);
    return `${sessionHash}:${modelPart}`;
  }
  // Without both parts there is nothing to key on.
  return undefined;
}
992
+
993
/**
 * Stores a prompt-rewrite context under the given key.
 *
 * @param contexts - Map that holds the stored contexts; mutated in place.
 * @param key - Storage key; when it is `undefined` or empty the call does nothing.
 * @param context - Context to store; replaces any entry already present for `key`.
 */
function rememberPromptRewriteContext(
  contexts: Map<string, PromptRewriteContext>,
  key: string | undefined,
  context: PromptRewriteContext,
): void {
  if (key) {
    contexts.set(key, context);
  }
}
1001
+
1002
/**
 * Looks up a stored prompt-rewrite context, discarding it if it has expired.
 *
 * @param contexts - Map that holds the stored contexts; an expired entry is deleted from it.
 * @param key - Storage key; `undefined` or empty yields `undefined` without touching the map.
 * @param now - Current time in milliseconds; defaults to `Date.now()`.
 * @param ttlMs - Maximum allowed age in milliseconds; defaults to `PROMPT_REWRITE_CONTEXT_TTL_MS`.
 * @returns The stored context when its age does not exceed `ttlMs`, otherwise `undefined`.
 */
function getPromptRewriteContext(
  contexts: Map<string, PromptRewriteContext>,
  key: string | undefined,
  now = Date.now(),
  ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
): PromptRewriteContext | undefined {
  const entry = key ? contexts.get(key) : undefined;
  if (key && entry) {
    // Strictly greater-than: an entry whose age equals the TTL is still served.
    const isExpired = now - entry.timestamp > ttlMs;
    if (!isExpired) return entry;
    contexts.delete(key);
  }
  return undefined;
}
982
1017
 
983
1018
  /**
@@ -1972,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1972
2007
  return true;
1973
2008
  }
1974
2009
  if (Array.isArray(record.system) && record.system.length > 0) {
1975
- // Replace first text block, keep structure
1976
- const first = asRecord(record.system[0]);
1977
- if (first && typeof first.text === "string") {
1978
- first.text = text;
1979
- return true;
1980
- }
1981
- // Fallback: convert to single-block string form
1982
2010
  record.system = [{ type: "text", text }];
1983
2011
  return true;
1984
2012
  }
@@ -1986,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1986
2014
  // google-generative-ai: payload.systemInstruction
1987
2015
  const systemInstruction = asRecord(record.systemInstruction);
1988
2016
  if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
1989
- const firstPart = asRecord(systemInstruction.parts[0]);
1990
- if (firstPart && typeof firstPart.text === "string") {
1991
- firstPart.text = text;
1992
- return true;
1993
- }
2017
+ systemInstruction.parts = [{ text }];
2018
+ return true;
1994
2019
  }
1995
2020
 
1996
2021
  // openai-completions / openai-responses: payload.messages[] first system/developer message
@@ -2005,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
2005
2030
  return true;
2006
2031
  }
2007
2032
  if (Array.isArray(r.content) && r.content.length > 0) {
2008
- const first = asRecord(r.content[0]);
2009
- if (first && typeof first.text === "string") {
2010
- first.text = text;
2011
- return true;
2012
- }
2033
+ r.content = text;
2034
+ return true;
2013
2035
  }
2014
2036
  }
2015
2037
  }
@@ -5422,6 +5444,10 @@ export const __internals_for_tests = {
5422
5444
  hashSessionId,
5423
5445
  makeSessionModelKey,
5424
5446
  modelKeyFromSessionKey,
5447
+ makePromptRewriteContextKey,
5448
+ rememberPromptRewriteContext,
5449
+ getPromptRewriteContext,
5450
+ PROMPT_REWRITE_CONTEXT_TTL_MS,
5425
5451
  filterRestorableStatsForSession,
5426
5452
  parsePersistedRoutedModelRef,
5427
5453
  routedModelRefToPiModel,
@@ -5495,11 +5521,10 @@ export default function (pi: ExtensionAPI) {
5495
5521
  let latestCacheHint: PiCacheHintSnapshot | undefined;
5496
5522
  // OMP divergence: prompt rewriting moved from before_agent_start to
5497
5523
  // before_provider_request (OMP's before_agent_start can only inject messages,
5498
- // not mutate systemPrompt). We cache systemPromptOptions + route snapshot here
5499
- // so before_provider_request can apply the 3-step pipeline to the payload.
5500
- let pendingPromptOptions: BuildSystemPromptOptions | undefined;
5501
- let pendingRouteSnapshot: PiRouteSnapshot | undefined;
5502
- let pendingRoutedModel: PiModel | undefined;
5524
+ // not mutate systemPrompt). Store prompt options per session/model so an
5525
+ // overlapping turn or sub-agent cannot overwrite another request's rewrite
5526
+ // context before before_provider_request fires.
5527
+ const promptRewriteContexts = new Map<string, PromptRewriteContext>();
5503
5528
  const PERSIST_DEBOUNCE_MS = 2000;
5504
5529
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
5505
5530
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5913,17 +5938,18 @@ export default function (pi: ExtensionAPI) {
5913
5938
  ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
5914
5939
  : undefined;
5915
5940
 
5916
- // OMP divergence: before_agent_start in OMP can only inject messages (return
5917
- // { message }), NOT mutate systemPrompt. We cache the prompt options + route
5918
- // snapshot here so before_provider_request can apply the 3-step pipeline to
5919
- // the provider payload. If OMP does not supply systemPromptOptions, skill
5920
- // compression and stable-prefix reorder are skipped (only churn strip runs).
5921
5941
  const eventRecord = asRecord(event);
5922
- pendingPromptOptions = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5923
- pendingRouteSnapshot = routeSnapshot;
5924
- pendingRoutedModel = routedModel ?? _ctx.model;
5925
-
5942
+ const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5926
5943
  const model = routedModel ?? _ctx.model;
5944
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
5945
+ rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
5946
+ options,
5947
+ routeSnapshot,
5948
+ routedModel: model,
5949
+ timestamp: Date.now(),
5950
+ });
5951
+
5952
+ const modelForHint = model;
5927
5953
  const promptCacheKey = getSessionPromptCacheKey(_ctx);
5928
5954
  const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
5929
5955
  const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
@@ -5931,9 +5957,9 @@ export default function (pi: ExtensionAPI) {
5931
5957
  sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
5932
5958
  virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
5933
5959
  virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
5934
- upstreamProvider: routeSnapshot?.provider ?? model?.provider,
5935
- upstreamModelId: routeSnapshot?.modelId ?? model?.id,
5936
- api: model?.api,
5960
+ upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
5961
+ upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
5962
+ api: modelForHint?.api,
5937
5963
  systemPrompt: rawSystemPrompt,
5938
5964
  promptCacheKey,
5939
5965
  cacheRetention,
@@ -5961,21 +5987,24 @@ export default function (pi: ExtensionAPI) {
5961
5987
  requestModel &&
5962
5988
  !isResponsesPromptRewriteBypassApi(requestModel.api)
5963
5989
  ) {
5990
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
5991
+ const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
5992
+ const promptOptions = rewriteContext?.options;
5964
5993
  const original = extractSystemPrompt(resultPayload);
5965
5994
  if (original && original.trim().length > 0) {
5966
5995
  // Step 1: strip per-turn churn from <session-overview>.
5967
5996
  const stripped = stripSessionOverviewChurn(original);
5968
5997
 
5969
5998
  // Step 2: compress skills XML → one-line index (requires cached options).
5970
- const compressed = pendingPromptOptions
5971
- ? compressSkillsInSystemPrompt(stripped, pendingPromptOptions)
5999
+ const compressed = promptOptions
6000
+ ? compressSkillsInSystemPrompt(stripped, promptOptions)
5972
6001
  : stripped;
5973
6002
 
5974
6003
  // Step 3: lift stable content above dynamic content (requires cached options).
5975
6004
  let finalPrompt = compressed;
5976
6005
  let changed = false;
5977
- if (pendingPromptOptions) {
5978
- const optimized = optimizeSystemPrompt(compressed, pendingPromptOptions);
6006
+ if (promptOptions) {
6007
+ const optimized = optimizeSystemPrompt(compressed, promptOptions);
5979
6008
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
5980
6009
  finalPrompt = optimized.systemPrompt;
5981
6010
  changed = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omp-cache-optimizer",
3
- "version": "1.0.1",
3
+ "version": "1.0.2",
4
4
  "description": "Improve OMP prompt/KV cache hit rates with stable prompts, OpenAI-compatible cache keys, proxy compat warnings, and footer cache stats.",
5
5
  "keywords": [
6
6
  "omp-package",