npm - agent-cache-optimizer - Versions diffs - 0.1.1 - Mend

agent-cache-optimizer 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/CHANGELOG.md +17 -0
package/LICENSE +21 -0
package/README.md +274 -0
package/README.zh-CN.md +200 -0
package/adapters/claude-code.md +119 -0
package/docs/cross-cli.md +89 -0
package/docs/upstream.md +65 -0
package/package.json +70 -0
package/scripts/cache-status.sh +170 -0
package/scripts/check-cache-friendly.sh +122 -0
package/skills/cache-status/SKILL.md +81 -0
package/src/__tests__/core.test.ts +97 -0
package/src/core.ts +98 -0
package/src/heuristics.ts +109 -0
package/src/index.ts +127 -0
package/src/splitting.ts +66 -0
package/src/types.ts +39 -0
package/tsconfig.json +25 -0

package/src/__tests__/core.test.ts ADDED Viewed

@@ -0,0 +1,97 @@
+import { describe, it, expect } from "vitest"
+import { hashContent, emptyDB, updateDB, lookupScore, isWarm } from "../core"
+// hashContent must be deterministic, fixed-width, and sensitive to its input.
+describe("hashContent", () => {
+  it("produces consistent hashes", () => {
+    const first = hashContent("hello")
+    const second = hashContent("hello")
+    expect(first).toBe(second)
+    // Digests are truncated to 16 hex characters.
+    expect(first.length).toBe(16)
+  })
+  it("produces different hashes for different content", () => {
+    const helloDigest = hashContent("hello")
+    const worldDigest = hashContent("world")
+    expect(helloDigest).not.toBe(worldDigest)
+  })
+})
+// A new database starts with zeroed counters and no recorded history.
+describe("emptyDB", () => {
+  it("returns a fresh database", () => {
+    const { observations, updated, positions, scores } = emptyDB()
+    expect(observations).toBe(0)
+    expect(updated).toBe(0)
+    expect(Object.keys(positions)).toHaveLength(0)
+    expect(Object.keys(scores)).toHaveLength(0)
+  })
+})
+// updateDB: per-position fingerprint bookkeeping and derived stability scores.
+describe("updateDB", () => {
+  it("tracks fingerprints at positions", () => {
+    const db = updateDB(emptyDB(), ["block-a", "block-b", "block-c"])
+    expect(db.observations).toBe(1)
+    // One fingerprint per position after a single observation.
+    for (const position of [0, 1, 2]) {
+      expect(db.positions[position]).toHaveLength(1)
+    }
+  })
+  it("counts repeated hashes at the same position", () => {
+    // Two sessions: CLAUDE stays the same, HANDOFF and MEMORY both change.
+    const sessions = [
+      ["HANDOFF-v1", "CLAUDE-stable", "MEMORY-v1"],
+      ["HANDOFF-v2", "CLAUDE-stable", "MEMORY-v2"],
+    ]
+    let db = emptyDB()
+    for (const blocks of sessions) {
+      db = updateDB(db, blocks)
+    }
+    const [handoffSlot, claudeSlot, memorySlot] = [db.positions[0], db.positions[1], db.positions[2]]
+    // HANDOFF changed, so position 0 holds two distinct hashes.
+    expect(handoffSlot).toHaveLength(2)
+    // CLAUDE was stable, so position 1 holds one hash that was seen twice.
+    expect(claudeSlot).toHaveLength(1)
+    expect(claudeSlot?.[0]?.count).toBe(2)
+    // MEMORY changed, so position 2 holds two distinct hashes.
+    expect(memorySlot).toHaveLength(2)
+  })
+  it("assigns high scores to stable blocks", () => {
+    // Four sessions: HANDOFF changes every time, CLAUDE never does.
+    const versions = ["v1", "v2", "v3", "v4"]
+    let db = emptyDB()
+    versions.forEach((version) => {
+      db = updateDB(db, [`HANDOFF-${version}`, "CLAUDE-stable"])
+    })
+    const stableScore = lookupScore(db, hashContent("CLAUDE-stable"))
+    expect(stableScore).toBeGreaterThan(0.7)
+    const churningScore = lookupScore(db, hashContent("HANDOFF-v4"))
+    expect(churningScore).toBeLessThan(0.5)
+  })
+  it("clamps scores to [0, 1]", () => {
+    let db = emptyDB()
+    let remaining = 10
+    while (remaining-- > 0) {
+      db = updateDB(db, ["stable-block"])
+    }
+    const score = lookupScore(db, hashContent("stable-block"))
+    expect(score).not.toBeNull()
+    expect(score!).toBeGreaterThanOrEqual(0)
+    expect(score!).toBeLessThanOrEqual(1)
+  })
+})
+// isWarm compares the observation counter against the warm-up threshold.
+describe("isWarm", () => {
+  it("returns false below threshold", () => {
+    const coldDB = emptyDB()
+    expect(isWarm(coldDB, 2)).toBe(false)
+  })
+  it("returns true at or above threshold", () => {
+    const warmDB = emptyDB()
+    // Set the counter directly instead of replaying observations.
+    warmDB.observations = 3
+    expect(isWarm(warmDB, 2)).toBe(true)
+  })
+})

package/src/core.ts ADDED Viewed

@@ -0,0 +1,98 @@
+import { createHash } from "node:crypto"
+import type { StabilityDB } from "./types"
+/**
+ * Core hash-tracking engine — fully CLI-agnostic.
+ *
+ * Input:  string[] of system prompt blocks
+ * Output: updated StabilityDB with per-position fingerprints and scores
+ *
+ * This module has ZERO external dependencies and can be used by any
+ * CLI agent adapter (OpenCode, Claude Code, Codex, etc.).
+ */
+// ── Hashing ──────────────────────────────────────────────────────────
+/**
+ * Fingerprint a block of text.
+ *
+ * @param content - Text to hash.
+ * @returns The first 16 hex characters (64 bits) of the SHA-256 digest.
+ */
+export function hashContent(content: string): string {
+  const fullDigest = createHash("sha256").update(content).digest("hex")
+  return fullDigest.substring(0, 16)
+}
+// ── DB persistence ───────────────────────────────────────────────────
+/**
+ * Create a stability database with no recorded history.
+ *
+ * @returns A new object on every call: empty `positions` and `scores` maps,
+ *          and both `observations` and `updated` set to 0.
+ */
+export function emptyDB(): StabilityDB {
+  const fresh: StabilityDB = {
+    positions: {},
+    scores: {},
+    observations: 0,
+    updated: 0,
+  }
+  return fresh
+}
+// ── Stability scoring ────────────────────────────────────────────────
+/**
+ * Fetch the stored stability score for a block hash.
+ *
+ * @param db - Database whose `scores` map is consulted.
+ * @param hash - Block fingerprint to look up.
+ * @returns The stored score, or null when the map has no entry for `hash`.
+ */
+export function lookupScore(db: StabilityDB, hash: string): number | null {
+  // `??` (not `||`) so that a legitimate score of 0 is returned as-is.
+  return db.scores[hash] ?? null
+}
+/**
+ * Update the stability database with a new observation.
+ *
+ * For each block position, records the hash fingerprint.  Then recomputes
+ * stability scores for all known hashes:
+ *
+ *   score = positionalFidelity × recency × (0.5 + 0.5 × varietyPenalty)
+ *
+ * - positionalFidelity: fraction of all observed calls (including this one)
+ *   in which the hash appeared at the position
+ * - recency: 1.0 if seen in the last 24h, 0.7 otherwise
+ * - varietyPenalty: 1 / (number of distinct hashes seen at the position)
+ *
+ * All scores are clamped to [0, 1].
+ *
+ * The call is counted BEFORE scoring.  Counting it afterwards made the
+ * fidelity denominator one observation short, so a hash seen in 1 of 2 calls
+ * got fidelity 1.0 and values above 1 were silently hidden by the clamp.
+ *
+ * NOTE(review): scores are keyed by hash only.  When the same hash has been
+ * recorded at several positions, the entry for the highest position index is
+ * the one that ends up stored.
+ *
+ * @param db - Database to update.  It is mutated in place.
+ * @param blocks - System prompt blocks in the order they were sent.
+ * @returns The same `db` object, for chaining.
+ */
+export function updateDB(db: StabilityDB, blocks: string[]): StabilityDB {
+  const now = Date.now()
+  const dayMs = 24 * 60 * 60 * 1000
+  // Count this call up front so the fidelity denominator below includes it.
+  db.observations++
+  // Record fingerprints at each position
+  blocks.forEach((block, position) => {
+    const hash = hashContent(block)
+    const fps = (db.positions[position] ??= [])
+    const existing = fps.find((f) => f.hash === hash)
+    if (existing) {
+      existing.lastSeen = now
+      existing.count++
+    } else {
+      fps.push({ hash, firstSeen: now, lastSeen: now, count: 1 })
+    }
+  })
+  // Recompute stability scores
+  const totalCalls = Math.max(1, db.observations)
+  for (const fps of Object.values(db.positions)) {
+    // Every fingerprint at a position shares the same variety penalty.
+    const varietyPenalty = 1 / Math.max(1, fps.length)
+    for (const fp of fps) {
+      const fidelity = fp.count / totalCalls
+      const recency = now - fp.lastSeen < dayMs ? 1.0 : 0.7
+      db.scores[fp.hash] = Math.min(
+        1.0,
+        Math.max(0.0, fidelity * recency * (0.5 + 0.5 * varietyPenalty)),
+      )
+    }
+  }
+  return db
+}
+/**
+ * Report whether enough calls have been observed to trust the stored scores.
+ *
+ * @param db - Database whose `observations` counter is checked.
+ * @param threshold - Minimum number of observations required (default 2).
+ * @returns true when `db.observations` is at least `threshold`.
+ */
+export function isWarm(db: StabilityDB, threshold = 2): boolean {
+  const { observations } = db
+  return observations >= threshold
+}

package/src/heuristics.ts ADDED Viewed

@@ -0,0 +1,109 @@
+import type { StabilityDB, Classified } from "./types"
+import { splitAll } from "./splitting"
+import { hashContent, lookupScore, isWarm } from "./core"
+/**
+ * Heuristic stability score for a block with no usable history.
+ *
+ * Starts from a neutral 0.5 and applies the rules below in order.  Later
+ * rules can override earlier ones, so the order matters.
+ *
+ *   1. Index 0 → 0.15
+ *   2. Index 1-7 with more than 200 chars → 0.8
+ *   3. One of the last 2 blocks, not starting with <, { or [ → at most 0.25
+ *   4. More than 3000 chars → at least 0.85
+ *   5. Fewer than 100 chars → at most 0.2
+ *   6. Three or more YYYY-MM-DD stamps → at most 0.25
+ *   7. Starts with <, {, [ or a code fence, and is not one of the last 2
+ *      blocks → at least 0.8
+ *   8. Some line starts with "You are", "Your job/role/task", or "As a(n) "
+ *      → at least 0.8
+ *   9. More than 15 lines averaging under 30 chars → at most 0.3
+ *
+ * @param block - Block text.
+ * @param index - Position of the block in the list being classified.
+ * @param total - Number of blocks in that list.
+ * @returns The heuristic score; higher means more likely to be stable.
+ */
+export function coldStartScore(block: string, index: number, total: number): number {
+  const size = block.length
+  const body = block.trim()
+  const tailStart = total - 2
+  const atMost = (value: number, ceiling: number): number => Math.min(value, ceiling)
+  const atLeast = (value: number, floor: number): number => Math.max(value, floor)
+  // ── Position signals ──────────────────────────────────────────
+  let score = index === 0 ? 0.15 : 0.5
+  if (index >= 1 && index <= 7 && size > 200) score = 0.8
+  // Tail blocks are capped unless they open with a structural delimiter.
+  const opensStructured = /^[<\{\[]/.test(body)
+  if (index >= tailStart && !opensStructured) score = atMost(score, 0.25)
+  // ── Size signals ──────────────────────────────────────────────
+  if (size > 3000) score = atLeast(score, 0.85)
+  if (size < 100) score = atMost(score, 0.2)
+  // ── Structure signals ─────────────────────────────────────────
+  const dateStamps = block.match(/\d{4}-\d{2}-\d{2}/g) ?? []
+  if (dateStamps.length >= 3) score = atMost(score, 0.25)
+  // The structural-delimiter boost is skipped for the last two blocks.
+  if (index < tailStart && /^[<\{\[]|^```/.test(body)) score = atLeast(score, 0.8)
+  // The `m` flag lets the role phrase match at the start of any line.
+  if (/^(You are|Your (job|role|task)|As an? )/m.test(block)) score = atLeast(score, 0.8)
+  // Average line length counts the newline characters as part of the size.
+  const lineCount = block.split("\n").length
+  if (lineCount > 15 && size / Math.max(1, lineCount) < 30) score = atMost(score, 0.3)
+  return score
+}
+// ── Classification ───────────────────────────────────────────────────
+/**
+ * Sort blocks into stable / unknown / dynamic buckets.
+ *
+ * Blocks are first passed through `splitAll`, so the buckets contain the
+ * resulting sub-blocks rather than the original inputs.  Each sub-block is
+ * scored with its stored score when the database is warm AND has an entry for
+ * its hash; otherwise the cold-start heuristic is used.
+ *
+ * Bucket boundaries: score >= 0.7 → stable, score <= 0.3 → dynamic,
+ * anything in between → unknown.  Relative order inside a bucket follows the
+ * order of the split items.
+ *
+ * @param blocks - System prompt blocks to classify.
+ * @param db - Stability database consulted for stored scores.
+ * @param opts - Optional overrides: `warmThreshold` (default 2) is passed to
+ *               `isWarm`, `splitThreshold` is passed to `splitAll`.
+ * @returns The three buckets.
+ */
+export function classify(
+  blocks: string[],
+  db: StabilityDB,
+  opts?: { warmThreshold?: number; splitThreshold?: number },
+): Classified {
+  const items = splitAll(blocks, opts?.splitThreshold)
+  const warm = isWarm(db, opts?.warmThreshold ?? 2)
+  const buckets: Classified = { stable: [], unknown: [], dynamic: [] }
+  items.forEach((item, position) => {
+    const learned = lookupScore(db, hashContent(item))
+    const score =
+      warm && learned !== null ? learned : coldStartScore(item, position, items.length)
+    const bucket =
+      score >= 0.7 ? buckets.stable : score <= 0.3 ? buckets.dynamic : buckets.unknown
+    bucket.push(item)
+  })
+  return buckets
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,127 @@
+/**
+ * agent-cache-optimizer — OpenCode Plugin Entry Point
+ *
+ * Content-agnostic KV cache optimizer.  Reorders system prompt blocks so
+ * that stable content (config, agent definitions, tool schemas) comes
+ * FIRST and dynamic content (session handoff, memory injections, dates)
+ * comes LAST.  This maximizes prefix-match cache reuse across sessions.
+ *
+ * Installation:
+ *   1. Add to opencode.json plugins: "agent-cache-optimizer"
+ *   2. Or use file:// path for local development
+ *   3. Restart OpenCode
+ *
+ * @license MIT
+ */
+import type { Plugin } from "@opencode-ai/plugin"
+import { join } from "node:path"
+import { homedir } from "node:os"
+import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs"
+import { emptyDB, updateDB } from "./core"
+import { classify } from "./heuristics"
+import type { StabilityDB } from "./types"
+// ── Persistence ──────────────────────────────────────────────────────
+/**
+ * Directory holding the per-agent stability databases and the diagnostic log:
+ * `<cache root>/opencode/agent-cache-optimizer`, where the cache root is
+ * `$XDG_CACHE_HOME` when that is set to a non-empty value and `~/.cache`
+ * otherwise.
+ */
+const STATE_DIR = join(
+  // `||` rather than `??` so that an empty XDG_CACHE_HOME also falls back.
+  process.env.XDG_CACHE_HOME || join(homedir(), ".cache"),
+  join("opencode", "agent-cache-optimizer"),
+)
+/**
+ * Build the database file path for an agent key.
+ *
+ * Every character outside `[a-zA-Z0-9_-]` is replaced with `_`, the result is
+ * cut to 64 characters, and an empty result becomes "default".
+ *
+ * @param agent - Key identifying whose database to address.
+ * @returns `<STATE_DIR>/stability-<sanitized key>.json`.
+ */
+function dbPath(agent: string): string {
+  const sanitized = agent.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64)
+  const fileStem = sanitized || "default"
+  return join(STATE_DIR, `stability-${fileStem}.json`)
+}
+/**
+ * Runtime shape check for a parsed database file.
+ *
+ * Accepts a plain object whose `positions` is a plain object of arrays, whose
+ * `scores` is a plain object, and whose `observations` is a finite number.
+ */
+function isStabilityDB(value: unknown): value is StabilityDB {
+  const isRecord = (v: unknown): v is Record<string, unknown> =>
+    typeof v === "object" && v !== null && !Array.isArray(v)
+  if (!isRecord(value)) return false
+  const { positions, scores, observations } = value
+  return (
+    isRecord(positions) &&
+    Object.values(positions).every((fps) => Array.isArray(fps)) &&
+    isRecord(scores) &&
+    typeof observations === "number" &&
+    Number.isFinite(observations)
+  )
+}
+/**
+ * Load the stability database for an agent key.
+ *
+ * Falls back to a fresh empty database when the file is missing, unreadable,
+ * not valid JSON, or valid JSON of the wrong shape (for example `null` or
+ * `{}`).  Without the shape check such a file would be returned as-is and
+ * callers would crash on the first access to `positions` or `scores`.
+ *
+ * @param agent - Key identifying whose database to load.
+ * @returns The stored database, or the result of `emptyDB()`.
+ */
+function loadDB(agent: string): StabilityDB {
+  try {
+    const parsed: unknown = JSON.parse(readFileSync(dbPath(agent), "utf-8"))
+    return isStabilityDB(parsed) ? parsed : emptyDB()
+  } catch {
+    // Best-effort: any read or parse failure starts from a clean slate.
+    return emptyDB()
+  }
+}
+/**
+ * Persist the database for an agent key as pretty-printed JSON.
+ *
+ * Stamps `db.updated` with the current time before writing (the caller's
+ * object is mutated).  The state directory is created if it does not exist.
+ * Every failure is swallowed: persistence is best-effort.
+ *
+ * @param agent - Key identifying whose database to write.
+ * @param db - Database to persist.
+ */
+function saveDB(agent: string, db: StabilityDB): void {
+  try {
+    const dirMissing = !existsSync(STATE_DIR)
+    if (dirMissing) mkdirSync(STATE_DIR, { recursive: true })
+    db.updated = Date.now()
+    const serialized = JSON.stringify(db, null, 2)
+    writeFileSync(dbPath(agent), serialized)
+  } catch {
+    /* best-effort */
+  }
+}
+// ── Diagnostics ──────────────────────────────────────────────────────
+/** Set once the "plugin-loaded" diagnostic line has been written. */
+let firstCallLogged = false
+/**
+ * Append one timestamped line to `diag.log` in the state directory.
+ *
+ * The line has the form `[<ISO timestamp>] [<agent>] <msg>`.  The state
+ * directory is created if needed, and every failure is ignored.
+ *
+ * @param agent - Label written in the second bracket of the line.
+ * @param msg - Message text.
+ */
+function diag(agent: string, msg: string): void {
+  try {
+    if (!existsSync(STATE_DIR)) mkdirSync(STATE_DIR, { recursive: true })
+    const line = `[${new Date().toISOString()}] [${agent}] ${msg}\n`
+    // flag "a" appends rather than truncating the log.
+    writeFileSync(join(STATE_DIR, "diag.log"), line, { flag: "a" })
+  } catch {
+    /* silent */
+  }
+}
+// ── Plugin ───────────────────────────────────────────────────────────
+/**
+ * Plugin factory.  Returns three hooks:
+ *
+ * - "experimental.chat.system.transform": classifies the system prompt
+ *   blocks, rewrites `output.system` in stable → unknown → dynamic order,
+ *   records the new order in the stability database, and logs a summary.
+ *   Prompts with zero or one block are left untouched.  The database key is
+ *   the model id, or "default" when no model id is available.
+ * - "chat.params": writes a single "plugin-loaded" diagnostic line the first
+ *   time it fires.
+ * - "chat.headers": for providers whose name contains "anthropic", sets the
+ *   `anthropic-beta` header unless one is already present.
+ */
+export const CacheOptimizerPlugin: Plugin = async () => {
+  return {
+    // ── Primary hook: system prompt reordering ─────────────────────
+    "experimental.chat.system.transform": async (input, output) => {
+      const incoming = output.system
+      if (!incoming || incoming.length <= 1) return
+      const agent = input.model?.id ?? "default"
+      const db = loadDB(agent)
+      const { stable, unknown, dynamic } = classify(incoming, db)
+      // Reorder: stable → unknown → dynamic
+      output.system = [...stable, ...unknown, ...dynamic]
+      // Persist the reordered observation for the next call
+      const updated = updateDB(db, output.system)
+      saveDB(agent, updated)
+      const summary = [
+        `S:${stable.length}`,
+        `U:${unknown.length}`,
+        `D:${dynamic.length}`,
+        `T:${output.system.length}`,
+        `obs:${updated.observations}`,
+      ].join(" ")
+      diag(agent, summary)
+    },
+    // ── Diagnostic: chat.params (confirms plugin loaded) ──────────
+    "chat.params": async (input, _output) => {
+      if (firstCallLogged) return
+      firstCallLogged = true
+      diag(
+        input.agent ?? "unknown",
+        `plugin-loaded agent=${input.agent ?? "?"} model=${input.model?.id ?? "?"}`,
+      )
+    },
+    // ── Provider cache headers ────────────────────────────────────
+    "chat.headers": async (input, output) => {
+      const providerName = input.provider?.info?.name?.toLowerCase()
+      if (!providerName?.includes("anthropic")) return
+      // Leave a caller-supplied beta header alone.
+      if (!output.headers["anthropic-beta"]) {
+        output.headers["anthropic-beta"] = "prompt-caching-2024-07-31"
+      }
+    },
+  }
+}
+// Re-export core for standalone usage
+export { emptyDB, updateDB, hashContent, lookupScore, isWarm } from "./core"
+export { coldStartScore, classify } from "./heuristics"
+export { splitBlock, splitAll } from "./splitting"
+export type { StabilityDB, Classified, BlockFingerprint, CacheOptimizerOptions } from "./types"

package/src/splitting.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Block splitting — split large prompt blocks at natural boundaries.
+ *
+ * Large blocks (>4KB) like tool definition arrays or long agent prompts
+ * can contain multiple independent items.  Splitting them allows individual
+ * sub-blocks to be classified independently:
+ *
+ *   - JSON arrays of tool definitions → individual tool objects
+ *   - Markdown files with ## sections → individual sections
+ *   - XML/HTML blocks → individual elements
+ *   - Otherwise → paragraph boundaries (double newline)
+ *
+ * This is fully content-agnostic: it only looks at structural delimiters,
+ * never at specific keywords or names.
+ */
+/** Blocks at or below this many characters are never split. */
+const DEFAULT_SPLIT_THRESHOLD = 4000
+/**
+ * Split a block into sub-blocks at natural structural boundaries.
+ * Returns [block] unchanged if no split is needed or possible.
+ *
+ * Strategies are tried in this order and the first that yields enough parts
+ * wins:
+ *
+ *   1. JSON-like object list (block starts with `{`): cut at the start of
+ *      each `{ ... "name": "..." ... }` object.  The cut is lossless:
+ *      joining the parts reproduces the block exactly, so separators, nested
+ *      braces, and any text between objects stay attached to the preceding
+ *      object instead of being dropped.
+ *   2. Markdown: cut before each `## ` header (the newline before the header
+ *      is consumed).
+ *   3. XML/HTML: cut before each opening tag that has the same name as the
+ *      block's first tag, so every element keeps its own closing tag.
+ *   4. Fallback: blank-line paragraph boundaries (needs at least 3 parts).
+ *
+ * Strategies 2-4 discard whitespace-only parts so that no empty sub-block is
+ * ever returned.
+ *
+ * @param block - Block text to split.
+ * @param threshold - Blocks whose length is at most this are returned as-is.
+ * @returns The sub-blocks, or `[block]` when no strategy applies.
+ */
+export function splitBlock(block: string, threshold = DEFAULT_SPLIT_THRESHOLD): string[] {
+  if (block.length <= threshold) return [block]
+  const trimmed = block.trim()
+  const nonBlank = (part: string): boolean => part.trim() !== ""
+  // ── JSON object array: {"name": "A", ...}, {"name": "B", ...} ──
+  if (trimmed.startsWith("{")) {
+    const starts = Array.from(
+      block.matchAll(/\{[^}{]*"name"\s*:\s*"[^"]+"[^}]*\}/g),
+      (m) => m.index ?? 0,
+    )
+    if (starts.length >= 2) {
+      // The pattern stops at the first `}`, so the match text itself may be a
+      // truncated object.  Use only the match offsets and slice the original
+      // text between them; nothing is lost that way.
+      const cuts = [0, ...starts.slice(1), block.length]
+      return cuts.slice(0, -1).map((from, k) => block.slice(from, cuts[k + 1]))
+    }
+  }
+  // ── Markdown: split at ## section headers ──────────────────────
+  if (block.includes("\n## ")) {
+    const sections = block.split(/\n(?=## )/).filter(nonBlank)
+    if (sections.length >= 2) return sections
+  }
+  // ── XML/HTML: split before each sibling opening tag ────────────
+  const tag = /^<(\w+)[^>]*>/.exec(trimmed)?.[1]
+  if (tag) {
+    // `tag` is \w+ only, so it is safe to interpolate into a pattern.
+    const elements = block.split(new RegExp(`(?=<${tag}[\\s/>])`)).filter(nonBlank)
+    if (elements.length >= 2) return elements
+  }
+  // ── Fallback: paragraph boundaries ─────────────────────────────
+  const paragraphs = block.split(/\n\n+/).filter(nonBlank)
+  if (paragraphs.length >= 3) return paragraphs
+  return [block]
+}
+/**
+ * Run `splitBlock` over every block and concatenate the results in order.
+ *
+ * @param blocks - Blocks to split.
+ * @param threshold - Optional size threshold forwarded to `splitBlock`;
+ *                    when omitted, `splitBlock`'s own default applies.
+ * @returns A flat array of sub-blocks.
+ */
+export function splitAll(blocks: string[], threshold?: number): string[] {
+  return blocks.flatMap((block) => splitBlock(block, threshold))
+}

package/src/types.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * A fingerprint record for one hash observed at one position.
+ *
+ * Records are created and updated by `updateDB`: a new record starts with
+ * `count` 1 and both timestamps set to the current time; seeing the same hash
+ * at the same position again bumps `count` and refreshes `lastSeen`.
+ */
+export interface BlockFingerprint {
+  /** Block fingerprint as produced by `hashContent` (16 hex characters) */
+  hash: string
+  /** First time this exact hash was seen (epoch ms) */
+  firstSeen: number
+  /** Most recent time this hash was seen (epoch ms) */
+  lastSeen: number
+  /** Total observations of this hash at this position */
+  count: number
+}
+/**
+ * Stability database — persisted per-agent to track block stability over time.
+ *
+ * `emptyDB` produces the initial value (empty maps, both counters 0) and
+ * `updateDB` mutates it in place on every observation.
+ */
+export interface StabilityDB {
+  /** Block position (index in the observed block list) → fingerprints observed at that position */
+  positions: Record<number, BlockFingerprint[]>
+  /** Hash → stability score, clamped to [0, 1] (1.0 = never changes, 0.0 = changes every call) */
+  scores: Record<string, number>
+  /** Total calls observed; compared against the warm threshold by `isWarm` */
+  observations: number
+  /** Last write timestamp (epoch ms); 0 until the database has been saved */
+  updated: number
+}
+/**
+ * Classification result after scoring all blocks.
+ *
+ * Produced by `classify`.  Each (possibly split) block lands in exactly one
+ * bucket depending on its score.
+ */
+export interface Classified {
+  /** Blocks scoring 0.7 or higher */
+  stable: string[]
+  /** Blocks scoring strictly between 0.3 and 0.7 */
+  unknown: string[]
+  /** Blocks scoring 0.3 or lower */
+  dynamic: string[]
+}
+/**
+ * Options for the cache optimizer plugin.
+ *
+ * NOTE(review): the visible sources only re-export this type; no visible code
+ * accepts or reads a value of it — confirm where (or whether) it is consumed.
+ */
+export interface CacheOptimizerOptions {
+  /**
+   * Minimum block size in bytes to attempt splitting (default: 4000).
+   * NOTE(review): `splitBlock` compares against the string's `length`, which
+   * counts UTF-16 code units rather than bytes.
+   */
+  splitThreshold: number
+  /** Path to store stability databases and logs */
+  stateDir: string
+  /** Minimum observations before switching from heuristics to hash-based scoring */
+  warmThreshold: number
+}

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,25 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "lib": ["ES2022"],
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "noUncheckedIndexedAccess": true,
+    "noImplicitReturns": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "types": ["node"]
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist"]
+}