npm - context-mode - Versions diffs - 1.0.167 → 1.0.169 - Mend

context-mode 1.0.167 → 1.0.169

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/.openclaw-plugin/openclaw.plugin.json +1 -1
package/.openclaw-plugin/package.json +1 -1
package/build/server.js +26 -5
package/build/session/analytics.d.ts +28 -0
package/build/session/analytics.js +82 -1
package/build/session/extract.d.ts +16 -0
package/build/session/extract.js +44 -28
package/build/session/retrieval-marker.d.ts +39 -0
package/build/session/retrieval-marker.js +65 -0
package/cli.bundle.mjs +224 -216
package/configs/antigravity-cli/plugin.json +1 -1
package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
package/hooks/posttooluse.mjs +39 -1
package/hooks/session-extract.bundle.mjs +3 -3
package/hooks/session-loaders.mjs +8 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/server.bundle.mjs +138 -130
package/build/cache-heal.d.ts +0 -48
package/build/cache-heal.js +0 -150
package/build/concurrency/runPool.d.ts +0 -36
package/build/concurrency/runPool.js +0 -51
package/build/openclaw/mcp-tools.d.ts +0 -54
package/build/openclaw/mcp-tools.js +0 -198
package/build/openclaw/workspace-router.d.ts +0 -29
package/build/openclaw/workspace-router.js +0 -64
package/build/openclaw-plugin.d.ts +0 -130
package/build/openclaw-plugin.js +0 -626
package/build/opencode-plugin.d.ts +0 -122
package/build/opencode-plugin.js +0 -375
package/build/pi-extension.d.ts +0 -14
package/build/pi-extension.js +0 -451
package/build/routing-block.d.ts +0 -8
package/build/routing-block.js +0 -86
package/build/tool-naming.d.ts +0 -4
package/build/tool-naming.js +0 -24

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -6,14 +6,14 @@
   },
   "metadata": {
     "description": "Claude Code plugins by Mert Koseoğlu",
-    "version": "1.0.167"
+    "version": "1.0.169"
   },
   "plugins": [
     {
       "name": "context-mode",
       "source": "./",
       "description": "Claude Code MCP plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
-      "version": "1.0.167",
+      "version": "1.0.169",
       "author": {
         "name": "Mert Koseoğlu"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.167",
+  "version": "1.0.169",
   "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
   "author": {
     "name": "Mert Koseoğlu",

package/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.167",
+  "version": "1.0.169",
   "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
   "author": {
     "name": "Mert Koseoğlu",

package/.openclaw-plugin/openclaw.plugin.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "name": "Context Mode",
   "kind": "tool",
   "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
-  "version": "1.0.167",
+  "version": "1.0.169",
   "sandbox": {
     "mode": "permissive",
     "filesystem_access": "full",

package/.openclaw-plugin/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.167",
+  "version": "1.0.169",
   "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
   "author": {
     "name": "Mert Koseoğlu",

package/build/server.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { describeStorageDirectorySource, ensureWritableStorageDir, formatStorage
 import { purgeSession } from "./session/purge.js";
 import { emitCacheHitEvent, emitIndexWriteEvent, emitSandboxExecuteEvent, } from "./session/event-emit.js";
 import { persistToolCallCounter, restoreSessionStats } from "./session/persist-tool-calls.js";
+import { appendRetrievalBytes } from "./session/retrieval-marker.js";
 import { searchAllSources } from "./search/unified.js";
 import { buildCtxSearchInputSchema, CTX_SEARCH_SHARED_MODE, resolveProjectScope, } from "./search/ctx-search-schema.js";
 import { FloodGuard } from "./search/flood-guard.js";
@@ -34,7 +35,7 @@ import { stripJsonComments } from "./util/jsonc.js";
 import { resolveClaudeConfigDir } from "./util/claude-config.js";
 import { resolveProjectDir } from "./util/project-dir.js";
 import { loadDatabase } from "./db-base.js";
-import { AnalyticsEngine, formatReport, getConversationStats, getContentBytesAllSessions, getLifetimeStats, getMultiAdapterLifetimeStats, getRealBytesStats, pricePerToken } from "./session/analytics.js";
+import { AnalyticsEngine, formatReport, getConversationStats, getContentBytesAllSessions, getConversationWindowStats, getLifetimeStats, getMultiAdapterLifetimeStats, getRealBytesStats, pricePerToken } from "./session/analytics.js";
 const __pkg_dir = dirname(fileURLToPath(import.meta.url));
 const VERSION = (() => {
     for (const rel of ["../package.json", "./package.json"]) {
@@ -857,6 +858,16 @@ function trackResponse(toolName, response) {
             bytesReturned: bytes,
         }));
     }
+    // Retrieval ("With context-mode") bridge — ctx_search / ctx_fetch_and_index
+    // response bytes are the kept-out content the model paid to access. The
+    // PostToolUse hook never fires for the plugin's OWN MCP tools, so the
+    // hook-side extractMcpToolCall can never see these calls (bytes_retrieved
+    // was 0/124454 in prod). Drop the count into a marker keyed by the session
+    // DB; the next ordinary-tool PostToolUse consumes it and emits a forwardable
+    // bytes_retrieved event. Off the hot path; never throws.
+    if (toolName === "ctx_search" || toolName === "ctx_fetch_and_index") {
+        setImmediate(() => appendRetrievalBytes(getSessionDbPath(), bytes));
+    }
     return response;
 }
 function trackIndexed(bytes, source = "unknown") {
@@ -3654,12 +3665,22 @@ server.registerTool("ctx_stats", {
                                 }
                                 catch { /* skip unreadable DB */ }
                             }
-                            convReal = projectDirForSid
-                                ? getRealBytesStats({ projectDir: projectDirForSid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath })
-                                : getRealBytesStats({ sessionId: sid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath });
+                            // Section 1 "Where you are now" = the LIVE conversation window.
+                            // Sub-agents + ctx_execute sub-process sessions write to this
+                            // SAME worktree DB (same worktreeHash = sha256(cwd)) under their
+                            // own session_ids; their retrieval hit their own disposable
+                            // windows, not yours. getConversationWindowStats credits the
+                            // whole worktree's kept-out bytes while counting only THIS
+                            // session's retrieval as "With context-mode", and the
+                            // worktreeHash scope keeps the user's OTHER parallel worktrees
+                            // out. projectDirForSid is intentionally dropped — it
+                            // under-counted (missed empty-project_dir sub-process sessions)
+                            // and could not separate sub-agent retrieval from the window's.
+                            void projectDirForSid;
+                            convReal = getConversationWindowStats({ sessionId: sid, worktreeHash: dbHash, sessionsDir: getSessionDir(), contentDbPath });
                         }
                         catch {
-                            convReal = getRealBytesStats({ sessionId: sid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath });
+                            convReal = getConversationWindowStats({ sessionId: sid, worktreeHash: dbHash, sessionsDir: getSessionDir(), contentDbPath });
                         }
                         const lifeRealBase = getRealBytesStats({ sessionsDir: getSessionDir() });
                         // v1.0.134 SLICE C: lifetime tier sums ALL chunks (no

package/build/session/analytics.d.ts CHANGED Viewed

@@ -468,6 +468,34 @@ export declare function getRealBytesStats(opts: {
     contentDbPath?: string;
     loadDatabase?: () => unknown;
 }): RealBytesStats;
+/**
+ * v1.0.169 — Section 1 "Where you are now" = the LIVE conversation window.
+ *
+ * A single live conversation fans out into sub-agents and ctx_execute
+ * sub-process sessions. Each runs in its OWN, disposable context window (its
+ * own session_id) — but all under the SAME worktree DB, because the worktree
+ * hash is sha256(cwd) and they share the cwd. Their retrieval (ctx_search /
+ * ctx_fetch_and_index returns) entered THOSE windows and was thrown away when
+ * each returned its short summary; it never touched the window the user is
+ * reading now. So the live-window savings bar must split the worktree by
+ * which retrieval actually landed in the user's window:
+ *
+ *   bytesReturned ("With context-mode")  = THIS session's retrieval only —
+ *       what genuinely entered the live window.
+ *   bytesAvoided  ("kept out")           = everything the whole worktree moved
+ *       (avoided + every session's retrieval) MINUS what landed in your window.
+ *
+ * Scoping by `worktreeHash` (not project-root + time) means the user's OTHER
+ * parallel worktrees never bleed in — a different worktree is a different
+ * cwd-hash, hence a different DB file the prefix filter excludes — while the
+ * sub-agent fan-out this conversation actually spawned is fully credited.
+ */
+export declare function getConversationWindowStats(opts: {
+    sessionId: string;
+    worktreeHash: string;
+    sessionsDir?: string;
+    contentDbPath?: string;
+}): RealBytesStats;
 /**
  * Real-usage filter thresholds. Decided in the B3a /diagnose conversation
  * to suppress fixture-noise dirs (test runs that touched ~/.X but never

package/build/session/analytics.js CHANGED Viewed

@@ -900,6 +900,18 @@ export function getRealBytesStats(opts) {
                             snapshotBytes += Number(snap.bytes);
                     }
                     catch { /* old schema */ }
+                    try {
+                        // "With context-mode" = the bytes the model paid to ACCESS the
+                        // kept-out content: ctx_search (query the index) + ctx_fetch_and_index
+                        // (fetch + index a URL). Sandbox compute (ctx_execute/batch/file) is
+                        // work-output the model would see regardless — NOT redirect savings —
+                        // so it is excluded; folding it crushed the bar to a false ~43%.
+                        const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
+               WHERE session_id = ? AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.sessionId);
+                        if (tc?.bytes)
+                            bytesReturned += Number(tc.bytes);
+                    }
+                    catch { /* old schema: no tool_calls table */ }
                 }
                 else if (opts.projectDir) {
                     // Bug E+F: META-scoped aggregation. Take every session_id whose
@@ -930,6 +942,17 @@ export function getRealBytesStats(opts) {
                             snapshotBytes += Number(snap.bytes);
                     }
                     catch { /* old schema */ }
+                    try {
+                        const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes
+               FROM tool_calls
+               WHERE session_id IN (
+                 SELECT session_id FROM session_meta WHERE project_dir = ?
+               )
+               AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.projectDir);
+                        if (tc?.bytes)
+                            bytesReturned += Number(tc.bytes);
+                    }
+                    catch { /* old schema: no tool_calls table */ }
                 }
                 else {
                     const row = sdb.prepare(`SELECT
@@ -948,6 +971,13 @@ export function getRealBytesStats(opts) {
                             snapshotBytes += Number(snap.bytes);
                     }
                     catch { /* old schema */ }
+                    try {
+                        const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
+               WHERE tool IN ('ctx_search', 'ctx_fetch_and_index')`).get();
+                        if (tc?.bytes)
+                            bytesReturned += Number(tc.bytes);
+                    }
+                    catch { /* old schema: no tool_calls table */ }
                 }
             }
             finally {
@@ -970,6 +1000,57 @@ export function getRealBytesStats(opts) {
     const totalSavedTokens = Math.floor((eventDataBytes + bytesAvoided + snapshotBytes) / 4);
     return { eventDataBytes, bytesAvoided, bytesReturned, snapshotBytes, contentBytes, totalSavedTokens };
 }
+/**
+ * v1.0.169 — Section 1 "Where you are now" = the LIVE conversation window.
+ *
+ * A single live conversation fans out into sub-agents and ctx_execute
+ * sub-process sessions. Each runs in its OWN, disposable context window (its
+ * own session_id) — but all under the SAME worktree DB, because the worktree
+ * hash is sha256(cwd) and they share the cwd. Their retrieval (ctx_search /
+ * ctx_fetch_and_index returns) entered THOSE windows and was thrown away when
+ * each returned its short summary; it never touched the window the user is
+ * reading now. So the live-window savings bar must split the worktree by
+ * which retrieval actually landed in the user's window:
+ *
+ *   bytesReturned ("With context-mode")  = THIS session's retrieval only —
+ *       what genuinely entered the live window.
+ *   bytesAvoided  ("kept out")           = everything the whole worktree moved
+ *       (avoided + every session's retrieval) MINUS what landed in your window.
+ *
+ * Scoping by `worktreeHash` (not project-root + time) means the user's OTHER
+ * parallel worktrees never bleed in — a different worktree is a different
+ * cwd-hash, hence a different DB file the prefix filter excludes — while the
+ * sub-agent fan-out this conversation actually spawned is fully credited.
+ */
+export function getConversationWindowStats(opts) {
+    // Whole current worktree: every session that shares this cwd-hash DB.
+    const pool = getRealBytesStats({
+        worktreeHash: opts.worktreeHash,
+        sessionsDir: opts.sessionsDir,
+    });
+    // Just the live window: this session_id (folds its own ctx_search/ctx_fetch
+    // retrieval + content chunks).
+    const mine = getRealBytesStats({
+        sessionId: opts.sessionId,
+        worktreeHash: opts.worktreeHash,
+        sessionsDir: opts.sessionsDir,
+        contentDbPath: opts.contentDbPath,
+    });
+    const windowReturned = mine.bytesReturned;
+    const movedTotal = pool.bytesAvoided + pool.bytesReturned;
+    // What context-mode kept OUT of the live window = everything moved across the
+    // worktree minus the slice that actually entered this window. Clamp at 0 so a
+    // stale/edge DB can never produce a negative bar.
+    const keptOut = Math.max(0, movedTotal - windowReturned);
+    return {
+        eventDataBytes: pool.eventDataBytes,
+        bytesAvoided: keptOut,
+        bytesReturned: windowReturned,
+        snapshotBytes: pool.snapshotBytes,
+        contentBytes: mine.contentBytes,
+        totalSavedTokens: Math.floor((pool.eventDataBytes + keptOut + pool.snapshotBytes) / 4),
+    };
+}
 const DEFAULT_REAL_USAGE_FILTER = {
     minEvents: 100,
     minProjects: 5,
@@ -1620,7 +1701,7 @@ function renderNarrative5Section(args) {
         const convMult = Math.max(1, Math.round(convTokensWithout / convTokensWith));
         out.push(`  Without context-mode  ${kb(convBytesWithout).padStart(8)}  ${withoutBar}   ${fmtNum(convTokensWithout).padStart(7)} tokens`);
         out.push(`  With context-mode     ${kb(convBytesWith).padStart(8)}  ${withBar}   ${fmtNum(convTokensWith).padStart(7)} tokens`);
-        out.push(`                          ${convPct.toFixed(0)}% kept out of context · your AI ran ${convMult}× longer before /compact fired`);
+        out.push(`                          ${convPct.toFixed(1)}% kept out of context · your AI ran ${convMult}× longer before /compact fired`);
         out.push("");
     }
     // Timeline — drop-in if conversation has byDay.

package/build/session/extract.d.ts CHANGED Viewed

@@ -22,6 +22,15 @@ export interface SessionEvent {
      * `Fetched and indexed N sections (XKB)` preamble.
      */
     bytes_avoided?: number;
+    /**
+     * Optional — bytes the model PAID to ACCESS kept-out content for this event:
+     * the tool_response byte length of a `ctx_search` / `ctx_fetch_and_index`
+     * call. This is the OTHER half of the with/without ratio (bytes_avoided is
+     * the kept-out half). Sandbox compute (ctx_execute/batch/file) is work-output
+     * and is excluded. Present only when the call is a retrieval call and its
+     * tool_response is non-empty.
+     */
+    bytes_retrieved?: number;
     /**
      * Optional structured cost/usage fields (Wave 2b). Emitted by
      * extractAgentUsage alongside the colon-string `data` so the forward
@@ -35,6 +44,13 @@ export interface SessionEvent {
     cache_read_tokens?: number;
     cache_creation_tokens?: number;
     cost_usd?: number;
+    /**
+     * "task_cumulative" on agent_usage events whose tokens are a Task sub-agent's
+     * usage SUMMED across its whole run (not one turn). The platform buckets these
+     * as lifetime spend and never prices them per-turn — see
+     * docs/handoff/cumulative-cost-bug.md.
+     */
+    usage_scope?: string;
 }
 export interface ToolCall {
     toolName: string;

package/build/session/extract.js CHANGED Viewed

@@ -912,12 +912,40 @@ function extractMcpToolCall(input) {
     const payload = truncated
         ? `{"tool_name":${JSON.stringify(tool_name)},"params_raw":${JSON.stringify(cappedStr)},"truncated":true}`
         : `{"tool_name":${JSON.stringify(tool_name)},"params":${cappedStr}}`;
-    return [{
-            type: "mcp_tool_call",
-            category: "mcp_tool_call",
-            data: safeString(payload),
-            priority: 4,
-        }];
+    const event = {
+        type: "mcp_tool_call",
+        category: "mcp_tool_call",
+        data: safeString(payload),
+        priority: 4,
+    };
+    // Retrieval cost (the OTHER half of the with/without ratio): when this MCP
+    // call is a `ctx_search` or `ctx_fetch_and_index` retrieval, the tool_response
+    // IS the kept-out content the model paid to access — record its byte length.
+    // Sandbox compute (ctx_execute/batch/file) is work-output, NOT retrieval, so
+    // it is intentionally excluded. Match by suffix char-algorithmically (host
+    // prefixes the name like `mcp__plugin_…__ctx_search`); NO regex.
+    if (isRetrievalToolName(tool_name)) {
+        const response = safeString(input.tool_response);
+        if (response.length > 0) {
+            event.bytes_retrieved = Buffer.byteLength(response, "utf8");
+        }
+    }
+    return [event];
+}
+/** Tool-name suffixes that denote a RETRIEVAL call (kept-out content accessed). */
+const RETRIEVAL_TOOL_SUFFIXES = ["ctx_search", "ctx_fetch_and_index"];
+/**
+ * True when `toolName` ends with one of the retrieval suffixes. Char-level
+ * suffix comparison via String.prototype.endsWith — no regex. MCP host names
+ * arrive prefixed (e.g. `mcp__plugin_context-mode_context-mode__ctx_search`),
+ * so an exact-name check would miss them; suffix match is host-agnostic.
+ */
+function isRetrievalToolName(toolName) {
+    for (const suffix of RETRIEVAL_TOOL_SUFFIXES) {
+        if (toolName.endsWith(suffix))
+            return true;
+    }
+    return false;
 }
 /**
  * Category 6 (tool-based): decision
@@ -1377,27 +1405,16 @@ function extractAgentUsage(input) {
     if (typeof usage.service_tier === "string") {
         parts.push(`tier:${usage.service_tier.slice(0, 32)}`);
     }
-    // Gap #1 (16-oss-verify-gap-prd) — derive cost_usd from per-model pricing
-    // when at least one token count is present. Zero-token case skips cost
-    // so dashboard never shows misleading "$0.00 for nothing" rows.
-    const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
-    const outputTokens = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
-    const cacheCreate = typeof usage.cache_creation_input_tokens === "number"
-        ? usage.cache_creation_input_tokens
-        : 0;
-    const cacheRead = typeof usage.cache_read_input_tokens === "number"
-        ? usage.cache_read_input_tokens
-        : 0;
+    // CUMULATIVE-USAGE GUARD (docs/handoff/cumulative-cost-bug.md): a Task
+    // tool_response carries the sub-agent's usage SUMMED across its entire run —
+    // every internal turn re-reads the cache, so cache_read reaches the billions.
+    // Pricing that cumulative figure as a single turn produced four-figure
+    // per-event costs ($3,532 with cache_read 4.7B) that poisoned every FinOps
+    // aggregate. We therefore do NOT derive cost_usd here. The raw token counts
+    // stay, tagged usage_scope="task_cumulative", so the platform buckets them as
+    // lifetime spend; real per-turn cost comes only from per-turn signals
+    // (extractTranscriptUsage + each adapter's own session).
     const modelId = resolveModelId(input, out);
-    const anyTokens = inputTokens > 0 || outputTokens > 0 || cacheCreate > 0 || cacheRead > 0;
-    let cost = null;
-    if (anyTokens) {
-        // null ⇒ unmatched model id (catalog warned once) — skip the cost token
-        // rather than blend a wrong Claude rate (the old non-Claude bug).
-        cost = computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreate, cacheRead);
-        if (cost !== null)
-            parts.push(`cost_usd:${formatCostUsd(cost)}`);
-    }
     // Wave 2b — emit structured top-level fields alongside the colon-string so
     // the forward envelope (which spreads `...event`) hands the platform typed
     // columns. Each field is set only when its source signal is present, so the
@@ -1421,8 +1438,7 @@ function extractAgentUsage(input) {
     if (typeof usage.cache_creation_input_tokens === "number") {
         event.cache_creation_tokens = usage.cache_creation_input_tokens;
     }
-    if (cost !== null)
-        event.cost_usd = cost;
+    event.usage_scope = "task_cumulative";
     return [event];
 }
 // ── Kimi Code (kimi-code) usage parsers ────────────────────────────────────

package/build/session/retrieval-marker.d.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * Server→hook bridge for the retrieval ("With context-mode") byte count.
+ *
+ * WHY THIS EXISTS — context-mode's OWN MCP retrieval tools (ctx_search /
+ * ctx_fetch_and_index) never fire a PostToolUse hook for the plugin's own
+ * server, so the hook-side `extractMcpToolCall` path can never observe them
+ * (verified empirically: 0 `mcp_tool_call` events locally, bytes_retrieved
+ * 0/124454 in production D1). The MCP server, however, measures each
+ * retrieval response's byte length directly.
+ *
+ * The server appends that count to a tmp marker keyed by the session DB
+ * *basename* — the one identifier the server process and the hook process
+ * both resolve reliably (CLAUDE_SESSION_ID is not guaranteed in the server
+ * env; the per-project session DB path is). The next PostToolUse fire — which
+ * DOES run for ordinary tools (Bash/Read/Edit) — consumes the marker and
+ * emits a forwardable event carrying `bytes_retrieved`. Mirrors the existing
+ * redirect / latency / rejected marker handshake in posttooluse.mjs.
+ */
+/**
+ * Tmp marker path for a session DB. Keyed by basename so the server (which
+ * holds the DB path via getSessionDbPath) and the hook (getSessionDBPath)
+ * derive the SAME file. Session DB filenames embed the worktree hash
+ * (`<hash>__<suffix>.db`), so basename collisions across projects are
+ * negligible.
+ */
+export declare function retrievalMarkerPath(sessionDbPath: string, tmpDir?: string): string;
+/**
+ * Record one retrieval's response byte count. Positive-only (a 0-byte or
+ * failed retrieval is not a context cost). Append-only so several retrievals
+ * between two hook fires accumulate. Best-effort — never throws into the
+ * MCP response path.
+ */
+export declare function appendRetrievalBytes(sessionDbPath: string, bytes: number, tmpDir?: string): void;
+/**
+ * Sum every recorded retrieval and delete the marker (consume-once) so the
+ * next PostToolUse fire cannot re-forward the same bytes. Returns 0 when no
+ * marker exists (phantom-event guard).
+ */
+export declare function consumeRetrievalBytes(sessionDbPath: string, tmpDir?: string): number;

package/build/session/retrieval-marker.js ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * Server→hook bridge for the retrieval ("With context-mode") byte count.
+ *
+ * WHY THIS EXISTS — context-mode's OWN MCP retrieval tools (ctx_search /
+ * ctx_fetch_and_index) never fire a PostToolUse hook for the plugin's own
+ * server, so the hook-side `extractMcpToolCall` path can never observe them
+ * (verified empirically: 0 `mcp_tool_call` events locally, bytes_retrieved
+ * 0/124454 in production D1). The MCP server, however, measures each
+ * retrieval response's byte length directly.
+ *
+ * The server appends that count to a tmp marker keyed by the session DB
+ * *basename* — the one identifier the server process and the hook process
+ * both resolve reliably (CLAUDE_SESSION_ID is not guaranteed in the server
+ * env; the per-project session DB path is). The next PostToolUse fire — which
+ * DOES run for ordinary tools (Bash/Read/Edit) — consumes the marker and
+ * emits a forwardable event carrying `bytes_retrieved`. Mirrors the existing
+ * redirect / latency / rejected marker handshake in posttooluse.mjs.
+ */
+import { appendFileSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { basename, join } from "node:path";
+/**
+ * Tmp marker path for a session DB. Keyed by basename so the server (which
+ * holds the DB path via getSessionDbPath) and the hook (getSessionDBPath)
+ * derive the SAME file. Session DB filenames embed the worktree hash
+ * (`<hash>__<suffix>.db`), so basename collisions across projects are
+ * negligible.
+ */
+export function retrievalMarkerPath(sessionDbPath, tmpDir = tmpdir()) {
+    return join(tmpDir, `context-mode-retrieval-${basename(sessionDbPath)}.txt`);
+}
+/**
+ * Record one retrieval's response byte count. Positive-only (a 0-byte or
+ * failed retrieval is not a context cost). Append-only so several retrievals
+ * between two hook fires accumulate. Best-effort — never throws into the
+ * MCP response path.
+ */
+export function appendRetrievalBytes(sessionDbPath, bytes, tmpDir) {
+    if (!Number.isFinite(bytes) || bytes <= 0)
+        return;
+    try {
+        appendFileSync(retrievalMarkerPath(sessionDbPath, tmpDir), `${Math.floor(bytes)}\n`);
+    }
+    catch { /* best-effort — never block the MCP response */ }
+}
+/**
+ * Sum every recorded retrieval and delete the marker (consume-once) so the
+ * next PostToolUse fire cannot re-forward the same bytes. Returns 0 when no
+ * marker exists (phantom-event guard).
+ */
+export function consumeRetrievalBytes(sessionDbPath, tmpDir) {
+    const path = retrievalMarkerPath(sessionDbPath, tmpDir);
+    let total = 0;
+    try {
+        const raw = readFileSync(path, "utf8");
+        for (const line of raw.split("\n")) {
+            const n = Number.parseInt(line, 10);
+            if (Number.isFinite(n) && n > 0)
+                total += n;
+        }
+        rmSync(path, { force: true });
+    }
+    catch { /* no marker — phantom-event guard */ }
+    return total;
+}