context-mode 1.0.167 → 1.0.169

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/build/server.js +26 -5
  7. package/build/session/analytics.d.ts +28 -0
  8. package/build/session/analytics.js +82 -1
  9. package/build/session/extract.d.ts +16 -0
  10. package/build/session/extract.js +44 -28
  11. package/build/session/retrieval-marker.d.ts +39 -0
  12. package/build/session/retrieval-marker.js +65 -0
  13. package/cli.bundle.mjs +224 -216
  14. package/configs/antigravity-cli/plugin.json +1 -1
  15. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  16. package/hooks/posttooluse.mjs +39 -1
  17. package/hooks/session-extract.bundle.mjs +3 -3
  18. package/hooks/session-loaders.mjs +8 -1
  19. package/openclaw.plugin.json +1 -1
  20. package/package.json +1 -1
  21. package/server.bundle.mjs +138 -130
  22. package/build/cache-heal.d.ts +0 -48
  23. package/build/cache-heal.js +0 -150
  24. package/build/concurrency/runPool.d.ts +0 -36
  25. package/build/concurrency/runPool.js +0 -51
  26. package/build/openclaw/mcp-tools.d.ts +0 -54
  27. package/build/openclaw/mcp-tools.js +0 -198
  28. package/build/openclaw/workspace-router.d.ts +0 -29
  29. package/build/openclaw/workspace-router.js +0 -64
  30. package/build/openclaw-plugin.d.ts +0 -130
  31. package/build/openclaw-plugin.js +0 -626
  32. package/build/opencode-plugin.d.ts +0 -122
  33. package/build/opencode-plugin.js +0 -375
  34. package/build/pi-extension.d.ts +0 -14
  35. package/build/pi-extension.js +0 -451
  36. package/build/routing-block.d.ts +0 -8
  37. package/build/routing-block.js +0 -86
  38. package/build/tool-naming.d.ts +0 -4
  39. package/build/tool-naming.js +0 -24
@@ -6,14 +6,14 @@
6
6
  },
7
7
  "metadata": {
8
8
  "description": "Claude Code plugins by Mert Koseoğlu",
9
- "version": "1.0.167"
9
+ "version": "1.0.169"
10
10
  },
11
11
  "plugins": [
12
12
  {
13
13
  "name": "context-mode",
14
14
  "source": "./",
15
15
  "description": "Claude Code MCP plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
16
- "version": "1.0.167",
16
+ "version": "1.0.169",
17
17
  "author": {
18
18
  "name": "Mert Koseoğlu"
19
19
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.169",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.169",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -3,7 +3,7 @@
3
3
  "name": "Context Mode",
4
4
  "kind": "tool",
5
5
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
6
- "version": "1.0.167",
6
+ "version": "1.0.169",
7
7
  "sandbox": {
8
8
  "mode": "permissive",
9
9
  "filesystem_access": "full",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.169",
4
4
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
package/build/server.js CHANGED
@@ -23,6 +23,7 @@ import { describeStorageDirectorySource, ensureWritableStorageDir, formatStorage
23
23
  import { purgeSession } from "./session/purge.js";
24
24
  import { emitCacheHitEvent, emitIndexWriteEvent, emitSandboxExecuteEvent, } from "./session/event-emit.js";
25
25
  import { persistToolCallCounter, restoreSessionStats } from "./session/persist-tool-calls.js";
26
+ import { appendRetrievalBytes } from "./session/retrieval-marker.js";
26
27
  import { searchAllSources } from "./search/unified.js";
27
28
  import { buildCtxSearchInputSchema, CTX_SEARCH_SHARED_MODE, resolveProjectScope, } from "./search/ctx-search-schema.js";
28
29
  import { FloodGuard } from "./search/flood-guard.js";
@@ -34,7 +35,7 @@ import { stripJsonComments } from "./util/jsonc.js";
34
35
  import { resolveClaudeConfigDir } from "./util/claude-config.js";
35
36
  import { resolveProjectDir } from "./util/project-dir.js";
36
37
  import { loadDatabase } from "./db-base.js";
37
- import { AnalyticsEngine, formatReport, getConversationStats, getContentBytesAllSessions, getLifetimeStats, getMultiAdapterLifetimeStats, getRealBytesStats, pricePerToken } from "./session/analytics.js";
38
+ import { AnalyticsEngine, formatReport, getConversationStats, getContentBytesAllSessions, getConversationWindowStats, getLifetimeStats, getMultiAdapterLifetimeStats, getRealBytesStats, pricePerToken } from "./session/analytics.js";
38
39
  const __pkg_dir = dirname(fileURLToPath(import.meta.url));
39
40
  const VERSION = (() => {
40
41
  for (const rel of ["../package.json", "./package.json"]) {
@@ -857,6 +858,16 @@ function trackResponse(toolName, response) {
857
858
  bytesReturned: bytes,
858
859
  }));
859
860
  }
861
+ // Retrieval ("With context-mode") bridge — ctx_search / ctx_fetch_and_index
862
+ // response bytes are the kept-out content the model paid to access. The
863
+ // PostToolUse hook never fires for the plugin's OWN MCP tools, so the
864
+ // hook-side extractMcpToolCall can never see these calls (bytes_retrieved
865
+ // was 0/124454 in prod). Drop the count into a marker keyed by the session
866
+ // DB; the next ordinary-tool PostToolUse consumes it and emits a forwardable
867
+ // bytes_retrieved event. Off the hot path; never throws.
868
+ if (toolName === "ctx_search" || toolName === "ctx_fetch_and_index") {
869
+ setImmediate(() => appendRetrievalBytes(getSessionDbPath(), bytes));
870
+ }
860
871
  return response;
861
872
  }
862
873
  function trackIndexed(bytes, source = "unknown") {
@@ -3654,12 +3665,22 @@ server.registerTool("ctx_stats", {
3654
3665
  }
3655
3666
  catch { /* skip unreadable DB */ }
3656
3667
  }
3657
- convReal = projectDirForSid
3658
- ? getRealBytesStats({ projectDir: projectDirForSid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath })
3659
- : getRealBytesStats({ sessionId: sid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath });
3668
+ // Section 1 "Where you are now" = the LIVE conversation window.
3669
+ // Sub-agents + ctx_execute sub-process sessions write to this
3670
+ // SAME worktree DB (same worktreeHash = sha256(cwd)) under their
3671
+ // own session_ids; their retrieval hit their own disposable
3672
+ // windows, not yours. getConversationWindowStats credits the
3673
+ // whole worktree's kept-out bytes while counting only THIS
3674
+ // session's retrieval as "With context-mode", and the
3675
+ // worktreeHash scope keeps the user's OTHER parallel worktrees
3676
+ // out. projectDirForSid is intentionally dropped — it
3677
+ // under-counted (missed empty-project_dir sub-process sessions)
3678
+ // and could not separate sub-agent retrieval from the window's.
3679
+ void projectDirForSid;
3680
+ convReal = getConversationWindowStats({ sessionId: sid, worktreeHash: dbHash, sessionsDir: getSessionDir(), contentDbPath });
3660
3681
  }
3661
3682
  catch {
3662
- convReal = getRealBytesStats({ sessionId: sid, sessionsDir: getSessionDir(), worktreeHash: dbHash, contentDbPath });
3683
+ convReal = getConversationWindowStats({ sessionId: sid, worktreeHash: dbHash, sessionsDir: getSessionDir(), contentDbPath });
3663
3684
  }
3664
3685
  const lifeRealBase = getRealBytesStats({ sessionsDir: getSessionDir() });
3665
3686
  // v1.0.134 SLICE C: lifetime tier sums ALL chunks (no
@@ -468,6 +468,34 @@ export declare function getRealBytesStats(opts: {
468
468
  contentDbPath?: string;
469
469
  loadDatabase?: () => unknown;
470
470
  }): RealBytesStats;
471
+ /**
472
+ * v1.0.169 — Section 1 "Where you are now" = the LIVE conversation window.
473
+ *
474
+ * A single live conversation fans out into sub-agents and ctx_execute
475
+ * sub-process sessions. Each runs in its OWN, disposable context window (its
476
+ * own session_id) — but all under the SAME worktree DB, because the worktree
477
+ * hash is sha256(cwd) and they share the cwd. Their retrieval (ctx_search /
478
+ * ctx_fetch_and_index returns) entered THOSE windows and was thrown away when
479
+ * each returned its short summary; it never touched the window the user is
480
+ * reading now. So the live-window savings bar must split the worktree by
481
+ * which retrieval actually landed in the user's window:
482
+ *
483
+ * bytesReturned ("With context-mode") = THIS session's retrieval only —
484
+ * what genuinely entered the live window.
485
+ * bytesAvoided ("kept out") = everything the whole worktree moved
486
+ * (avoided + every session's retrieval) MINUS what landed in your window.
487
+ *
488
+ * Scoping by `worktreeHash` (not project-root + time) means the user's OTHER
489
+ * parallel worktrees never bleed in — a different worktree is a different
490
+ * cwd-hash, hence a different DB file the prefix filter excludes — while the
491
+ * sub-agent fan-out this conversation actually spawned is fully credited.
492
+ */
493
+ export declare function getConversationWindowStats(opts: {
494
+ sessionId: string;
495
+ worktreeHash: string;
496
+ sessionsDir?: string;
497
+ contentDbPath?: string;
498
+ }): RealBytesStats;
471
499
  /**
472
500
  * Real-usage filter thresholds. Decided in the B3a /diagnose conversation
473
501
  * to suppress fixture-noise dirs (test runs that touched ~/.X but never
@@ -900,6 +900,18 @@ export function getRealBytesStats(opts) {
900
900
  snapshotBytes += Number(snap.bytes);
901
901
  }
902
902
  catch { /* old schema */ }
903
+ try {
904
+ // "With context-mode" = the bytes the model paid to ACCESS the
905
+ // kept-out content: ctx_search (query the index) + ctx_fetch_and_index
906
+ // (fetch + index a URL). Sandbox compute (ctx_execute/batch/file) is
907
+ // work-output the model would see regardless — NOT redirect savings —
908
+ // so it is excluded; folding it crushed the bar to a false ~43%.
909
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
910
+ WHERE session_id = ? AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.sessionId);
911
+ if (tc?.bytes)
912
+ bytesReturned += Number(tc.bytes);
913
+ }
914
+ catch { /* old schema: no tool_calls table */ }
903
915
  }
904
916
  else if (opts.projectDir) {
905
917
  // Bug E+F: META-scoped aggregation. Take every session_id whose
@@ -930,6 +942,17 @@ export function getRealBytesStats(opts) {
930
942
  snapshotBytes += Number(snap.bytes);
931
943
  }
932
944
  catch { /* old schema */ }
945
+ try {
946
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes
947
+ FROM tool_calls
948
+ WHERE session_id IN (
949
+ SELECT session_id FROM session_meta WHERE project_dir = ?
950
+ )
951
+ AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.projectDir);
952
+ if (tc?.bytes)
953
+ bytesReturned += Number(tc.bytes);
954
+ }
955
+ catch { /* old schema: no tool_calls table */ }
933
956
  }
934
957
  else {
935
958
  const row = sdb.prepare(`SELECT
@@ -948,6 +971,13 @@ export function getRealBytesStats(opts) {
948
971
  snapshotBytes += Number(snap.bytes);
949
972
  }
950
973
  catch { /* old schema */ }
974
+ try {
975
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
976
+ WHERE tool IN ('ctx_search', 'ctx_fetch_and_index')`).get();
977
+ if (tc?.bytes)
978
+ bytesReturned += Number(tc.bytes);
979
+ }
980
+ catch { /* old schema: no tool_calls table */ }
951
981
  }
952
982
  }
953
983
  finally {
@@ -970,6 +1000,57 @@ export function getRealBytesStats(opts) {
970
1000
  const totalSavedTokens = Math.floor((eventDataBytes + bytesAvoided + snapshotBytes) / 4);
971
1001
  return { eventDataBytes, bytesAvoided, bytesReturned, snapshotBytes, contentBytes, totalSavedTokens };
972
1002
  }
/**
 * v1.0.169 — byte statistics for Section 1 ("Where you are now"), i.e. the
 * LIVE conversation window.
 *
 * One live conversation fans out into sub-agents and ctx_execute sub-process
 * sessions. Each has its own disposable context window (its own session_id),
 * yet they all write to the SAME worktree DB because the worktree hash is
 * sha256(cwd) and they share the cwd. Whatever those sessions retrieved
 * (ctx_search / ctx_fetch_and_index returns) went into THEIR windows and was
 * discarded once they returned a short summary; it never reached the window
 * the user is reading. The savings bar therefore splits the worktree totals:
 *
 *   bytesReturned ("With context-mode") = retrieval of THIS session only.
 *   bytesAvoided  ("kept out")          = worktree-wide avoided bytes plus
 *       worktree-wide retrieval, MINUS this session's retrieval.
 *
 * Scoping by `worktreeHash` keeps the user's other parallel worktrees out
 * (different cwd-hash, different DB file) while still crediting the whole
 * sub-agent fan-out of this conversation.
 *
 * @param {object} opts
 * @param {string} opts.sessionId      Session id of the live window.
 * @param {string} opts.worktreeHash   Hash identifying the current worktree's DBs.
 * @param {string} [opts.sessionsDir]  Directory holding the session DBs.
 * @param {string} [opts.contentDbPath] Content DB path, used for the live-window query only.
 * @returns {{eventDataBytes: number, bytesAvoided: number, bytesReturned: number,
 *            snapshotBytes: number, contentBytes: number, totalSavedTokens: number}}
 */
export function getConversationWindowStats(opts) {
    const { sessionId, worktreeHash, sessionsDir, contentDbPath } = opts;
    // Worktree-wide totals: every session sharing this cwd-hash DB.
    const worktreeTotals = getRealBytesStats({ worktreeHash, sessionsDir });
    // Live window only: this session_id's own retrieval and content chunks.
    const liveWindow = getRealBytesStats({ sessionId, worktreeHash, sessionsDir, contentDbPath });
    const bytesReturned = liveWindow.bytesReturned;
    const movedAcrossWorktree = worktreeTotals.bytesAvoided + worktreeTotals.bytesReturned;
    // Everything moved across the worktree, less the slice that entered this
    // window. Clamped at 0 so a stale/edge DB cannot yield a negative bar.
    const bytesAvoided = Math.max(0, movedAcrossWorktree - bytesReturned);
    const savedBytes = worktreeTotals.eventDataBytes + bytesAvoided + worktreeTotals.snapshotBytes;
    return {
        eventDataBytes: worktreeTotals.eventDataBytes,
        bytesAvoided,
        bytesReturned,
        snapshotBytes: worktreeTotals.snapshotBytes,
        contentBytes: liveWindow.contentBytes,
        totalSavedTokens: Math.floor(savedBytes / 4),
    };
}
973
1054
  const DEFAULT_REAL_USAGE_FILTER = {
974
1055
  minEvents: 100,
975
1056
  minProjects: 5,
@@ -1620,7 +1701,7 @@ function renderNarrative5Section(args) {
1620
1701
  const convMult = Math.max(1, Math.round(convTokensWithout / convTokensWith));
1621
1702
  out.push(` Without context-mode ${kb(convBytesWithout).padStart(8)} ${withoutBar} ${fmtNum(convTokensWithout).padStart(7)} tokens`);
1622
1703
  out.push(` With context-mode ${kb(convBytesWith).padStart(8)} ${withBar} ${fmtNum(convTokensWith).padStart(7)} tokens`);
1623
- out.push(` ${convPct.toFixed(0)}% kept out of context · your AI ran ${convMult}× longer before /compact fired`);
1704
+ out.push(` ${convPct.toFixed(1)}% kept out of context · your AI ran ${convMult}× longer before /compact fired`);
1624
1705
  out.push("");
1625
1706
  }
1626
1707
  // Timeline — drop-in if conversation has byDay.
@@ -22,6 +22,15 @@ export interface SessionEvent {
22
22
  * `Fetched and indexed N sections (XKB)` preamble.
23
23
  */
24
24
  bytes_avoided?: number;
25
+ /**
26
+ * Optional — bytes the model PAID to ACCESS kept-out content for this event:
27
+ * the tool_response byte length of a `ctx_search` / `ctx_fetch_and_index`
28
+ * call. This is the OTHER half of the with/without ratio (bytes_avoided is
29
+ * the kept-out half). Sandbox compute (ctx_execute/batch/file) is work-output
30
+ * and is excluded. Present only when the call is a retrieval call and its
31
+ * tool_response is non-empty.
32
+ */
33
+ bytes_retrieved?: number;
25
34
  /**
26
35
  * Optional structured cost/usage fields (Wave 2b). Emitted by
27
36
  * extractAgentUsage alongside the colon-string `data` so the forward
@@ -35,6 +44,13 @@ export interface SessionEvent {
35
44
  cache_read_tokens?: number;
36
45
  cache_creation_tokens?: number;
37
46
  cost_usd?: number;
47
+ /**
48
+ * "task_cumulative" on agent_usage events whose tokens are a Task sub-agent's
49
+ * usage SUMMED across its whole run (not one turn). The platform buckets these
50
+ * as lifetime spend and never prices them per-turn — see
51
+ * docs/handoff/cumulative-cost-bug.md.
52
+ */
53
+ usage_scope?: string;
38
54
  }
39
55
  export interface ToolCall {
40
56
  toolName: string;
@@ -912,12 +912,40 @@ function extractMcpToolCall(input) {
912
912
  const payload = truncated
913
913
  ? `{"tool_name":${JSON.stringify(tool_name)},"params_raw":${JSON.stringify(cappedStr)},"truncated":true}`
914
914
  : `{"tool_name":${JSON.stringify(tool_name)},"params":${cappedStr}}`;
915
- return [{
916
- type: "mcp_tool_call",
917
- category: "mcp_tool_call",
918
- data: safeString(payload),
919
- priority: 4,
920
- }];
915
+ const event = {
916
+ type: "mcp_tool_call",
917
+ category: "mcp_tool_call",
918
+ data: safeString(payload),
919
+ priority: 4,
920
+ };
921
+ // Retrieval cost (the OTHER half of the with/without ratio): when this MCP
922
+ // call is a `ctx_search` or `ctx_fetch_and_index` retrieval, the tool_response
923
+ // IS the kept-out content the model paid to access — record its byte length.
924
+ // Sandbox compute (ctx_execute/batch/file) is work-output, NOT retrieval, so
925
+ // it is intentionally excluded. Match by suffix char-algorithmically (host
926
+ // prefixes the name like `mcp__plugin_…__ctx_search`); NO regex.
927
+ if (isRetrievalToolName(tool_name)) {
928
+ const response = safeString(input.tool_response);
929
+ if (response.length > 0) {
930
+ event.bytes_retrieved = Buffer.byteLength(response, "utf8");
931
+ }
932
+ }
933
+ return [event];
934
+ }
/** Trailing tool-name fragments that mark a RETRIEVAL call (kept-out content accessed). */
const RETRIEVAL_TOOL_SUFFIXES = ["ctx_search", "ctx_fetch_and_index"];
/**
 * Reports whether `toolName` ends with any retrieval suffix.
 *
 * The comparison is a plain String.prototype.endsWith suffix test (no regex).
 * MCP hosts deliver prefixed names such as
 * `mcp__plugin_context-mode_context-mode__ctx_search`, which an exact-name
 * comparison would miss; matching on the suffix stays host-agnostic.
 *
 * @param {string} toolName Tool name as reported by the host.
 * @returns {boolean} true when the name ends with a retrieval suffix.
 */
function isRetrievalToolName(toolName) {
    return RETRIEVAL_TOOL_SUFFIXES.some((suffix) => toolName.endsWith(suffix));
}
950
  /**
923
951
  * Category 6 (tool-based): decision
@@ -1377,27 +1405,16 @@ function extractAgentUsage(input) {
1377
1405
  if (typeof usage.service_tier === "string") {
1378
1406
  parts.push(`tier:${usage.service_tier.slice(0, 32)}`);
1379
1407
  }
1380
- // Gap #1 (16-oss-verify-gap-prd) derive cost_usd from per-model pricing
1381
- // when at least one token count is present. Zero-token case skips cost
1382
- // so dashboard never shows misleading "$0.00 for nothing" rows.
1383
- const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
1384
- const outputTokens = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
1385
- const cacheCreate = typeof usage.cache_creation_input_tokens === "number"
1386
- ? usage.cache_creation_input_tokens
1387
- : 0;
1388
- const cacheRead = typeof usage.cache_read_input_tokens === "number"
1389
- ? usage.cache_read_input_tokens
1390
- : 0;
1408
+ // CUMULATIVE-USAGE GUARD (docs/handoff/cumulative-cost-bug.md): a Task
1409
+ // tool_response carries the sub-agent's usage SUMMED across its entire run:
1410
+ // every internal turn re-reads the cache, so cache_read reaches the billions.
1411
+ // Pricing that cumulative figure as a single turn produced four-figure
1412
+ // per-event costs ($3,532 with cache_read 4.7B) that poisoned every FinOps
1413
+ // aggregate. We therefore do NOT derive cost_usd here. The raw token counts
1414
+ // stay, tagged usage_scope="task_cumulative", so the platform buckets them as
1415
+ // lifetime spend; real per-turn cost comes only from per-turn signals
1416
+ // (extractTranscriptUsage + each adapter's own session).
1391
1417
  const modelId = resolveModelId(input, out);
1392
- const anyTokens = inputTokens > 0 || outputTokens > 0 || cacheCreate > 0 || cacheRead > 0;
1393
- let cost = null;
1394
- if (anyTokens) {
1395
- // null ⇒ unmatched model id (catalog warned once) — skip the cost token
1396
- // rather than blend a wrong Claude rate (the old non-Claude bug).
1397
- cost = computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreate, cacheRead);
1398
- if (cost !== null)
1399
- parts.push(`cost_usd:${formatCostUsd(cost)}`);
1400
- }
1401
1418
  // Wave 2b — emit structured top-level fields alongside the colon-string so
1402
1419
  // the forward envelope (which spreads `...event`) hands the platform typed
1403
1420
  // columns. Each field is set only when its source signal is present, so the
@@ -1421,8 +1438,7 @@ function extractAgentUsage(input) {
1421
1438
  if (typeof usage.cache_creation_input_tokens === "number") {
1422
1439
  event.cache_creation_tokens = usage.cache_creation_input_tokens;
1423
1440
  }
1424
- if (cost !== null)
1425
- event.cost_usd = cost;
1441
+ event.usage_scope = "task_cumulative";
1426
1442
  return [event];
1427
1443
  }
1428
1444
  // ── Kimi Code (kimi-code) usage parsers ────────────────────────────────────
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Server→hook bridge for the retrieval ("With context-mode") byte count.
3
+ *
4
+ * WHY THIS EXISTS — context-mode's OWN MCP retrieval tools (ctx_search /
5
+ * ctx_fetch_and_index) never fire a PostToolUse hook for the plugin's own
6
+ * server, so the hook-side `extractMcpToolCall` path can never observe them
7
+ * (verified empirically: 0 `mcp_tool_call` events locally, bytes_retrieved
8
+ * 0/124454 in production D1). The MCP server, however, measures each
9
+ * retrieval response's byte length directly.
10
+ *
11
+ * The server appends that count to a tmp marker keyed by the session DB
12
+ * *basename* — the one identifier the server process and the hook process
13
+ * both resolve reliably (CLAUDE_SESSION_ID is not guaranteed in the server
14
+ * env; the per-project session DB path is). The next PostToolUse fire — which
15
+ * DOES run for ordinary tools (Bash/Read/Edit) — consumes the marker and
16
+ * emits a forwardable event carrying `bytes_retrieved`. Mirrors the existing
17
+ * redirect / latency / rejected marker handshake in posttooluse.mjs.
18
+ */
19
+ /**
20
+ * Tmp marker path for a session DB. Keyed by basename so the server (which
21
+ * holds the DB path via getSessionDbPath) and the hook (getSessionDBPath)
22
+ * derive the SAME file. Session DB filenames embed the worktree hash
23
+ * (`<hash>__<suffix>.db`), so basename collisions across projects are
24
+ * negligible.
25
+ */
26
+ export declare function retrievalMarkerPath(sessionDbPath: string, tmpDir?: string): string;
27
+ /**
28
+ * Record one retrieval's response byte count. Positive-only (a 0-byte or
29
+ * failed retrieval is not a context cost). Append-only so several retrievals
30
+ * between two hook fires accumulate. Best-effort — never throws into the
31
+ * MCP response path.
32
+ */
33
+ export declare function appendRetrievalBytes(sessionDbPath: string, bytes: number, tmpDir?: string): void;
34
+ /**
35
+ * Sum every recorded retrieval and delete the marker (consume-once) so the
36
+ * next PostToolUse fire cannot re-forward the same bytes. Returns 0 when no
37
+ * marker exists (phantom-event guard).
38
+ */
39
+ export declare function consumeRetrievalBytes(sessionDbPath: string, tmpDir?: string): number;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Server→hook bridge for the retrieval ("With context-mode") byte count.
3
+ *
4
+ * WHY THIS EXISTS — context-mode's OWN MCP retrieval tools (ctx_search /
5
+ * ctx_fetch_and_index) never fire a PostToolUse hook for the plugin's own
6
+ * server, so the hook-side `extractMcpToolCall` path can never observe them
7
+ * (verified empirically: 0 `mcp_tool_call` events locally, bytes_retrieved
8
+ * 0/124454 in production D1). The MCP server, however, measures each
9
+ * retrieval response's byte length directly.
10
+ *
11
+ * The server appends that count to a tmp marker keyed by the session DB
12
+ * *basename* — the one identifier the server process and the hook process
13
+ * both resolve reliably (CLAUDE_SESSION_ID is not guaranteed in the server
14
+ * env; the per-project session DB path is). The next PostToolUse fire — which
15
+ * DOES run for ordinary tools (Bash/Read/Edit) — consumes the marker and
16
+ * emits a forwardable event carrying `bytes_retrieved`. Mirrors the existing
17
+ * redirect / latency / rejected marker handshake in posttooluse.mjs.
18
+ */
import { appendFileSync, readFileSync, renameSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { basename, join } from "node:path";
/**
 * Builds the tmp marker path that belongs to a session DB.
 *
 * Only the DB's basename goes into the marker name, so the server (holding
 * the path from getSessionDbPath) and the hook (getSessionDBPath) arrive at
 * the SAME file. Session DB filenames embed the worktree hash
 * (`<hash>__<suffix>.db`), which makes basename collisions across projects
 * negligible.
 *
 * @param {string} sessionDbPath Path of the session DB.
 * @param {string} [tmpDir] Directory for the marker; defaults to os.tmpdir().
 * @returns {string} Path of the marker file.
 */
export function retrievalMarkerPath(sessionDbPath, tmpDir = tmpdir()) {
    const markerName = `context-mode-retrieval-${basename(sessionDbPath)}.txt`;
    return join(tmpDir, markerName);
}
/**
 * Records the response byte count of one retrieval in the session's marker.
 *
 * Positive-only: a 0-byte or failed retrieval is not a context cost. The
 * count is floored BEFORE the positivity check, so a fractional value below 1
 * is dropped instead of being written as a meaningless `0` line. Entries are
 * appended one per line, so several retrievals between two hook fires
 * accumulate. Best-effort: a failed write is ignored so it can never throw
 * into the MCP response path.
 *
 * @param {string} sessionDbPath Path of the session DB that keys the marker.
 * @param {number} bytes Response size in bytes; non-finite or < 1 is ignored.
 * @param {string} [tmpDir] Marker directory override, passed through to retrievalMarkerPath.
 * @returns {void}
 */
export function appendRetrievalBytes(sessionDbPath, bytes, tmpDir) {
    // Number.isFinite also rejects non-number inputs (strings, undefined, NaN).
    const wholeBytes = Number.isFinite(bytes) ? Math.floor(bytes) : 0;
    if (wholeBytes <= 0) {
        return;
    }
    try {
        appendFileSync(retrievalMarkerPath(sessionDbPath, tmpDir), `${wholeBytes}\n`);
    }
    catch { /* best-effort — never block the MCP response */ }
}
/**
 * Sums every recorded retrieval and removes the marker (consume-once), so the
 * next PostToolUse fire cannot re-forward the same bytes. Returns 0 when no
 * marker exists (phantom-event guard).
 *
 * The marker is first CLAIMED by renaming it to a per-process path and only
 * then read. Compared with read-then-delete this closes two gaps:
 *   - a count appended by the server between the read and the delete is no
 *     longer deleted unread — after the rename the server's next append
 *     simply creates a fresh marker for the following fire;
 *   - a failed delete can no longer leave the already-counted marker in
 *     place to be summed a second time.
 * If the rename fails (no marker, or the file is momentarily busy) nothing is
 * consumed and the marker, if any, stays for the next fire.
 *
 * @param {string} sessionDbPath Path of the session DB that keys the marker.
 * @param {string} [tmpDir] Marker directory override, passed through to retrievalMarkerPath.
 * @returns {number} Sum of all positive integer entries, or 0.
 */
export function consumeRetrievalBytes(sessionDbPath, tmpDir) {
    const markerPath = retrievalMarkerPath(sessionDbPath, tmpDir);
    // Per-process suffix so two concurrent hook processes never share a claim file.
    const claimedPath = `${markerPath}.${process.pid}.consuming`;
    try {
        renameSync(markerPath, claimedPath);
    }
    catch {
        // No marker (or it could not be claimed) — phantom-event guard.
        return 0;
    }
    let total = 0;
    try {
        const raw = readFileSync(claimedPath, "utf8");
        for (const line of raw.split("\n")) {
            const n = Number.parseInt(line, 10);
            if (Number.isFinite(n) && n > 0) {
                total += n;
            }
        }
    }
    catch { /* unreadable claim — report what was summed so far */ }
    finally {
        try {
            rmSync(claimedPath, { force: true });
        }
        catch { /* best-effort cleanup; the canonical marker is already gone */ }
    }
    return total;
}