context-mode 1.0.167 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,14 +6,14 @@
6
6
  },
7
7
  "metadata": {
8
8
  "description": "Claude Code plugins by Mert Koseoğlu",
9
- "version": "1.0.167"
9
+ "version": "1.0.168"
10
10
  },
11
11
  "plugins": [
12
12
  {
13
13
  "name": "context-mode",
14
14
  "source": "./",
15
15
  "description": "Claude Code MCP plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
16
- "version": "1.0.167",
16
+ "version": "1.0.168",
17
17
  "author": {
18
18
  "name": "Mert Koseoğlu"
19
19
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.168",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.168",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -3,7 +3,7 @@
3
3
  "name": "Context Mode",
4
4
  "kind": "tool",
5
5
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
6
- "version": "1.0.167",
6
+ "version": "1.0.168",
7
7
  "sandbox": {
8
8
  "mode": "permissive",
9
9
  "filesystem_access": "full",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.167",
3
+ "version": "1.0.168",
4
4
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -900,6 +900,18 @@ export function getRealBytesStats(opts) {
900
900
  snapshotBytes += Number(snap.bytes);
901
901
  }
902
902
  catch { /* old schema */ }
903
+ try {
904
+ // "With context-mode" = the bytes the model paid to ACCESS the
905
+ // kept-out content: ctx_search (query the index) + ctx_fetch_and_index
906
+ // (fetch + index a URL). Sandbox compute (ctx_execute/batch/file) is
907
+ // work-output the model would see regardless — NOT redirect savings —
908
+ // so it is excluded; folding it in crushed the bar to a false ~43%.
909
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
910
+ WHERE session_id = ? AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.sessionId);
911
+ if (tc?.bytes)
912
+ bytesReturned += Number(tc.bytes);
913
+ }
914
+ catch { /* old schema: no tool_calls table */ }
903
915
  }
904
916
  else if (opts.projectDir) {
905
917
  // Bug E+F: META-scoped aggregation. Take every session_id whose
@@ -930,6 +942,17 @@ export function getRealBytesStats(opts) {
930
942
  snapshotBytes += Number(snap.bytes);
931
943
  }
932
944
  catch { /* old schema */ }
945
+ try {
946
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes
947
+ FROM tool_calls
948
+ WHERE session_id IN (
949
+ SELECT session_id FROM session_meta WHERE project_dir = ?
950
+ )
951
+ AND tool IN ('ctx_search', 'ctx_fetch_and_index')`).get(opts.projectDir);
952
+ if (tc?.bytes)
953
+ bytesReturned += Number(tc.bytes);
954
+ }
955
+ catch { /* old schema: no tool_calls table */ }
933
956
  }
934
957
  else {
935
958
  const row = sdb.prepare(`SELECT
@@ -948,6 +971,13 @@ export function getRealBytesStats(opts) {
948
971
  snapshotBytes += Number(snap.bytes);
949
972
  }
950
973
  catch { /* old schema */ }
974
+ try {
975
+ const tc = sdb.prepare(`SELECT COALESCE(SUM(bytes_returned), 0) AS bytes FROM tool_calls
976
+ WHERE tool IN ('ctx_search', 'ctx_fetch_and_index')`).get();
977
+ if (tc?.bytes)
978
+ bytesReturned += Number(tc.bytes);
979
+ }
980
+ catch { /* old schema: no tool_calls table */ }
951
981
  }
952
982
  }
953
983
  finally {
@@ -22,6 +22,15 @@ export interface SessionEvent {
22
22
  * `Fetched and indexed N sections (XKB)` preamble.
23
23
  */
24
24
  bytes_avoided?: number;
25
+ /**
26
+ * Optional — bytes the model PAID to ACCESS kept-out content for this event:
27
+ * the tool_response byte length of a `ctx_search` / `ctx_fetch_and_index`
28
+ * call. This is the OTHER half of the with/without ratio (bytes_avoided is
29
+ * the kept-out half). Sandbox compute (ctx_execute/batch/file) is work-output
30
+ * and is excluded. Present only when the call is a retrieval call and its
31
+ * tool_response is non-empty.
32
+ */
33
+ bytes_retrieved?: number;
25
34
  /**
26
35
  * Optional structured cost/usage fields (Wave 2b). Emitted by
27
36
  * extractAgentUsage alongside the colon-string `data` so the forward
@@ -35,6 +44,13 @@ export interface SessionEvent {
35
44
  cache_read_tokens?: number;
36
45
  cache_creation_tokens?: number;
37
46
  cost_usd?: number;
47
+ /**
48
+ * "task_cumulative" on agent_usage events whose tokens are a Task sub-agent's
49
+ * usage SUMMED across its whole run (not one turn). The platform buckets these
50
+ * as lifetime spend and never prices them per-turn — see
51
+ * docs/handoff/cumulative-cost-bug.md.
52
+ */
53
+ usage_scope?: string;
38
54
  }
39
55
  export interface ToolCall {
40
56
  toolName: string;
@@ -912,12 +912,40 @@ function extractMcpToolCall(input) {
912
912
  const payload = truncated
913
913
  ? `{"tool_name":${JSON.stringify(tool_name)},"params_raw":${JSON.stringify(cappedStr)},"truncated":true}`
914
914
  : `{"tool_name":${JSON.stringify(tool_name)},"params":${cappedStr}}`;
915
- return [{
916
- type: "mcp_tool_call",
917
- category: "mcp_tool_call",
918
- data: safeString(payload),
919
- priority: 4,
920
- }];
915
+ const event = {
916
+ type: "mcp_tool_call",
917
+ category: "mcp_tool_call",
918
+ data: safeString(payload),
919
+ priority: 4,
920
+ };
921
+ // Retrieval cost (the OTHER half of the with/without ratio): when this MCP
922
+ // call is a `ctx_search` or `ctx_fetch_and_index` retrieval, the tool_response
923
+ // IS the kept-out content the model paid to access — record its byte length.
924
+ // Sandbox compute (ctx_execute/batch/file) is work-output, NOT retrieval, so
925
+ // it is intentionally excluded. Match the tool name by suffix (the host
926
+ // prefixes it, e.g. `mcp__plugin_…__ctx_search`), using endsWith — NO regex.
927
+ if (isRetrievalToolName(tool_name)) {
928
+ const response = safeString(input.tool_response);
929
+ if (response.length > 0) {
930
+ event.bytes_retrieved = Buffer.byteLength(response, "utf8");
931
+ }
932
+ }
933
+ return [event];
934
+ }
935
+ /** Tool-name suffixes that denote a RETRIEVAL call (kept-out content accessed). */
936
+ const RETRIEVAL_TOOL_SUFFIXES = ["ctx_search", "ctx_fetch_and_index"];
937
+ /**
938
+ * True when `toolName` ends with one of the retrieval suffixes. Char-level
939
+ * suffix comparison via String.prototype.endsWith — no regex. MCP host names
940
+ * arrive prefixed (e.g. `mcp__plugin_context-mode_context-mode__ctx_search`),
941
+ * so an exact-name check would miss them; suffix match is host-agnostic.
942
+ */
943
+ function isRetrievalToolName(toolName) {
944
+ for (const suffix of RETRIEVAL_TOOL_SUFFIXES) {
945
+ if (toolName.endsWith(suffix))
946
+ return true;
947
+ }
948
+ return false;
921
949
  }
922
950
  /**
923
951
  * Category 6 (tool-based): decision
@@ -1377,27 +1405,16 @@ function extractAgentUsage(input) {
1377
1405
  if (typeof usage.service_tier === "string") {
1378
1406
  parts.push(`tier:${usage.service_tier.slice(0, 32)}`);
1379
1407
  }
1380
- // Gap #1 (16-oss-verify-gap-prd) derive cost_usd from per-model pricing
1381
- // when at least one token count is present. Zero-token case skips cost
1382
- // so dashboard never shows misleading "$0.00 for nothing" rows.
1383
- const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
1384
- const outputTokens = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
1385
- const cacheCreate = typeof usage.cache_creation_input_tokens === "number"
1386
- ? usage.cache_creation_input_tokens
1387
- : 0;
1388
- const cacheRead = typeof usage.cache_read_input_tokens === "number"
1389
- ? usage.cache_read_input_tokens
1390
- : 0;
1408
+ // CUMULATIVE-USAGE GUARD (docs/handoff/cumulative-cost-bug.md): a Task
1409
+ // tool_response carries the sub-agent's usage SUMMED across its entire run —
1410
+ // every internal turn re-reads the cache, so cache_read reaches the billions.
1411
+ // Pricing that cumulative figure as a single turn produced four-figure
1412
+ // per-event costs ($3,532 with cache_read 4.7B) that poisoned every FinOps
1413
+ // aggregate. We therefore do NOT derive cost_usd here. The raw token counts
1414
+ // stay, tagged usage_scope="task_cumulative", so the platform buckets them as
1415
+ // lifetime spend; real per-turn cost comes only from per-turn signals
1416
+ // (extractTranscriptUsage + each adapter's own session).
1391
1417
  const modelId = resolveModelId(input, out);
1392
- const anyTokens = inputTokens > 0 || outputTokens > 0 || cacheCreate > 0 || cacheRead > 0;
1393
- let cost = null;
1394
- if (anyTokens) {
1395
- // null ⇒ unmatched model id (catalog warned once) — skip the cost token
1396
- // rather than blend a wrong Claude rate (the old non-Claude bug).
1397
- cost = computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreate, cacheRead);
1398
- if (cost !== null)
1399
- parts.push(`cost_usd:${formatCostUsd(cost)}`);
1400
- }
1401
1418
  // Wave 2b — emit structured top-level fields alongside the colon-string so
1402
1419
  // the forward envelope (which spreads `...event`) hands the platform typed
1403
1420
  // columns. Each field is set only when its source signal is present, so the
@@ -1421,8 +1438,7 @@ function extractAgentUsage(input) {
1421
1438
  if (typeof usage.cache_creation_input_tokens === "number") {
1422
1439
  event.cache_creation_tokens = usage.cache_creation_input_tokens;
1423
1440
  }
1424
- if (cost !== null)
1425
- event.cost_usd = cost;
1441
+ event.usage_scope = "task_cumulative";
1426
1442
  return [event];
1427
1443
  }
1428
1444
  // ── Kimi Code (kimi-code) usage parsers ────────────────────────────────────