code-context-control 2.42.0__py3-none-any.whl → 2.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli/c3.py CHANGED
@@ -85,7 +85,7 @@ console = Console() if HAS_RICH else None
85
85
  # Config
86
86
  CONFIG_DIR = ".c3"
87
87
  CONFIG_FILE = ".c3/config.json"
88
- __version__ = "2.42.0"
88
+ __version__ = "2.43.0"
89
89
 
90
90
 
91
91
  def _command_deps() -> CommandDeps:
@@ -6475,6 +6475,24 @@ def cmd_upgrade(args):
6475
6475
  print(" In each project, run c3 init . --force to apply any migrations.")
6476
6476
 
6477
6477
 
6478
def _stdio_is_interactive() -> bool:
    """Report whether both stdin and stdout are connected to a terminal.

    This gates whether a bare `c3` invocation may start the full-screen TUI.
    When stdio is redirected (pytest capture_output, CI, shell pipes) a TUI
    child would inherit the pipe handles and keep them open after this
    process exits; on Windows the caller's communicate() then blocks forever,
    because subprocess timeouts kill only the direct child, not the tree.

    Returns:
        True only if sys.stdin and sys.stdout are both present and both
        report isatty(); False otherwise, including when the check raises.
    """
    try:
        # stdin is examined first, then stdout; the first stream that is
        # missing or not a tty settles the answer.
        for stream in (sys.stdin, sys.stdout):
            if stream is None or not stream.isatty():
                return False
    except Exception:
        # Any failure while probing is treated as "not interactive".
        return False
    return True
6494
+
6495
+
6478
6496
  def _launch_tui() -> None:
6479
6497
  """Launch the interactive TUI — what `c3` with no arguments does.
6480
6498
 
@@ -6524,8 +6542,16 @@ def main():
6524
6542
  args = parser.parse_args()
6525
6543
 
6526
6544
  if not args.command:
6527
- # Bare `c3` launches the interactive TUI (replaces the old c3.bat wrapper).
6528
- _launch_tui()
6545
+ # Bare `c3` launches the interactive TUI (replaces the old c3.bat
6546
+ # wrapper) — but only when attached to a real console. With redirected
6547
+ # stdio there is no terminal for a full-screen app anyway, and the TUI
6548
+ # child would inherit our stdout/stderr pipe handles and hold them
6549
+ # open past our own death (a caller's communicate() then hangs forever
6550
+ # on Windows). Print help instead of spawning anything.
6551
+ if _stdio_is_interactive():
6552
+ _launch_tui()
6553
+ else:
6554
+ parser.print_help()
6529
6555
  return
6530
6556
 
6531
6557
  commands = {
cli/hook_ghost_files.py CHANGED
@@ -212,6 +212,21 @@ def cleanup_ghost_files(ghosts: list[dict]) -> list[str]:
212
212
  return deleted
213
213
 
214
214
 
215
def sweep_ghost_files(project_root) -> list[str]:
    """Scan *project_root* for ghost files and delete them in a single call.

    A scan-then-cleanup convenience wrapper for callers outside the Bash
    PostToolUse hook, e.g. long-lived MCP-server background agents whose cwd
    is the project root, or git worktrees where no PostToolUse hook runs.

    Args:
        project_root: The directory to sweep; a ``Path`` is used as given,
            anything else is passed to ``Path(...)``.

    Returns:
        The list produced by ``cleanup_ghost_files`` (the deleted file
        names), or an empty list if anything fails. Never raises.
    """
    try:
        root = project_root
        if not isinstance(root, Path):
            root = Path(root)
        ghosts = scan_ghost_files(root)
        return cleanup_ghost_files(ghosts)
    except Exception:
        # Best-effort housekeeping: a failed sweep must not break the caller.
        return []
228
+
229
+
215
230
  # Tools whose output can carry shell-meta text that leaks into 0-byte files:
216
231
  # native shells, c3_shell (its `N->Mtok` filter header), and file reads whose
217
232
  # content has `-> Type` hints. A downstream shell sees `> word` and creates an
cli/mcp_server.py CHANGED
@@ -423,9 +423,10 @@ async def c3_session(action: str, data: str = "", reasoning: str = "",
423
423
  @mcp.tool()
424
424
  async def c3_memory(action: str, query: str = "", fact: str = "",
425
425
  category: str = "", top_k: int = 3,
426
- fact_id: str = "", ctx: Context = None) -> str:
426
+ fact_id: str = "", include_scores: bool = False,
427
+ ctx: Context = None) -> str:
427
428
  """Durable facts — cross-session knowledge. Read-only actions safe in plan mode.
428
- Retrieve: recall (search), index (compact IDs+snippets, then fetch), fetch (full text by fact_id="id1,id2"), query (multi-source: facts+sessions+files).
429
+ Retrieve: recall (search; include_scores=True adds per-fact salience), index (compact IDs+snippets, then fetch), fetch (full text by fact_id="id1,id2"), query (multi-source: facts+sessions+files).
429
430
  Write: add (fact+category, empty category→'general'), update (fact_id+fact), delete (fact_id).
430
431
  Browse: list (category='' shows all; 'foo' filters), export (markdown).
431
432
  Audit: review (health), ground (verify against code), score (salience), graph (edges), trends, lifespan, consolidate, consolidate_deep."""
@@ -435,7 +436,7 @@ async def c3_memory(action: str, query: str = "", fact: str = "",
435
436
  return _finalize_response(ctx, name, args, resp, summ, **kw)
436
437
 
437
438
  return await asyncio.to_thread(handle_memory, action, query, fact, category, top_k, svc, finalize,
438
- fact_id=fact_id)
439
+ fact_id=fact_id, include_scores=include_scores)
439
440
 
440
441
 
441
442
  @mcp.tool()
cli/tools/_helpers.py CHANGED
@@ -8,6 +8,26 @@ def maybe_related_facts(svc, topic: str, top_k: int = 3, width: int = 100) -> st
8
8
  return ""
9
9
 
10
10
 
11
+ # ── Response boilerplate diet (P6) ───────────────────────────────────────────
12
+
13
def show_token_ratios(svc) -> bool:
    """Tell whether per-call "raw->optimized tok" ratio headers are enabled.

    This is a debug flag and is off by default: the ratio header was roughly
    100-200 tokens per session of boilerplate the model does nothing with.
    Accounting does not depend on the displayed header, because migrated
    tools report (raw_tokens, optimized_tokens) structurally through
    finalize_with_tokens(). To bring the old headers back, set
    ``{"hybrid": {"show_token_ratios": true}}`` in .c3/config.json (the same
    convention as SHOW_SAVINGS_SUMMARY / show_savings_footer).

    Args:
        svc: Service object; its ``hybrid_config`` attribute is read if
            present.

    Returns:
        The truthiness of ``hybrid_config["show_token_ratios"]``; False when
        the attribute or key is missing or empty, or when the lookup raises.
    """
    try:
        hybrid = getattr(svc, "hybrid_config", None)
        if not hybrid:
            return False
        return bool(hybrid.get("show_token_ratios", False))
    except Exception:
        # A malformed config must never break a tool response.
        return False
29
+
30
+
11
31
  # ── Structured token accounting (honest measurement layer) ──────────────────
12
32
 
13
33
  def finalize_with_tokens(finalize, svc, tool_name: str, args: dict,
cli/tools/compress.py CHANGED
@@ -10,6 +10,8 @@ from pathlib import Path
10
10
 
11
11
  from core import count_tokens
12
12
 
13
+ from cli.tools._helpers import finalize_with_tokens, show_token_ratios
14
+
13
15
 
14
16
  def _run_memory_mcp_cli(args: list, cwd: str, timeout: int = 30) -> tuple:
15
17
  """Run codebase-memory-mcp CLI and return (success, output_or_error)."""
@@ -139,23 +141,32 @@ def _compress_single(file_path: str, mode: str, svc, finalize, maybe_facts) -> s
139
141
  res = (svc.file_memory.get_or_build_dense_map(rel)
140
142
  if mode == "dense_map"
141
143
  else svc.file_memory.get_or_build_map(rel))
144
+ # Structured accounting: pass the (full-read baseline, map) pair via
145
+ # record_tool_tokens() instead of a "raw->maptok" summary for the
146
+ # legacy regex fallback to scrape.
147
+ raw_tokens = None
142
148
  map_tokens = 0
143
149
  try:
144
150
  raw_tokens = count_tokens(full.read_text(encoding="utf-8", errors="replace"))
145
151
  map_tokens = count_tokens(res)
146
- summary = f"{raw_tokens}->{map_tokens}tok"
152
+ summary = mode
147
153
  except Exception:
148
154
  summary = "mapped"
149
- return finalize("c3_compress", {"file_path": file_path, "mode": mode}, res, summary,
150
- response_tokens=map_tokens)
155
+ return finalize_with_tokens(
156
+ finalize, svc, "c3_compress", {"file_path": file_path, "mode": mode},
157
+ res, summary,
158
+ raw_tokens=raw_tokens, optimized_tokens=map_tokens or None,
159
+ response_tokens=map_tokens)
151
160
 
152
161
  res = svc.compressor.compress_file(str(full), mode)
153
162
  if "error" in res:
154
163
  return f"Error: {res['error']}"
155
164
  resp = res['compressed']
156
- summary = f"{res['original_tokens']}->{res['compressed_tokens']}tok"
157
- return finalize("c3_compress", {"file_path": file_path},
158
- resp + maybe_facts(svc, Path(file_path).name), summary)
165
+ return finalize_with_tokens(
166
+ finalize, svc, "c3_compress", {"file_path": file_path},
167
+ resp + maybe_facts(svc, Path(file_path).name), mode,
168
+ raw_tokens=res.get('original_tokens'),
169
+ optimized_tokens=res.get('compressed_tokens'))
159
170
 
160
171
 
161
172
  def _compress_batch(paths: list, mode: str, svc, finalize, maybe_facts) -> str:
@@ -166,12 +177,13 @@ def _compress_batch(paths: list, mode: str, svc, finalize, maybe_facts) -> str:
166
177
  results = {}
167
178
 
168
179
  def _do_one(fp):
180
+ """Returns (fp, compressed_text, raw_tokens, optimized_tokens, error)."""
169
181
  try:
170
182
  full = Path(svc.project_path) / fp
171
183
  if not full.exists():
172
184
  full = Path(fp)
173
185
  if not full.exists():
174
- return fp, None, "not found"
186
+ return fp, None, None, None, "not found"
175
187
 
176
188
  if mode in ("map", "dense_map"):
177
189
  rel = str(full.resolve().relative_to(
@@ -182,34 +194,50 @@ def _compress_batch(paths: list, mode: str, svc, finalize, maybe_facts) -> str:
182
194
  try:
183
195
  raw_tok = count_tokens(full.read_text(encoding="utf-8", errors="replace"))
184
196
  map_tok = count_tokens(res)
185
- return fp, res, f"{raw_tok}->{map_tok}tok"
197
+ return fp, res, raw_tok, map_tok, None
186
198
  except Exception:
187
- return fp, res, "mapped"
199
+ return fp, res, None, None, None
188
200
  else:
189
201
  res = svc.compressor.compress_file(str(full), mode)
190
202
  if "error" in res:
191
- return fp, None, res["error"]
192
- return fp, res["compressed"], f"{res['original_tokens']}->{res['compressed_tokens']}tok"
203
+ return fp, None, None, None, res["error"]
204
+ return (fp, res["compressed"], res.get("original_tokens"),
205
+ res.get("compressed_tokens"), None)
193
206
  except Exception as e:
194
- return fp, None, str(e)
207
+ return fp, None, None, None, str(e)
195
208
 
196
209
  with ThreadPoolExecutor(max_workers=min(len(paths), 8)) as pool:
197
210
  futures = {pool.submit(_do_one, fp): fp for fp in paths}
198
211
  for fut in as_completed(futures):
199
- fp, compressed, summary = fut.result()
200
- results[fp] = (compressed, summary)
212
+ fp, compressed, raw_tok, opt_tok, err = fut.result()
213
+ results[fp] = (compressed, raw_tok, opt_tok, err)
201
214
 
215
+ ratios = show_token_ratios(svc)
202
216
  parts = []
203
217
  total_ok = 0
218
+ total_raw = 0
219
+ total_opt = 0
220
+ measured = 0
204
221
  for fp in paths:
205
- compressed, summary = results.get(fp, (None, "unknown"))
222
+ compressed, raw_tok, opt_tok, err = results.get(fp, (None, None, None, "unknown"))
206
223
  if compressed:
207
- parts.append(f"## {fp} ({summary})\n{compressed}")
224
+ tag = ""
225
+ if raw_tok is not None and opt_tok is not None:
226
+ measured += 1
227
+ total_raw += raw_tok
228
+ total_opt += opt_tok
229
+ if ratios:
230
+ tag = f" ({raw_tok}->{opt_tok}tok)"
231
+ parts.append(f"## {fp}{tag}\n{compressed}")
208
232
  total_ok += 1
209
233
  else:
210
- parts.append(f"## {fp} — ERROR: {summary}")
234
+ parts.append(f"## {fp} — ERROR: {err}")
211
235
 
212
236
  header = f"[compress:batch] {total_ok}/{len(paths)} files ({mode})"
213
237
  body = header + "\n\n" + "\n\n".join(parts)
214
- return finalize("c3_compress", {"file_path": ",".join(paths), "mode": mode, "batch": True},
215
- body, f"batch {total_ok}/{len(paths)}")
238
+ return finalize_with_tokens(
239
+ finalize, svc, "c3_compress",
240
+ {"file_path": ",".join(paths), "mode": mode, "batch": True},
241
+ body, f"batch {total_ok}/{len(paths)}",
242
+ raw_tokens=total_raw if measured else None,
243
+ optimized_tokens=total_opt if measured else None)
cli/tools/delegate.py CHANGED
@@ -17,6 +17,7 @@ from pathlib import Path
17
17
 
18
18
  from core import count_tokens
19
19
  from services.circuit_breaker import CircuitBreaker
20
+ from services.win_subprocess import harden_win_argv
20
21
 
21
22
  log = logging.getLogger(__name__)
22
23
 
@@ -220,7 +221,7 @@ def _run_claude(task: str, context: str, cwd: str | None = None,
220
221
  cmd = [exe, "-p", prompt, "--output-format", "text"]
221
222
  try:
222
223
  proc = subprocess.Popen(
223
- cmd,
224
+ harden_win_argv(cmd),
224
225
  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
225
226
  stdin=subprocess.DEVNULL,
226
227
  text=True, encoding="utf-8", errors="replace", cwd=cwd,
@@ -466,7 +467,7 @@ def _run_gemini(task: str, context: str, model: str,
466
467
 
467
468
  try:
468
469
  proc = subprocess.Popen(
469
- cmd,
470
+ harden_win_argv(cmd),
470
471
  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
471
472
  stdin=subprocess.DEVNULL,
472
473
  text=True, encoding="utf-8", errors="replace",
@@ -581,7 +582,7 @@ def _run_codex(task: str, context: str, model: str, sandbox: str,
581
582
  ]
582
583
  try:
583
584
  proc = subprocess.Popen(
584
- cmd,
585
+ harden_win_argv(cmd),
585
586
  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
586
587
  stdin=subprocess.DEVNULL,
587
588
  text=True, encoding="utf-8", errors="replace",
cli/tools/filter.py CHANGED
@@ -12,6 +12,8 @@ from pathlib import Path
12
12
 
13
13
  from core import count_tokens
14
14
 
15
+ from cli.tools._helpers import finalize_with_tokens, show_token_ratios
16
+
15
17
 
16
18
  def handle_filter(file_path: str, text: str, pattern: str, max_lines: int,
17
19
  depth: str, use_llm: bool, svc, finalize) -> str:
@@ -62,13 +64,19 @@ def _filter_text(text: str, depth: str, svc, finalize) -> str:
62
64
 
63
65
  filtered_tokens = count_tokens(result_text)
64
66
  raw_tokens = res['raw_tokens']
65
- savings_pct = round((1 - filtered_tokens / raw_tokens) * 100, 1) if raw_tokens > 0 else 0
66
67
 
67
- header = f"[filter:{method}] {raw_tokens}→{filtered_tokens}tok ({savings_pct}%saved)"
68
+ # The method tag is actionable signal (which pass ran); the token ratio is
69
+ # boilerplate — shown only under the show_token_ratios debug flag. The
70
+ # (raw, filtered) pair still flows to accounting structurally below.
71
+ header = f"[filter:{method}]"
72
+ if show_token_ratios(svc):
73
+ savings_pct = round((1 - filtered_tokens / raw_tokens) * 100, 1) if raw_tokens > 0 else 0
74
+ header = f"[filter:{method}] {raw_tokens}→{filtered_tokens}tok ({savings_pct}%saved)"
68
75
  resp = f"{header}\n{result_text}"
69
- return finalize("c3_filter", {"depth": depth},
70
- resp, f"{raw_tokens}→{filtered_tokens}tok",
71
- response_tokens=filtered_tokens)
76
+ return finalize_with_tokens(
77
+ finalize, svc, "c3_filter", {"depth": depth}, resp, method,
78
+ raw_tokens=raw_tokens, optimized_tokens=filtered_tokens,
79
+ response_tokens=filtered_tokens)
72
80
 
73
81
 
74
82
  def _heuristic_collapse(text: str) -> str | None:
@@ -279,7 +287,12 @@ def _filter_file(full: Path, file_path: str, pattern: str, max_lines: int,
279
287
  extracted = "\n".join(lines[:max_lines])
280
288
 
281
289
  res_tok = count_tokens(extracted)
282
- saved = round((1 - res_tok / orig_tok) * 100) if orig_tok > 0 else 0
283
- return finalize("c3_filter", {"file": file_path, "pattern": pattern},
284
- f"[extract:{ext}] {orig_tok}->{res_tok}tok ({saved}% saved)\n{extracted}",
285
- f"{orig_tok}->{res_tok}tok")
290
+ header = f"[extract:{ext}]"
291
+ if show_token_ratios(svc):
292
+ saved = round((1 - res_tok / orig_tok) * 100) if orig_tok > 0 else 0
293
+ header = f"[extract:{ext}] {orig_tok}->{res_tok}tok ({saved}% saved)"
294
+ return finalize_with_tokens(
295
+ finalize, svc, "c3_filter", {"file": file_path, "pattern": pattern},
296
+ f"{header}\n{extracted}", "extract",
297
+ raw_tokens=orig_tok, optimized_tokens=res_tok,
298
+ response_tokens=res_tok)
cli/tools/memory.py CHANGED
@@ -3,7 +3,8 @@ from datetime import datetime, timezone
3
3
 
4
4
 
5
5
  def handle_memory(action: str, query: str, fact: str, category: str,
6
- top_k: int, svc, finalize, fact_id: str = "") -> str:
6
+ top_k: int, svc, finalize, fact_id: str = "",
7
+ include_scores: bool = False) -> str:
7
8
  if action == "add":
8
9
  if not fact or not fact.strip():
9
10
  return finalize("c3_memory", {"action": action},
@@ -17,8 +18,9 @@ def handle_memory(action: str, query: str, fact: str, category: str,
17
18
  if action == "recall":
18
19
  session_id = (svc.session_mgr.current_session or {}).get("id", "")
19
20
  results = svc.memory.recall(query, top_k=top_k, session_id=session_id)
20
- # Small recalls skip scoring + graph spreading to stay fast —
21
- # agents using top_k<=3 want quick lookups, not full enrichment.
21
+ # Small recalls skip graph spreading to stay fast — agents using
22
+ # top_k<=3 want quick lookups, not full enrichment. (Salience scoring
23
+ # is opt-in via include_scores, independent of this.)
22
24
  fast_mode = top_k <= 3
23
25
  backend = "tfidf"
24
26
  if svc.vector_store:
@@ -43,9 +45,11 @@ def handle_memory(action: str, query: str, fact: str, category: str,
43
45
  if len(recalled_ids) >= 2:
44
46
  graph.record_co_recall(recalled_ids[:top_k])
45
47
 
46
- # Enrich results with salience scores (skipped in fast_mode)
48
+ # Enrich results with salience scores opt-in only. Per-fact scores
49
+ # on every recall were display boilerplate; callers who want them ask
50
+ # via include_scores=True (explicit request overrides fast_mode).
47
51
  scorer = getattr(svc, "memory_scorer", None)
48
- if scorer and not fast_mode:
52
+ if scorer and include_scores:
49
53
  for r in results:
50
54
  if r.get("id"):
51
55
  s = scorer.score(r, graph)
@@ -75,7 +79,8 @@ def handle_memory(action: str, query: str, fact: str, category: str,
75
79
  f"[memory:recall:{query}] 0 results (backend:{backend})", "0")
76
80
  parts = []
77
81
  for f in results[:top_k]:
78
- sal = f" sal={f['salience']:.2f}/{f['tier']}" if f.get("salience") is not None else ""
82
+ sal = (f" sal={f['salience']:.2f}/{f['tier']}"
83
+ if include_scores and f.get("salience") is not None else "")
79
84
  parts.append(f"[{f['category']}]{sal} {f['fact']}")
80
85
  if activated_extra:
81
86
  parts.append(f" [graph:activated] {len(activated_extra)} related facts:")
cli/tools/search.py CHANGED
@@ -7,6 +7,8 @@ from pathlib import Path
7
7
 
8
8
  from core import count_tokens
9
9
 
10
+ from cli.tools._helpers import finalize_with_tokens, show_token_ratios
11
+
10
12
  # Hard cap: responses above this are truncated to avoid filling context.
11
13
  _RESPONSE_TOKEN_CAP = 2400
12
14
 
@@ -173,6 +175,7 @@ def _transcript_search(query, top_k, max_tokens, svc, finalize):
173
175
  srcs = ",".join(sorted(available_names))
174
176
  return finalize("c3_search", {"action": "transcript"},
175
177
  f"[transcript:{query}] 0 results sources:{srcs}", "0")
178
+ ratios = show_token_ratios(svc)
176
179
  parts = []
177
180
  total_tokens = 0
178
181
  emitted = 0
@@ -188,14 +191,23 @@ def _transcript_search(query, top_k, max_tokens, svc, finalize):
188
191
  ts_str = ""
189
192
  source = r.get("source") or r.get("turn_source") or "manual"
190
193
  role = r.get("role", "")
191
- session_id = r.get("session_id", "")
192
- header = f"--- {source}:{session_id} [{ts_str}] role:{role} score:{r['score']}"
194
+ session_id = str(r.get("session_id", ""))
195
+ if ratios:
196
+ # Debug view: full session id + relevance score (old header).
197
+ header = f"--- {source}:{session_id} [{ts_str}] role:{role} score:{r['score']}"
198
+ else:
199
+ # Minimal per-item header — full UUIDs and scores were ~40 tokens
200
+ # of boilerplate per result the model does nothing with.
201
+ header = f"--- {source}:{session_id[:8]} {ts_str} {role}".rstrip()
193
202
  text = r.get("text", "")
194
203
  parts.extend([header, text])
195
204
  emitted += 1
196
205
  if emitted >= top_k:
197
206
  break
198
- resp = f"[transcript:{query}] {emitted}r,{total_tokens}tok\n" + "\n".join(parts)
207
+ head = f"[transcript:{query}] {emitted}r"
208
+ if ratios:
209
+ head += f",{total_tokens}tok"
210
+ resp = head + "\n" + "\n".join(parts)
199
211
  return finalize("c3_search", {"action": "transcript"}, resp, f"{emitted}r")
200
212
 
201
213
 
@@ -221,8 +233,10 @@ def _semantic_search(query, top_k, max_tokens, svc, finalize, maybe_facts):
221
233
 
222
234
  resp = "\n".join(lines)
223
235
  resp += maybe_facts(svc, query, top_k=2)
224
- return finalize("c3_search", {"query": query, "action": "semantic"}, resp,
225
- f"{len(results)}r,{total_tokens}tok", response_tokens=total_tokens)
236
+ return finalize_with_tokens(
237
+ finalize, svc, "c3_search", {"query": query, "action": "semantic"}, resp,
238
+ f"{len(results)}r",
239
+ optimized_tokens=total_tokens, response_tokens=total_tokens)
226
240
 
227
241
 
228
242
  def _code_search(query, top_k, max_tokens, svc, finalize, maybe_facts):
@@ -255,10 +269,14 @@ def _code_search(query, top_k, max_tokens, svc, finalize, maybe_facts):
255
269
 
256
270
  resp = "\n".join(lines)
257
271
  resp += maybe_facts(svc, query, top_k=2)
272
+ # Structured accounting: the (full-read baseline, returned) pair flows via
273
+ # record_tool_tokens() instead of being regex-scraped from the summary.
258
274
  full_tokens = sum(r.get("file_tokens", r["tokens"]) for r in deduped)
259
- summary = f"{full_tokens}->{total_tokens}tok" if total_tokens < full_tokens else f"{len(deduped)}r"
260
- return finalize("c3_search", {"query": query, "top_k": top_k}, resp, summary,
261
- response_tokens=total_tokens)
275
+ return finalize_with_tokens(
276
+ finalize, svc, "c3_search", {"query": query, "top_k": top_k}, resp,
277
+ f"{len(deduped)}r",
278
+ raw_tokens=full_tokens, optimized_tokens=total_tokens,
279
+ response_tokens=total_tokens)
262
280
 
263
281
 
264
282
  def _append_prefetch(resp: str, query: str, top_k: int, svc) -> str:
cli/tools/status.py CHANGED
@@ -89,16 +89,31 @@ def _budget_view(svc, detailed, finalize):
89
89
  if c3_calls + native_calls > 0:
90
90
  lines.append(f"[c3_adoption] {adoption}% ({c3_calls}c3/{native_calls}native)")
91
91
 
92
- # Per-tool token breakdown
92
+ # Per-tool token breakdown — adaptive: only tools actually used this
93
+ # session (non-zero tokens), no fixed-width padding.
93
94
  by_tool = snap.get("by_tool", {})
94
- if by_tool:
95
- sorted_tools = sorted(by_tool.items(), key=lambda x: -x[1])
96
- shown = sorted_tools[:6]
95
+ used_tools = sorted(((n, t) for n, t in by_tool.items() if t > 0),
96
+ key=lambda x: -x[1])
97
+ if used_tools:
98
+ shown = used_tools[:6]
97
99
  breakdown = " | ".join(f"{n}:{t}tok" for n, t in shown)
98
- if len(sorted_tools) > 6:
99
- breakdown += f" (+{len(sorted_tools) - 6} more)"
100
+ if len(used_tools) > 6:
101
+ breakdown += f" (+{len(used_tools) - 6} more)"
100
102
  lines.append(f"[breakdown] {breakdown}")
101
103
 
104
+ # ONE aggregate savings line — the session-level story lives here, not in
105
+ # per-call response headers. Fed by structured record_tool_tokens()
106
+ # accounting; labeled honestly (full-read baseline is a counterfactual).
107
+ try:
108
+ usage = (svc.session_mgr.current_session or {}).get("token_usage") or {}
109
+ saved = int(usage.get("estimated_saved_vs_full_read", 0) or 0)
110
+ ops = int(usage.get("measured_ops", 0) or 0)
111
+ if saved > 0:
112
+ lines.append(f"[savings] ~{format_token_count(saved)} est. saved "
113
+ f"vs full-read baseline ({ops} measured ops)")
114
+ except Exception:
115
+ pass
116
+
102
117
  if detailed:
103
118
  stats = svc.indexer.get_stats()
104
119
  lines.append(f"[index] files:{stats['files_indexed']} "
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-control
3
- Version: 2.42.0
3
+ Version: 2.43.0
4
4
  Summary: Local code-intelligence layer for AI coding tools (Claude Code, Codex, Gemini, Copilot). Retrieve less, read less, edit safer.
5
5
  Author-email: Dimitri Tselenchuk <dtselenc@gmail.com>
6
6
  License-Expression: Apache-2.0