code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. code_context_engine-0.4.0.dist-info/METADATA +389 -0
  2. code_context_engine-0.4.0.dist-info/RECORD +63 -0
  3. code_context_engine-0.4.0.dist-info/WHEEL +5 -0
  4. code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
  5. code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
  7. context_engine/__init__.py +3 -0
  8. context_engine/cli.py +2848 -0
  9. context_engine/cli_style.py +66 -0
  10. context_engine/compression/__init__.py +0 -0
  11. context_engine/compression/compressor.py +144 -0
  12. context_engine/compression/ollama_client.py +33 -0
  13. context_engine/compression/output_rules.py +77 -0
  14. context_engine/compression/prompts.py +9 -0
  15. context_engine/compression/quality.py +37 -0
  16. context_engine/config.py +198 -0
  17. context_engine/dashboard/__init__.py +0 -0
  18. context_engine/dashboard/_page.py +1548 -0
  19. context_engine/dashboard/server.py +429 -0
  20. context_engine/editors.py +265 -0
  21. context_engine/event_bus.py +24 -0
  22. context_engine/indexer/__init__.py +0 -0
  23. context_engine/indexer/chunker.py +147 -0
  24. context_engine/indexer/embedder.py +154 -0
  25. context_engine/indexer/embedding_cache.py +168 -0
  26. context_engine/indexer/git_hooks.py +73 -0
  27. context_engine/indexer/git_indexer.py +136 -0
  28. context_engine/indexer/ignorefile.py +96 -0
  29. context_engine/indexer/manifest.py +78 -0
  30. context_engine/indexer/pipeline.py +624 -0
  31. context_engine/indexer/secrets.py +332 -0
  32. context_engine/indexer/watcher.py +109 -0
  33. context_engine/integration/__init__.py +0 -0
  34. context_engine/integration/bootstrap.py +76 -0
  35. context_engine/integration/git_context.py +132 -0
  36. context_engine/integration/mcp_server.py +1825 -0
  37. context_engine/integration/session_capture.py +306 -0
  38. context_engine/memory/__init__.py +6 -0
  39. context_engine/memory/compressor.py +344 -0
  40. context_engine/memory/db.py +922 -0
  41. context_engine/memory/extractive.py +106 -0
  42. context_engine/memory/grammar.py +419 -0
  43. context_engine/memory/hook_installer.py +258 -0
  44. context_engine/memory/hook_server.py +83 -0
  45. context_engine/memory/hooks.py +327 -0
  46. context_engine/memory/migrate.py +268 -0
  47. context_engine/models.py +96 -0
  48. context_engine/pricing.py +104 -0
  49. context_engine/project_commands.py +296 -0
  50. context_engine/retrieval/__init__.py +0 -0
  51. context_engine/retrieval/confidence.py +47 -0
  52. context_engine/retrieval/query_parser.py +105 -0
  53. context_engine/retrieval/retriever.py +199 -0
  54. context_engine/serve_http.py +208 -0
  55. context_engine/services.py +252 -0
  56. context_engine/storage/__init__.py +0 -0
  57. context_engine/storage/backend.py +39 -0
  58. context_engine/storage/fts_store.py +112 -0
  59. context_engine/storage/graph_store.py +219 -0
  60. context_engine/storage/local_backend.py +109 -0
  61. context_engine/storage/remote_backend.py +117 -0
  62. context_engine/storage/vector_store.py +357 -0
  63. context_engine/utils.py +72 -0
@@ -0,0 +1,1825 @@
1
+ """MCP server exposing context engine tools to Claude Code."""
2
+ import json
3
+ import logging
4
+ import re
5
+ import sqlite3
6
+ import threading
7
+ from pathlib import Path
8
+
9
+ from context_engine.utils import atomic_write_text as _atomic_write_text
10
+
11
+ from mcp.server import Server
12
+ from mcp.types import Tool, TextContent
13
+
14
+ from context_engine.compression.output_rules import (
15
+ ADVERTISED_PCT,
16
+ ESTIMATED_AVG_REPLY_TOKENS,
17
+ get_output_rules,
18
+ get_level_description,
19
+ LEVELS,
20
+ )
21
+ from context_engine.integration.bootstrap import BootstrapBuilder
22
+ from context_engine.integration.git_context import (
23
+ get_recent_commits,
24
+ get_recently_modified_files,
25
+ get_working_state,
26
+ )
27
+ from context_engine.integration.session_capture import SessionCapture
28
+ from context_engine.memory import db as memory_db
29
+ from context_engine.memory.extractive import extractive_summary
30
+ from context_engine.memory.grammar import (
31
+ compress as _grammar_compress,
32
+ compress_with_counts as _grammar_compress_counted,
33
+ expand as _grammar_expand,
34
+ DEFAULT_LEVEL as _GRAMMAR_LEVEL,
35
+ )
36
+
37
+ log = logging.getLogger(__name__)
38
+
39
+ _CHARS_PER_TOKEN = 4
40
+ # JSON-heavy text (tool_event_payloads.raw_input/raw_output) tokenises at
41
+ # ~6 chars/token because the structural noise (braces, commas, quoted keys)
42
+ # packs more chars into a single token than prose does. Used for
43
+ # progressive_disclosure baseline so the counterfactual ("what dumping all
44
+ # raw payloads would have cost") doesn't over-claim by ~50%.
45
+ _JSON_CHARS_PER_TOKEN = 6
46
+ _MAX_QUERY_CHARS = 10_000
47
+ _MAX_TOP_K = 100
48
+ # Search up to this many recent session files when recalling decisions.
49
+ # Older files past this window are silently dropped — see roadmap item
50
+ # "persistent session search across projects" for how this should evolve.
51
+ _SESSION_RECALL_WINDOW = 50
52
+ # Minimum cosine similarity for a JSON-history entry to qualify as a topic
53
+ # match. bge-small's noise floor on short English is ~0.50 (a random off-
54
+ # topic query against trading decisions hits 0.535), so anything below ~0.55
55
+ # is statistical noise. 0.55 keeps real paraphrase matches (~0.59-0.65) and
56
+ # rejects "how is the weather today" against unrelated decisions.
57
+ _SESSION_RECALL_MIN_SIM = 0.55
58
+
59
+
60
+ def _count_tokens(text: str) -> int:
61
+ return max(1, len(text) // _CHARS_PER_TOKEN)
62
+
63
+
64
+ def _cosine_sim(a, b) -> float:
65
+ """Cosine similarity between two equal-length numeric sequences. Returns 0
66
+ on degenerate input (zero norm) instead of NaN.
67
+
68
+ Length mismatch returns 0 and logs at debug — the embedder always returns
69
+ fixed-dimension vectors, so a mismatch means something is wrong upstream
70
+ (model swap mid-process, corrupted cached vector). We prefer "no match"
71
+ over a silently truncated similarity that zip()'d to the shorter length.
72
+ """
73
+ if len(a) != len(b):
74
+ log.debug("_cosine_sim length mismatch: %d vs %d", len(a), len(b))
75
+ return 0.0
76
+ dot = 0.0
77
+ na = 0.0
78
+ nb = 0.0
79
+ for x, y in zip(a, b):
80
+ dot += x * y
81
+ na += x * x
82
+ nb += y * y
83
+ if na <= 0.0 or nb <= 0.0:
84
+ return 0.0
85
+ return dot / (na**0.5 * nb**0.5)
86
+
87
+
88
+ def _clamp_top_k(value, default: int = 10) -> int:
89
+ try:
90
+ n = int(value)
91
+ except (TypeError, ValueError):
92
+ return default
93
+ return max(1, min(n, _MAX_TOP_K))
94
+
95
+
96
+ def _clamp_int(value, *, default: int, lo: int, hi: int) -> int:
97
+ if value is None or value == "":
98
+ return default
99
+ try:
100
+ n = int(value)
101
+ except (TypeError, ValueError):
102
+ return default
103
+ return max(lo, min(n, hi))
104
+
105
+
106
+ _FTS_RECALL_LIMIT = 100
107
+ _VEC_RECALL_K = 30
108
+ # Display cap for session_recall body. Empirically the RRF score drops
109
+ # sharply after rank ~5 across decisions/turns/code-areas, so showing 20
110
+ # matches dilutes the response with low-signal entries that aren't worth
111
+ # the tokens.
112
+ #
113
+ # Tunable via CCE_RECALL_DISPLAY_CAP (positive integer). Power users with
114
+ # a mature decisions corpus may want to raise this if the rank 7-20 tail
115
+ # carries useful matches in their workflow — the previous default was 20.
116
+ # Invalid values fall back to the default so a typo can't break recall.
117
+ def _recall_display_cap() -> int:
118
+ import os
119
+ raw = os.environ.get("CCE_RECALL_DISPLAY_CAP")
120
+ if raw is None:
121
+ return 7
122
+ try:
123
+ n = int(raw)
124
+ return n if n > 0 else 7
125
+ except ValueError:
126
+ return 7
127
+ # Read-time cap on session_event payloads. Inputs already have a 4 KB write
128
+ # cap (compressor._TOOL_INPUT_CHAR_CAP) but outputs are stored uncapped, so a
129
+ # 50 KB Bash stdout would re-feed ~12 k tokens on every fetch without this.
130
+ _EVENT_PAYLOAD_READ_CAP = 4_000
131
+ # RRF (reciprocal rank fusion) constant. 60 is the canonical value from the
132
+ # Cormack/Clarke/Buettcher 2009 paper — small enough that early ranks
133
+ # dominate, large enough to keep the late tail relevant.
134
+ _RRF_K = 60
135
+ # How many of the top RRF-ranked candidates to feed the extractive summariser
136
+ # for the TL;DR header. More = broader summary, slower extract.
137
+ _TLDR_TOP_N = 10
138
+ # Strip the "[decision src=...|sid:...] " style prefix the formatter adds —
139
+ # the summariser should see the actual content, not our metadata tags.
140
+ _TAG_PREFIX_RE = re.compile(r"^\[[^\]]*\]\s*")
141
+ # Strip the trailing " · 5m ago · → session_timeline(\"abc\")" affordance the
142
+ # formatter appends. Used for dedup so the same decision rendered with vs.
143
+ # without the affordance (e.g. JSON-history vs. memory.db dual-write) collapses.
144
+ _AFFORDANCE_TAIL_RE = re.compile(r"\s*·\s+(?:just now|\d+[mhdy]o? ago|\d+[mhd] ago)(?:\s*·\s+→\s+session_(?:timeline|event)\([^)]*\))?\s*$")
145
+
146
+
147
+ def _strip_tag(text: str) -> str:
148
+ return _TAG_PREFIX_RE.sub("", text)
149
+
150
+
151
def _content_key(text: str) -> str:
    """Build the canonical dedup key for one recall match.

    Three normalisations are applied, in order:

    1. The leading ``[tag]`` prefix added by the formatter is removed.
    2. The trailing " · 5m ago · → session_timeline(...)" affordance is
       removed.
    3. The remainder is passed through the grammar compressor at the
       ``lite`` level. Per the original design note this drops articles,
       so a decision stored compressed in memory.db ("Adopt JWT") and the
       same decision stored raw in JSON history ("Adopt the JWT") share
       one key instead of surfacing as two distinct hits during the
       dual-write window of ``_handle_record_decision``.
    """
    without_tag = _TAG_PREFIX_RE.sub("", text)
    core = _AFFORDANCE_TAIL_RE.sub("", without_tag).strip()
    return _grammar_compress(core, level="lite")
167
+
168
+
169
+ def _humanise_relative_time(epoch: int | None) -> str:
170
+ """Best-effort "3d ago" / "5m ago" string. Empty on bad/missing input.
171
+
172
+ Only surfaces what's helpful to the model — sub-minute deltas would be
173
+ noise on a recall hit, so we round to minute granularity at minimum.
174
+ """
175
+ if epoch is None:
176
+ return ""
177
+ import time as _time
178
+ try:
179
+ delta = max(0, int(_time.time()) - int(epoch))
180
+ except (TypeError, ValueError):
181
+ return ""
182
+ if delta < 60:
183
+ return "just now"
184
+ if delta < 3600:
185
+ return f"{delta // 60}m ago"
186
+ if delta < 86_400:
187
+ return f"{delta // 3600}h ago"
188
+ if delta < 30 * 86_400:
189
+ return f"{delta // 86_400}d ago"
190
+ if delta < 365 * 86_400:
191
+ return f"{delta // (30 * 86_400)}mo ago"
192
+ return f"{delta // (365 * 86_400)}y ago"
193
+
194
+
195
+ def _truncate_payload(text: str | None, cap: int) -> str:
196
+ """Trim a captured tool payload at read time. Empty NULL → placeholder."""
197
+ if text is None:
198
+ return "<no value>"
199
+ if len(text) <= cap:
200
+ return text
201
+ suffix = f"\n…[truncated, {len(text) - cap} more chars]"
202
+ return text[:cap] + suffix
203
+
204
+
205
def _rrf_merge(*ranked_lists: list[str], top: int) -> list[str]:
    """Fuse several ranked lists with reciprocal rank fusion (RRF).

    Every input is ordered best-first. An item at 0-based rank ``r``
    contributes ``1 / (_RRF_K + r + 1)`` to its score, so items present in
    several lists accumulate score while single-list items still surface in
    proportion to their rank. At most ``top`` items are returned, highest
    fused score first.

    Items are identified by ``_content_key``, which ignores the ``[tag]``
    prefix and the affordance tail, so one decision rendered through
    several paths (memory.db with hints, JSON history without) becomes a
    single boosted entry rather than several competing ones.
    """
    fused: dict[str, float] = {}
    display: dict[str, str] = {}
    for ranking in ranked_lists:
        for position, candidate in enumerate(ranking, start=1):
            key = _content_key(candidate)
            fused[key] = fused.get(key, 0.0) + 1.0 / (_RRF_K + position)
            # Keep the first rendering seen, but upgrade to a longer one
            # that carries affordance hints (" · ") — that form is strictly
            # more useful to the agent.
            current = display.get(key)
            if current is None:
                display[key] = candidate
            elif len(candidate) > len(current) and " · " in candidate:
                display[key] = candidate
    winners = sorted(fused, key=fused.__getitem__, reverse=True)[:top]
    return [display[key] for key in winners]
233
+
234
+
235
+ # Conservative function-word list. We strip these from FTS5 queries so that
236
+ # `is OR the OR today OR we OR can` doesn't match every decision in the
237
+ # corpus. Restricted to genuine grammatical glue — articles, auxiliaries,
238
+ # pronouns, prepositions, conjunctions, common interrogatives. Topic words
239
+ # (code, auth, database, improve, scale, etc.) are NOT in this list.
240
+ #
241
+ # Vec search still runs in parallel against the original query, so even if
242
+ # every token is filtered out, semantic recall still surfaces matches.
243
+ _FTS_STOP_WORDS = frozenset({
244
+ # articles / determiners
245
+ "a", "an", "the", "this", "that", "these", "those", "some", "any",
246
+ "no", "all", "both", "each", "every", "other", "another", "such",
247
+ # auxiliaries / modals
248
+ "is", "are", "was", "were", "be", "been", "being", "am",
249
+ "have", "has", "had", "having",
250
+ "do", "does", "did", "doing", "done",
251
+ "will", "would", "shall", "should", "can", "could", "may", "might",
252
+ "must", "ought",
253
+ # pronouns
254
+ "i", "we", "you", "he", "she", "it", "they", "me", "us", "him", "her",
255
+ "them", "my", "our", "your", "his", "its", "their", "mine", "ours",
256
+ "yours", "hers", "theirs", "myself", "ourselves", "yourself",
257
+ "yourselves", "himself", "herself", "itself", "themselves",
258
+ # prepositions / conjunctions
259
+ "of", "in", "on", "at", "to", "for", "with", "by", "from", "as",
260
+ "into", "onto", "upon", "about", "above", "below", "under", "over",
261
+ "between", "among", "through", "during", "before", "after", "since",
262
+ "until", "while", "and", "or", "but", "nor", "so", "if", "than",
263
+ "then", "because", "though", "although", "unless",
264
+ # interrogatives / proforms
265
+ "how", "what", "when", "where", "which", "who", "whom", "whose",
266
+ "why", "here", "there",
267
+ # filler / generic time words
268
+ "just", "only", "even", "also", "very", "too", "still", "now",
269
+ "today", "tomorrow", "yesterday",
270
+ # common verbs that carry no topic signal in this domain
271
+ "get", "got", "make", "made", "go", "went", "see", "saw", "let",
272
+ })
273
+
274
+
275
+ def _strip_stop_words(topic: str) -> str:
276
+ """Return `topic` with function words removed; falls back to the
277
+ original if every token is a stop word (rare)."""
278
+ tokens = [t.strip().lower() for t in topic.split() if t.strip()]
279
+ content = [t for t in tokens if t not in _FTS_STOP_WORDS]
280
+ return " ".join(content) if content else topic
281
+
282
+
283
+ def _fts_match_query(topic: str) -> str:
284
+ """Build a safe FTS5 MATCH query from `topic` — OR of phrase-quoted
285
+ *content* tokens (function words like "is/the/today/we/can" stripped).
286
+
287
+ Returns "" when the topic has no usable content tokens left; callers
288
+ skip the FTS query in that case rather than passing an empty MATCH
289
+ (FTS5 would raise). When this happens, the vec semantic-search path
290
+ still runs against the original query string, so meaning isn't lost.
291
+ """
292
+ content = _strip_stop_words(topic).split()
293
+ if not content:
294
+ return ""
295
+ safe = ['"' + t.replace('"', '""') + '"' for t in content]
296
+ return " OR ".join(safe)
297
+
298
+
299
def _split_inline_overflow(
    chunks: list, max_tokens: int
) -> tuple[list, list]:
    """Partition ``chunks`` into those served inline and those only referenced.

    Chunks are visited in the given order. A chunk goes inline when the
    estimated token cost of its served text (``compressed_content`` if
    truthy, else ``content``) fits in the remaining budget, which is then
    reduced by that cost; otherwise it goes to overflow. Later, smaller
    chunks may still fit after an earlier one overflowed.

    Returns:
        ``(inline, overflow)`` — two lists preserving input order.
    """
    inline: list = []
    overflow: list = []
    remaining = max_tokens
    for chunk in chunks:
        cost = _count_tokens(chunk.compressed_content or chunk.content)
        if cost > remaining:
            overflow.append(chunk)
            continue
        inline.append(chunk)
        remaining -= cost
    return inline, overflow
315
+
316
+
317
+ def _format_results_with_overflow(inline_chunks: list, overflow_chunks: list) -> str:
318
+ """Format inline results and append compact overflow references."""
319
+ parts = []
320
+ for chunk in inline_chunks:
321
+ served_text = chunk.compressed_content or chunk.content
322
+ parts.append(
323
+ f"[{chunk.file_path}:{chunk.start_line}] "
324
+ f"(confidence: {chunk.confidence_score:.2f})\n{served_text}"
325
+ )
326
+
327
+ if overflow_chunks:
328
+ lines = [
329
+ f"\n---\n{len(overflow_chunks)} more result(s) available "
330
+ f"(not shown to save tokens):"
331
+ ]
332
+ for chunk in overflow_chunks:
333
+ lines.append(
334
+ f' expand_chunk(chunk_id="{chunk.id}") '
335
+ f"→ {chunk.file_path}:{chunk.start_line} "
336
+ f"(confidence: {chunk.confidence_score:.2f})"
337
+ )
338
+ parts.append("\n".join(lines))
339
+
340
+ return "\n\n---\n\n".join(parts) if parts else "No results found."
341
+
342
+
343
+ class ContextEngineMCP:
344
+ TOOL_NAMES = [
345
+ "context_search",
346
+ "expand_chunk",
347
+ "related_context",
348
+ "session_recall",
349
+ "session_timeline",
350
+ "session_event",
351
+ "record_decision",
352
+ "record_code_area",
353
+ "index_status",
354
+ "reindex",
355
+ "set_output_compression",
356
+ ]
357
+
358
def __init__(self, retriever, backend, compressor, embedder, config) -> None:
    """Wire up the MCP server for the project in the current directory.

    Stores the collaborators, derives per-project storage under
    ``config.storage_path / <cwd name>``, loads persisted stats/state,
    starts a session-capture session, opens memory.db (best-effort),
    prunes old session files (best-effort) and registers tools/prompts.

    Args:
        retriever, backend, compressor, embedder: Collaborators stored on
            the instance for the tool handlers.
        config: Settings object; this method reads ``storage_path``,
            ``output_compression``, ``bootstrap_max_tokens`` and the
            optional ``memory_redact_pii`` attribute.
    """
    self._retriever = retriever
    self._backend = backend
    self._compressor = compressor
    self._embedder = embedder
    self._config = config
    # Propagate the PII-redaction toggle to the memory module's
    # process-global state. Done at MCPServer boot — the compressor
    # and migrate paths read from the same module-level flag.
    memory_db.set_pii_redaction(getattr(config, "memory_redact_pii", True))
    self._server = Server("code-context-engine")

    project_name = Path.cwd().name
    self._project_name = project_name
    self._project_dir = str(Path.cwd())
    self._storage_base = Path(config.storage_path) / project_name
    self._storage_base.mkdir(parents=True, exist_ok=True)
    self._stats_path = self._storage_base / "stats.json"
    self._state_path = self._storage_base / "state.json"
    self._stats = self._load_stats()

    # `state.json` overrides the config default so `set_output_compression`
    # survives server restarts.
    persisted_state = self._load_state()
    self._output_level = persisted_state.get(
        "output_level", config.output_compression
    )

    # Session capture — persists decisions and code-area notes across runs.
    # Both the legacy JSON path and the new memory.db path are written to
    # for record_decision / record_code_area; recall queries both. Once a
    # release cycle of dual-write confirms parity, the JSON write side
    # can be retired.
    self._session_capture = SessionCapture(
        sessions_dir=str(self._storage_base / "sessions")
    )
    self._session_id = self._session_capture.start_session(project_name)
    self._memory_conn = None
    try:
        self._memory_conn = memory_db.connect(
            memory_db.memory_db_path(self._storage_base)
        )
        # Ensure the sessions row exists so dual-writes don't trip the FK.
        # The SessionStart hook normally creates this, but the MCP server
        # may start in environments without hook coverage (e.g. tests).
        import time as _t
        _epoch = int(_t.time())
        self._memory_conn.execute(
            "INSERT OR IGNORE INTO sessions (id, project, started_at_epoch, "
            "started_at, status) VALUES (?, ?, ?, ?, 'active')",
            (self._session_id, project_name, _epoch,
             _t.strftime("%Y-%m-%dT%H:%M:%S", _t.gmtime(_epoch))),
        )
        self._memory_conn.commit()
        # Semantic backfill on a daemon thread — projects with thousands
        # of historical decisions/turns shouldn't pay a multi-second
        # embed-everything stall on every MCP startup. Each thread opens
        # its own connection (sqlite3 enforces check_same_thread).
        self._spawn_vec_backfill()
    except Exception as exc:
        log.warning("memory.db open failed; recall will fall back to JSON: %s", exc)
        # If connect() succeeded but a later step failed, the handle must
        # be closed before it is dropped; otherwise it leaks until GC.
        stale_conn, self._memory_conn = self._memory_conn, None
        if stale_conn is not None:
            try:
                stale_conn.close()
            except Exception as close_exc:
                log.debug("memory.db close after failed init: %s", close_exc)
    # Cheap maintenance on start: if the project has accumulated more than
    # _PRUNE_THRESHOLD session files, consolidate the oldest decisions
    # into decisions_log.json and remove the source files. No-op when
    # under threshold (the common case).
    try:
        summary = self._session_capture.prune_old_sessions()
        if summary.get("pruned"):
            log.info(
                "Pruned %d old session files (%d decisions archived)",
                summary["pruned"],
                summary.get("decisions_appended", 0),
            )
    except Exception as exc:
        log.debug("Session prune skipped: %s", exc)

    # Bootstrap builder — used by the `context-engine-init` prompt handler.
    self._bootstrap = BootstrapBuilder(max_tokens=config.bootstrap_max_tokens)

    # Lazy indexing flag — triggers on first context_search if index is empty.
    self._lazy_indexed = False

    self._register_tools()
    self._register_prompts()
442
+
443
+ # ── state / stats persistence ───────────────────────────────────────────
444
+
445
def _load_stats(self) -> dict:
    """Load ``stats.json`` or return a zeroed stats structure.

    Files written by older versions are upgraded in place: missing
    top-level counters are added as 0, and the per-bucket breakdown is
    normalised so every bucket in ``memory_db.BUCKETS`` has integer
    ``baseline`` / ``served`` / ``calls`` fields while existing totals
    (and any extra buckets already present) are kept.

    An unreadable, undecodable, or structurally invalid file (non-object
    root, non-numeric counters) is treated the same as a missing one, so a
    damaged stats file cannot stop the server from starting.
    """
    empty_buckets = {
        b: {"baseline": 0, "served": 0, "calls": 0}
        for b in memory_db.BUCKETS
    }
    if self._stats_path.exists():
        try:
            data = json.loads(self._stats_path.read_text())
            if not isinstance(data, dict):
                raise ValueError("stats.json root is not a JSON object")
            # Backfill new keys for stats files written by older versions.
            data.setdefault("queries", 0)
            data.setdefault("raw_tokens", 0)
            data.setdefault("served_tokens", 0)
            data.setdefault("full_file_tokens", 0)
            # v3: per-bucket breakdown. Merge so older files gain any
            # newly-added buckets without losing existing totals.
            buckets = data.get("buckets")
            if not isinstance(buckets, dict):
                buckets = {}
            for name in empty_buckets:
                stored = buckets.get(name)
                if not isinstance(stored, dict):
                    stored = {}
                buckets[name] = {
                    "baseline": int(stored.get("baseline", 0)),
                    "served": int(stored.get("served", 0)),
                    "calls": int(stored.get("calls", 0)),
                }
            data["buckets"] = buckets
            return data
        except (OSError, ValueError, TypeError):
            # ValueError also covers json.JSONDecodeError and
            # UnicodeDecodeError; TypeError covers int(None) and similar.
            pass
    return {
        "queries": 0,
        "raw_tokens": 0,
        "served_tokens": 0,
        "full_file_tokens": 0,
        "buckets": empty_buckets,
    }
479
+
480
def _save_stats(self) -> None:
    """Persist the in-memory stats to ``stats.json`` (best-effort).

    Serialisation and the atomic write are both guarded: any failure is
    recorded through ``_append_error_log`` instead of propagating.
    """
    try:
        serialised = json.dumps(self._stats)
        _atomic_write_text(self._stats_path, serialised)
    except Exception as err:
        self._append_error_log(f"_save_stats failed: {err}")
485
+
486
+ def _append_query_log(self) -> None:
487
+ import datetime
488
+ try:
489
+ # Verify the write actually landed
490
+ on_disk = self._stats_path.read_text() if self._stats_path.exists() else "missing"
491
+ log_path = self._storage_base / "query.log"
492
+ q = self._stats["queries"]
493
+ entry = (
494
+ f"{datetime.datetime.now().isoformat()} query #{q} "
495
+ f"stats_written={self._stats_path} "
496
+ f"disk_queries={on_disk} "
497
+ f"cwd={self._project_dir}\n"
498
+ )
499
+ with log_path.open("a") as f:
500
+ f.write(entry)
501
+ except OSError:
502
+ pass
503
+
504
+ def _append_audit_log(
505
+ self,
506
+ *,
507
+ query: str,
508
+ top_k: int,
509
+ served_chunks: list[dict],
510
+ score_range: tuple[float, float] | None,
511
+ ) -> None:
512
+ """Structured audit trail — one JSON line per context_search.
513
+
514
+ Off by default; turned on via config.audit_log_enabled. The query
515
+ text itself is hashed (12-char sha256 prefix), not stored — the
516
+ log answers "what did Claude see and when?" for compliance, not
517
+ "what did the user ask?". Also logs the active output-compression
518
+ level so audits can correlate retrieval with response shape.
519
+ """
520
+ if not getattr(self._config, "audit_log_enabled", False):
521
+ return
522
+ import datetime
523
+ import hashlib
524
+ try:
525
+ query_hash = hashlib.sha256(query.encode("utf-8")).hexdigest()[:12]
526
+ entry = {
527
+ "ts": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds").replace("+00:00", "Z"),
528
+ "session_id": self._session_id,
529
+ "query_hash": query_hash,
530
+ "query_len": len(query),
531
+ "top_k": int(top_k),
532
+ "served": served_chunks,
533
+ "score_range": list(score_range) if score_range else None,
534
+ "output_level": self._output_level,
535
+ }
536
+ audit_path = self._storage_base / "audit.log"
537
+ with audit_path.open("a") as f:
538
+ f.write(json.dumps(entry) + "\n")
539
+ except OSError as exc:
540
+ self._append_error_log(f"_append_audit_log failed: {exc}")
541
+
542
+ def _append_error_log(self, msg: str) -> None:
543
+ import datetime
544
+ try:
545
+ log_path = self._storage_base / "query.log"
546
+ entry = f"{datetime.datetime.now().isoformat()} ERROR {msg}\n"
547
+ with log_path.open("a") as f:
548
+ f.write(entry)
549
+ except OSError:
550
+ pass
551
+
552
+ def _load_state(self) -> dict:
553
+ if self._state_path.exists():
554
+ try:
555
+ return json.loads(self._state_path.read_text())
556
+ except (json.JSONDecodeError, OSError):
557
+ pass
558
+ return {}
559
+
560
def _save_state(self) -> None:
    """Persist the current output-compression level to ``state.json``.

    Written atomically; an ``OSError`` during the write is ignored so a
    read-only or full disk cannot break the calling tool.
    """
    try:
        snapshot = json.dumps({"output_level": self._output_level})
        _atomic_write_text(self._state_path, snapshot)
    except OSError:
        pass
566
+
567
def _spawn_vec_backfill(self) -> None:
    """Start the vec-table backfill on a background daemon thread.

    The worker opens its own memory.db connection instead of reusing
    ``self._memory_conn`` (the original note: sqlite3 connections are
    bound to the thread that opened them) and closes it when finished.
    Because the thread is a daemon it does not hold up process exit. Any
    exception inside the worker is logged with a traceback, not raised.
    """
    db_root = self._storage_base
    embedder = self._embedder

    def _backfill_worker():
        try:
            worker_conn = memory_db.connect(memory_db.memory_db_path(db_root))
            try:
                counts = memory_db.backfill_vec_tables(worker_conn, embedder)
                # Only log when something was actually backfilled.
                if counts.get("decisions") or counts.get("turn_summaries"):
                    log.info("memory.db vec backfill done: %s", counts)
            finally:
                worker_conn.close()
        except Exception:
            log.exception("memory.db vec backfill thread failed")

    worker = threading.Thread(
        target=_backfill_worker, daemon=True, name="cce-vec-backfill"
    )
    worker.start()
593
+
594
+ def _record(self, raw_tokens: int, served_tokens: int, full_file_tokens: int = 0) -> None:
595
+ """Legacy retrieval-pipeline writer. Splits into two bucket events:
596
+ retrieval (full_file → raw) and chunk_compression (raw → served),
597
+ so per-bucket attribution matches what `cce savings` displays.
598
+ """
599
+ self._stats["queries"] += 1
600
+ self._stats["raw_tokens"] += raw_tokens
601
+ self._stats["served_tokens"] += served_tokens
602
+ self._stats.setdefault("full_file_tokens", 0)
603
+ self._stats["full_file_tokens"] += full_file_tokens
604
+ if full_file_tokens > 0:
605
+ self._record_bucket("retrieval", full_file_tokens, raw_tokens)
606
+ if raw_tokens > 0:
607
+ self._record_bucket("chunk_compression", raw_tokens, served_tokens)
608
+ # Cover the no-bucket path (raw_tokens == 0) — _record_bucket would
609
+ # have saved otherwise.
610
+ if raw_tokens <= 0 and full_file_tokens <= 0:
611
+ self._save_stats()
612
+ self._append_query_log()
613
+
614
+ def _record_bucket(
615
+ self,
616
+ bucket: str,
617
+ baseline: int,
618
+ served: int,
619
+ meta: dict | None = None,
620
+ ) -> None:
621
+ """Append one savings event to memory.db and the in-memory totals.
622
+
623
+ Best-effort — never raises so a misbehaving instrumentation point
624
+ can't break a tool response. Callers don't need to call _save_stats
625
+ unless they also want the legacy top-level fields refreshed.
626
+ """
627
+ baseline = max(0, int(baseline))
628
+ served = max(0, int(served))
629
+ b = self._stats.setdefault("buckets", {}).setdefault(
630
+ bucket, {"baseline": 0, "served": 0, "calls": 0},
631
+ )
632
+ b["baseline"] += baseline
633
+ b["served"] += served
634
+ b["calls"] += 1
635
+ if self._memory_conn is not None:
636
+ try:
637
+ memory_db.record_savings(
638
+ self._memory_conn,
639
+ bucket=bucket,
640
+ baseline=baseline,
641
+ served=served,
642
+ meta=meta,
643
+ )
644
+ except Exception as exc: # pragma: no cover — defensive
645
+ self._append_error_log(f"_record_bucket({bucket}) failed: {exc}")
646
+ # Persist the in-memory rollup. Cheap (~few hundred bytes JSON write).
647
+ self._save_stats()
648
+
649
def _apply_output_compression(self, body: str) -> str:
    """Attach the active output-compression directive to a tool response.

    When the current level has no output rules the body is returned
    untouched. Otherwise a trailing directive naming the level is appended
    and, if the level advertises a positive saving, one estimated event is
    recorded in the ``output_compression`` bucket.

    Kept in one place so every handler that returns prose takes part —
    not just context_search. A handler that skips this lets the model's
    reply bypass compression, which undercounts the bucket and spends
    tokens the directive would have saved.
    """
    level = self._output_level
    if not get_output_rules(level):
        return body

    augmented = body + (
        f"\n\n---\n[Respond using {level} output compression]"
    )
    advertised = ADVERTISED_PCT.get(level, 0.0)
    if advertised > 0.0:
        self._record_bucket(
            "output_compression",
            baseline=ESTIMATED_AVG_REPLY_TOKENS,
            served=int(ESTIMATED_AVG_REPLY_TOKENS * (1 - advertised)),
            meta={"level": level},
        )
    return augmented
674
+
675
def get_tool_names(self) -> list[str]:
    """Return a fresh list of the tool names in ``TOOL_NAMES``.

    A copy is returned so callers can mutate it without affecting the
    class-level list.
    """
    return [*self.TOOL_NAMES]
677
+
678
+ # ── tool registration ───────────────────────────────────────────────────
679
+
680
def _register_tools(self) -> None:
    """Attach the ``list_tools`` and ``call_tool`` handlers to ``self._server``.

    ``list_tools`` returns the catalogue of eleven tools with their JSON
    input schemas; ``call_tool`` routes a call by tool name to the matching
    ``_handle_*`` method on this instance.
    """
    @self._server.list_tools()
    async def list_tools():
        # Static catalogue: each entry carries the tool name, a description
        # and the JSON schema of its arguments.
        return [
            Tool(
                name="context_search",
                description=(
                    "PREFERRED tool for ANY question about this project's "
                    "code, structure, or behavior. Use INSTEAD OF Read, "
                    "Grep, or Glob when exploring the codebase, locating "
                    "functions, or answering 'how does X work / where is "
                    "Y' questions. Returns the most relevant code chunks "
                    "with confidence scores from a hybrid vector + BM25 "
                    "index, so you do not pay tokens for files you do not "
                    "need. Read should be reserved for opening a known "
                    "file path you intend to edit."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {"type": "string"},
                        "top_k": {"type": "integer", "default": 10},
                        "max_tokens": {"type": "integer", "default": 8000},
                    },
                    "required": ["query"],
                },
            ),
            Tool(
                name="expand_chunk",
                description="Get the full original content for a compressed chunk",
                inputSchema={
                    "type": "object",
                    "properties": {"chunk_id": {"type": "string"}},
                    "required": ["chunk_id"],
                },
            ),
            Tool(
                name="related_context",
                description="Find related code via graph edges",
                inputSchema={
                    "type": "object",
                    "properties": {"chunk_id": {"type": "string"}},
                    "required": ["chunk_id"],
                },
            ),
            Tool(
                name="session_recall",
                description=(
                    "Recall past decisions, prompts, and turn summaries via topic search. "
                    "Returns compact-index hits across the whole project history."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {"topic": {"type": "string"}},
                    "required": ["topic"],
                },
            ),
            Tool(
                name="session_timeline",
                description=(
                    "List the turn summaries for a session, oldest first. "
                    "Layer 2 of progressive disclosure — drill into a session_id "
                    "returned by session_recall."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "session_id": {"type": "string"},
                        "limit": {"type": "integer", "default": 20},
                    },
                    "required": ["session_id"],
                },
            ),
            Tool(
                name="session_event",
                description=(
                    "Return the raw input/output payload for a single tool_event. "
                    "Layer 3 of progressive disclosure — drill into an event_id "
                    "from session_timeline."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {"event_id": {"type": "integer"}},
                    "required": ["event_id"],
                },
            ),
            Tool(
                name="record_decision",
                description="Record a decision (with reason) for future session_recall",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "decision": {"type": "string"},
                        "reason": {"type": "string"},
                    },
                    "required": ["decision", "reason"],
                },
            ),
            Tool(
                name="record_code_area",
                description="Record a code area (file + description) worked on, for future session_recall",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "file_path": {"type": "string"},
                        "description": {"type": "string"},
                    },
                    "required": ["file_path", "description"],
                },
            ),
            Tool(
                name="index_status",
                description="Check when the index was last updated",
                inputSchema={"type": "object", "properties": {}},
            ),
            Tool(
                name="reindex",
                description="Trigger re-indexing of a file or the entire project",
                inputSchema={
                    "type": "object",
                    "properties": {"path": {"type": "string"}},
                },
            ),
            Tool(
                name="set_output_compression",
                description=(
                    "Set output compression level to reduce response token cost. "
                    "Levels: off, lite, standard, max"
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "level": {
                            "type": "string",
                            # Allowed values come from the LEVELS collection.
                            "enum": list(LEVELS),
                            "description": (
                                "off=normal, lite=no filler, standard=fragments "
                                "~65% savings, max=telegraphic ~75% savings"
                            ),
                        },
                    },
                    "required": ["level"],
                },
            ),
        ]

    @self._server.call_tool()
    async def call_tool(name: str, arguments: dict):
        # A missing/None arguments payload is treated as an empty dict.
        arguments = arguments or {}
        try:
            # Some handlers are coroutines and are awaited; the rest are
            # plain methods whose result is returned directly.
            if name == "context_search":
                return await self._handle_context_search(arguments)
            elif name == "expand_chunk":
                return await self._handle_expand_chunk(arguments)
            elif name == "related_context":
                return await self._handle_related_context(arguments)
            elif name == "session_recall":
                return await self._handle_session_recall(arguments)
            elif name == "session_timeline":
                return self._handle_session_timeline(arguments)
            elif name == "session_event":
                return self._handle_session_event(arguments)
            elif name == "record_decision":
                return self._handle_record_decision(arguments)
            elif name == "record_code_area":
                return self._handle_record_code_area(arguments)
            elif name == "index_status":
                # index_status is the only handler that takes no arguments.
                return await self._handle_index_status()
            elif name == "reindex":
                return await self._handle_reindex(arguments)
            elif name == "set_output_compression":
                return self._handle_set_output_compression(arguments)
            # No branch matched: report the unrecognised name as text.
            return [TextContent(type="text", text=f"Unknown tool: {name}")]
        except Exception as exc:  # pragma: no cover - defensive
            # Handler failures are logged with a traceback and returned as a
            # text result instead of being raised to the server.
            log.exception("MCP tool %s failed", name)
            return [TextContent(type="text", text=f"Tool {name} failed: {exc}")]
856
+
857
+ # ── tool handlers ───────────────────────────────────────────────────────
858
+
859
+ async def _ensure_indexed(self) -> None:
860
+ """Lazy indexing: if the index is empty, trigger indexing on first query."""
861
+ if self._lazy_indexed:
862
+ return
863
+ self._lazy_indexed = True
864
+ try:
865
+ count = self._backend._vector_store.count()
866
+ if count > 0:
867
+ return
868
+ except Exception:
869
+ pass
870
+ # Index is empty — trigger on-the-fly indexing
871
+ log.info("Index empty — triggering lazy indexing for %s", self._project_name)
872
+ try:
873
+ from context_engine.indexer.pipeline import run_indexing
874
+ await run_indexing(self._config, self._project_dir, full=False)
875
+ except Exception as exc:
876
+ log.warning("Lazy indexing failed: %s", exc)
877
+
878
async def _handle_context_search(self, args):
    """Answer a ``context_search`` call with ranked, budgeted code chunks.

    Validates the query, lazily indexes on first use, retrieves twice
    ``top_k`` candidates, compresses them and splits them into inline and
    overflow sets against ``max_tokens``. Token accounting, touched files
    and an audit-log entry are recorded before the rendered body is returned.
    """
    query = (args.get("query") or "").strip()
    if not query:
        return [TextContent(type="text", text="Query cannot be empty.")]
    if len(query) > _MAX_QUERY_CHARS:
        too_long = f"Query too long (max {_MAX_QUERY_CHARS} characters)."
        return [TextContent(type="text", text=too_long)]

    # Lazy index if this is the first query and index is empty
    await self._ensure_indexed()

    top_k = _clamp_top_k(args.get("top_k", 10))
    try:
        max_tokens = int(args.get("max_tokens", 8000))
    except (TypeError, ValueError):
        # Unusable budget value: fall back to the default.
        max_tokens = 8000

    # Fetch 2x candidates so overflow can offer references
    candidates = await self._retriever.retrieve(
        query,
        top_k=top_k * 2,
        confidence_threshold=self._config.retrieval_confidence_threshold,
        max_tokens=None,
    )
    candidates = await self._compressor.compress(
        candidates, self._config.compression_level
    )
    inline_chunks, overflow_chunks = _split_inline_overflow(candidates, max_tokens)

    # Accounting: inline chunks cost their served text, overflow chunks cost
    # a compact reference (~30 tokens each).
    raw_tokens = served_tokens = 0
    seen_files: set[str] = set()
    for chunk in inline_chunks:
        shown = chunk.compressed_content or chunk.content
        raw_tokens += _count_tokens(chunk.content)
        served_tokens += _count_tokens(shown)
        seen_files.add(chunk.file_path)
    for chunk in overflow_chunks:
        raw_tokens += _count_tokens(chunk.content)
        served_tokens += 30
        seen_files.add(chunk.file_path)

    full_file_tokens = self._estimate_full_file_tokens(seen_files)

    # Auto-capture: every file that surfaced in the results counts as
    # "touched" and is persisted into the session log alongside explicit
    # record_code_area calls.
    self._session_capture.touch_files(self._session_id, seen_files)
    self._persist_current_session()

    body = self._apply_output_compression(
        _format_results_with_overflow(inline_chunks, overflow_chunks)
    )
    self._record(raw_tokens, served_tokens, full_file_tokens)

    def _ref(chunk, kind):
        # One audit-log entry: file:line span, rounded score, and placement.
        return {
            "file": chunk.file_path,
            "lines": f"{chunk.start_line}-{chunk.end_line}",
            "score": round(float(getattr(chunk, "final_score", 0.0)), 3),
            "kind": kind,
        }

    # Compliance audit log — refs of every served chunk plus the score range.
    served_refs = [_ref(c, "inline") for c in inline_chunks]
    served_refs += [_ref(c, "overflow") for c in overflow_chunks]
    positive = [ref["score"] for ref in served_refs if ref["score"] > 0]
    score_range = (min(positive), max(positive)) if positive else None
    self._append_audit_log(
        query=query,
        top_k=top_k,
        served_chunks=served_refs,
        score_range=score_range,
    )
    return [TextContent(type="text", text=body)]
964
+
965
+ def _estimate_full_file_tokens(self, file_paths: set[str]) -> int:
966
+ """Estimate token count if the user had read the full source files.
967
+
968
+ Uses file size (~4 bytes per token, the typical English/code ratio
969
+ produced by `_count_tokens` heuristic) rather than reading every file
970
+ into memory — that ran on every search and could load hundreds of MB.
971
+ """
972
+ from pathlib import Path as _Path
973
+ total = 0
974
+ project_dir = _Path.cwd()
975
+ for fp in file_paths:
976
+ full_path = project_dir / fp
977
+ try:
978
+ size = full_path.stat().st_size
979
+ except OSError:
980
+ continue
981
+ total += max(1, size // _CHARS_PER_TOKEN)
982
+ return total
983
+
984
async def _handle_expand_chunk(self, args):
    """Return the full content of one chunk, prefixed with its location.

    Responds with an explanatory text when ``chunk_id`` is missing or the
    backend has no such chunk. A successful lookup is recorded in the token
    stats and bumps the touch counter for the chunk's file.
    """
    requested = args.get("chunk_id")
    chunk_id = requested.strip() if requested else ""
    if not chunk_id:
        return [TextContent(type="text", text="chunk_id is required.")]

    chunk = await self._backend.get_chunk_by_id(chunk_id)
    if chunk is None:
        return [TextContent(type="text", text="Chunk not found.")]

    # The chunk is served uncompressed, so raw and served counts are equal.
    token_count = _count_tokens(chunk.content)
    self._record(token_count, token_count)

    # Opening a chunk is a stronger interest signal than seeing it in a
    # result list — bump the touch counter and persist.
    self._session_capture.touch_files(self._session_id, [chunk.file_path])
    self._persist_current_session()

    location = f"[{chunk.file_path}:{chunk.start_line}-{chunk.end_line}]"
    return [TextContent(type="text", text=f"{location}\n{chunk.content}")]
1006
+
1007
async def _handle_related_context(self, args):
    """List the graph neighbours of a chunk, one bullet per neighbour."""
    requested = args.get("chunk_id")
    chunk_id = requested.strip() if requested else ""
    if not chunk_id:
        return [TextContent(type="text", text="chunk_id is required.")]

    neighbors = await self._backend.graph_neighbors(chunk_id)
    if not neighbors:
        nothing = "No related context found for this chunk."
        return [TextContent(type="text", text=nothing)]

    listing = "\n".join(
        f"- {node.node_type.value}: {node.name} ({node.file_path})"
        for node in neighbors
    )
    return [TextContent(type="text", text=listing)]
1023
+
1024
async def _handle_session_recall(self, args):
    """Search recorded session memory for ``topic`` and return the hits.

    Records a ``memory_recall`` bucket comparing the token count of all
    matches against the formatted body, then applies output compression.
    """
    topic = (args.get("topic") or "").strip()
    if not topic:
        return [TextContent(type="text", text="topic is required.")]

    matches = self._search_sessions(topic)
    if not matches:
        no_hits = (
            f"No recorded decisions or code-area notes matching '{topic}'. "
            "Use record_decision or record_code_area to capture notes "
            "during the session."
        )
        return [TextContent(type="text", text=no_hits)]

    body = self._format_recall(topic, matches)
    # memory_recall savings: baseline = every matched entry dumped raw,
    # served = the formatted body that is actually returned.
    baseline = sum(map(_count_tokens, matches))
    served = _count_tokens(body)
    if baseline > 0:
        self._record_bucket(
            "memory_recall",
            baseline=baseline,
            served=served,
            meta={"matches": len(matches), "topic_len": len(topic)},
        )
    return [TextContent(type="text", text=self._apply_output_compression(body))]
1053
+
1054
def _format_recall(self, topic: str, matches: list[str]) -> str:
    """Render recall hits as bullets, optionally under a TL;DR header.

    The TL;DR is extractive: the leading matches are embedded and the three
    closest to the centroid of those embeddings are shown. The header is
    omitted when fewer than three matches are displayed, when no match has
    usable content, or when embedding fails.
    """
    shown = matches[:_recall_display_cap()]
    summary: list[str] = []
    if len(shown) >= 3:
        # Embed each match's clean content (as produced by _content_key) and
        # keep the three most central by cosine-to-centroid.
        from context_engine.memory.extractive import _cosine
        try:
            texts = [t for t in map(_content_key, shown[:_TLDR_TOP_N]) if t]
            if texts:
                vectors = [list(self._embedder.embed_query(t)) for t in texts]
                centroid = [sum(col) / len(vectors) for col in zip(*vectors)]
                by_centrality = sorted(
                    zip(texts, vectors),
                    key=lambda pair: _cosine(pair[1], centroid),
                    reverse=True,
                )
                summary = [text for text, _ in by_centrality[:3]]
        except Exception:
            log.debug("recall TL;DR extractive failed; omitting header")
            summary = []

    bullets = "\n".join(f"- {m}" for m in shown)
    if not summary:
        return bullets

    total = len(matches)
    header = (
        f"TL;DR ({total} match{'es' if total != 1 else ''} for '{topic}'):\n"
        + "\n".join(f" • {line}" for line in summary)
    )
    return header + "\n\nSource matches:\n" + bullets
1097
+
1098
def _handle_record_decision(self, args):
    """Record a decision and its reason in the session log and memory.db.

    Both strings are PII-scrubbed before any write. The JSON session capture
    is written first; the memory.db write is attempted only when a
    connection exists, and a failure there is logged without affecting the
    confirmation returned to the caller.
    """
    decision = (args.get("decision") or "").strip()
    reason = (args.get("reason") or "").strip()
    if not decision:
        return [TextContent(type="text", text="decision is required.")]

    # Scrub at the entry point: both the JSON session capture and memory.db
    # consume these strings.
    decision = memory_db.scrub_pii(decision)
    reason = memory_db.scrub_pii(reason)
    self._session_capture.record_decision(self._session_id, decision, reason)
    self._persist_current_session()

    # Dual-write into memory.db. Both fields go through the grammar
    # compressor before INSERT, and the vector row is built from the
    # compressed form so it matches what is stored.
    conn = self._memory_conn
    if conn is not None:
        try:
            import time as _time
            epoch = int(_time.time())
            stored_decision, dec_raw, dec_comp = _grammar_compress_counted(
                decision, level=_GRAMMAR_LEVEL,
            )
            stored_reason, rsn_raw, rsn_comp = _grammar_compress_counted(
                reason, level=_GRAMMAR_LEVEL,
            )
            # One bucket event for the combined decision+reason write.
            self._record_bucket(
                "grammar",
                baseline=dec_raw + rsn_raw,
                served=dec_comp + rsn_comp,
            )
            created_at = _time.strftime("%Y-%m-%dT%H:%M:%S", _time.gmtime(epoch))
            cursor = conn.execute(
                "INSERT INTO decisions (session_id, decision, reason, source, "
                "created_at_epoch, created_at) "
                "VALUES (?, ?, ?, 'manual', ?, ?)",
                (self._session_id, stored_decision, stored_reason, epoch, created_at),
            )
            memory_db.record_decision_vec(
                conn,
                self._embedder,
                decision_id=cursor.lastrowid,
                decision=stored_decision,
                reason=stored_reason,
            )
            conn.commit()
        except Exception:
            log.exception("memory.db decision dual-write failed")

    confirmation = f"✓ Decision recorded: {decision}"
    return [TextContent(type="text", text=confirmation)]
1155
+
1156
def _handle_record_code_area(self, args):
    """Record a worked-on file and its description in session log and memory.db.

    Only the free-form description is PII-scrubbed; ``file_path`` is stored
    as given. A failed memory.db write is logged and does not change the
    confirmation returned.
    """
    file_path = (args.get("file_path") or "").strip()
    description = (args.get("description") or "").strip()
    if not file_path:
        return [TextContent(type="text", text="file_path is required.")]

    description = memory_db.scrub_pii(description)
    self._session_capture.record_code_area(self._session_id, file_path, description)
    self._persist_current_session()

    conn = self._memory_conn
    if conn is not None:
        try:
            import time as _time
            epoch = int(_time.time())
            row_values = (self._session_id, file_path, description, epoch)
            conn.execute(
                "INSERT INTO code_areas (session_id, file_path, description, "
                "source, created_at_epoch) VALUES (?, ?, ?, 'manual', ?)",
                row_values,
            )
            conn.commit()
        except Exception:
            log.exception("memory.db code_area dual-write failed")

    confirmation = f"✓ Code area noted: {file_path} — {description}"
    return [TextContent(type="text", text=confirmation)]
1187
+
1188
def _handle_session_timeline(self, args):
    """Render a session's turn summaries, ordered by prompt number.

    The output starts with session metadata when a ``sessions`` row exists.
    A ``progressive_disclosure`` bucket compares the session's total stored
    payload bytes against the rendered timeline.
    """
    session_id = (args.get("session_id") or "").strip()
    limit = _clamp_int(args.get("limit"), default=20, lo=1, hi=200)
    if not session_id:
        return [TextContent(type="text", text="session_id is required.")]
    conn = self._memory_conn
    if conn is None:
        return [TextContent(type="text", text="Memory store not available.")]

    try:
        rows = [*conn.execute(
            "SELECT prompt_number, summary, tier FROM turn_summaries "
            "WHERE session_id = ? ORDER BY prompt_number ASC LIMIT ?",
            (session_id, limit),
        )]
    except Exception as exc:
        return [TextContent(type="text", text=f"timeline query failed: {exc}")]
    if not rows:
        empty = f"No turn summaries for session {session_id} yet."
        return [TextContent(type="text", text=empty)]

    try:
        meta = conn.execute(
            "SELECT project, started_at, ended_at, status, prompt_count, "
            "rollup_summary FROM sessions WHERE id = ?",
            (session_id,),
        ).fetchone()
    except Exception as exc:
        return [TextContent(type="text", text=f"timeline query failed: {exc}")]

    header: list[str] = []
    if meta:
        header += [
            f"session: {session_id} · {meta['project']} · {meta['status']}",
            f"started: {meta['started_at']} ended: {meta['ended_at'] or '—'}",
        ]
        if meta["rollup_summary"]:
            # Stored compressed; expand for the agent's view.
            header.append(f"rollup: {_grammar_expand(meta['rollup_summary'])}")

    turn_lines = [
        f" turn {row['prompt_number']:>3} [{row['tier']}] "
        f"{_grammar_expand(row['summary'] or '')}"
        for row in rows
    ]
    body = "\n".join(turn_lines)
    if header:
        text = "\n".join(header) + "\n\n" + body
    else:
        text = body

    # progressive_disclosure: baseline = sum of every payload stored for this
    # session; served = the timeline text returned at this layer.
    try:
        total_row = conn.execute(
            "SELECT COALESCE(SUM(p.size_bytes), 0) AS total "
            "FROM tool_events te "
            "LEFT JOIN tool_event_payloads p ON p.id = te.payload_id "
            "WHERE te.session_id = ?",
            (session_id,),
        ).fetchone()
        payload_bytes = int(total_row["total"] or 0)
    except sqlite3.Error:
        payload_bytes = 0
    if payload_bytes > 0:
        self._record_bucket(
            "progressive_disclosure",
            baseline=payload_bytes // _JSON_CHARS_PER_TOKEN,
            served=_count_tokens(text),
            meta={"layer": "timeline", "session_id": session_id},
        )
    return [TextContent(type="text", text=self._apply_output_compression(text))]
1253
+
1254
def _handle_session_event(self, args):
    """Return the stored input/output payload of a single tool event.

    Distinguishes an unknown event, an event captured without a payload row,
    and an event whose payload is empty, before rendering the payload with
    each side truncated to ``_EVENT_PAYLOAD_READ_CAP``.
    """
    try:
        event_id = int(args.get("event_id"))
    except (TypeError, ValueError):
        return [TextContent(type="text", text="event_id must be an integer.")]
    conn = self._memory_conn
    if conn is None:
        return [TextContent(type="text", text="Memory store not available.")]

    try:
        row = conn.execute(
            "SELECT te.tool_name, te.session_id, te.prompt_number, te.created_at, "
            "te.payload_id, p.raw_input, p.raw_output FROM tool_events te "
            "LEFT JOIN tool_event_payloads p ON p.id = te.payload_id "
            "WHERE te.id = ?",
            (event_id,),
        ).fetchone()
    except Exception as exc:
        return [TextContent(type="text", text=f"event query failed: {exc}")]
    if row is None:
        return [TextContent(type="text", text=f"No event with id={event_id}.")]

    # Three payload states:
    #   (a) payload_id IS NULL — only the descriptor was captured.
    #   (b) payload_id present, raw_input empty and raw_output NULL — the
    #       raw payload was pruned by retention.
    #   (c) payload_id present, raws populated — normal case.
    if row["payload_id"] is None:
        descriptor_only = (
            f"Event {event_id} ({row['tool_name']}) has no captured payload "
            "— only its descriptor was recorded."
        )
        return [TextContent(type="text", text=descriptor_only)]
    if not row["raw_input"] and row["raw_output"] is None:
        aged_out = (
            f"Event {event_id} ({row['tool_name']}) was retained as a summary "
            "only — its raw payload aged out of the retention window."
        )
        return [TextContent(type="text", text=aged_out)]

    raw_input = _truncate_payload(row["raw_input"], _EVENT_PAYLOAD_READ_CAP)
    raw_output = _truncate_payload(row["raw_output"], _EVENT_PAYLOAD_READ_CAP)
    title = (
        f"event {event_id} · {row['tool_name']} · session {row['session_id']} · "
        f"turn {row['prompt_number']} · {row['created_at']}"
    )
    body = f"{title}\n\ninput:\n{raw_input}\n\noutput:\n{raw_output}"

    # progressive_disclosure: baseline = every payload stored for the event's
    # session; served = just this one event's body.
    try:
        total_row = conn.execute(
            "SELECT COALESCE(SUM(p.size_bytes), 0) AS total "
            "FROM tool_events te "
            "LEFT JOIN tool_event_payloads p ON p.id = te.payload_id "
            "WHERE te.session_id = ?",
            (row["session_id"],),
        ).fetchone()
        session_bytes = int(total_row["total"] or 0)
    except sqlite3.Error:
        session_bytes = 0
    if session_bytes > 0:
        self._record_bucket(
            "progressive_disclosure",
            baseline=session_bytes // _JSON_CHARS_PER_TOKEN,
            served=_count_tokens(body),
            meta={"layer": "event", "event_id": event_id},
        )
    return [TextContent(type="text", text=self._apply_output_compression(body))]
1328
+
1329
async def _handle_index_status(self):
    """Report the output-compression level and cumulative token savings."""
    stats = self._stats
    queries = stats["queries"]
    raw, served = stats["raw_tokens"], stats["served_tokens"]
    saved = raw - served
    # Guard the percentage against a zero raw count.
    pct = int(saved / raw * 100) if raw > 0 else 0

    level = self._output_level
    report = [
        "Index status: operational",
        f"Output compression: {level} — {get_level_description(self._output_level)}",
    ]
    if queries > 0:
        savings = (
            f"Token savings ({queries} queries): {raw:,} raw → {served:,} served "
            f"({saved:,} saved, {pct}%)"
        )
    else:
        savings = "Token savings: no queries recorded yet"
    report.append(savings)
    return [TextContent(type="text", text="\n".join(report))]
1349
+
1350
async def _handle_reindex(self, args):
    """Run the indexing pipeline on one path or the whole project.

    A blank or missing ``path`` is passed as ``target_path=None``. Pipeline
    exceptions are logged and reported as a failure message; otherwise a
    summary of indexed, pruned, skipped and errored files is returned, with
    at most five error lines.
    """
    from context_engine.indexer.pipeline import run_indexing

    target = (args.get("path") or "").strip() or None
    try:
        result = await run_indexing(
            self._config, self._project_dir, full=False, target_path=target,
        )
    except Exception as exc:
        log.exception("reindex failed")
        return [TextContent(type="text", text=f"✗ Re-index failed: {exc}")]

    summary = ["✓ Re-index complete"]
    summary.append(
        f" Indexed: {len(result.indexed_files)} file(s), {result.total_chunks} chunk(s)"
    )
    if result.deleted_files:
        summary.append(f" Pruned stale: {len(result.deleted_files)}")
    if result.skipped_files:
        summary.append(f" Skipped (binary/unreadable): {len(result.skipped_files)}")
    if result.errors:
        summary.append(f" Errors: {len(result.errors)}")
        summary += [f" - {e}" for e in result.errors[:5]]
    return [TextContent(type="text", text="\n".join(summary))]
1378
+
1379
def _handle_set_output_compression(self, args):
    """Switch the output-compression level and persist the choice.

    A missing ``level`` falls back to ``"standard"``; a value outside
    ``LEVELS`` is rejected without changing state. The reply echoes the
    level's description and rules, or a "disabled" notice when the level
    has no rules.
    """
    level = (args.get("level") or "standard").strip()
    if level not in LEVELS:
        rejection = f"Invalid level: {level}. Use: {', '.join(LEVELS)}"
        return [TextContent(type="text", text=rejection)]

    self._output_level = level
    self._save_state()  # persist so restarts keep the user's choice
    desc = get_level_description(level)
    rules = get_output_rules(level)
    if not rules:
        message = "Output compression disabled. Claude will respond normally."
    else:
        message = f"Output compression set to: {level}\n{desc}\n\n{rules}"
    return [TextContent(type="text", text=message)]
1405
+
1406
+ # ── session helpers ─────────────────────────────────────────────────────
1407
+
1408
def _persist_current_session(self) -> None:
    """Write the current session snapshot to ``<sessions_dir>/<id>.json``.

    Called after every record because the MCP process doesn't always get a
    clean shutdown signal. Does nothing when there is no snapshot; an
    ``OSError`` during the write is logged as a warning and not raised.
    """
    sessions_dir = Path(self._session_capture._sessions_dir)  # noqa: SLF001
    snapshot = self._session_capture.get_session_snapshot(self._session_id)
    if snapshot:
        try:
            target = sessions_dir / f"{self._session_id}.json"
            _atomic_write_text(target, json.dumps(snapshot, indent=2))
        except OSError:
            log.warning("Failed to persist session %s", self._session_id)
1424
+
1425
+ def _search_sessions(self, topic: str) -> list[str]:
1426
+ """Hybrid recall: union ranked candidates from JSON history, FTS5,
1427
+ and sqlite-vec, then merge via reciprocal rank fusion.
1428
+
1429
+ Each source produces its own ranked list; RRF fuses them so an item
1430
+ that appears in multiple sources rises, and items unique to one
1431
+ source still surface. The previous "embed every candidate" pipeline
1432
+ is gone — vec hits already carry a rank from sqlite-vec, so we don't
1433
+ re-embed them. JSON-history rows still go through cosine since
1434
+ there's no index for them.
1435
+ """
1436
+ topic = topic.strip()
1437
+ if not topic:
1438
+ return []
1439
+
1440
+ json_candidates = self._collect_json_candidates()
1441
+ json_ranked = self._rank_json_candidates(topic, json_candidates)
1442
+
1443
+ memory_lists = self._collect_memory_db_candidates(topic)
1444
+
1445
+ ranked = _rrf_merge(json_ranked, *memory_lists, top=50)
1446
+ if ranked:
1447
+ return ranked
1448
+ # Total fallback: tolerant substring match against everything we
1449
+ # collected so callers always get *something* useful even if every
1450
+ # ranking source failed.
1451
+ needle = topic.lower()
1452
+ all_candidates = list(json_candidates)
1453
+ for items in memory_lists:
1454
+ all_candidates.extend(items)
1455
+ return [t for t in all_candidates if needle in t.lower()]
1456
+
1457
def _collect_json_candidates(self) -> list[str]:
    """Collect decisions, code areas and Q&A entries from JSON sessions.

    Sources, in order: the current session snapshot (if any), recent
    sessions loaded from disk, then consolidated decisions. Entries are
    deduplicated by their formatted text, keeping first-seen order.
    """
    sessions: list[dict] = []
    current = self._session_capture.get_session_snapshot(self._session_id)
    if current:
        sessions.append(current)
    sessions.extend(
        self._session_capture.load_recent_sessions(limit=_SESSION_RECALL_WINDOW)
    )

    # A dict keeps insertion order and drops repeats in one structure.
    ordered: dict[str, None] = {}
    for session in sessions:
        for item in session.get("decisions", []):
            ordered.setdefault(
                f"[decision] {item.get('decision', '')} — {item.get('reason', '')}"
            )
        for item in session.get("code_areas", []):
            ordered.setdefault(
                f"[code_area] {item.get('file_path', '')} — "
                f"{item.get('description', '')}"
            )
        for item in session.get("questions", []):
            ordered.setdefault(
                f"[q&a] {item.get('question', '')} → {item.get('answer', '')}"
            )
    for item in self._session_capture._load_consolidated_decisions():
        ordered.setdefault(
            f"[decision] {item.get('decision', '')} — {item.get('reason', '')}"
        )
    return list(ordered)
1508
+
1509
+ def _rank_json_candidates(self, topic: str, candidates: list[str]) -> list[str]:
1510
+ """Cosine-rank JSON candidates and drop sub-threshold entries.
1511
+
1512
+ Memory.db rows already get FTS/vec ranking, but JSON-history rows
1513
+ have no index, so we still pay the per-candidate embed_query() here.
1514
+ Mitigated by `Embedder.embed_query`'s @lru_cache.
1515
+
1516
+ Embeds the topic with stop words stripped — "how can we improve code
1517
+ quality" → "improve code quality" — so the topic vector lands on
1518
+ the topic words rather than the question framing. Sharpens the
1519
+ signal substantially on conversational queries.
1520
+ """
1521
+ if not candidates:
1522
+ return []
1523
+ topic_for_embed = _strip_stop_words(topic) or topic
1524
+ try:
1525
+ topic_vec = list(self._embedder.embed_query(topic_for_embed))
1526
+ except Exception as exc:
1527
+ log.debug("topic embed failed (%s); JSON candidates ranked by recency", exc)
1528
+ return candidates
1529
+ scored: list[tuple[float, str]] = []
1530
+ for text in candidates:
1531
+ # Embed the *content* (no [tag] prefix) so the metadata noise
1532
+ # doesn't inflate similarity for unrelated topics. The agent
1533
+ # still sees the tagged form in the output.
1534
+ content = _content_key(text)
1535
+ try:
1536
+ vec = list(self._embedder.embed_query(content))
1537
+ except Exception:
1538
+ continue
1539
+ sim = _cosine_sim(topic_vec, vec)
1540
+ if sim >= _SESSION_RECALL_MIN_SIM:
1541
+ scored.append((sim, text))
1542
+ scored.sort(key=lambda pair: pair[0], reverse=True)
1543
+ return [text for _, text in scored]
1544
+
1545
def _collect_memory_db_candidates(self, topic: str) -> list[list[str]]:
    """Return one ranked list per memory.db source (FTS5 + sqlite-vec).

    When a connection is open, five lists are returned, always in this
    order: FTS decisions, FTS turn summaries, vector decisions, vector
    turn summaries, and ``code_areas`` substring hits. Each list is in
    the source's own rank order; RRF combines them. A source with no
    hits contributes an empty list (not omitted) so callers see a
    stable shape.

    When ``self._memory_conn`` is ``None`` the result is an empty outer
    list instead.

    The ``code_areas`` lookup is a literal substring match: the LIKE
    wildcards (percent sign, underscore) and the escape character in
    ``topic`` are escaped before the query runs.

    All queries share a single ``try``. If one raises, the exception is
    logged and the lists filled so far are returned as they are.
    """
    if self._memory_conn is None:
        return []
    fts_q = _fts_match_query(topic)

    # Identifiers such as "session_capture" contain "_" and free text may
    # contain "%". Unescaped, both act as LIKE wildcards and pull in
    # unrelated code areas, so escape them and declare the escape char
    # in the SQL below.
    needle = topic.strip()
    like_needle = None
    if needle:
        escaped = (
            needle.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like_needle = f"%{escaped}%"

    fts_decisions: list[str] = []
    fts_turns: list[str] = []
    vec_decisions: list[str] = []
    vec_turns: list[str] = []
    code_areas_hits: list[str] = []

    try:
        if fts_q:
            fts_decisions = self._fetch_decisions_by_query(
                "SELECT d.id FROM decisions d "
                "JOIN decisions_fts f ON f.rowid = d.id "
                "WHERE decisions_fts MATCH ? ORDER BY rank LIMIT ?",
                (fts_q, _FTS_RECALL_LIMIT),
            )
            fts_turns = self._fetch_turns_by_query(
                "SELECT t.id FROM turn_summaries t "
                "JOIN turn_summaries_fts f ON f.rowid = t.id "
                "WHERE turn_summaries_fts MATCH ? ORDER BY rank LIMIT ?",
                (fts_q, _FTS_RECALL_LIMIT),
            )
        # Embed the stop-word-stripped form so conversational queries
        # ("how can we improve code") embed on their topic words rather
        # than the question framing.
        vec_topic = _strip_stop_words(topic) or topic
        vec_decision_ids = memory_db.search_decisions_vec(
            self._memory_conn, self._embedder, vec_topic, k=_VEC_RECALL_K,
        )
        if vec_decision_ids:
            vec_decisions = self._format_decisions_in_id_order(vec_decision_ids)
        vec_turn_ids = memory_db.search_turn_summaries_vec(
            self._memory_conn, self._embedder, vec_topic, k=_VEC_RECALL_K,
        )
        if vec_turn_ids:
            vec_turns = self._format_turns_in_id_order(vec_turn_ids)
        if like_needle is not None:
            for row in self._memory_conn.execute(
                "SELECT file_path, description, source, session_id "
                "FROM code_areas "
                "WHERE file_path LIKE ? ESCAPE '\\' "
                "OR description LIKE ? ESCAPE '\\' "
                "ORDER BY created_at_epoch DESC LIMIT ?",
                (like_needle, like_needle, _FTS_RECALL_LIMIT),
            ):
                code_areas_hits.append(
                    f"[code_area src={row['source']}|sid:{row['session_id'] or '-'}] "
                    f"{row['file_path']} — {row['description']}"
                )
    except Exception:
        # Best-effort recall: keep whatever was collected before the failure.
        log.exception("memory.db recall query failed; FTS+vec lists may be partial")

    return [fts_decisions, fts_turns, vec_decisions, vec_turns, code_areas_hits]
1605
+
1606
+ def _fetch_decisions_by_query(self, sql: str, params: tuple) -> list[str]:
1607
+ """Run an id-returning query, fetch the rows, format in *query* order."""
1608
+ ids = [r["id"] for r in self._memory_conn.execute(sql, params)]
1609
+ return self._format_decisions_in_id_order(ids)
1610
+
1611
+ def _fetch_turns_by_query(self, sql: str, params: tuple) -> list[str]:
1612
+ ids = [r["id"] for r in self._memory_conn.execute(sql, params)]
1613
+ return self._format_turns_in_id_order(ids)
1614
+
1615
def _format_decisions_in_id_order(self, ids: list[int]) -> list[str]:
    """Render decisions as display lines, keeping the order of ``ids``.

    The rows are loaded with a single ``IN (...)`` query and then emitted
    by walking ``ids``, so the caller's ranking survives. Ids with no
    matching row are skipped.

    Each line has the form
    ``[decision src=<source>|sid:<session or ->] <decision> — <reason>``
    followed by an optional relative-time hint and, when the row has a
    session id, a ``session_timeline("<sid>")`` drill-down pointer.
    Decision and reason text are passed through ``_grammar_expand()``
    for display only; this method only reads from the database.
    """
    if not ids:
        return []

    marks = ",".join(["?"] * len(ids))
    query = (
        "SELECT id, decision, reason, source, session_id, "
        f"created_at_epoch FROM decisions WHERE id IN ({marks})"
    )
    by_id = {}
    for record in self._memory_conn.execute(query, tuple(ids)):
        by_id[record["id"]] = record

    lines: list[str] = []
    for wanted in ids:
        record = by_id.get(wanted)
        if record is None:
            # The id has no row in the decisions table; nothing to show.
            continue
        age = _humanise_relative_time(record["created_at_epoch"])
        session = record["session_id"]
        suffix = ""
        if age:
            suffix = f" · {age}"
        if session:
            suffix = f'{suffix} · → session_timeline("{session}")'
        decision_text = _grammar_expand(record["decision"] or "")
        reason_text = _grammar_expand(record["reason"] or "")
        header = f"[decision src={record['source']}|sid:{session or '-'}] "
        lines.append(f"{header}{decision_text} — {reason_text}{suffix}")
    return lines
1652
+
1653
def _format_turns_in_id_order(self, ids: list[int]) -> list[str]:
    """Render turn summaries as display lines, keeping the order of ``ids``.

    The rows are loaded with a single ``IN (...)`` query and then emitted
    by walking ``ids``, so the caller's ranking survives. Ids with no
    matching row are skipped.

    Each line has the form ``[turn sid:<session>|n:<prompt number>] <summary>``
    followed by an optional relative-time hint and a
    ``session_event(id=<row id>)`` drill-down pointer. The summary is
    passed through ``_grammar_expand()`` for display.
    """
    if not ids:
        return []

    marks = ",".join(["?"] * len(ids))
    query = (
        "SELECT id, session_id, prompt_number, summary, "
        f"created_at_epoch FROM turn_summaries WHERE id IN ({marks})"
    )
    by_id = {}
    for record in self._memory_conn.execute(query, tuple(ids)):
        by_id[record["id"]] = record

    lines: list[str] = []
    for wanted in ids:
        record = by_id.get(wanted)
        if record is None:
            # The id has no row in turn_summaries; nothing to show.
            continue
        age = _humanise_relative_time(record["created_at_epoch"])
        suffix = ""
        if age:
            suffix = f" · {age}"
        suffix = f'{suffix} · → session_event(id={record["id"]})'
        summary_text = _grammar_expand(record["summary"] or "")
        header = f"[turn sid:{record['session_id']}|n:{record['prompt_number']}] "
        lines.append(f"{header}{summary_text}{suffix}")
    return lines
1678
+
1679
+ # ── MCP prompts ─────────────────────────────────────────────────────────
1680
+
1681
def _register_prompts(self):
    """Register MCP prompts for session-start context injection.

    Installs two handlers on ``self._server``:

    * ``list_prompts`` advertises a single prompt, ``context-engine-init``,
      with one optional ``output_level`` argument.
    * ``get_prompt`` builds that prompt's text: the project bootstrap
      (chunks, recent commits, recent decisions, working state, chunk
      count, project commands), followed by the tool-routing instructions
      and, when non-empty, the output rules for the requested level.
    """
    from mcp.types import Prompt, PromptMessage, PromptArgument

    @self._server.list_prompts()
    async def list_prompts():
        return [
            Prompt(
                name="context-engine-init",
                description=(
                    "Initialize context engine with project overview and "
                    "output compression rules"
                ),
                arguments=[
                    PromptArgument(
                        name="output_level",
                        description="Output compression level: off, lite, standard, max",
                        required=False,
                    ),
                ],
            ),
        ]

    @self._server.get_prompt()
    async def get_prompt(name: str, arguments: dict | None = None):
        # Only one prompt is served; any other name yields None.
        if name != "context-engine-init":
            return None
        # An explicit argument wins over the server-wide default level.
        level = (arguments or {}).get("output_level", self._output_level)

        # Compose a rich project bootstrap with git context, session
        # decisions, and chunks relevant to current work.
        try:
            # Start with architecture overview chunks
            chunks = await self._retriever.retrieve(
                "architecture overview", top_k=10
            )
        except Exception as exc:
            log.warning("Init prompt chunk retrieval failed: %s", exc)
            chunks = []
        else:
            # Also retrieve chunks for recently modified files so the
            # init prompt reflects current work, not just static structure.
            # The whole section is best-effort: a failure anywhere in it
            # (listing files, building the query, retrieving, merging)
            # must not discard the overview chunks fetched above.
            try:
                modified_files = get_recently_modified_files(self._project_dir)
                if modified_files:
                    # Query on bare file stems: "a/b/foo.py" -> "foo".
                    file_query = " ".join(
                        f.rsplit("/", 1)[-1].rsplit(".", 1)[0]
                        for f in modified_files[:5]
                    )
                    recent_chunks = await self._retriever.retrieve(
                        file_query, top_k=5
                    )
                    # Merge without duplicates
                    seen_ids = {c.id for c in chunks}
                    for c in recent_chunks:
                        if c.id not in seen_ids:
                            chunks.append(c)
                            seen_ids.add(c.id)
            except Exception as exc:
                log.debug("Recent-file chunk retrieval failed: %s", exc)

        # Git history and working state
        recent_commits = get_recent_commits(self._project_dir)
        working_state = get_working_state(self._project_dir)

        # Surface the files that got the most attention in the most-recent
        # past session. Auto-captured every time a file appears in a
        # context_search result or is opened via expand_chunk — gives the
        # next session a "where you left off" hint without requiring
        # Claude to have explicitly called record_code_area.
        recent_sessions = self._session_capture.load_recent_sessions(limit=1)
        if recent_sessions:
            touched = recent_sessions[0].get("touched_files") or {}
            if touched:
                # Five most-touched files, highest count first.
                top = sorted(touched.items(), key=lambda kv: kv[1], reverse=True)[:5]
                # Copy before appending so the helper's return value is
                # not mutated (and a falsy result becomes a fresh list).
                working_state = list(working_state or [])
                working_state.append(
                    "Recently touched files (prior session): "
                    + ", ".join(f"{fp} ({n})" for fp, n in top)
                )

        # Active decisions from past sessions — surface the most recent
        # entries unconditionally rather than substring-matching on the
        # word "decision" (which usually misses since recorded decisions
        # rarely contain that literal token).
        active_decisions = self._session_capture.get_recent_decisions(limit=10)

        # Get total indexed chunk count for the status line.
        try:
            chunk_count = self._backend._vector_store.count()
        except Exception:
            chunk_count = 0

        # Load project-specific commands from .cce/commands.yaml
        from context_engine.project_commands import load_commands, format_for_prompt
        proj_commands = load_commands(self._project_dir)
        proj_commands_text = format_for_prompt(proj_commands)

        bootstrap_text = self._bootstrap.build(
            project_name=self._project_name,
            chunks=chunks,
            recent_commits=recent_commits,
            active_decisions=active_decisions,
            working_state=working_state,
            chunk_count=chunk_count,
            project_commands_text=proj_commands_text,
        )

        # Tool routing instructions — injected at session start so the
        # model uses context_search instead of Read for exploration.
        tool_instructions = (
            "\n\n---\n"
            "## Tool Routing (context-engine)\n\n"
            "This project has a semantic search index. "
            "**You MUST use the `context_search` MCP tool** for ANY of these:\n"
            "- Questions about the codebase (\"what does X do?\", \"how does Y work?\")\n"
            "- Exploring code, finding functions, understanding structure\n"
            "- Finding related code or patterns\n\n"
            "Use `Read` ONLY when you need to edit a specific file.\n\n"
            "Call `context_search` with a natural language query. "
            "Example: `context_search({\"query\": \"twitter feed layout\"})`\n"
            "Do NOT use Read, Glob, or Grep to answer questions about the code.\n"
        )

        rules = get_output_rules(level)
        content = bootstrap_text + tool_instructions
        if rules:
            content += f"\n\n{rules}"
        return {
            "messages": [
                PromptMessage(
                    role="user",
                    content=TextContent(type="text", text=content),
                ),
            ],
        }
1816
+
1817
async def run_stdio(self):
    """Run the MCP server over the stdio transport.

    Opens the stdio stream pair and awaits ``self._server.run`` on it,
    using the server's own initialization options.
    """
    from mcp.server.stdio import stdio_server

    async with stdio_server() as streams:
        incoming, outgoing = streams
        init_options = self._server.create_initialization_options()
        await self._server.run(incoming, outgoing, init_options)