code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. code_context_engine-0.4.0.dist-info/METADATA +389 -0
  2. code_context_engine-0.4.0.dist-info/RECORD +63 -0
  3. code_context_engine-0.4.0.dist-info/WHEEL +5 -0
  4. code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
  5. code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
  7. context_engine/__init__.py +3 -0
  8. context_engine/cli.py +2848 -0
  9. context_engine/cli_style.py +66 -0
  10. context_engine/compression/__init__.py +0 -0
  11. context_engine/compression/compressor.py +144 -0
  12. context_engine/compression/ollama_client.py +33 -0
  13. context_engine/compression/output_rules.py +77 -0
  14. context_engine/compression/prompts.py +9 -0
  15. context_engine/compression/quality.py +37 -0
  16. context_engine/config.py +198 -0
  17. context_engine/dashboard/__init__.py +0 -0
  18. context_engine/dashboard/_page.py +1548 -0
  19. context_engine/dashboard/server.py +429 -0
  20. context_engine/editors.py +265 -0
  21. context_engine/event_bus.py +24 -0
  22. context_engine/indexer/__init__.py +0 -0
  23. context_engine/indexer/chunker.py +147 -0
  24. context_engine/indexer/embedder.py +154 -0
  25. context_engine/indexer/embedding_cache.py +168 -0
  26. context_engine/indexer/git_hooks.py +73 -0
  27. context_engine/indexer/git_indexer.py +136 -0
  28. context_engine/indexer/ignorefile.py +96 -0
  29. context_engine/indexer/manifest.py +78 -0
  30. context_engine/indexer/pipeline.py +624 -0
  31. context_engine/indexer/secrets.py +332 -0
  32. context_engine/indexer/watcher.py +109 -0
  33. context_engine/integration/__init__.py +0 -0
  34. context_engine/integration/bootstrap.py +76 -0
  35. context_engine/integration/git_context.py +132 -0
  36. context_engine/integration/mcp_server.py +1825 -0
  37. context_engine/integration/session_capture.py +306 -0
  38. context_engine/memory/__init__.py +6 -0
  39. context_engine/memory/compressor.py +344 -0
  40. context_engine/memory/db.py +922 -0
  41. context_engine/memory/extractive.py +106 -0
  42. context_engine/memory/grammar.py +419 -0
  43. context_engine/memory/hook_installer.py +258 -0
  44. context_engine/memory/hook_server.py +83 -0
  45. context_engine/memory/hooks.py +327 -0
  46. context_engine/memory/migrate.py +268 -0
  47. context_engine/models.py +96 -0
  48. context_engine/pricing.py +104 -0
  49. context_engine/project_commands.py +296 -0
  50. context_engine/retrieval/__init__.py +0 -0
  51. context_engine/retrieval/confidence.py +47 -0
  52. context_engine/retrieval/query_parser.py +105 -0
  53. context_engine/retrieval/retriever.py +199 -0
  54. context_engine/serve_http.py +208 -0
  55. context_engine/services.py +252 -0
  56. context_engine/storage/__init__.py +0 -0
  57. context_engine/storage/backend.py +39 -0
  58. context_engine/storage/fts_store.py +112 -0
  59. context_engine/storage/graph_store.py +219 -0
  60. context_engine/storage/local_backend.py +109 -0
  61. context_engine/storage/remote_backend.py +117 -0
  62. context_engine/storage/vector_store.py +357 -0
  63. context_engine/utils.py +72 -0
@@ -0,0 +1,306 @@
1
+ """Session history capture — records decisions, code areas, and Q&A for future recall."""
2
+ import json
3
+ import logging
4
+ import threading
5
+ import time
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ from context_engine.utils import atomic_write_text as _atomic_write_text
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
+ # Once a project accumulates more session JSONs than this, the oldest are
14
+ # consolidated into decisions_log.json (decisions only — the durable signal)
15
+ # and the source files are removed. The most recent _PRUNE_KEEP files are
16
+ # always preserved verbatim.
17
+ _PRUNE_THRESHOLD = 100
18
+ _PRUNE_KEEP = 50
19
+ _DECISIONS_LOG_NAME = "decisions_log.json"
20
+
21
+ class SessionCapture:
22
+ """Thread-safe session log. All `_active` access goes through `_lock` so
23
+ concurrent MCP tool calls (e.g. record_decision while end_session flushes)
24
+ can't interleave a half-mutation."""
25
+
26
+ def __init__(self, sessions_dir: str) -> None:
27
+ self._sessions_dir = sessions_dir
28
+ Path(sessions_dir).mkdir(parents=True, exist_ok=True)
29
+ self._active: dict[str, dict] = {}
30
+ self._lock = threading.RLock()
31
+
32
+ def start_session(self, project_name: str) -> str:
33
+ session_id = uuid.uuid4().hex[:12]
34
+ with self._lock:
35
+ self._active[session_id] = {
36
+ "id": session_id, "project": project_name, "started_at": time.time(),
37
+ "decisions": [], "code_areas": [], "questions": [],
38
+ # touched_files: per-file count of how many times the chunk was
39
+ # surfaced or opened during the session. Auto-captured by the
40
+ # MCP server so even sessions where Claude never explicitly
41
+ # calls `record_code_area` leave a useful breadcrumb.
42
+ "touched_files": {},
43
+ }
44
+ return session_id
45
+
46
+ def record_decision(self, session_id, decision, reason):
47
+ with self._lock:
48
+ session = self._active.get(session_id)
49
+ if session:
50
+ session["decisions"].append({"decision": decision, "reason": reason, "timestamp": time.time()})
51
+
52
+ def record_code_area(self, session_id, file_path, description):
53
+ with self._lock:
54
+ session = self._active.get(session_id)
55
+ if session:
56
+ session["code_areas"].append({"file_path": file_path, "description": description, "timestamp": time.time()})
57
+
58
+ def touch_files(self, session_id, file_paths) -> None:
59
+ """Bump the touched-files counter for each path. Auto-called by the
60
+ MCP server whenever a result references a file or a chunk is opened.
61
+ Cheap (in-memory dict update); persisted on the next flush."""
62
+ if not file_paths:
63
+ return
64
+ with self._lock:
65
+ session = self._active.get(session_id)
66
+ if not session:
67
+ return
68
+ counts = session.setdefault("touched_files", {})
69
+ for fp in file_paths:
70
+ if not fp or fp.startswith("git:"):
71
+ continue
72
+ counts[fp] = counts.get(fp, 0) + 1
73
+
74
+ def get_session_snapshot(self, session_id) -> dict | None:
75
+ """Return a shallow copy of the active session for safe inspection.
76
+ Returns None if the session_id isn't in _active."""
77
+ with self._lock:
78
+ session = self._active.get(session_id)
79
+ if session is None:
80
+ return None
81
+ return dict(session)
82
+
83
+ def get_decisions(self, session_id):
84
+ with self._lock:
85
+ session = self._active.get(session_id)
86
+ # Defensive copy so the caller can iterate without holding the lock.
87
+ return list(session["decisions"]) if session else []
88
+
89
+ def get_code_areas(self, session_id):
90
+ with self._lock:
91
+ session = self._active.get(session_id)
92
+ return list(session["code_areas"]) if session else []
93
+
94
+ def end_session(self, session_id):
95
+ with self._lock:
96
+ session = self._active.pop(session_id, None)
97
+ if session:
98
+ session["ended_at"] = time.time()
99
+ file_path = Path(self._sessions_dir) / f"{session_id}.json"
100
+ _atomic_write_text(file_path, json.dumps(session, indent=2))
101
+
102
+ def load_recent_sessions(self, limit=5):
103
+ sessions_path = Path(self._sessions_dir)
104
+ files = [
105
+ f for f in sessions_path.glob("*.json")
106
+ # decisions_log.json is the consolidated archive, not a session.
107
+ if f.name != _DECISIONS_LOG_NAME
108
+ ]
109
+ files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
110
+ sessions = []
111
+ for f in files[:limit]:
112
+ try:
113
+ with open(f) as fp:
114
+ sessions.append(json.load(fp))
115
+ except (json.JSONDecodeError, OSError):
116
+ # Skip corrupt session files; don't blow up recall.
117
+ continue
118
+ return sessions
119
+
120
+ def prune_old_sessions(
121
+ self,
122
+ threshold: int = _PRUNE_THRESHOLD,
123
+ keep: int = _PRUNE_KEEP,
124
+ ) -> dict:
125
+ """Consolidate old session JSONs into decisions_log.json + delete them.
126
+
127
+ Triggered automatically at server start when there are more than
128
+ `threshold` session files; can also be run from the CLI as
129
+ `cce sessions prune`. Returns a summary dict so the caller can report.
130
+
131
+ Only the *decisions* (and their reasons + timestamps + originating
132
+ session id) survive consolidation. code_areas and questions in old
133
+ sessions are dropped — they were heuristic auto-captures and the
134
+ signal-to-noise drops fast as they age.
135
+
136
+ Uses an fcntl advisory lock on `.prune.lock` in the sessions dir so
137
+ two processes can't race the read-append-write on decisions_log.json
138
+ (last-write-wins would clobber one process's appended decisions).
139
+ On Windows fcntl is unavailable; we fall through without a lock and
140
+ accept the rare race — Windows isn't a supported deploy target today.
141
+ """
142
+ sessions_path = Path(self._sessions_dir)
143
+ sessions_path.mkdir(parents=True, exist_ok=True)
144
+ lock_path = sessions_path / ".prune.lock"
145
+ # Acquire an exclusive flock; fall back to no-op on platforms where
146
+ # fcntl isn't available so the prune still runs (just unlocked).
147
+ lock_fh = None
148
+ try:
149
+ import fcntl # POSIX only; ImportError on Windows
150
+ lock_fh = open(lock_path, "w")
151
+ try:
152
+ fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
153
+ except BlockingIOError:
154
+ # Another process is pruning right now — let it finish.
155
+ lock_fh.close()
156
+ return {"pruned": 0, "kept": -1, "reason": "another prune in progress"}
157
+ except ImportError:
158
+ lock_fh = None
159
+
160
+ try:
161
+ return self._prune_locked(sessions_path, threshold, keep)
162
+ finally:
163
+ if lock_fh is not None:
164
+ try:
165
+ import fcntl
166
+ fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
167
+ except Exception:
168
+ pass
169
+ lock_fh.close()
170
+
171
+ def _prune_locked(
172
+ self,
173
+ sessions_path: Path,
174
+ threshold: int,
175
+ keep: int,
176
+ ) -> dict:
177
+ """The actual prune work. Caller holds the cross-process flock."""
178
+ files = sorted(
179
+ (f for f in sessions_path.glob("*.json") if f.name != _DECISIONS_LOG_NAME),
180
+ key=lambda p: p.stat().st_mtime,
181
+ reverse=True,
182
+ )
183
+ if len(files) <= threshold:
184
+ return {"pruned": 0, "kept": len(files), "reason": "below threshold"}
185
+
186
+ keep_files = files[:keep]
187
+ old_files = files[keep:]
188
+
189
+ log_path = sessions_path / _DECISIONS_LOG_NAME
190
+ existing: list[dict] = []
191
+ if log_path.exists():
192
+ try:
193
+ existing = json.loads(log_path.read_text())
194
+ if not isinstance(existing, list):
195
+ existing = []
196
+ except (json.JSONDecodeError, OSError):
197
+ existing = []
198
+
199
+ appended = 0
200
+ for f in old_files:
201
+ if f == log_path:
202
+ continue
203
+ try:
204
+ data = json.loads(f.read_text())
205
+ except (json.JSONDecodeError, OSError) as exc:
206
+ log.warning("Skipping unreadable session file %s: %s", f, exc)
207
+ continue
208
+ for d in data.get("decisions", []):
209
+ existing.append({
210
+ "decision": d.get("decision", ""),
211
+ "reason": d.get("reason", ""),
212
+ "timestamp": d.get("timestamp", 0.0),
213
+ "session_id": data.get("id", ""),
214
+ })
215
+ appended += 1
216
+
217
+ try:
218
+ _atomic_write_text(log_path, json.dumps(existing, indent=2))
219
+ except OSError as exc:
220
+ log.warning("Failed to write decisions_log: %s", exc)
221
+ return {"pruned": 0, "kept": len(files), "reason": f"write failed: {exc}"}
222
+
223
+ deleted = 0
224
+ for f in old_files:
225
+ if f == log_path:
226
+ continue
227
+ try:
228
+ f.unlink()
229
+ deleted += 1
230
+ except OSError as exc:
231
+ log.warning("Failed to remove old session %s: %s", f, exc)
232
+
233
+ return {
234
+ "pruned": deleted,
235
+ "kept": len(keep_files),
236
+ "decisions_appended": appended,
237
+ "decisions_log": str(log_path),
238
+ }
239
+
240
+ def _load_consolidated_decisions(self) -> list[dict]:
241
+ """Read decisions_log.json (the consolidated archive). Returns []
242
+ when absent or unreadable — never raises."""
243
+ log_path = Path(self._sessions_dir) / _DECISIONS_LOG_NAME
244
+ if not log_path.exists():
245
+ return []
246
+ try:
247
+ data = json.loads(log_path.read_text())
248
+ return data if isinstance(data, list) else []
249
+ except (json.JSONDecodeError, OSError):
250
+ return []
251
+
252
+ def get_recent_decisions(self, limit: int = 10, session_limit: int = 50) -> list[str]:
253
+ """Return the most-recent decision strings across recent sessions.
254
+
255
+ Used by the bootstrap prompt to inject prior decisions at session
256
+ start without relying on a topic-grep that often returns nothing.
257
+ Includes any decisions in the currently active in-memory session.
258
+ Order: newest first by recorded timestamp.
259
+ """
260
+ decisions: list[tuple[float, str]] = []
261
+
262
+ # Active in-memory sessions first (may not yet be flushed to disk).
263
+ # Snapshot under the lock so a concurrent record_decision can't mutate
264
+ # the list while we're iterating it.
265
+ import copy
266
+
267
+ with self._lock:
268
+ active_snapshot = copy.deepcopy(list(self._active.values()))
269
+ for session in active_snapshot:
270
+ for d in session.get("decisions", []):
271
+ ts = d.get("timestamp", 0.0)
272
+ text = (
273
+ f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
274
+ )
275
+ decisions.append((ts, text))
276
+
277
+ for session in self.load_recent_sessions(limit=session_limit):
278
+ for d in session.get("decisions", []):
279
+ ts = d.get("timestamp", 0.0)
280
+ text = (
281
+ f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
282
+ )
283
+ decisions.append((ts, text))
284
+
285
+ # Pull from the consolidated archive as well — `prune_old_sessions`
286
+ # writes decisions there before deleting the source files, so without
287
+ # this step a recall on a long-lived project would forget anything
288
+ # past the most-recent session_limit files.
289
+ for d in self._load_consolidated_decisions():
290
+ ts = d.get("timestamp", 0.0)
291
+ text = (
292
+ f"[decision] {d.get('decision', '')} — {d.get('reason', '')}"
293
+ )
294
+ decisions.append((ts, text))
295
+
296
+ # Dedup keeping the newest occurrence of each text.
297
+ seen: set[str] = set()
298
+ ordered: list[str] = []
299
+ for _, text in sorted(decisions, key=lambda pair: pair[0], reverse=True):
300
+ if text in seen:
301
+ continue
302
+ seen.add(text)
303
+ ordered.append(text)
304
+ if len(ordered) >= limit:
305
+ break
306
+ return ordered
@@ -0,0 +1,6 @@
1
+ """Per-project memory store — SQLite tables backing cross-session recall.
2
+
3
+ This package introduces the new memory.db storage. The legacy JSON-per-session
4
+ capture path in `context_engine.integration.session_capture` continues to work
5
+ unchanged; it is retired in a follow-up PR once hooks land.
6
+ """
@@ -0,0 +1,344 @@
1
+ """Background compression worker for the memory store.
2
+
3
+ Drains `pending_compressions` rows on a fixed interval, calls the extractive
4
+ summariser for each, writes the result to `turn_summaries` (or
5
+ `sessions.rollup_summary` for kind='session_rollup'), and removes the queue
6
+ row. Failures bump the row's `attempts` and log; the row remains queued for
7
+ retry on the next pass.
8
+
9
+ Designed to run as an asyncio task inside `cce serve`. Single-flight by
10
+ construction — only one worker drains at a time.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import json
16
+ import logging
17
+ import sqlite3
18
+ import time
19
+
20
+ from context_engine.memory import db as memory_db
21
+ from context_engine.memory.extractive import extractive_summary, truncation_summary
22
+ from context_engine.memory.grammar import (
23
+ compress as _grammar_compress,
24
+ compress_with_counts as _grammar_compress_counted,
25
+ DEFAULT_LEVEL as _GRAMMAR_LEVEL,
26
+ )
27
+
28
+
29
def _approx_tokens(text: str) -> int:
    """Estimate a token count as one token per four characters.

    Empty (or otherwise falsy) text counts as 0; any non-empty text counts
    as at least 1. The original note says this matches
    mcp_server._count_tokens so bucket totals across writers stay
    comparable.
    """
    if not text:
        return 0
    estimate = len(text) // 4
    return estimate if estimate > 1 else 1
34
+
35
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# `top_k` handed to the summariser for a single turn.
_DEFAULT_TURN_TOP_K = 3
# `top_k` handed to the summariser for a whole-session rollup.
_DEFAULT_ROLLUP_TOP_K = 5
# Default idle/back-off sleep of `compression_loop`, in seconds.
_DEFAULT_INTERVAL_SECONDS = 5.0
_TOOL_OUTPUT_CHAR_CAP = 1500  # avoid embedding multi-MB tool outputs
_TOOL_INPUT_CHAR_CAP = 4000  # skip JSON parsing for huge tool inputs (e.g. patches)
42
+
43
+
44
def compress_turn(
    conn: sqlite3.Connection,
    *,
    session_id: str,
    prompt_number: int,
    embedder,
) -> str:
    """Summarise one turn, persist the summary, and return its text.

    Steps, in order:
      1. Build the raw turn text (prompt + tool inputs/outputs).
      2. Summarise it via `_summarise` (top-K extractive, with a truncation
         fallback).
      3. If the summary is non-empty: scrub PII, run grammar compression,
         and record the grammar savings.
      4. Record turn-summarisation savings: raw turn tokens vs the tokens
         of the summary as it came out of step 2.
      5. `INSERT OR REPLACE` the row in `turn_summaries`; when the final
         summary is non-empty, also hand it to
         `memory_db.record_turn_summary_vec`.

    Nothing is committed here; the caller owns the transaction. The
    returned text is the post-grammar form that was stored.
    """
    turn_text = _build_turn_text(
        conn, session_id=session_id, prompt_number=prompt_number,
    )
    baseline_tokens = _approx_tokens(turn_text)
    summary, tier = _summarise(
        turn_text, embedder=embedder, top_k=_DEFAULT_TURN_TOP_K,
    )
    # Measured before scrubbing/grammar so the two savings buckets don't overlap.
    served_tokens = _approx_tokens(summary)

    if summary:
        # Scrub PII before grammar compression — emails / IPs / SSNs that
        # leaked into a turn shouldn't end up indexed in turn_summaries.
        scrubbed = memory_db.scrub_pii(summary)
        summary, grammar_in, grammar_out = _grammar_compress_counted(
            scrubbed, level=_GRAMMAR_LEVEL,
        )
        memory_db.record_savings(
            conn, bucket="grammar", baseline=grammar_in, served=grammar_out,
        )

    if baseline_tokens > 0 and served_tokens > 0:
        memory_db.record_savings(
            conn,
            bucket="turn_summarization",
            baseline=baseline_tokens,
            served=served_tokens,
            meta={"kind": "turn", "tier": tier},
        )

    created_at = int(time.time())
    inserted = conn.execute(
        "INSERT OR REPLACE INTO turn_summaries "
        "(session_id, prompt_number, summary, tier, created_at_epoch) "
        "VALUES (?, ?, ?, ?, ?)",
        (session_id, prompt_number, summary, tier, created_at),
    )
    # Re-check: this is the post-grammar text, not the step-2 summary.
    if summary:
        memory_db.record_turn_summary_vec(
            conn, embedder, turn_id=inserted.lastrowid, summary=summary,
        )
    return summary
97
+
98
+
99
def compress_session_rollup(
    conn: sqlite3.Connection,
    *,
    session_id: str,
    embedder,
) -> str:
    """Build and store the rollup summary of a session; return its text.

    The input is the session's non-empty `turn_summaries`, joined with
    newlines in prompt order. When there are none (e.g. SessionEnd fired
    before the worker drained any turns) the rollup is the empty string
    with tier "empty"; the `sessions` row is still updated in that case.

    Nothing is committed here; the caller owns the transaction.
    """
    summary_rows = conn.execute(
        "SELECT summary FROM turn_summaries WHERE session_id = ? "
        "ORDER BY prompt_number ASC",
        (session_id,),
    ).fetchall()
    non_empty = [row["summary"] for row in summary_rows if row["summary"]]
    text = "\n".join(non_empty)
    baseline_tokens = _approx_tokens(text)

    if text:
        rollup, tier = _summarise(
            text, embedder=embedder, top_k=_DEFAULT_ROLLUP_TOP_K,
        )
        served_tokens = _approx_tokens(rollup)
        # Belt-and-braces PII scrub: each turn summary was already scrubbed
        # in compress_turn(), but the rollup is the long-lived view of a
        # session — worth re-scrubbing in case a turn slipped through.
        rollup = memory_db.scrub_pii(rollup)
        # Re-pass through grammar so the stored form is consistent with the
        # per-turn summaries it was built from.
        rollup, grammar_in, grammar_out = _grammar_compress_counted(
            rollup, level=_GRAMMAR_LEVEL,
        )
        memory_db.record_savings(
            conn, bucket="grammar", baseline=grammar_in, served=grammar_out,
        )
        # `text` is non-empty here, so baseline_tokens is already positive;
        # the check is kept in full to mirror compress_turn().
        if baseline_tokens > 0 and served_tokens > 0:
            memory_db.record_savings(
                conn,
                bucket="turn_summarization",
                baseline=baseline_tokens,
                served=served_tokens,
                meta={"kind": "session_rollup", "tier": tier},
            )
    else:
        rollup, tier = "", "empty"

    stamped_at = int(time.time())
    conn.execute(
        "UPDATE sessions SET rollup_summary = ?, rollup_summary_at_epoch = ? "
        "WHERE id = ?",
        (rollup, stamped_at, session_id),
    )
    log.debug("session rollup tier=%s len=%d", tier, len(rollup))
    return rollup
153
+
154
+
155
def _build_turn_text(
    conn: sqlite3.Connection,
    *,
    session_id: str,
    prompt_number: int,
) -> str:
    """Assemble the raw text of one turn, one piece per line.

    The pieces are, in order: "User: <prompt>" when a non-empty prompt is
    stored, then for every tool event of the turn (by ascending id) a
    one-line descriptor of the call followed by its stripped output, if
    any. Outputs longer than `_TOOL_OUTPUT_CHAR_CAP` characters are cut
    and suffixed with "…". Rows are read by column name.
    """
    key = (session_id, prompt_number)
    pieces: list[str] = []

    prompt_row = conn.execute(
        "SELECT prompt_text FROM prompts WHERE session_id = ? AND prompt_number = ?",
        key,
    ).fetchone()
    if prompt_row and prompt_row["prompt_text"]:
        pieces.append(f"User: {prompt_row['prompt_text']}")

    # LEFT JOIN: an event without a payload still contributes its descriptor.
    event_rows = conn.execute(
        "SELECT te.tool_name, p.raw_input, p.raw_output FROM tool_events te "
        "LEFT JOIN tool_event_payloads p ON p.id = te.payload_id "
        "WHERE te.session_id = ? AND te.prompt_number = ? "
        "ORDER BY te.id ASC",
        key,
    ).fetchall()

    for event in event_rows:
        pieces.append(_describe_input(event["tool_name"], event["raw_input"] or ""))
        output = (event["raw_output"] or "").strip()
        if not output:
            continue
        if len(output) > _TOOL_OUTPUT_CHAR_CAP:
            output = output[:_TOOL_OUTPUT_CHAR_CAP] + "…"
        pieces.append(output)

    return "\n".join(pieces)
188
+
189
+
190
def _describe_input(tool_name: str, raw_input: str) -> str:
    """Render a tool invocation as a single descriptive line.

    - No input: just the tool name.
    - Input longer than `_TOOL_INPUT_CHAR_CAP`, not valid JSON, or not a
      JSON object: "<tool>: <first 120 chars of the raw input>".
    - JSON object: the first truthy field among file_path, command,
      pattern, path, query as "<tool> <key>=<repr(value)>"; otherwise the
      tool name followed by the list of the object's first two keys.
    """
    if not raw_input:
        return tool_name

    preview = f"{tool_name}: {raw_input[:120]}"

    # Oversize payloads (patches, large file contents) are not parsed at
    # all: formatting a one-liner isn't worth parsing that much JSON.
    if len(raw_input) > _TOOL_INPUT_CHAR_CAP:
        return preview
    try:
        parsed = json.loads(raw_input)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return preview
    if not isinstance(parsed, dict):
        return preview

    # Surface common high-signal fields explicitly, in priority order.
    priority_fields = ("file_path", "command", "pattern", "path", "query")
    chosen = next(
        (name for name in priority_fields if name in parsed and parsed[name]),
        None,
    )
    if chosen is not None:
        return f"{tool_name} {chosen}={parsed[chosen]!r}"

    leading_keys = list(parsed.keys())[:2]
    return f"{tool_name} {leading_keys}"
211
+
212
+
213
def _summarise(text: str, *, embedder, top_k: int) -> tuple[str, str]:
    """Summarise `text` and report which tier produced the result.

    Returns `(summary, tier)` where tier is:
      - "empty"      — the text is blank; the summary is "".
      - "extractive" — `extractive_summary` succeeded.
      - "truncation" — no embedder was supplied, or the extractive pass
        raised (the exception is logged, not propagated).
    """
    if not text.strip():
        return "", "empty"

    if embedder is not None:
        try:
            return extractive_summary(text, embedder=embedder, top_k=top_k), "extractive"
        except Exception:
            log.exception("extractive failed; falling back to truncation")

    return truncation_summary(text), "truncation"
225
+
226
+
227
+ def _drain_one_sync(conn: sqlite3.Connection, embedder) -> bool:
228
+ """Pop and process the oldest pending row. Pure-sync; safe for either the
229
+ main thread (tests) or a worker thread (production via to_thread).
230
+ Returns True iff work was done.
231
+ """
232
+ row = conn.execute(
233
+ "SELECT id, kind, session_id, prompt_number, attempts FROM pending_compressions "
234
+ "ORDER BY enqueued_at_epoch ASC LIMIT 1"
235
+ ).fetchone()
236
+ if row is None:
237
+ return False
238
+ try:
239
+ if row["kind"] == "turn":
240
+ compress_turn(
241
+ conn,
242
+ session_id=row["session_id"],
243
+ prompt_number=row["prompt_number"],
244
+ embedder=embedder,
245
+ )
246
+ else:
247
+ compress_session_rollup(
248
+ conn,
249
+ session_id=row["session_id"],
250
+ embedder=embedder,
251
+ )
252
+ conn.execute("DELETE FROM pending_compressions WHERE id = ?", (row["id"],))
253
+ conn.commit()
254
+ except Exception as exc:
255
+ log.exception("Compression failed for %s/%s/%s",
256
+ row["kind"], row["session_id"], row["prompt_number"])
257
+ conn.execute(
258
+ "UPDATE pending_compressions SET attempts = attempts + 1, "
259
+ "last_error = ? WHERE id = ?",
260
+ (str(exc)[:500], row["id"]),
261
+ )
262
+ conn.commit()
263
+ return True
264
+
265
+
266
def _drain_one_threaded(db_path) -> bool:
    """Drain one queue row on a connection owned by the calling thread.

    Meant to be run through `asyncio.to_thread`: the connection is opened,
    used for a single `_drain_one_sync` pass and closed again inside this
    call. The embedder is not a parameter; it is read from the
    `_embedder` attribute that `compression_loop` sets on this function
    before it starts looping.
    """
    # Importing here avoids a circular import at module load.
    from context_engine.memory import db as _memory_db

    worker_conn = _memory_db.connect(db_path)
    try:
        # Imported but never referenced below; kept exactly as in the
        # original. NOTE(review): presumably only there for its import
        # side effect — confirm before removing.
        from context_engine.indexer.embedder import Embedder as _EmbedderCls  # noqa: F401

        shared_embedder = _drain_one_threaded._embedder
        return _drain_one_sync(worker_conn, shared_embedder)
    finally:
        worker_conn.close()
283
+
284
+
285
async def _drain_one(conn: sqlite3.Connection, embedder) -> bool:
    """Awaitable wrapper over `_drain_one_sync` for callers that already own
    a connection (described in the original as a test-only shim).

    The work runs inline on the calling thread — nothing is offloaded.
    """
    did_work = _drain_one_sync(conn, embedder)
    return did_work
290
+
291
+
292
_BACKLOG_BATCH = 5  # drain at most this many items before yielding to other tasks


async def compression_loop(
    db_path,
    embedder,
    *,
    interval_seconds: float = _DEFAULT_INTERVAL_SECONDS,
    stop_event: asyncio.Event | None = None,
) -> None:
    """Drain the compression queue until stopped or cancelled.

    Each iteration hands one queue row to `_drain_one_threaded` via
    `asyncio.to_thread`, so the heavy work stays off the event loop.
    Pacing:
      - after a drained row: `sleep(0)`, or a 50 ms pause after every
        `_BACKLOG_BATCH` consecutive rows;
      - when the queue is empty, or an iteration raised: `interval_seconds`.

    Args:
        db_path: Database path handed to `_drain_one_threaded`. May also be
            a `sqlite3.Connection` for compatibility with the test suite,
            in which case `_drain_one_sync` is driven directly on the
            event-loop thread.
        embedder: Embedder used for summarisation.
        interval_seconds: Idle / back-off sleep.
        stop_event: When set, the loop returns. The idle, back-off and
            batch pauses wake up as soon as the event is set, so shutdown
            is not delayed by up to `interval_seconds`.

    Raises:
        asyncio.CancelledError: propagated unchanged on task cancellation.
    """
    legacy_conn = isinstance(db_path, sqlite3.Connection)
    # Stash the embedder on the function for the worker thread to read;
    # `_drain_one_threaded` takes only the db path as an argument.
    _drain_one_threaded._embedder = embedder

    async def _pause(seconds: float) -> None:
        """Sleep for `seconds`, returning early if `stop_event` gets set."""
        if stop_event is None or seconds <= 0:
            await asyncio.sleep(max(seconds, 0))
            return
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=seconds)
        except asyncio.TimeoutError:
            # Full interval elapsed without a stop request.
            pass

    consecutive = 0
    while True:
        if stop_event is not None and stop_event.is_set():
            return
        try:
            if legacy_conn:
                did_work = _drain_one_sync(db_path, embedder)
            else:
                did_work = await asyncio.to_thread(
                    _drain_one_threaded, db_path,
                )
            if did_work:
                consecutive += 1
                if consecutive >= _BACKLOG_BATCH:
                    consecutive = 0
                    await _pause(0.05)
                else:
                    await asyncio.sleep(0)
            else:
                consecutive = 0
                await _pause(interval_seconds)
        except asyncio.CancelledError:
            raise
        except Exception:
            log.exception("compression_loop iteration crashed; backing off")
            consecutive = 0
            await _pause(interval_seconds)