code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,870 @@
1
+ """
2
+ ConversationStore — records and searches full conversation turns.
3
+
4
+ Storage layout under .c3/conversations/:
5
+ sessions.json — session metadata index
6
+ {session_id}.jsonl — full turn records (one JSON object per line)
7
+ {session_id}.jsonl.gz — gzip-compressed archive for old sessions
8
+
9
+ Sources:
10
+ - Claude Code: auto-synced from ~/.claude/projects/<slug>/*.jsonl
11
+ - All IDEs: manual logging via add_turn() (called by c3_convo_log MCP tool)
12
+ """
13
+
14
+ import gzip
15
+ import json
16
+ import math
17
+ import re
18
+ import time
19
+ from collections import Counter
20
+ from datetime import datetime
21
+ from pathlib import Path
22
+
23
+ from core import count_tokens
24
+ from services.text_index import TextIndex
25
+
26
+
27
class ConversationStore:
    """Stores, indexes, and searches full conversation turns.

    Turns are kept per session as JSONL files under ``.c3/conversations/``
    and described by a ``sessions.json`` metadata index in the same folder.
    """

    COMPRESS_AFTER_DAYS = 30  # sessions whose last turn is older than this get gzip-compressed
    MAX_TURNS_PER_SESSION = 1000  # cap on turns parsed from a single transcript file
    MAX_TEXT_LEN = 50000  # characters kept per turn (preserve full assistant outputs)
    MAX_SEARCH_TEXT = 1200  # characters used per chunk for TF-IDF scoring
    MAX_TRANSCRIPT_FILES = 100  # newest transcript files considered per sync pass
    SEARCH_CHUNK_CHARS = 1200  # size of one search chunk, in characters
    SEARCH_CHUNK_OVERLAP = 200  # characters shared by two consecutive chunks
37
+
38
def __init__(self, project_path: str):
    """Bind the store to *project_path* and reset all in-memory state.

    The ``<project>/.c3/conversations`` directory is created when missing.
    """
    root = Path(project_path).resolve()
    conversations = root / ".c3" / "conversations"

    self.project_path = root
    self.store_dir = conversations
    self.store_dir.mkdir(parents=True, exist_ok=True)
    self._sessions_file = conversations / "sessions.json"

    # Cached session metadata; _load_sessions fills it and _save_sessions
    # replaces it with whatever was just written.
    self._sessions: list = []

    # The search index is built lazily; "dirty" forces a rebuild on first use.
    self._search_index = TextIndex()
    self._search_meta: dict[str, dict] = {}
    self._search_dirty = True
47
+
48
+ # ── Public API ──────────────────────────────────────────────────────────
49
+
50
def sync(self, source: str = "all", force: bool = False) -> dict:
    """Sync conversations from known transcript providers and import adapters.

    source:
    - "all" (default): sync all supported sources
    - "claude": sync Claude Code transcripts only
    - "gemini": sync Gemini CLI transcripts only
    - "imports": sync external import files from .c3/conversations/imports

    Returns a dict with ``synced``, ``total``, ``by_source``,
    ``requested_source``, ``forced`` and ``available_sources``. The
    ``warnings`` and ``errors`` keys are present only when non-empty.
    """
    selected = self._normalize_source(source)
    claude_dir = self._find_transcript_dir()
    gemini_dir = self._find_gemini_transcript_dir()
    imports_root = self.store_dir / "imports"
    has_imports = imports_root.exists() and any(imports_root.rglob("*.jsonl"))

    availability = {
        "claude": bool(claude_dir),
        "gemini": bool(gemini_dir),
        "imports": bool(has_imports),
    }
    want_all = selected == "all"

    providers = []
    warnings = []

    # Claude: an explicit request for a missing source always warns; with
    # "all" a warning is only raised when Gemini is missing as well.
    if want_all or selected == "claude":
        if availability["claude"]:
            providers.append(("claude", lambda: self._sync_claude(force=force)))
        elif not want_all:
            warnings.append("Claude transcript directory not found for this project")
        elif not availability["gemini"]:
            if availability["imports"]:
                warnings.append("Claude transcript directory not found; synced imports source instead")
            else:
                warnings.append("Claude transcript directory not found; skipped claude source")

    if want_all or selected == "gemini":
        if availability["gemini"]:
            providers.append(("gemini", lambda: self._sync_gemini(force=force)))
        elif not want_all:
            warnings.append("Gemini transcript directory not found for this project")

    if want_all or selected == "imports":
        if availability["imports"]:
            providers.append(("imports", lambda: self._sync_imports(force=force)))
        elif not want_all:
            warnings.append("No import transcripts found under .c3/conversations/imports")

    by_source = {}
    errors = []
    for name, run in providers:
        try:
            count = int(run() or 0)
        except Exception as exc:
            # A failing provider is reported but does not stop the others.
            by_source[name] = 0
            errors.append(f"{name}: {str(exc)[:120]}")
        else:
            by_source[name] = count
    total_synced = sum(by_source.values())

    self._compress_old()
    result = {
        "synced": total_synced,
        "total": len(self._load_sessions()),
        "by_source": by_source,
        "requested_source": selected,
        "forced": bool(force),
        "available_sources": availability,
    }
    if warnings:
        result["warnings"] = warnings
    if errors:
        result["errors"] = errors
    self._search_dirty = True
    return result
125
+
126
def list_sessions(self, limit: int = 100) -> list:
    """Return session metadata sorted by most recent first.

    Args:
        limit: maximum number of sessions to return.

    Returns:
        A new list of at most *limit* session dicts, newest ``started``
        first. The cached session list is left in its original order, so
        listing never changes the order later written to ``sessions.json``.
    """
    # sorted() copies; an in-place sort would reorder the shared cache.
    # "or 0" also tolerates an explicit null "started" value.
    ordered = sorted(
        self._load_sessions(),
        key=lambda s: s.get("started") or 0,
        reverse=True,
    )
    return ordered[:limit]
131
+
132
def get_session(self, session_id: str, offset: int = 0, limit: int = None) -> list:
    """Return the turns of *session_id*, optionally as a page.

    A negative *offset* is treated as 0. When *limit* is ``None`` or not
    positive, everything from *offset* onwards is returned.
    """
    turns = self._read_turns(session_id)
    start = max(offset, 0)
    if limit is not None and limit > 0:
        return turns[start:start + limit]
    return turns[start:]
140
+
141
def add_turn(self, session_id: str, role: str, text: str,
             tool_calls: list = None, ts: float = None, source: str = "manual") -> dict:
    """Manually append a single turn to a session (for non-Claude-Code IDEs).

    Creates the session metadata if it does not exist yet. If the session
    was already gzip-archived, the archive is restored to a plain JSONL file
    first so the new turn stays readable alongside the older ones.

    Args:
        session_id: session the turn belongs to; also the file stem on disk.
        role: speaker of the turn, e.g. "user" or "assistant".
        text: turn text; stored truncated to ``MAX_TEXT_LEN`` characters.
        tool_calls: optional tool-call summaries stored with the turn.
        ts: epoch seconds; defaults to the current time.
        source: origin label, normalized via ``_normalize_source``.

    Returns:
        The turn dict that was written.
    """
    ts = ts or time.time()
    text = text or ""  # tolerate None so slicing below cannot fail
    tokens = count_tokens(text)
    normalized_source = self._normalize_source(source)
    turn = {
        "id": f"t{int(ts * 1000)}",
        "ts": ts,
        "role": role,
        "text": text[:self.MAX_TEXT_LEN],
        "tokens": tokens,
        "source": normalized_source,
    }
    if tool_calls:
        turn["tool_calls"] = tool_calls

    session_file = self.store_dir / f"{session_id}.jsonl"
    archive = self.store_dir / f"{session_id}.jsonl.gz"
    restored = False
    if archive.exists():
        # _read_turns prefers the .gz archive over the plain file, so a turn
        # appended next to an archive would never be read back. Unpack the
        # archive (keeping any plain-file turns after it) before appending.
        with gzip.open(archive, "rb") as fin:
            archived = fin.read()
        tail = session_file.read_bytes() if session_file.exists() else b""
        session_file.write_bytes(archived + tail)
        archive.unlink()
        restored = True

    # Append to JSONL
    with open(session_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(turn, ensure_ascii=False) + "\n")

    # Update session index
    sessions = self._load_sessions()
    existing = next((s for s in sessions if s["session_id"] == session_id), None)
    if existing:
        existing["turns"] = existing.get("turns", 0) + 1
        existing["ended"] = ts
        existing["user_tokens"] = existing.get("user_tokens", 0) + (tokens if role == "user" else 0)
        existing["assistant_tokens"] = existing.get("assistant_tokens", 0) + (tokens if role == "assistant" else 0)
        # Only a missing or "manual" source label is replaced.
        if source and existing.get("source") in (None, "", "manual"):
            existing["source"] = normalized_source
        if role == "user" and existing.get("turns", 0) == 1:
            existing["title"] = text[:100].replace("\n", " ")
        if restored:
            existing["compressed"] = False
    else:
        sessions.append({
            "session_id": session_id,
            "title": (text[:100].replace("\n", " ") if role == "user" else session_id[:24]),
            "source": normalized_source,
            "source_file": None,
            "source_mtime": 0,
            "started": ts,
            "ended": ts,
            "turns": 1,
            "user_tokens": tokens if role == "user" else 0,
            "assistant_tokens": tokens if role == "assistant" else 0,
            "compressed": False,
        })
    self._save_sessions(sessions)
    self._search_dirty = True
    return turn
194
+
195
def search(self, query: str, limit: int = 30, session_id: str = None) -> list:
    """TF-IDF search over chunked conversation turns.

    Args:
        query: free-text query passed to the text index.
        limit: maximum number of results; a non-positive value yields ``[]``.
        session_id: when given, only turns of this session are returned.

    Returns:
        At most *limit* result dicts in ranked order, one per turn (only
        the best-ranked chunk of a turn is reported).
    """
    if limit <= 0:
        return []

    self._ensure_search_index()
    if not self._search_meta:
        return []

    # The session filter is applied after ranking, so when filtering ask the
    # index for every chunk; otherwise hits from other sessions could use up
    # the candidate window and hide matches in the requested session.
    top_k = max(limit * 4, 20)
    if session_id:
        top_k = max(top_k, len(self._search_meta))

    ranked = self._search_index.search(query, top_k=top_k)
    results = []
    seen_turns = set()
    for key, score in ranked:
        meta = self._search_meta.get(key)
        if not meta:
            continue
        if session_id and meta["session_id"] != session_id:
            continue
        turn_key = meta["turn_key"]
        if turn_key in seen_turns:
            # a better-ranked chunk of the same turn was already reported
            continue
        seen_turns.add(turn_key)
        results.append({
            "session_id": meta["session_id"],
            "session_title": meta["session_title"],
            "source": meta["source"],
            "ts": meta["ts"],
            "role": meta["role"],
            "text": meta["text"],
            "snippet": meta["snippet"],
            "tokens": meta["tokens"],
            "turn_source": meta["turn_source"],
            "turn_key": turn_key,
            "chunk_key": key,
            "chunk_index": meta["chunk_index"],
            "score": round(score, 4),
        })
        if len(results) >= limit:
            break
    return results
232
+
233
def get_stats(self) -> dict:
    """Summarize the session index: counts, token totals and per-source tallies."""
    sessions = self._load_sessions()

    def total(field):
        # Missing counters count as zero.
        return sum(entry.get(field, 0) for entry in sessions)

    per_source = Counter()
    for entry in sessions:
        per_source[self._normalize_source(entry.get("source", "manual"))] += 1

    compressed = len([entry for entry in sessions if entry.get("compressed")])
    return {
        "sessions": len(sessions),
        "turns": total("turns"),
        "user_tokens": total("user_tokens"),
        "assistant_tokens": total("assistant_tokens"),
        "compressed_sessions": compressed,
        "sources": dict(per_source),
    }
245
+
246
+ # ── Transcript Parsing ─────────────────────────────────────────────────
247
+
248
+ def _find_transcript_dir(self):
249
+ """Locate Claude Code's project transcript directory."""
250
+ import re as _re
251
+ home = Path.home()
252
+ projects_dir = home / ".claude" / "projects"
253
+ if not projects_dir.exists():
254
+ return None
255
+
256
+ project_str = str(self.project_path)
257
+
258
+ # Claude Code slugifies the absolute path by replacing every
259
+ # non-alphanumeric character with '-' and stripping leading dashes.
260
+ slug = _re.sub(r"[^a-zA-Z0-9]", "-", project_str).lstrip("-")
261
+ direct = projects_dir / slug
262
+ if direct.exists():
263
+ return direct
264
+
265
+ # Fallback 1: old slug algorithm (kept for backwards compat with
266
+ # directories created by earlier versions of C3 or other tools).
267
+ old_slug = project_str.replace("\\", "--").replace("/", "--").replace(":", "").lstrip("-")
268
+ if old_slug != slug:
269
+ old_direct = projects_dir / old_slug
270
+ if old_direct.exists():
271
+ return old_direct
272
+
273
+ # Fallback 2: normalize both sides to bare alphanumerics and find the
274
+ # best-matching directory (handles edge-cases in slugification variants).
275
+ def _bare(s):
276
+ return _re.sub(r"[^a-z0-9]", "", s.lower())
277
+
278
+ target_bare = _bare(project_str)
279
+ best = None
280
+ best_len = 0
281
+ for d in projects_dir.iterdir():
282
+ if not d.is_dir():
283
+ continue
284
+ d_bare = _bare(d.name)
285
+ # Must share all alphanumeric chars of the project path
286
+ if d_bare == target_bare:
287
+ n = len(list(d.glob("*.jsonl")))
288
+ if n > best_len:
289
+ best = d
290
+ best_len = n
291
+ return best
292
+
293
def _sync_claude(self, force: bool = False) -> int:
    """Sync transcripts from Claude Code.

    Considers the ``MAX_TRANSCRIPT_FILES`` most recently modified ``*.jsonl``
    files of the project's transcript directory. A file is skipped when a
    Claude session with the same file name and (within one second) the same
    mtime is already indexed, unless *force* is set.

    Returns:
        Number of transcript files that were (re)imported.
    """
    transcript_dir = self._find_transcript_dir()
    if not transcript_dir:
        return 0

    def _mtime(path):
        # A file may disappear between glob() and stat(); sort it last.
        try:
            return path.stat().st_mtime
        except OSError:
            return 0.0

    jsonl_files = sorted(
        transcript_dir.glob("*.jsonl"), key=_mtime, reverse=True
    )[:self.MAX_TRANSCRIPT_FILES]

    # Only sessions that came from Claude may suppress a re-sync; a session
    # of another source that shares the file name must not. "or 0" keeps a
    # null mtime from breaking the comparison below.
    known_mtimes = {
        s.get("source_file"): s.get("source_mtime") or 0
        for s in self._load_sessions()
        if s.get("source_file") and self._normalize_source(s.get("source")) == "claude"
    }

    synced = 0
    for jf in jsonl_files:
        try:
            mtime = jf.stat().st_mtime
            source_file = jf.name
            unchanged = (
                source_file in known_mtimes
                and abs(known_mtimes[source_file] - mtime) < 1
            )
            if unchanged and not force:
                continue

            turns = self._extract_turns(jf)
            if not turns:
                continue
            for t in turns:
                t["source"] = "claude"

            self._upsert_synced_session(
                session_id=jf.stem,
                turns=turns,
                source="claude",
                source_file=source_file,
                source_mtime=mtime,
            )
            synced += 1
        except Exception:
            # Best effort: one unreadable transcript must not abort the rest.
            continue
    return synced
337
+
338
+ def _find_gemini_transcript_dir(self):
339
+ """Locate Gemini CLI's project transcript directory."""
340
+ import hashlib
341
+ home = Path.home()
342
+ project_str = str(self.project_path)
343
+ slug = hashlib.sha256(project_str.encode('utf-8')).hexdigest()
344
+ chats_dir = home / ".gemini" / "tmp" / slug / "chats"
345
+ if chats_dir.exists() and chats_dir.is_dir():
346
+ return chats_dir
347
+ return None
348
+
349
def _sync_gemini(self, force: bool = False) -> int:
    """Sync transcripts from Gemini CLI.

    Considers the ``MAX_TRANSCRIPT_FILES`` most recently modified ``*.json``
    files of the project's chat directory. A file is skipped when a Gemini
    session with the same file name and (within one second) the same mtime
    is already indexed, unless *force* is set. The session id is the file's
    ``sessionId`` value, falling back to the file stem.

    Returns:
        Number of chat files that were (re)imported.
    """
    transcript_dir = self._find_gemini_transcript_dir()
    if not transcript_dir:
        return 0

    def _mtime(path):
        # A file may disappear between glob() and stat(); sort it last.
        try:
            return path.stat().st_mtime
        except OSError:
            return 0.0

    json_files = sorted(
        transcript_dir.glob("*.json"), key=_mtime, reverse=True
    )[:self.MAX_TRANSCRIPT_FILES]

    # Only Gemini sessions may suppress a re-sync of a Gemini file.
    known_mtimes = {
        s.get("source_file"): s.get("source_mtime") or 0
        for s in self._load_sessions()
        if s.get("source_file") and self._normalize_source(s.get("source")) == "gemini"
    }

    synced = 0
    for jf in json_files:
        try:
            mtime = jf.stat().st_mtime
            source_file = jf.name
            unchanged = (
                source_file in known_mtimes
                and abs(known_mtimes[source_file] - mtime) < 1
            )
            if unchanged and not force:
                continue

            turns = self._extract_turns_gemini(jf)
            if not turns:
                continue

            with open(jf, "r", encoding="utf-8") as f:
                data = json.load(f)
            raw_id = data.get("sessionId") if isinstance(data, dict) else None
            session_id = str(raw_id) if raw_id else jf.stem
            # The id becomes a file name inside the store directory, so an id
            # containing path separators is replaced by the file stem.
            if "/" in session_id or "\\" in session_id:
                session_id = jf.stem

            self._upsert_synced_session(
                session_id=session_id,
                turns=turns,
                source="gemini",
                source_file=source_file,
                source_mtime=mtime,
            )
            synced += 1
        except Exception:
            # Best effort: one unreadable chat file must not abort the rest.
            continue
    return synced
394
+
395
+ def _extract_turns_gemini(self, json_path: Path) -> list:
396
+ turns = []
397
+ try:
398
+ with open(json_path, "r", encoding="utf-8") as f:
399
+ data = json.load(f)
400
+
401
+ messages = data.get("messages", [])
402
+ for i, msg in enumerate(messages):
403
+ role = "assistant" if msg.get("type") == "gemini" else msg.get("type", "user")
404
+ text = msg.get("content", "")
405
+ ts_raw = msg.get("timestamp")
406
+
407
+ ts = json_path.stat().st_mtime
408
+ if ts_raw:
409
+ try:
410
+ ts_raw = ts_raw.replace("Z", "+00:00")
411
+ dt = datetime.fromisoformat(ts_raw)
412
+ ts = dt.timestamp()
413
+ except:
414
+ pass
415
+
416
+ tokens = 0
417
+ tok_data = msg.get("tokens", {})
418
+ if isinstance(tok_data, dict):
419
+ tokens = tok_data.get("output", 0) if role == "assistant" else tok_data.get("input", 0)
420
+ if not tokens:
421
+ tokens = count_tokens(text)
422
+
423
+ turns.append({
424
+ "id": msg.get("id", f"t{int(ts * 1000)}_{i}"),
425
+ "ts": ts,
426
+ "role": role,
427
+ "text": text[:self.MAX_TEXT_LEN],
428
+ "tokens": tokens,
429
+ "source": "gemini",
430
+ "tool_calls": []
431
+ })
432
+ except Exception:
433
+ pass
434
+ return turns
435
+
436
def _sync_imports(self, force: bool = False) -> int:
    """Sync generic JSONL imports from .c3/conversations/imports/<source>/*.jsonl.

    The first path component below ``imports`` names the source; files that
    sit directly in ``imports`` use the source "imports". Files whose
    recorded mtime is unchanged (within one second) are skipped unless
    *force* is set. Returns the number of files imported.
    """
    imports_root = self.store_dir / "imports"
    if not imports_root.exists():
        return 0

    newest_first = sorted(
        imports_root.rglob("*.jsonl"),
        key=lambda f: f.stat().st_mtime,
        reverse=True,
    )

    # Index already-synced sessions by "<source>::<relative file>".
    known = {}
    for s in self._load_sessions():
        if s.get("source_file"):
            label = self._normalize_source(s.get('source', 'manual'))
            known[f"{label}::{s.get('source_file')}"] = s

    synced = 0
    for jf in newest_first[:self.MAX_TRANSCRIPT_FILES]:
        try:
            rel = jf.relative_to(imports_root)
            top_level = rel.parts[0] if len(rel.parts) > 1 else "imports"
            source = self._normalize_source(top_level)
            mtime = jf.stat().st_mtime
            source_file = str(rel).replace("\\", "/")

            previous = known.get(f"{source}::{source_file}")
            if previous is not None and not force:
                if abs(previous.get("source_mtime", 0) - mtime) < 1:
                    continue

            turns = self._extract_turns_generic(jf, source=source)
            if not turns:
                continue

            # Keep source namespaced to avoid collisions with Claude stem ids.
            self._upsert_synced_session(
                session_id=f"{source}_{jf.stem}",
                turns=turns,
                source=source,
                source_file=source_file,
                source_mtime=mtime,
            )
            synced += 1
        except Exception:
            continue
    return synced
477
+
478
+ def _upsert_synced_session(self, session_id: str, turns: list, source: str, source_file: str, source_mtime: float):
479
+ """Write synced turns and upsert session metadata."""
480
+ first_user = next((t.get("text", "") for t in turns if t.get("role") == "user"), "")
481
+ title = (first_user[:100].strip() or session_id[:24]).replace("\n", " ")
482
+ user_tok = sum(t.get("tokens", 0) for t in turns if t.get("role") == "user")
483
+ asst_tok = sum(t.get("tokens", 0) for t in turns if t.get("role") == "assistant")
484
+
485
+ meta = {
486
+ "session_id": session_id,
487
+ "title": title,
488
+ "source": self._normalize_source(source),
489
+ "source_file": source_file,
490
+ "source_mtime": source_mtime,
491
+ "started": turns[0]["ts"] if turns else time.time(),
492
+ "ended": turns[-1]["ts"] if turns else time.time(),
493
+ "turns": len(turns),
494
+ "user_tokens": user_tok,
495
+ "assistant_tokens": asst_tok,
496
+ "compressed": False,
497
+ }
498
+ self._write_turns(session_id, turns)
499
+ self._upsert_session(meta)
500
+
501
+ def _extract_turns(self, jsonl_path: Path) -> list:
502
+ """Parse a Claude Code JSONL file into a list of turn dicts."""
503
+ entries = []
504
+ try:
505
+ with open(jsonl_path, encoding="utf-8", errors="replace") as f:
506
+ for line in f:
507
+ line = line.strip()
508
+ if not line:
509
+ continue
510
+ try:
511
+ entries.append(json.loads(line))
512
+ except json.JSONDecodeError:
513
+ continue
514
+ except Exception:
515
+ return []
516
+
517
+ turns = []
518
+ turn_num = 0
519
+ for entry in entries:
520
+ etype = entry.get("type", "")
521
+ if etype in ("progress", "file-history-snapshot", "system"):
522
+ continue
523
+
524
+ role = entry.get("role", "")
525
+ msg = entry.get("message", {})
526
+ if isinstance(msg, dict):
527
+ role = role or msg.get("role", "")
528
+
529
+ if role not in ("user", "assistant"):
530
+ continue
531
+
532
+ text, tool_calls = self._extract_content(entry)
533
+ if not text and not tool_calls:
534
+ continue
535
+
536
+ turn_num += 1
537
+ ts_raw = entry.get("timestamp", "")
538
+ t = {
539
+ "id": f"t{turn_num:04d}",
540
+ "ts": self._parse_ts(ts_raw),
541
+ "role": role,
542
+ "text": text[:self.MAX_TEXT_LEN],
543
+ "tokens": count_tokens(text),
544
+ }
545
+ if tool_calls:
546
+ t["tool_calls"] = tool_calls
547
+ turns.append(t)
548
+
549
+ if len(turns) >= self.MAX_TURNS_PER_SESSION:
550
+ break
551
+
552
+ return turns
553
+
554
+ def _extract_turns_generic(self, jsonl_path: Path, source: str = "imports") -> list:
555
+ """Parse generic JSONL transcripts from non-Claude systems.
556
+
557
+ Supported line shapes:
558
+ - {"role":"user|assistant","text":"...","ts":...}
559
+ - {"role":"...","content":"..."}
560
+ - {"message":{"role":"...","content":"..."},"timestamp":"..."}
561
+ - {"content":[{"type":"text","text":"..."}], "role":"..."}
562
+ """
563
+ entries = []
564
+ try:
565
+ with open(jsonl_path, encoding="utf-8", errors="replace") as f:
566
+ for line in f:
567
+ line = line.strip()
568
+ if not line:
569
+ continue
570
+ try:
571
+ entries.append(json.loads(line))
572
+ except json.JSONDecodeError:
573
+ continue
574
+ except Exception:
575
+ return []
576
+
577
+ turns = []
578
+ turn_num = 0
579
+ for entry in entries:
580
+ if not isinstance(entry, dict):
581
+ continue
582
+ role = (entry.get("role") or "").strip().lower()
583
+ msg = entry.get("message", {})
584
+ if isinstance(msg, dict):
585
+ role = role or (msg.get("role") or "").strip().lower()
586
+ if role not in ("user", "assistant"):
587
+ continue
588
+
589
+ text, tool_calls = self._extract_content(entry)
590
+ if not text and isinstance(entry.get("text"), str):
591
+ text = entry.get("text", "")
592
+ if not text and isinstance(msg, dict):
593
+ mcontent = msg.get("content")
594
+ if isinstance(mcontent, str):
595
+ text = mcontent
596
+
597
+ if not text and not tool_calls:
598
+ continue
599
+
600
+ turn_num += 1
601
+ ts_raw = entry.get("ts", entry.get("timestamp", ""))
602
+ t = {
603
+ "id": f"t{turn_num:04d}",
604
+ "ts": self._parse_ts(ts_raw),
605
+ "role": role,
606
+ "text": (text or "")[:self.MAX_TEXT_LEN],
607
+ "tokens": count_tokens(text or ""),
608
+ "source": self._normalize_source(source),
609
+ }
610
+ if tool_calls:
611
+ t["tool_calls"] = tool_calls
612
+ turns.append(t)
613
+ if len(turns) >= self.MAX_TURNS_PER_SESSION:
614
+ break
615
+ return turns
616
+
617
+ def _extract_content(self, entry: dict):
618
+ """Return (text_str, tool_calls_list) from a transcript entry."""
619
+ parts = []
620
+ tool_calls = []
621
+
622
+ content = entry.get("content", "")
623
+ msg = entry.get("message", {})
624
+ if isinstance(msg, dict):
625
+ content = content or msg.get("content", "")
626
+
627
+ if isinstance(content, str) and content:
628
+ parts.append(content)
629
+ elif isinstance(content, list):
630
+ for block in content:
631
+ if not isinstance(block, dict):
632
+ if isinstance(block, str):
633
+ parts.append(block)
634
+ continue
635
+ btype = block.get("type", "")
636
+ if btype == "text":
637
+ t = block.get("text", "")
638
+ if t:
639
+ parts.append(t)
640
+ elif btype == "tool_use":
641
+ name = block.get("name", "")
642
+ inp = block.get("input", {})
643
+ args_str = ""
644
+ if isinstance(inp, dict):
645
+ args_str = " ".join(
646
+ f"{k}={str(v)[:80]}" for k, v in list(inp.items())[:4]
647
+ )[:160]
648
+ tool_calls.append({"tool": name, "args": args_str})
649
+ # Skip tool_result and thinking blocks
650
+
651
+ # Preserve line boundaries so markdown lists/checkboxes remain parseable in UI.
652
+ return "\n".join(p for p in parts if p).strip(), tool_calls
653
+
654
+ @staticmethod
655
+ def _parse_ts(ts_raw) -> float:
656
+ if isinstance(ts_raw, (int, float)):
657
+ return float(ts_raw)
658
+ if isinstance(ts_raw, str) and ts_raw:
659
+ try:
660
+ dt = datetime.fromisoformat(ts_raw.replace("Z", "+00:00"))
661
+ return dt.timestamp()
662
+ except Exception:
663
+ pass
664
+ return time.time()
665
+
666
+ # ── Storage ────────────────────────────────────────────────────────────
667
+
668
+ def _write_turns(self, session_id: str, turns: list):
669
+ path = self.store_dir / f"{session_id}.jsonl"
670
+ with open(path, "w", encoding="utf-8") as f:
671
+ for t in turns:
672
+ f.write(json.dumps(t, ensure_ascii=False) + "\n")
673
+ self._search_dirty = True
674
+
675
+ def _read_turns(self, session_id: str) -> list:
676
+ gz_path = self.store_dir / f"{session_id}.jsonl.gz"
677
+ plain_path = self.store_dir / f"{session_id}.jsonl"
678
+ turns = []
679
+
680
+ source = None
681
+ if gz_path.exists():
682
+ source = gz_path
683
+ opener = lambda p: gzip.open(p, "rt", encoding="utf-8")
684
+ elif plain_path.exists():
685
+ source = plain_path
686
+ opener = lambda p: open(p, encoding="utf-8")
687
+
688
+ if source:
689
+ try:
690
+ with opener(source) as f:
691
+ for line in f:
692
+ line = line.strip()
693
+ if line:
694
+ try:
695
+ turns.append(json.loads(line))
696
+ except Exception:
697
+ pass
698
+ except Exception:
699
+ pass
700
+
701
+ return turns
702
+
703
+ def _compress_old(self):
704
+ """gzip-compress session files older than COMPRESS_AFTER_DAYS."""
705
+ cutoff = time.time() - self.COMPRESS_AFTER_DAYS * 86400
706
+ sessions = self._load_sessions()
707
+ changed = False
708
+ for s in sessions:
709
+ if s.get("compressed") or s.get("ended", 0) > cutoff:
710
+ continue
711
+ sid = s["session_id"]
712
+ plain = self.store_dir / f"{sid}.jsonl"
713
+ gz = self.store_dir / f"{sid}.jsonl.gz"
714
+ if not plain.exists() or gz.exists():
715
+ continue
716
+ try:
717
+ with open(plain, "rb") as fin, gzip.open(gz, "wb") as fout:
718
+ fout.write(fin.read())
719
+ plain.unlink()
720
+ s["compressed"] = True
721
+ changed = True
722
+ except Exception:
723
+ pass
724
+ if changed:
725
+ self._save_sessions(sessions)
726
+ self._search_dirty = True
727
+
728
+ def _upsert_session(self, meta: dict):
729
+ sessions = self._load_sessions()
730
+ for i, s in enumerate(sessions):
731
+ if s["session_id"] == meta["session_id"]:
732
+ sessions[i] = meta
733
+ self._save_sessions(sessions)
734
+ return
735
+ sessions.append(meta)
736
+ self._save_sessions(sessions)
737
+
738
+ def _load_sessions(self) -> list:
739
+ if self._sessions:
740
+ return self._sessions
741
+ if self._sessions_file.exists():
742
+ try:
743
+ with open(self._sessions_file, encoding="utf-8") as f:
744
+ loaded = json.load(f)
745
+ changed = False
746
+ for s in loaded:
747
+ src = s.get("source")
748
+ if not src:
749
+ src = "claude" if s.get("source_file") else "manual"
750
+ s["source"] = src
751
+ changed = True
752
+ norm = self._normalize_source(src)
753
+ if norm != src:
754
+ s["source"] = norm
755
+ changed = True
756
+ self._sessions = loaded
757
+ if changed:
758
+ self._save_sessions(self._sessions)
759
+ return self._sessions
760
+ except Exception:
761
+ pass
762
+ self._sessions = []
763
+ return self._sessions
764
+
765
+ def _ensure_search_index(self):
766
+ if not self._search_dirty:
767
+ return
768
+
769
+ docs = {}
770
+ meta = {}
771
+ for session in self._load_sessions():
772
+ sid = session.get("session_id", "")
773
+ if not sid:
774
+ continue
775
+ try:
776
+ turns = self._read_turns(sid)
777
+ except Exception:
778
+ continue
779
+ for turn in turns:
780
+ turn_key = f"{sid}:{turn.get('id', '')}"
781
+ for chunk_index, snippet in enumerate(self._chunk_text(turn.get("text", ""))):
782
+ chunk_key = f"{turn_key}:{chunk_index}"
783
+ docs[chunk_key] = snippet[:self.MAX_SEARCH_TEXT]
784
+ meta[chunk_key] = {
785
+ "turn_key": turn_key,
786
+ "session_id": sid,
787
+ "session_title": session.get("title", ""),
788
+ "source": session.get("source", "manual"),
789
+ "ts": turn.get("ts", 0),
790
+ "role": turn.get("role", ""),
791
+ "text": turn.get("text", ""),
792
+ "snippet": snippet,
793
+ "tokens": turn.get("tokens", 0),
794
+ "turn_source": turn.get("source", session.get("source", "manual")),
795
+ "chunk_index": chunk_index,
796
+ }
797
+ self._search_index.rebuild(docs)
798
+ self._search_meta = meta
799
+ self._search_dirty = False
800
+
801
+ def _save_sessions(self, sessions: list):
802
+ self._sessions = sessions
803
+ with open(self._sessions_file, "w", encoding="utf-8") as f:
804
+ json.dump(sessions, f, ensure_ascii=False, indent=2)
805
+ self._search_dirty = True
806
+
807
+ def _chunk_text(self, text: str) -> list[str]:
808
+ text = (text or "").strip()
809
+ if not text:
810
+ return []
811
+ if len(text) <= self.SEARCH_CHUNK_CHARS:
812
+ return [text]
813
+
814
+ chunks = []
815
+ step = max(1, self.SEARCH_CHUNK_CHARS - self.SEARCH_CHUNK_OVERLAP)
816
+ start = 0
817
+ while start < len(text):
818
+ end = min(len(text), start + self.SEARCH_CHUNK_CHARS)
819
+ chunks.append(text[start:end])
820
+ if end >= len(text):
821
+ break
822
+ start += step
823
+ return chunks
824
+
825
+ # ── TF-IDF Search ──────────────────────────────────────────────────────
826
+
827
+ def _tfidf_search(self, query: str, docs: dict, top_k: int) -> list:
828
+ q_terms = Counter(self._tokenize(query))
829
+ N = len(docs)
830
+ if N == 0 or not q_terms:
831
+ return []
832
+
833
+ # Build document-frequency table
834
+ df: Counter = Counter()
835
+ tok_docs: dict = {}
836
+ for key, text in docs.items():
837
+ terms = self._tokenize(text)
838
+ tok_docs[key] = Counter(terms)
839
+ for t in set(terms):
840
+ df[t] += 1
841
+
842
+ scores: dict = {}
843
+ for key, term_counts in tok_docs.items():
844
+ total = sum(term_counts.values()) or 1
845
+ score = 0.0
846
+ for term, q_count in q_terms.items():
847
+ tf = term_counts.get(term, 0) / total
848
+ idf = math.log((N + 1) / (df.get(term, 0) + 1)) + 1
849
+ score += tf * idf * q_count
850
+ if score > 0:
851
+ scores[key] = score
852
+
853
+ return sorted(scores.items(), key=lambda x: x[1], reverse=True)[:top_k]
854
+
855
+ @staticmethod
856
+ def _tokenize(text: str) -> list:
857
+ return re.findall(r"[a-z0-9_]+", text.lower())
858
+
859
+ @staticmethod
860
+ def _normalize_source(source: str) -> str:
861
+ raw = (source or "manual").strip().lower()
862
+ if not raw:
863
+ return "manual"
864
+ aliases = {
865
+ "transcript": "claude",
866
+ "claude-code": "claude",
867
+ "claude_code": "claude",
868
+ "mcp": "manual",
869
+ }
870
+ return aliases.get(raw, raw)