code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,734 @@
1
+ """File Memory Store — persistent structural index of source files.
2
+
3
+ Maintains per-file records with section maps (classes, functions, imports)
4
+ and exact line ranges so Claude can do targeted reads with offset/limit
5
+ instead of reading entire files.
6
+
7
+ Storage: .c3/file_memory/ directory, one JSON file per source file.
8
+ """
9
+ import hashlib
10
+ import json
11
+ import re
12
+ import time
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+ from services.compressor import STRUCTURE_PATTERNS
17
+ from services.parser import PARSER_VERSION, extract_sections_ast
18
+ from services.text_index import TextIndex
19
+
20
# Language detection by extension: maps a file suffix to the language label
# stored in each record's "language" field.
LANG_MAP = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.tsx': 'typescript',
    '.jsx': 'javascript',
    '.r': 'R',
    '.R': 'R',
    '.go': 'go',
    '.rs': 'rust',
    '.java': 'java',
    '.rb': 'ruby',
    '.c': 'c',
    '.cpp': 'cpp',
    '.h': 'c',
    '.cs': 'csharp',
    '.html': 'html',
    '.htm': 'html',
    '.md': 'markdown',
    '.css': 'css',
    '.json': 'json',
    '.yaml': 'yaml',
    '.yml': 'yaml',
}

# Extensions we know how to extract structure from.  This is exactly the set
# of suffixes that have a language label above, so it is derived from the map
# rather than maintained as a second hand-written list.
CODE_EXTENSIONS = set(LANG_MAP)
34
+
35
+
36
+ class FileMemoryStore:
37
+ """Persistent structural index of source files."""
38
+
39
+ def __init__(self, project_path: str):
40
+ self.project_path = Path(project_path)
41
+ self.store_dir = self.project_path / ".c3" / "file_memory"
42
+ self.store_dir.mkdir(parents=True, exist_ok=True)
43
+ self._queue_state_path = self.store_dir / "_queue.json"
44
+ self._diag_path = self.store_dir / "_diagnostics.jsonl"
45
+ self._map_cache = {}
46
+ self._search_index = TextIndex()
47
+ self._rebuild_search_index()
48
+
49
+ def get(self, rel_path: str) -> Optional[dict]:
50
+ """Load a file's memory record, or None if not tracked."""
51
+ store_file = self._store_path(rel_path)
52
+ if not store_file.exists():
53
+ return None
54
+ try:
55
+ with open(store_file, encoding="utf-8") as f:
56
+ return json.load(f)
57
+ except Exception:
58
+ return None
59
+
60
+ def update(self, rel_path: str, ai_summary: str = None) -> Optional[dict]:
61
+ """Re-extract sections from file and persist the record.
62
+
63
+ Returns the updated record, or None if the file doesn't exist.
64
+ """
65
+ full_path = self.project_path / rel_path
66
+ if not full_path.exists():
67
+ return None
68
+
69
+ try:
70
+ stat = full_path.stat()
71
+ except Exception:
72
+ return None
73
+
74
+ try:
75
+ content = full_path.read_text(encoding="utf-8", errors="replace")
76
+ except Exception:
77
+ return None
78
+
79
+ ext = full_path.suffix.lower()
80
+ lines = content.splitlines()
81
+ content_hash = hashlib.md5(content.encode()).hexdigest()
82
+
83
+ # Check if we already have an up-to-date record
84
+ existing = self.get(rel_path)
85
+ if existing and existing.get("content_hash") == content_hash:
86
+ # If it was a generic "full file" but now we have structural tools, force update
87
+ was_generic = len(existing.get("sections", [])) <= 1 and existing.get("sections", [{}])[0].get("name") == "(full file)"
88
+ # Also force re-extraction when the parser logic has been bumped
89
+ stale_parser = existing.get("parser_version") != PARSER_VERSION
90
+ if not ((was_generic and ext in CODE_EXTENSIONS) or stale_parser):
91
+ # Only update AI summary if provided and different
92
+ if ai_summary and existing.get("summary") != ai_summary:
93
+ existing["summary"] = ai_summary
94
+ existing["updated_at"] = time.strftime("%Y-%m-%dT%H:%M:%S")
95
+ self._save(rel_path, existing)
96
+ self._search_index.add_or_update(rel_path, self._search_doc(existing))
97
+ self._cache_map(rel_path, existing)
98
+ return existing
99
+ # If we are here, we are forcing a fresh extraction
100
+
101
+ sections = self._extract_sections(full_path, content)
102
+
103
+ record = {
104
+ "path": rel_path,
105
+ "content_hash": content_hash,
106
+ "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
107
+ "lines": len(lines),
108
+ "size_bytes": stat.st_size,
109
+ "mtime_ns": getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000)),
110
+ "language": LANG_MAP.get(ext, ext.lstrip('.')),
111
+ "summary": ai_summary or (existing.get("summary") if existing else None),
112
+ "parser_version": PARSER_VERSION,
113
+ "sections": sections,
114
+ }
115
+
116
+ self._save(rel_path, record)
117
+ self._cache_map(rel_path, record)
118
+ self._search_index.add_or_update(rel_path, self._search_doc(record))
119
+ return record
120
+
121
+ def get_map(self, rel_path: str) -> Optional[str]:
122
+ """Return a formatted structural map for Claude consumption.
123
+
124
+ Returns None if no record exists. Call update() first to ensure fresh data.
125
+ """
126
+ record = self.get(rel_path)
127
+ if not record:
128
+ return None
129
+ return self._cache_map(rel_path, record)
130
+
131
+ def get_or_build_map(self, rel_path: str) -> str:
132
+ """Get map if cached, otherwise build it on-demand."""
133
+ record = self.get(rel_path)
134
+
135
+ # Check staleness
136
+ if record and not self.needs_update(rel_path):
137
+ return self._cache_map(rel_path, record)
138
+
139
+ # Build fresh
140
+ updated = self.update(rel_path)
141
+ if updated:
142
+ return self._cache_map(rel_path, updated)
143
+
144
+ return f"[file_map] Could not build map for {rel_path} — file not found or unreadable."
145
+
146
+ def get_or_build_dense_map(self, rel_path: str) -> str:
147
+ """Get a compact single-line-per-symbol map (4b). Saves ~40% tokens vs full map."""
148
+ record = self.get(rel_path)
149
+ if record and not self.needs_update(rel_path):
150
+ return self._format_dense_map(record)
151
+ updated = self.update(rel_path)
152
+ if updated:
153
+ return self._format_dense_map(updated)
154
+ return f"[file_map] Could not build map for {rel_path} — file not found or unreadable."
155
+
156
+ def needs_update(self, rel_path: str) -> bool:
157
+ """True if the file has changed since we last indexed it."""
158
+ record = self.get(rel_path)
159
+ if not record:
160
+ return True
161
+
162
+ full_path = self.project_path / rel_path
163
+ if not full_path.exists():
164
+ return False
165
+
166
+ try:
167
+ stat = full_path.stat()
168
+ current_mtime_ns = getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000))
169
+ if (
170
+ record.get("mtime_ns") == current_mtime_ns
171
+ and record.get("size_bytes") == stat.st_size
172
+ ):
173
+ return False
174
+ content = full_path.read_text(encoding="utf-8", errors="replace")
175
+ current_hash = hashlib.md5(content.encode()).hexdigest()
176
+ return current_hash != record.get("content_hash")
177
+ except Exception:
178
+ return True
179
+
180
+ def get_symbol_ranges(self, rel_path: str, symbol_names: list[str], return_matches: bool = False) -> list:
181
+ """Resolve symbol names to line ranges (1-indexed).
182
+ Supports exact match and substring/partial match (e.g. 'handle_req' matches 'handle_request_data').
183
+ Supports exact regex if anchored (e.g. '^cmd_benchmark$').
184
+ """
185
+ record = self.get(rel_path)
186
+ if not record or "sections" not in record:
187
+ return []
188
+
189
+ ranges = []
190
+ matches = []
191
+
192
+ # Pre-compile regexes
193
+ compiled_targets = []
194
+ for name in symbol_names:
195
+ if name.startswith('^') and name.endswith('$'):
196
+ try:
197
+ compiled_targets.append((name, re.compile(name, re.IGNORECASE)))
198
+ except Exception:
199
+ compiled_targets.append((name, name.lower()))
200
+ elif name in ('<main>', '<globals>', '<imports>'):
201
+ compiled_targets.append((name, name))
202
+ else:
203
+ compiled_targets.append((name, name.lower()))
204
+
205
+ def _matches(section_name: str, target_data) -> bool:
206
+ orig_name, target = target_data
207
+ sn = section_name.lower()
208
+ if isinstance(target, re.Pattern):
209
+ return bool(target.match(section_name))
210
+ if orig_name in ('<main>', '<globals>', '<imports>'):
211
+ return False # Handled separately if needed, or matched below if actually named that
212
+ if sn == target:
213
+ return True
214
+ # Substring match
215
+ if target in sn or sn in target:
216
+ return True
217
+ return False
218
+
219
+ def search_sections(sections):
220
+ for sec in sections:
221
+ sec_name = sec.get("name", "")
222
+ for target_data in compiled_targets:
223
+ if _matches(sec_name, target_data):
224
+ ranges.append((sec["line_start"], sec["line_end"]))
225
+ matches.append({"target": target_data[0], "match": sec_name, "range": (sec["line_start"], sec["line_end"])})
226
+ # Don't break here, let it find all matches for this section if multiple targets apply
227
+ # But wait, if one target matches, we don't want to add the section multiple times for the same target
228
+ # We'll deduplicate later
229
+
230
+ if "children" in sec:
231
+ search_sections(sec["children"])
232
+
233
+ search_sections(record["sections"])
234
+
235
+ # Deduplicate matches
236
+ unique_matches = []
237
+ seen = set()
238
+ for m in matches:
239
+ key = (m["target"], m["match"], m["range"])
240
+ if key not in seen:
241
+ seen.add(key)
242
+ unique_matches.append(m)
243
+
244
+ unique_ranges = list(set(ranges))
245
+
246
+ if return_matches:
247
+ return unique_matches
248
+ return unique_ranges
249
+
250
+ def list_tracked(self) -> list:
251
+ """Return relative paths of all tracked files."""
252
+ tracked = []
253
+ for f in self.store_dir.glob("*.json"):
254
+ if f.name.startswith("_"):
255
+ continue
256
+ try:
257
+ with open(f, encoding="utf-8") as fh:
258
+ data = json.load(fh)
259
+ tracked.append(data.get("path", ""))
260
+ except Exception:
261
+ continue
262
+ return [p for p in tracked if p]
263
+
264
+ def search(self, query: str, top_k: int = 5) -> list[dict]:
265
+ results = []
266
+ for rel_path, score in self._search_index.search(query, top_k=top_k):
267
+ record = self.get(rel_path)
268
+ if not record:
269
+ continue
270
+ results.append({
271
+ "path": rel_path,
272
+ "language": record.get("language", ""),
273
+ "summary": record.get("summary"),
274
+ "score": round(score, 4),
275
+ "sections": len(record.get("sections", [])),
276
+ })
277
+ return results
278
+
279
+ def queue_for_update(self, rel_path: str):
280
+ """Add a file to the async update queue (used by Read hook)."""
281
+ try:
282
+ state = self._load_queue_state()
283
+ pending = state.get("pending", [])
284
+ inflight = state.get("inflight", [])
285
+ if rel_path not in pending and rel_path not in inflight:
286
+ pending.append(rel_path)
287
+ state["pending"] = pending
288
+ self._save_queue_state(state)
289
+ except Exception:
290
+ self._record_diag("queue_for_update_failed", rel_path)
291
+
292
+ def drain_queue(self) -> list:
293
+ """Claim queued work without dropping it on crash."""
294
+ try:
295
+ state = self._load_queue_state()
296
+ pending = state.get("pending", [])
297
+ inflight = state.get("inflight", [])
298
+ if inflight:
299
+ claimed = inflight
300
+ else:
301
+ claimed = []
302
+ seen = set()
303
+ for path in pending:
304
+ clean = path.strip()
305
+ if clean and clean not in seen:
306
+ seen.add(clean)
307
+ claimed.append(clean)
308
+ state["pending"] = []
309
+ state["inflight"] = claimed
310
+ self._save_queue_state(state)
311
+ return claimed
312
+ except Exception:
313
+ self._record_diag("drain_queue_failed", "")
314
+ return []
315
+
316
+ def complete_updates(self, rel_paths: list[str], failed: bool = False):
317
+ try:
318
+ state = self._load_queue_state()
319
+ inflight = [p for p in state.get("inflight", []) if p not in set(rel_paths)]
320
+ if failed:
321
+ pending = state.get("pending", [])
322
+ for path in rel_paths:
323
+ if path not in pending:
324
+ pending.append(path)
325
+ state["pending"] = pending
326
+ state["inflight"] = inflight
327
+ self._save_queue_state(state)
328
+ except Exception:
329
+ self._record_diag("complete_updates_failed", ",".join(rel_paths))
330
+
331
+ # ── Private ──────────────────────────────────────────────
332
+
333
+ def _store_path(self, rel_path: str) -> Path:
334
+ """Map a relative file path to its JSON store file."""
335
+ key = hashlib.md5(rel_path.replace("\\", "/").encode()).hexdigest()
336
+ return self.store_dir / f"{key}.json"
337
+
338
+ def _save(self, rel_path: str, record: dict):
339
+ """Persist a record to disk."""
340
+ store_file = self._store_path(rel_path)
341
+ try:
342
+ with open(store_file, "w", encoding="utf-8") as f:
343
+ json.dump(record, f, indent=2)
344
+ except Exception:
345
+ self._record_diag("save_failed", rel_path)
346
+
347
+ def _cache_map(self, rel_path: str, record: dict) -> str:
348
+ """Return cached formatted map when the record content hash is unchanged."""
349
+ cache_key = rel_path.replace("\\", "/")
350
+ content_hash = record.get("content_hash")
351
+ cached = self._map_cache.get(cache_key)
352
+ if cached and cached[0] == content_hash:
353
+ return cached[1]
354
+ rendered = self._format_map(record)
355
+ self._map_cache[cache_key] = (content_hash, rendered)
356
+ return rendered
357
+
358
+ def _search_doc(self, record: dict) -> str:
359
+ fields = [record.get("path", ""), record.get("language", ""), record.get("summary", "")]
360
+ for section in record.get("sections", []):
361
+ fields.append(section.get("name", ""))
362
+ fields.append(section.get("type", ""))
363
+ fields.append(section.get("doc", ""))
364
+ for child in section.get("children", []):
365
+ fields.append(child.get("name", ""))
366
+ fields.append(child.get("type", ""))
367
+ return " ".join(str(field) for field in fields if field)
368
+
369
+ def _rebuild_search_index(self):
370
+ docs = {}
371
+ for rel_path in self.list_tracked():
372
+ record = self.get(rel_path)
373
+ if record:
374
+ docs[rel_path] = self._search_doc(record)
375
+ self._search_index.rebuild(docs)
376
+
377
+ def _load_queue_state(self) -> dict:
378
+ if not self._queue_state_path.exists():
379
+ return {"pending": [], "inflight": []}
380
+ try:
381
+ with open(self._queue_state_path, encoding="utf-8") as handle:
382
+ state = json.load(handle)
383
+ except Exception:
384
+ return {"pending": [], "inflight": []}
385
+ state.setdefault("pending", [])
386
+ state.setdefault("inflight", [])
387
+ return state
388
+
389
+ def _save_queue_state(self, state: dict):
390
+ with open(self._queue_state_path, "w", encoding="utf-8") as handle:
391
+ json.dump(state, handle, indent=2)
392
+
393
+ def _record_diag(self, kind: str, rel_path: str, detail: str = ""):
394
+ entry = {
395
+ "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
396
+ "kind": kind,
397
+ "path": rel_path,
398
+ "detail": detail,
399
+ }
400
+ try:
401
+ with open(self._diag_path, "a", encoding="utf-8") as handle:
402
+ handle.write(json.dumps(entry) + "\n")
403
+ except Exception:
404
+ pass
405
+
406
+ def _extract_sections(self, filepath: Path, content: str) -> list:
407
+ """Extract structural sections with line ranges from source code."""
408
+ ext = filepath.suffix.lower()
409
+
410
+ # Try AST parser first
411
+ ast_sections = extract_sections_ast(content, ext)
412
+ if ast_sections is not None:
413
+ return ast_sections
414
+
415
+ lines = content.splitlines()
416
+ patterns = STRUCTURE_PATTERNS.get(ext, {})
417
+
418
+ if not patterns:
419
+ return self._extract_generic_sections(lines)
420
+
421
+ sections = []
422
+ i = 0
423
+ current_class = None # Track current class for method nesting
424
+
425
+ while i < len(lines):
426
+ line = lines[i]
427
+ stripped = line.rstrip()
428
+ lstripped = line.lstrip()
429
+ indent = len(line) - len(lstripped)
430
+
431
+ # End class scope when indent returns to class level or lower
432
+ if current_class and indent <= current_class.get("_indent", 0) and lstripped:
433
+ # Finalize the class's line_end
434
+ current_class["line_end"] = i # Previous line (0-indexed, but we display 1-indexed)
435
+ current_class = None
436
+
437
+ for kind, pattern in patterns.items():
438
+ # Match against lstripped so indented methods are detected
439
+ if re.match(pattern, lstripped, re.MULTILINE):
440
+ line_start = i + 1 # 1-indexed
441
+ line_end = self._find_block_end(lines, i, ext)
442
+
443
+ section = {
444
+ "type": self._normalize_type(kind),
445
+ "name": self._extract_name(kind, lstripped),
446
+ "line_start": line_start,
447
+ "line_end": line_end,
448
+ "signature": lstripped,
449
+ }
450
+
451
+ # Extract docstring
452
+ doc = self._extract_docstring(lines, i + 1, ext)
453
+ if doc:
454
+ section["doc"] = doc
455
+
456
+ if kind == 'decorator':
457
+ # Skip standalone decorator lines — they'll be captured
458
+ # as part of the next function/class definition
459
+ pass
460
+ elif kind in ('class', 'interface', 'enum'):
461
+ section["children"] = []
462
+ section["_indent"] = indent
463
+ sections.append(section)
464
+ current_class = section
465
+ elif current_class and indent > current_class.get("_indent", 0):
466
+ # Method inside a class
467
+ section["type"] = "method"
468
+ current_class["children"].append(section)
469
+ else:
470
+ sections.append(section)
471
+
472
+ break
473
+ i += 1
474
+
475
+ # Finalize any open class
476
+ if current_class:
477
+ current_class["line_end"] = len(lines)
478
+
479
+ # Clean up internal tracking keys
480
+ for s in sections:
481
+ s.pop("_indent", None)
482
+ for child in s.get("children", []):
483
+ child.pop("_indent", None)
484
+
485
+ return sections
486
+
487
+ def _extract_generic_sections(self, lines: list) -> list:
488
+ """Fallback for unknown languages — just report line count."""
489
+ return [{"type": "content", "name": "(full file)", "line_start": 1, "line_end": len(lines)}]
490
+
491
+ def _find_block_end(self, lines: list, start: int, ext: str) -> int:
492
+ """Find the end line of a code block starting at `start`."""
493
+ if ext == '.py':
494
+ return self._find_python_block_end(lines, start)
495
+ # For brace-based languages, find matching brace
496
+ if ext in ('.js', '.ts', '.tsx', '.jsx', '.java', '.go', '.rs', '.c', '.cpp', '.h', '.cs'):
497
+ return self._find_brace_block_end(lines, start)
498
+ # Default: use indentation
499
+ return self._find_python_block_end(lines, start)
500
+
501
+ def _find_python_block_end(self, lines: list, start: int) -> int:
502
+ """Find end of a Python block by indentation."""
503
+ if start >= len(lines):
504
+ return start + 1
505
+
506
+ base_indent = len(lines[start]) - len(lines[start].lstrip())
507
+
508
+ for i in range(start + 1, len(lines)):
509
+ line = lines[i]
510
+ stripped = line.strip()
511
+ if not stripped:
512
+ continue
513
+ current_indent = len(line) - len(line.lstrip())
514
+ if current_indent <= base_indent:
515
+ return i # 1-indexed
516
+ # Check for decorators at same level (next function)
517
+ if current_indent == base_indent and stripped.startswith('@'):
518
+ return i
519
+ return len(lines)
520
+
521
+ def _find_brace_block_end(self, lines: list, start: int) -> int:
522
+ """Find end of a brace-delimited block."""
523
+ depth = 0
524
+ found_open = False
525
+ for i in range(start, len(lines)):
526
+ line = lines[i]
527
+ for ch in line:
528
+ if ch == '{':
529
+ depth += 1
530
+ found_open = True
531
+ elif ch == '}':
532
+ depth -= 1
533
+ if found_open and depth == 0:
534
+ return i + 1 # 1-indexed
535
+ return len(lines)
536
+
537
+ def _normalize_type(self, kind: str) -> str:
538
+ """Map pattern kind to standard section type."""
539
+ mapping = {
540
+ 'arrow': 'function',
541
+ 'assignment': 'constant',
542
+ 'library': 'import',
543
+ 'export': 'function',
544
+ 'decorator': 'decorator',
545
+ }
546
+ return mapping.get(kind, kind)
547
+
548
+ def _extract_name(self, kind: str, line: str) -> str:
549
+ """Extract the name from a matched line."""
550
+ if kind in ('import', 'library'):
551
+ return line.strip()
552
+
553
+ # Try to extract identifier from common patterns
554
+ # class Foo, def foo, function foo, const foo, etc.
555
+ m = re.match(r'.*?(?:class|def|function|interface|enum|type|const|let|var)\s+(\w+)', line)
556
+ if m:
557
+ return m.group(1)
558
+
559
+ # Assignment: FOO_BAR = ...
560
+ m = re.match(r'^([A-Z_][A-Z_0-9]*)\s*=', line.strip())
561
+ if m:
562
+ return m.group(1)
563
+
564
+ # Arrow: const foo = (...) =>
565
+ m = re.match(r'(?:export\s+)?(?:const|let|var)\s+(\w+)', line.strip())
566
+ if m:
567
+ return m.group(1)
568
+
569
+ return line.strip()[:50]
570
+
571
+ def _extract_docstring(self, lines: list, start: int, ext: str) -> Optional[str]:
572
+ """Extract first line of docstring/JSDoc if present."""
573
+ if start >= len(lines):
574
+ return None
575
+ line = lines[start].strip()
576
+
577
+ if ext == '.py' and (line.startswith('"""') or line.startswith("'''")):
578
+ quote = line[:3]
579
+ if line.endswith(quote) and len(line) > 6:
580
+ return line[3:-3].strip()
581
+ first = line[3:].strip()
582
+ if first:
583
+ return first
584
+ if start + 1 < len(lines):
585
+ return lines[start + 1].strip()
586
+ elif line.startswith('/**'):
587
+ for j in range(start, min(start + 10, len(lines))):
588
+ cleaned = lines[j].strip().lstrip('/*').rstrip('*/').strip()
589
+ if cleaned:
590
+ return cleaned
591
+ if '*/' in lines[j]:
592
+ break
593
+ return None
594
+
595
+ def _format_map(self, record: dict) -> str:
596
+ """Format a record into a readable structural map."""
597
+ path = record["path"]
598
+ total_lines = record.get("lines", 0)
599
+ lang = record.get("language", "")
600
+ summary = record.get("summary")
601
+ sections = record.get("sections", [])
602
+
603
+ parts = [f"# {path} ({total_lines} lines, {lang})"]
604
+
605
+ if summary:
606
+ parts.append(summary)
607
+
608
+ parts.append("") # blank line
609
+
610
+ icons = {
611
+ "class": "🏗️",
612
+ "function": "✨",
613
+ "method": "⚙️",
614
+ "import": "📦",
615
+ "constant": "💎",
616
+ "variable": "📄",
617
+ "interface": "🧩",
618
+ "type": "🏷️",
619
+ "enum": "🔢",
620
+ "comment": "💬",
621
+ "property": "🔧",
622
+ "decorator": "🎨",
623
+ "heading": "🔖",
624
+ "section": "📍",
625
+ "struct": "🧱",
626
+ "trait": "📜",
627
+ "impl": "🛠️"
628
+ }
629
+
630
+ import_sections = [section for section in sections if section.get("type") == "import"]
631
+ other_sections = [section for section in sections if section.get("type") != "import"]
632
+
633
+ if len(import_sections) > 6:
634
+ parts.append(f" imports {len(import_sections)} statements (collapsed)")
635
+ else:
636
+ # Re-integrate imports if few
637
+ other_sections = sections
638
+
639
+ for section in other_sections:
640
+ stype = section.get("type", "")
641
+ name = section.get("name", "")
642
+ ls = section.get("line_start", 0)
643
+ le = section.get("line_end", 0)
644
+ doc = section.get("doc")
645
+ is_async = section.get("async", False)
646
+ access = section.get("access")
647
+
648
+ line_range = f"{ls}-{le}".ljust(10)
649
+ icon = icons.get(stype, " ")
650
+
651
+ if stype == "import":
652
+ label = f"{icon} {name}"
653
+ elif stype == "comment":
654
+ label = f"{icon} {name}"
655
+ elif stype in ("heading", "section"):
656
+ label = f"{icon} {name}"
657
+ else:
658
+ async_prefix = "async " if is_async else ""
659
+ access_prefix = f"{access} " if access else ""
660
+ sig = section.get("signature", "")
661
+ params = f"({self._extract_params(sig)})" if stype in ("function", "method") else ""
662
+ label = f"{icon} {access_prefix}{async_prefix}{stype} {name}{params}"
663
+
664
+ parts.append(f" {line_range}{label}")
665
+ if doc:
666
+ parts.append(f" {doc}")
667
+
668
+ # Children (methods inside classes)
669
+ for child in section.get("children", []):
670
+ ctype = child.get("type", "")
671
+ cname = child.get("name", "")
672
+ cls = child.get("line_start", 0)
673
+ cle = child.get("line_end", 0)
674
+ sig = child.get("signature", "")
675
+ c_async = child.get("async", False)
676
+ c_access = child.get("access")
677
+
678
+ child_range = f"{cls}-{cle}".ljust(8)
679
+ c_icon = icons.get(ctype, " ")
680
+
681
+ async_prefix = "async " if c_async else ""
682
+ access_prefix = f"{c_access} " if c_access else ""
683
+
684
+ if ctype == "method":
685
+ parts.append(f" {child_range}{c_icon} {access_prefix}{async_prefix}{cname}({self._extract_params(sig)})")
686
+ else:
687
+ parts.append(f" {child_range}{c_icon} {access_prefix}{async_prefix}{ctype} {cname}")
688
+
689
+ return "\n".join(parts)
690
+
691
+ def _extract_params(self, signature: str) -> str:
692
+ """Extract parameter list from a function signature."""
693
+ m = re.search(r'\(([^)]*)\)', signature)
694
+ if m:
695
+ params = m.group(1).strip()
696
+ # Shorten if too long
697
+ if len(params) > 60:
698
+ params = params[:57] + "..."
699
+ return params
700
+ return ""
701
+
702
+ def _format_dense_map(self, record: dict) -> str:
703
+ """Format a compact one-line-per-symbol map (4b). ~40% fewer tokens than full map."""
704
+ path = record["path"]
705
+ total_lines = record.get("lines", 0)
706
+ lang = record.get("language", "")
707
+ sections = record.get("sections", [])
708
+
709
+ abbrev = {"class": "C", "function": "F", "method": "M", "import": "I",
710
+ "constant": "K", "interface": "IF", "type": "T", "enum": "E",
711
+ "variable": "V", "decorator": "D", "property": "P"}
712
+
713
+ parts = [f"# {path} ({total_lines}L {lang})"]
714
+
715
+ # Collapse imports into a single count
716
+ imports = [s for s in sections if s.get("type") == "import"]
717
+ others = [s for s in sections if s.get("type") != "import"]
718
+ if imports:
719
+ parts.append(f" I: {len(imports)} imports")
720
+
721
+ for s in others:
722
+ t = abbrev.get(s.get("type", ""), "?")
723
+ name = s.get("name", "")
724
+ ls = s.get("line_start", 0)
725
+ le = s.get("line_end", 0)
726
+ parts.append(f" {t} {name} [{ls}-{le}]")
727
+ for child in s.get("children", []):
728
+ ct = abbrev.get(child.get("type", ""), "?")
729
+ cn = child.get("name", "")
730
+ cls = child.get("line_start", 0)
731
+ cle = child.get("line_end", 0)
732
+ parts.append(f" {ct} {cn} [{cls}-{cle}]")
733
+
734
+ return "\n".join(parts)