code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
services/file_memory.py
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
1
|
+
"""File Memory Store — persistent structural index of source files.
|
|
2
|
+
|
|
3
|
+
Maintains per-file records with section maps (classes, functions, imports)
|
|
4
|
+
and exact line ranges so Claude can do targeted reads with offset/limit
|
|
5
|
+
instead of reading entire files.
|
|
6
|
+
|
|
7
|
+
Storage: .c3/file_memory/ directory, one JSON file per source file.
|
|
8
|
+
"""
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
from services.compressor import STRUCTURE_PATTERNS
|
|
17
|
+
from services.parser import PARSER_VERSION, extract_sections_ast
|
|
18
|
+
from services.text_index import TextIndex
|
|
19
|
+
|
|
20
|
+
# Language detection by extension
LANG_MAP = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.tsx': 'typescript',
    '.jsx': 'javascript',
    '.r': 'R',
    '.R': 'R',
    '.go': 'go',
    '.rs': 'rust',
    '.java': 'java',
    '.rb': 'ruby',
    '.c': 'c',
    '.cpp': 'cpp',
    '.h': 'c',
    '.cs': 'csharp',
    '.html': 'html',
    '.htm': 'html',
    '.md': 'markdown',
    '.css': 'css',
    '.json': 'json',
    '.yaml': 'yaml',
    '.yml': 'yaml',
}

# Extensions we know how to extract structure from: exactly the extensions
# that have a language entry above.
CODE_EXTENSIONS = set(LANG_MAP)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class FileMemoryStore:
|
|
37
|
+
"""Persistent structural index of source files."""
|
|
38
|
+
|
|
39
|
+
def __init__(self, project_path: str):
    """Bind the store to a project directory and load the search index.

    Creates ``<project_path>/.c3/file_memory`` when it is missing, then
    rebuilds the in-memory text index from the records already on disk.
    """
    root = Path(project_path)
    store_dir = root / ".c3" / "file_memory"
    store_dir.mkdir(parents=True, exist_ok=True)

    self.project_path = root
    self.store_dir = store_dir
    # Bookkeeping files live beside the per-file records.
    self._queue_state_path = store_dir / "_queue.json"
    self._diag_path = store_dir / "_diagnostics.jsonl"
    # Rendered maps, keyed by normalized relative path.
    self._map_cache = {}
    self._search_index = TextIndex()
    self._rebuild_search_index()
|
|
48
|
+
|
|
49
|
+
def get(self, rel_path: str) -> Optional[dict]:
    """Return the stored record for ``rel_path``.

    Returns None when no record file exists or when the record cannot be
    opened or parsed as JSON.
    """
    record_file = self._store_path(rel_path)
    if record_file.exists():
        try:
            with open(record_file, encoding="utf-8") as handle:
                return json.load(handle)
        except Exception:
            # An unreadable or corrupt record is treated like a missing one.
            pass
    return None
|
|
59
|
+
|
|
60
|
+
def update(self, rel_path: str, ai_summary: str = None) -> Optional[dict]:
    """Re-extract sections from file and persist the record.

    If the stored record already matches the file's content hash it is
    reused, and only its summary is refreshed when a different
    ``ai_summary`` is supplied. Two cases force a fresh extraction even with
    an unchanged hash: the stored record is the generic "(full file)"
    placeholder for an extension in ``CODE_EXTENSIONS``, or it was written
    with a different ``PARSER_VERSION``.

    Args:
        rel_path: File path relative to the project root.
        ai_summary: Optional summary text to store on the record.

    Returns:
        The up-to-date record, or None if the file doesn't exist or cannot
        be stat'ed or read.
    """
    full_path = self.project_path / rel_path
    if not full_path.exists():
        return None

    try:
        stat = full_path.stat()
        content = full_path.read_text(encoding="utf-8", errors="replace")
    except Exception:
        return None

    ext = full_path.suffix.lower()
    content_hash = hashlib.md5(content.encode()).hexdigest()
    # Same key normalization the map cache uses.
    cache_key = rel_path.replace("\\", "/")

    # Check if we already have an up-to-date record
    existing = self.get(rel_path)
    if existing and existing.get("content_hash") == content_hash:
        # A record may legitimately hold zero sections (e.g. an empty file),
        # so never index into the list without checking its length.
        stored_sections = existing.get("sections") or []
        was_generic = (
            len(stored_sections) == 1
            and stored_sections[0].get("name") == "(full file)"
        )
        # Also force re-extraction when the parser logic has been bumped
        stale_parser = existing.get("parser_version") != PARSER_VERSION
        if not ((was_generic and ext in CODE_EXTENSIONS) or stale_parser):
            # Only update AI summary if provided and different
            if ai_summary and existing.get("summary") != ai_summary:
                existing["summary"] = ai_summary
                existing["updated_at"] = time.strftime("%Y-%m-%dT%H:%M:%S")
                self._save(rel_path, existing)
                self._search_index.add_or_update(rel_path, self._search_doc(existing))
                # The rendered map embeds the summary; drop the stale copy.
                self._map_cache.pop(cache_key, None)
            self._cache_map(rel_path, existing)
            return existing
        # Otherwise fall through and force a fresh extraction.

    sections = self._extract_sections(full_path, content)

    record = {
        "path": rel_path,
        "content_hash": content_hash,
        "updated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
        "lines": len(content.splitlines()),
        "size_bytes": stat.st_size,
        "mtime_ns": getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000)),
        "language": LANG_MAP.get(ext, ext.lstrip('.')),
        "summary": ai_summary or (existing.get("summary") if existing else None),
        "parser_version": PARSER_VERSION,
        "sections": sections,
    }

    self._save(rel_path, record)
    # A forced re-extraction keeps the same content hash, so a previously
    # rendered map must be discarded explicitly before re-caching.
    self._map_cache.pop(cache_key, None)
    self._cache_map(rel_path, record)
    self._search_index.add_or_update(rel_path, self._search_doc(record))
    return record
|
|
120
|
+
|
|
121
|
+
def get_map(self, rel_path: str) -> Optional[str]:
    """Render the stored record for ``rel_path`` as a structural map.

    Reads whatever record is currently stored without checking freshness;
    call update() first to ensure fresh data. Returns None if no record
    exists.
    """
    stored = self.get(rel_path)
    return self._cache_map(rel_path, stored) if stored else None
|
|
130
|
+
|
|
131
|
+
def get_or_build_map(self, rel_path: str) -> str:
    """Return the structural map for ``rel_path``, rebuilding it if stale.

    A stored record is used as-is when needs_update() reports no change;
    otherwise update() is run first. If no record can be produced, a
    "[file_map] Could not build map" message is returned instead.
    """
    current = self.get(rel_path)
    is_fresh = bool(current) and not self.needs_update(rel_path)
    if not is_fresh:
        current = self.update(rel_path)
        if not current:
            return f"[file_map] Could not build map for {rel_path} — file not found or unreadable."
    return self._cache_map(rel_path, current)
|
|
145
|
+
|
|
146
|
+
def get_or_build_dense_map(self, rel_path: str) -> str:
    """Return the dense map for ``rel_path``, rebuilding the record if stale.

    Same freshness logic as get_or_build_map(), but the record is rendered
    with ``_format_dense_map`` and the result is not cached. Returns a
    "[file_map] Could not build map" message if no record can be produced.
    """
    current = self.get(rel_path)
    is_fresh = bool(current) and not self.needs_update(rel_path)
    if not is_fresh:
        current = self.update(rel_path)
        if not current:
            return f"[file_map] Could not build map for {rel_path} — file not found or unreadable."
    return self._format_dense_map(current)
|
|
155
|
+
|
|
156
|
+
def needs_update(self, rel_path: str) -> bool:
    """Report whether the stored record for ``rel_path`` is out of date.

    Untracked files need an update; tracked files that no longer exist on
    disk do not. Otherwise a matching mtime and size short-circuits to
    False, and only on a mismatch is the content hashed and compared. Any
    error while inspecting the file counts as "needs update".
    """
    stored = self.get(rel_path)
    if not stored:
        return True

    target = self.project_path / rel_path
    if not target.exists():
        return False

    try:
        info = target.stat()
        mtime_ns = getattr(info, "st_mtime_ns", int(info.st_mtime * 1_000_000_000))
        same_mtime = stored.get("mtime_ns") == mtime_ns
        if same_mtime and stored.get("size_bytes") == info.st_size:
            return False
        # Metadata differs (or was never recorded): fall back to the hash.
        text = target.read_text(encoding="utf-8", errors="replace")
        digest = hashlib.md5(text.encode()).hexdigest()
        return digest != stored.get("content_hash")
    except Exception:
        return True
|
|
179
|
+
|
|
180
|
+
def get_symbol_ranges(self, rel_path: str, symbol_names: list[str], return_matches: bool = False) -> list:
|
|
181
|
+
"""Resolve symbol names to line ranges (1-indexed).
|
|
182
|
+
Supports exact match and substring/partial match (e.g. 'handle_req' matches 'handle_request_data').
|
|
183
|
+
Supports exact regex if anchored (e.g. '^cmd_benchmark$').
|
|
184
|
+
"""
|
|
185
|
+
record = self.get(rel_path)
|
|
186
|
+
if not record or "sections" not in record:
|
|
187
|
+
return []
|
|
188
|
+
|
|
189
|
+
ranges = []
|
|
190
|
+
matches = []
|
|
191
|
+
|
|
192
|
+
# Pre-compile regexes
|
|
193
|
+
compiled_targets = []
|
|
194
|
+
for name in symbol_names:
|
|
195
|
+
if name.startswith('^') and name.endswith('$'):
|
|
196
|
+
try:
|
|
197
|
+
compiled_targets.append((name, re.compile(name, re.IGNORECASE)))
|
|
198
|
+
except Exception:
|
|
199
|
+
compiled_targets.append((name, name.lower()))
|
|
200
|
+
elif name in ('<main>', '<globals>', '<imports>'):
|
|
201
|
+
compiled_targets.append((name, name))
|
|
202
|
+
else:
|
|
203
|
+
compiled_targets.append((name, name.lower()))
|
|
204
|
+
|
|
205
|
+
def _matches(section_name: str, target_data) -> bool:
|
|
206
|
+
orig_name, target = target_data
|
|
207
|
+
sn = section_name.lower()
|
|
208
|
+
if isinstance(target, re.Pattern):
|
|
209
|
+
return bool(target.match(section_name))
|
|
210
|
+
if orig_name in ('<main>', '<globals>', '<imports>'):
|
|
211
|
+
return False # Handled separately if needed, or matched below if actually named that
|
|
212
|
+
if sn == target:
|
|
213
|
+
return True
|
|
214
|
+
# Substring match
|
|
215
|
+
if target in sn or sn in target:
|
|
216
|
+
return True
|
|
217
|
+
return False
|
|
218
|
+
|
|
219
|
+
def search_sections(sections):
|
|
220
|
+
for sec in sections:
|
|
221
|
+
sec_name = sec.get("name", "")
|
|
222
|
+
for target_data in compiled_targets:
|
|
223
|
+
if _matches(sec_name, target_data):
|
|
224
|
+
ranges.append((sec["line_start"], sec["line_end"]))
|
|
225
|
+
matches.append({"target": target_data[0], "match": sec_name, "range": (sec["line_start"], sec["line_end"])})
|
|
226
|
+
# Don't break here, let it find all matches for this section if multiple targets apply
|
|
227
|
+
# But wait, if one target matches, we don't want to add the section multiple times for the same target
|
|
228
|
+
# We'll deduplicate later
|
|
229
|
+
|
|
230
|
+
if "children" in sec:
|
|
231
|
+
search_sections(sec["children"])
|
|
232
|
+
|
|
233
|
+
search_sections(record["sections"])
|
|
234
|
+
|
|
235
|
+
# Deduplicate matches
|
|
236
|
+
unique_matches = []
|
|
237
|
+
seen = set()
|
|
238
|
+
for m in matches:
|
|
239
|
+
key = (m["target"], m["match"], m["range"])
|
|
240
|
+
if key not in seen:
|
|
241
|
+
seen.add(key)
|
|
242
|
+
unique_matches.append(m)
|
|
243
|
+
|
|
244
|
+
unique_ranges = list(set(ranges))
|
|
245
|
+
|
|
246
|
+
if return_matches:
|
|
247
|
+
return unique_matches
|
|
248
|
+
return unique_ranges
|
|
249
|
+
|
|
250
|
+
def list_tracked(self) -> list:
    """Return relative paths of all tracked files.

    Scans every ``*.json`` record in the store directory, skipping
    bookkeeping files (names starting with "_"), records that cannot be
    read or parsed, and records without a non-empty "path".
    """
    paths = []
    for record_file in self.store_dir.glob("*.json"):
        if record_file.name.startswith("_"):
            continue
        try:
            with open(record_file, encoding="utf-8") as handle:
                rel = json.load(handle).get("path", "")
        except Exception:
            continue
        if rel:
            paths.append(rel)
    return paths
|
|
263
|
+
|
|
264
|
+
def search(self, query: str, top_k: int = 5) -> list[dict]:
    """Search tracked files through the text index.

    Each hit from the index is joined with its stored record; hits whose
    record can no longer be loaded are dropped. Result entries carry the
    path, language, summary, score rounded to 4 decimals, and the number of
    top-level sections.
    """
    hits = []
    for rel_path, score in self._search_index.search(query, top_k=top_k):
        record = self.get(rel_path)
        if record:
            hits.append(dict(
                path=rel_path,
                language=record.get("language", ""),
                summary=record.get("summary"),
                score=round(score, 4),
                sections=len(record.get("sections", [])),
            ))
    return hits
|
|
278
|
+
|
|
279
|
+
def queue_for_update(self, rel_path: str):
    """Add a file to the async update queue (used by Read hook).

    The path is appended to "pending" unless it is already pending or
    inflight. Any failure is recorded as a diagnostic instead of raised.
    """
    try:
        state = self._load_queue_state()
        pending = state.get("pending", [])
        inflight = state.get("inflight", [])
        already_queued = rel_path in pending or rel_path in inflight
        if not already_queued:
            pending.append(rel_path)
            state["pending"] = pending
            self._save_queue_state(state)
    except Exception:
        self._record_diag("queue_for_update_failed", rel_path)
|
|
291
|
+
|
|
292
|
+
def drain_queue(self) -> list:
    """Claim queued work without dropping it on crash.

    If an earlier batch is still marked inflight (it was claimed but never
    completed), that same batch is handed out again and "pending" is left
    untouched so newer requests are picked up by a later drain. Otherwise
    all pending paths are stripped, de-duplicated in order, moved to
    "inflight", and returned.

    Returns:
        The claimed paths, or an empty list if the queue state could not be
        read or written (the failure is recorded as a diagnostic).
    """
    try:
        state = self._load_queue_state()
        inflight = state.get("inflight", [])
        if inflight:
            claimed = inflight
        else:
            claimed = []
            seen = set()
            for path in state.get("pending", []):
                clean = path.strip()
                if clean and clean not in seen:
                    seen.add(clean)
                    claimed.append(clean)
            # Only clear pending once its entries are safely in `claimed`.
            state["pending"] = []
        state["inflight"] = claimed
        self._save_queue_state(state)
        return claimed
    except Exception:
        self._record_diag("drain_queue_failed", "")
        return []
|
|
315
|
+
|
|
316
|
+
def complete_updates(self, rel_paths: list[str], failed: bool = False):
    """Mark claimed paths as finished and persist the queue state.

    The given paths are removed from "inflight". With ``failed=True`` they
    are also appended back to "pending" (without duplicates) so a later
    drain retries them. Any failure is recorded as a diagnostic instead of
    raised.

    Args:
        rel_paths: Paths whose processing has ended.
        failed: Whether the paths should be re-queued for another attempt.
    """
    try:
        state = self._load_queue_state()
        # Built once rather than once per inflight entry.
        finished = set(rel_paths)
        inflight = [p for p in state.get("inflight", []) if p not in finished]
        if failed:
            pending = state.get("pending", [])
            for path in rel_paths:
                if path not in pending:
                    pending.append(path)
            state["pending"] = pending
        state["inflight"] = inflight
        self._save_queue_state(state)
    except Exception:
        self._record_diag("complete_updates_failed", ",".join(rel_paths))
|
|
330
|
+
|
|
331
|
+
# ── Private ──────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
def _store_path(self, rel_path: str) -> Path:
    """Return the JSON record file that backs ``rel_path``.

    The file name is the MD5 hex digest of the path with backslashes
    normalized to forward slashes, so both separator styles map to the same
    record.
    """
    normalized = rel_path.replace("\\", "/")
    digest = hashlib.md5(normalized.encode()).hexdigest()
    return self.store_dir / (digest + ".json")
|
|
337
|
+
|
|
338
|
+
def _save(self, rel_path: str, record: dict):
    """Write ``record`` to the store file for ``rel_path`` as indented JSON.

    Best effort: a failed write is recorded as a "save_failed" diagnostic
    and otherwise ignored.
    """
    target = self._store_path(rel_path)
    try:
        with open(target, "w", encoding="utf-8") as out:
            json.dump(record, out, indent=2)
    except Exception:
        self._record_diag("save_failed", rel_path)
|
|
346
|
+
|
|
347
|
+
def _cache_map(self, rel_path: str, record: dict) -> str:
    """Return the formatted map for ``record``, reusing a cached rendering.

    The cache is keyed by the normalized path and guarded by a fingerprint
    of the record fields that can change while the file content stays the
    same: the content hash, parser version, summary and update timestamp.
    Guarding on the content hash alone would keep serving an outdated map
    after a summary change or a re-extraction of unchanged content.
    """
    cache_key = rel_path.replace("\\", "/")
    fingerprint = (
        record.get("content_hash"),
        record.get("parser_version"),
        record.get("summary"),
        record.get("updated_at"),
    )
    cached = self._map_cache.get(cache_key)
    if cached and cached[0] == fingerprint:
        return cached[1]
    rendered = self._format_map(record)
    self._map_cache[cache_key] = (fingerprint, rendered)
    return rendered
|
|
357
|
+
|
|
358
|
+
def _search_doc(self, record: dict) -> str:
    """Flatten a record into one space-separated string for the text index.

    Includes the path, language and summary, then each top-level section's
    name, type and doc, and each direct child's name and type. Empty or
    missing values are left out.
    """
    tokens = [record.get(key, "") for key in ("path", "language", "summary")]
    for section in record.get("sections", []):
        tokens += [section.get(key, "") for key in ("name", "type", "doc")]
        for child in section.get("children", []):
            tokens += [child.get(key, "") for key in ("name", "type")]
    return " ".join(str(token) for token in tokens if token)
|
|
368
|
+
|
|
369
|
+
def _rebuild_search_index(self):
    """Rebuild the text index from every tracked record that can be loaded."""
    loaded = ((rel_path, self.get(rel_path)) for rel_path in self.list_tracked())
    docs = {
        rel_path: self._search_doc(record)
        for rel_path, record in loaded
        if record
    }
    self._search_index.rebuild(docs)
|
|
376
|
+
|
|
377
|
+
def _load_queue_state(self) -> dict:
    """Load the update-queue state from disk.

    Returns:
        A dict that always has list-valued "pending" and "inflight" keys. A
        fresh empty state is returned when the file is missing, unreadable,
        not valid JSON, or does not hold a JSON object. Other keys found in
        the file are preserved.
    """
    if not self._queue_state_path.exists():
        return {"pending": [], "inflight": []}
    try:
        with open(self._queue_state_path, encoding="utf-8") as handle:
            state = json.load(handle)
    except Exception:
        return {"pending": [], "inflight": []}
    if not isinstance(state, dict):
        # e.g. a bare list or null: nothing usable to recover.
        return {"pending": [], "inflight": []}
    for key in ("pending", "inflight"):
        # Callers append to and iterate these, so they must be lists.
        if not isinstance(state.get(key), list):
            state[key] = []
    return state
|
|
388
|
+
|
|
389
|
+
def _save_queue_state(self, state: dict):
    """Persist the update-queue state as indented JSON.

    The state is written to a sibling ``.tmp`` file and then renamed over
    the real file, so an interrupted write cannot leave a truncated queue
    file behind (a truncated file is read back as an empty queue, which
    would silently drop queued work).

    Raises:
        Whatever the write or rename raises; callers handle and log it.
    """
    target = self._queue_state_path
    scratch = target.with_name(target.name + ".tmp")
    try:
        with open(scratch, "w", encoding="utf-8") as handle:
            json.dump(state, handle, indent=2)
        scratch.replace(target)
    except Exception:
        # Don't leave a half-written scratch file lying around.
        try:
            scratch.unlink()
        except OSError:
            pass
        raise
|
|
392
|
+
|
|
393
|
+
def _record_diag(self, kind: str, rel_path: str, detail: str = ""):
    """Append one JSON line describing a failure to the diagnostics log.

    Strictly best effort: any error while serializing or writing the entry
    is swallowed so diagnostics can never break the caller.
    """
    stamp = time.strftime("%Y-%m-%dT%H:%M:%S")
    entry = dict(timestamp=stamp, kind=kind, path=rel_path, detail=detail)
    try:
        with open(self._diag_path, "a", encoding="utf-8") as log:
            log.write(json.dumps(entry) + "\n")
    except Exception:
        pass
|
|
405
|
+
|
|
406
|
+
def _extract_sections(self, filepath: Path, content: str) -> list:
    """Extract structural sections with line ranges from source code.

    The AST parser is tried first and its result is used whenever it is not
    None. Otherwise the regex patterns registered for the file extension in
    ``STRUCTURE_PATTERNS`` are applied line by line; extensions without
    patterns get the single generic "(full file)" section.

    In the regex path, class-like sections (class/interface/enum) collect
    more-indented definitions that follow them as "method" children, and a
    class ends at the first non-blank line indented at or below the class
    header. Decorator lines are recognized but produce no section.

    Args:
        filepath: Path of the file; only its suffix is used here.
        content: Full text of the file.

    Returns:
        A list of section dicts with 1-indexed "line_start"/"line_end".
    """
    ext = filepath.suffix.lower()

    # Try AST parser first
    ast_sections = extract_sections_ast(content, ext)
    if ast_sections is not None:
        return ast_sections

    lines = content.splitlines()
    patterns = STRUCTURE_PATTERNS.get(ext, {})
    if not patterns:
        return self._extract_generic_sections(lines)

    sections = []
    current_class = None  # Track current class for method nesting

    for i, line in enumerate(lines):
        lstripped = line.lstrip()
        indent = len(line) - len(lstripped)

        # End class scope when indent returns to class level or lower
        if current_class and lstripped and indent <= current_class.get("_indent", 0):
            # `i` is the 0-based index of this line, i.e. the 1-based
            # number of the line before it: the class's last line.
            current_class["line_end"] = i
            current_class = None

        # Only the first matching pattern counts; match against lstripped
        # so indented methods are detected.
        kind = next(
            (k for k, pattern in patterns.items()
             if re.match(pattern, lstripped, re.MULTILINE)),
            None,
        )
        if kind is None or kind == 'decorator':
            # Decorator lines get no section of their own, so skip them
            # before doing any block-end or docstring scanning.
            continue

        section = {
            "type": self._normalize_type(kind),
            "name": self._extract_name(kind, lstripped),
            "line_start": i + 1,  # 1-indexed
            "line_end": self._find_block_end(lines, i, ext),
            "signature": lstripped,
        }

        doc = self._extract_docstring(lines, i + 1, ext)
        if doc:
            section["doc"] = doc

        if kind in ('class', 'interface', 'enum'):
            section["children"] = []
            section["_indent"] = indent
            sections.append(section)
            current_class = section
        elif current_class and indent > current_class.get("_indent", 0):
            # Method inside a class
            section["type"] = "method"
            current_class["children"].append(section)
        else:
            sections.append(section)

    # Finalize any open class
    if current_class:
        current_class["line_end"] = len(lines)

    # Clean up internal tracking keys. Only class-like sections carry
    # "_indent", and those are always appended to the top-level list.
    for section in sections:
        section.pop("_indent", None)

    return sections
|
|
486
|
+
|
|
487
|
+
def _extract_generic_sections(self, lines: list) -> list:
    """Fallback when no structure can be extracted.

    Returns a single "(full file)" content section spanning line 1 through
    the last line.
    """
    whole_file = {
        "type": "content",
        "name": "(full file)",
        "line_start": 1,
        "line_end": len(lines),
    }
    return [whole_file]
|
|
490
|
+
|
|
491
|
+
def _find_block_end(self, lines: list, start: int, ext: str) -> int:
    """Find the end line of the code block that starts at index `start`.

    Brace-delimited languages use brace matching; Python and every other
    extension fall back to indentation.
    """
    brace_exts = ('.js', '.ts', '.tsx', '.jsx', '.java', '.go', '.rs', '.c', '.cpp', '.h', '.cs')
    if ext != '.py' and ext in brace_exts:
        return self._find_brace_block_end(lines, start)
    return self._find_python_block_end(lines, start)
|
|
500
|
+
|
|
501
|
+
def _find_python_block_end(self, lines: list, start: int) -> int:
    """Find the end of an indentation-delimited block.

    Args:
        lines: All lines of the file.
        start: 0-based index of the block's header line.

    Returns:
        The 1-based number of the block's last line. The block ends just
        before the first later non-blank line indented at or below the
        header (this also covers a decorator at the same level), so blank
        lines directly before that line are included. Returns ``len(lines)``
        when the block runs to the end of the file and ``start + 1`` when
        ``start`` is past the end.
    """
    if start >= len(lines):
        return start + 1

    header = lines[start]
    base_indent = len(header) - len(header.lstrip())

    for i in range(start + 1, len(lines)):
        line = lines[i]
        if not line.strip():
            continue
        if len(line) - len(line.lstrip()) <= base_indent:
            # 0-based index of the terminating line == 1-based number of
            # the line before it.
            return i
    return len(lines)
|
|
520
|
+
|
|
521
|
+
def _find_brace_block_end(self, lines: list, start: int) -> int:
    """Find the end of a brace-delimited block.

    Scans from the line at index ``start`` and returns the 1-based number of
    the line on which the first opened brace is closed again. A closing
    brace seen before any opening brace is ignored so it cannot skew the
    depth count. Braces inside strings or comments are not recognized as
    such and are counted like any other.

    Returns:
        The 1-based end line, or ``len(lines)`` if no balanced block is
        found.
    """
    depth = 0
    for i in range(start, len(lines)):
        for ch in lines[i]:
            if ch == '{':
                depth += 1
            elif ch == '}' and depth > 0:
                depth -= 1
                if depth == 0:
                    return i + 1  # 1-indexed
    return len(lines)
|
|
536
|
+
|
|
537
|
+
def _normalize_type(self, kind: str) -> str:
    """Translate a pattern kind into the section type stored in records.

    Arrow functions and exports become "function", assignments become
    "constant", library loads become "import"; every other kind is returned
    unchanged.
    """
    if kind in ('arrow', 'export'):
        return 'function'
    if kind == 'assignment':
        return 'constant'
    if kind == 'library':
        return 'import'
    return kind
|
|
547
|
+
|
|
548
|
+
def _extract_name(self, kind: str, line: str) -> str:
    """Extract the name from a matched line.

    Args:
        kind: Pattern kind that matched the line.
        line: The matched source line.

    Returns:
        For imports/library loads, the whole stripped line. Otherwise the
        identifier following the first declaration keyword (class, def,
        function, interface, enum, type, const, let, var), or the upper-case
        name on the left of an assignment. If neither applies, the first 50
        characters of the stripped line.
    """
    text = line.strip()
    if kind in ('import', 'library'):
        return text

    # class Foo, def foo, function foo, const foo = (...) =>, etc.
    # The \b keeps keywords from matching inside longer words such as
    # "typedef" or "some_function".
    m = re.match(r'.*?\b(?:class|def|function|interface|enum|type|const|let|var)\s+(\w+)', line)
    if m:
        return m.group(1)

    # Assignment: FOO_BAR = ...
    m = re.match(r'^([A-Z_][A-Z_0-9]*)\s*=', text)
    if m:
        return m.group(1)

    return text[:50]
|
|
570
|
+
|
|
571
|
+
def _extract_docstring(self, lines: list, start: int, ext: str) -> Optional[str]:
    """Extract first line of docstring/JSDoc if present.

    Args:
        lines: All lines of the file.
        start: 0-based index of the line where the doc comment would begin.
        ext: Lower-cased file extension; triple-quoted docstrings are only
            recognized for '.py'.

    Returns:
        The first non-empty line of text of the docstring or ``/** ... */``
        comment, with the delimiters removed. None if there is no doc
        comment at ``start`` or it has no text on its first line(s).
    """
    if start >= len(lines):
        return None
    line = lines[start].strip()

    if ext == '.py' and line.startswith(('"""', "'''")):
        quote = line[:3]
        rest = line[3:]
        if rest.endswith(quote):
            # One-line docstring; an empty one (six quotes) has no text.
            return rest[:-3].strip() or None
        first = rest.strip()
        if first:
            return first
        if start + 1 < len(lines):
            following = lines[start + 1].strip()
            # The next line may already close the docstring.
            if following.endswith(quote):
                following = following[:-3].strip()
            return following or None
    elif line.startswith('/**'):
        for raw in lines[start:start + 10]:
            cleaned = raw.strip().lstrip('/*').rstrip('*/').strip()
            if cleaned:
                return cleaned
            if '*/' in raw:
                break
    return None
|
|
594
|
+
|
|
595
|
+
def _format_map(self, record: dict) -> str:
|
|
596
|
+
"""Format a record into a readable structural map."""
|
|
597
|
+
path = record["path"]
|
|
598
|
+
total_lines = record.get("lines", 0)
|
|
599
|
+
lang = record.get("language", "")
|
|
600
|
+
summary = record.get("summary")
|
|
601
|
+
sections = record.get("sections", [])
|
|
602
|
+
|
|
603
|
+
parts = [f"# {path} ({total_lines} lines, {lang})"]
|
|
604
|
+
|
|
605
|
+
if summary:
|
|
606
|
+
parts.append(summary)
|
|
607
|
+
|
|
608
|
+
parts.append("") # blank line
|
|
609
|
+
|
|
610
|
+
icons = {
|
|
611
|
+
"class": "🏗️",
|
|
612
|
+
"function": "✨",
|
|
613
|
+
"method": "⚙️",
|
|
614
|
+
"import": "📦",
|
|
615
|
+
"constant": "💎",
|
|
616
|
+
"variable": "📄",
|
|
617
|
+
"interface": "🧩",
|
|
618
|
+
"type": "🏷️",
|
|
619
|
+
"enum": "🔢",
|
|
620
|
+
"comment": "💬",
|
|
621
|
+
"property": "🔧",
|
|
622
|
+
"decorator": "🎨",
|
|
623
|
+
"heading": "🔖",
|
|
624
|
+
"section": "📍",
|
|
625
|
+
"struct": "🧱",
|
|
626
|
+
"trait": "📜",
|
|
627
|
+
"impl": "🛠️"
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
import_sections = [section for section in sections if section.get("type") == "import"]
|
|
631
|
+
other_sections = [section for section in sections if section.get("type") != "import"]
|
|
632
|
+
|
|
633
|
+
if len(import_sections) > 6:
|
|
634
|
+
parts.append(f" imports {len(import_sections)} statements (collapsed)")
|
|
635
|
+
else:
|
|
636
|
+
# Re-integrate imports if few
|
|
637
|
+
other_sections = sections
|
|
638
|
+
|
|
639
|
+
for section in other_sections:
|
|
640
|
+
stype = section.get("type", "")
|
|
641
|
+
name = section.get("name", "")
|
|
642
|
+
ls = section.get("line_start", 0)
|
|
643
|
+
le = section.get("line_end", 0)
|
|
644
|
+
doc = section.get("doc")
|
|
645
|
+
is_async = section.get("async", False)
|
|
646
|
+
access = section.get("access")
|
|
647
|
+
|
|
648
|
+
line_range = f"{ls}-{le}".ljust(10)
|
|
649
|
+
icon = icons.get(stype, " ")
|
|
650
|
+
|
|
651
|
+
if stype == "import":
|
|
652
|
+
label = f"{icon} {name}"
|
|
653
|
+
elif stype == "comment":
|
|
654
|
+
label = f"{icon} {name}"
|
|
655
|
+
elif stype in ("heading", "section"):
|
|
656
|
+
label = f"{icon} {name}"
|
|
657
|
+
else:
|
|
658
|
+
async_prefix = "async " if is_async else ""
|
|
659
|
+
access_prefix = f"{access} " if access else ""
|
|
660
|
+
sig = section.get("signature", "")
|
|
661
|
+
params = f"({self._extract_params(sig)})" if stype in ("function", "method") else ""
|
|
662
|
+
label = f"{icon} {access_prefix}{async_prefix}{stype} {name}{params}"
|
|
663
|
+
|
|
664
|
+
parts.append(f" {line_range}{label}")
|
|
665
|
+
if doc:
|
|
666
|
+
parts.append(f" {doc}")
|
|
667
|
+
|
|
668
|
+
# Children (methods inside classes)
|
|
669
|
+
for child in section.get("children", []):
|
|
670
|
+
ctype = child.get("type", "")
|
|
671
|
+
cname = child.get("name", "")
|
|
672
|
+
cls = child.get("line_start", 0)
|
|
673
|
+
cle = child.get("line_end", 0)
|
|
674
|
+
sig = child.get("signature", "")
|
|
675
|
+
c_async = child.get("async", False)
|
|
676
|
+
c_access = child.get("access")
|
|
677
|
+
|
|
678
|
+
child_range = f"{cls}-{cle}".ljust(8)
|
|
679
|
+
c_icon = icons.get(ctype, " ")
|
|
680
|
+
|
|
681
|
+
async_prefix = "async " if c_async else ""
|
|
682
|
+
access_prefix = f"{c_access} " if c_access else ""
|
|
683
|
+
|
|
684
|
+
if ctype == "method":
|
|
685
|
+
parts.append(f" {child_range}{c_icon} {access_prefix}{async_prefix}{cname}({self._extract_params(sig)})")
|
|
686
|
+
else:
|
|
687
|
+
parts.append(f" {child_range}{c_icon} {access_prefix}{async_prefix}{ctype} {cname}")
|
|
688
|
+
|
|
689
|
+
return "\n".join(parts)
|
|
690
|
+
|
|
691
|
+
def _extract_params(self, signature: str) -> str:
|
|
692
|
+
"""Extract parameter list from a function signature."""
|
|
693
|
+
m = re.search(r'\(([^)]*)\)', signature)
|
|
694
|
+
if m:
|
|
695
|
+
params = m.group(1).strip()
|
|
696
|
+
# Shorten if too long
|
|
697
|
+
if len(params) > 60:
|
|
698
|
+
params = params[:57] + "..."
|
|
699
|
+
return params
|
|
700
|
+
return ""
|
|
701
|
+
|
|
702
|
+
def _format_dense_map(self, record: dict) -> str:
|
|
703
|
+
"""Format a compact one-line-per-symbol map (4b). ~40% fewer tokens than full map."""
|
|
704
|
+
path = record["path"]
|
|
705
|
+
total_lines = record.get("lines", 0)
|
|
706
|
+
lang = record.get("language", "")
|
|
707
|
+
sections = record.get("sections", [])
|
|
708
|
+
|
|
709
|
+
abbrev = {"class": "C", "function": "F", "method": "M", "import": "I",
|
|
710
|
+
"constant": "K", "interface": "IF", "type": "T", "enum": "E",
|
|
711
|
+
"variable": "V", "decorator": "D", "property": "P"}
|
|
712
|
+
|
|
713
|
+
parts = [f"# {path} ({total_lines}L {lang})"]
|
|
714
|
+
|
|
715
|
+
# Collapse imports into a single count
|
|
716
|
+
imports = [s for s in sections if s.get("type") == "import"]
|
|
717
|
+
others = [s for s in sections if s.get("type") != "import"]
|
|
718
|
+
if imports:
|
|
719
|
+
parts.append(f" I: {len(imports)} imports")
|
|
720
|
+
|
|
721
|
+
for s in others:
|
|
722
|
+
t = abbrev.get(s.get("type", ""), "?")
|
|
723
|
+
name = s.get("name", "")
|
|
724
|
+
ls = s.get("line_start", 0)
|
|
725
|
+
le = s.get("line_end", 0)
|
|
726
|
+
parts.append(f" {t} {name} [{ls}-{le}]")
|
|
727
|
+
for child in s.get("children", []):
|
|
728
|
+
ct = abbrev.get(child.get("type", ""), "?")
|
|
729
|
+
cn = child.get("name", "")
|
|
730
|
+
cls = child.get("line_start", 0)
|
|
731
|
+
cle = child.get("line_end", 0)
|
|
732
|
+
parts.append(f" {ct} {cn} [{cls}-{cle}]")
|
|
733
|
+
|
|
734
|
+
return "\n".join(parts)
|