code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transcript Index — TF-IDF search over Claude Code .jsonl conversation transcripts.
|
|
3
|
+
|
|
4
|
+
Indexes past Claude Code sessions for semantic retrieval, enabling
|
|
5
|
+
context recall from previous conversations without re-reading full transcripts.
|
|
6
|
+
"""
|
|
7
|
+
import json
|
|
8
|
+
import math
|
|
9
|
+
import re
|
|
10
|
+
from collections import Counter
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from core import count_tokens
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TranscriptIndex:
    """TF-IDF index over Claude Code .jsonl transcripts.

    A "turn" is one user message joined with the first assistant reply that
    follows it.  Turns are persisted to ``index.json``; ``manifest.json``
    records the size and line count of every indexed transcript so unchanged
    files can be skipped on the next build.
    """

    MAX_TRANSCRIPT_FILES = 50    # only the N most recently modified transcripts are indexed
    MAX_CHARS_PER_TURN = 2000    # stored turn text is truncated to this many characters
    MAX_TOOL_INPUT_CHARS = 200   # str(tool input) is truncated to this many characters

    def __init__(self, project_path: str, data_dir: str = ".c3/transcript_index"):
        """Create the index rooted at *project_path*.

        Args:
            project_path: Project root directory.
            data_dir: Directory (relative to the project root) that holds
                ``index.json`` and ``manifest.json``.  Created if missing.
        """
        self.project_path = Path(project_path)
        self.data_dir = self.project_path / data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.index_file = self.data_dir / "index.json"
        self.manifest_file = self.data_dir / "manifest.json"
        self.index = {}     # {turn_id: {text, session_file, timestamp, turn_num}}
        self.manifest = {}  # {file_path: {size, line_count}}

    def find_transcript_dir(self) -> "Path | None":
        """Locate the Claude Code transcript directory for this project.

        Returns:
            The matching directory under ``~/.claude/projects``, or ``None``
            when that directory does not exist or nothing matches.
        """
        projects_dir = Path.home() / ".claude" / "projects"
        if not projects_dir.exists():
            return None

        # Claude Code slugifies the absolute path by replacing every
        # non-alphanumeric character with '-' and stripping leading dashes.
        resolved = self.project_path.resolve()
        project_str = str(resolved)
        slug = re.sub(r"[^a-zA-Z0-9]", "-", project_str).lstrip("-")

        transcript_dir = projects_dir / slug
        if transcript_dir.exists():
            return transcript_dir

        # Fallback: normalize both sides to bare alphanumerics for a
        # variant-proof comparison (handles old slug formats).
        def _bare(s):
            return re.sub(r"[^a-z0-9]", "", s.lower())

        target_bare = _bare(project_str)
        project_name = resolved.name.lower()
        for d in projects_dir.iterdir():
            if not d.is_dir():
                continue
            if _bare(d.name) == target_bare or project_name in d.name.lower():
                # Only accept a candidate that actually holds transcripts.
                if any(d.glob("*.jsonl")):
                    return d

        return None

    def build_index(self, force: bool = False) -> dict:
        """Build or incrementally update the transcript index.

        Args:
            force: When true, ignore any previously saved or in-memory state
                and re-extract every transcript from scratch.

        Returns:
            ``{files_scanned, turns_indexed, new_files}``; an ``error`` key is
            added when no transcript directory can be found.
        """
        transcript_dir = self.find_transcript_dir()
        if not transcript_dir:
            return {"files_scanned": 0, "turns_indexed": 0, "new_files": 0,
                    "error": "No transcript directory found"}

        if force:
            # A forced rebuild must not keep stale turns from an earlier
            # build performed on this same instance.
            self.index = {}
            self.manifest = {}
        else:
            self._load_manifest()
            self._load_index()

        # Newest transcripts first; a file that vanishes between glob() and
        # stat() is skipped instead of aborting the whole build.
        candidates = []
        for path in transcript_dir.glob("*.jsonl"):
            try:
                candidates.append((path.stat().st_mtime, path))
            except OSError:
                continue
        candidates.sort(key=lambda item: item[0], reverse=True)
        jsonl_files = [path for _, path in candidates[:self.MAX_TRANSCRIPT_FILES]]

        files_scanned = 0
        new_files = 0

        for jf in jsonl_files:
            fpath = str(jf)
            try:
                size = jf.stat().st_size
                # Context manager so the handle is closed deterministically.
                with open(jf, encoding="utf-8", errors="replace") as fh:
                    line_count = sum(1 for _ in fh)
            except OSError:
                continue
            file_info = {"size": size, "line_count": line_count}

            # Skip unchanged files (unless force).
            if not force:
                existing = self.manifest.get(fpath)
                if (
                    isinstance(existing, dict)
                    and existing.get("size") == size
                    and existing.get("line_count") == line_count
                ):
                    continue

            turns = self._extract_turns(jf)
            session_name = jf.stem

            # Drop whatever was previously indexed for this session.
            self.index = {
                tid: data for tid, data in self.index.items()
                if data.get("session_file") != session_name
            }

            for turn in turns:
                turn["session_file"] = session_name
                self.index[turn["turn_id"]] = turn

            self.manifest[fpath] = file_info
            files_scanned += 1
            new_files += 1

        self._save_index()
        self._save_manifest()

        return {
            "files_scanned": files_scanned,
            "turns_indexed": len(self.index),
            "new_files": new_files,
        }

    @staticmethod
    def _entry_role(entry: dict) -> str:
        """Return the entry's role, falling back to ``message.role``."""
        role = entry.get("role", "")
        msg = entry.get("message", {})
        if isinstance(msg, dict):
            role = role or msg.get("role", "")
        return role

    def _extract_turns(self, jsonl_path: Path) -> list:
        """Extract conversation turns from a .jsonl transcript.

        Groups each user entry with the first assistant entry that follows it.
        Lines that are blank, are not valid JSON, or do not decode to a JSON
        object are ignored.

        Returns:
            A list of ``{turn_id, text, timestamp, turn_num}`` dicts; empty if
            the file cannot be read.
        """
        entries = []
        try:
            with open(jsonl_path, encoding="utf-8", errors="replace") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # Valid JSON that is not an object (list, string, number)
                    # has no .get() and cannot be a transcript entry.
                    if isinstance(entry, dict):
                        entries.append(entry)
        except OSError:
            return []

        turns = []
        turn_num = 0
        i = 0
        while i < len(entries):
            entry = entries[i]

            # Skip non-message types.
            if entry.get("type", "") in ("progress", "file-history-snapshot", "system"):
                i += 1
                continue

            if self._entry_role(entry) != "user":
                i += 1
                continue

            user_text = self._extract_text_from_entry(entry)

            # Look ahead for the assistant response, stepping over
            # bookkeeping entries; anything else ends the search.
            assistant_text = ""
            j = i + 1
            while j < len(entries):
                next_entry = entries[j]
                if next_entry.get("type", "") in ("progress", "file-history-snapshot"):
                    j += 1
                    continue
                if self._entry_role(next_entry) == "assistant":
                    assistant_text = self._extract_text_from_entry(next_entry)
                    j += 1
                break

            combined = (user_text + " " + assistant_text).strip()
            if combined:
                turn_num += 1
                turns.append({
                    "turn_id": f"{jsonl_path.stem}_t{turn_num}",
                    "text": combined[:self.MAX_CHARS_PER_TURN],
                    "timestamp": entry.get("timestamp", ""),
                    "turn_num": turn_num,
                })
            # Resume at the first entry not consumed by this turn.
            i = j

        return turns

    def _extract_text_from_entry(self, entry: dict) -> str:
        """Extract searchable text from a transcript entry.

        Reads ``content`` from the entry itself or from its ``message`` dict.
        String content is used as-is; for list content, ``text`` blocks and
        bare strings are kept, ``tool_use`` blocks are summarised as
        ``[tool:<name>] <truncated input>``, and every other block type
        (tool results, thinking, ...) is skipped.
        """
        content = entry.get("content", "")
        msg = entry.get("message", {})
        if isinstance(msg, dict):
            content = content or msg.get("content", "")

        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ""

        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict):
                btype = block.get("type", "")
                if btype == "text":
                    parts.append(block.get("text", ""))
                elif btype == "tool_use":
                    tool_name = block.get("name", "")
                    tool_input = str(block.get("input", ""))[:self.MAX_TOOL_INPUT_CHARS]
                    parts.append(f"[tool:{tool_name}] {tool_input}")

        # Drop empty / non-string parts (e.g. "text": null) so join() cannot fail.
        return " ".join(p for p in parts if isinstance(p, str) and p)

    def search(self, query: str, top_k: int = 5, max_tokens: int = 4000) -> list:
        """Search the transcript index via TF-IDF.

        Args:
            query: Free-text query.
            top_k: Maximum number of ranked turns to consider.
            max_tokens: Token budget for the returned texts.  The best hit is
                always returned even if it alone exceeds the budget.

        Returns:
            ``[{turn_id, text, session_file, timestamp, score, tokens}]``.
        """
        if not self.index:
            self._load_index()
        if not self.index:
            return []

        docs = {tid: data.get("text", "") for tid, data in self.index.items()}
        ranked = self._tfidf_search(query, docs, top_k)

        results = []
        total_tokens = 0
        for turn_id, score in ranked:
            data = self.index[turn_id]
            text = data.get("text", "")
            tokens = count_tokens(text)
            if results and total_tokens + tokens > max_tokens:
                break
            total_tokens += tokens
            results.append({
                "turn_id": turn_id,
                "text": text,
                "session_file": data.get("session_file", ""),
                "timestamp": data.get("timestamp", ""),
                "score": round(score, 3),
                "tokens": tokens,
            })

        return results

    # ─── TF-IDF ──────────────────────────────────────────────

    def _tokenize(self, text: str) -> list:
        """Tokenize text — camelCase split, snake_case / kebab-case split.

        Returns lower-cased alphabetic tokens of at least two letters.
        """
        text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
        text = text.replace('_', ' ').replace('-', ' ')
        return re.findall(r'[a-zA-Z]{2,}', text.lower())

    def _tfidf_search(self, query: str, docs: dict, top_k: int) -> list:
        """Generic TF-IDF search over a dict of ``{id: text}``.

        Returns:
            Up to *top_k* ``(doc_id, score)`` pairs, best first.  Only
            documents containing at least one query token are returned.
        """
        if not docs:
            return []
        query_tokens = self._tokenize(query)
        if not query_tokens:
            return []

        n_docs = len(docs)
        df = Counter()
        doc_tf = {}
        for doc_id, text in docs.items():
            tf = Counter(self._tokenize(text))
            doc_tf[doc_id] = tf
            df.update(tf.keys())  # each document counts once per distinct term

        # Smoothed IDF.  The unsmoothed log(N / (1 + df)) is <= 0 whenever a
        # term occurs in N-1 or all documents, so small indexes (one or two
        # turns) could never produce a hit.
        idf = {t: math.log((1 + n_docs) / (1 + freq)) + 1.0 for t, freq in df.items()}

        scores = {}
        for doc_id, tf in doc_tf.items():
            if not tf:
                continue
            max_tf = max(tf.values())
            score = 0.0
            for qt in query_tokens:
                if qt in tf:
                    # Augmented term frequency dampens very long documents.
                    ntf = 0.5 + 0.5 * (tf[qt] / max_tf)
                    score += ntf * idf[qt]
            if score > 0:
                scores[doc_id] = score

        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return ranked[:top_k]

    # ─── Persistence ─────────────────────────────────────────

    @staticmethod
    def _read_json_dict(path: Path) -> dict:
        """Return the JSON object stored at *path*, or ``{}`` if it is unreadable or not an object."""
        try:
            with open(path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def _load_index(self):
        """Load ``index.json`` into ``self.index``; a missing file leaves it untouched."""
        if self.index_file.exists():
            self.index = self._read_json_dict(self.index_file)

    def _save_index(self):
        """Write ``self.index`` to ``index.json``."""
        with open(self.index_file, 'w', encoding="utf-8") as f:
            json.dump(self.index, f)

    def _load_manifest(self):
        """Load ``manifest.json`` into ``self.manifest``; a missing file leaves it untouched."""
        if self.manifest_file.exists():
            self.manifest = self._read_json_dict(self.manifest_file)

    def _save_manifest(self):
        """Write ``self.manifest`` to ``manifest.json``."""
        with open(self.manifest_file, 'w', encoding="utf-8") as f:
            json.dump(self.manifest, f)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Offline validation pipeline — background syntax checking with result caching."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class _CacheEntry:
    """A single cached validation result together with the file fingerprint it belongs to."""

    # Path of the validated file, used as the cache key.
    rel_path: str
    # Modification time recorded for the file when the result was stored.
    mtime: float
    # File size in bytes recorded when the result was stored.
    size: int
    # Checker output dict.
    result: dict
    # Wall-clock time at which this entry was created.
    validated_at: float = field(default_factory=time.time)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# File suffixes handled by pure-Python checkers (fast, no subprocess).
_FAST_EXTENSIONS = set(
    (
        ".py",
        ".json",
        ".yaml",
        ".yml",
        ".xml",
        ".svg",
        ".toml",
        ".html",
        ".htm",
        ".css",
    )
)

# File suffixes whose checkers need a subprocess (slower, opt-in).
_SUBPROCESS_EXTENSIONS = set(
    (
        ".js",
        ".jsx",
        ".ts",
        ".tsx",
        ".java",
        ".go",
        ".rs",
        ".r",
        ".php",
        ".rb",
        ".pl",
        ".pm",
        ".lua",
        ".sh",
        ".bash",
    )
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ValidationCache:
    """In-memory store of syntax-check results, keyed by path and guarded by a lock.

    A stored result counts as fresh only while the file's current mtime and
    size equal the values recorded when the result was stored.
    """

    def __init__(self, project_path: str, config: Optional[dict] = None):
        """Set up an empty cache for *project_path* using optional *config* overrides.

        Recognised config keys: ``enabled``, ``background_subprocess_checkers``
        and ``debounce_seconds`` (clamped to a minimum of 0.5).
        """
        settings = config or {}
        self._project_path = str(Path(project_path).resolve())
        self._cache: Dict[str, _CacheEntry] = {}
        self._lock = threading.Lock()
        self._enabled = settings.get("enabled", True)
        self._bg_subprocess = settings.get("background_subprocess_checkers", False)
        requested_debounce = float(settings.get("debounce_seconds", 2.0))
        self._debounce_seconds = max(0.5, requested_debounce)
        # At most one subprocess-based validation runs at a time.
        self._subprocess_sem = threading.Semaphore(1)

    # ── Public API ──────────────────────────────────────────────────

    def get(self, rel_path: str) -> Optional[dict]:
        """Return the stored result for *rel_path* if the file is unchanged, else ``None``.

        A file that can no longer be stat'ed has its entry evicted.
        """
        if not self._enabled:
            return None
        try:
            info = os.stat(os.path.join(self._project_path, rel_path))
        except OSError:
            self.evict(rel_path)
            return None
        with self._lock:
            hit = self._cache.get(rel_path)
            if not hit:
                return None
            if hit.mtime != info.st_mtime or hit.size != info.st_size:
                return None
            return hit.result

    def put(self, rel_path: str, result: dict, mtime: float, size: int) -> None:
        """Record *result* for *rel_path* along with the file's mtime and size."""
        fresh_entry = _CacheEntry(rel_path=rel_path, mtime=mtime, size=size, result=result)
        with self._lock:
            self._cache[rel_path] = fresh_entry

    def evict(self, rel_path: str) -> None:
        """Forget any entry stored for *rel_path* (e.g. the file was deleted)."""
        with self._lock:
            self._cache.pop(rel_path, None)

    def get_errors(self) -> List[dict]:
        """List ``{path, detail, checker}`` for every entry whose status is ``syntax_error``."""
        failures = []
        with self._lock:
            for cached in self._cache.values():
                outcome = cached.result
                if outcome.get("status") != "syntax_error":
                    continue
                failures.append({
                    "path": cached.rel_path,
                    "detail": outcome.get("detail", ""),
                    "checker": outcome.get("checker", ""),
                })
        return failures

    def summary(self) -> dict:
        """Return counts of cached files, files with syntax errors, and clean files."""
        with self._lock:
            snapshot = list(self._cache.values())
        statuses = [cached.result.get("status") for cached in snapshot]
        return {
            "cached_files": len(snapshot),
            "errors": statuses.count("syntax_error"),
            "clean": statuses.count("clean"),
        }

    # ── Background validation entry point ───────────────────────────

    def validate_file(self, rel_path: str) -> Optional[dict]:
        """Return the validation result for *rel_path*, running a checker on a cache miss.

        Returns ``None`` when caching is disabled, when the extension is not
        eligible (unknown, or subprocess-based while background subprocess
        checking is off), when the file cannot be stat'ed or read, or when
        another subprocess validation is already in progress.
        """
        if not self._enabled:
            return None

        ext = Path(rel_path).suffix.lower()
        needs_subprocess = ext in _SUBPROCESS_EXTENSIONS
        if ext not in _FAST_EXTENSIONS and not needs_subprocess:
            return None
        if needs_subprocess and not self._bg_subprocess:
            return None

        # A fresh cached result short-circuits everything below.
        previous = self.get(rel_path)
        if previous is not None:
            return previous

        target = os.path.join(self._project_path, rel_path)
        try:
            info = os.stat(target)
        except OSError:
            self.evict(rel_path)
            return None

        try:
            with open(target, "r", encoding="utf-8", errors="replace") as handle:
                source_text = handle.read()
        except Exception:
            return None

        from services.parser import check_syntax_native_with_timeout

        if not needs_subprocess:
            outcome = check_syntax_native_with_timeout(source_text, ext, timeout_seconds=10)
        else:
            # Skip instead of queueing when another subprocess check holds the semaphore.
            if not self._subprocess_sem.acquire(timeout=0.1):
                return None
            try:
                outcome = check_syntax_native_with_timeout(source_text, ext, timeout_seconds=35)
            finally:
                self._subprocess_sem.release()

        self.put(rel_path, outcome, info.st_mtime, info.st_size)
        return outcome

    @property
    def debounce_seconds(self) -> float:
        """Configured debounce interval in seconds (never below 0.5)."""
        return self._debounce_seconds
|