code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
services/compressor.py
ADDED
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AST-based Code Compression Service
|
|
3
|
+
|
|
4
|
+
Parses source files into AST and generates compressed summaries:
|
|
5
|
+
- Function/class signatures only (skip bodies)
|
|
6
|
+
- Structural outline mode
|
|
7
|
+
- Smart truncation with context preservation
|
|
8
|
+
- Diff-only mode for edits
|
|
9
|
+
"""
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Iterable, Optional
|
|
15
|
+
|
|
16
|
+
from core import count_tokens, measure_savings
|
|
17
|
+
from services.parser import HAS_TREE_SITTER, extract_sections_ast
|
|
18
|
+
|
|
19
|
+
# Comment-matching regexes, keyed by file extension.
# Languages that share a comment syntax share one pattern tuple.
_HASH_LINE_COMMENT = r'#.*$'
_C_STYLE_COMMENTS = (r'//.*$', r'/\*[\s\S]*?\*/')

COMMENT_PATTERNS = {
    # Python: '#' line comments plus both triple-quoted string forms
    '.py': (_HASH_LINE_COMMENT, r'"""[\s\S]*?"""', r"'''[\s\S]*?'''"),
    # JavaScript / TypeScript family: '//' line comments and '/* ... */' blocks
    **{suffix: _C_STYLE_COMMENTS for suffix in ('.js', '.ts', '.tsx', '.jsx')},
    # R (both spellings of the extension): '#' line comments only
    **{suffix: (_HASH_LINE_COMMENT,) for suffix in ('.r', '.R')},
}
|
|
29
|
+
|
|
30
|
+
# Regex-based structural extractors, used when tree-sitter is unavailable.
# Each table maps a "kind" label to a regex that is matched against a single
# left-stripped source line. Insertion order matters to consumers that stop
# at the first matching kind, so the key order below is deliberate.
_PY_STRUCTURE = {
    'class': r'^class\s+(\w+).*?:',
    'function': r'^(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?:',
    'import': r'^(?:from\s+\S+\s+)?import\s+.+$',
    'decorator': r'^@\w+',
    'assignment': r'^([A-Z_][A-Z_0-9]*)\s*=',
}

_JS_STRUCTURE = {
    'class': r'^(?:export\s+)?class\s+(\w+)',
    'function': r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)',
    'arrow': r'^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>',
    'import': r'^import\s+.+$',
    'export': r'^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+(\w+)',
}

_TS_STRUCTURE = {
    'interface': r'^(?:export\s+)?interface\s+(\w+)',
    'type': r'^(?:export\s+)?type\s+(\w+)',
    'class': r'^(?:export\s+)?class\s+(\w+)',
    'function': r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[<(]',
    'arrow': r'^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*(?::\s*.+?\s*)?=\s*(?:async\s+)?\([^)]*\)\s*=>',
    'import': r'^import\s+.+$',
    'enum': r'^(?:export\s+)?enum\s+(\w+)',
}

_R_STRUCTURE = {
    'function': r'^(\w+)\s*<-\s*function\s*\(([^)]*)\)',
    'assignment': r'^(\w+)\s*<-',
    'library': r'^(?:library|require)\s*\(.+\)',
}

STRUCTURE_PATTERNS = {
    '.py': _PY_STRUCTURE,
    '.js': _JS_STRUCTURE,
    '.ts': _TS_STRUCTURE,
    # Both spellings of the R extension get their own independent table
    '.r': dict(_R_STRUCTURE),
    '.R': dict(_R_STRUCTURE),
}

# The JSX/TSX dialects reuse the TypeScript table (each gets a separate copy)
for ext in ['.tsx', '.jsx']:
    STRUCTURE_PATTERNS[ext] = dict(STRUCTURE_PATTERNS['.ts'])
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Project-relative paths (POSIX separators) that must never be compressed.
# Grouped by purpose; the result is one flat set of strings.
PROTECTED_COMPRESS_FILES = {
    # Core CLI / configuration sources
    *("cli/c3.py", "cli/ui.html", "cli/docs.html", "core/config.py"),
    # Agent instruction files and top-level documentation
    *("CLAUDE.md", "GEMINI.md", "AGENTS.md", "README.md"),
    # Launch / install scripts and packaging metadata
    *("c3.bat", "install.bat", "install.sh", "pyproject.toml"),
    # Per-tool configuration stored in dot-directories
    *(".codex/config.toml", ".vscode/mcp.json", ".gemini/settings.json"),
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class CodeCompressor:
|
|
92
|
+
"""Compresses source code files into token-efficient summaries."""
|
|
93
|
+
|
|
94
|
+
def __init__(self, cache_dir: str = ".c3/cache",
             project_root: Optional[str] = None,
             protected_files: Optional[Iterable[str]] = None,
             router: Optional[Any] = None):
    """Set up cache locations, the protected-file set and the optional router.

    Args:
        cache_dir: Directory for persisted results; created (with parents)
            if it does not exist yet.
        project_root: Root that protected paths are resolved against. When
            falsy, the grandparent of ``cache_dir`` is used instead.
        protected_files: Extra paths to protect in addition to
            ``PROTECTED_COMPRESS_FILES``; each is normalized before storing.
        router: Object whose ``summarize`` method backs the "summary" mode
            of ``compress_file``; may be ``None``.
    """
    self.cache_dir = Path(cache_dir)
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # str(filepath) -> content hash of the last compressed version
    self._file_hashes = {}
    # cache_key -> result dict, so repeat requests skip disk I/O and JSON parsing
    self._mem_cache = {}
    self._MEM_CACHE_MAX = 128

    if project_root:
        self.project_root = Path(project_root).resolve()
    else:
        # e.g. ".c3/cache" -> the directory that contains ".c3"
        self.project_root = self.cache_dir.parent.parent.resolve()

    self._protected_files = set(PROTECTED_COMPRESS_FILES)
    if protected_files:
        for entry in protected_files:
            self._protected_files.add(self._normalize_rel_path(entry))

    self.router = router
|
|
109
|
+
|
|
110
|
+
@staticmethod
|
|
111
|
+
def _normalize_rel_path(path: str) -> str:
|
|
112
|
+
return str(path).replace("\\", "/").lstrip("./")
|
|
113
|
+
|
|
114
|
+
def _relative_to_project(self, path: Path) -> str:
|
|
115
|
+
try:
|
|
116
|
+
return path.resolve().relative_to(self.project_root).as_posix()
|
|
117
|
+
except Exception:
|
|
118
|
+
return self._normalize_rel_path(str(path))
|
|
119
|
+
|
|
120
|
+
def is_protected_file(self, filepath: Path) -> bool:
|
|
121
|
+
normalized = self._normalize_rel_path(self._relative_to_project(filepath))
|
|
122
|
+
return normalized in self._protected_files
|
|
123
|
+
|
|
124
|
+
def get_protected_files(self) -> list:
|
|
125
|
+
return sorted(self._protected_files)
|
|
126
|
+
|
|
127
|
+
def compress_file(self, filepath: str, mode: str = "structure") -> dict:
    """
    Compress a source file.

    Modes:
    - "structure": Function/class signatures + imports (most compressed)
    - "outline": Structure + docstrings + key comments
    - "smart": Adaptive - more detail for small files, less for large
    - "diff": Only changes since last seen (requires prior state)
    - "summary": High-level LLM summary (requires router)
    - "bug_scan": Structure map + annotated exception-handling hotspots with line numbers

    Args:
        filepath: Path of the file to compress.
        mode: One of the modes listed above.

    Returns:
        A result dict. On success it contains "compressed" and "mode" plus
        whatever ``measure_savings`` reports; cacheable modes also carry
        "filepath". On failure it contains "error" and an empty
        "compressed" string (missing path, non-file path, protected file,
        or summary mode without a router).
    """
    filepath = Path(filepath).resolve()
    if not filepath.exists():
        return {"error": f"File not found: {filepath}", "compressed": ""}
    if not filepath.is_file():
        # A directory passes exists() but would make read_text() raise
        return {"error": f"Not a file: {filepath}", "compressed": ""}
    if self.is_protected_file(filepath):
        return {
            "error": f"Compression is blocked for protected file: {self._relative_to_project(filepath)}",
            "compressed": "",
            "protected_files": self.get_protected_files(),
        }

    content = filepath.read_text(encoding="utf-8", errors='replace')
    content_hash = hashlib.md5(content.encode()).hexdigest()
    ext = filepath.suffix.lower()

    # Results are keyed by content hash + mode + extension, so a cached entry
    # is valid for any file with identical content.
    cache_key = f"{content_hash}_{mode}{ext}.json"
    cache_file = self.cache_dir / cache_key

    def _remember(payload: dict) -> None:
        """Store a private copy of *payload* in the bounded in-memory cache."""
        if len(self._mem_cache) >= self._MEM_CACHE_MAX:
            # Dicts iterate in insertion order: drop the oldest quarter
            # (always at least one entry so the bound is actually enforced).
            oldest_first = list(self._mem_cache)
            for stale_key in oldest_first[:max(1, len(oldest_first) // 4)]:
                del self._mem_cache[stale_key]
        self._mem_cache[cache_key] = dict(payload)

    # Check caches (diff/summary have their own logic and are never cached here)
    if mode not in ("diff", "summary"):
        # Fast path: in-memory cache (no JSON parse / disk I/O)
        mem_hit = self._mem_cache.get(cache_key)
        if mem_hit is not None:
            hit = dict(mem_hit)
            hit["filepath"] = str(filepath)
            return hit
        if cache_file.exists():
            try:
                with open(cache_file, encoding="utf-8") as f:
                    cached_result = json.load(f)
            except (OSError, ValueError):
                # Unreadable or corrupt cache entry: recompute below
                cached_result = None
            if isinstance(cached_result, dict):
                cached_result["filepath"] = str(filepath)
                # _remember keeps its own copy, so the caller may freely
                # mutate the returned dict without corrupting the cache.
                _remember(cached_result)
                return cached_result

    if mode == "diff":
        return self._diff_compress(filepath, content)

    if mode == "summary":
        if not self.router:
            return {"error": "Summary mode requires a router", "compressed": ""}
        sum_res = self.router.summarize(content, style="concise")
        summary = sum_res.get("summary", "Could not summarize")
        result = f"# {filepath.name} — SUMMARY\n{summary}"
        return {"compressed": result, "mode": "summary", **measure_savings(content, result)}

    header_tag = ""
    if mode == "bug_scan":
        # Structure map + exception-handling annotation pass
        header_tag = " [bug_scan]"
        compressed_parts = [self._extract_structure(content, ext, "outline")]
        exception_section = self._scan_exception_handlers(content)
        if exception_section:
            compressed_parts.append(exception_section)
        compressed = "\n".join(compressed_parts)
    elif mode == "smart":
        tokens = count_tokens(content)
        if tokens < 80:
            # Tiny file: return it verbatim (this result is not cached)
            return {"compressed": content, "mode": "full", **measure_savings(content, content)}
        if tokens < 400:
            compressed = self._extract_structure(content, ext, "outline")
        else:
            # Try structure first; if savings < 30%, fall back to outline
            compressed = self._extract_structure(content, ext, "structure")
            structure_tokens = count_tokens(compressed)
            if (1 - structure_tokens / tokens) < 0.30:
                compressed = self._extract_structure(content, ext, "outline")
    else:
        compressed = self._extract_structure(content, ext, mode)

    # Build result
    header = f"# {filepath.name} ({filepath.suffix}) — {len(content.splitlines())} lines{header_tag}\n"
    result = header + compressed

    savings = measure_savings(content, result)
    savings["compressed"] = result
    savings["mode"] = mode  # the requested mode, which is also part of the cache key
    savings["filepath"] = str(filepath)

    # Record the content hash of the version just compressed
    self._file_hashes[str(filepath)] = content_hash

    # Persist: memory first (fast path for repeat access), then best-effort disk
    _remember(savings)
    try:
        with open(cache_file, 'w', encoding="utf-8") as f:
            json.dump(savings, f, indent=2)
    except (OSError, TypeError, ValueError):
        # The disk cache is an optimization only; failing to write is not fatal
        pass

    return savings
|
|
259
|
+
|
|
260
|
+
def _extract_structure(self, content: str, ext: str, mode: str) -> str:
    """Extract structural elements from source code.

    Tries tree-sitter extraction first when ``HAS_TREE_SITTER`` is true; if
    that yields no sections or raises, falls back to the line-by-line regex
    tables in ``STRUCTURE_PATTERNS``. Extensions without a regex table are
    handed to ``_generic_compress``.

    Args:
        content: Full source text.
        ext: File extension used to select the pattern table (e.g. ".py").
        mode: When "outline", a docstring line is added under classes and
            functions; other values emit signatures only.

    Returns:
        The extracted lines joined with newlines.
    """
    # 1. Try Tree-sitter AST extraction first (only when the parser module reports it available)
    if HAS_TREE_SITTER:
        try:
            sections = extract_sections_ast(content, ext)
            if sections:
                return self._render_ast_sections(sections, content, mode)
        except Exception:
            # Best-effort: any AST failure falls through to the regex path
            pass

    # 2. Fall back to regex-based extraction
    lines = content.split('\n')
    patterns = STRUCTURE_PATTERNS.get(ext, {})

    if not patterns:
        return self._generic_compress(content, ext)

    extracted = []
    i = 0
    indent_stack = []  # (indent_level, kind) of enclosing class-like blocks, innermost last

    while i < len(lines):
        line = lines[i]
        stripped = line.rstrip()
        lstripped = line.lstrip()
        indent = len(line) - len(lstripped)

        # Leave every enclosing block whose indent is >= this line's indent.
        # Blank lines (empty lstripped) never close a block.
        while indent_stack and indent <= indent_stack[-1][0] and lstripped:
            indent_stack.pop()

        matched = False
        # Patterns are tried in table order against the left-stripped line;
        # the first kind that matches wins (see the break below).
        for kind, pattern in patterns.items():
            if re.match(pattern, lstripped, re.MULTILINE):
                matched = True

                # One prefix unit per enclosing class-like block
                nesting_prefix = " " * len(indent_stack)

                if kind in ('import', 'library'):
                    # Emitted with original leading whitespace, no nesting prefix
                    extracted.append(stripped)
                elif kind == 'decorator':
                    extracted.append(f"{nesting_prefix}{stripped}")
                elif kind in ('class', 'interface', 'enum', 'type'):
                    extracted.append(f"\n{nesting_prefix}{stripped}")
                    # Only class-like kinds open a new nesting level
                    indent_stack.append((indent, kind))
                    if mode == "outline":
                        doc = self._extract_docstring(lines, i + 1, ext)
                        if doc:
                            extracted.append(f"{nesting_prefix} {doc}")
                elif kind in ('function', 'arrow'):
                    extracted.append(f"{nesting_prefix}{stripped}")
                    if mode == "outline":
                        doc = self._extract_docstring(lines, i + 1, ext)
                        if doc:
                            extracted.append(f"{nesting_prefix} {doc}")
                elif kind == 'assignment':
                    extracted.append(f"{nesting_prefix}{stripped}")
                    # Capture multi-line assignments: up to 3 continuation lines,
                    # stopping at the first one that does not end in an open
                    # bracket or comma. The main index i is not advanced, so
                    # these lines are still scanned on later iterations.
                    if stripped.rstrip().endswith((',', '{', '[', '(')):
                        for j in range(1, 4):
                            if i + j < len(lines):
                                cont = lines[i + j].rstrip()
                                if cont.strip():
                                    extracted.append(f"{nesting_prefix} {cont.strip()}")
                                    if not cont.rstrip().endswith((',', '{', '[', '(')):
                                        break
                elif kind == 'export':
                    extracted.append(f"{nesting_prefix}{stripped}")
                # First matching kind wins; do not test the remaining patterns
                break

        i += 1

    return '\n'.join(extracted)
|
|
336
|
+
|
|
337
|
+
def _render_ast_sections(self, sections: list, content: str, mode: str) -> str:
|
|
338
|
+
"""Convert Tree-sitter sections into a compressed text summary."""
|
|
339
|
+
lines = content.splitlines()
|
|
340
|
+
extracted = []
|
|
341
|
+
|
|
342
|
+
# Track imports separately to group them at top
|
|
343
|
+
imports = [s for s in sections if s.get("type") == "import"]
|
|
344
|
+
if imports:
|
|
345
|
+
for s in imports:
|
|
346
|
+
line_idx = s["line_start"] - 1
|
|
347
|
+
if 0 <= line_idx < len(lines):
|
|
348
|
+
extracted.append(lines[line_idx].strip())
|
|
349
|
+
extracted.append("")
|
|
350
|
+
|
|
351
|
+
# Depth-first traversal of classes and functions
|
|
352
|
+
def _render_node(node_list, depth=0):
|
|
353
|
+
prefix = " " * depth
|
|
354
|
+
for s in node_list:
|
|
355
|
+
stype = s.get("type")
|
|
356
|
+
if stype == "import":
|
|
357
|
+
continue
|
|
358
|
+
|
|
359
|
+
name = s.get("name", "unnamed")
|
|
360
|
+
start, end = s["line_start"], s["line_end"]
|
|
361
|
+
if 1 <= start <= len(lines):
|
|
362
|
+
# For signature extraction:
|
|
363
|
+
# Take up to the first 3 lines of the section to capture multi-line signatures
|
|
364
|
+
sig_lines = lines[start-1:min(start+2, end)]
|
|
365
|
+
# Heuristic: stop at the first line ending with { or :
|
|
366
|
+
sig_found = False
|
|
367
|
+
decl = ""
|
|
368
|
+
for line in sig_lines:
|
|
369
|
+
clean = line.strip()
|
|
370
|
+
decl += " " + clean
|
|
371
|
+
if any(clean.endswith(c) for c in (':', '{')):
|
|
372
|
+
sig_found = True
|
|
373
|
+
break
|
|
374
|
+
|
|
375
|
+
decl = decl.strip()
|
|
376
|
+
if decl.endswith("{") or decl.endswith(":"):
|
|
377
|
+
decl = decl[:-1].strip()
|
|
378
|
+
|
|
379
|
+
if stype == "class":
|
|
380
|
+
extracted.append(f"\n{prefix}class {name}:")
|
|
381
|
+
else:
|
|
382
|
+
extracted.append(f"{prefix}{decl}")
|
|
383
|
+
|
|
384
|
+
if mode == "outline":
|
|
385
|
+
# Find docstring if it's within the section (start is already 1-indexed)
|
|
386
|
+
doc = self._extract_docstring(lines, start, "")
|
|
387
|
+
if doc:
|
|
388
|
+
extracted.append(f"{prefix} \"\"\" {doc} \"\"\"")
|
|
389
|
+
|
|
390
|
+
if "children" in s and s["children"]:
|
|
391
|
+
_render_node(s["children"], depth + 1)
|
|
392
|
+
|
|
393
|
+
_render_node([s for s in sections if s.get("type") != "import"])
|
|
394
|
+
return "\n".join(extracted)
|
|
395
|
+
|
|
396
|
+
def _extract_docstring(self, lines: list, start: int, ext: str) -> Optional[str]:
|
|
397
|
+
"""Extract docstring/JSDoc from position."""
|
|
398
|
+
if start >= len(lines):
|
|
399
|
+
return None
|
|
400
|
+
|
|
401
|
+
line = lines[start].strip()
|
|
402
|
+
|
|
403
|
+
# Python docstrings — first line only
|
|
404
|
+
if ext == '.py' and (line.startswith('"""') or line.startswith("'''")):
|
|
405
|
+
quote = line[:3]
|
|
406
|
+
if line.endswith(quote) and len(line) > 6:
|
|
407
|
+
return line[3:-3].strip()
|
|
408
|
+
# Multi-line: take just the first line
|
|
409
|
+
first = line[3:].strip()
|
|
410
|
+
if first:
|
|
411
|
+
return first
|
|
412
|
+
# First content line
|
|
413
|
+
if start + 1 < len(lines):
|
|
414
|
+
return lines[start + 1].strip()
|
|
415
|
+
return None
|
|
416
|
+
|
|
417
|
+
# JSDoc — first meaningful line only
|
|
418
|
+
if line.startswith('/**'):
|
|
419
|
+
for j in range(start, min(start + 10, len(lines))):
|
|
420
|
+
cleaned = lines[j].strip().lstrip('/*').rstrip('*/').strip()
|
|
421
|
+
if cleaned:
|
|
422
|
+
return cleaned
|
|
423
|
+
if '*/' in lines[j]:
|
|
424
|
+
break
|
|
425
|
+
return None
|
|
426
|
+
|
|
427
|
+
return None
|
|
428
|
+
|
|
429
|
+
def _scan_exception_handlers(self, content: str) -> str:
|
|
430
|
+
"""Scan for exception-handling hotspots and return an annotated section.
|
|
431
|
+
|
|
432
|
+
Returns a formatted block listing every bare/broad except clause with:
|
|
433
|
+
- line number
|
|
434
|
+
- the except line itself
|
|
435
|
+
- the immediately enclosing function name (if detectable)
|
|
436
|
+
|
|
437
|
+
Returns an empty string if no exception handlers are found.
|
|
438
|
+
"""
|
|
439
|
+
lines = content.splitlines()
|
|
440
|
+
# Patterns ranked from most to least problematic
|
|
441
|
+
_EXCEPT_PATTERNS = [
|
|
442
|
+
(re.compile(r"^\s*except\s*:"), "bare-except"),
|
|
443
|
+
(re.compile(r"^\s*except\s+Exception\s*:"), "broad-except"),
|
|
444
|
+
(re.compile(r"^\s*except\s+Exception\s+as\s+\w+\s*:"), "broad-except"),
|
|
445
|
+
(re.compile(r"^\s*except\s+\("), "multi-except"),
|
|
446
|
+
]
|
|
447
|
+
_FUNC_DEF = re.compile(r"^\s*(?:async\s+)?def\s+(\w+)")
|
|
448
|
+
|
|
449
|
+
hits: list[str] = []
|
|
450
|
+
# Track the most recently seen function name for context
|
|
451
|
+
current_func = "<module>"
|
|
452
|
+
for idx, line in enumerate(lines, start=1):
|
|
453
|
+
m = _FUNC_DEF.match(line)
|
|
454
|
+
if m:
|
|
455
|
+
current_func = m.group(1)
|
|
456
|
+
for pattern, label in _EXCEPT_PATTERNS:
|
|
457
|
+
if pattern.match(line):
|
|
458
|
+
# Show up to 2 continuation lines (body of the except block)
|
|
459
|
+
body_lines = []
|
|
460
|
+
for j in range(idx, min(idx + 2, len(lines))):
|
|
461
|
+
body = lines[j].strip()
|
|
462
|
+
if body and not body.startswith("except") and not body.startswith("try"):
|
|
463
|
+
body_lines.append(body)
|
|
464
|
+
body_preview = " | ".join(body_lines[:2]) if body_lines else ""
|
|
465
|
+
suffix = f" → {body_preview}" if body_preview else ""
|
|
466
|
+
hits.append(f" L{idx} [{label}] in `{current_func}`: {line.strip()}{suffix}")
|
|
467
|
+
break # one label per line
|
|
468
|
+
|
|
469
|
+
if not hits:
|
|
470
|
+
return ""
|
|
471
|
+
header = f"\n# Exception-handling hotspots ({len(hits)} found):"
|
|
472
|
+
return header + "\n" + "\n".join(hits)
|
|
473
|
+
|
|
474
|
+
def _generic_compress(self, content: str, ext: str) -> str:
|
|
475
|
+
"""Fallback compression for unknown languages."""
|
|
476
|
+
lines = content.split('\n')
|
|
477
|
+
# Keep non-empty lines that look structural
|
|
478
|
+
kept = []
|
|
479
|
+
for line in lines:
|
|
480
|
+
stripped = line.strip()
|
|
481
|
+
if not stripped:
|
|
482
|
+
continue
|
|
483
|
+
# Skip pure comment lines
|
|
484
|
+
if any(stripped.startswith(c) for c in ('#', '//', '/*', '*', '--')):
|
|
485
|
+
continue
|
|
486
|
+
# Keep lines that look like declarations/definitions
|
|
487
|
+
if any(kw in stripped.lower() for kw in ('function', 'class', 'def ', 'module', 'export', 'import', 'require', 'const ', 'let ', 'var ', 'type ', 'interface ')):
|
|
488
|
+
kept.append(stripped)
|
|
489
|
+
elif re.match(r'^[A-Za-z_]\w*\s*[=(<{]', stripped):
|
|
490
|
+
kept.append(stripped)
|
|
491
|
+
return '\n'.join(kept)
|
|
492
|
+
|
|
493
|
+
def _diff_compress(self, filepath: Path, current_content: str) -> dict:
    """Generate diff-based compression against the previously seen version.

    The previous version is stored as a baseline file in ``self.cache_dir``.
    The baseline name combines the file's basename with a short hash of its
    full path, so files that share a basename in different directories
    (e.g. several ``__init__.py``) keep separate baselines. Baselines named
    by basename alone are not consulted.

    Args:
        filepath: Path of the file being compressed.
        current_content: Current text of that file.

    Returns:
        A result dict from ``measure_savings`` extended with "compressed"
        and "mode", where mode is "diff-unchanged", "diff" or "diff-first".
    """
    path_tag = hashlib.md5(str(filepath).encode("utf-8", errors="replace")).hexdigest()[:12]
    cache_file = self.cache_dir / f"{filepath.name}.{path_tag}.cache"

    cached = None
    if cache_file.exists():
        try:
            cached = cache_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable baseline: behave as if the file was never seen
            cached = None

    if cached is not None:
        if cached == current_content:
            # Baseline already matches, so there is nothing to rewrite
            result = f"# {filepath.name} — NO CHANGES"
            return {"compressed": result, "mode": "diff-unchanged", **measure_savings(current_content, result)}

        # Generate contextual diff
        diff = self._contextual_diff(cached.split('\n'), current_content.split('\n'), filepath.name)
        savings = measure_savings(current_content, diff)
        savings["compressed"] = diff
        savings["mode"] = "diff"
    else:
        # No baseline — fall back to structure mode
        compressed = self._extract_structure(current_content, filepath.suffix.lower(), "structure")
        header = f"# {filepath.name} (FIRST SEEN) — {len(current_content.splitlines())} lines\n"
        result = header + compressed
        savings = measure_savings(current_content, result)
        savings["compressed"] = result
        savings["mode"] = "diff-first"

    # Update the baseline; best-effort, like the other cache writes in this class
    try:
        cache_file.write_text(current_content, encoding="utf-8")
    except OSError:
        pass
    return savings
|
|
523
|
+
|
|
524
|
+
def _contextual_diff(self, old_lines: list, new_lines: list, filename: str) -> str:
|
|
525
|
+
"""Generate a contextual diff with surrounding structure."""
|
|
526
|
+
import difflib
|
|
527
|
+
differ = difflib.unified_diff(old_lines, new_lines, lineterm='', n=1)
|
|
528
|
+
diff_text = '\n'.join(differ)
|
|
529
|
+
|
|
530
|
+
if not diff_text.strip():
|
|
531
|
+
return f"# {filename} — NO CHANGES"
|
|
532
|
+
|
|
533
|
+
header = f"# {filename} — CHANGES ONLY\n"
|
|
534
|
+
return header + diff_text
|
|
535
|
+
|
|
536
|
+
def compress_directory(self, dirpath: str, mode: str = "smart",
|
|
537
|
+
extensions: Optional[list] = None,
|
|
538
|
+
max_files: int = 50) -> dict:
|
|
539
|
+
"""Compress an entire directory of source files."""
|
|
540
|
+
dirpath = Path(dirpath).resolve()
|
|
541
|
+
if not dirpath.is_dir():
|
|
542
|
+
return {"error": f"Not a directory: {dirpath}"}
|
|
543
|
+
|
|
544
|
+
default_exts = {'.py', '.js', '.ts', '.tsx', '.jsx', '.r', '.R',
|
|
545
|
+
'.css', '.html', '.json', '.yaml', '.yml', '.md'}
|
|
546
|
+
allowed = set(extensions) if extensions else default_exts
|
|
547
|
+
|
|
548
|
+
# Skip common non-essential dirs
|
|
549
|
+
skip_dirs = {'node_modules', '.git', '__pycache__', '.c3', 'venv',
|
|
550
|
+
'env', '.venv', 'dist', 'build', '.next', '.cache'}
|
|
551
|
+
|
|
552
|
+
results = []
|
|
553
|
+
total_original = 0
|
|
554
|
+
total_compressed = 0
|
|
555
|
+
skipped_protected = []
|
|
556
|
+
|
|
557
|
+
files = sorted(dirpath.rglob('*'))[:max_files * 3] # Pre-limit
|
|
558
|
+
count = 0
|
|
559
|
+
|
|
560
|
+
for fpath in files:
|
|
561
|
+
if count >= max_files:
|
|
562
|
+
break
|
|
563
|
+
if not fpath.is_file():
|
|
564
|
+
continue
|
|
565
|
+
if fpath.suffix.lower() not in allowed:
|
|
566
|
+
continue
|
|
567
|
+
if any(skip in fpath.parts for skip in skip_dirs):
|
|
568
|
+
continue
|
|
569
|
+
if self.is_protected_file(fpath):
|
|
570
|
+
skipped_protected.append(self._relative_to_project(fpath))
|
|
571
|
+
continue
|
|
572
|
+
|
|
573
|
+
result = self.compress_file(str(fpath), mode)
|
|
574
|
+
if "error" not in result:
|
|
575
|
+
results.append(result)
|
|
576
|
+
total_original += result.get("original_tokens", 0)
|
|
577
|
+
total_compressed += result.get("compressed_tokens", 0)
|
|
578
|
+
count += 1
|
|
579
|
+
|
|
580
|
+
combined = '\n\n---\n\n'.join(r["compressed"] for r in results)
|
|
581
|
+
savings_pct = ((total_original - total_compressed) / total_original * 100) if total_original > 0 else 0
|
|
582
|
+
|
|
583
|
+
return {
|
|
584
|
+
"files_processed": len(results),
|
|
585
|
+
"total_original_tokens": total_original,
|
|
586
|
+
"total_compressed_tokens": total_compressed,
|
|
587
|
+
"savings_pct": round(savings_pct, 1),
|
|
588
|
+
"combined_output": combined,
|
|
589
|
+
"file_results": results,
|
|
590
|
+
"protected_files": self.get_protected_files(),
|
|
591
|
+
"skipped_protected_files": sorted(skipped_protected),
|
|
592
|
+
}
|