code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
services/compressor.py ADDED
@@ -0,0 +1,592 @@
1
+ """
2
+ AST-based Code Compression Service
3
+
4
+ Parses source files into AST and generates compressed summaries:
5
+ - Function/class signatures only (skip bodies)
6
+ - Structural outline mode
7
+ - Smart truncation with context preservation
8
+ - Diff-only mode for edits
9
+ """
10
+ import hashlib
11
+ import json
12
+ import re
13
+ from pathlib import Path
14
+ from typing import Any, Iterable, Optional
15
+
16
+ from core import count_tokens, measure_savings
17
+ from services.parser import HAS_TREE_SITTER, extract_sections_ast
18
+
19
# Comment-matching regexes per file extension.  Each value is a tuple of
# patterns; the first entry is always the single-line comment form.
_HASH_LINE_COMMENT = (r'#.*$',)
_C_FAMILY_COMMENTS = (r'//.*$', r'/\*[\s\S]*?\*/')

COMMENT_PATTERNS = {
    # Python additionally treats triple-quoted strings as comment blocks.
    '.py': _HASH_LINE_COMMENT + (r'"""[\s\S]*?"""', r"'''[\s\S]*?'''"),
    '.js': _C_FAMILY_COMMENTS,
    '.ts': _C_FAMILY_COMMENTS,
    '.tsx': _C_FAMILY_COMMENTS,
    '.jsx': _C_FAMILY_COMMENTS,
    '.r': _HASH_LINE_COMMENT,
    '.R': _HASH_LINE_COMMENT,
}
29
+
30
# Regex-based structural extractors (fallback when tree-sitter unavailable).
# Every pattern is applied with re.match to a line whose leading whitespace
# has been removed.  Key order matters: the first pattern that matches a line
# decides how that line is classified.
_R_STRUCTURE = {
    'function': r'^(\w+)\s*<-\s*function\s*\(([^)]*)\)',
    'assignment': r'^(\w+)\s*<-',
    'library': r'^(?:library|require)\s*\(.+\)',
}

STRUCTURE_PATTERNS = {
    '.py': {
        'class': r'^class\s+(\w+).*?:',
        'function': r'^(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?:',
        'import': r'^(?:from\s+\S+\s+)?import\s+.+$',
        'decorator': r'^@\w+',
        'assignment': r'^([A-Z_][A-Z_0-9]*)\s*=',
    },
    '.js': {
        'class': r'^(?:export\s+)?class\s+(\w+)',
        'function': r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)',
        'arrow': r'^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*=>',
        'import': r'^import\s+.+$',
        'export': r'^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+(\w+)',
    },
    '.ts': {
        'interface': r'^(?:export\s+)?interface\s+(\w+)',
        'type': r'^(?:export\s+)?type\s+(\w+)',
        'class': r'^(?:export\s+)?class\s+(\w+)',
        'function': r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[<(]',
        'arrow': r'^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*(?::\s*.+?\s*)?=\s*(?:async\s+)?\([^)]*\)\s*=>',
        'import': r'^import\s+.+$',
        'enum': r'^(?:export\s+)?enum\s+(\w+)',
    },
    # Both spellings of the R extension get their own independent copy.
    '.r': dict(_R_STRUCTURE),
    '.R': dict(_R_STRUCTURE),
}

# Extend for more file types: the JSX/TSX dialects reuse the TypeScript table.
for ext in ('.tsx', '.jsx'):
    STRUCTURE_PATTERNS[ext] = dict(STRUCTURE_PATTERNS['.ts'])
70
+
71
+
72
# Project-relative POSIX paths that must never be compressed.
PROTECTED_COMPRESS_FILES = {
    # Package sources and bundled pages
    "cli/c3.py",
    "cli/ui.html",
    "cli/docs.html",
    "core/config.py",
    # Markdown documents
    "CLAUDE.md",
    "GEMINI.md",
    "AGENTS.md",
    "README.md",
    # Launch / install scripts and packaging metadata
    "c3.bat",
    "install.bat",
    "install.sh",
    "pyproject.toml",
    # Per-tool configuration kept in dot-directories
    ".codex/config.toml",
    ".vscode/mcp.json",
    ".gemini/settings.json",
}
89
+
90
+
91
class CodeCompressor:
    """Compresses source code files into token-efficient summaries.

    Results are cached by content hash, mode and extension, both in an
    in-memory dict (bounded by ``_MEM_CACHE_MAX``) and as JSON files under
    ``cache_dir``.  Files in the protected set are never compressed.
    """

    # A line ending in one of these is treated as continuing on the next line.
    _CONTINUATION_ENDINGS = (',', '{', '[', '(')

    # Exception-clause patterns, ranked from most to least problematic.
    _EXCEPT_PATTERNS = (
        (re.compile(r"^\s*except\s*:"), "bare-except"),
        (re.compile(r"^\s*except\s+(?:Base)?Exception(?:\s+as\s+\w+)?\s*:"), "broad-except"),
        (re.compile(r"^\s*except\s+\("), "multi-except"),
    )
    _FUNC_DEF = re.compile(r"^\s*(?:async\s+)?def\s+(\w+)")

    # Substrings that make a line "look structural" in the generic fallback.
    _GENERIC_KEYWORDS = ('function', 'class', 'def ', 'module', 'export', 'import',
                         'require', 'const ', 'let ', 'var ', 'type ', 'interface ')
    _GENERIC_COMMENT_PREFIXES = ('#', '//', '/*', '*', '--')

    def __init__(self, cache_dir: str = ".c3/cache",
                 project_root: Optional[str] = None,
                 protected_files: Optional[Iterable[str]] = None,
                 router: Optional[Any] = None):
        """Create a compressor.

        Args:
            cache_dir: Directory for cache files; created if missing.
            project_root: Root used to compute project-relative paths.  When
                omitted, the grandparent of ``cache_dir`` is used.
            protected_files: Extra project-relative paths to protect, in
                addition to ``PROTECTED_COMPRESS_FILES``.
            router: Object exposing ``summarize(content, style=...)``;
                required only for ``"summary"`` mode.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self._file_hashes = {}
        self._mem_cache = {}  # {cache_key: result} — skips JSON parse on repeat access
        self._MEM_CACHE_MAX = 128
        self.project_root = (Path(project_root).resolve()
                             if project_root else self.cache_dir.parent.parent.resolve())
        # Normalize both sources so membership tests compare like with like.
        self._protected_files = {self._normalize_rel_path(p) for p in PROTECTED_COMPRESS_FILES}
        if protected_files:
            self._protected_files.update(self._normalize_rel_path(p) for p in protected_files)
        self.router = router

    @staticmethod
    def _normalize_rel_path(path: str) -> str:
        """Return *path* with forward slashes and no leading ``./`` or ``/``.

        Only whole ``./`` prefixes are removed.  ``str.lstrip("./")`` strips a
        *character set*, which would turn ``.vscode/mcp.json`` into
        ``vscode/mcp.json`` and break matching of dot-directories.
        """
        normalized = str(path).replace("\\", "/")
        while True:
            if normalized.startswith("./"):
                normalized = normalized[2:]
            elif normalized.startswith("/"):
                normalized = normalized.lstrip("/")
            else:
                break
        return "" if normalized == "." else normalized

    def _relative_to_project(self, path: Path) -> str:
        """Return *path* relative to the project root as a POSIX string.

        Falls back to the normalized input when the path cannot be resolved
        or lies outside the project root.
        """
        try:
            return path.resolve().relative_to(self.project_root).as_posix()
        except (ValueError, OSError, RuntimeError):
            return self._normalize_rel_path(str(path))

    def is_protected_file(self, filepath: Path) -> bool:
        """Return True when *filepath* is in the protected set."""
        normalized = self._normalize_rel_path(self._relative_to_project(filepath))
        return normalized in self._protected_files

    def get_protected_files(self) -> list:
        """Return the protected project-relative paths, sorted."""
        return sorted(self._protected_files)

    # ------------------------------------------------------------------
    # Result cache helpers
    # ------------------------------------------------------------------

    def _remember(self, cache_key: str, result: dict) -> None:
        """Store a copy of *result* in the memory cache, evicting if full."""
        if cache_key not in self._mem_cache and len(self._mem_cache) >= self._MEM_CACHE_MAX:
            # Evict the oldest quarter (dicts preserve insertion order).
            oldest = list(self._mem_cache)
            for stale in oldest[:max(1, len(oldest) // 4)]:
                del self._mem_cache[stale]
        self._mem_cache[cache_key] = dict(result)

    def _cache_get(self, cache_key: str) -> Optional[dict]:
        """Return a private copy of the cached result, or None on a miss."""
        hit = self._mem_cache.get(cache_key)
        if hit is not None:
            return dict(hit)
        cache_file = self.cache_dir / cache_key
        if not cache_file.exists():
            return None
        try:
            with open(cache_file, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Unreadable or corrupt cache entry: treat as a miss and recompute.
            return None
        if not isinstance(loaded, dict):
            return None
        self._remember(cache_key, loaded)
        return loaded

    def _cache_store(self, cache_key: str, result: dict) -> None:
        """Cache *result* in memory and, best-effort, on disk."""
        self._remember(cache_key, result)
        try:
            with open(self.cache_dir / cache_key, 'w', encoding="utf-8") as f:
                json.dump(result, f, indent=2)
        except (OSError, TypeError, ValueError):
            # The disk cache is an optimization; failing to write is not fatal.
            pass

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def compress_file(self, filepath: str, mode: str = "structure") -> dict:
        """
        Compress a source file.

        Modes:
        - "structure": Function/class signatures + imports (most compressed)
        - "outline": Structure + docstrings + key comments
        - "smart": Adaptive - more detail for small files, less for large
        - "diff": Only changes since last seen (requires prior state)
        - "summary": High-level LLM summary (requires router)
        - "bug_scan": Structure map + annotated exception-handling hotspots with line numbers

        Returns:
            A dict that always contains "compressed".  Failures (missing
            file, non-file path, protected file, missing router) are
            reported through an "error" key instead of an exception.
        """
        filepath = Path(filepath).resolve()
        if not filepath.exists():
            return {"error": f"File not found: {filepath}", "compressed": ""}
        if not filepath.is_file():
            # Reading a directory would raise; report it like other failures.
            return {"error": f"Not a regular file: {filepath}", "compressed": ""}
        if self.is_protected_file(filepath):
            return {
                "error": f"Compression is blocked for protected file: {self._relative_to_project(filepath)}",
                "compressed": "",
                "protected_files": self.get_protected_files(),
            }

        content = filepath.read_text(encoding="utf-8", errors='replace')

        if mode == "diff":
            return self._diff_compress(filepath, content)

        if mode == "summary":
            if not self.router:
                return {"error": "Summary mode requires a router", "compressed": ""}
            sum_res = self.router.summarize(content, style="concise")
            summary = sum_res.get("summary", "Could not summarize")
            result = f"# {filepath.name} — SUMMARY\n{summary}"
            return {"compressed": result, "mode": "summary", **measure_savings(content, result)}

        content_hash = hashlib.md5(content.encode()).hexdigest()
        ext = filepath.suffix.lower()
        cache_key = f"{content_hash}_{mode}{ext}.json"

        cached = self._cache_get(cache_key)
        if cached is not None:
            # The entry is keyed by content, so it may come from another path.
            cached["filepath"] = str(filepath)
            return cached

        line_count = len(content.splitlines())
        if mode == "bug_scan":
            # Structure map + exception-handling annotation pass
            parts = [self._extract_structure(content, ext, "outline")]
            hotspots = self._scan_exception_handlers(content)
            if hotspots:
                parts.append(hotspots)
            compressed = "\n".join(parts)
            header = f"# {filepath.name} ({filepath.suffix}) — {line_count} lines [bug_scan]\n"
        else:
            if mode == "smart":
                tokens = count_tokens(content)
                if tokens < 80:
                    # Too small to be worth compressing: return it verbatim.
                    return {"compressed": content, "mode": "full", **measure_savings(content, content)}
                if tokens < 400:
                    compressed = self._extract_structure(content, ext, "outline")
                else:
                    # Try structure first; if savings < 30%, fall back to outline
                    compressed = self._extract_structure(content, ext, "structure")
                    if (1 - count_tokens(compressed) / tokens) < 0.30:
                        compressed = self._extract_structure(content, ext, "outline")
            else:
                compressed = self._extract_structure(content, ext, mode)
            header = f"# {filepath.name} ({filepath.suffix}) — {line_count} lines\n"

        result = header + compressed
        savings = measure_savings(content, result)
        savings["compressed"] = result
        savings["mode"] = mode
        savings["filepath"] = str(filepath)

        # Remember the content hash of the last version seen for this path.
        self._file_hashes[str(filepath)] = content_hash
        self._cache_store(cache_key, savings)
        return savings

    def _extract_structure(self, content: str, ext: str, mode: str) -> str:
        """Extract structural elements from source code.

        Uses tree-sitter sections when available, otherwise the regex table
        for *ext*, otherwise the generic keyword-based fallback.
        """
        # 1. Try Tree-sitter AST extraction first (if available)
        if HAS_TREE_SITTER:
            try:
                sections = extract_sections_ast(content, ext)
                if sections:
                    return self._render_ast_sections(sections, content, mode, ext)
            except Exception:
                # Deliberately broad: any AST failure falls back to regex.
                pass

        # 2. Fall back to regex-based extraction
        patterns = STRUCTURE_PATTERNS.get(ext, {})
        if not patterns:
            return self._generic_compress(content, ext)

        lines = content.split('\n')
        extracted = []
        indent_stack = []  # (indent_level, kind) of enclosing class-like blocks

        for i, line in enumerate(lines):
            stripped = line.rstrip()
            lstripped = line.lstrip()
            indent = len(line) - len(lstripped)

            # Leave every enclosing block this non-blank line has dedented out of.
            while indent_stack and indent <= indent_stack[-1][0] and lstripped:
                indent_stack.pop()

            for kind, pattern in patterns.items():
                if not re.match(pattern, lstripped, re.MULTILINE):
                    continue

                # Hierarchical prefix derived from the nesting depth.
                nesting_prefix = " " * len(indent_stack)

                if kind in ('import', 'library'):
                    extracted.append(stripped)
                elif kind in ('class', 'interface', 'enum', 'type'):
                    extracted.append(f"\n{nesting_prefix}{stripped}")
                    indent_stack.append((indent, kind))
                    if mode == "outline":
                        doc = self._extract_docstring(lines, i + 1, ext)
                        if doc:
                            extracted.append(f"{nesting_prefix} {doc}")
                elif kind in ('function', 'arrow'):
                    extracted.append(f"{nesting_prefix}{stripped}")
                    if mode == "outline":
                        doc = self._extract_docstring(lines, i + 1, ext)
                        if doc:
                            extracted.append(f"{nesting_prefix} {doc}")
                elif kind == 'assignment':
                    extracted.append(f"{nesting_prefix}{stripped}")
                    # Capture multi-line assignments up to 3 continuation lines
                    if stripped.endswith(self._CONTINUATION_ENDINGS):
                        for following in lines[i + 1:i + 4]:
                            cont = following.rstrip()
                            if cont.strip():
                                extracted.append(f"{nesting_prefix} {cont.strip()}")
                            if not cont.endswith(self._CONTINUATION_ENDINGS):
                                break
                elif kind in ('decorator', 'export'):
                    extracted.append(f"{nesting_prefix}{stripped}")
                break  # first matching pattern wins

        return '\n'.join(extracted)

    def _render_ast_sections(self, sections: list, content: str, mode: str,
                             ext: str = "") -> str:
        """Convert Tree-sitter sections into a compressed text summary.

        Args:
            sections: Section dicts with "type", "line_start", "line_end"
                (1-indexed) and optionally "name" and "children".
            content: Full source text the sections refer to.
            mode: "outline" additionally emits a docstring line per section.
            ext: File extension; selects the docstring syntax in outline
                mode.  With the default "" only JSDoc blocks are recognized.
        """
        lines = content.splitlines()
        extracted = []

        # Group top-level imports at the top, followed by one blank line.
        imports = [s for s in sections if s.get("type") == "import"]
        if imports:
            for s in imports:
                line_idx = s["line_start"] - 1
                if 0 <= line_idx < len(lines):
                    extracted.append(lines[line_idx].strip())
            extracted.append("")

        # Depth-first traversal of classes and functions
        def _render_node(node_list, depth=0):
            prefix = " " * depth
            for s in node_list:
                stype = s.get("type")
                if stype == "import":
                    continue

                name = s.get("name", "unnamed")
                start, end = s["line_start"], s["line_end"]
                if 1 <= start <= len(lines):
                    # Take up to the first 3 lines of the section to capture
                    # multi-line signatures; stop at the first line ending
                    # with ':' or '{'.
                    decl_parts = []
                    body_index = None  # 0-based index of the line after the signature
                    for offset, raw in enumerate(lines[start - 1:min(start + 2, end)]):
                        clean = raw.strip()
                        decl_parts.append(clean)
                        if clean.endswith((':', '{')):
                            body_index = start + offset
                            break

                    decl = " ".join(decl_parts).strip()
                    if decl.endswith(("{", ":")):
                        decl = decl[:-1].strip()

                    if stype == "class":
                        extracted.append(f"\n{prefix}class {name}:")
                    else:
                        extracted.append(f"{prefix}{decl}")

                    if mode == "outline":
                        # Probe the line right after the signature; when no
                        # terminator was found, use the section's second line.
                        probe = body_index if body_index is not None else start
                        doc = self._extract_docstring(lines, probe, ext)
                        if doc:
                            extracted.append(f"{prefix} \"\"\" {doc} \"\"\"")

                children = s.get("children")
                if children:
                    _render_node(children, depth + 1)

        _render_node([s for s in sections if s.get("type") != "import"])
        return "\n".join(extracted)

    def _extract_docstring(self, lines: list, start: int, ext: str) -> Optional[str]:
        """Extract the first line of a docstring/JSDoc located at *start*.

        Args:
            lines: Source lines.
            start: 0-based index of the line where the docstring would begin.
            ext: File extension; Python docstrings are only recognized for
                ".py".

        Returns:
            The first meaningful docstring line, or None when there is none
            (including empty docstrings).
        """
        if start < 0 or start >= len(lines):
            return None

        line = lines[start].strip()

        # Python docstrings — first line only
        if ext == '.py' and line.startswith(('"""', "'''")):
            quote = line[:3]
            rest = line[3:]
            if rest.endswith(quote):
                # Opened and closed on the same line.
                return rest[:-3].strip() or None
            first = rest.strip()
            if first:
                return first
            # Summary starts on the following line.
            if start + 1 < len(lines):
                following = lines[start + 1].strip()
                if following.endswith(quote):
                    # Closing quotes are not part of the text.
                    following = following[:-3].strip()
                return following or None
            return None

        # JSDoc — first meaningful line only
        if line.startswith('/**'):
            for j in range(start, min(start + 10, len(lines))):
                # Drop the comment punctuation surrounding the text.
                cleaned = lines[j].strip().lstrip('/*').rstrip('*/').strip()
                if cleaned:
                    return cleaned
                if '*/' in lines[j]:
                    break
            return None

        return None

    def _scan_exception_handlers(self, content: str) -> str:
        """Scan for exception-handling hotspots and return an annotated section.

        Reports every bare, broad (``Exception`` / ``BaseException``) or
        tuple ``except`` clause with:
        - line number
        - the except line itself
        - the most recently seen function name (a heuristic for the
          enclosing function)
        - a preview of up to two following lines

        Returns an empty string if no matching clauses are found.
        """
        lines = content.splitlines()
        hits = []
        current_func = "<module>"
        for idx, line in enumerate(lines, start=1):
            func_match = self._FUNC_DEF.match(line)
            if func_match:
                current_func = func_match.group(1)
            for pattern, label in self._EXCEPT_PATTERNS:
                if not pattern.match(line):
                    continue
                # idx is 1-based, so lines[idx:] starts right after the clause.
                body_lines = []
                for following in lines[idx:idx + 2]:
                    body = following.strip()
                    if body and not body.startswith("except") and not body.startswith("try"):
                        body_lines.append(body)
                body_preview = " | ".join(body_lines)
                suffix = f" → {body_preview}" if body_preview else ""
                hits.append(f" L{idx} [{label}] in `{current_func}`: {line.strip()}{suffix}")
                break  # one label per line

        if not hits:
            return ""
        header = f"\n# Exception-handling hotspots ({len(hits)} found):"
        return header + "\n" + "\n".join(hits)

    def _generic_compress(self, content: str, ext: str) -> str:
        """Fallback compression for unknown languages.

        Keeps non-comment lines that contain a declaration-like keyword or
        start with ``identifier`` followed by ``=``, ``(``, ``<`` or ``{``.
        *ext* is accepted for signature compatibility and is not used.
        """
        kept = []
        for line in content.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            # Skip pure comment lines
            if stripped.startswith(self._GENERIC_COMMENT_PREFIXES):
                continue
            lowered = stripped.lower()
            if any(kw in lowered for kw in self._GENERIC_KEYWORDS):
                kept.append(stripped)
            elif re.match(r'^[A-Za-z_]\w*\s*[=(<{]', stripped):
                kept.append(stripped)
        return '\n'.join(kept)

    def _diff_compress(self, filepath: Path, current_content: str) -> dict:
        """Generate diff-based compression against the last snapshot.

        The snapshot file name includes a digest of the project-relative
        path, so files sharing a basename (e.g. several ``__init__.py``) do
        not overwrite each other's baseline.
        """
        rel_path = self._relative_to_project(filepath)
        path_digest = hashlib.md5(rel_path.encode("utf-8", "replace")).hexdigest()[:12]
        cache_file = self.cache_dir / f"{filepath.name}.{path_digest}.cache"

        if cache_file.exists():
            cached = cache_file.read_text(encoding="utf-8", errors="replace")

            if cached == current_content:
                result = f"# {filepath.name} — NO CHANGES"
                return {"compressed": result, "mode": "diff-unchanged", **measure_savings(current_content, result)}

            # Generate contextual diff
            diff = self._contextual_diff(cached.split('\n'), current_content.split('\n'), filepath.name)
            savings = measure_savings(current_content, diff)
            savings["compressed"] = diff
            savings["mode"] = "diff"
        else:
            # No snapshot yet — fall back to structure mode
            compressed = self._extract_structure(current_content, filepath.suffix.lower(), "structure")
            header = f"# {filepath.name} (FIRST SEEN) — {len(current_content.splitlines())} lines\n"
            result = header + compressed
            savings = measure_savings(current_content, result)
            savings["compressed"] = result
            savings["mode"] = "diff-first"

        # Update the snapshot so the next call diffs against this version.
        cache_file.write_text(current_content, encoding="utf-8")
        return savings

    def _contextual_diff(self, old_lines: list, new_lines: list, filename: str) -> str:
        """Return a unified diff (one context line) prefixed with a header."""
        import difflib
        diff_text = '\n'.join(difflib.unified_diff(old_lines, new_lines, lineterm='', n=1))

        if not diff_text.strip():
            return f"# {filename} — NO CHANGES"

        return f"# {filename} — CHANGES ONLY\n" + diff_text

    def compress_directory(self, dirpath: str, mode: str = "smart",
                           extensions: Optional[list] = None,
                           max_files: int = 50) -> dict:
        """Compress an entire directory of source files.

        Args:
            dirpath: Directory to walk recursively.
            mode: Compression mode passed to ``compress_file``.
            extensions: Allowed suffixes including the dot (case-insensitive);
                defaults to a built-in set of source/markup extensions.
            max_files: Maximum number of files to compress successfully.

        Returns:
            Aggregate token statistics, the combined output, per-file
            results, and the protected files that were skipped.
        """
        import os

        dirpath = Path(dirpath).resolve()
        if not dirpath.is_dir():
            return {"error": f"Not a directory: {dirpath}"}

        default_exts = {'.py', '.js', '.ts', '.tsx', '.jsx', '.r', '.R',
                        '.css', '.html', '.json', '.yaml', '.yml', '.md'}
        # Suffixes are compared lower-cased, so lower-case the allow-list too.
        allowed = {str(e).lower() for e in (extensions or default_exts)}

        # Skip common non-essential dirs
        skip_dirs = {'node_modules', '.git', '__pycache__', '.c3', 'venv',
                     'env', '.venv', 'dist', 'build', '.next', '.cache'}

        # Prune skipped directories during the walk.  This avoids descending
        # into them and keeps ancestors of dirpath out of the test.  Filtering
        # happens before any limit so that early-sorting entries such as
        # ".git" cannot use up the file budget.
        candidates = []
        for root, dirnames, filenames in os.walk(dirpath):
            dirnames[:] = [d for d in dirnames if d not in skip_dirs]
            root_path = Path(root)
            candidates.extend(root_path / fn for fn in filenames
                              if Path(fn).suffix.lower() in allowed)
        candidates.sort()  # deterministic order

        results = []
        total_original = 0
        total_compressed = 0
        skipped_protected = []

        for fpath in candidates:
            if len(results) >= max_files:
                break
            if not fpath.is_file():
                continue  # e.g. a dangling symlink
            if self.is_protected_file(fpath):
                skipped_protected.append(self._relative_to_project(fpath))
                continue

            result = self.compress_file(str(fpath), mode)
            if "error" not in result:
                results.append(result)
                total_original += result.get("original_tokens", 0)
                total_compressed += result.get("compressed_tokens", 0)

        combined = '\n\n---\n\n'.join(r["compressed"] for r in results)
        savings_pct = ((total_original - total_compressed) / total_original * 100) if total_original > 0 else 0

        return {
            "files_processed": len(results),
            "total_original_tokens": total_original,
            "total_compressed_tokens": total_compressed,
            "savings_pct": round(savings_pct, 1),
            "combined_output": combined,
            "file_results": results,
            "protected_files": self.get_protected_files(),
            "skipped_protected_files": sorted(skipped_protected),
        }