code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
services/parser.py ADDED
@@ -0,0 +1,1238 @@
1
+ """
2
+ AST-based parsing service using Tree-sitter.
3
+ Provides unified structural extraction for multiple languages.
4
+ """
5
+ import re
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ try:
9
+ import tree_sitter
10
+ import tree_sitter_css
11
+ import tree_sitter_go
12
+ import tree_sitter_html
13
+ import tree_sitter_javascript
14
+ import tree_sitter_json
15
+ import tree_sitter_markdown
16
+ import tree_sitter_python
17
+ import tree_sitter_rust
18
+ import tree_sitter_typescript
19
+ import tree_sitter_yaml
20
+ HAS_TREE_SITTER = True
21
+
22
+ PY_LANGUAGE = tree_sitter.Language(tree_sitter_python.language())
23
+ JS_LANGUAGE = tree_sitter.Language(tree_sitter_javascript.language())
24
+ TS_LANGUAGE = tree_sitter.Language(tree_sitter_typescript.language_typescript())
25
+ TSX_LANGUAGE = tree_sitter.Language(tree_sitter_typescript.language_tsx())
26
+ HTML_LANGUAGE = tree_sitter.Language(tree_sitter_html.language())
27
+ MD_LANGUAGE = tree_sitter.Language(tree_sitter_markdown.language())
28
+ CSS_LANGUAGE = tree_sitter.Language(tree_sitter_css.language())
29
+ GO_LANGUAGE = tree_sitter.Language(tree_sitter_go.language())
30
+ RUST_LANGUAGE = tree_sitter.Language(tree_sitter_rust.language())
31
+ JSON_LANGUAGE = tree_sitter.Language(tree_sitter_json.language())
32
+ YAML_LANGUAGE = tree_sitter.Language(tree_sitter_yaml.language())
33
+
34
+ LANGUAGES = {
35
+ '.py': PY_LANGUAGE,
36
+ '.js': JS_LANGUAGE,
37
+ '.jsx': JS_LANGUAGE,
38
+ '.ts': TS_LANGUAGE,
39
+ '.tsx': TSX_LANGUAGE,
40
+ '.html': HTML_LANGUAGE,
41
+ '.htm': HTML_LANGUAGE,
42
+ '.md': MD_LANGUAGE,
43
+ '.css': CSS_LANGUAGE,
44
+ '.go': GO_LANGUAGE,
45
+ '.rs': RUST_LANGUAGE,
46
+ '.json': JSON_LANGUAGE,
47
+ '.yaml': YAML_LANGUAGE,
48
+ '.yml': YAML_LANGUAGE,
49
+ }
50
+ except ImportError:
51
+ HAS_TREE_SITTER = False
52
+ LANGUAGES = {}
53
+
54
+ # Bump this any time extract_sections_ast / _walk_* logic changes so that
55
+ # file_memory records extracted with an older version are force-refreshed.
56
+ PARSER_VERSION = "2"
57
+
58
def get_parser(ext: str) -> Optional['tree_sitter.Parser']:
    """
    Return a Tree-sitter parser for the file extension ``ext``.

    ``None`` is returned when Tree-sitter could not be imported or when
    ``ext`` has no entry in ``LANGUAGES``.
    """
    if HAS_TREE_SITTER and ext in LANGUAGES:
        return tree_sitter.Parser(LANGUAGES[ext])
    return None
63
+
64
def extract_sections_ast(content: str, ext: str) -> Optional[List[Dict[str, Any]]]:
    """
    Build the list of structural sections of ``content`` from its Tree-sitter AST.

    The walker is selected by file extension. Returns None when no parser is
    available for ``ext``, when no walker handles it, or when parsing or
    walking raises.
    """
    parser = get_parser(ext)
    if not parser:
        return None

    try:
        tree = parser.parse(content.encode('utf-8'))
        lines = content.split('\n')

        # Extension -> walker; several extensions share one walker.
        walkers = {
            '.py': _walk_python,
            '.js': _walk_js_ts,
            '.jsx': _walk_js_ts,
            '.ts': _walk_js_ts,
            '.tsx': _walk_js_ts,
            '.html': _walk_html,
            '.htm': _walk_html,
            '.md': _walk_markdown,
            '.css': _walk_css,
            '.go': _walk_go,
            '.rs': _walk_rust,
            '.json': _walk_json,
            '.yaml': _walk_yaml,
            '.yml': _walk_yaml,
        }
        walker = walkers.get(ext)
        if walker is None:
            return None

        sections = []
        walker(tree.root_node, lines, sections)
        return sections
    except Exception:
        # Best effort: any parser/walker failure is reported as "no result".
        return None
102
+
103
+ def _extract_docstring_python(node, lines: List[str]) -> Optional[str]:
104
+ # In Python, docstring is the first expression statement in a block
105
+ if node.type in ('function_definition', 'class_definition'):
106
+ body = next((child for child in node.children if child.type == 'block'), None)
107
+ if body and body.children and body.children[0].type == 'expression_statement':
108
+ expr = body.children[0]
109
+ if expr.children and expr.children[0].type == 'string':
110
+ doc = lines[expr.start_point[0]].strip()
111
+ if doc.startswith('"""') or doc.startswith("'''"):
112
+ quote = doc[:3]
113
+ if doc.endswith(quote) and len(doc) > 6:
114
+ return doc[3:-3].strip()
115
+ first = doc[3:].strip()
116
+ if first: return first
117
+ if expr.start_point[0] + 1 <= expr.end_point[0]:
118
+ return lines[expr.start_point[0] + 1].strip()
119
+ return None
120
+
121
+ def _walk_python(node, lines: List[str], sections: List[Dict[str, Any]], parent_section=None):
122
+ for child in node.children:
123
+ if child.type == 'class_definition':
124
+ name_node = next((c for c in child.children if c.type == 'identifier'), None)
125
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
126
+
127
+ sig_start = child.start_point[0]
128
+ # Find the colon
129
+ colon_node = next((c for c in child.children if c.type == ':'), None)
130
+ sig_end = colon_node.end_point[0] if colon_node else sig_start
131
+ signature = '\n'.join(lines[sig_start:sig_end+1]).strip()
132
+
133
+ section = {
134
+ "type": "class",
135
+ "name": name,
136
+ "line_start": child.start_point[0] + 1,
137
+ "line_end": child.end_point[0] + 1,
138
+ "signature": signature,
139
+ "children": []
140
+ }
141
+ doc = _extract_docstring_python(child, lines)
142
+ if doc: section["doc"] = doc
143
+
144
+ if parent_section:
145
+ parent_section["children"].append(section)
146
+ else:
147
+ sections.append(section)
148
+
149
+ body = next((c for c in child.children if c.type == 'block'), None)
150
+ if body:
151
+ _walk_python(body, lines, sections, section)
152
+
153
+ elif child.type == 'function_definition':
154
+ name_node = next((c for c in child.children if c.type == 'identifier'), None)
155
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
156
+
157
+ sig_start = child.start_point[0]
158
+ colon_node = next((c for c in child.children if c.type == ':'), None)
159
+ sig_end = colon_node.end_point[0] if colon_node else sig_start
160
+ signature = '\n'.join(lines[sig_start:sig_end+1]).strip()
161
+
162
+ is_async = any(c.type == 'async' for c in child.children)
163
+
164
+ section = {
165
+ "type": "method" if parent_section and parent_section["type"] == "class" else "function",
166
+ "name": name,
167
+ "line_start": child.start_point[0] + 1,
168
+ "line_end": child.end_point[0] + 1,
169
+ "signature": signature
170
+ }
171
+ if is_async: section["async"] = True
172
+
173
+ doc = _extract_docstring_python(child, lines)
174
+ if doc: section["doc"] = doc
175
+
176
+ if parent_section:
177
+ parent_section["children"].append(section)
178
+ else:
179
+ sections.append(section)
180
+
181
+ elif child.type == 'expression_statement':
182
+ # Check for global constants (CAPITAL_NAME = value)
183
+ if not parent_section:
184
+ assign = next((c for c in child.children if c.type == 'assignment'), None)
185
+ if assign:
186
+ target = next((c for c in assign.children if c.type == 'identifier'), None)
187
+ if target:
188
+ name = lines[target.start_point[0]][target.start_point[1]:target.end_point[1]]
189
+ if name.isupper():
190
+ sections.append({
191
+ "type": "constant",
192
+ "name": name,
193
+ "line_start": child.start_point[0] + 1,
194
+ "line_end": child.end_point[0] + 1,
195
+ "signature": lines[child.start_point[0]].strip()
196
+ })
197
+
198
+ elif child.type == 'comment':
199
+ text = lines[child.start_point[0]][child.start_point[1]:child.end_point[1]]
200
+ if 'TODO' in text or 'FIXME' in text:
201
+ sections.append({
202
+ "type": "comment",
203
+ "name": text.lstrip('#').strip(),
204
+ "line_start": child.start_point[0] + 1,
205
+ "line_end": child.end_point[0] + 1,
206
+ "signature": text.strip()
207
+ })
208
+
209
+ elif child.type in ('import_statement', 'import_from_statement'):
210
+ section = {
211
+ "type": "import",
212
+ "name": lines[child.start_point[0]].strip(),
213
+ "line_start": child.start_point[0] + 1,
214
+ "line_end": child.end_point[0] + 1,
215
+ "signature": lines[child.start_point[0]].strip()
216
+ }
217
+ if not parent_section:
218
+ sections.append(section)
219
+
220
+ elif child.type == 'decorated_definition':
221
+ _walk_python(child, lines, sections, parent_section)
222
+
223
+ def _extract_docstring_js(node, lines: List[str]) -> Optional[str]:
224
+ # Look for previous sibling that is a comment
225
+ prev = node.prev_sibling
226
+ while prev and prev.type == 'comment':
227
+ comment = lines[prev.start_point[0]].strip()
228
+ if comment.startswith('/**'):
229
+ # simple single line extraction
230
+ cleaned = comment.lstrip('/*').rstrip('*/').strip()
231
+ if cleaned and cleaned != '*': return cleaned
232
+ if prev.start_point[0] + 1 <= prev.end_point[0]:
233
+ cleaned = lines[prev.start_point[0] + 1].strip().lstrip('*').strip()
234
+ if cleaned: return cleaned
235
+ prev = prev.prev_sibling
236
+ return None
237
+
238
+ def _walk_js_ts(node, lines: List[str], sections: List[Dict[str, Any]], parent_section=None):
239
+ for child in node.children:
240
+ if child.type in ('class_declaration', 'abstract_class_declaration', 'interface_declaration', 'type_alias_declaration', 'enum_declaration'):
241
+ name_node = next((c for c in child.children if c.type in ('identifier', 'type_identifier')), None)
242
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
243
+
244
+ # Simple signature heuristic: first line
245
+ signature = lines[child.start_point[0]].strip()
246
+
247
+ t = child.type.split('_')[0]
248
+ section = {
249
+ "type": "class" if t == "class" else t,
250
+ "name": name,
251
+ "line_start": child.start_point[0] + 1,
252
+ "line_end": child.end_point[0] + 1,
253
+ "signature": signature,
254
+ "children": []
255
+ }
256
+ doc = _extract_docstring_js(child, lines)
257
+ if doc: section["doc"] = doc
258
+
259
+ if parent_section:
260
+ parent_section["children"].append(section)
261
+ else:
262
+ sections.append(section)
263
+
264
+ body = next((c for c in child.children if c.type in ('class_body', 'interface_body', 'enum_body', 'object_type')), None)
265
+ if body:
266
+ _walk_js_ts(body, lines, sections, section)
267
+
268
+ elif child.type in ('function_declaration', 'method_definition', 'public_field_definition', 'property_definition'):
269
+ name_node = next((c for c in child.children if c.type in ('property_identifier', 'identifier', 'private_property_identifier')), None)
270
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
271
+
272
+ signature = lines[child.start_point[0]].strip()
273
+
274
+ is_async = any(c.type == 'async' for c in child.children)
275
+
276
+ # TS access modifiers can be direct children or inside a 'accessibility_modifier' node
277
+ access = None
278
+ for c in child.children:
279
+ if c.type in ('public', 'private', 'protected'):
280
+ access = c.type
281
+ break
282
+ if c.type == 'accessibility_modifier':
283
+ access = lines[c.start_point[0]][c.start_point[1]:c.end_point[1]]
284
+ break
285
+
286
+ stype = "method" if child.type == 'method_definition' else "function"
287
+ if child.type in ('public_field_definition', 'property_definition'):
288
+ stype = "property"
289
+
290
+ section = {
291
+ "type": stype,
292
+ "name": name,
293
+ "line_start": child.start_point[0] + 1,
294
+ "line_end": child.end_point[0] + 1,
295
+ "signature": signature
296
+ }
297
+ if is_async: section["async"] = True
298
+ if access: section["access"] = access
299
+
300
+ doc = _extract_docstring_js(child, lines)
301
+ if doc: section["doc"] = doc
302
+
303
+ if parent_section:
304
+ parent_section["children"].append(section)
305
+ else:
306
+ sections.append(section)
307
+
308
+ elif child.type == 'lexical_declaration' or child.type == 'variable_declaration':
309
+ # Check for constants/variables
310
+ decl = next((c for c in child.children if c.type == 'variable_declarator'), None)
311
+ if decl:
312
+ name_node = next((c for c in decl.children if c.type == 'identifier'), None)
313
+ value_node = next((c for c in decl.children if c.type == 'arrow_function'), None)
314
+
315
+ if name_node and value_node:
316
+ # Arrow function
317
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]]
318
+ is_async = any(c.type == 'async' for c in value_node.children)
319
+ section = {
320
+ "type": "function",
321
+ "name": name,
322
+ "line_start": child.start_point[0] + 1,
323
+ "line_end": child.end_point[0] + 1,
324
+ "signature": lines[child.start_point[0]].strip()
325
+ }
326
+ if is_async: section["async"] = True
327
+ doc = _extract_docstring_js(child, lines)
328
+ if doc: section["doc"] = doc
329
+
330
+ if parent_section:
331
+ parent_section["children"].append(section)
332
+ else:
333
+ sections.append(section)
334
+ elif name_node and not parent_section:
335
+ # Global constant/variable
336
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]]
337
+ sections.append({
338
+ "type": "constant" if "const" in lines[child.start_point[0]] else "variable",
339
+ "name": name,
340
+ "line_start": child.start_point[0] + 1,
341
+ "line_end": child.end_point[0] + 1,
342
+ "signature": lines[child.start_point[0]].strip()
343
+ })
344
+
345
+ elif child.type == 'comment':
346
+ text = lines[child.start_point[0]][child.start_point[1]:child.end_point[1]]
347
+ if 'TODO' in text or 'FIXME' in text:
348
+ sections.append({
349
+ "type": "comment",
350
+ "name": text.lstrip('/ ').strip(),
351
+ "line_start": child.start_point[0] + 1,
352
+ "line_end": child.end_point[0] + 1,
353
+ "signature": text.strip()
354
+ })
355
+
356
+ elif child.type == 'import_statement':
357
+ section = {
358
+ "type": "import",
359
+ "name": lines[child.start_point[0]].strip(),
360
+ "line_start": child.start_point[0] + 1,
361
+ "line_end": child.end_point[0] + 1,
362
+ "signature": lines[child.start_point[0]].strip()
363
+ }
364
+ if not parent_section:
365
+ sections.append(section)
366
+
367
+ elif child.type == 'export_statement':
368
+ # Drill down
369
+ decl = next((c for c in child.children if c.type != 'export' and c.type != 'default'), None)
370
+ if decl:
371
+ _walk_js_ts(decl, lines, sections, parent_section)
372
+
373
+ def _walk_html(node, lines: List[str], sections: List[Dict[str, Any]]):
374
+ """Extract headings and elements with IDs from HTML AST."""
375
+ for child in node.children:
376
+ if child.type == 'element':
377
+ start_tag = next((c for c in child.children if c.type == 'start_tag'), None)
378
+ if start_tag:
379
+ tag_name_node = next((c for c in start_tag.children if c.type == 'tag_name'), None)
380
+ tag_name = lines[tag_name_node.start_point[0]][tag_name_node.start_point[1]:tag_name_node.end_point[1]].lower() if tag_name_node else ''
381
+
382
+ # Check for headings (h1-h6)
383
+ if tag_name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
384
+ # Get text content of heading
385
+ text_content = ""
386
+ for subchild in child.children:
387
+ if subchild.type == 'text':
388
+ text_content += lines[subchild.start_point[0]][subchild.start_point[1]:subchild.end_point[1]]
389
+
390
+ sections.append({
391
+ "type": "heading",
392
+ "name": f"{tag_name}: {text_content.strip()[:60]}",
393
+ "line_start": child.start_point[0] + 1,
394
+ "line_end": child.end_point[0] + 1,
395
+ "signature": lines[child.start_point[0]].strip()
396
+ })
397
+
398
+ # Check for ID attribute
399
+ else:
400
+ id_attr = None
401
+ for subchild in start_tag.children:
402
+ if subchild.type == 'attribute':
403
+ attr_name_node = next((c for c in subchild.children if c.type == 'attribute_name'), None)
404
+ if attr_name_node:
405
+ attr_name = lines[attr_name_node.start_point[0]][attr_name_node.start_point[1]:attr_name_node.end_point[1]]
406
+ if attr_name == 'id':
407
+ val_node = next((c for c in subchild.children if c.type == 'attribute_value'), None)
408
+ if val_node:
409
+ id_attr = lines[val_node.start_point[0]][val_node.start_point[1]:val_node.end_point[1]].strip('"\'')
410
+ break
411
+
412
+ if id_attr:
413
+ sections.append({
414
+ "type": "section",
415
+ "name": f"#{id_attr} ({tag_name})",
416
+ "line_start": child.start_point[0] + 1,
417
+ "line_end": child.end_point[0] + 1,
418
+ "signature": lines[child.start_point[0]].strip()
419
+ })
420
+
421
+ # Recurse into element children
422
+ _walk_html(child, lines, sections)
423
+ else:
424
+ # Recurse into other nodes (like document)
425
+ _walk_html(child, lines, sections)
426
+
427
+ def _walk_markdown(node, lines: List[str], sections: List[Dict[str, Any]]):
428
+ """Extract headings from Markdown AST."""
429
+ for child in node.children:
430
+ if child.type == 'atx_heading' or child.type == 'setext_heading':
431
+ heading_node = next((c for c in child.children if c.type == 'atx_h1_marker' or c.type == 'atx_h2_marker' or c.type == 'atx_h3_marker' or c.type == 'atx_h4_marker' or c.type == 'atx_h5_marker' or c.type == 'atx_h6_marker'), None)
432
+ level = "h1"
433
+ if heading_node:
434
+ marker = lines[heading_node.start_point[0]][heading_node.start_point[1]:heading_node.end_point[1]]
435
+ level = f"h{len(marker.strip())}"
436
+
437
+ # Extract content text
438
+ content_text = ""
439
+ for subchild in child.children:
440
+ if subchild.type in ('inline', 'text'):
441
+ content_text += lines[subchild.start_point[0]][subchild.start_point[1]:subchild.end_point[1]]
442
+
443
+ sections.append({
444
+ "type": "heading",
445
+ "name": f"{level}: {content_text.strip()[:60]}",
446
+ "line_start": child.start_point[0] + 1,
447
+ "line_end": child.end_point[0] + 1,
448
+ "signature": lines[child.start_point[0]].strip()
449
+ })
450
+
451
+ # Recurse if needed (atx headings are top level usually, but just in case)
452
+ _walk_markdown(child, lines, sections)
453
+
454
+ def _walk_css(node, lines: List[str], sections: List[Dict[str, Any]]):
455
+ """Extract rulesets from CSS AST."""
456
+ for child in node.children:
457
+ if child.type == 'rule_set':
458
+ selector_node = next((c for c in child.children if c.type == 'selectors'), None)
459
+ selector = lines[selector_node.start_point[0]][selector_node.start_point[1]:selector_node.end_point[1]].strip() if selector_node else "Unknown"
460
+
461
+ sections.append({
462
+ "type": "section",
463
+ "name": selector[:60],
464
+ "line_start": child.start_point[0] + 1,
465
+ "line_end": child.end_point[0] + 1,
466
+ "signature": selector
467
+ })
468
+ elif child.type == 'media_statement':
469
+ query_node = next((c for c in child.children if c.type == 'media_query'), None)
470
+ query = lines[query_node.start_point[0]][query_node.start_point[1]:query_node.end_point[1]].strip() if query_node else "@media"
471
+
472
+ section = {
473
+ "type": "section",
474
+ "name": f"@media {query}",
475
+ "line_start": child.start_point[0] + 1,
476
+ "line_end": child.end_point[0] + 1,
477
+ "signature": query,
478
+ "children": []
479
+ }
480
+ sections.append(section)
481
+ # Find block
482
+ block = next((c for c in child.children if c.type == 'block'), None)
483
+ if block:
484
+ # We reuse walk_css but redirect results to children if we wanted nested
485
+ # For simplicity, we'll just keep them flat but maybe prefixed
486
+ pass
487
+
488
+ # Recurse
489
+ _walk_css(child, lines, sections)
490
+
491
+ def _walk_go(node, lines: List[str], sections: List[Dict[str, Any]]):
492
+ """Extract functions, types, and methods from Go AST."""
493
+ for child in node.children:
494
+ if child.type in ('function_declaration', 'method_declaration'):
495
+ name_node = next((c for c in child.children if c.type == 'identifier' or c.type == 'field_identifier'), None)
496
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
497
+
498
+ signature = lines[child.start_point[0]].strip()
499
+
500
+ sections.append({
501
+ "type": "function" if child.type == 'function_declaration' else "method",
502
+ "name": name,
503
+ "line_start": child.start_point[0] + 1,
504
+ "line_end": child.end_point[0] + 1,
505
+ "signature": signature
506
+ })
507
+ elif child.type == 'type_declaration':
508
+ # Drill into type specs
509
+ for spec in child.children:
510
+ if spec.type == 'type_spec':
511
+ name_node = next((c for c in spec.children if c.type == 'type_identifier'), None)
512
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
513
+ sections.append({
514
+ "type": "type",
515
+ "name": name,
516
+ "line_start": child.start_point[0] + 1,
517
+ "line_end": child.end_point[0] + 1,
518
+ "signature": lines[child.start_point[0]].strip()
519
+ })
520
+ _walk_go(child, lines, sections)
521
+
522
+ def _walk_rust(node, lines: List[str], sections: List[Dict[str, Any]]):
523
+ """Extract functions, structs, enums, and impls from Rust AST."""
524
+ for child in node.children:
525
+ if child.type in ('function_item', 'struct_item', 'enum_item', 'trait_item', 'impl_item'):
526
+ name_node = next((c for c in child.children if c.type in ('identifier', 'type_identifier')), None)
527
+ name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
528
+
529
+ if child.type == 'impl_item':
530
+ # For impl, the name is the type being implemented
531
+ type_node = next((c for c in child.children if c.type == 'type_identifier'), None)
532
+ if type_node:
533
+ name = f"impl {lines[type_node.start_point[0]][type_node.start_point[1]:type_node.end_point[1]]}"
534
+
535
+ stype = child.type.replace('_item', '')
536
+ sections.append({
537
+ "type": stype,
538
+ "name": name,
539
+ "line_start": child.start_point[0] + 1,
540
+ "line_end": child.end_point[0] + 1,
541
+ "signature": lines[child.start_point[0]].strip()
542
+ })
543
+ _walk_rust(child, lines, sections)
544
+
545
+ def _walk_json(node, lines: List[str], sections: List[Dict[str, Any]]):
546
+ """Extract top-level keys from JSON AST."""
547
+ for child in node.children:
548
+ if child.type == 'object':
549
+ for pair in child.children:
550
+ if pair.type == 'pair':
551
+ key_node = next((c for c in pair.children if c.type == 'string'), None)
552
+ if key_node:
553
+ key = lines[key_node.start_point[0]][key_node.start_point[1]:key_node.end_point[1]].strip('"\'')
554
+ sections.append({
555
+ "type": "property",
556
+ "name": key,
557
+ "line_start": pair.start_point[0] + 1,
558
+ "line_end": pair.end_point[0] + 1,
559
+ "signature": key
560
+ })
561
+ # Typically only top level for mapping
562
+ break
563
+
564
+ def _walk_yaml(node, lines: List[str], sections: List[Dict[str, Any]]):
565
+ """Extract top-level keys from YAML AST."""
566
+ for doc in node.children:
567
+ if doc.type == 'document':
568
+ block = next((c for c in doc.children if c.type == 'block_node'), None)
569
+ if block:
570
+ mapping = next((c for c in block.children if c.type == 'block_mapping'), None)
571
+ if mapping:
572
+ for pair in mapping.children:
573
+ if pair.type == 'block_mapping_pair':
574
+ key_node = next((c for c in pair.children if c.type == 'flow_node' or c.type == 'block_node'), None)
575
+ if key_node:
576
+ key = lines[key_node.start_point[0]][key_node.start_point[1]:key_node.end_point[1]].strip()
577
+ sections.append({
578
+ "type": "property",
579
+ "name": key,
580
+ "line_start": pair.start_point[0] + 1,
581
+ "line_end": pair.end_point[0] + 1,
582
+ "signature": key
583
+ })
584
+ # Typically only top level for mapping
585
+ break
586
+
587
def check_syntax_ast(content: str, ext: str) -> List[Dict[str, Any]]:
    """
    Detect syntax errors using Tree-sitter.

    Args:
        content: Source text to check.
        ext: File extension (with leading dot) selecting the grammar.

    Returns:
        A list of error dicts with ``line`` (1-based), ``column``, ``type``
        and ``text``. ``type`` is "syntax_error" for error nodes,
        "missing_token" for nodes flagged as missing, and "parser_error"
        (a single entry at line 1) when parsing or traversal itself raises.
        The list is empty when no parser is available for ``ext`` or no
        problem was found.
    """
    parser = get_parser(ext)
    if not parser:
        return []

    try:
        tree = parser.parse(content.encode("utf-8"))
        errors = []

        # Explicit stack instead of recursion: deeply nested trees must not
        # hit Python's recursion limit and be reported as a parser error.
        # Children are pushed reversed so nodes are visited in document
        # (pre-)order.
        stack = [tree.root_node]
        while stack:
            node = stack.pop()
            line = node.start_point[0] + 1
            column = node.start_point[1]

            # ``is_error`` / ``is_missing`` may be absent on some binding
            # versions, so both are read defensively.
            if node.type == "ERROR" or getattr(node, "is_error", False):
                errors.append({
                    "line": line,
                    "column": column,
                    "type": "syntax_error",
                    "text": f"Syntax error at line {line}, column {column}"
                })

            if getattr(node, "is_missing", False):
                errors.append({
                    "line": line,
                    "column": column,
                    "type": "missing_token",
                    "text": f"Missing expected token at line {line}"
                })

            stack.extend(reversed(node.children))

        return errors
    except Exception as e:
        return [{"line": 1, "column": 0, "type": "parser_error", "text": str(e)}]
627
+
628
+
629
+ # ── Native syntax checking (no tree-sitter, no AI) ──────────────────
630
+ import os as _os
631
+ import subprocess as _subprocess
632
+ import sys as _sys
633
+ import tempfile as _tempfile
634
+ import threading as _threading
635
+
636
+
637
def _kill_proc_tree(proc: _subprocess.Popen) -> None:
    """
    Terminate ``proc`` on a best-effort basis; every failure is swallowed.

    On Windows the whole process tree is removed with ``taskkill /F /T``.
    On other platforms only ``proc`` itself receives ``kill()``. Afterwards
    the process is waited on for up to three seconds.
    """
    on_windows = _sys.platform == "win32"
    try:
        if not on_windows:
            proc.kill()
        else:
            # taskkill /T kills the entire process tree — critical on Windows
            # where Rscript spawns child R processes that outlive the parent.
            taskkill_cmd = ["taskkill", "/F", "/T", "/PID", str(proc.pid)]
            _subprocess.run(
                taskkill_cmd,
                capture_output=True,
                timeout=5,
                creationflags=_subprocess.CREATE_NO_WINDOW,
            )
        proc.wait(timeout=3)
    except Exception:
        pass
653
+
654
+
655
def _err(line: int = 1, col: int = 0, text: str = "") -> Dict[str, Any]:
    """Build one error record with a clamped position.

    ``line`` is coerced to ``int`` and raised to at least 1, ``col`` is
    coerced to ``int`` and raised to at least 0, and ``text`` is converted
    with ``str``.
    """
    line_no = int(line)
    col_no = int(col)
    if line_no < 1:
        line_no = 1
    if col_no < 0:
        col_no = 0
    return dict(line=line_no, column=col_no, text=str(text))
657
+
658
+
659
def _clean_text(text: str) -> str:
    """Return *text* as a string without NUL characters or outer whitespace.

    Any falsy value (``None``, ``""``, ``0`` ...) yields an empty string.
    """
    if not text:
        return ""
    without_nuls = str(text).replace("\x00", "")
    return without_nuls.strip()
661
+
662
+
663
def _checker_name(ext: str) -> str:
    """Return the display name of the checker associated with *ext*.

    The lookup is case-insensitive. An extension without an entry falls back
    to the extension itself with leading dots removed (original casing kept),
    or to ``"unknown"`` when nothing is left.
    """
    names_by_ext = {}
    for label, extensions in (
        ('python_ast', ('.py',)),
        ('json', ('.json',)),
        ('pyyaml', ('.yaml', '.yml')),
        ('elementtree', ('.xml', '.svg')),
        ('tomllib', ('.toml',)),
        ('lxml', ('.html', '.htm')),
        ('tinycss2', ('.css',)),
        ('node --check', ('.js',)),
        ('tsc', ('.jsx', '.ts', '.tsx')),
        ('javac', ('.java',)),
        ('gofmt', ('.go',)),
        ('rustc', ('.rs',)),
        ('Rscript', ('.r',)),
        ('php -l', ('.php',)),
        ('ruby -c', ('.rb',)),
        ('perl -c', ('.pl', '.pm')),
        ('luac -p', ('.lua',)),
        ('bash -n', ('.sh', '.bash')),
    ):
        for extension in extensions:
            names_by_ext[extension] = label

    key = (ext or "").lower()
    if key in names_by_ext:
        return names_by_ext[key]
    return (ext or "unknown").lstrip(".") or "unknown"
691
+
692
+
693
def _result(status: str, checker: str, errors: List[Dict[str, Any]] | None = None, detail: str = "") -> Dict[str, Any]:
    """Assemble the structured result dictionary returned by every checker.

    A missing or empty ``errors`` value becomes a fresh empty list, and
    ``detail`` is passed through ``_clean_text``.
    """
    payload: Dict[str, Any] = {"status": status, "checker": checker}
    payload["errors"] = errors if errors else []
    payload["detail"] = _clean_text(detail)
    return payload
700
+
701
+
702
def _subproc_check(cmd: List[str], content: str, suffix: str, checker: str, timeout: int = 8) -> Dict[str, Any]:
    """Write *content* to a temp file and run a checker subprocess on it.

    The temp file path is appended to *cmd* as the last argument and the file
    is removed again before returning.

    Returns:
        When the process could be run: a dict with ``status == "ok"`` plus
        ``checker``, ``returncode``, ``stdout`` and ``stderr`` (each stream
        capped at 65536 characters). ``"ok"`` only means the tool ran; the
        caller interprets the exit code. Otherwise a ``_result`` dict with
        status ``checker_unavailable``, ``checker_failed`` or
        ``checker_timeout``.
    """
    fd, tmp = _tempfile.mkstemp(suffix=suffix)
    try:
        with _os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(content)

        kwargs = {}
        if _sys.platform == "win32":
            kwargs["creationflags"] = _subprocess.CREATE_NO_WINDOW

        # errors="replace": checkers echo parts of the (UTF-8) source back in
        # their diagnostics. With strict decoding a single byte that the
        # locale codec cannot decode raises UnicodeDecodeError and the whole
        # check is lost; replacing it keeps the diagnostics usable.
        r = _subprocess.run(
            cmd + [tmp],
            stdin=_subprocess.DEVNULL,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=timeout,
            **kwargs
        )
        return {
            "status": "ok",
            "checker": checker,
            "returncode": r.returncode,
            "stdout": r.stdout[:65536],
            "stderr": r.stderr[:65536],
        }
    except FileNotFoundError:
        return _result("checker_unavailable", checker, detail=f"{cmd[0]} is not installed or not on PATH.")
    except PermissionError as e:
        return _result("checker_failed", checker, detail=str(e) or f"{cmd[0]} access denied.")
    except OSError as e:
        return _result("checker_failed", checker, detail=str(e) or f"{cmd[0]} failed to start.")
    except _subprocess.TimeoutExpired:
        return _result("checker_timeout", checker, detail=f"{cmd[0]} exceeded {timeout}s.")
    finally:
        try:
            _os.unlink(tmp)
        except OSError:
            pass
734
+
735
+
736
def _native_python(content: str) -> Dict[str, Any]:
    """Check Python source by parsing it with ``ast.parse``.

    Returns a ``clean`` result when parsing succeeds and a ``syntax_error``
    result (0-based column) otherwise.
    """
    import ast
    try:
        ast.parse(content)
    except SyntaxError as e:
        # SyntaxError.offset is 1-based (and may be None); records are 0-based.
        return _result(
            "syntax_error", "python_ast",
            [_err(e.lineno or 1, (e.offset or 1) - 1, e.msg)],
            detail=e.msg or "",
        )
    except ValueError as e:
        # Python versions before 3.12 reject embedded NUL bytes with a
        # ValueError instead of a SyntaxError; the source is still unparsable.
        return _result("syntax_error", "python_ast", [_err(1, 0, str(e))], detail=str(e))
    return _result("clean", "python_ast")
743
+
744
+
745
def _native_json(content: str) -> Dict[str, Any]:
    """Check JSON text with ``json.loads``.

    Returns a ``clean`` result when the document parses and a
    ``syntax_error`` result carrying the decoder's position otherwise.
    """
    import json
    try:
        json.loads(content)
        return _result("clean", "json")
    except json.JSONDecodeError as e:
        # JSONDecodeError.colno is 1-based; the other checkers in this module
        # store 0-based columns, so convert for consistency.
        return _result("syntax_error", "json", [_err(e.lineno, e.colno - 1, e.msg)], detail=e.msg)
752
+
753
+
754
def _native_yaml(content: str) -> Dict[str, Any]:
    """Check YAML text by loading every document with PyYAML's safe loader.

    Reports ``checker_unavailable`` when PyYAML cannot be imported and
    ``syntax_error`` for any other exception raised while loading; the
    position comes from the exception's ``problem_mark`` when it has one.
    """
    try:
        import yaml
        # Consume all documents to catch errors in multi-document YAML files.
        for _document in yaml.safe_load_all(content):
            pass
    except ImportError:
        return _result("checker_unavailable", "pyyaml", detail="PyYAML is not installed.")
    except Exception as exc:
        mark = getattr(exc, 'problem_mark', None)
        if mark:
            line_no, col_no = mark.line + 1, mark.column
        else:
            line_no, col_no = 1, 0
        message = str(exc)
        return _result("syntax_error", "pyyaml", [_err(line_no, col_no, message)], detail=message)
    return _result("clean", "pyyaml")
767
+
768
+
769
def _native_xml(content: str) -> Dict[str, Any]:
    """Check XML (or SVG) text with ``xml.etree.ElementTree``.

    A ``ParseError`` becomes a ``syntax_error`` result located at the
    exception's ``position`` (line 1, column 0 when it is absent).
    """
    from xml.etree import ElementTree as ET
    try:
        ET.fromstring(content)
    except ET.ParseError as exc:
        position = getattr(exc, 'position', None)
        if position:
            line_no, col_no = position
        else:
            line_no, col_no = 1, 0
        message = str(exc)
        return _result("syntax_error", "elementtree", [_err(line_no, col_no, message)], detail=message)
    return _result("clean", "elementtree")
778
+
779
+
780
def _native_toml(content: str) -> Dict[str, Any]:
    """Check TOML text with ``tomllib`` (or the ``tomli`` backport).

    Returns ``checker_unavailable`` when neither module can be imported,
    ``clean`` when the document parses, and ``syntax_error`` otherwise. The
    position is taken from the "line N, column M" part of the parser message
    when it is present.
    """
    try:
        try:
            import tomllib  # Python 3.11+
        except ImportError:
            import tomli as tomllib  # type: ignore
        tomllib.loads(content)
        return _result("clean", "tomllib")
    except ImportError:
        return _result("checker_unavailable", "tomllib", detail="tomllib/tomli is not installed.")
    except Exception as e:
        message = str(e)
        # The decoder message ends with "(at line N, column M)"; the column
        # is 1-based there, while error records use 0-based columns.
        m = re.search(r'line (\d+)(?:, column (\d+))?', message)
        line_no = int(m.group(1)) if m else 1
        col_no = int(m.group(2)) - 1 if m and m.group(2) else 0
        return _result("syntax_error", "tomllib", [_err(line_no, col_no, message)], detail=message)
793
+
794
+
795
def _native_html(content: str) -> Dict[str, Any]:
    """Check HTML with lxml's recovering parser and report its error log.

    Entries known to be false positives for valid HTML5 are dropped:
      - HTML_UNKNOWN_TAG: HTML5 semantic elements (nav, main, header, footer, etc.)
      - "script embeds close tag" messages: JS code containing </script>
      - ERR_NAME_REQUIRED "htmlParseEntityRef: no name": unescaped & in code/text
    """
    ignored_types = {"HTML_UNKNOWN_TAG", "ERR_NAME_REQUIRED"}
    ignored_fragments = {"script embeds close tag"}

    try:
        from lxml import etree  # type: ignore
        html_parser = etree.HTMLParser(recover=True)
        etree.fromstring(content.encode('utf-8', errors='replace'), html_parser)

        errors = []
        for entry in html_parser.error_log:
            if entry.type_name in ignored_types:
                continue
            lowered = entry.message.lower()
            if any(fragment in lowered for fragment in ignored_fragments):
                continue
            errors.append(_err(entry.line, entry.column, entry.message))

        if not errors:
            return _result("clean", "lxml")
        return _result("syntax_error", "lxml", errors, detail=errors[0]["text"])
    except ImportError:
        return _result("checker_unavailable", "lxml", detail="lxml is not installed.")
    except Exception as exc:
        return _result("checker_failed", "lxml", detail=str(exc))
822
+
823
+
824
def _native_css(content: str) -> Dict[str, Any]:
    """Check a CSS stylesheet with ``tinycss2.parse_stylesheet``.

    Only the nodes returned at the top level of the stylesheet are examined;
    every node whose ``type`` is ``'error'`` becomes one error record.

    Returns ``checker_unavailable`` when tinycss2 is not installed,
    ``checker_failed`` when the parser itself raises, and otherwise ``clean``
    or ``syntax_error``.
    """
    try:
        import tinycss2  # type: ignore
        rules = tinycss2.parse_stylesheet(content)
        errors = []
        for rule in rules:
            if getattr(rule, 'type', '') != 'error':
                continue
            # Prefer the parser's human-readable message; repr() only shows
            # the node class and error kind. Fall back to it if unavailable.
            message = getattr(rule, 'message', '') or repr(rule)
            # tinycss2 columns start at 1; error records use 0-based columns.
            column = (getattr(rule, 'source_column', 1) or 1) - 1
            errors.append(_err(getattr(rule, 'source_line', 1), column, message))
        if errors:
            return _result("syntax_error", "tinycss2", errors, detail=errors[0]["text"])
        return _result("clean", "tinycss2")
    except ImportError:
        return _result("checker_unavailable", "tinycss2", detail="tinycss2 is not installed.")
    except Exception as e:
        return _result("checker_failed", "tinycss2", detail=str(e))
839
+
840
+
841
def _native_js(content: str) -> Dict[str, Any]:
    """Check JavaScript with ``node --check``.

    Node reports a syntax error as a ``<file>:<line>`` header, the offending
    source line, a caret line marking the column, a blank line and then the
    ``SyntaxError: ...`` message. The message line is used as the error text
    and the caret position as the column; when they cannot be found the
    source line following the header is used with column 0.
    """
    proc = _subproc_check(["node", "--check"], content, ".js", "node --check")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "node --check")
    errors = []
    lines = _clean_text(proc["stderr"] + "\n" + proc["stdout"]).splitlines()
    for i, line in enumerate(lines):
        stripped = line.strip()
        # Stack-trace frames ("at node:internal/...:78:3") also end in
        # ":<digits>" but are not location headers.
        if stripped.startswith("at "):
            continue
        m = re.match(r'^.*:(\d+)$', stripped)
        if not m or i + 1 >= len(lines):
            continue
        column = 0
        message = lines[i + 1].strip()  # fallback: the offending source line
        for follow in lines[i + 2:i + 8]:
            caret = re.match(r'^(\s*)\^+\s*$', follow)
            if caret:
                column = len(caret.group(1))
                continue
            if re.match(r'^[A-Za-z]*Error: .+', follow):
                message = follow.strip()
                break
        errors.append(_err(int(m.group(1)), column, message))
    if errors:
        return _result("syntax_error", "node --check", errors, detail=errors[0]["text"])
    detail = lines[0] if lines else f"node --check returned {proc['returncode']}."
    return _result("checker_failed", "node --check", detail=detail)
857
+
858
+
859
def _tsc_is_semantic(code: str) -> bool:
    """Return True when a tsc diagnostic code is not about syntax (heuristic).

    TS2xxx (type checking / module resolution) and TS7xxx (implicit-any
    family) are treated as semantic, as is TS1208, the ``--isolatedModules``
    complaint about files without imports or exports. Every other code is
    still reported as a syntax error.
    """
    m = re.match(r'^TS(\d+)$', code)
    if not m:
        return False
    number = int(m.group(1))
    return number == 1208 or 2000 <= number <= 2999 or 7000 <= number <= 7999


def _tsc_outcome(proc: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a finished tsc run (an ``ok`` dict from ``_subproc_check``) into a result."""
    if proc["returncode"] == 0:
        return _result("clean", "tsc")
    errors = []
    ignored = 0
    for line in (proc["stderr"] + proc["stdout"]).splitlines():
        m = re.match(r'^.*\((\d+),(\d+)\):\s*error\s+(\w+):\s*(.+)$', line)
        if not m:
            continue
        if _tsc_is_semantic(m.group(3)):
            # The file is compiled alone in a temp directory, so unresolved
            # imports and type errors are expected and say nothing about
            # syntax (same policy as the javac checker).
            ignored += 1
            continue
        # tsc columns are 1-based; error records use 0-based columns.
        errors.append(_err(int(m.group(1)), int(m.group(2)) - 1, m.group(4)))
    if errors:
        return _result("syntax_error", "tsc", errors, detail=errors[0]["text"])
    if ignored:
        return _result("clean", "tsc", detail="Syntax OK; semantic errors (imports/types) were ignored.")
    return _result("checker_failed", "tsc", detail=f"tsc returned {proc['returncode']}.")


def _tsc_node_fallback(content: str, note: str) -> Dict[str, Any]:
    """Run ``node --check`` instead of tsc and prefix *note* to a usable result."""
    fallback = _native_js(content)
    if fallback["status"] in {"clean", "syntax_error"}:
        fallback["detail"] = _clean_text(note + fallback.get("detail", ""))
    return fallback


def _native_ts(content: str) -> Dict[str, Any]:
    """Check TypeScript with ``tsc --noEmit``; fall back to ``node --check`` without tsc."""
    proc = _subproc_check(
        ["tsc", "--noEmit", "--target", "ES2020", "--isolatedModules", "--skipLibCheck"],
        content, ".ts", "tsc", timeout=15,
    )
    if proc["status"] == "checker_unavailable":
        return _tsc_node_fallback(
            content, "tsc unavailable; fell back to node --check syntax validation. "
        )
    if proc["status"] != "ok":
        return proc
    return _tsc_outcome(proc)


def _native_jsx(content: str) -> Dict[str, Any]:
    """Check JSX with ``tsc --allowJs --jsx react``; ``unsupported`` when tsc is missing."""
    proc = _subproc_check(
        ["tsc", "--noEmit", "--jsx", "react", "--allowJs", "--isolatedModules",
         "--skipLibCheck", "--target", "ES2020"],
        content, ".jsx", "tsc", timeout=15,
    )
    if proc["status"] == "checker_unavailable":
        return _result("unsupported", "tsc",
                       detail="tsc not found; node --check cannot validate JSX syntax.")
    if proc["status"] != "ok":
        return proc
    return _tsc_outcome(proc)


def _native_tsx(content: str) -> Dict[str, Any]:
    """Check TSX with ``tsc --jsx react``; fall back to ``node --check`` without tsc."""
    proc = _subproc_check(
        ["tsc", "--noEmit", "--jsx", "react", "--isolatedModules",
         "--skipLibCheck", "--target", "ES2020"],
        content, ".tsx", "tsc", timeout=15,
    )
    if proc["status"] == "checker_unavailable":
        return _tsc_node_fallback(
            content, "tsc unavailable; fell back to node --check (JSX not fully validated). "
        )
    if proc["status"] != "ok":
        return proc
    return _tsc_outcome(proc)
934
+
935
+
936
def _native_java(content: str) -> Dict[str, Any]:
    """Check Java source with ``javac`` and report only syntax-level errors.

    javac also reports type/import errors (e.g. "cannot find symbol",
    "package does not exist"); diagnostics matching a known semantic pattern
    are ignored. If javac fails without any ``file:line: error:`` diagnostic
    (bad option, unsupported ``-source`` level, crash) the result is
    ``checker_failed`` rather than ``clean``.
    """
    # Send class files to a throw-away directory; by default javac writes
    # them next to the temp source file, where nothing would remove them.
    with _tempfile.TemporaryDirectory() as class_dir:
        proc = _subproc_check(
            ["javac", "-proc:none", "-source", "11", "-encoding", "UTF-8", "-d", class_dir],
            content, ".java", "javac", timeout=15,
        )
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "javac")
    _JAVAC_SEMANTIC_PATTERNS = {
        "should be declared in", "cannot find symbol", "package does not exist",
        "cannot access", "incompatible types", "is not abstract",
        "has private access", "is already defined", "unreported exception",
        "non-static method", "non-static variable",
    }
    errors = []
    saw_diagnostic = False
    for line in (proc["stderr"] + proc["stdout"]).splitlines():
        m = re.match(r'^.*:(\d+):\s*error:\s*(.+)$', line)
        if not m:
            continue
        saw_diagnostic = True
        if not any(p in m.group(2) for p in _JAVAC_SEMANTIC_PATTERNS):
            errors.append(_err(int(m.group(1)), 0, m.group(2)))
    if errors:
        return _result("syntax_error", "javac", errors, detail=errors[0]["text"])
    if saw_diagnostic:
        # All errors were semantic (imports, types) — syntax is likely fine.
        return _result("clean", "javac", detail="Syntax OK; semantic errors (imports/types) were ignored.")
    # Non-zero exit without a single located diagnostic: javac itself failed.
    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    detail = output.splitlines()[0] if output else f"javac returned {proc['returncode']}."
    return _result("checker_failed", "javac", detail=detail)
964
+
965
+
966
def _native_go(content: str) -> Dict[str, Any]:
    """Check Go source with ``gofmt -e``.

    Each ``<file>:<line>:<col>: <message>`` line in the tool output becomes
    one error record with the column shifted down by one. A non-zero exit
    without such a line is reported as ``checker_failed``.
    """
    proc = _subproc_check(["gofmt", "-e"], content, ".go", "gofmt")
    if proc["status"] != "ok":
        return proc
    exit_code = proc["returncode"]
    if exit_code == 0:
        return _result("clean", "gofmt")

    combined = proc["stderr"] + proc["stdout"]
    located = (re.match(r'^.*:(\d+):(\d+):\s*(.+)$', row) for row in combined.splitlines())
    errors = [
        _err(int(hit.group(1)), int(hit.group(2)) - 1, hit.group(3))
        for hit in located if hit
    ]
    if not errors:
        return _result("checker_failed", "gofmt", detail=f"gofmt returned {exit_code}.")
    return _result("syntax_error", "gofmt", errors, detail=errors[0]["text"])
980
+
981
+
982
def _native_rust(content: str) -> Dict[str, Any]:
    """Check Rust source by compiling it to metadata with ``rustc``.

    The source is written to ``check.rs`` in a temporary directory and
    compiled as a library crate, so files without a ``main`` function are
    accepted. Each ``error...:`` line that is followed within a few lines by
    a ``--> file:line:col`` location becomes one error record.

    Note: rustc reports every compile error (not only parse errors), so
    unresolved items in a single-file check are reported here as well.
    """
    with _tempfile.TemporaryDirectory() as tmpdir:
        rs_path = _os.path.join(tmpdir, "check.rs")
        with open(rs_path, 'w', encoding='utf-8') as f:
            f.write(content)
        try:
            kwargs = {}
            if _sys.platform == "win32":
                kwargs["creationflags"] = _subprocess.CREATE_NO_WINDOW
            r = _subprocess.run(
                # --crate-type=lib: the default (bin) rejects any source that
                # has no `main`, which is not a syntax problem.
                ["rustc", "--edition", "2021", "--crate-type=lib", "--emit=metadata",
                 "--out-dir", tmpdir, rs_path],
                # Detach stdin and decode leniently, like _subproc_check does
                # for the other subprocess-based checkers.
                stdin=_subprocess.DEVNULL,
                capture_output=True, text=True, errors="replace", timeout=15,
                **kwargs
            )
        except FileNotFoundError:
            return _result("checker_unavailable", "rustc", detail="rustc is not installed or not on PATH.")
        except PermissionError as e:
            return _result("checker_failed", "rustc", detail=str(e) or "rustc access denied.")
        except _subprocess.TimeoutExpired:
            return _result("checker_timeout", "rustc", detail="rustc exceeded 15s.")
        except OSError as e:
            return _result("checker_failed", "rustc", detail=str(e) or "rustc failed to start.")
    if r.returncode == 0:
        return _result("clean", "rustc")
    errors, lines = [], (r.stderr + r.stdout).splitlines()
    for i, line in enumerate(lines):
        m = re.match(r'^error(?:\[E\d+\])?: (.+)$', line)
        if m:
            for j in range(i + 1, min(i + 5, len(lines))):
                # Greedy path match so that Windows drive letters ("C:\...")
                # do not break the line/column extraction.
                loc = re.match(r'^\s*--> (.+):(\d+):(\d+)\s*$', lines[j])
                if loc:
                    errors.append(_err(int(loc.group(2)), int(loc.group(3)) - 1, m.group(1)))
                    break
    if errors:
        return _result("syntax_error", "rustc", errors, detail=errors[0]["text"])
    detail = lines[0] if lines else f"rustc returned {r.returncode}."
    return _result("checker_failed", "rustc", detail=detail)
1019
+
1020
+
1021
def _native_r(content: str) -> Dict[str, Any]:
    """Check R source by asking ``Rscript`` to ``parse()`` a temp file.

    The R expression prints ``OK`` on success and ``ERROR: <message>`` on a
    parse failure; that marker, together with the exit code, decides the
    result. The temp file is removed and a still-running process is killed
    before returning.
    """
    # Write to a temp file so parse(file=...) gets the full content reliably.
    # parse(stdin()) silently truncates large files and misses errors in
    # complex multi-line constructs (e.g. knitr::knit_child() string args).
    _R_TIMEOUT = 20  # Rscript cold-start on Windows is 3-8s; 20s is generous.
    with _tempfile.NamedTemporaryFile(
        mode='w', suffix='.R', delete=False, encoding='utf-8'
    ) as handle:
        handle.write(content)
        tmp_path = handle.name

    proc = None
    try:
        popen_options = (
            {"creationflags": _subprocess.CREATE_NO_WINDOW}
            if _sys.platform == "win32" else {}
        )
        # Use forward slashes — R on Windows accepts them and avoids backslash escaping issues.
        # Escape single quotes for safe embedding in R string literal.
        r_path = tmp_path.replace('\\', '/').replace("'", "\\'")
        r_expression = f"tryCatch({{parse(file='{r_path}');cat('OK\\n')}},error=function(e){{cat('ERROR:',conditionMessage(e),'\\n')}})"
        proc = _subprocess.Popen(
            ["Rscript", "--vanilla", "-e", r_expression],
            stdin=_subprocess.DEVNULL,
            stdout=_subprocess.PIPE,
            stderr=_subprocess.PIPE,
            text=True,
            **popen_options
        )
        try:
            stdout, stderr = proc.communicate(timeout=_R_TIMEOUT)
        except _subprocess.TimeoutExpired:
            _kill_proc_tree(proc)
            return _result("checker_timeout", "Rscript", detail=f"Rscript exceeded {_R_TIMEOUT}s.")

        output = ((stdout or "") + (stderr or ""))[:65536]

        if "ERROR:" not in output:
            if proc.returncode != 0 and "OK" not in output:
                failure = output.strip() or f"Rscript exited with code {proc.returncode}"
                return _result("checker_failed", "Rscript", detail=failure)
            return _result("clean", "Rscript")

        text = re.sub(r'^ERROR:\s*', '', output.strip())
        # R reports "file:line:col: message" — extract line and column.
        located = re.search(r':(\d+):(\d+):', text)
        if located:
            line, col = int(located.group(1)), int(located.group(2))
        else:
            by_line = re.search(r'line (\d+)', text)
            line, col = (int(by_line.group(1)) if by_line else 1), 0
        return _result("syntax_error", "Rscript", [_err(line, col, text)], detail=text)
    except FileNotFoundError:
        return _result("checker_unavailable", "Rscript", detail="Rscript is not installed or not on PATH.")
    except PermissionError as exc:
        return _result("checker_failed", "Rscript", detail=str(exc) or "Rscript access denied.")
    except OSError as exc:
        return _result("checker_failed", "Rscript", detail=str(exc) or "Rscript failed to start.")
    finally:
        if proc and proc.poll() is None:
            _kill_proc_tree(proc)
        try:
            _os.unlink(tmp_path)
        except OSError:
            pass
1080
+
1081
+
1082
def _native_php(content: str) -> Dict[str, Any]:
    """Check PHP source with ``php -l``.

    On a non-zero exit the first output line of the form
    ``... error: ... in <file> on line <n>`` supplies the line number and
    text; without such a line the whole output is reported at line 1.
    """
    proc = _subproc_check(["php", "-l"], content, ".php", "php -l")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "php -l")

    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    line_no, message = 1, output
    for row in output.splitlines():
        hit = re.match(r'^.*error:.*in\s+\S+\s+on line\s+(\d+)', row, re.IGNORECASE)
        if hit:
            line_no, message = int(hit.group(1)), row.strip()
            break
    return _result("syntax_error", "php -l", [_err(line_no, 0, message)], detail=message)
1094
+
1095
+
1096
def _native_ruby(content: str) -> Dict[str, Any]:
    """Check Ruby source with ``ruby -c``.

    On a non-zero exit the first output line shaped like
    ``<anything>:<line>: <message>`` supplies the position and text; without
    such a line the whole output is reported at line 1.
    """
    proc = _subproc_check(["ruby", "-c"], content, ".rb", "ruby -c")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "ruby -c")

    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    line_no, message = 1, output
    for row in output.splitlines():
        hit = re.match(r'^.*:(\d+):\s*(.+)$', row)
        if hit:
            line_no, message = int(hit.group(1)), hit.group(2)
            break
    return _result("syntax_error", "ruby -c", [_err(line_no, 0, message)], detail=message)
1108
+
1109
+
1110
def _native_perl(content: str) -> Dict[str, Any]:
    """Check Perl source with ``perl -c``.

    On a non-zero exit the first output line containing
    ``at <file> line <n>`` supplies the line number and text; without such a
    line the whole output is reported at line 1.
    """
    # Note: perl -c runs BEGIN blocks — use with caution on untrusted code.
    proc = _subproc_check(["perl", "-c"], content, ".pl", "perl -c")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "perl -c")

    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    line_no, message = 1, output
    for row in output.splitlines():
        hit = re.match(r'^.*at\s+\S+\s+line\s+(\d+)', row)
        if hit:
            line_no, message = int(hit.group(1)), row.strip()
            break
    return _result("syntax_error", "perl -c", [_err(line_no, 0, message)], detail=message)
1123
+
1124
+
1125
def _native_lua(content: str) -> Dict[str, Any]:
    """Check Lua source with ``luac -p``.

    On a non-zero exit the first output line shaped like
    ``<anything>:<line>: <message>`` supplies the position and text; without
    such a line the whole output is reported at line 1.
    """
    proc = _subproc_check(["luac", "-p"], content, ".lua", "luac -p")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "luac -p")

    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    line_no, message = 1, output
    for row in output.splitlines():
        hit = re.match(r'^.*:(\d+):\s*(.+)$', row)
        if hit:
            line_no, message = int(hit.group(1)), hit.group(2)
            break
    return _result("syntax_error", "luac -p", [_err(line_no, 0, message)], detail=message)
1137
+
1138
+
1139
def _native_shell(content: str) -> Dict[str, Any]:
    """Check a shell script with ``bash -n``.

    Every output line shaped like ``<prefix>: line <n>: <message>`` becomes
    one error record. A non-zero exit without such a line is reported as
    ``checker_failed`` with the raw output as detail.
    """
    proc = _subproc_check(["bash", "-n"], content, ".sh", "bash -n")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "bash -n")

    output = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    located = (re.match(r'^.*: line (\d+): (.+)$', row) for row in output.splitlines())
    errors = [_err(int(hit.group(1)), 0, hit.group(2)) for hit in located if hit]
    if not errors:
        fallback_detail = output or f"bash -n returned {proc['returncode']}."
        return _result("checker_failed", "bash -n", detail=fallback_detail)
    return _result("syntax_error", "bash -n", errors, detail=errors[0]["text"])
1155
+
1156
+
1157
# Lower-cased file extension -> checker function. Extensions that share a
# syntax share a checker.
_NATIVE_DISPATCH = {
    '.py': _native_python,
    '.json': _native_json,
    '.yaml': _native_yaml,
    '.yml': _native_yaml,
    '.xml': _native_xml,
    '.svg': _native_xml,
    '.toml': _native_toml,
    '.html': _native_html,
    '.htm': _native_html,
    '.css': _native_css,
    '.js': _native_js,
    '.jsx': _native_jsx,
    '.ts': _native_ts,
    '.tsx': _native_tsx,
    '.java': _native_java,
    '.go': _native_go,
    '.rs': _native_rust,
    '.r': _native_r,
    '.php': _native_php,
    '.rb': _native_ruby,
    '.pl': _native_perl,
    '.pm': _native_perl,
    '.lua': _native_lua,
    '.sh': _native_shell,
    '.bash': _native_shell,
}
1177
+
1178
+
1179
def check_syntax_native(content: str, ext: str) -> Dict[str, Any]:
    """Syntax-check *content* with the native checker registered for *ext*.

    The extension is matched case-insensitively against ``_NATIVE_DISPATCH``.
    Returns an ``unsupported`` result when no checker is registered and a
    ``checker_failed`` result when the checker raises or returns something
    that is not a dict with a ``"status"`` key.
    """
    normalized_ext = (ext or "").lower()
    checker = _NATIVE_DISPATCH.get(normalized_ext)
    if not checker:
        return _result(
            "unsupported",
            _checker_name(normalized_ext),
            detail=f"No native syntax checker is registered for '{ext or '[no extension]'}'.",
        )
    try:
        outcome = checker(content)
        if isinstance(outcome, dict) and "status" in outcome:
            return outcome
        return _result("checker_failed", _checker_name(normalized_ext), detail="Checker returned an invalid result.")
    except Exception as exc:
        return _result("checker_failed", _checker_name(normalized_ext), detail=f"Checker error: {exc}")
1196
+
1197
+
1198
def check_syntax_native_with_timeout(content: str, ext: str, timeout_seconds: int = 35) -> Dict[str, Any]:
    """Run native syntax validation with an overall time limit.

    The check runs in a daemon thread and this function waits at most
    ``timeout_seconds`` for it. Threading is used instead of multiprocessing
    to avoid the Windows spawn deadlock (re-importing tree_sitter bindings in
    a fresh process blocked the MCP server's stdio thread indefinitely).
    Individual subprocess-based checkers already carry their own timeouts;
    this wrapper is a final safety net for pure-Python checkers that could
    theoretically loop.

    Args:
        content: Source text to validate.
        ext: File extension including the dot; matched case-insensitively.
        timeout_seconds: Limit in seconds. Falsy or non-numeric values fall
            back to the default of 35; values below 1 are raised to 1.

    Returns:
        The checker's result dict, or a ``checker_timeout`` /
        ``checker_failed`` result when the worker did not finish or produced
        nothing. On timeout the worker thread is abandoned, not stopped — a
        Python thread cannot be killed — so it may keep running in the
        background.
    """
    default_timeout = 35
    normalized_ext = (ext or "").lower()
    if normalized_ext not in _NATIVE_DISPATCH:
        return check_syntax_native(content, normalized_ext)

    # Use the signature default as the fallback: the previous fallback of 12s
    # was shorter than the timeouts of several individual checkers.
    try:
        timeout_seconds = max(1, int(timeout_seconds or default_timeout))
    except (TypeError, ValueError):
        timeout_seconds = default_timeout

    result_holder: list = [None]

    def _run() -> None:
        try:
            result_holder[0] = check_syntax_native(content, normalized_ext)
        except Exception as e:
            result_holder[0] = _result(
                "checker_failed",
                _checker_name(normalized_ext),
                detail=f"Validation worker crashed: {e}",
            )

    t = _threading.Thread(target=_run, daemon=True)
    t.start()
    t.join(timeout_seconds)

    if t.is_alive():
        return _result(
            "checker_timeout",
            _checker_name(normalized_ext),
            detail=f"Validation exceeded {timeout_seconds}s and was terminated.",
        )

    if not isinstance(result_holder[0], dict):
        return _result("checker_failed", _checker_name(normalized_ext), detail="Validation worker returned no result.")
    return result_holder[0]