code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
services/parser.py
ADDED
|
@@ -0,0 +1,1238 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AST-based parsing service using Tree-sitter.
|
|
3
|
+
Provides unified structural extraction for multiple languages.
|
|
4
|
+
"""
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import tree_sitter
|
|
10
|
+
import tree_sitter_css
|
|
11
|
+
import tree_sitter_go
|
|
12
|
+
import tree_sitter_html
|
|
13
|
+
import tree_sitter_javascript
|
|
14
|
+
import tree_sitter_json
|
|
15
|
+
import tree_sitter_markdown
|
|
16
|
+
import tree_sitter_python
|
|
17
|
+
import tree_sitter_rust
|
|
18
|
+
import tree_sitter_typescript
|
|
19
|
+
import tree_sitter_yaml
|
|
20
|
+
HAS_TREE_SITTER = True
|
|
21
|
+
|
|
22
|
+
PY_LANGUAGE = tree_sitter.Language(tree_sitter_python.language())
|
|
23
|
+
JS_LANGUAGE = tree_sitter.Language(tree_sitter_javascript.language())
|
|
24
|
+
TS_LANGUAGE = tree_sitter.Language(tree_sitter_typescript.language_typescript())
|
|
25
|
+
TSX_LANGUAGE = tree_sitter.Language(tree_sitter_typescript.language_tsx())
|
|
26
|
+
HTML_LANGUAGE = tree_sitter.Language(tree_sitter_html.language())
|
|
27
|
+
MD_LANGUAGE = tree_sitter.Language(tree_sitter_markdown.language())
|
|
28
|
+
CSS_LANGUAGE = tree_sitter.Language(tree_sitter_css.language())
|
|
29
|
+
GO_LANGUAGE = tree_sitter.Language(tree_sitter_go.language())
|
|
30
|
+
RUST_LANGUAGE = tree_sitter.Language(tree_sitter_rust.language())
|
|
31
|
+
JSON_LANGUAGE = tree_sitter.Language(tree_sitter_json.language())
|
|
32
|
+
YAML_LANGUAGE = tree_sitter.Language(tree_sitter_yaml.language())
|
|
33
|
+
|
|
34
|
+
LANGUAGES = {
|
|
35
|
+
'.py': PY_LANGUAGE,
|
|
36
|
+
'.js': JS_LANGUAGE,
|
|
37
|
+
'.jsx': JS_LANGUAGE,
|
|
38
|
+
'.ts': TS_LANGUAGE,
|
|
39
|
+
'.tsx': TSX_LANGUAGE,
|
|
40
|
+
'.html': HTML_LANGUAGE,
|
|
41
|
+
'.htm': HTML_LANGUAGE,
|
|
42
|
+
'.md': MD_LANGUAGE,
|
|
43
|
+
'.css': CSS_LANGUAGE,
|
|
44
|
+
'.go': GO_LANGUAGE,
|
|
45
|
+
'.rs': RUST_LANGUAGE,
|
|
46
|
+
'.json': JSON_LANGUAGE,
|
|
47
|
+
'.yaml': YAML_LANGUAGE,
|
|
48
|
+
'.yml': YAML_LANGUAGE,
|
|
49
|
+
}
|
|
50
|
+
except ImportError:
|
|
51
|
+
HAS_TREE_SITTER = False
|
|
52
|
+
LANGUAGES = {}
|
|
53
|
+
|
|
54
|
+
# Bump this any time extract_sections_ast / _walk_* logic changes so that
|
|
55
|
+
# file_memory records extracted with an older version are force-refreshed.
|
|
56
|
+
PARSER_VERSION = "2"
|
|
57
|
+
|
|
58
|
+
def get_parser(ext: str) -> Optional['tree_sitter.Parser']:
    """Build a Tree-sitter parser for the file extension *ext*.

    Returns ``None`` when Tree-sitter could not be imported or when *ext*
    has no entry in ``LANGUAGES``.
    """
    if HAS_TREE_SITTER and ext in LANGUAGES:
        return tree_sitter.Parser(LANGUAGES[ext])
    return None
|
|
63
|
+
|
|
64
|
+
def extract_sections_ast(content: str, ext: str) -> Optional[List[Dict[str, Any]]]:
    """
    Extract structural sections using Tree-sitter AST.

    *content* is parsed as UTF-8 and handed, together with its lines, to the
    walker registered for *ext*. Returns None if language is not supported
    or parsing fails (any exception raised while parsing or walking is
    swallowed and reported as None).
    """
    parser = get_parser(ext)
    if not parser:
        return None

    try:
        tree = parser.parse(content.encode('utf-8'))
        lines = content.split('\n')

        # Extension -> walker; several extensions share one walker.
        walkers = {
            '.py': _walk_python,
            '.js': _walk_js_ts,
            '.jsx': _walk_js_ts,
            '.ts': _walk_js_ts,
            '.tsx': _walk_js_ts,
            '.html': _walk_html,
            '.htm': _walk_html,
            '.md': _walk_markdown,
            '.css': _walk_css,
            '.go': _walk_go,
            '.rs': _walk_rust,
            '.json': _walk_json,
            '.yaml': _walk_yaml,
            '.yml': _walk_yaml,
        }
        walk = walkers.get(ext)
        if walk is None:
            return None

        collected = []
        walk(tree.root_node, lines, collected)
        return collected
    except Exception:
        return None
|
|
102
|
+
|
|
103
|
+
def _extract_docstring_python(node, lines: List[str]) -> Optional[str]:
|
|
104
|
+
# In Python, docstring is the first expression statement in a block
|
|
105
|
+
if node.type in ('function_definition', 'class_definition'):
|
|
106
|
+
body = next((child for child in node.children if child.type == 'block'), None)
|
|
107
|
+
if body and body.children and body.children[0].type == 'expression_statement':
|
|
108
|
+
expr = body.children[0]
|
|
109
|
+
if expr.children and expr.children[0].type == 'string':
|
|
110
|
+
doc = lines[expr.start_point[0]].strip()
|
|
111
|
+
if doc.startswith('"""') or doc.startswith("'''"):
|
|
112
|
+
quote = doc[:3]
|
|
113
|
+
if doc.endswith(quote) and len(doc) > 6:
|
|
114
|
+
return doc[3:-3].strip()
|
|
115
|
+
first = doc[3:].strip()
|
|
116
|
+
if first: return first
|
|
117
|
+
if expr.start_point[0] + 1 <= expr.end_point[0]:
|
|
118
|
+
return lines[expr.start_point[0] + 1].strip()
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
def _walk_python(node, lines: List[str], sections: List[Dict[str, Any]], parent_section=None):
|
|
122
|
+
for child in node.children:
|
|
123
|
+
if child.type == 'class_definition':
|
|
124
|
+
name_node = next((c for c in child.children if c.type == 'identifier'), None)
|
|
125
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
126
|
+
|
|
127
|
+
sig_start = child.start_point[0]
|
|
128
|
+
# Find the colon
|
|
129
|
+
colon_node = next((c for c in child.children if c.type == ':'), None)
|
|
130
|
+
sig_end = colon_node.end_point[0] if colon_node else sig_start
|
|
131
|
+
signature = '\n'.join(lines[sig_start:sig_end+1]).strip()
|
|
132
|
+
|
|
133
|
+
section = {
|
|
134
|
+
"type": "class",
|
|
135
|
+
"name": name,
|
|
136
|
+
"line_start": child.start_point[0] + 1,
|
|
137
|
+
"line_end": child.end_point[0] + 1,
|
|
138
|
+
"signature": signature,
|
|
139
|
+
"children": []
|
|
140
|
+
}
|
|
141
|
+
doc = _extract_docstring_python(child, lines)
|
|
142
|
+
if doc: section["doc"] = doc
|
|
143
|
+
|
|
144
|
+
if parent_section:
|
|
145
|
+
parent_section["children"].append(section)
|
|
146
|
+
else:
|
|
147
|
+
sections.append(section)
|
|
148
|
+
|
|
149
|
+
body = next((c for c in child.children if c.type == 'block'), None)
|
|
150
|
+
if body:
|
|
151
|
+
_walk_python(body, lines, sections, section)
|
|
152
|
+
|
|
153
|
+
elif child.type == 'function_definition':
|
|
154
|
+
name_node = next((c for c in child.children if c.type == 'identifier'), None)
|
|
155
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
156
|
+
|
|
157
|
+
sig_start = child.start_point[0]
|
|
158
|
+
colon_node = next((c for c in child.children if c.type == ':'), None)
|
|
159
|
+
sig_end = colon_node.end_point[0] if colon_node else sig_start
|
|
160
|
+
signature = '\n'.join(lines[sig_start:sig_end+1]).strip()
|
|
161
|
+
|
|
162
|
+
is_async = any(c.type == 'async' for c in child.children)
|
|
163
|
+
|
|
164
|
+
section = {
|
|
165
|
+
"type": "method" if parent_section and parent_section["type"] == "class" else "function",
|
|
166
|
+
"name": name,
|
|
167
|
+
"line_start": child.start_point[0] + 1,
|
|
168
|
+
"line_end": child.end_point[0] + 1,
|
|
169
|
+
"signature": signature
|
|
170
|
+
}
|
|
171
|
+
if is_async: section["async"] = True
|
|
172
|
+
|
|
173
|
+
doc = _extract_docstring_python(child, lines)
|
|
174
|
+
if doc: section["doc"] = doc
|
|
175
|
+
|
|
176
|
+
if parent_section:
|
|
177
|
+
parent_section["children"].append(section)
|
|
178
|
+
else:
|
|
179
|
+
sections.append(section)
|
|
180
|
+
|
|
181
|
+
elif child.type == 'expression_statement':
|
|
182
|
+
# Check for global constants (CAPITAL_NAME = value)
|
|
183
|
+
if not parent_section:
|
|
184
|
+
assign = next((c for c in child.children if c.type == 'assignment'), None)
|
|
185
|
+
if assign:
|
|
186
|
+
target = next((c for c in assign.children if c.type == 'identifier'), None)
|
|
187
|
+
if target:
|
|
188
|
+
name = lines[target.start_point[0]][target.start_point[1]:target.end_point[1]]
|
|
189
|
+
if name.isupper():
|
|
190
|
+
sections.append({
|
|
191
|
+
"type": "constant",
|
|
192
|
+
"name": name,
|
|
193
|
+
"line_start": child.start_point[0] + 1,
|
|
194
|
+
"line_end": child.end_point[0] + 1,
|
|
195
|
+
"signature": lines[child.start_point[0]].strip()
|
|
196
|
+
})
|
|
197
|
+
|
|
198
|
+
elif child.type == 'comment':
|
|
199
|
+
text = lines[child.start_point[0]][child.start_point[1]:child.end_point[1]]
|
|
200
|
+
if 'TODO' in text or 'FIXME' in text:
|
|
201
|
+
sections.append({
|
|
202
|
+
"type": "comment",
|
|
203
|
+
"name": text.lstrip('#').strip(),
|
|
204
|
+
"line_start": child.start_point[0] + 1,
|
|
205
|
+
"line_end": child.end_point[0] + 1,
|
|
206
|
+
"signature": text.strip()
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
elif child.type in ('import_statement', 'import_from_statement'):
|
|
210
|
+
section = {
|
|
211
|
+
"type": "import",
|
|
212
|
+
"name": lines[child.start_point[0]].strip(),
|
|
213
|
+
"line_start": child.start_point[0] + 1,
|
|
214
|
+
"line_end": child.end_point[0] + 1,
|
|
215
|
+
"signature": lines[child.start_point[0]].strip()
|
|
216
|
+
}
|
|
217
|
+
if not parent_section:
|
|
218
|
+
sections.append(section)
|
|
219
|
+
|
|
220
|
+
elif child.type == 'decorated_definition':
|
|
221
|
+
_walk_python(child, lines, sections, parent_section)
|
|
222
|
+
|
|
223
|
+
def _extract_docstring_js(node, lines: List[str]) -> Optional[str]:
|
|
224
|
+
# Look for previous sibling that is a comment
|
|
225
|
+
prev = node.prev_sibling
|
|
226
|
+
while prev and prev.type == 'comment':
|
|
227
|
+
comment = lines[prev.start_point[0]].strip()
|
|
228
|
+
if comment.startswith('/**'):
|
|
229
|
+
# simple single line extraction
|
|
230
|
+
cleaned = comment.lstrip('/*').rstrip('*/').strip()
|
|
231
|
+
if cleaned and cleaned != '*': return cleaned
|
|
232
|
+
if prev.start_point[0] + 1 <= prev.end_point[0]:
|
|
233
|
+
cleaned = lines[prev.start_point[0] + 1].strip().lstrip('*').strip()
|
|
234
|
+
if cleaned: return cleaned
|
|
235
|
+
prev = prev.prev_sibling
|
|
236
|
+
return None
|
|
237
|
+
|
|
238
|
+
def _walk_js_ts(node, lines: List[str], sections: List[Dict[str, Any]], parent_section=None):
|
|
239
|
+
for child in node.children:
|
|
240
|
+
if child.type in ('class_declaration', 'abstract_class_declaration', 'interface_declaration', 'type_alias_declaration', 'enum_declaration'):
|
|
241
|
+
name_node = next((c for c in child.children if c.type in ('identifier', 'type_identifier')), None)
|
|
242
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
243
|
+
|
|
244
|
+
# Simple signature heuristic: first line
|
|
245
|
+
signature = lines[child.start_point[0]].strip()
|
|
246
|
+
|
|
247
|
+
t = child.type.split('_')[0]
|
|
248
|
+
section = {
|
|
249
|
+
"type": "class" if t == "class" else t,
|
|
250
|
+
"name": name,
|
|
251
|
+
"line_start": child.start_point[0] + 1,
|
|
252
|
+
"line_end": child.end_point[0] + 1,
|
|
253
|
+
"signature": signature,
|
|
254
|
+
"children": []
|
|
255
|
+
}
|
|
256
|
+
doc = _extract_docstring_js(child, lines)
|
|
257
|
+
if doc: section["doc"] = doc
|
|
258
|
+
|
|
259
|
+
if parent_section:
|
|
260
|
+
parent_section["children"].append(section)
|
|
261
|
+
else:
|
|
262
|
+
sections.append(section)
|
|
263
|
+
|
|
264
|
+
body = next((c for c in child.children if c.type in ('class_body', 'interface_body', 'enum_body', 'object_type')), None)
|
|
265
|
+
if body:
|
|
266
|
+
_walk_js_ts(body, lines, sections, section)
|
|
267
|
+
|
|
268
|
+
elif child.type in ('function_declaration', 'method_definition', 'public_field_definition', 'property_definition'):
|
|
269
|
+
name_node = next((c for c in child.children if c.type in ('property_identifier', 'identifier', 'private_property_identifier')), None)
|
|
270
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
271
|
+
|
|
272
|
+
signature = lines[child.start_point[0]].strip()
|
|
273
|
+
|
|
274
|
+
is_async = any(c.type == 'async' for c in child.children)
|
|
275
|
+
|
|
276
|
+
# TS access modifiers can be direct children or inside a 'accessibility_modifier' node
|
|
277
|
+
access = None
|
|
278
|
+
for c in child.children:
|
|
279
|
+
if c.type in ('public', 'private', 'protected'):
|
|
280
|
+
access = c.type
|
|
281
|
+
break
|
|
282
|
+
if c.type == 'accessibility_modifier':
|
|
283
|
+
access = lines[c.start_point[0]][c.start_point[1]:c.end_point[1]]
|
|
284
|
+
break
|
|
285
|
+
|
|
286
|
+
stype = "method" if child.type == 'method_definition' else "function"
|
|
287
|
+
if child.type in ('public_field_definition', 'property_definition'):
|
|
288
|
+
stype = "property"
|
|
289
|
+
|
|
290
|
+
section = {
|
|
291
|
+
"type": stype,
|
|
292
|
+
"name": name,
|
|
293
|
+
"line_start": child.start_point[0] + 1,
|
|
294
|
+
"line_end": child.end_point[0] + 1,
|
|
295
|
+
"signature": signature
|
|
296
|
+
}
|
|
297
|
+
if is_async: section["async"] = True
|
|
298
|
+
if access: section["access"] = access
|
|
299
|
+
|
|
300
|
+
doc = _extract_docstring_js(child, lines)
|
|
301
|
+
if doc: section["doc"] = doc
|
|
302
|
+
|
|
303
|
+
if parent_section:
|
|
304
|
+
parent_section["children"].append(section)
|
|
305
|
+
else:
|
|
306
|
+
sections.append(section)
|
|
307
|
+
|
|
308
|
+
elif child.type == 'lexical_declaration' or child.type == 'variable_declaration':
|
|
309
|
+
# Check for constants/variables
|
|
310
|
+
decl = next((c for c in child.children if c.type == 'variable_declarator'), None)
|
|
311
|
+
if decl:
|
|
312
|
+
name_node = next((c for c in decl.children if c.type == 'identifier'), None)
|
|
313
|
+
value_node = next((c for c in decl.children if c.type == 'arrow_function'), None)
|
|
314
|
+
|
|
315
|
+
if name_node and value_node:
|
|
316
|
+
# Arrow function
|
|
317
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]]
|
|
318
|
+
is_async = any(c.type == 'async' for c in value_node.children)
|
|
319
|
+
section = {
|
|
320
|
+
"type": "function",
|
|
321
|
+
"name": name,
|
|
322
|
+
"line_start": child.start_point[0] + 1,
|
|
323
|
+
"line_end": child.end_point[0] + 1,
|
|
324
|
+
"signature": lines[child.start_point[0]].strip()
|
|
325
|
+
}
|
|
326
|
+
if is_async: section["async"] = True
|
|
327
|
+
doc = _extract_docstring_js(child, lines)
|
|
328
|
+
if doc: section["doc"] = doc
|
|
329
|
+
|
|
330
|
+
if parent_section:
|
|
331
|
+
parent_section["children"].append(section)
|
|
332
|
+
else:
|
|
333
|
+
sections.append(section)
|
|
334
|
+
elif name_node and not parent_section:
|
|
335
|
+
# Global constant/variable
|
|
336
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]]
|
|
337
|
+
sections.append({
|
|
338
|
+
"type": "constant" if "const" in lines[child.start_point[0]] else "variable",
|
|
339
|
+
"name": name,
|
|
340
|
+
"line_start": child.start_point[0] + 1,
|
|
341
|
+
"line_end": child.end_point[0] + 1,
|
|
342
|
+
"signature": lines[child.start_point[0]].strip()
|
|
343
|
+
})
|
|
344
|
+
|
|
345
|
+
elif child.type == 'comment':
|
|
346
|
+
text = lines[child.start_point[0]][child.start_point[1]:child.end_point[1]]
|
|
347
|
+
if 'TODO' in text or 'FIXME' in text:
|
|
348
|
+
sections.append({
|
|
349
|
+
"type": "comment",
|
|
350
|
+
"name": text.lstrip('/ ').strip(),
|
|
351
|
+
"line_start": child.start_point[0] + 1,
|
|
352
|
+
"line_end": child.end_point[0] + 1,
|
|
353
|
+
"signature": text.strip()
|
|
354
|
+
})
|
|
355
|
+
|
|
356
|
+
elif child.type == 'import_statement':
|
|
357
|
+
section = {
|
|
358
|
+
"type": "import",
|
|
359
|
+
"name": lines[child.start_point[0]].strip(),
|
|
360
|
+
"line_start": child.start_point[0] + 1,
|
|
361
|
+
"line_end": child.end_point[0] + 1,
|
|
362
|
+
"signature": lines[child.start_point[0]].strip()
|
|
363
|
+
}
|
|
364
|
+
if not parent_section:
|
|
365
|
+
sections.append(section)
|
|
366
|
+
|
|
367
|
+
elif child.type == 'export_statement':
|
|
368
|
+
# Drill down
|
|
369
|
+
decl = next((c for c in child.children if c.type != 'export' and c.type != 'default'), None)
|
|
370
|
+
if decl:
|
|
371
|
+
_walk_js_ts(decl, lines, sections, parent_section)
|
|
372
|
+
|
|
373
|
+
def _walk_html(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
374
|
+
"""Extract headings and elements with IDs from HTML AST."""
|
|
375
|
+
for child in node.children:
|
|
376
|
+
if child.type == 'element':
|
|
377
|
+
start_tag = next((c for c in child.children if c.type == 'start_tag'), None)
|
|
378
|
+
if start_tag:
|
|
379
|
+
tag_name_node = next((c for c in start_tag.children if c.type == 'tag_name'), None)
|
|
380
|
+
tag_name = lines[tag_name_node.start_point[0]][tag_name_node.start_point[1]:tag_name_node.end_point[1]].lower() if tag_name_node else ''
|
|
381
|
+
|
|
382
|
+
# Check for headings (h1-h6)
|
|
383
|
+
if tag_name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
|
|
384
|
+
# Get text content of heading
|
|
385
|
+
text_content = ""
|
|
386
|
+
for subchild in child.children:
|
|
387
|
+
if subchild.type == 'text':
|
|
388
|
+
text_content += lines[subchild.start_point[0]][subchild.start_point[1]:subchild.end_point[1]]
|
|
389
|
+
|
|
390
|
+
sections.append({
|
|
391
|
+
"type": "heading",
|
|
392
|
+
"name": f"{tag_name}: {text_content.strip()[:60]}",
|
|
393
|
+
"line_start": child.start_point[0] + 1,
|
|
394
|
+
"line_end": child.end_point[0] + 1,
|
|
395
|
+
"signature": lines[child.start_point[0]].strip()
|
|
396
|
+
})
|
|
397
|
+
|
|
398
|
+
# Check for ID attribute
|
|
399
|
+
else:
|
|
400
|
+
id_attr = None
|
|
401
|
+
for subchild in start_tag.children:
|
|
402
|
+
if subchild.type == 'attribute':
|
|
403
|
+
attr_name_node = next((c for c in subchild.children if c.type == 'attribute_name'), None)
|
|
404
|
+
if attr_name_node:
|
|
405
|
+
attr_name = lines[attr_name_node.start_point[0]][attr_name_node.start_point[1]:attr_name_node.end_point[1]]
|
|
406
|
+
if attr_name == 'id':
|
|
407
|
+
val_node = next((c for c in subchild.children if c.type == 'attribute_value'), None)
|
|
408
|
+
if val_node:
|
|
409
|
+
id_attr = lines[val_node.start_point[0]][val_node.start_point[1]:val_node.end_point[1]].strip('"\'')
|
|
410
|
+
break
|
|
411
|
+
|
|
412
|
+
if id_attr:
|
|
413
|
+
sections.append({
|
|
414
|
+
"type": "section",
|
|
415
|
+
"name": f"#{id_attr} ({tag_name})",
|
|
416
|
+
"line_start": child.start_point[0] + 1,
|
|
417
|
+
"line_end": child.end_point[0] + 1,
|
|
418
|
+
"signature": lines[child.start_point[0]].strip()
|
|
419
|
+
})
|
|
420
|
+
|
|
421
|
+
# Recurse into element children
|
|
422
|
+
_walk_html(child, lines, sections)
|
|
423
|
+
else:
|
|
424
|
+
# Recurse into other nodes (like document)
|
|
425
|
+
_walk_html(child, lines, sections)
|
|
426
|
+
|
|
427
|
+
def _walk_markdown(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
428
|
+
"""Extract headings from Markdown AST."""
|
|
429
|
+
for child in node.children:
|
|
430
|
+
if child.type == 'atx_heading' or child.type == 'setext_heading':
|
|
431
|
+
heading_node = next((c for c in child.children if c.type == 'atx_h1_marker' or c.type == 'atx_h2_marker' or c.type == 'atx_h3_marker' or c.type == 'atx_h4_marker' or c.type == 'atx_h5_marker' or c.type == 'atx_h6_marker'), None)
|
|
432
|
+
level = "h1"
|
|
433
|
+
if heading_node:
|
|
434
|
+
marker = lines[heading_node.start_point[0]][heading_node.start_point[1]:heading_node.end_point[1]]
|
|
435
|
+
level = f"h{len(marker.strip())}"
|
|
436
|
+
|
|
437
|
+
# Extract content text
|
|
438
|
+
content_text = ""
|
|
439
|
+
for subchild in child.children:
|
|
440
|
+
if subchild.type in ('inline', 'text'):
|
|
441
|
+
content_text += lines[subchild.start_point[0]][subchild.start_point[1]:subchild.end_point[1]]
|
|
442
|
+
|
|
443
|
+
sections.append({
|
|
444
|
+
"type": "heading",
|
|
445
|
+
"name": f"{level}: {content_text.strip()[:60]}",
|
|
446
|
+
"line_start": child.start_point[0] + 1,
|
|
447
|
+
"line_end": child.end_point[0] + 1,
|
|
448
|
+
"signature": lines[child.start_point[0]].strip()
|
|
449
|
+
})
|
|
450
|
+
|
|
451
|
+
# Recurse if needed (atx headings are top level usually, but just in case)
|
|
452
|
+
_walk_markdown(child, lines, sections)
|
|
453
|
+
|
|
454
|
+
def _walk_css(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
455
|
+
"""Extract rulesets from CSS AST."""
|
|
456
|
+
for child in node.children:
|
|
457
|
+
if child.type == 'rule_set':
|
|
458
|
+
selector_node = next((c for c in child.children if c.type == 'selectors'), None)
|
|
459
|
+
selector = lines[selector_node.start_point[0]][selector_node.start_point[1]:selector_node.end_point[1]].strip() if selector_node else "Unknown"
|
|
460
|
+
|
|
461
|
+
sections.append({
|
|
462
|
+
"type": "section",
|
|
463
|
+
"name": selector[:60],
|
|
464
|
+
"line_start": child.start_point[0] + 1,
|
|
465
|
+
"line_end": child.end_point[0] + 1,
|
|
466
|
+
"signature": selector
|
|
467
|
+
})
|
|
468
|
+
elif child.type == 'media_statement':
|
|
469
|
+
query_node = next((c for c in child.children if c.type == 'media_query'), None)
|
|
470
|
+
query = lines[query_node.start_point[0]][query_node.start_point[1]:query_node.end_point[1]].strip() if query_node else "@media"
|
|
471
|
+
|
|
472
|
+
section = {
|
|
473
|
+
"type": "section",
|
|
474
|
+
"name": f"@media {query}",
|
|
475
|
+
"line_start": child.start_point[0] + 1,
|
|
476
|
+
"line_end": child.end_point[0] + 1,
|
|
477
|
+
"signature": query,
|
|
478
|
+
"children": []
|
|
479
|
+
}
|
|
480
|
+
sections.append(section)
|
|
481
|
+
# Find block
|
|
482
|
+
block = next((c for c in child.children if c.type == 'block'), None)
|
|
483
|
+
if block:
|
|
484
|
+
# We reuse walk_css but redirect results to children if we wanted nested
|
|
485
|
+
# For simplicity, we'll just keep them flat but maybe prefixed
|
|
486
|
+
pass
|
|
487
|
+
|
|
488
|
+
# Recurse
|
|
489
|
+
_walk_css(child, lines, sections)
|
|
490
|
+
|
|
491
|
+
def _walk_go(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
492
|
+
"""Extract functions, types, and methods from Go AST."""
|
|
493
|
+
for child in node.children:
|
|
494
|
+
if child.type in ('function_declaration', 'method_declaration'):
|
|
495
|
+
name_node = next((c for c in child.children if c.type == 'identifier' or c.type == 'field_identifier'), None)
|
|
496
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
497
|
+
|
|
498
|
+
signature = lines[child.start_point[0]].strip()
|
|
499
|
+
|
|
500
|
+
sections.append({
|
|
501
|
+
"type": "function" if child.type == 'function_declaration' else "method",
|
|
502
|
+
"name": name,
|
|
503
|
+
"line_start": child.start_point[0] + 1,
|
|
504
|
+
"line_end": child.end_point[0] + 1,
|
|
505
|
+
"signature": signature
|
|
506
|
+
})
|
|
507
|
+
elif child.type == 'type_declaration':
|
|
508
|
+
# Drill into type specs
|
|
509
|
+
for spec in child.children:
|
|
510
|
+
if spec.type == 'type_spec':
|
|
511
|
+
name_node = next((c for c in spec.children if c.type == 'type_identifier'), None)
|
|
512
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
513
|
+
sections.append({
|
|
514
|
+
"type": "type",
|
|
515
|
+
"name": name,
|
|
516
|
+
"line_start": child.start_point[0] + 1,
|
|
517
|
+
"line_end": child.end_point[0] + 1,
|
|
518
|
+
"signature": lines[child.start_point[0]].strip()
|
|
519
|
+
})
|
|
520
|
+
_walk_go(child, lines, sections)
|
|
521
|
+
|
|
522
|
+
def _walk_rust(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
523
|
+
"""Extract functions, structs, enums, and impls from Rust AST."""
|
|
524
|
+
for child in node.children:
|
|
525
|
+
if child.type in ('function_item', 'struct_item', 'enum_item', 'trait_item', 'impl_item'):
|
|
526
|
+
name_node = next((c for c in child.children if c.type in ('identifier', 'type_identifier')), None)
|
|
527
|
+
name = lines[name_node.start_point[0]][name_node.start_point[1]:name_node.end_point[1]] if name_node else 'Unknown'
|
|
528
|
+
|
|
529
|
+
if child.type == 'impl_item':
|
|
530
|
+
# For impl, the name is the type being implemented
|
|
531
|
+
type_node = next((c for c in child.children if c.type == 'type_identifier'), None)
|
|
532
|
+
if type_node:
|
|
533
|
+
name = f"impl {lines[type_node.start_point[0]][type_node.start_point[1]:type_node.end_point[1]]}"
|
|
534
|
+
|
|
535
|
+
stype = child.type.replace('_item', '')
|
|
536
|
+
sections.append({
|
|
537
|
+
"type": stype,
|
|
538
|
+
"name": name,
|
|
539
|
+
"line_start": child.start_point[0] + 1,
|
|
540
|
+
"line_end": child.end_point[0] + 1,
|
|
541
|
+
"signature": lines[child.start_point[0]].strip()
|
|
542
|
+
})
|
|
543
|
+
_walk_rust(child, lines, sections)
|
|
544
|
+
|
|
545
|
+
def _walk_json(node, lines: List[str], sections: List[Dict[str, Any]]):
|
|
546
|
+
"""Extract top-level keys from JSON AST."""
|
|
547
|
+
for child in node.children:
|
|
548
|
+
if child.type == 'object':
|
|
549
|
+
for pair in child.children:
|
|
550
|
+
if pair.type == 'pair':
|
|
551
|
+
key_node = next((c for c in pair.children if c.type == 'string'), None)
|
|
552
|
+
if key_node:
|
|
553
|
+
key = lines[key_node.start_point[0]][key_node.start_point[1]:key_node.end_point[1]].strip('"\'')
|
|
554
|
+
sections.append({
|
|
555
|
+
"type": "property",
|
|
556
|
+
"name": key,
|
|
557
|
+
"line_start": pair.start_point[0] + 1,
|
|
558
|
+
"line_end": pair.end_point[0] + 1,
|
|
559
|
+
"signature": key
|
|
560
|
+
})
|
|
561
|
+
# Typically only top level for mapping
|
|
562
|
+
break
|
|
563
|
+
|
|
564
|
+
def _walk_yaml(node, lines: List[str], sections: List[Dict[str, Any]]):
    """Append one "property" section per top-level key of the first YAML document.

    The first ``document`` child of ``node`` is inspected: its first
    ``block_node`` child, then that node's first ``block_mapping`` child.
    For every ``block_mapping_pair`` in the mapping, the first ``flow_node``
    or ``block_node`` child is taken as the key and its text is sliced out
    of ``lines`` (whitespace-stripped).

    Args:
        node: Parent AST node whose ``children`` are scanned.
        lines: Source text split into lines, indexed by 0-based row.
        sections: Output list; section dicts are appended in place.
    """
    def _first_child(parent, kinds):
        # First child whose node type is one of `kinds`, or None.
        for item in parent.children:
            if item.type in kinds:
                return item
        return None

    for doc in node.children:
        if doc.type != 'document':
            continue
        block = _first_child(doc, ('block_node',))
        mapping = _first_child(block, ('block_mapping',)) if block else None
        if mapping:
            for entry in mapping.children:
                if entry.type != 'block_mapping_pair':
                    continue
                key_node = _first_child(entry, ('flow_node', 'block_node'))
                if not key_node:
                    continue
                row = key_node.start_point[0]
                key = lines[row][key_node.start_point[1]:key_node.end_point[1]].strip()
                sections.append({
                    "type": "property",
                    "name": key,
                    # AST rows are 0-based; reported lines are 1-based.
                    "line_start": entry.start_point[0] + 1,
                    "line_end": entry.end_point[0] + 1,
                    "signature": key,
                })
        # Typically only top level for mapping
        break
|
|
586
|
+
|
|
587
|
+
def check_syntax_ast(content: str, ext: str) -> List[Dict[str, Any]]:
    """
    Detect syntax errors using Tree-sitter.

    Args:
        content: Source text to parse (encoded as UTF-8 for the parser).
        ext: File extension handed to ``get_parser`` to select a grammar.

    Returns:
        A list of error dicts with ``line`` (1-based), ``column``, ``type``
        (``"syntax_error"``, ``"missing_token"`` or ``"parser_error"``) and
        ``text``, in pre-order tree position. Empty when no parser is
        available for ``ext`` or no problems were found. Any exception raised
        while parsing or walking is reported as a single ``parser_error``.
    """
    parser = get_parser(ext)
    if not parser:
        return []

    try:
        tree = parser.parse(content.encode("utf-8"))
        errors: List[Dict[str, Any]] = []

        # Walk with an explicit stack instead of recursion so deeply nested
        # trees cannot exhaust the interpreter's recursion limit.
        pending = [tree.root_node]
        while pending:
            node = pending.pop()
            line = node.start_point[0] + 1
            column = node.start_point[1]

            # `is_error` / `is_missing` are not present in every binding
            # version; treat an absent attribute as False rather than failing
            # the whole check.
            if node.type == "ERROR" or getattr(node, "is_error", False):
                errors.append({
                    "line": line,
                    "column": column,
                    "type": "syntax_error",
                    "text": f"Syntax error at line {line}, column {column}"
                })

            if getattr(node, "is_missing", False):
                errors.append({
                    "line": line,
                    "column": column,
                    "type": "missing_token",
                    "text": f"Missing expected token at line {line}"
                })

            # Push children reversed so they are popped in source order
            # (same pre-order as a recursive walk).
            pending.extend(reversed(node.children))

        return errors
    except Exception as e:
        return [{"line": 1, "column": 0, "type": "parser_error", "text": str(e)}]
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
# ── Native syntax checking (no tree-sitter, no AI) ──────────────────
|
|
630
|
+
import os as _os
|
|
631
|
+
import subprocess as _subprocess
|
|
632
|
+
import sys as _sys
|
|
633
|
+
import tempfile as _tempfile
|
|
634
|
+
import threading as _threading
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _kill_proc_tree(proc: _subprocess.Popen) -> None:
    """Best-effort kill of ``proc``, then wait up to 3 seconds for it to exit.

    On Windows the whole process tree is killed via ``taskkill /F /T``; on
    other platforms only ``proc.kill()`` is called. Every exception is
    swallowed, so this never raises.
    """
    try:
        if _sys.platform != "win32":
            proc.kill()
        else:
            # taskkill /T kills the entire process tree — critical on Windows
            # where Rscript spawns child R processes that outlive the parent.
            taskkill_cmd = ["taskkill", "/F", "/T", "/PID", str(proc.pid)]
            _subprocess.run(
                taskkill_cmd,
                capture_output=True,
                timeout=5,
                creationflags=_subprocess.CREATE_NO_WINDOW,
            )
        proc.wait(timeout=3)
    except Exception:
        return
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def _err(line: int = 1, col: int = 0, text: str = "") -> Dict[str, Any]:
    """Build one error entry, clamping ``line`` to >= 1 and ``col`` to >= 0.

    ``line`` and ``col`` are coerced with ``int()`` and ``text`` with ``str()``.
    """
    line_no = int(line)
    col_no = int(col)
    return {
        "line": line_no if line_no > 1 else 1,
        "column": col_no if col_no > 0 else 0,
        "text": str(text),
    }
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _clean_text(text: str) -> str:
    """Return ``text`` as a string with NUL characters removed and ends stripped.

    Falsy input (``None``, ``""``, ``0``) yields an empty string.
    """
    raw = str(text or "")
    without_nul = "".join(raw.split("\x00"))
    return without_nul.strip()
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
def _checker_name(ext: str) -> str:
    """Return the display name of the checker used for file extension ``ext``.

    The lookup is case-insensitive. Unknown extensions fall back to the
    extension itself with leading dots removed (original case kept), or
    ``"unknown"`` when nothing is left.
    """
    known_checkers = {
        '.py': 'python_ast',
        '.json': 'json',
        '.yaml': 'pyyaml',
        '.yml': 'pyyaml',
        '.xml': 'elementtree',
        '.svg': 'elementtree',
        '.toml': 'tomllib',
        '.html': 'lxml',
        '.htm': 'lxml',
        '.css': 'tinycss2',
        '.js': 'node --check',
        '.jsx': 'tsc',
        '.ts': 'tsc',
        '.tsx': 'tsc',
        '.java': 'javac',
        '.go': 'gofmt',
        '.rs': 'rustc',
        '.r': 'Rscript',
        '.php': 'php -l',
        '.rb': 'ruby -c',
        '.pl': 'perl -c',
        '.pm': 'perl -c',
        '.lua': 'luac -p',
        '.sh': 'bash -n',
        '.bash': 'bash -n',
    }
    lookup_key = (ext or "").lower()
    if lookup_key in known_checkers:
        return known_checkers[lookup_key]
    bare = (ext or "unknown").lstrip(".")
    return bare if bare else "unknown"
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def _result(status: str, checker: str, errors: List[Dict[str, Any]] | None = None, detail: str = "") -> Dict[str, Any]:
    """Assemble the structured result dict shared by the native checkers.

    Args:
        status: Outcome label (e.g. ``"clean"``, ``"syntax_error"``).
        checker: Name of the tool that produced the result.
        errors: Error entries; a falsy value becomes an empty list.
        detail: Free-form message, normalised through ``_clean_text``.
    """
    payload: Dict[str, Any] = {}
    payload["status"] = status
    payload["checker"] = checker
    payload["errors"] = errors if errors else []
    payload["detail"] = _clean_text(detail)
    return payload
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
def _subproc_check(cmd: List[str], content: str, suffix: str, checker: str, timeout: int = 8) -> Dict[str, Any]:
    """Write ``content`` to a temp file and run ``cmd`` with that file appended.

    Args:
        cmd: Command and arguments; the temp file path is added as the last argument.
        content: Text written to the temp file as UTF-8.
        suffix: File suffix for the temp file (e.g. ``".js"``).
        checker: Checker name echoed back in the returned dict.
        timeout: Seconds allowed for the subprocess.

    Returns:
        On a completed run, a dict with ``status`` ``"ok"``, ``checker``,
        ``returncode`` and ``stdout``/``stderr`` (each capped at 65536
        characters). Otherwise a ``_result`` dict with status
        ``"checker_unavailable"``, ``"checker_failed"`` or
        ``"checker_timeout"``. The temp file is removed in every case.
    """
    handle, tmp_path = _tempfile.mkstemp(suffix=suffix)
    try:
        with _os.fdopen(handle, 'w', encoding='utf-8') as sink:
            sink.write(content)

        # CREATE_NO_WINDOW exists only on Windows, so it is passed only there.
        platform_opts = (
            {"creationflags": _subprocess.CREATE_NO_WINDOW}
            if _sys.platform == "win32"
            else {}
        )

        completed = _subprocess.run(
            cmd + [tmp_path],
            stdin=_subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=timeout,
            **platform_opts,
        )
        outcome: Dict[str, Any] = {"status": "ok", "checker": checker}
        outcome["returncode"] = completed.returncode
        outcome["stdout"] = completed.stdout[:65536]
        outcome["stderr"] = completed.stderr[:65536]
        return outcome
    # FileNotFoundError and PermissionError are OSError subclasses, so they
    # must be handled before the generic OSError clause.
    except FileNotFoundError:
        return _result("checker_unavailable", checker, detail=f"{cmd[0]} is not installed or not on PATH.")
    except PermissionError as exc:
        return _result("checker_failed", checker, detail=str(exc) or f"{cmd[0]} access denied.")
    except OSError as exc:
        return _result("checker_failed", checker, detail=str(exc) or f"{cmd[0]} failed to start.")
    except _subprocess.TimeoutExpired:
        return _result("checker_timeout", checker, detail=f"{cmd[0]} exceeded {timeout}s.")
    finally:
        try:
            _os.unlink(tmp_path)
        except OSError:
            pass
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def _native_python(content: str) -> Dict[str, Any]:
    """Syntax-check Python source with ``ast.parse``.

    Returns a ``"clean"`` result when the source parses, otherwise a
    ``"syntax_error"`` result with one error entry (1-based line, 0-based
    column) and the message repeated in ``detail``.
    """
    import ast
    try:
        ast.parse(content)
    except SyntaxError as e:
        # SyntaxError.offset is 1-based (and may be None); columns here are 0-based.
        return _result(
            "syntax_error",
            "python_ast",
            [_err(e.lineno or 1, (e.offset or 1) - 1, e.msg)],
            detail=e.msg,
        )
    except ValueError as e:
        # Older interpreters raise ValueError instead of SyntaxError for
        # source the compiler cannot accept at all (e.g. embedded NUL bytes).
        # Report it as a source problem rather than letting it surface as a
        # checker failure.
        return _result("syntax_error", "python_ast", [_err(1, 0, str(e))], detail=str(e))
    return _result("clean", "python_ast")
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def _native_json(content: str) -> Dict[str, Any]:
    """Syntax-check JSON text with ``json.loads``.

    Returns a ``"clean"`` result when the text decodes, otherwise a
    ``"syntax_error"`` result with one error entry (1-based line, 0-based
    column) and the decoder message in ``detail``.
    """
    import json
    try:
        json.loads(content)
    except json.JSONDecodeError as e:
        # JSONDecodeError.colno is 1-based; shift it so the column is 0-based
        # like the columns produced by the other checkers.
        return _result("syntax_error", "json", [_err(e.lineno, e.colno - 1, e.msg)], detail=e.msg)
    return _result("clean", "json")
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def _native_yaml(content: str) -> Dict[str, Any]:
    """Syntax-check YAML text with PyYAML's ``safe_load_all``.

    Returns ``"checker_unavailable"`` when PyYAML cannot be imported,
    ``"clean"`` when every document loads, and ``"syntax_error"`` for any
    other exception. The error position comes from the exception's
    ``problem_mark`` when it has one, else line 1, column 0.
    """
    try:
        import yaml
        # Consume all documents to catch errors in multi-document YAML files.
        for _document in yaml.safe_load_all(content):
            pass
        return _result("clean", "pyyaml")
    except ImportError:
        return _result("checker_unavailable", "pyyaml", detail="PyYAML is not installed.")
    except Exception as exc:
        mark = getattr(exc, 'problem_mark', None)
        if mark:
            # The mark's line is 0-based; reported lines are 1-based.
            line, col = mark.line + 1, mark.column
        else:
            line, col = 1, 0
        message = str(exc)
        return _result("syntax_error", "pyyaml", [_err(line, col, message)], detail=message)
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _native_xml(content: str) -> Dict[str, Any]:
    """Syntax-check XML (also used for SVG) with ``xml.etree.ElementTree``.

    Returns ``"clean"`` when the document parses, otherwise ``"syntax_error"``
    positioned at the ``ParseError``'s ``position`` (line 1, column 0 when
    that attribute is missing or empty).
    """
    import xml.etree.ElementTree as ET
    try:
        ET.fromstring(content)
    except ET.ParseError as exc:
        where = getattr(exc, 'position', None)
        line, col = where if where else (1, 0)
        message = str(exc)
        return _result("syntax_error", "elementtree", [_err(line, col, message)], detail=message)
    return _result("clean", "elementtree")
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
def _native_toml(content: str) -> Dict[str, Any]:
    """Syntax-check TOML text with ``tomllib`` (falling back to ``tomli``).

    Returns ``"checker_unavailable"`` when neither module can be imported,
    ``"clean"`` when the text loads, and ``"syntax_error"`` for any other
    exception. The line number is taken from a ``line N`` fragment in the
    exception message when present, else 1; the column is always 0.
    """
    try:
        try:
            import tomllib  # Python 3.11+
        except ImportError:
            import tomli as tomllib  # type: ignore
        tomllib.loads(content)
        return _result("clean", "tomllib")
    except ImportError:
        return _result("checker_unavailable", "tomllib", detail="tomllib/tomli is not installed.")
    except Exception as exc:
        message = str(exc)
        located = re.search(r'line (\d+)', message)
        line_no = int(located.group(1)) if located else 1
        return _result("syntax_error", "tomllib", [_err(line_no, 0, message)], detail=message)
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def _native_html(content: str) -> Dict[str, Any]:
    """Syntax-check HTML with lxml's recovering ``HTMLParser``.

    Entries in the parser's error log are reported as syntax errors unless
    they match one of the known false-positive filters below. Returns
    ``"checker_unavailable"`` when lxml cannot be imported and
    ``"checker_failed"`` for any other exception.
    """
    # lxml false-positive filters for valid HTML5 constructs:
    #   - HTML_UNKNOWN_TAG: HTML5 semantic elements (nav, main, header, footer, etc.)
    #   - ERR_TAG_NAME_MISMATCH "script embeds close tag": JS code containing </script>
    #   - ERR_NAME_REQUIRED "htmlParseEntityRef: no name": unescaped & in code/text
    ignored_types = {"HTML_UNKNOWN_TAG", "ERR_NAME_REQUIRED"}
    ignored_fragments = {"script embeds close tag"}

    try:
        from lxml import etree  # type: ignore
        parser = etree.HTMLParser(recover=True)
        etree.fromstring(content.encode('utf-8', errors='replace'), parser)
        errors = [
            _err(entry.line, entry.column, entry.message)
            for entry in parser.error_log
            if not (
                entry.type_name in ignored_types
                or any(fragment in entry.message.lower() for fragment in ignored_fragments)
            )
        ]
        if not errors:
            return _result("clean", "lxml")
        return _result("syntax_error", "lxml", errors, detail=errors[0]["text"])
    except ImportError:
        return _result("checker_unavailable", "lxml", detail="lxml is not installed.")
    except Exception as exc:
        return _result("checker_failed", "lxml", detail=str(exc))
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def _native_css(content: str) -> Dict[str, Any]:
    """Syntax-check CSS with ``tinycss2.parse_stylesheet``.

    Every returned rule whose ``type`` is ``"error"`` becomes an error entry
    at the rule's ``source_line`` (column 0) with the rule's ``repr`` as text.
    Returns ``"checker_unavailable"`` when tinycss2 cannot be imported and
    ``"checker_failed"`` for any other exception.
    """
    try:
        import tinycss2  # type: ignore
        errors = []
        for rule in tinycss2.parse_stylesheet(content):
            if getattr(rule, 'type', '') != 'error':
                continue
            errors.append(_err(getattr(rule, 'source_line', 1), 0, repr(rule)))
        if not errors:
            return _result("clean", "tinycss2")
        return _result("syntax_error", "tinycss2", errors, detail=errors[0]["text"])
    except ImportError:
        return _result("checker_unavailable", "tinycss2", detail="tinycss2 is not installed.")
    except Exception as exc:
        return _result("checker_failed", "tinycss2", detail=str(exc))
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _native_js(content: str) -> Dict[str, Any]:
    """Syntax-check JavaScript by running ``node --check`` on a temp file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit, each output line ending in ``:<number>`` is treated as a
    location and the line after it as the message. If no such pair is found
    the result is ``"checker_failed"`` with the first output line as detail.
    """
    proc = _subproc_check(["node", "--check"], content, ".js", "node --check")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "node --check")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"]).splitlines()
    location = re.compile(r'^.*:(\d+)$')
    errors = []
    # Pair every line with its successor; the last line has none and is skipped.
    for current, following in zip(report, report[1:]):
        found = location.match(current.strip())
        if found:
            errors.append(_err(int(found.group(1)), 0, following.strip()))
    if errors:
        return _result("syntax_error", "node --check", errors, detail=errors[0]["text"])
    if report:
        detail = report[0]
    else:
        detail = f"node --check returned {proc['returncode']}."
    return _result("checker_failed", "node --check", detail=detail)
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def _native_ts(content: str) -> Dict[str, Any]:
    """Check TypeScript by running ``tsc --noEmit`` on a temp ``.ts`` file.

    When tsc is unavailable the content is handed to ``_native_js`` instead;
    a "clean" or "syntax_error" fallback result gets a note about the
    fallback prepended to its detail. On a non-zero tsc exit, lines of the
    form ``file(line,col): error CODE: message`` become error entries; if
    none are found the result is ``"checker_failed"``.
    """
    tsc_cmd = ["tsc", "--noEmit", "--target", "ES2020", "--isolatedModules", "--skipLibCheck"]
    proc = _subproc_check(tsc_cmd, content, ".ts", "tsc", timeout=15)
    status = proc["status"]
    if status == "checker_unavailable":
        fallback = _native_js(content)
        if fallback["status"] in {"clean", "syntax_error"}:
            note = "tsc unavailable; fell back to node --check syntax validation. "
            fallback["detail"] = _clean_text(note + fallback.get("detail", ""))
        return fallback
    if status != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "tsc")

    diagnostic = re.compile(r'^.*\((\d+),(\d+)\):\s*error\s+\w+:\s*(.+)$')
    hits = filter(None, map(diagnostic.match, (proc["stderr"] + proc["stdout"]).splitlines()))
    # The captured column has 1 subtracted before it is stored.
    errors = [_err(int(hit.group(1)), int(hit.group(2)) - 1, hit.group(3)) for hit in hits]
    if not errors:
        return _result("checker_failed", "tsc", detail=f"tsc returned {proc['returncode']}.")
    return _result("syntax_error", "tsc", errors, detail=errors[0]["text"])
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
def _native_jsx(content: str) -> Dict[str, Any]:
    """Check JSX by running ``tsc --noEmit --jsx react --allowJs`` on a temp ``.jsx`` file.

    When tsc is unavailable the result is ``"unsupported"`` (there is no
    fallback for JSX). On a non-zero tsc exit, lines of the form
    ``file(line,col): error CODE: message`` become error entries; if none
    are found the result is ``"checker_failed"``.
    """
    tsc_cmd = [
        "tsc", "--noEmit", "--jsx", "react", "--allowJs", "--isolatedModules",
        "--skipLibCheck", "--target", "ES2020",
    ]
    proc = _subproc_check(tsc_cmd, content, ".jsx", "tsc", timeout=15)
    status = proc["status"]
    if status == "checker_unavailable":
        return _result(
            "unsupported",
            "tsc",
            detail="tsc not found; node --check cannot validate JSX syntax.",
        )
    if status != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "tsc")

    diagnostic = re.compile(r'^.*\((\d+),(\d+)\):\s*error\s+\w+:\s*(.+)$')
    hits = filter(None, map(diagnostic.match, (proc["stderr"] + proc["stdout"]).splitlines()))
    # The captured column has 1 subtracted before it is stored.
    errors = [_err(int(hit.group(1)), int(hit.group(2)) - 1, hit.group(3)) for hit in hits]
    if not errors:
        return _result("checker_failed", "tsc", detail=f"tsc returned {proc['returncode']}.")
    return _result("syntax_error", "tsc", errors, detail=errors[0]["text"])
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def _native_tsx(content: str) -> Dict[str, Any]:
    """Check TSX by running ``tsc --noEmit --jsx react`` on a temp ``.tsx`` file.

    When tsc is unavailable the content is handed to ``_native_js`` instead;
    a "clean" or "syntax_error" fallback result gets a note about the
    fallback prepended to its detail. On a non-zero tsc exit, lines of the
    form ``file(line,col): error CODE: message`` become error entries; if
    none are found the result is ``"checker_failed"``.
    """
    tsc_cmd = [
        "tsc", "--noEmit", "--jsx", "react", "--isolatedModules",
        "--skipLibCheck", "--target", "ES2020",
    ]
    proc = _subproc_check(tsc_cmd, content, ".tsx", "tsc", timeout=15)
    status = proc["status"]
    if status == "checker_unavailable":
        fallback = _native_js(content)
        if fallback["status"] in {"clean", "syntax_error"}:
            note = "tsc unavailable; fell back to node --check (JSX not fully validated). "
            fallback["detail"] = _clean_text(note + fallback.get("detail", ""))
        return fallback
    if status != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "tsc")

    diagnostic = re.compile(r'^.*\((\d+),(\d+)\):\s*error\s+\w+:\s*(.+)$')
    hits = filter(None, map(diagnostic.match, (proc["stderr"] + proc["stdout"]).splitlines()))
    # The captured column has 1 subtracted before it is stored.
    errors = [_err(int(hit.group(1)), int(hit.group(2)) - 1, hit.group(3)) for hit in hits]
    if not errors:
        return _result("checker_failed", "tsc", detail=f"tsc returned {proc['returncode']}.")
    return _result("syntax_error", "tsc", errors, detail=errors[0]["text"])
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
def _native_java(content: str) -> Dict[str, Any]:
    """Syntax-check Java by running ``javac`` on a temp ``.java`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit, located diagnostics (``file:line: error: message``) are
    collected, skipping messages that match known semantic (non-syntax)
    patterns.

    Returns:
        ``"clean"`` when javac succeeds, or when every located diagnostic was
        semantic; ``"syntax_error"`` when at least one non-semantic diagnostic
        was found; ``"checker_failed"`` when javac failed without producing
        any located diagnostic (the compiler itself could not run the check,
        so nothing can be said about the source).
    """
    proc = _subproc_check(
        ["javac", "-proc:none", "-source", "11", "-encoding", "UTF-8"],
        content, ".java", "javac", timeout=15,
    )
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "javac")

    # Filter to syntax-only errors — javac also reports type/import errors which
    # are not syntax issues (e.g. "cannot find symbol", "package does not exist").
    _JAVAC_SEMANTIC_PATTERNS = {
        "should be declared in", "cannot find symbol", "package does not exist",
        "cannot access", "incompatible types", "is not abstract",
        "has private access", "is already defined", "unreported exception",
        "non-static method", "non-static variable",
    }
    output_lines = (proc["stderr"] + proc["stdout"]).splitlines()
    errors = []
    located_diagnostics = 0
    for line in output_lines:
        m = re.match(r'^.*:(\d+):\s*error:\s*(.+)$', line)
        if not m:
            continue
        located_diagnostics += 1
        if any(p in m.group(2) for p in _JAVAC_SEMANTIC_PATTERNS):
            continue
        errors.append(_err(int(m.group(1)), 0, m.group(2)))

    if errors:
        return _result("syntax_error", "javac", errors, detail=errors[0]["text"])
    if located_diagnostics:
        # All errors were semantic (imports, types) — syntax is likely fine.
        return _result("clean", "javac", detail="Syntax OK; semantic errors (imports/types) were ignored.")

    # javac exited non-zero but reported nothing tied to a source line:
    # surface that as a checker failure instead of a clean bill of health.
    first_line = next((ln.strip() for ln in output_lines if ln.strip()), "")
    return _result(
        "checker_failed",
        "javac",
        detail=first_line or f"javac returned {proc['returncode']}.",
    )
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
def _native_go(content: str) -> Dict[str, Any]:
    """Syntax-check Go by running ``gofmt -e`` on a temp ``.go`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit, output lines of the form ``file:line:col: message`` become
    error entries; if none are found the result is ``"checker_failed"``.
    """
    proc = _subproc_check(["gofmt", "-e"], content, ".go", "gofmt")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "gofmt")

    diagnostic = re.compile(r'^.*:(\d+):(\d+):\s*(.+)$')
    hits = filter(None, map(diagnostic.match, (proc["stderr"] + proc["stdout"]).splitlines()))
    # The captured column has 1 subtracted before it is stored.
    errors = [_err(int(hit.group(1)), int(hit.group(2)) - 1, hit.group(3)) for hit in hits]
    if not errors:
        return _result("checker_failed", "gofmt", detail=f"gofmt returned {proc['returncode']}.")
    return _result("syntax_error", "gofmt", errors, detail=errors[0]["text"])
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
def _native_rust(content: str) -> Dict[str, Any]:
    """Check Rust source by running ``rustc --emit=metadata`` in a temp directory.

    The source is written to ``check.rs`` inside a temporary directory that
    also receives rustc's output and is removed afterwards.

    Returns:
        ``"clean"`` on a zero exit; ``"syntax_error"`` with one entry per
        ``error: ...`` line that is followed (within the next four lines) by
        a ``--> path:line:col`` location; ``"checker_unavailable"``,
        ``"checker_timeout"`` or ``"checker_failed"`` when rustc cannot be
        run, exceeds 15 seconds, or fails without any locatable error.
    """
    with _tempfile.TemporaryDirectory() as tmpdir:
        rs_path = _os.path.join(tmpdir, "check.rs")
        with open(rs_path, 'w', encoding='utf-8') as f:
            f.write(content)
        try:
            kwargs = {}
            if _sys.platform == "win32":
                kwargs["creationflags"] = _subprocess.CREATE_NO_WINDOW
            r = _subprocess.run(
                ["rustc", "--edition", "2021", "--emit=metadata", "--out-dir", tmpdir, rs_path],
                capture_output=True, text=True, timeout=15,
                **kwargs
            )
        except FileNotFoundError:
            return _result("checker_unavailable", "rustc", detail="rustc is not installed or not on PATH.")
        except PermissionError as e:
            return _result("checker_failed", "rustc", detail=str(e) or "rustc access denied.")
        except _subprocess.TimeoutExpired:
            return _result("checker_timeout", "rustc", detail="rustc exceeded 15s.")
        except OSError as e:
            return _result("checker_failed", "rustc", detail=str(e) or "rustc failed to start.")
        if r.returncode == 0:
            return _result("clean", "rustc")

        error_header = re.compile(r'^error(?:\[E\d+\])?: (.+)$')
        # The path part is matched greedily with `.*` rather than `[^:]+` so
        # that paths containing a colon (e.g. a Windows drive letter such as
        # `C:\...\check.rs`) still yield their trailing `:line:col`.
        location = re.compile(r'^\s*--> .*:(\d+):(\d+)')

        errors, lines = [], (r.stderr + r.stdout).splitlines()
        for i, line in enumerate(lines):
            m = error_header.match(line)
            if not m:
                continue
            # The location normally follows the header directly; look a few
            # lines ahead and take the first one found.
            for candidate in lines[i + 1:i + 5]:
                loc = location.match(candidate)
                if loc:
                    errors.append(_err(int(loc.group(1)), int(loc.group(2)) - 1, m.group(1)))
                    break
        if errors:
            return _result("syntax_error", "rustc", errors, detail=errors[0]["text"])
        detail = lines[0] if lines else f"rustc returned {r.returncode}."
        return _result("checker_failed", "rustc", detail=detail)
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def _native_r(content: str) -> Dict[str, Any]:
    """Syntax-check R source by having ``Rscript`` call ``parse(file=...)`` on a temp file.

    The R snippet prints ``OK`` on success and ``ERROR: <message>`` on a
    parse error. Output containing ``ERROR:`` yields ``"syntax_error"``; a
    non-zero exit without ``OK`` in the output yields ``"checker_failed"``;
    anything else is ``"clean"``. A run exceeding the timeout is killed and
    reported as ``"checker_timeout"``. The temp file is always removed.
    """
    # Write to a temp file so parse(file=...) gets the full content reliably.
    # parse(stdin()) silently truncates large files and misses errors in
    # complex multi-line constructs (e.g. knitr::knit_child() string args).
    _R_TIMEOUT = 20  # Rscript cold-start on Windows is 3-8s; 20s is generous.
    with _tempfile.NamedTemporaryFile(
        mode='w', suffix='.R', delete=False, encoding='utf-8'
    ) as script:
        script.write(content)
        tmp_path = script.name

    proc = None
    try:
        popen_opts = (
            {"creationflags": _subprocess.CREATE_NO_WINDOW}
            if _sys.platform == "win32"
            else {}
        )
        # Use forward slashes — R on Windows accepts them and avoids backslash escaping issues.
        # Escape single quotes for safe embedding in R string literal.
        r_path = tmp_path.replace('\\', '/').replace("'", "\\'")
        r_expr = f"tryCatch({{parse(file='{r_path}');cat('OK\\n')}},error=function(e){{cat('ERROR:',conditionMessage(e),'\\n')}})"
        proc = _subprocess.Popen(
            ["Rscript", "--vanilla", "-e", r_expr],
            stdin=_subprocess.DEVNULL,
            stdout=_subprocess.PIPE,
            stderr=_subprocess.PIPE,
            text=True,
            **popen_opts,
        )
        try:
            stdout, stderr = proc.communicate(timeout=_R_TIMEOUT)
        except _subprocess.TimeoutExpired:
            _kill_proc_tree(proc)
            return _result("checker_timeout", "Rscript", detail=f"Rscript exceeded {_R_TIMEOUT}s.")

        output = ((stdout or "") + (stderr or ""))[:65536]
        if "ERROR:" in output:
            text = re.sub(r'^ERROR:\s*', '', output.strip())
            # R reports "file:line:col: message" — extract line and column.
            position = re.search(r':(\d+):(\d+):', text)
            if position:
                line, col = int(position.group(1)), int(position.group(2))
            else:
                line_only = re.search(r'line (\d+)', text)
                line = int(line_only.group(1)) if line_only else 1
                col = 0
            return _result("syntax_error", "Rscript", [_err(line, col, text)], detail=text)

        if proc.returncode == 0 or "OK" in output:
            return _result("clean", "Rscript")
        detail = output.strip() or f"Rscript exited with code {proc.returncode}"
        return _result("checker_failed", "Rscript", detail=detail)
    except FileNotFoundError:
        return _result("checker_unavailable", "Rscript", detail="Rscript is not installed or not on PATH.")
    except PermissionError as exc:
        return _result("checker_failed", "Rscript", detail=str(exc) or "Rscript access denied.")
    except OSError as exc:
        return _result("checker_failed", "Rscript", detail=str(exc) or "Rscript failed to start.")
    finally:
        # Make sure no Rscript process is left running before removing its input file.
        if proc and proc.poll() is None:
            _kill_proc_tree(proc)
        try:
            _os.unlink(tmp_path)
        except OSError:
            pass
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def _native_php(content: str) -> Dict[str, Any]:
    """Syntax-check PHP by running ``php -l`` on a temp ``.php`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit the first output line matching ``... error: ... in <file>
    on line N`` is reported at line N; if no line matches, the whole output
    is reported at line 1.
    """
    proc = _subproc_check(["php", "-l"], content, ".php", "php -l")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "php -l")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    located = re.compile(r'^.*error:.*in\s+\S+\s+on line\s+(\d+)', re.IGNORECASE)
    for row in report.splitlines():
        found = located.match(row)
        if found is None:
            continue
        message = row.strip()
        return _result("syntax_error", "php -l", [_err(int(found.group(1)), 0, message)], detail=message)
    return _result("syntax_error", "php -l", [_err(1, 0, report)], detail=report)
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
def _native_ruby(content: str) -> Dict[str, Any]:
    """Syntax-check Ruby by running ``ruby -c`` on a temp ``.rb`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit the first output line of the form ``...:N: message`` is
    reported at line N; if no line matches, the whole output is reported at
    line 1.
    """
    proc = _subproc_check(["ruby", "-c"], content, ".rb", "ruby -c")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "ruby -c")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    located = re.compile(r'^.*:(\d+):\s*(.+)$')
    for row in report.splitlines():
        found = located.match(row)
        if found is None:
            continue
        message = found.group(2)
        return _result("syntax_error", "ruby -c", [_err(int(found.group(1)), 0, message)], detail=message)
    return _result("syntax_error", "ruby -c", [_err(1, 0, report)], detail=report)
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def _native_perl(content: str) -> Dict[str, Any]:
    """Syntax-check Perl by running ``perl -c`` on a temp ``.pl`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit the first output line containing ``at <file> line N`` is
    reported at line N; if no line matches, the whole output is reported at
    line 1.
    """
    # Note: perl -c runs BEGIN blocks — use with caution on untrusted code.
    proc = _subproc_check(["perl", "-c"], content, ".pl", "perl -c")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "perl -c")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    located = re.compile(r'^.*at\s+\S+\s+line\s+(\d+)')
    for row in report.splitlines():
        found = located.match(row)
        if found is None:
            continue
        message = row.strip()
        return _result("syntax_error", "perl -c", [_err(int(found.group(1)), 0, message)], detail=message)
    return _result("syntax_error", "perl -c", [_err(1, 0, report)], detail=report)
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def _native_lua(content: str) -> Dict[str, Any]:
    """Syntax-check Lua by running ``luac -p`` on a temp ``.lua`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit the first output line of the form ``...:N: message`` is
    reported at line N; if no line matches, the whole output is reported at
    line 1.
    """
    proc = _subproc_check(["luac", "-p"], content, ".lua", "luac -p")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "luac -p")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    located = re.compile(r'^.*:(\d+):\s*(.+)$')
    for row in report.splitlines():
        found = located.match(row)
        if found is None:
            continue
        message = found.group(2)
        return _result("syntax_error", "luac -p", [_err(int(found.group(1)), 0, message)], detail=message)
    return _result("syntax_error", "luac -p", [_err(1, 0, report)], detail=report)
|
|
1137
|
+
|
|
1138
|
+
|
|
1139
|
+
def _native_shell(content: str) -> Dict[str, Any]:
    """Syntax-check a shell script by running ``bash -n`` on a temp ``.sh`` file.

    A non-"ok" result from ``_subproc_check`` is returned unchanged. On a
    non-zero exit every output line of the form ``...: line N: message``
    becomes an error entry; if none are found the result is
    ``"checker_failed"`` with the raw output as detail.
    """
    proc = _subproc_check(["bash", "-n"], content, ".sh", "bash -n")
    if proc["status"] != "ok":
        return proc
    if proc["returncode"] == 0:
        return _result("clean", "bash -n")

    report = _clean_text(proc["stderr"] + "\n" + proc["stdout"])
    diagnostic = re.compile(r'^.*: line (\d+): (.+)$')
    hits = filter(None, map(diagnostic.match, report.splitlines()))
    errors = [_err(int(hit.group(1)), 0, hit.group(2)) for hit in hits]
    if errors:
        return _result("syntax_error", "bash -n", errors, detail=errors[0]["text"])
    fallback_detail = report if report else f"bash -n returned {proc['returncode']}."
    return _result("checker_failed", "bash -n", detail=fallback_detail)
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
# Lower-cased file extension (leading dot included) -> checker function.
# Each checker takes the file content as a string and returns a result dict.
_NATIVE_DISPATCH = {
    '.py': _native_python,
    '.json': _native_json,
    '.yaml': _native_yaml,
    '.yml': _native_yaml,
    '.xml': _native_xml,
    '.svg': _native_xml,
    '.toml': _native_toml,
    '.html': _native_html,
    '.htm': _native_html,
    '.css': _native_css,
    '.js': _native_js,
    '.jsx': _native_jsx,
    '.ts': _native_ts,
    '.tsx': _native_tsx,
    '.java': _native_java,
    '.go': _native_go,
    '.rs': _native_rust,
    '.r': _native_r,
    '.php': _native_php,
    '.rb': _native_ruby,
    '.pl': _native_perl,
    '.pm': _native_perl,
    '.lua': _native_lua,
    '.sh': _native_shell,
    '.bash': _native_shell,
}
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
def check_syntax_native(content: str, ext: str) -> Dict[str, Any]:
    """Syntax-check ``content`` with the native checker registered for ``ext``.

    Args:
        content: Source text to validate.
        ext: File extension including the leading dot; matched case-insensitively.

    Returns:
        The checker's result dict. ``"unsupported"`` when no checker is
        registered for ``ext``; ``"checker_failed"`` when the checker raises
        or returns something that is not a dict with a ``"status"`` key.
    """
    normalized_ext = (ext or "").lower()
    checker_fn = _NATIVE_DISPATCH.get(normalized_ext)
    if not checker_fn:
        shown_ext = ext or '[no extension]'
        return _result(
            "unsupported",
            _checker_name(normalized_ext),
            detail=f"No native syntax checker is registered for '{shown_ext}'.",
        )
    try:
        outcome = checker_fn(content)
        if isinstance(outcome, dict) and "status" in outcome:
            return outcome
        return _result("checker_failed", _checker_name(normalized_ext), detail="Checker returned an invalid result.")
    except Exception as exc:
        return _result("checker_failed", _checker_name(normalized_ext), detail=f"Checker error: {exc}")
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
def check_syntax_native_with_timeout(content: str, ext: str, timeout_seconds: int = 35) -> Dict[str, Any]:
    """Run native syntax validation with a hard timeout using a daemon thread.

    Uses threading instead of multiprocessing to avoid the Windows spawn
    deadlock (re-importing tree_sitter bindings in a fresh process blocked
    the MCP server's stdio thread indefinitely). Individual subprocess-based
    checkers already carry their own timeouts; this wrapper provides a final
    safety net for pure-Python checkers that could theoretically loop.

    Args:
        content: Source text to validate.
        ext: File extension including the leading dot; matched case-insensitively.
        timeout_seconds: Seconds to wait for the worker. Falsy or non-numeric
            values fall back to the 35-second default; the minimum is 1.

    Returns:
        The checker's result dict; ``"checker_timeout"`` when the worker is
        still running after the timeout; ``"checker_failed"`` when the worker
        produced no result.
    """
    # Must stay above the longest per-checker subprocess timeout so the inner
    # timeout (which cleans up its child process) fires first.
    _DEFAULT_TIMEOUT = 35

    normalized_ext = (ext or "").lower()
    if normalized_ext not in _NATIVE_DISPATCH:
        return check_syntax_native(content, normalized_ext)

    try:
        timeout_seconds = max(1, int(timeout_seconds or _DEFAULT_TIMEOUT))
    except (TypeError, ValueError):
        timeout_seconds = _DEFAULT_TIMEOUT

    result_holder: list = [None]

    def _run() -> None:
        try:
            result_holder[0] = check_syntax_native(content, normalized_ext)
        except Exception as e:
            result_holder[0] = _result(
                "checker_failed",
                _checker_name(normalized_ext),
                detail=f"Validation worker crashed: {e}",
            )

    t = _threading.Thread(target=_run, daemon=True)
    t.start()
    t.join(timeout_seconds)

    if t.is_alive():
        # The worker cannot be force-stopped; as a daemon thread it is simply
        # abandoned and will not keep the interpreter alive at exit.
        return _result(
            "checker_timeout",
            _checker_name(normalized_ext),
            detail=f"Validation exceeded {timeout_seconds}s and was terminated.",
        )

    if not isinstance(result_holder[0], dict):
        return _result("checker_failed", _checker_name(normalized_ext), detail="Validation worker returned no result.")
    return result_holder[0]
|