code2logic 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2logic/__init__.py +88 -0
- code2logic/analyzer.py +286 -0
- code2logic/cli.py +222 -0
- code2logic/dependency.py +246 -0
- code2logic/generators.py +1017 -0
- code2logic/gherkin.py +980 -0
- code2logic/intent.py +246 -0
- code2logic/llm.py +449 -0
- code2logic/mcp_server.py +354 -0
- code2logic/models.py +170 -0
- code2logic/parsers.py +908 -0
- code2logic/py.typed +2 -0
- code2logic/similarity.py +165 -0
- code2logic-1.0.0.dist-info/METADATA +322 -0
- code2logic-1.0.0.dist-info/RECORD +18 -0
- code2logic-1.0.0.dist-info/WHEEL +4 -0
- code2logic-1.0.0.dist-info/entry_points.txt +3 -0
- code2logic-1.0.0.dist-info/licenses/LICENSE +201 -0
code2logic/parsers.py
ADDED
|
@@ -0,0 +1,908 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Code parsers for multiple languages.
|
|
3
|
+
|
|
4
|
+
Includes:
|
|
5
|
+
- TreeSitterParser: High-accuracy AST parsing using Tree-sitter
|
|
6
|
+
- UniversalParser: Fallback regex/AST parser for environments without Tree-sitter
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import ast
|
|
10
|
+
import re
|
|
11
|
+
from typing import Optional, List
|
|
12
|
+
|
|
13
|
+
from .models import FunctionInfo, ClassInfo, TypeInfo, ModuleInfo
|
|
14
|
+
from .intent import EnhancedIntentGenerator
|
|
15
|
+
|
|
16
|
+
# Optional Tree-sitter imports
|
|
17
|
+
TREE_SITTER_AVAILABLE = False
|
|
18
|
+
try:
|
|
19
|
+
import tree_sitter_python as tspython
|
|
20
|
+
import tree_sitter_javascript as tsjavascript
|
|
21
|
+
from tree_sitter import Language, Parser
|
|
22
|
+
TREE_SITTER_AVAILABLE = True
|
|
23
|
+
except ImportError:
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TreeSitterParser:
|
|
28
|
+
"""
|
|
29
|
+
Parser using Tree-sitter for high-accuracy AST parsing.
|
|
30
|
+
|
|
31
|
+
Supports Python, JavaScript, and TypeScript with 99% accuracy.
|
|
32
|
+
Falls back gracefully if Tree-sitter libraries are not installed.
|
|
33
|
+
|
|
34
|
+
Example:
|
|
35
|
+
>>> parser = TreeSitterParser()
|
|
36
|
+
>>> if parser.is_available('python'):
|
|
37
|
+
... module = parser.parse('main.py', content, 'python')
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(self):
|
|
41
|
+
"""Initialize Tree-sitter parsers for available languages."""
|
|
42
|
+
self.parsers: dict = {}
|
|
43
|
+
self.languages: dict = {}
|
|
44
|
+
self.intent_gen = EnhancedIntentGenerator()
|
|
45
|
+
|
|
46
|
+
if TREE_SITTER_AVAILABLE:
|
|
47
|
+
self._init_parsers()
|
|
48
|
+
|
|
49
|
+
def _init_parsers(self):
|
|
50
|
+
"""Initialize parsers for each supported language."""
|
|
51
|
+
try:
|
|
52
|
+
# Python
|
|
53
|
+
self.languages['python'] = Language(tspython.language())
|
|
54
|
+
self.parsers['python'] = Parser(self.languages['python'])
|
|
55
|
+
|
|
56
|
+
# JavaScript
|
|
57
|
+
self.languages['javascript'] = Language(tsjavascript.language())
|
|
58
|
+
self.parsers['javascript'] = Parser(self.languages['javascript'])
|
|
59
|
+
|
|
60
|
+
# TypeScript - try dedicated parser, fall back to JS
|
|
61
|
+
try:
|
|
62
|
+
import tree_sitter_typescript as tstypescript
|
|
63
|
+
self.languages['typescript'] = Language(tstypescript.language_typescript())
|
|
64
|
+
self.parsers['typescript'] = Parser(self.languages['typescript'])
|
|
65
|
+
except ImportError:
|
|
66
|
+
self.languages['typescript'] = self.languages['javascript']
|
|
67
|
+
self.parsers['typescript'] = self.parsers['javascript']
|
|
68
|
+
|
|
69
|
+
except Exception as e:
|
|
70
|
+
import sys
|
|
71
|
+
print(f"Tree-sitter init warning: {e}", file=sys.stderr)
|
|
72
|
+
|
|
73
|
+
def is_available(self, language: str) -> bool:
|
|
74
|
+
"""Check if Tree-sitter parser is available for a language."""
|
|
75
|
+
return language in self.parsers
|
|
76
|
+
|
|
77
|
+
@classmethod
|
|
78
|
+
def get_supported_languages(cls) -> List[str]:
|
|
79
|
+
"""Get list of potentially supported languages."""
|
|
80
|
+
return ['python', 'javascript', 'typescript']
|
|
81
|
+
|
|
82
|
+
def parse(self, filepath: str, content: str, language: str) -> Optional[ModuleInfo]:
|
|
83
|
+
"""
|
|
84
|
+
Parse a source file using Tree-sitter.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
filepath: Relative path to the file
|
|
88
|
+
content: File content as string
|
|
89
|
+
language: Programming language
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
ModuleInfo if parsing succeeds, None otherwise
|
|
93
|
+
"""
|
|
94
|
+
if language not in self.parsers:
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
parser = self.parsers[language]
|
|
98
|
+
tree = parser.parse(bytes(content, 'utf8'))
|
|
99
|
+
|
|
100
|
+
if language == 'python':
|
|
101
|
+
return self._parse_python(filepath, content, tree)
|
|
102
|
+
elif language in ('javascript', 'typescript'):
|
|
103
|
+
return self._parse_js_ts(filepath, content, tree, language)
|
|
104
|
+
|
|
105
|
+
return None
|
|
106
|
+
|
|
107
|
+
def _parse_python(self, filepath: str, content: str, tree) -> ModuleInfo:
|
|
108
|
+
"""Parse Python source using Tree-sitter AST."""
|
|
109
|
+
root = tree.root_node
|
|
110
|
+
imports, classes, functions, constants, exports = [], [], [], [], []
|
|
111
|
+
docstring = None
|
|
112
|
+
|
|
113
|
+
for child in root.children:
|
|
114
|
+
node_type = child.type
|
|
115
|
+
|
|
116
|
+
# Module docstring
|
|
117
|
+
if node_type == 'expression_statement' and not docstring:
|
|
118
|
+
expr = child.children[0] if child.children else None
|
|
119
|
+
if expr and expr.type == 'string':
|
|
120
|
+
docstring = self._extract_string(expr, content)
|
|
121
|
+
|
|
122
|
+
# Imports
|
|
123
|
+
elif node_type == 'import_statement':
|
|
124
|
+
imports.extend(self._extract_py_import(child, content))
|
|
125
|
+
elif node_type == 'import_from_statement':
|
|
126
|
+
imports.extend(self._extract_py_from_import(child, content))
|
|
127
|
+
|
|
128
|
+
# Functions
|
|
129
|
+
elif node_type == 'function_definition':
|
|
130
|
+
func = self._extract_py_function(child, content)
|
|
131
|
+
if func:
|
|
132
|
+
functions.append(func)
|
|
133
|
+
if not func.name.startswith('_'):
|
|
134
|
+
exports.append(func.name)
|
|
135
|
+
|
|
136
|
+
# Decorated functions
|
|
137
|
+
elif node_type == 'decorated_definition':
|
|
138
|
+
inner = self._find_child(child, 'function_definition')
|
|
139
|
+
if inner:
|
|
140
|
+
func = self._extract_py_function(inner, content, child)
|
|
141
|
+
if func:
|
|
142
|
+
functions.append(func)
|
|
143
|
+
if not func.name.startswith('_'):
|
|
144
|
+
exports.append(func.name)
|
|
145
|
+
|
|
146
|
+
# Classes
|
|
147
|
+
elif node_type == 'class_definition':
|
|
148
|
+
cls = self._extract_py_class(child, content)
|
|
149
|
+
if cls:
|
|
150
|
+
classes.append(cls)
|
|
151
|
+
if not cls.name.startswith('_'):
|
|
152
|
+
exports.append(cls.name)
|
|
153
|
+
|
|
154
|
+
# Constants
|
|
155
|
+
elif node_type == 'expression_statement':
|
|
156
|
+
const = self._extract_py_constant(child, content)
|
|
157
|
+
if const:
|
|
158
|
+
constants.append(const)
|
|
159
|
+
|
|
160
|
+
lines = content.split('\n')
|
|
161
|
+
return ModuleInfo(
|
|
162
|
+
path=filepath,
|
|
163
|
+
language='python',
|
|
164
|
+
imports=imports[:20],
|
|
165
|
+
exports=exports,
|
|
166
|
+
classes=classes,
|
|
167
|
+
functions=functions,
|
|
168
|
+
types=[],
|
|
169
|
+
constants=constants[:10],
|
|
170
|
+
docstring=docstring[:100] if docstring else None,
|
|
171
|
+
lines_total=len(lines),
|
|
172
|
+
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('#')])
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
def _extract_py_function(self, node, content: str,
|
|
176
|
+
decorated_node=None) -> Optional[FunctionInfo]:
|
|
177
|
+
"""Extract Python function from AST node."""
|
|
178
|
+
name_node = self._find_child(node, 'identifier')
|
|
179
|
+
if not name_node:
|
|
180
|
+
return None
|
|
181
|
+
name = self._text(name_node, content)
|
|
182
|
+
|
|
183
|
+
# Parameters
|
|
184
|
+
params = []
|
|
185
|
+
params_node = self._find_child(node, 'parameters')
|
|
186
|
+
if params_node:
|
|
187
|
+
for child in params_node.children:
|
|
188
|
+
if child.type == 'identifier':
|
|
189
|
+
params.append(self._text(child, content))
|
|
190
|
+
elif child.type in ('typed_parameter', 'typed_default_parameter'):
|
|
191
|
+
n = self._find_child(child, 'identifier')
|
|
192
|
+
t = self._find_child(child, 'type')
|
|
193
|
+
if n:
|
|
194
|
+
p = self._text(n, content)
|
|
195
|
+
if t:
|
|
196
|
+
p += ':' + self._text(t, content)
|
|
197
|
+
params.append(p)
|
|
198
|
+
elif child.type == 'default_parameter' and child.children:
|
|
199
|
+
params.append(self._text(child.children[0], content))
|
|
200
|
+
|
|
201
|
+
# Return type
|
|
202
|
+
return_type = None
|
|
203
|
+
ret_node = self._find_child(node, 'type')
|
|
204
|
+
if ret_node:
|
|
205
|
+
return_type = self._text(ret_node, content)
|
|
206
|
+
|
|
207
|
+
# Docstring
|
|
208
|
+
docstring = None
|
|
209
|
+
body = self._find_child(node, 'block')
|
|
210
|
+
if body and body.children:
|
|
211
|
+
first = body.children[0]
|
|
212
|
+
if first.type == 'expression_statement':
|
|
213
|
+
expr = first.children[0] if first.children else None
|
|
214
|
+
if expr and expr.type == 'string':
|
|
215
|
+
docstring = self._extract_string(expr, content)
|
|
216
|
+
|
|
217
|
+
# Decorators
|
|
218
|
+
decorators = []
|
|
219
|
+
if decorated_node:
|
|
220
|
+
for c in decorated_node.children:
|
|
221
|
+
if c.type == 'decorator':
|
|
222
|
+
decorators.append(self._text(c, content).lstrip('@').split('(')[0])
|
|
223
|
+
|
|
224
|
+
is_async = node.type == 'async_function_definition'
|
|
225
|
+
|
|
226
|
+
return FunctionInfo(
|
|
227
|
+
name=name,
|
|
228
|
+
params=params[:8],
|
|
229
|
+
return_type=return_type,
|
|
230
|
+
docstring=docstring[:100] if docstring else None,
|
|
231
|
+
calls=[],
|
|
232
|
+
raises=[],
|
|
233
|
+
complexity=1,
|
|
234
|
+
lines=node.end_point[0] - node.start_point[0] + 1,
|
|
235
|
+
decorators=decorators,
|
|
236
|
+
is_async=is_async,
|
|
237
|
+
is_static='staticmethod' in decorators,
|
|
238
|
+
is_private=name.startswith('_') and not name.startswith('__'),
|
|
239
|
+
intent=self.intent_gen.generate(name, docstring),
|
|
240
|
+
start_line=node.start_point[0] + 1,
|
|
241
|
+
end_line=node.end_point[0] + 1
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
def _extract_py_class(self, node, content: str) -> Optional[ClassInfo]:
|
|
245
|
+
"""Extract Python class from AST node."""
|
|
246
|
+
name_node = self._find_child(node, 'identifier')
|
|
247
|
+
if not name_node:
|
|
248
|
+
return None
|
|
249
|
+
name = self._text(name_node, content)
|
|
250
|
+
|
|
251
|
+
# Base classes
|
|
252
|
+
bases = []
|
|
253
|
+
arg_list = self._find_child(node, 'argument_list')
|
|
254
|
+
if arg_list:
|
|
255
|
+
for c in arg_list.children:
|
|
256
|
+
if c.type in ('identifier', 'attribute'):
|
|
257
|
+
bases.append(self._text(c, content))
|
|
258
|
+
|
|
259
|
+
# Docstring and methods
|
|
260
|
+
docstring = None
|
|
261
|
+
methods = []
|
|
262
|
+
body = self._find_child(node, 'block')
|
|
263
|
+
if body:
|
|
264
|
+
for i, child in enumerate(body.children):
|
|
265
|
+
if i == 0 and child.type == 'expression_statement':
|
|
266
|
+
expr = child.children[0] if child.children else None
|
|
267
|
+
if expr and expr.type == 'string':
|
|
268
|
+
docstring = self._extract_string(expr, content)
|
|
269
|
+
|
|
270
|
+
if child.type == 'function_definition':
|
|
271
|
+
m = self._extract_py_function(child, content)
|
|
272
|
+
if m:
|
|
273
|
+
methods.append(m)
|
|
274
|
+
elif child.type == 'decorated_definition':
|
|
275
|
+
inner = self._find_child(child, 'function_definition')
|
|
276
|
+
if inner:
|
|
277
|
+
m = self._extract_py_function(inner, content, child)
|
|
278
|
+
if m:
|
|
279
|
+
methods.append(m)
|
|
280
|
+
|
|
281
|
+
return ClassInfo(
|
|
282
|
+
name=name,
|
|
283
|
+
bases=bases,
|
|
284
|
+
docstring=docstring[:100] if docstring else None,
|
|
285
|
+
methods=methods,
|
|
286
|
+
properties=[],
|
|
287
|
+
is_interface=False,
|
|
288
|
+
is_abstract='ABC' in bases or 'ABCMeta' in bases,
|
|
289
|
+
generic_params=[]
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
def _extract_py_import(self, node, content: str) -> List[str]:
|
|
293
|
+
"""Extract import statement."""
|
|
294
|
+
imports = []
|
|
295
|
+
for c in node.children:
|
|
296
|
+
if c.type == 'dotted_name':
|
|
297
|
+
imports.append(self._text(c, content))
|
|
298
|
+
elif c.type == 'aliased_import':
|
|
299
|
+
n = self._find_child(c, 'dotted_name')
|
|
300
|
+
if n:
|
|
301
|
+
imports.append(self._text(n, content))
|
|
302
|
+
return imports
|
|
303
|
+
|
|
304
|
+
def _extract_py_from_import(self, node, content: str) -> List[str]:
|
|
305
|
+
"""Extract from ... import ... statement."""
|
|
306
|
+
imports = []
|
|
307
|
+
module = None
|
|
308
|
+
for c in node.children:
|
|
309
|
+
if c.type in ('dotted_name', 'import_prefix'):
|
|
310
|
+
module = self._text(c, content)
|
|
311
|
+
if module:
|
|
312
|
+
for c in node.children:
|
|
313
|
+
if c.type == 'identifier':
|
|
314
|
+
imports.append(f"{module}.{self._text(c, content)}")
|
|
315
|
+
elif c.type == 'aliased_import':
|
|
316
|
+
n = self._find_child(c, 'identifier')
|
|
317
|
+
if n:
|
|
318
|
+
imports.append(f"{module}.{self._text(n, content)}")
|
|
319
|
+
return imports
|
|
320
|
+
|
|
321
|
+
def _extract_py_constant(self, node, content: str) -> Optional[str]:
|
|
322
|
+
"""Extract constant (UPPERCASE assignment)."""
|
|
323
|
+
if node.children:
|
|
324
|
+
expr = node.children[0]
|
|
325
|
+
if expr.type == 'assignment':
|
|
326
|
+
left = expr.children[0] if expr.children else None
|
|
327
|
+
if left and left.type == 'identifier':
|
|
328
|
+
name = self._text(left, content)
|
|
329
|
+
if name.isupper():
|
|
330
|
+
return name
|
|
331
|
+
return None
|
|
332
|
+
|
|
333
|
+
def _parse_js_ts(self, filepath: str, content: str, tree, language: str) -> ModuleInfo:
|
|
334
|
+
"""Parse JavaScript/TypeScript source using Tree-sitter AST."""
|
|
335
|
+
root = tree.root_node
|
|
336
|
+
imports, classes, functions, types, constants, exports = [], [], [], [], [], []
|
|
337
|
+
docstring = None
|
|
338
|
+
|
|
339
|
+
for child in root.children:
|
|
340
|
+
node_type = child.type
|
|
341
|
+
|
|
342
|
+
# Imports
|
|
343
|
+
if node_type == 'import_statement':
|
|
344
|
+
for c in child.children:
|
|
345
|
+
if c.type == 'string':
|
|
346
|
+
imports.append(self._text(c, content).strip('"\''))
|
|
347
|
+
|
|
348
|
+
# Exports
|
|
349
|
+
elif node_type == 'export_statement':
|
|
350
|
+
for c in child.children:
|
|
351
|
+
if c.type == 'class_declaration':
|
|
352
|
+
cls = self._extract_js_class(c, content)
|
|
353
|
+
if cls:
|
|
354
|
+
classes.append(cls)
|
|
355
|
+
exports.append(cls.name)
|
|
356
|
+
elif c.type == 'function_declaration':
|
|
357
|
+
func = self._extract_js_function(c, content)
|
|
358
|
+
if func:
|
|
359
|
+
functions.append(func)
|
|
360
|
+
exports.append(func.name)
|
|
361
|
+
elif c.type == 'lexical_declaration':
|
|
362
|
+
func = self._extract_js_arrow_fn(c, content)
|
|
363
|
+
if func:
|
|
364
|
+
functions.append(func)
|
|
365
|
+
exports.append(func.name)
|
|
366
|
+
elif c.type in ('interface_declaration', 'type_alias_declaration'):
|
|
367
|
+
t = self._extract_ts_type(c, content)
|
|
368
|
+
if t:
|
|
369
|
+
types.append(t)
|
|
370
|
+
exports.append(t.name)
|
|
371
|
+
elif c.type == 'enum_declaration':
|
|
372
|
+
t = self._extract_ts_enum(c, content)
|
|
373
|
+
if t:
|
|
374
|
+
types.append(t)
|
|
375
|
+
exports.append(t.name)
|
|
376
|
+
|
|
377
|
+
# Non-exported declarations
|
|
378
|
+
elif node_type == 'class_declaration':
|
|
379
|
+
cls = self._extract_js_class(child, content)
|
|
380
|
+
if cls:
|
|
381
|
+
classes.append(cls)
|
|
382
|
+
exports.append(cls.name)
|
|
383
|
+
elif node_type == 'function_declaration':
|
|
384
|
+
func = self._extract_js_function(child, content)
|
|
385
|
+
if func:
|
|
386
|
+
functions.append(func)
|
|
387
|
+
exports.append(func.name)
|
|
388
|
+
elif node_type == 'lexical_declaration':
|
|
389
|
+
func = self._extract_js_arrow_fn(child, content)
|
|
390
|
+
if func:
|
|
391
|
+
functions.append(func)
|
|
392
|
+
const = self._extract_js_constant(child, content)
|
|
393
|
+
if const:
|
|
394
|
+
constants.append(const)
|
|
395
|
+
elif node_type in ('interface_declaration', 'type_alias_declaration'):
|
|
396
|
+
t = self._extract_ts_type(child, content)
|
|
397
|
+
if t:
|
|
398
|
+
types.append(t)
|
|
399
|
+
exports.append(t.name)
|
|
400
|
+
|
|
401
|
+
# Leading comment as docstring
|
|
402
|
+
elif node_type == 'comment' and not docstring:
|
|
403
|
+
docstring = self._extract_js_comment(child, content)
|
|
404
|
+
|
|
405
|
+
lines = content.split('\n')
|
|
406
|
+
return ModuleInfo(
|
|
407
|
+
path=filepath,
|
|
408
|
+
language=language,
|
|
409
|
+
imports=imports[:20],
|
|
410
|
+
exports=list(set(exports)),
|
|
411
|
+
classes=classes,
|
|
412
|
+
functions=functions,
|
|
413
|
+
types=types,
|
|
414
|
+
constants=constants[:10],
|
|
415
|
+
docstring=docstring[:100] if docstring else None,
|
|
416
|
+
lines_total=len(lines),
|
|
417
|
+
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('//')])
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
def _extract_js_class(self, node, content: str) -> Optional[ClassInfo]:
|
|
421
|
+
"""Extract JS/TS class from AST node."""
|
|
422
|
+
name_node = self._find_child(node, 'type_identifier') or self._find_child(node, 'identifier')
|
|
423
|
+
if not name_node:
|
|
424
|
+
return None
|
|
425
|
+
name = self._text(name_node, content)
|
|
426
|
+
|
|
427
|
+
# Base classes
|
|
428
|
+
bases = []
|
|
429
|
+
heritage = self._find_child(node, 'class_heritage')
|
|
430
|
+
if heritage:
|
|
431
|
+
for c in heritage.children:
|
|
432
|
+
if c.type == 'identifier':
|
|
433
|
+
bases.append(self._text(c, content))
|
|
434
|
+
|
|
435
|
+
# Methods
|
|
436
|
+
methods = []
|
|
437
|
+
body = self._find_child(node, 'class_body')
|
|
438
|
+
if body:
|
|
439
|
+
for c in body.children:
|
|
440
|
+
if c.type == 'method_definition':
|
|
441
|
+
m = self._extract_js_method(c, content)
|
|
442
|
+
if m:
|
|
443
|
+
methods.append(m)
|
|
444
|
+
|
|
445
|
+
return ClassInfo(
|
|
446
|
+
name=name,
|
|
447
|
+
bases=bases,
|
|
448
|
+
docstring=None,
|
|
449
|
+
methods=methods,
|
|
450
|
+
properties=[],
|
|
451
|
+
is_interface=False,
|
|
452
|
+
is_abstract='abstract' in self._text(node, content)[:50],
|
|
453
|
+
generic_params=[]
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
def _extract_js_method(self, node, content: str) -> Optional[FunctionInfo]:
|
|
457
|
+
"""Extract JS/TS method from AST node."""
|
|
458
|
+
name_node = self._find_child(node, 'property_identifier')
|
|
459
|
+
if not name_node:
|
|
460
|
+
return None
|
|
461
|
+
name = self._text(name_node, content)
|
|
462
|
+
|
|
463
|
+
node_text = self._text(node, content)[:100]
|
|
464
|
+
is_async = 'async' in node_text.split(name)[0] if name in node_text else False
|
|
465
|
+
is_static = 'static' in node_text.split(name)[0] if name in node_text else False
|
|
466
|
+
|
|
467
|
+
# Parameters
|
|
468
|
+
params = []
|
|
469
|
+
params_node = self._find_child(node, 'formal_parameters')
|
|
470
|
+
if params_node:
|
|
471
|
+
params = self._extract_js_params(params_node, content)
|
|
472
|
+
|
|
473
|
+
# Return type
|
|
474
|
+
return_type = None
|
|
475
|
+
type_ann = self._find_child(node, 'type_annotation')
|
|
476
|
+
if type_ann:
|
|
477
|
+
return_type = self._text(type_ann, content).lstrip(':').strip()
|
|
478
|
+
|
|
479
|
+
return FunctionInfo(
|
|
480
|
+
name=name,
|
|
481
|
+
params=params[:8],
|
|
482
|
+
return_type=return_type,
|
|
483
|
+
docstring=None,
|
|
484
|
+
calls=[],
|
|
485
|
+
raises=[],
|
|
486
|
+
complexity=1,
|
|
487
|
+
lines=node.end_point[0] - node.start_point[0] + 1,
|
|
488
|
+
decorators=[],
|
|
489
|
+
is_async=is_async,
|
|
490
|
+
is_static=is_static,
|
|
491
|
+
is_private=name.startswith('_') or name.startswith('#'),
|
|
492
|
+
intent=self.intent_gen.generate(name),
|
|
493
|
+
start_line=node.start_point[0] + 1,
|
|
494
|
+
end_line=node.end_point[0] + 1
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
def _extract_js_function(self, node, content: str) -> Optional[FunctionInfo]:
|
|
498
|
+
"""Extract JS/TS function from AST node."""
|
|
499
|
+
name_node = self._find_child(node, 'identifier')
|
|
500
|
+
if not name_node:
|
|
501
|
+
return None
|
|
502
|
+
name = self._text(name_node, content)
|
|
503
|
+
is_async = self._text(node, content)[:50].strip().startswith('async')
|
|
504
|
+
|
|
505
|
+
params = []
|
|
506
|
+
params_node = self._find_child(node, 'formal_parameters')
|
|
507
|
+
if params_node:
|
|
508
|
+
params = self._extract_js_params(params_node, content)
|
|
509
|
+
|
|
510
|
+
return_type = None
|
|
511
|
+
type_ann = self._find_child(node, 'type_annotation')
|
|
512
|
+
if type_ann:
|
|
513
|
+
return_type = self._text(type_ann, content).lstrip(':').strip()
|
|
514
|
+
|
|
515
|
+
return FunctionInfo(
|
|
516
|
+
name=name,
|
|
517
|
+
params=params[:8],
|
|
518
|
+
return_type=return_type,
|
|
519
|
+
docstring=None,
|
|
520
|
+
calls=[],
|
|
521
|
+
raises=[],
|
|
522
|
+
complexity=1,
|
|
523
|
+
lines=node.end_point[0] - node.start_point[0] + 1,
|
|
524
|
+
decorators=[],
|
|
525
|
+
is_async=is_async,
|
|
526
|
+
is_static=False,
|
|
527
|
+
is_private=name.startswith('_'),
|
|
528
|
+
intent=self.intent_gen.generate(name),
|
|
529
|
+
start_line=node.start_point[0] + 1,
|
|
530
|
+
end_line=node.end_point[0] + 1
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
def _extract_js_arrow_fn(self, node, content: str) -> Optional[FunctionInfo]:
|
|
534
|
+
"""Extract arrow function assigned to const."""
|
|
535
|
+
for c in node.children:
|
|
536
|
+
if c.type == 'variable_declarator':
|
|
537
|
+
name_node = self._find_child(c, 'identifier')
|
|
538
|
+
arrow = self._find_child(c, 'arrow_function')
|
|
539
|
+
if name_node and arrow:
|
|
540
|
+
name = self._text(name_node, content)
|
|
541
|
+
is_async = 'async' in self._text(arrow, content)[:30]
|
|
542
|
+
params = []
|
|
543
|
+
pn = self._find_child(arrow, 'formal_parameters')
|
|
544
|
+
if pn:
|
|
545
|
+
params = self._extract_js_params(pn, content)
|
|
546
|
+
return FunctionInfo(
|
|
547
|
+
name=name,
|
|
548
|
+
params=params[:8],
|
|
549
|
+
return_type=None,
|
|
550
|
+
docstring=None,
|
|
551
|
+
calls=[],
|
|
552
|
+
raises=[],
|
|
553
|
+
complexity=1,
|
|
554
|
+
lines=node.end_point[0] - node.start_point[0] + 1,
|
|
555
|
+
decorators=[],
|
|
556
|
+
is_async=is_async,
|
|
557
|
+
is_static=False,
|
|
558
|
+
is_private=name.startswith('_'),
|
|
559
|
+
intent=self.intent_gen.generate(name),
|
|
560
|
+
start_line=node.start_point[0] + 1,
|
|
561
|
+
end_line=node.end_point[0] + 1
|
|
562
|
+
)
|
|
563
|
+
return None
|
|
564
|
+
|
|
565
|
+
def _extract_js_params(self, params_node, content: str) -> List[str]:
|
|
566
|
+
"""Extract JS/TS function parameters."""
|
|
567
|
+
params = []
|
|
568
|
+
for c in params_node.children:
|
|
569
|
+
if c.type == 'identifier':
|
|
570
|
+
params.append(self._text(c, content))
|
|
571
|
+
elif c.type == 'required_parameter':
|
|
572
|
+
n = self._find_child(c, 'identifier')
|
|
573
|
+
t = self._find_child(c, 'type_annotation')
|
|
574
|
+
if n:
|
|
575
|
+
p = self._text(n, content)
|
|
576
|
+
if t:
|
|
577
|
+
p += self._text(t, content)
|
|
578
|
+
params.append(p)
|
|
579
|
+
elif c.type == 'optional_parameter':
|
|
580
|
+
n = self._find_child(c, 'identifier')
|
|
581
|
+
if n:
|
|
582
|
+
params.append(self._text(n, content) + '?')
|
|
583
|
+
return params
|
|
584
|
+
|
|
585
|
+
def _extract_ts_type(self, node, content: str) -> Optional[TypeInfo]:
|
|
586
|
+
"""Extract TypeScript interface or type alias."""
|
|
587
|
+
name_node = self._find_child(node, 'type_identifier') or self._find_child(node, 'identifier')
|
|
588
|
+
if not name_node:
|
|
589
|
+
return None
|
|
590
|
+
name = self._text(name_node, content)
|
|
591
|
+
kind = 'interface' if node.type == 'interface_declaration' else 'type'
|
|
592
|
+
return TypeInfo(name=name, kind=kind, definition=self._text(node, content)[:100])
|
|
593
|
+
|
|
594
|
+
def _extract_ts_enum(self, node, content: str) -> Optional[TypeInfo]:
|
|
595
|
+
"""Extract TypeScript enum."""
|
|
596
|
+
name_node = self._find_child(node, 'identifier')
|
|
597
|
+
if not name_node:
|
|
598
|
+
return None
|
|
599
|
+
name = self._text(name_node, content)
|
|
600
|
+
return TypeInfo(name=name, kind='enum', definition='')
|
|
601
|
+
|
|
602
|
+
def _extract_js_constant(self, node, content: str) -> Optional[str]:
|
|
603
|
+
"""Extract constant (UPPERCASE const)."""
|
|
604
|
+
for c in node.children:
|
|
605
|
+
if c.type == 'variable_declarator':
|
|
606
|
+
n = self._find_child(c, 'identifier')
|
|
607
|
+
if n:
|
|
608
|
+
name = self._text(n, content)
|
|
609
|
+
if name.isupper():
|
|
610
|
+
return name
|
|
611
|
+
return None
|
|
612
|
+
|
|
613
|
+
def _extract_js_comment(self, node, content: str) -> Optional[str]:
|
|
614
|
+
"""Extract JS comment content."""
|
|
615
|
+
text = self._text(node, content)
|
|
616
|
+
if text.startswith('/**'):
|
|
617
|
+
lines = text[3:-2].split('\n')
|
|
618
|
+
clean = [l.strip().lstrip('*').strip() for l in lines
|
|
619
|
+
if l.strip().lstrip('*').strip() and not l.strip().startswith('@')]
|
|
620
|
+
return ' '.join(clean)[:100] if clean else None
|
|
621
|
+
elif text.startswith('//'):
|
|
622
|
+
return text[2:].strip()[:100]
|
|
623
|
+
return None
|
|
624
|
+
|
|
625
|
+
# Helper methods
|
|
626
|
+
def _find_child(self, node, type_name: str):
|
|
627
|
+
"""Find first child of given type."""
|
|
628
|
+
for c in node.children:
|
|
629
|
+
if c.type == type_name:
|
|
630
|
+
return c
|
|
631
|
+
return None
|
|
632
|
+
|
|
633
|
+
def _text(self, node, content: str) -> str:
|
|
634
|
+
"""Get text content of node."""
|
|
635
|
+
return content[node.start_byte:node.end_byte]
|
|
636
|
+
|
|
637
|
+
def _extract_string(self, node, content: str) -> str:
|
|
638
|
+
"""Extract string content without quotes."""
|
|
639
|
+
text = self._text(node, content)
|
|
640
|
+
if text.startswith('"""') or text.startswith("'''"):
|
|
641
|
+
return text[3:-3].strip()
|
|
642
|
+
elif text.startswith('"') or text.startswith("'"):
|
|
643
|
+
return text[1:-1].strip()
|
|
644
|
+
return text
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
class UniversalParser:
|
|
648
|
+
"""
|
|
649
|
+
Fallback parser using Python AST and regex.
|
|
650
|
+
|
|
651
|
+
Used when Tree-sitter is not available. Provides reasonable
|
|
652
|
+
accuracy for Python (using built-in AST) and basic support
|
|
653
|
+
for JavaScript/TypeScript using regex patterns.
|
|
654
|
+
|
|
655
|
+
Example:
|
|
656
|
+
>>> parser = UniversalParser()
|
|
657
|
+
>>> module = parser.parse('main.py', content, 'python')
|
|
658
|
+
"""
|
|
659
|
+
|
|
660
|
+
def __init__(self):
|
|
661
|
+
"""Initialize the universal parser."""
|
|
662
|
+
self.intent_gen = EnhancedIntentGenerator()
|
|
663
|
+
|
|
664
|
+
def parse(self, filepath: str, content: str, language: str) -> Optional[ModuleInfo]:
|
|
665
|
+
"""
|
|
666
|
+
Parse a source file using AST or regex.
|
|
667
|
+
|
|
668
|
+
Args:
|
|
669
|
+
filepath: Relative path to the file
|
|
670
|
+
content: File content as string
|
|
671
|
+
language: Programming language
|
|
672
|
+
|
|
673
|
+
Returns:
|
|
674
|
+
ModuleInfo if parsing succeeds, None otherwise
|
|
675
|
+
"""
|
|
676
|
+
if language == 'python':
|
|
677
|
+
return self._parse_python(filepath, content)
|
|
678
|
+
elif language in ('javascript', 'typescript'):
|
|
679
|
+
return self._parse_js_ts(filepath, content, language)
|
|
680
|
+
return None
|
|
681
|
+
|
|
682
|
+
def _parse_python(self, filepath: str, content: str) -> Optional[ModuleInfo]:
|
|
683
|
+
"""Parse Python using built-in AST."""
|
|
684
|
+
try:
|
|
685
|
+
tree = ast.parse(content)
|
|
686
|
+
except SyntaxError:
|
|
687
|
+
lines = content.split('\n')
|
|
688
|
+
return ModuleInfo(
|
|
689
|
+
path=filepath, language='python', imports=[], exports=[],
|
|
690
|
+
classes=[], functions=[], types=[], constants=[], docstring=None,
|
|
691
|
+
lines_total=len(lines), lines_code=len([l for l in lines if l.strip()])
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
imports, classes, functions, constants = [], [], [], []
|
|
695
|
+
|
|
696
|
+
for node in ast.iter_child_nodes(tree):
|
|
697
|
+
if isinstance(node, ast.Import):
|
|
698
|
+
imports.extend(a.name for a in node.names)
|
|
699
|
+
elif isinstance(node, ast.ImportFrom):
|
|
700
|
+
module = node.module or ''
|
|
701
|
+
imports.extend(f"{module}.{a.name}" for a in node.names if a.name != '*')
|
|
702
|
+
elif isinstance(node, ast.ClassDef):
|
|
703
|
+
cls = self._extract_ast_class(node)
|
|
704
|
+
if cls:
|
|
705
|
+
classes.append(cls)
|
|
706
|
+
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
707
|
+
func = self._extract_ast_function(node)
|
|
708
|
+
if func:
|
|
709
|
+
functions.append(func)
|
|
710
|
+
elif isinstance(node, ast.Assign):
|
|
711
|
+
for t in node.targets:
|
|
712
|
+
if isinstance(t, ast.Name) and t.id.isupper():
|
|
713
|
+
constants.append(t.id)
|
|
714
|
+
|
|
715
|
+
exports = [c.name for c in classes if not c.name.startswith('_')]
|
|
716
|
+
exports += [f.name for f in functions if not f.name.startswith('_')]
|
|
717
|
+
lines = content.split('\n')
|
|
718
|
+
|
|
719
|
+
return ModuleInfo(
|
|
720
|
+
path=filepath,
|
|
721
|
+
language='python',
|
|
722
|
+
imports=imports[:20],
|
|
723
|
+
exports=exports,
|
|
724
|
+
classes=classes,
|
|
725
|
+
functions=functions,
|
|
726
|
+
types=[],
|
|
727
|
+
constants=constants[:10],
|
|
728
|
+
docstring=ast.get_docstring(tree)[:100] if ast.get_docstring(tree) else None,
|
|
729
|
+
lines_total=len(lines),
|
|
730
|
+
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('#')])
|
|
731
|
+
)
|
|
732
|
+
|
|
733
|
+
def _extract_ast_function(self, node) -> FunctionInfo:
|
|
734
|
+
"""Extract function from Python AST node."""
|
|
735
|
+
is_async = isinstance(node, ast.AsyncFunctionDef)
|
|
736
|
+
params = []
|
|
737
|
+
for arg in node.args.args:
|
|
738
|
+
p = arg.arg
|
|
739
|
+
if arg.annotation:
|
|
740
|
+
p += ':' + self._ann_str(arg.annotation)
|
|
741
|
+
params.append(p)
|
|
742
|
+
|
|
743
|
+
decorators = []
|
|
744
|
+
for dec in node.decorator_list:
|
|
745
|
+
if isinstance(dec, ast.Name):
|
|
746
|
+
decorators.append(dec.id)
|
|
747
|
+
elif isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name):
|
|
748
|
+
decorators.append(dec.func.id)
|
|
749
|
+
|
|
750
|
+
docstring = ast.get_docstring(node)
|
|
751
|
+
return FunctionInfo(
|
|
752
|
+
name=node.name,
|
|
753
|
+
params=params[:8],
|
|
754
|
+
return_type=self._ann_str(node.returns) if node.returns else None,
|
|
755
|
+
docstring=docstring[:100] if docstring else None,
|
|
756
|
+
calls=[],
|
|
757
|
+
raises=[],
|
|
758
|
+
complexity=1,
|
|
759
|
+
lines=node.end_lineno - node.lineno + 1 if hasattr(node, 'end_lineno') else 1,
|
|
760
|
+
decorators=decorators,
|
|
761
|
+
is_async=is_async,
|
|
762
|
+
is_static='staticmethod' in decorators,
|
|
763
|
+
is_private=node.name.startswith('_') and not node.name.startswith('__'),
|
|
764
|
+
intent=self.intent_gen.generate(node.name, docstring),
|
|
765
|
+
start_line=node.lineno,
|
|
766
|
+
end_line=node.end_lineno if hasattr(node, 'end_lineno') else node.lineno
|
|
767
|
+
)
|
|
768
|
+
|
|
769
|
+
def _extract_ast_class(self, node: ast.ClassDef) -> ClassInfo:
|
|
770
|
+
"""Extract class from Python AST node."""
|
|
771
|
+
bases = []
|
|
772
|
+
for b in node.bases:
|
|
773
|
+
if isinstance(b, ast.Name):
|
|
774
|
+
bases.append(b.id)
|
|
775
|
+
elif isinstance(b, ast.Attribute):
|
|
776
|
+
bases.append(b.attr)
|
|
777
|
+
|
|
778
|
+
methods = []
|
|
779
|
+
for item in node.body:
|
|
780
|
+
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
781
|
+
methods.append(self._extract_ast_function(item))
|
|
782
|
+
|
|
783
|
+
return ClassInfo(
|
|
784
|
+
name=node.name,
|
|
785
|
+
bases=bases,
|
|
786
|
+
docstring=ast.get_docstring(node)[:100] if ast.get_docstring(node) else None,
|
|
787
|
+
methods=methods,
|
|
788
|
+
properties=[],
|
|
789
|
+
is_interface=False,
|
|
790
|
+
is_abstract='ABC' in bases,
|
|
791
|
+
generic_params=[]
|
|
792
|
+
)
|
|
793
|
+
|
|
794
|
+
def _ann_str(self, node) -> str:
|
|
795
|
+
"""Convert AST annotation to string."""
|
|
796
|
+
if isinstance(node, ast.Name):
|
|
797
|
+
return node.id
|
|
798
|
+
elif isinstance(node, ast.Constant):
|
|
799
|
+
return str(node.value)
|
|
800
|
+
elif isinstance(node, ast.Subscript):
|
|
801
|
+
base = self._ann_str(node.value)
|
|
802
|
+
if isinstance(node.slice, ast.Tuple):
|
|
803
|
+
args = ','.join(self._ann_str(e) for e in node.slice.elts)
|
|
804
|
+
else:
|
|
805
|
+
args = self._ann_str(node.slice)
|
|
806
|
+
return f"{base}[{args}]"
|
|
807
|
+
return "Any"
|
|
808
|
+
|
|
809
|
+
def _parse_js_ts(self, filepath: str, content: str, language: str) -> ModuleInfo:
|
|
810
|
+
"""Parse JS/TS using regex patterns."""
|
|
811
|
+
imports, classes, functions, types, constants, exports = [], [], [], [], [], []
|
|
812
|
+
|
|
813
|
+
# Import patterns
|
|
814
|
+
for m in re.finditer(r"import\s+.*?from\s+['\"]([^'\"]+)['\"]", content):
|
|
815
|
+
imports.append(m.group(1))
|
|
816
|
+
|
|
817
|
+
# Class patterns
|
|
818
|
+
for m in re.finditer(
|
|
819
|
+
r'(?:export\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?',
|
|
820
|
+
content
|
|
821
|
+
):
|
|
822
|
+
classes.append(ClassInfo(
|
|
823
|
+
name=m.group(1),
|
|
824
|
+
bases=[m.group(2)] if m.group(2) else [],
|
|
825
|
+
docstring=None,
|
|
826
|
+
methods=[],
|
|
827
|
+
properties=[],
|
|
828
|
+
is_interface=False,
|
|
829
|
+
is_abstract='abstract' in m.group(0),
|
|
830
|
+
generic_params=[]
|
|
831
|
+
))
|
|
832
|
+
exports.append(m.group(1))
|
|
833
|
+
|
|
834
|
+
# Function patterns
|
|
835
|
+
for m in re.finditer(
|
|
836
|
+
r'(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*([^{]+))?',
|
|
837
|
+
content
|
|
838
|
+
):
|
|
839
|
+
name = m.group(1)
|
|
840
|
+
params = [p.strip() for p in (m.group(2) or '').split(',') if p.strip()][:8]
|
|
841
|
+
functions.append(FunctionInfo(
|
|
842
|
+
name=name,
|
|
843
|
+
params=params,
|
|
844
|
+
return_type=m.group(3).strip() if m.group(3) else None,
|
|
845
|
+
docstring=None,
|
|
846
|
+
calls=[],
|
|
847
|
+
raises=[],
|
|
848
|
+
complexity=1,
|
|
849
|
+
lines=1,
|
|
850
|
+
decorators=[],
|
|
851
|
+
is_async='async' in m.group(0),
|
|
852
|
+
is_static=False,
|
|
853
|
+
is_private=name.startswith('_'),
|
|
854
|
+
intent=self.intent_gen.generate(name)
|
|
855
|
+
))
|
|
856
|
+
exports.append(name)
|
|
857
|
+
|
|
858
|
+
# Arrow function patterns
|
|
859
|
+
for m in re.finditer(
|
|
860
|
+
r'(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*(?::\s*[^=]+)?\s*=>',
|
|
861
|
+
content
|
|
862
|
+
):
|
|
863
|
+
name = m.group(1)
|
|
864
|
+
functions.append(FunctionInfo(
|
|
865
|
+
name=name,
|
|
866
|
+
params=[],
|
|
867
|
+
return_type=None,
|
|
868
|
+
docstring=None,
|
|
869
|
+
calls=[],
|
|
870
|
+
raises=[],
|
|
871
|
+
complexity=1,
|
|
872
|
+
lines=1,
|
|
873
|
+
decorators=[],
|
|
874
|
+
is_async='async' in m.group(0),
|
|
875
|
+
is_static=False,
|
|
876
|
+
is_private=name.startswith('_'),
|
|
877
|
+
intent=self.intent_gen.generate(name)
|
|
878
|
+
))
|
|
879
|
+
exports.append(name)
|
|
880
|
+
|
|
881
|
+
# Interface/Type patterns
|
|
882
|
+
for m in re.finditer(r'(?:export\s+)?(interface|type)\s+(\w+)', content):
|
|
883
|
+
types.append(TypeInfo(name=m.group(2), kind=m.group(1), definition=''))
|
|
884
|
+
exports.append(m.group(2))
|
|
885
|
+
|
|
886
|
+
# Constant patterns
|
|
887
|
+
for m in re.finditer(r'const\s+([A-Z][A-Z0-9_]+)\s*=', content):
|
|
888
|
+
constants.append(m.group(1))
|
|
889
|
+
|
|
890
|
+
lines = content.split('\n')
|
|
891
|
+
return ModuleInfo(
|
|
892
|
+
path=filepath,
|
|
893
|
+
language=language,
|
|
894
|
+
imports=imports[:20],
|
|
895
|
+
exports=list(set(exports)),
|
|
896
|
+
classes=classes,
|
|
897
|
+
functions=functions,
|
|
898
|
+
types=types,
|
|
899
|
+
constants=constants[:10],
|
|
900
|
+
docstring=None,
|
|
901
|
+
lines_total=len(lines),
|
|
902
|
+
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('//')])
|
|
903
|
+
)
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def is_tree_sitter_available() -> bool:
|
|
907
|
+
"""Check if Tree-sitter is available."""
|
|
908
|
+
return TREE_SITTER_AVAILABLE
|