nogic 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ """
2
+ Tree-sitter parser service.
3
+
4
+ Provides multi-language code parsing using Tree-sitter.
5
+ """
6
+
7
+ import logging
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+
11
+ from tree_sitter import Language, Parser, Tree, Node
12
+
13
+ from .types import (
14
+ ParseResult,
15
+ ExtractedFunction,
16
+ ExtractedClass,
17
+ ExtractedCall,
18
+ ExtractedImport,
19
+ )
20
+
21
if TYPE_CHECKING:
    # No typing-only imports are needed at present; the guard is kept as a
    # placeholder for annotations that would otherwise create import cycles.
    pass

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
25
+
26
+
27
class ParserService:
    """Service for parsing source code using Tree-sitter.

    Grammars and parsers are created on first use and cached per language
    name in ``self._languages`` / ``self._parsers``.
    """

    # Language names understood by ``_get_language`` and ``parse_file``.
    _SUPPORTED_LANGUAGES = ("python", "javascript", "typescript", "tsx")

    def __init__(self):
        self._parsers: dict[str, Parser] = {}
        self._languages: dict[str, Language] = {}

    def _get_language(self, language: str) -> Language:
        """Get or initialize a Tree-sitter Language.

        The grammar package is imported lazily, so only the grammars that
        are actually requested need to be importable.

        Args:
            language: Language name (python, javascript, typescript, tsx)

        Returns:
            The cached ``Language`` object for ``language``.

        Raises:
            ValueError: If ``language`` is not one of the supported names.
        """
        if language not in self._languages:
            if language == "python":
                import tree_sitter_python as ts_python

                grammar = ts_python.language()
            elif language == "javascript":
                import tree_sitter_javascript as ts_javascript

                grammar = ts_javascript.language()
            elif language == "typescript":
                import tree_sitter_typescript as ts_typescript

                grammar = ts_typescript.language_typescript()
            elif language == "tsx":
                import tree_sitter_typescript as ts_typescript

                grammar = ts_typescript.language_tsx()
            else:
                raise ValueError(f"Unsupported language: {language}")

            self._languages[language] = Language(grammar)

        return self._languages[language]

    def _get_parser(self, language: str) -> Parser:
        """Get or initialize a parser for a language.

        Raises:
            ValueError: If ``language`` is not supported (from ``_get_language``).
        """
        if language not in self._parsers:
            self._parsers[language] = Parser(self._get_language(language))

        return self._parsers[language]

    def parse(self, source: str | bytes, language: str) -> Tree:
        """
        Parse source code into an AST.

        Args:
            source: Source code as string or bytes (strings are UTF-8 encoded)
            language: Language name (python, javascript, typescript, tsx)

        Returns:
            Tree-sitter Tree object

        Raises:
            ValueError: If ``language`` is not supported.
        """
        if isinstance(source, str):
            source = source.encode("utf-8")

        return self._get_parser(language).parse(source)

    def get_node_text(self, node: Node, source: bytes) -> str:
        """Extract text for a node from source (decoded as UTF-8)."""
        return source[node.start_byte : node.end_byte].decode("utf-8")

    def find_nodes_by_type(
        self, root: Node, node_types: list[str], recursive: bool = True
    ) -> list[Node]:
        """
        Find all nodes of specified types.

        Nodes are returned in pre-order (a parent before its children,
        children left to right).

        Args:
            root: Root node to search from
            node_types: List of node type strings to match
            recursive: Whether to descend into children. When False only
                ``root`` itself is tested.

        Returns:
            List of matching nodes
        """
        # Set membership keeps the per-node check O(1) regardless of how
        # many node types are requested.
        wanted = set(node_types)
        results: list[Node] = []
        stack: list[Node] = [root]

        while stack:
            node = stack.pop()
            if node.type in wanted:
                results.append(node)

            if recursive:
                # Reversed so that the leftmost child is popped first.
                stack.extend(reversed(node.children))

        return results

    def find_child_by_field(self, node: Node, field_name: str) -> Node | None:
        """Find child node by field name."""
        return node.child_by_field_name(field_name)

    def find_children_by_type(self, node: Node, node_type: str) -> list[Node]:
        """Find direct children of a specific type."""
        return [child for child in node.children if child.type == node_type]

    def parse_file(self, file_path: str, content: str, language: str) -> ParseResult:
        """
        Parse a file and extract all code elements.

        Calls are collected only from the bodies of extracted functions and
        of the methods of extracted classes.

        Args:
            file_path: Path to the file (for qualified names)
            content: Source code content
            language: Language name

        Returns:
            ParseResult with extracted elements. On failure (unsupported
            language or any exception while parsing/extracting) a
            ParseResult carrying only ``error`` is returned instead of
            raising.
        """
        # Reject unknown languages before parsing: this is an expected
        # condition, not an error worth a logged traceback.
        if language not in self._SUPPORTED_LANGUAGES:
            return ParseResult(
                file_path=file_path,
                language=language,
                error=f"Unsupported language: {language}",
            )

        try:
            source_bytes = content.encode("utf-8")
            tree = self.parse(source_bytes, language)
            root = tree.root_node

            # Create module qualified name from file path
            module_qn = self._path_to_module_qn(file_path)

            # Get the appropriate extractor (imported lazily)
            if language == "python":
                from .python_extractor import PythonExtractor

                extractor = PythonExtractor(source_bytes, module_qn)
            else:
                from .js_extractor import JavaScriptExtractor

                is_ts = language in ("typescript", "tsx")
                extractor = JavaScriptExtractor(source_bytes, module_qn, is_ts)

            # Extract elements
            functions = extractor.extract_functions(root)
            classes = extractor.extract_classes(root)
            imports = extractor.extract_imports(root)

            # Extract calls from functions and methods
            calls: list[ExtractedCall] = []
            for func in functions:
                if func.node:
                    calls.extend(extractor.extract_calls(func.node, func.qualified_name))
            for cls in classes:
                for method in cls.methods:
                    if method.node:
                        calls.extend(
                            extractor.extract_calls(method.node, method.qualified_name)
                        )

            return ParseResult(
                file_path=file_path,
                language=language,
                functions=functions,
                classes=classes,
                imports=imports,
                calls=calls,
            )

        except Exception as e:
            # Boundary handler: one broken file must not abort the caller,
            # so the failure is logged and reported through the result.
            logger.exception("Error parsing %s", file_path)
            return ParseResult(
                file_path=file_path,
                language=language,
                error=str(e),
            )

    def _path_to_module_qn(self, file_path: str) -> str:
        """Convert file path to module qualified name.

        The extension is dropped, a filesystem anchor (``/`` or a drive) is
        ignored, everything up to and including the first ``src`` component
        is discarded, and a trailing ``__init__`` is removed so a package's
        ``__init__`` file maps to the package itself.

        Args:
            file_path: Path of the source file.

        Returns:
            Dotted name built from the remaining components, or the bare
            file stem when no component remains.
        """
        path = Path(file_path)
        stem = path.stem

        directories = list(path.parts[:-1])
        # The anchor of an absolute path is not a package name; keeping it
        # would produce names such as "/.home.user.pkg.mod".
        if path.anchor and directories and directories[0] == path.anchor:
            directories = directories[1:]

        parts = directories + [stem]

        # Try to find a common root like 'src'
        if "src" in parts:
            parts = parts[parts.index("src") + 1 :]

        # Remove __init__ from the end
        if parts and parts[-1] == "__init__":
            parts = parts[:-1]

        return ".".join(parts) if parts else stem
217
+
218
+
219
# Shared module-level instance; importing this module creates it once so
# callers can reuse its cached grammars and parsers.
parser_service = ParserService()
@@ -0,0 +1,484 @@
1
+ """
2
+ Python-specific code extractor using Tree-sitter.
3
+ """
4
+
5
+ from tree_sitter import Node
6
+
7
+ from .types import (
8
+ ExtractedFunction,
9
+ ExtractedClass,
10
+ ExtractedCall,
11
+ ExtractedImport,
12
+ )
13
+
14
+
15
class PythonExtractor:
    """Extractor for Python code.

    Walks a Tree-sitter syntax tree of one Python source file and builds
    ``ExtractedFunction``, ``ExtractedClass``, ``ExtractedCall`` and
    ``ExtractedImport`` records. Node text is sliced out of ``source`` using
    the nodes' byte offsets.
    """

    # String prefixes that may legally precede a docstring's opening quote.
    _DOCSTRING_PREFIXES = "rRuU"
    # Quote styles, longest first so triple quotes win over single quotes.
    _QUOTES = ('"""', "'''", '"', "'")
    # Scopes whose calls belong to a different caller.
    _NESTED_SCOPES = ("function_definition", "class_definition")

    def __init__(self, source: bytes, module_qn: str):
        """
        Args:
            source: Raw bytes of the file the syntax tree was parsed from.
            module_qn: Qualified name of the module, used as the prefix of
                every qualified name produced by this extractor.
        """
        self.source = source
        self.module_qn = module_qn

    def get_node_text(self, node: Node) -> str:
        """Get text content of a node (decoded as UTF-8)."""
        return self.source[node.start_byte : node.end_byte].decode("utf-8")

    def get_node_lines(self, node: Node) -> tuple[int, int]:
        """Get start and end line numbers (1-indexed)."""
        return node.start_point[0] + 1, node.end_point[0] + 1

    # ------------------------------------------------------------------
    # Functions
    # ------------------------------------------------------------------

    def extract_functions(self, root: Node) -> list[ExtractedFunction]:
        """Extract functions that are not methods.

        Class bodies and function bodies are not searched, so methods and
        nested functions are not returned; functions defined inside other
        statements (``if``, ``try``, ...) are.
        """
        functions: list[ExtractedFunction] = []
        self._extract_functions_recursive(root, functions, is_top_level=True)
        return functions

    def _extract_functions_recursive(
        self,
        node: Node,
        functions: list[ExtractedFunction],
        is_top_level: bool = False,
    ) -> None:
        """Collect functions below ``node`` into ``functions``, skipping classes.

        The walk is iterative (explicit stack) so deeply nested syntax
        cannot exhaust Python's recursion limit; results are appended in
        source order.

        Args:
            node: Node whose descendants are searched.
            functions: Output list, extended in place.
            is_top_level: Unused; retained for backward compatibility.
        """
        pending = list(reversed(node.children))
        while pending:
            child = pending.pop()
            if child.type == "function_definition":
                # Skip if inside a class (methods are handled separately)
                if not self._is_inside_class(child):
                    func = self._extract_function(child, class_name=None)
                    if func is not None:
                        functions.append(func)
            elif child.type == "decorated_definition":
                # Decorated function: the definition is wrapped by this node
                func_node = self._get_decorated_function(child)
                if func_node is not None and not self._is_inside_class(func_node):
                    func = self._extract_function(
                        func_node, class_name=None, decorator_parent=child
                    )
                    if func is not None:
                        functions.append(func)
            elif child.type != "class_definition":
                # Descend into other nodes but not classes; reversed so the
                # leftmost child is handled first.
                pending.extend(reversed(child.children))

    def _is_inside_class(self, node: Node) -> bool:
        """Check whether any ancestor of ``node`` is a class definition."""
        ancestor = node.parent
        while ancestor is not None:
            if ancestor.type == "class_definition":
                return True
            ancestor = ancestor.parent
        return False

    def _get_decorated_function(self, decorated_node: Node) -> Node | None:
        """Get the function node from a decorated_definition."""
        for child in decorated_node.children:
            if child.type == "function_definition":
                return child
        return None

    def _get_decorated_class(self, decorated_node: Node) -> Node | None:
        """Get the class node from a decorated_definition."""
        for child in decorated_node.children:
            if child.type == "class_definition":
                return child
        return None

    def _extract_function(
        self,
        node: Node,
        class_name: str | None,
        decorator_parent: Node | None = None,
    ) -> ExtractedFunction | None:
        """Extract information from a function_definition node.

        Args:
            node: The ``function_definition`` node.
            class_name: Name of the enclosing class for methods, else None.
            decorator_parent: The wrapping ``decorated_definition`` node, if
                the caller already knows it.

        Returns:
            The extracted function, or None when the node has no name.
        """
        name_node = node.child_by_field_name("name")
        if not name_node:
            return None
        name = self.get_node_text(name_node)

        # Build qualified name
        if class_name:
            qualified_name = f"{self.module_qn}.{class_name}.{name}"
        else:
            qualified_name = f"{self.module_qn}.{name}"

        start_line, end_line = self.get_node_lines(node)
        parameters = self._extract_parameters(node)

        # Build signature from the literal parameter text and return type
        params_node = node.child_by_field_name("parameters")
        params_text = self.get_node_text(params_node) if params_node else "()"

        return_type_node = node.child_by_field_name("return_type")
        return_type = self.get_node_text(return_type_node) if return_type_node else None

        signature = f"def {name}{params_text}"
        if return_type:
            signature = f"{signature} -> {return_type}"

        is_async = self._is_async_function(node)
        if is_async:
            signature = "async " + signature

        return ExtractedFunction(
            name=name,
            qualified_name=qualified_name,
            start_line=start_line,
            end_line=end_line,
            signature=signature,
            docstring=self._extract_docstring(node),
            decorators=self._extract_decorators(node, decorator_parent),
            is_async=is_async,
            is_method=class_name is not None,
            class_name=class_name,
            parameters=parameters,
            return_type=return_type,
            source_code=self.get_node_text(node),
            node=node,
        )

    def _splat_names(self, splat_node: Node, prefix: str) -> list[str]:
        """Return ``prefix`` + name for each identifier child of a splat pattern."""
        return [
            prefix + self.get_node_text(part)
            for part in splat_node.children
            if part.type == "identifier"
        ]

    def _extract_parameters(self, func_node: Node) -> list[str]:
        """Extract parameter names from function.

        Variadic parameters keep their ``*`` / ``**`` prefix. Annotations
        and default values are not included.
        """
        parameters: list[str] = []
        params_node = func_node.child_by_field_name("parameters")
        if not params_node:
            return parameters

        for child in params_node.children:
            if child.type == "identifier":
                parameters.append(self.get_node_text(child))
            elif child.type in (
                "typed_parameter",
                "default_parameter",
                "typed_default_parameter",
            ):
                name_node = child.child_by_field_name("name")
                if name_node:
                    parameters.append(self.get_node_text(name_node))
                elif child.children:
                    # No "name" field: the first child carries the name. It
                    # is either a plain identifier or, for an annotated
                    # *args / **kwargs, a splat pattern wrapping it.
                    first = child.children[0]
                    if first.type == "identifier":
                        parameters.append(self.get_node_text(first))
                    elif first.type == "list_splat_pattern":
                        parameters.extend(self._splat_names(first, "*"))
                    elif first.type == "dictionary_splat_pattern":
                        parameters.extend(self._splat_names(first, "**"))
            elif child.type == "list_splat_pattern":
                # *args
                parameters.extend(self._splat_names(child, "*"))
            elif child.type == "dictionary_splat_pattern":
                # **kwargs
                parameters.extend(self._splat_names(child, "**"))

        return parameters

    def _is_async_function(self, node: Node) -> bool:
        """Check if function is async (its first child is the ``async`` token)."""
        return bool(node.children) and node.children[0].type == "async"

    def _extract_docstring(self, node: Node) -> str | None:
        """Extract docstring from function or class.

        The docstring is the string literal forming the first statement of
        the body; leading comment nodes are skipped. A ``r``/``u`` prefix
        and the surrounding quotes are removed and the result is stripped.

        Returns:
            The docstring text, or None when the body does not start with a
            string literal.
        """
        body_node = node.child_by_field_name("body")
        if not body_node or not body_node.children:
            return None

        # First statement in body, ignoring comments that precede it
        first_stmt = next(
            (child for child in body_node.children if child.type != "comment"),
            None,
        )
        if first_stmt is None or first_stmt.type != "expression_statement":
            return None
        if not first_stmt.children:
            return None

        expr = first_stmt.children[0]
        if expr.type != "string":
            return None

        literal = self.get_node_text(expr).lstrip(self._DOCSTRING_PREFIXES)
        for quote in self._QUOTES:
            if literal.startswith(quote):
                return literal[len(quote) : -len(quote)].strip()
        return None

    def _extract_decorators(
        self, node: Node, decorator_parent: Node | None
    ) -> list[str]:
        """Extract decorators from a function or class.

        Args:
            node: The function or class definition node.
            decorator_parent: The wrapping ``decorated_definition`` if known;
                otherwise ``node.parent`` is inspected.

        Returns:
            Source text of each decorator (including the ``@``), in order.
        """
        parent = decorator_parent or node.parent
        if not parent or parent.type != "decorated_definition":
            return []

        return [
            self.get_node_text(child)
            for child in parent.children
            if child.type == "decorator"
        ]

    # ------------------------------------------------------------------
    # Classes
    # ------------------------------------------------------------------

    def extract_classes(self, root: Node) -> list[ExtractedClass]:
        """Extract classes from the AST.

        Class bodies are not searched for further (nested) classes.
        """
        classes: list[ExtractedClass] = []
        self._extract_classes_recursive(root, classes)
        return classes

    def _extract_classes_recursive(
        self, node: Node, classes: list[ExtractedClass]
    ) -> None:
        """Collect classes below ``node`` into ``classes`` in source order.

        Iterative walk; it does not descend into class definitions or into
        decorated definitions.
        """
        pending = list(reversed(node.children))
        while pending:
            child = pending.pop()
            if child.type == "class_definition":
                cls = self._extract_class(child)
                if cls is not None:
                    classes.append(cls)
            elif child.type == "decorated_definition":
                class_node = self._get_decorated_class(child)
                if class_node is not None:
                    cls = self._extract_class(class_node, decorator_parent=child)
                    if cls is not None:
                        classes.append(cls)
            else:
                pending.extend(reversed(child.children))

    def _extract_class(
        self, node: Node, decorator_parent: Node | None = None
    ) -> ExtractedClass | None:
        """Extract information from a class_definition node.

        Returns:
            The extracted class, or None when the node has no name.
        """
        name_node = node.child_by_field_name("name")
        if not name_node:
            return None
        name = self.get_node_text(name_node)

        start_line, end_line = self.get_node_lines(node)

        return ExtractedClass(
            name=name,
            qualified_name=f"{self.module_qn}.{name}",
            start_line=start_line,
            end_line=end_line,
            docstring=self._extract_docstring(node),
            decorators=self._extract_decorators(node, decorator_parent),
            bases=self._extract_bases(node),
            methods=self._extract_methods(node, name),
            source_code=self.get_node_text(node),
            node=node,
        )

    def _extract_bases(self, class_node: Node) -> list[str]:
        """Extract base class names.

        Only plain names and dotted attribute references are reported;
        other entries of the superclass list are ignored.
        """
        superclasses = class_node.child_by_field_name("superclasses")
        if not superclasses:
            return []

        # argument_list contains the base classes
        return [
            self.get_node_text(child)
            for child in superclasses.children
            if child.type in ("identifier", "attribute")
        ]

    def _extract_methods(
        self, class_node: Node, class_name: str
    ) -> list[ExtractedFunction]:
        """Extract methods defined directly in a class body."""
        methods: list[ExtractedFunction] = []
        body_node = class_node.child_by_field_name("body")
        if not body_node:
            return methods

        for child in body_node.children:
            if child.type == "function_definition":
                method = self._extract_function(child, class_name=class_name)
            elif child.type == "decorated_definition":
                func_node = self._get_decorated_function(child)
                if func_node is None:
                    continue
                method = self._extract_function(
                    func_node, class_name=class_name, decorator_parent=child
                )
            else:
                continue

            if method is not None:
                methods.append(method)

        return methods

    # ------------------------------------------------------------------
    # Calls
    # ------------------------------------------------------------------

    def extract_calls(self, node: Node, caller_qn: str) -> list[ExtractedCall]:
        """Extract function calls from within a function/method node.

        Args:
            node: Function or method node whose body is scanned.
            caller_qn: Qualified name recorded as the caller of every call.
        """
        calls: list[ExtractedCall] = []
        self._extract_calls_recursive(node, caller_qn, calls)
        return calls

    def _extract_calls_recursive(
        self, node: Node, caller_qn: str, calls: list[ExtractedCall]
    ) -> None:
        """Collect call expressions at or below ``node`` in source order.

        Iterative walk so deeply nested expressions cannot hit the
        recursion limit. Nested function and class definitions are not
        entered.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "call":
                call = self._extract_single_call(current, caller_qn)
                if call is not None:
                    calls.append(call)

            # Don't descend into nested function/class definitions
            pending.extend(
                child
                for child in reversed(current.children)
                if child.type not in self._NESTED_SCOPES
            )

    def _extract_single_call(
        self, call_node: Node, caller_qn: str
    ) -> ExtractedCall | None:
        """Extract a single call expression.

        For ``obj.method(...)`` the receiver is the text of ``obj`` and the
        name is ``method``; otherwise the name is the full callee text.

        Returns:
            The extracted call, or None when the node has no callee.
        """
        func_node = call_node.child_by_field_name("function")
        if not func_node:
            return None

        receiver = None
        name = self.get_node_text(func_node)

        if func_node.type == "attribute":
            # Method call: obj.method()
            obj_node = func_node.child_by_field_name("object")
            attr_node = func_node.child_by_field_name("attribute")
            if obj_node and attr_node:
                receiver = self.get_node_text(obj_node)
                name = self.get_node_text(attr_node)

        # Extract arguments (simplified - just the text)
        arguments: list[str] = []
        args_node = call_node.child_by_field_name("arguments")
        if args_node:
            arguments = [
                self.get_node_text(child)
                for child in args_node.children
                if child.type not in ("(", ")", ",")
            ]

        return ExtractedCall(
            name=name,
            line=call_node.start_point[0] + 1,
            caller_qualified_name=caller_qn,
            receiver=receiver,
            arguments=arguments,
        )

    # ------------------------------------------------------------------
    # Imports
    # ------------------------------------------------------------------

    def extract_imports(self, root: Node) -> list[ExtractedImport]:
        """Extract all ``import`` and ``from ... import`` statements."""
        imports: list[ExtractedImport] = []
        self._extract_imports_recursive(root, imports)
        return imports

    def _extract_imports_recursive(
        self, node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Collect import statements at or below ``node`` in source order.

        Iterative walk so deeply nested syntax cannot hit the recursion
        limit. Import statements themselves are not descended into.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "import_statement":
                self._extract_import_statement(current, imports)
            elif current.type == "import_from_statement":
                self._extract_import_from_statement(current, imports)
            else:
                pending.extend(reversed(current.children))

    def _extract_import_statement(
        self, node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Extract 'import x' or 'import x as y' statements."""
        line = node.start_point[0] + 1

        for child in node.children:
            if child.type == "dotted_name":
                imports.append(
                    ExtractedImport(module=self.get_node_text(child), line=line)
                )
            elif child.type == "aliased_import":
                # import x as y
                name_node = child.child_by_field_name("name")
                alias_node = child.child_by_field_name("alias")
                if name_node:
                    module = self.get_node_text(name_node)
                    alias = self.get_node_text(alias_node) if alias_node else None
                    imports.append(
                        ExtractedImport(module=module, alias=alias, line=line)
                    )

    def _extract_import_from_statement(
        self, node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Extract 'from x import y' statements.

        One ``ExtractedImport`` is appended per imported name (or one
        wildcard entry for ``from x import *``). Relative modules keep
        their leading dots in ``module``.
        """
        line = node.start_point[0] + 1
        children = node.children

        # The module is the first dotted_name / relative_import child.
        module_index = next(
            (
                index
                for index, child in enumerate(children)
                if child.type in ("dotted_name", "relative_import")
            ),
            None,
        )
        if module_index is None:
            return

        module_name = self.get_node_text(children[module_index])
        if not module_name:
            return

        # Imported names all follow the module, so scanning only what comes
        # after it guarantees the module itself is never reported as a name.
        for child in children[module_index + 1 :]:
            if child.type == "wildcard_import":
                imports.append(
                    ExtractedImport(module=module_name, is_wildcard=True, line=line)
                )
            elif child.type == "dotted_name":
                # Named import without alias
                imports.append(
                    ExtractedImport(
                        module=module_name, name=self.get_node_text(child), line=line
                    )
                )
            elif child.type == "aliased_import":
                # from x import y as z
                name_node = child.child_by_field_name("name")
                alias_node = child.child_by_field_name("alias")
                if name_node:
                    name = self.get_node_text(name_node)
                    alias = self.get_node_text(alias_node) if alias_node else None
                    imports.append(
                        ExtractedImport(
                            module=module_name, name=name, alias=alias, line=line
                        )
                    )