agent-security-scanner-mcp 1.5.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/generic_ast.py ADDED
@@ -0,0 +1,572 @@
1
+ """
2
+ Generic AST Module - Cross-Language AST Normalization
3
+
4
+ Defines a common AST representation that normalizes language-specific
5
+ tree-sitter nodes into a unified format for pattern matching.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import List, Optional, Dict, Any
10
+ from enum import Enum
11
+
12
+
13
class NodeKind(Enum):
    """Language-neutral node categories used for cross-language matching.

    Every member's value is the lowercase spelling of its own name.
    """

    # --- Top level ---------------------------------------------------
    MODULE = 'module'

    # --- Declarations ------------------------------------------------
    FUNCTION_DEF = 'function_def'
    CLASS_DEF = 'class_def'
    VARIABLE_DEF = 'variable_def'
    PARAMETER = 'parameter'

    # --- Statements --------------------------------------------------
    ASSIGNMENT = 'assignment'
    RETURN = 'return'
    IF = 'if'
    FOR = 'for'
    WHILE = 'while'
    TRY = 'try'
    WITH = 'with'
    IMPORT = 'import'
    EXPRESSION_STMT = 'expression_stmt'

    # --- Expressions -------------------------------------------------
    CALL = 'call'
    ATTRIBUTE = 'attribute'
    SUBSCRIPT = 'subscript'
    BINARY_OP = 'binary_op'
    UNARY_OP = 'unary_op'
    COMPARISON = 'comparison'

    # --- Literals ----------------------------------------------------
    STRING = 'string'
    NUMBER = 'number'
    BOOLEAN = 'boolean'
    NONE = 'none'
    LIST = 'list'
    DICT = 'dict'

    # --- Identifiers -------------------------------------------------
    IDENTIFIER = 'identifier'

    # --- Everything else ---------------------------------------------
    COMMENT = 'comment'
    BLOCK = 'block'
    ARGUMENT = 'argument'
    UNKNOWN = 'unknown'
59
+
60
+
61
@dataclass
class GenericNode:
    """
    Normalized AST node that works across all languages.

    Attributes:
        kind: The generic node type
        text: The source text of this node
        children: Child nodes, in order
        line: 1-indexed start line number (0 when not set)
        column: 0-indexed start column number
        end_line: End line number
        end_column: End column number
        metadata: Additional language-specific info
        name: Optional name string used by pattern matching
        value: Optional value node (e.g. the assigned value)
        target: Optional target node (e.g. the assignment target)
        args: Argument nodes
        operator: Optional operator text
    """
    # NodeKind is written as a forward reference, like the GenericNode
    # references below, so the annotation is not evaluated at class creation.
    kind: 'NodeKind'
    text: str
    children: List['GenericNode'] = field(default_factory=list)
    line: int = 0
    column: int = 0
    end_line: int = 0
    end_column: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Named child accessors for pattern matching
    name: Optional[str] = None
    value: Optional['GenericNode'] = None
    target: Optional['GenericNode'] = None
    args: List['GenericNode'] = field(default_factory=list)
    operator: Optional[str] = None

    def find_all(self, kind: 'NodeKind') -> List['GenericNode']:
        """Return this node and every descendant whose kind equals *kind*.

        Results are in pre-order (a node before its children, children left
        to right). Only ``children`` is followed; ``args``, ``target`` and
        ``value`` are not searched separately.

        The walk uses an explicit stack instead of recursion so that very
        deep trees do not raise RecursionError.
        """
        matches = []
        pending = [self]
        while pending:
            current = pending.pop()
            if current.kind == kind:
                matches.append(current)
            # Push in reverse so the leftmost child is popped (visited) first.
            pending.extend(reversed(current.children))
        return matches

    def find_first(self, kind: 'NodeKind') -> Optional['GenericNode']:
        """Return the first node (this one included) whose kind equals *kind*.

        "First" means first in pre-order over ``children``. Returns None when
        nothing matches. Iterative for the same reason as find_all.
        """
        pending = [self]
        while pending:
            current = pending.pop()
            if current.kind == kind:
                return current
            pending.extend(reversed(current.children))
        return None

    def __repr__(self):
        # Keep the repr short: show at most the first 30 characters of text.
        if len(self.text) > 30:
            return f"GenericNode({self.kind.value}, {self.text[:30]!r}...)"
        return f"GenericNode({self.kind.value}, {self.text!r})"
113
+
114
+
115
# Language-specific node type mappings to generic kinds.
# Keys are tree-sitter node type names; values are the NodeKind they normalize to.
PYTHON_NODE_MAP = {
    # Top-level and declarations
    "module": NodeKind.MODULE,
    "function_definition": NodeKind.FUNCTION_DEF,
    "class_definition": NodeKind.CLASS_DEF,
    # Statements
    "assignment": NodeKind.ASSIGNMENT,
    "augmented_assignment": NodeKind.ASSIGNMENT,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "try_statement": NodeKind.TRY,
    "with_statement": NodeKind.WITH,
    "import_statement": NodeKind.IMPORT,
    "import_from_statement": NodeKind.IMPORT,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Expressions
    "call": NodeKind.CALL,
    "attribute": NodeKind.ATTRIBUTE,
    "subscript": NodeKind.SUBSCRIPT,
    "binary_operator": NodeKind.BINARY_OP,
    "unary_operator": NodeKind.UNARY_OP,
    "comparison_operator": NodeKind.COMPARISON,
    # Literals
    "string": NodeKind.STRING,
    "integer": NodeKind.NUMBER,
    "float": NodeKind.NUMBER,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "none": NodeKind.NONE,
    "list": NodeKind.LIST,
    "dictionary": NodeKind.DICT,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "block": NodeKind.BLOCK,
    "parameters": NodeKind.PARAMETER,
    "argument_list": NodeKind.ARGUMENT,
}
151
+
152
# JavaScript node map; LANGUAGE_NODE_MAPS also uses it for 'typescript' and 'tsx'.
JAVASCRIPT_NODE_MAP = {
    # Top-level and declarations
    "program": NodeKind.MODULE,
    "function_declaration": NodeKind.FUNCTION_DEF,
    "arrow_function": NodeKind.FUNCTION_DEF,
    "method_definition": NodeKind.FUNCTION_DEF,
    "class_declaration": NodeKind.CLASS_DEF,
    "variable_declaration": NodeKind.VARIABLE_DEF,
    "lexical_declaration": NodeKind.VARIABLE_DEF,
    # Statements
    "assignment_expression": NodeKind.ASSIGNMENT,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "for_in_statement": NodeKind.FOR,
    "for_of_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "try_statement": NodeKind.TRY,
    "import_statement": NodeKind.IMPORT,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Expressions
    "call_expression": NodeKind.CALL,
    "member_expression": NodeKind.ATTRIBUTE,
    "subscript_expression": NodeKind.SUBSCRIPT,
    "binary_expression": NodeKind.BINARY_OP,
    "unary_expression": NodeKind.UNARY_OP,
    # Literals
    "string": NodeKind.STRING,
    "template_string": NodeKind.STRING,
    "number": NodeKind.NUMBER,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "null": NodeKind.NONE,
    "undefined": NodeKind.NONE,
    "array": NodeKind.LIST,
    "object": NodeKind.DICT,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "property_identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "statement_block": NodeKind.BLOCK,
    "arguments": NodeKind.ARGUMENT,
}
190
+
191
# Java node map: tree-sitter node type name -> NodeKind.
JAVA_NODE_MAP = {
    # Top-level and declarations
    "program": NodeKind.MODULE,
    "method_declaration": NodeKind.FUNCTION_DEF,
    "constructor_declaration": NodeKind.FUNCTION_DEF,
    "class_declaration": NodeKind.CLASS_DEF,
    "interface_declaration": NodeKind.CLASS_DEF,
    "local_variable_declaration": NodeKind.VARIABLE_DEF,
    "field_declaration": NodeKind.VARIABLE_DEF,
    # Statements
    "assignment_expression": NodeKind.ASSIGNMENT,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "enhanced_for_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "try_statement": NodeKind.TRY,
    "import_declaration": NodeKind.IMPORT,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Expressions
    "method_invocation": NodeKind.CALL,
    "field_access": NodeKind.ATTRIBUTE,
    "array_access": NodeKind.SUBSCRIPT,
    "binary_expression": NodeKind.BINARY_OP,
    "unary_expression": NodeKind.UNARY_OP,
    # Literals
    "string_literal": NodeKind.STRING,
    "decimal_integer_literal": NodeKind.NUMBER,
    "decimal_floating_point_literal": NodeKind.NUMBER,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "null_literal": NodeKind.NONE,
    "array_initializer": NodeKind.LIST,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "line_comment": NodeKind.COMMENT,
    "block_comment": NodeKind.COMMENT,
    "block": NodeKind.BLOCK,
    "argument_list": NodeKind.ARGUMENT,
}
226
+
227
# C language node map (tree-sitter-c).
# LANGUAGE_NODE_MAPS reuses this table for 'cpp' as well.
C_NODE_MAP = {
    # Top-level and declarations
    "translation_unit": NodeKind.MODULE,
    "function_definition": NodeKind.FUNCTION_DEF,
    "declaration": NodeKind.VARIABLE_DEF,
    # Calls and statements
    "call_expression": NodeKind.CALL,
    "assignment_expression": NodeKind.ASSIGNMENT,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Operators
    "binary_expression": NodeKind.BINARY_OP,
    "unary_expression": NodeKind.UNARY_OP,
    # Literals (character literals are normalized to STRING)
    "string_literal": NodeKind.STRING,
    "number_literal": NodeKind.NUMBER,
    "char_literal": NodeKind.STRING,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "null": NodeKind.NONE,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "compound_statement": NodeKind.BLOCK,
    "argument_list": NodeKind.ARGUMENT,
    "field_expression": NodeKind.ATTRIBUTE,
    "subscript_expression": NodeKind.SUBSCRIPT,
}
254
+
255
# PHP language node map (tree-sitter-php).
PHP_NODE_MAP = {
    # Top-level and declarations
    "program": NodeKind.MODULE,
    "function_definition": NodeKind.FUNCTION_DEF,
    "method_declaration": NodeKind.FUNCTION_DEF,
    "class_declaration": NodeKind.CLASS_DEF,
    "property_declaration": NodeKind.VARIABLE_DEF,
    "simple_parameter": NodeKind.PARAMETER,
    # Calls
    "function_call_expression": NodeKind.CALL,
    "method_call_expression": NodeKind.CALL,
    "member_call_expression": NodeKind.CALL,
    # Statements
    "assignment_expression": NodeKind.ASSIGNMENT,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "foreach_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "try_statement": NodeKind.TRY,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Operators
    "binary_expression": NodeKind.BINARY_OP,
    "unary_op_expression": NodeKind.UNARY_OP,
    # Literals
    "encapsed_string": NodeKind.STRING,
    "string": NodeKind.STRING,
    "integer": NodeKind.NUMBER,
    "float": NodeKind.NUMBER,
    "boolean": NodeKind.BOOLEAN,
    "null": NodeKind.NONE,
    # Identifiers and structure
    "name": NodeKind.IDENTIFIER,
    "variable_name": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "compound_statement": NodeKind.BLOCK,
    "arguments": NodeKind.ARGUMENT,
    "member_access_expression": NodeKind.ATTRIBUTE,
    "subscript_expression": NodeKind.SUBSCRIPT,
}
290
+
291
# Ruby language node map (tree-sitter-ruby).
RUBY_NODE_MAP = {
    # Top-level and declarations (a Ruby `module` is normalized to MODULE)
    "program": NodeKind.MODULE,
    "method": NodeKind.FUNCTION_DEF,
    "singleton_method": NodeKind.FUNCTION_DEF,
    "class": NodeKind.CLASS_DEF,
    "module": NodeKind.MODULE,
    # Assignments and calls
    "assignment": NodeKind.ASSIGNMENT,
    "call": NodeKind.CALL,
    "method_call": NodeKind.CALL,
    # Control flow (`unless` -> IF, `until` -> WHILE, `begin` -> TRY)
    "return": NodeKind.RETURN,
    "if": NodeKind.IF,
    "unless": NodeKind.IF,
    "for": NodeKind.FOR,
    "while": NodeKind.WHILE,
    "until": NodeKind.WHILE,
    "begin": NodeKind.TRY,
    # Operators
    "binary": NodeKind.BINARY_OP,
    "unary": NodeKind.UNARY_OP,
    # Literals
    "string": NodeKind.STRING,
    "integer": NodeKind.NUMBER,
    "float": NodeKind.NUMBER,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "nil": NodeKind.NONE,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "constant": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "do_block": NodeKind.BLOCK,
    "block": NodeKind.BLOCK,
    "argument_list": NodeKind.ARGUMENT,
    "element_reference": NodeKind.SUBSCRIPT,
}
324
+
325
# Go language node map (tree-sitter-go).
GO_NODE_MAP = {
    # Top-level and declarations (type declarations are normalized to CLASS_DEF)
    "source_file": NodeKind.MODULE,
    "function_declaration": NodeKind.FUNCTION_DEF,
    "method_declaration": NodeKind.FUNCTION_DEF,
    "type_declaration": NodeKind.CLASS_DEF,
    "short_var_declaration": NodeKind.VARIABLE_DEF,
    "var_declaration": NodeKind.VARIABLE_DEF,
    # Statements and calls
    "assignment_statement": NodeKind.ASSIGNMENT,
    "call_expression": NodeKind.CALL,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Operators
    "binary_expression": NodeKind.BINARY_OP,
    "unary_expression": NodeKind.UNARY_OP,
    # Literals
    "interpreted_string_literal": NodeKind.STRING,
    "raw_string_literal": NodeKind.STRING,
    "int_literal": NodeKind.NUMBER,
    "float_literal": NodeKind.NUMBER,
    "true": NodeKind.BOOLEAN,
    "false": NodeKind.BOOLEAN,
    "nil": NodeKind.NONE,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "block": NodeKind.BLOCK,
    "argument_list": NodeKind.ARGUMENT,
    "selector_expression": NodeKind.ATTRIBUTE,
    "index_expression": NodeKind.SUBSCRIPT,
}
355
+
356
# Rust language node map (tree-sitter-rust).
RUST_NODE_MAP = {
    # Top-level and declarations (impl and struct items are normalized to CLASS_DEF)
    "source_file": NodeKind.MODULE,
    "function_item": NodeKind.FUNCTION_DEF,
    "impl_item": NodeKind.CLASS_DEF,
    "struct_item": NodeKind.CLASS_DEF,
    "let_declaration": NodeKind.VARIABLE_DEF,
    # Expressions acting as statements (`loop` is normalized to WHILE)
    "assignment_expression": NodeKind.ASSIGNMENT,
    "call_expression": NodeKind.CALL,
    "return_expression": NodeKind.RETURN,
    "if_expression": NodeKind.IF,
    "for_expression": NodeKind.FOR,
    "while_expression": NodeKind.WHILE,
    "loop_expression": NodeKind.WHILE,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Operators
    "binary_expression": NodeKind.BINARY_OP,
    "unary_expression": NodeKind.UNARY_OP,
    # Literals
    "string_literal": NodeKind.STRING,
    "raw_string_literal": NodeKind.STRING,
    "integer_literal": NodeKind.NUMBER,
    "float_literal": NodeKind.NUMBER,
    "boolean_literal": NodeKind.BOOLEAN,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "line_comment": NodeKind.COMMENT,
    "block_comment": NodeKind.COMMENT,
    "block": NodeKind.BLOCK,
    "arguments": NodeKind.ARGUMENT,
    "field_expression": NodeKind.ATTRIBUTE,
    "index_expression": NodeKind.SUBSCRIPT,
}
386
+
387
# C# language node map (tree-sitter-c-sharp).
CSHARP_NODE_MAP = {
    # Top-level and declarations
    "compilation_unit": NodeKind.MODULE,
    "method_declaration": NodeKind.FUNCTION_DEF,
    "constructor_declaration": NodeKind.FUNCTION_DEF,
    "class_declaration": NodeKind.CLASS_DEF,
    "interface_declaration": NodeKind.CLASS_DEF,
    "variable_declaration": NodeKind.VARIABLE_DEF,
    # Statements and calls
    "assignment_expression": NodeKind.ASSIGNMENT,
    "invocation_expression": NodeKind.CALL,
    "return_statement": NodeKind.RETURN,
    "if_statement": NodeKind.IF,
    "for_statement": NodeKind.FOR,
    "foreach_statement": NodeKind.FOR,
    "while_statement": NodeKind.WHILE,
    "try_statement": NodeKind.TRY,
    "expression_statement": NodeKind.EXPRESSION_STMT,
    # Operators
    "binary_expression": NodeKind.BINARY_OP,
    "prefix_unary_expression": NodeKind.UNARY_OP,
    # Literals
    "string_literal": NodeKind.STRING,
    "interpolated_string_expression": NodeKind.STRING,
    "integer_literal": NodeKind.NUMBER,
    "real_literal": NodeKind.NUMBER,
    "boolean_literal": NodeKind.BOOLEAN,
    "null_literal": NodeKind.NONE,
    # Identifiers and structure
    "identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
    "block": NodeKind.BLOCK,
    "argument_list": NodeKind.ARGUMENT,
    "member_access_expression": NodeKind.ATTRIBUTE,
    "element_access_expression": NodeKind.SUBSCRIPT,
}
419
+
420
# Generic mapping for languages not specifically mapped.
GENERIC_NODE_MAP = {
    # Roots
    "source_file": NodeKind.MODULE,
    "program": NodeKind.MODULE,
    "module": NodeKind.MODULE,
    # Definitions
    "function": NodeKind.FUNCTION_DEF,
    "method": NodeKind.FUNCTION_DEF,
    "class": NodeKind.CLASS_DEF,
    # Calls
    "call": NodeKind.CALL,
    "call_expression": NodeKind.CALL,
    # Strings, identifiers, comments
    "string": NodeKind.STRING,
    "string_literal": NodeKind.STRING,
    "identifier": NodeKind.IDENTIFIER,
    "comment": NodeKind.COMMENT,
}
435
+
436
# Combined language map: language name -> node-type table.
# Several names share one table (typescript/tsx with javascript, cpp with c,
# and both spellings of C#).
LANGUAGE_NODE_MAPS = {
    "python": PYTHON_NODE_MAP,
    "javascript": JAVASCRIPT_NODE_MAP,
    "typescript": JAVASCRIPT_NODE_MAP,
    "tsx": JAVASCRIPT_NODE_MAP,
    "java": JAVA_NODE_MAP,
    "c": C_NODE_MAP,
    "cpp": C_NODE_MAP,  # C++ uses similar structure
    "php": PHP_NODE_MAP,
    "ruby": RUBY_NODE_MAP,
    "go": GO_NODE_MAP,
    "rust": RUST_NODE_MAP,
    "csharp": CSHARP_NODE_MAP,
    "c_sharp": CSHARP_NODE_MAP,
}
452
+
453
+
454
class ASTConverter:
    """
    Converts tree-sitter AST to generic AST representation.

    The node-type table is looked up in LANGUAGE_NODE_MAPS by language name;
    languages without an entry fall back to GENERIC_NODE_MAP, and node types
    missing from the chosen table are converted as NodeKind.UNKNOWN.
    """

    # tree-sitter node/token type names recognised while extracting named parts.
    _CALLEE_TYPES = ('identifier', 'attribute', 'member_expression')
    _ARGUMENT_LIST_TYPES = ('argument_list', 'arguments')
    _ARGUMENT_PUNCTUATION = ('(', ')', ',')
    _ASSIGNMENT_TOKENS = ('=', ':=')
    _FUNCTION_NAME_TYPES = ('identifier', 'name')
    _BINARY_OPERATORS = frozenset((
        '+', '-', '*', '/', '%',
        '==', '!=', '<', '>', '<=', '>=',
        'and', 'or', '&&', '||',
    ))

    def __init__(self, language: str):
        """Remember *language* and select its node-type table."""
        self.language = language
        self.node_map = LANGUAGE_NODE_MAPS.get(language, GENERIC_NODE_MAP)

    def convert(self, ts_node, source_bytes: bytes) -> GenericNode:
        """Convert a tree-sitter node (and its whole subtree) to a generic node.

        Args:
            ts_node: A tree-sitter node, or any object exposing the same
                attributes (type, start_byte, end_byte, start_point,
                end_point, children). Missing attributes fall back to defaults.
            source_bytes: The source the node's byte offsets index into.

        Returns:
            The GenericNode for ts_node with children converted in order and
            name/args/target/value/operator filled in where applicable.

        The traversal uses an explicit stack rather than recursion so deeply
        nested trees do not raise RecursionError.
        """
        root = self._build_node(ts_node, source_bytes)
        pending = [(root, ts_node)]
        visited = []
        while pending:
            node, ts_current = pending.pop()
            visited.append((node, ts_current))
            for ts_child in self._ts_children(ts_current):
                child = self._build_node(ts_child, source_bytes)
                node.children.append(child)
                pending.append((child, ts_child))

        # Every child was recorded after its parent, so walking the list
        # backwards handles children before parents.
        for node, ts_current in reversed(visited):
            self._extract_named_parts(node, ts_current, source_bytes)

        return root

    def _build_node(self, ts_node, source_bytes: bytes) -> GenericNode:
        """Create the GenericNode for a single tree-sitter node, without children."""
        # Handle MockNode or standard tree-sitter node
        node_type = getattr(ts_node, 'type', 'unknown')
        start_point = getattr(ts_node, 'start_point', (0, 0))
        end_point = getattr(ts_node, 'end_point', (0, 0))
        return GenericNode(
            kind=self.node_map.get(node_type, NodeKind.UNKNOWN),
            text=self._node_text(ts_node, source_bytes),
            line=start_point[0] + 1,  # Convert to 1-indexed
            column=start_point[1],
            end_line=end_point[0] + 1,
            end_column=end_point[1],
            metadata={'ts_type': node_type},
        )

    @staticmethod
    def _ts_children(ts_node) -> list:
        """Return ts_node's children as a list ([] when absent or empty)."""
        children = getattr(ts_node, 'children', None)
        return list(children) if children else []

    @staticmethod
    def _node_text(ts_node, source_bytes: bytes) -> str:
        """Decode the source slice covered by ts_node, replacing invalid bytes."""
        start_byte = getattr(ts_node, 'start_byte', 0)
        end_byte = getattr(ts_node, 'end_byte', len(source_bytes))
        return source_bytes[start_byte:end_byte].decode('utf-8', errors='replace')

    def _generic_children(self, node: GenericNode, ts_children, source_bytes: bytes):
        """Return generic nodes positionally matching ts_children.

        Reuses node.children when it has the same length as ts_children (the
        case after convert()); otherwise converts the children afresh.
        """
        if len(node.children) == len(ts_children):
            return node.children
        return [self.convert(ts_child, source_bytes) for ts_child in ts_children]

    def _extract_named_parts(self, node: GenericNode, ts_node, source_bytes: bytes):
        """Extract named parts (name, args, etc.) for pattern matching.

        Mutates *node* in place. args, target and value refer to the already
        converted nodes in node's children subtree instead of re-converting
        them, which previously doubled the work at every level of call nesting.
        """
        ts_children = self._ts_children(ts_node)

        # For function calls, extract function name and arguments
        if node.kind == NodeKind.CALL:
            generic_children = None
            for index, ts_child in enumerate(ts_children):
                child_type = getattr(ts_child, 'type', 'unknown')
                if child_type in self._CALLEE_TYPES:
                    # No break: when several children qualify, the last one wins.
                    node.name = self._node_text(ts_child, source_bytes)
                elif child_type in self._ARGUMENT_LIST_TYPES:
                    if generic_children is None:
                        generic_children = self._generic_children(
                            node, ts_children, source_bytes)
                    ts_args = self._ts_children(ts_child)
                    generic_args = self._generic_children(
                        generic_children[index], ts_args, source_bytes)
                    # Keep real arguments only; skip parentheses and commas.
                    node.args.extend(
                        generic_arg
                        for ts_arg, generic_arg in zip(ts_args, generic_args)
                        if getattr(ts_arg, 'type', 'unknown')
                        not in self._ARGUMENT_PUNCTUATION
                    )

        # For assignments, extract target and value
        elif node.kind == NodeKind.ASSIGNMENT:
            operand_indexes = [
                index
                for index, ts_child in enumerate(ts_children)
                if getattr(ts_child, 'type', 'unknown') not in self._ASSIGNMENT_TOKENS
            ]
            if len(operand_indexes) >= 2:
                generic_children = self._generic_children(
                    node, ts_children, source_bytes)
                # First remaining child is the target, last one is the value.
                node.target = generic_children[operand_indexes[0]]
                node.value = generic_children[operand_indexes[-1]]

        # For binary operations, extract operator
        elif node.kind == NodeKind.BINARY_OP:
            for ts_child in ts_children:
                if getattr(ts_child, 'type', 'unknown') in self._BINARY_OPERATORS:
                    node.operator = self._node_text(ts_child, source_bytes)

        # For function definitions, extract name
        elif node.kind == NodeKind.FUNCTION_DEF:
            for ts_child in ts_children:
                if getattr(ts_child, 'type', 'unknown') in self._FUNCTION_NAME_TYPES:
                    node.name = self._node_text(ts_child, source_bytes)
                    break
533
+
534
+
535
def convert_tree(ts_tree, language: str, source_bytes: bytes) -> GenericNode:
    """Convert a whole tree-sitter tree to a generic AST.

    Shortcut for building an ASTConverter for *language* and converting
    ``ts_tree.root_node`` against *source_bytes*.
    """
    return ASTConverter(language).convert(ts_tree.root_node, source_bytes)
539
+
540
+
541
if __name__ == '__main__':
    # Quick manual check: parse the file given on the command line with
    # ast_parser and print a summary of its generic AST.
    import sys
    sys.path.insert(0, '.')

    from ast_parser import ASTParser

    if len(sys.argv) < 2:
        print("Usage: python generic_ast.py <file_path>")
        sys.exit(1)

    parsed = ASTParser().parse_file(sys.argv[1])

    if not parsed.success:
        print(f"Error: {parsed.error}")
    else:
        tree_root = convert_tree(parsed.tree, parsed.language, parsed.source_bytes)

        print(f"Language: {parsed.language}")
        print(f"Root: {tree_root}")
        print(f"\nChildren ({len(tree_root.children)}):")

        # Show at most the first 15 top-level children.
        for position, top_level in enumerate(tree_root.children[:15]):
            print(f" [{position}] {top_level.kind.value}: {top_level.text[:40]!r}...")

        # Find all function calls and list at most the first 10.
        found_calls = tree_root.find_all(NodeKind.CALL)
        if found_calls:
            print(f"\nFunction calls found ({len(found_calls)}):")
            for found in found_calls[:10]:
                print(f" - {found.name or found.text[:30]} at line {found.line}")