codegraph-gen 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_gen/__init__.py +0 -0
- codegraph_gen/__main__.py +311 -0
- codegraph_gen/ai.py +77 -0
- codegraph_gen/analyzer.py +100 -0
- codegraph_gen/builder.py +747 -0
- codegraph_gen/cluster.py +116 -0
- codegraph_gen/config.py +76 -0
- codegraph_gen/detect.py +59 -0
- codegraph_gen/engine.py +367 -0
- codegraph_gen/parser/__init__.py +27 -0
- codegraph_gen/parser/base.py +38 -0
- codegraph_gen/parser/cpp.py +349 -0
- codegraph_gen/parser/go.py +268 -0
- codegraph_gen/parser/javascript.py +370 -0
- codegraph_gen/parser/kotlin.py +387 -0
- codegraph_gen/parser/python.py +415 -0
- codegraph_gen/parser/rust.py +497 -0
- codegraph_gen/parser/swift.py +327 -0
- codegraph_gen/py.typed +0 -0
- codegraph_gen/renderer.py +498 -0
- codegraph_gen/writer.py +97 -0
- codegraph_gen-0.2.0.dist-info/METADATA +169 -0
- codegraph_gen-0.2.0.dist-info/RECORD +25 -0
- codegraph_gen-0.2.0.dist-info/WHEEL +4 -0
- codegraph_gen-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import tree_sitter
|
|
4
|
+
from codegraph_gen.parser.base import BaseParser, ExtractionResult, NodeSchema, EdgeSchema
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CCppParser(BaseParser):
|
|
10
|
+
def __init__(self, lang_module):
    """Build a tree-sitter parser for the grammar exposed by ``lang_module``.

    Args:
        lang_module: A grammar module whose ``language()`` callable returns
            the handle accepted by ``tree_sitter.Language``.
    """
    grammar = tree_sitter.Language(lang_module.language())
    self.language = grammar
    self.parser = tree_sitter.Parser(grammar)
|
|
13
|
+
|
|
14
|
+
def _get_declarator_name(self, node, source: bytes) -> str:
    """Return the name buried inside a (possibly nested) declarator node.

    Args:
        node: Declarator node to inspect; a falsy value yields ``""``.
        source: Raw bytes of the file the node's byte offsets refer to.

    Returns:
        The source text of the first name-bearing node found, or ``""``
        when no name can be located.
    """
    if not node:
        return ""

    kind = node.type

    # Node kinds whose own source text *is* the name.
    name_kinds = (
        "identifier",
        "field_identifier",
        "destructor_name",
        "qualified_identifier",
        "operator_name",
    )
    if kind in name_kinds:
        raw = source[node.start_byte : node.end_byte]
        return raw.decode("utf-8", errors="replace")

    # Node kinds that wrap another declarator in their "declarator" field.
    wrapper_kinds = (
        "pointer_declarator",
        "reference_declarator",
        "parenthesized_declarator",
        "array_declarator",
        "function_declarator",
    )
    if kind in wrapper_kinds:
        inner = node.child_by_field_name("declarator")
        if inner:
            # The inner result is final, even when it is empty.
            return self._get_declarator_name(inner, source)

    # Fallback: depth-first search of every child for the first name.
    for candidate in node.children:
        found = self._get_declarator_name(candidate, source)
        if found:
            return found
    return ""
|
|
44
|
+
|
|
45
|
+
def _get_docstring(self, node, source: bytes) -> str:
    """Collect the comments that directly precede ``node`` as a docstring.

    Walks backwards over consecutive comment siblings, strips the comment
    markers and joins the pieces in source order.

    Line comments (``//``, ``///``) lose their leading slashes. Block
    comments (``/* ... */``, ``/** ... */``) lose the opening and closing
    markers as well as the decorative leading ``*`` on each line.

    Args:
        node: Node whose preceding siblings are inspected.
        source: Raw bytes of the file the byte offsets refer to.

    Returns:
        The cleaned comment text joined with newlines, or ``""`` when the
        node is not preceded by a comment.
    """
    comments = []
    prev = node.prev_sibling
    while prev and prev.type in ("comment", "line_comment", "block_comment"):
        text = (
            source[prev.start_byte : prev.end_byte]
            .decode("utf-8", errors="replace")
            .strip()
        )
        if text.startswith("/*"):
            # Handle the block form first: stripping "/" up front would
            # destroy the "/*" marker and leave a stray "*" behind.
            inner = text[2:]
            if inner.endswith("*/"):
                inner = inner[:-2]
            lines = [ln.strip().lstrip("*").strip() for ln in inner.splitlines()]
            clean_text = "\n".join(lines).strip()
        else:
            clean_text = text.lstrip("/").strip()
        comments.append(clean_text)
        prev = prev.prev_sibling

    # Comments were gathered bottom-up; restore source order.
    return "\n".join(reversed(comments))
|
|
65
|
+
|
|
66
|
+
def _get_signature(self, node, source: bytes) -> str:
    """Return the header text of a definition, without its body.

    When the node has a ``body`` field, the result is everything from the
    start of the node up to the body, stripped, with a trailing ``{``
    removed. Otherwise it is the first line of the node's source text.

    Args:
        node: Definition node to describe.
        source: Raw bytes of the file the byte offsets refer to.
    """
    body_node = node.child_by_field_name("body")
    if not body_node:
        whole = source[node.start_byte : node.end_byte].decode(
            "utf-8", errors="replace"
        )
        return whole.split("\n")[0]

    header_bytes = source[node.start_byte : body_node.start_byte]
    header = header_bytes.decode("utf-8", errors="replace").strip()
    return header[:-1].strip() if header.endswith("{") else header
|
|
83
|
+
|
|
84
|
+
def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
    """Extract graph nodes and edges from one C or C++ source file.

    Emits a ``file`` node, one node per named class/struct/union/enum that
    has a body, one per named namespace and one per function definition.
    Edges emitted: ``contains`` (parent -> symbol), ``inherits`` (class ->
    base type text), ``imports`` (file -> include path) and ``calls``
    (enclosing scope -> callee text).

    Symbol IDs have the form ``<rel_path>::<dotted.qualified.name>``.
    Qualified names written with ``::`` in the source are normalised to the
    dotted form so that ``class A::B {}`` and an out-of-line member
    ``A::B::m`` resolve to the same parent ID.

    Args:
        file_path: File to parse.
        workspace_dir: Root that node IDs are made relative to.

    Returns:
        The extracted nodes and edges; an empty result if the file cannot
        be read.

    Raises:
        ValueError: If ``file_path`` is not located under ``workspace_dir``
            (raised by ``Path.relative_to``).
    """
    try:
        source = file_path.read_bytes()
    except Exception as e:
        logger.error(f"Error reading file {file_path}: {e}")
        return ExtractionResult()

    tree = self.parser.parse(source)
    root = tree.root_node

    rel_path = str(file_path.relative_to(workspace_dir))
    result = ExtractionResult()
    defined_ids = set()

    def text_of(n) -> str:
        """Decode the source text covered by ``n``."""
        return source[n.start_byte : n.end_byte].decode("utf-8", errors="replace")

    # Add file node
    file_node_id = rel_path
    result.nodes.append(
        NodeSchema(
            id=file_node_id,
            label=file_path.name,
            type="file",
            source_file=rel_path,
            line_start=1,
            line_end=len(source.splitlines()) or 1,
            signature=f"file {file_path.name}",
            # NOTE(review): the root node is not expected to have a previous
            # sibling, so this likely always yields "" - confirm intent.
            docstring=self._get_docstring(root, source),
        )
    )
    defined_ids.add(file_node_id)

    # Stack of (symbol_id, symbol_type). The file entry is never popped.
    scope_stack = [(file_node_id, "file")]

    # tree-sitter node type -> graph symbol type for scope-introducing nodes.
    scope_kinds = {
        "class_specifier": "class",
        "struct_specifier": "struct",
        "union_specifier": "union",
        "enum_specifier": "enum",
        "namespace_definition": "namespace",
    }

    def first_base_type(n):
        """Return the text of the first type-like node under ``n``, or None."""
        if n.type in ("type_identifier", "qualified_identifier", "template_type"):
            return text_of(n).strip()
        for c in n.children:
            bt = first_base_type(c)
            if bt:
                return bt
        return None

    def walk_children(n):
        for child in n.children:
            walk(child)

    def walk(node):
        if node.type == "ERROR" or getattr(node, "is_error", False):
            logger.debug(f"Skipping syntax error node in C/C++ AST: {node}")
            return

        node_type = node.type
        pushed_scope = False

        if node_type in scope_kinds:
            # A specifier without a body is a forward declaration or a plain
            # type reference, not a definition. Namespaces are exempt.
            if node_type != "namespace_definition" and not node.child_by_field_name(
                "body"
            ):
                walk_children(node)
                return

            name_node = node.child_by_field_name("name")
            name = text_of(name_node).strip() if name_node else ""

            if not name:
                # Anonymous specifier: contents belong to the enclosing scope.
                walk_children(node)
                return

            parent_id = scope_stack[-1][0]
            if "::" in name:
                # Already qualified: ignore the lexical parent and use the
                # same dotted form that out-of-line members look up below.
                symbol_id = f"{rel_path}::{name.replace('::', '.')}"
            else:
                _, sep, parent_qualifier = parent_id.partition("::")
                if sep:
                    symbol_id = f"{rel_path}::{parent_qualifier}.{name}"
                else:
                    symbol_id = f"{rel_path}::{name}"

            sym_type = scope_kinds[node_type]

            result.nodes.append(
                NodeSchema(
                    id=symbol_id,
                    label=name,
                    type=sym_type,
                    source_file=rel_path,
                    line_start=node.start_point[0] + 1,
                    line_end=node.end_point[0] + 1,
                    signature=self._get_signature(node, source),
                    docstring=self._get_docstring(node, source),
                )
            )
            defined_ids.add(symbol_id)

            result.edges.append(
                EdgeSchema(source=parent_id, target=symbol_id, relation="contains")
            )

            # Handle base classes inheritance: one edge per entry of the
            # base_class_clause that contains a type-like node.
            for child in node.children:
                if child.type != "base_class_clause":
                    continue
                for sub in child.children:
                    base_name = first_base_type(sub)
                    if base_name:
                        result.edges.append(
                            EdgeSchema(
                                source=symbol_id,
                                target=base_name,
                                relation="inherits",
                            )
                        )

            scope_stack.append((symbol_id, sym_type))
            pushed_scope = True

        elif node_type == "function_definition":
            declarator = node.child_by_field_name("declarator")
            func_name = self._get_declarator_name(declarator, source)

            if func_name:
                parent_id, parent_type = scope_stack[-1]

                if "::" in func_name:
                    # Out-of-line member definition such as ``A::B::m``.
                    class_part, method_part = func_name.rsplit("::", 1)
                    class_id = f"{rel_path}::{class_part.replace('::', '.')}"
                    method_id = f"{class_id}.{method_part}"
                    sym_type = "method"
                    func_label = method_part
                    # Attach to the class only if it was defined in this
                    # file before this point; otherwise to the file.
                    edge_source = (
                        class_id if class_id in defined_ids else file_node_id
                    )
                elif parent_type in ("class", "struct", "union", "namespace"):
                    method_id = f"{parent_id}.{func_name}"
                    sym_type = "method" if parent_type != "namespace" else "function"
                    func_label = func_name
                    edge_source = parent_id
                else:
                    method_id = f"{rel_path}::{func_name}"
                    sym_type = "function"
                    func_label = func_name
                    edge_source = parent_id

                result.edges.append(
                    EdgeSchema(
                        source=edge_source, target=method_id, relation="contains"
                    )
                )

                result.nodes.append(
                    NodeSchema(
                        id=method_id,
                        label=func_label,
                        type=sym_type,
                        source_file=rel_path,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        signature=self._get_signature(node, source),
                        docstring=self._get_docstring(node, source),
                    )
                )
                defined_ids.add(method_id)

                # Calls found in the body are attributed to this function.
                scope_stack.append((method_id, sym_type))
                pushed_scope = True

        elif node_type == "preproc_include":
            path_node = node.child_by_field_name("path")
            if not path_node:
                for child in node.children:
                    if child.type in ("string_literal", "system_lib_string"):
                        path_node = child
                        break
            if path_node:
                include_path = text_of(path_node).strip('"<>')
                result.edges.append(
                    EdgeSchema(
                        source=file_node_id, target=include_path, relation="imports"
                    )
                )

        elif node_type == "call_expression":
            func_node = node.child_by_field_name("function")
            if func_node:
                callee_name = text_of(func_node).strip()
                caller_id = scope_stack[-1][0]
                result.edges.append(
                    EdgeSchema(source=caller_id, target=callee_name, relation="calls")
                )

        walk_children(node)

        if pushed_scope:
            scope_stack.pop()

    walk(root)
    return result
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class CParser(CCppParser):
    """Parser bound to the tree-sitter C grammar."""

    def __init__(self):
        # Imported lazily so the grammar package is only needed when a
        # C parser is actually constructed.
        import tree_sitter_c as c_grammar

        super().__init__(c_grammar)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class CppParser(CCppParser):
    """Parser bound to the tree-sitter C++ grammar."""

    def __init__(self):
        # Imported lazily so the grammar package is only needed when a
        # C++ parser is actually constructed.
        import tree_sitter_cpp as cpp_grammar

        super().__init__(cpp_grammar)
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import tree_sitter
|
|
4
|
+
import tree_sitter_go
|
|
5
|
+
from codegraph_gen.parser.base import BaseParser, ExtractionResult, NodeSchema, EdgeSchema
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GoParser(BaseParser):
|
|
11
|
+
def __init__(self):
    """Build a tree-sitter parser for the Go grammar."""
    go_language = tree_sitter.Language(tree_sitter_go.language())
    self.language = go_language
    self.parser = tree_sitter.Parser(go_language)
|
|
14
|
+
|
|
15
|
+
def _get_docstring(self, node, source: bytes) -> str:
    """Find the comments immediately preceding ``node``.

    Walks backwards over consecutive comment siblings, strips the comment
    markers and joins the pieces in source order. ``//`` comments lose
    their leading slashes; ``/* ... */`` comments lose both markers and
    any decorative leading ``*`` on each line.

    Args:
        node: Node whose preceding siblings are inspected.
        source: Raw bytes of the file the byte offsets refer to.

    Returns:
        The cleaned comment text joined with newlines, or ``""`` when the
        node is not preceded by a comment.
    """
    comments = []
    prev = node.prev_sibling
    while prev and prev.type in ("comment", "line_comment"):
        text = (
            source[prev.start_byte : prev.end_byte]
            .decode("utf-8", errors="replace")
            .strip()
        )
        if text.startswith("/*"):
            # Block comment: remove the delimiters, then per-line stars.
            inner = text[2:]
            if inner.endswith("*/"):
                inner = inner[:-2]
            lines = [ln.strip().lstrip("*").strip() for ln in inner.splitlines()]
            clean_text = "\n".join(lines).strip()
        else:
            # str.lstrip takes a character set, so "/" covers "//" too.
            clean_text = text.lstrip("/").strip()
        comments.append(clean_text)
        prev = prev.prev_sibling

    # Comments were gathered bottom-up; restore source order.
    return "\n".join(reversed(comments))
|
|
32
|
+
|
|
33
|
+
def _get_signature(self, node, source: bytes) -> str:
    """Return the declaration header of ``node`` without its body.

    With a ``body`` field present, the header is the text from the start
    of the node up to the body, stripped, minus a trailing ``{``. Without
    one, the header is the first line of the node's source text.

    Args:
        node: Declaration node to describe.
        source: Raw bytes of the file the byte offsets refer to.
    """
    body = node.child_by_field_name("body")
    stop = body.start_byte if body else node.end_byte
    text = source[node.start_byte : stop].decode("utf-8", errors="replace")

    if not body:
        return text.split("\n")[0]

    text = text.strip()
    if text.endswith("{"):
        text = text[:-1].strip()
    return text
|
|
47
|
+
|
|
48
|
+
def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
    """Extract graph nodes and edges from one Go source file.

    Emits a ``file`` node, one node per ``type_spec``, one per function
    declaration and one per method declaration. Edges emitted:
    ``contains``, ``imports`` (with an ``import_map`` of local name ->
    package name) and ``calls`` (enclosing function/method -> callee text).

    Method IDs are ``<rel_path>::<Receiver>.<name>``. The receiver is
    reduced to its base type name (``*T`` and ``T[K]`` both become ``T``)
    so that it lines up with the ID of the type's own node.

    Args:
        file_path: File to parse.
        workspace_dir: Root that node IDs are made relative to.

    Returns:
        The extracted nodes and edges; an empty result if the file cannot
        be read.

    Raises:
        ValueError: If ``file_path`` is not located under ``workspace_dir``
            (raised by ``Path.relative_to``).
    """
    try:
        source = file_path.read_bytes()
    except Exception as e:
        logger.error(f"Error reading file {file_path}: {e}")
        return ExtractionResult()

    tree = self.parser.parse(source)
    root = tree.root_node

    rel_path = str(file_path.relative_to(workspace_dir))
    result = ExtractionResult()

    def text_of(n) -> str:
        """Decode the source text covered by ``n``."""
        return source[n.start_byte : n.end_byte].decode("utf-8", errors="replace")

    # Add file node
    file_node_id = rel_path
    result.nodes.append(
        NodeSchema(
            id=file_node_id,
            label=file_path.name,
            type="file",
            source_file=rel_path,
            line_start=1,
            line_end=len(source.splitlines()) or 1,
            # The package name is approximated by the directory name.
            signature=f"package {file_path.parent.name or 'main'}",
            # NOTE(review): the root node is not expected to have a previous
            # sibling, so this likely always yields "" - confirm intent.
            docstring=self._get_docstring(root, source),
        )
    )

    def get_receiver_type(method_node) -> str | None:
        """Return the receiver's base type name, or None when unavailable."""
        receiver = method_node.child_by_field_name("receiver")
        if not receiver:
            return None
        # Find parameter_declaration in receiver
        for child in receiver.children:
            if child.type != "parameter_declaration":
                continue
            type_node = child.child_by_field_name("type")
            if type_node:
                # Drop the pointer marker and any type parameters so the
                # result equals the name used for the type's own node.
                base = text_of(type_node).strip().lstrip("*").strip()
                return base.split("[", 1)[0].strip()
        return None

    def enclosing_callable_id(call_node) -> str:
        """Return the ID of the nearest enclosing function or method.

        Falls back to the file node for calls outside any named callable.
        """
        curr = call_node.parent
        while curr:
            if curr.type in ("function_declaration", "method_declaration"):
                c_name_node = curr.child_by_field_name("name")
                if c_name_node:
                    c_name = text_of(c_name_node)
                    if curr.type == "method_declaration":
                        r_type = get_receiver_type(curr)
                        if r_type:
                            return f"{rel_path}::{r_type}.{c_name}"
                    return f"{rel_path}::{c_name}"
            curr = curr.parent
        return file_node_id

    def walk(node):
        if node.type == "ERROR" or getattr(node, "is_error", False):
            logger.debug(f"Skipping syntax error node in Go AST: {node}")
            return

        node_type = node.type

        if node_type == "type_declaration":
            for child in node.children:
                if child.type != "type_spec":
                    continue
                name_node = child.child_by_field_name("name")
                if not name_node:
                    continue
                type_name = text_of(name_node)
                type_id = f"{rel_path}::{type_name}"

                # Everything that is not an interface is reported as
                # "struct", including other named types.
                sym_type = "struct"
                if any(tc.type == "interface_type" for tc in child.children):
                    sym_type = "interface"

                result.nodes.append(
                    NodeSchema(
                        id=type_id,
                        label=type_name,
                        type=sym_type,
                        source_file=rel_path,
                        line_start=child.start_point[0] + 1,
                        line_end=child.end_point[0] + 1,
                        signature=f"type {type_name} {sym_type}",
                        # Comments are looked up on the enclosing
                        # type_declaration, not on the individual spec.
                        docstring=self._get_docstring(node, source),
                    )
                )

                result.edges.append(
                    EdgeSchema(
                        source=file_node_id,
                        target=type_id,
                        relation="contains",
                    )
                )

        elif node_type == "function_declaration":
            name_node = node.child_by_field_name("name")
            if name_node:
                func_name = text_of(name_node)
                func_id = f"{rel_path}::{func_name}"

                result.nodes.append(
                    NodeSchema(
                        id=func_id,
                        label=func_name,
                        type="function",
                        source_file=rel_path,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        signature=self._get_signature(node, source),
                        docstring=self._get_docstring(node, source),
                    )
                )

                result.edges.append(
                    EdgeSchema(
                        source=file_node_id, target=func_id, relation="contains"
                    )
                )

        elif node_type == "method_declaration":
            name_node = node.child_by_field_name("name")
            if name_node:
                method_name = text_of(name_node)
                receiver_type = get_receiver_type(node)

                if receiver_type:
                    parent_id = f"{rel_path}::{receiver_type}"
                    method_id = f"{parent_id}.{method_name}"
                else:
                    parent_id = file_node_id
                    method_id = f"{rel_path}::{method_name}"

                result.nodes.append(
                    NodeSchema(
                        id=method_id,
                        label=method_name,
                        type="method",
                        source_file=rel_path,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        signature=self._get_signature(node, source),
                        docstring=self._get_docstring(node, source),
                    )
                )

                result.edges.append(
                    EdgeSchema(
                        source=parent_id, target=method_id, relation="contains"
                    )
                )

        elif node_type == "import_spec":
            path_node = node.child_by_field_name("path")
            if path_node:
                import_path = text_of(path_node).strip("\"'")

                pkg_name = import_path.split("/")[-1]
                import_map = {}

                name_node = node.child_by_field_name("name")
                if name_node:
                    local_name = text_of(name_node)
                    if local_name == ".":
                        # Dot import: names are merged into the file scope.
                        import_map["*"] = "*"
                    else:
                        import_map[local_name] = pkg_name
                else:
                    import_map[pkg_name] = pkg_name

                result.edges.append(
                    EdgeSchema(
                        source=file_node_id,
                        target=import_path,
                        relation="imports",
                        import_map=import_map,
                    )
                )

        elif node_type == "call_expression":
            func_node = node.child_by_field_name("function")
            if func_node:
                result.edges.append(
                    EdgeSchema(
                        source=enclosing_callable_id(node),
                        target=text_of(func_node),
                        relation="calls",
                    )
                )

        for child in node.children:
            walk(child)

    walk(root)
    return result
|