codegraph-gen 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_gen/__init__.py +0 -0
- codegraph_gen/__main__.py +311 -0
- codegraph_gen/ai.py +77 -0
- codegraph_gen/analyzer.py +100 -0
- codegraph_gen/builder.py +747 -0
- codegraph_gen/cluster.py +116 -0
- codegraph_gen/config.py +76 -0
- codegraph_gen/detect.py +59 -0
- codegraph_gen/engine.py +367 -0
- codegraph_gen/parser/__init__.py +27 -0
- codegraph_gen/parser/base.py +38 -0
- codegraph_gen/parser/cpp.py +349 -0
- codegraph_gen/parser/go.py +268 -0
- codegraph_gen/parser/javascript.py +370 -0
- codegraph_gen/parser/kotlin.py +387 -0
- codegraph_gen/parser/python.py +415 -0
- codegraph_gen/parser/rust.py +497 -0
- codegraph_gen/parser/swift.py +327 -0
- codegraph_gen/py.typed +0 -0
- codegraph_gen/renderer.py +498 -0
- codegraph_gen/writer.py +97 -0
- codegraph_gen-0.2.0.dist-info/METADATA +169 -0
- codegraph_gen-0.2.0.dist-info/RECORD +25 -0
- codegraph_gen-0.2.0.dist-info/WHEEL +4 -0
- codegraph_gen-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import tree_sitter
|
|
4
|
+
import tree_sitter_swift
|
|
5
|
+
from codegraph_gen.parser.base import BaseParser, ExtractionResult, NodeSchema, EdgeSchema
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SwiftParser(BaseParser):
|
|
11
|
+
def __init__(self):
    """Create the tree-sitter Swift language object and a parser bound to it."""
    swift_language = tree_sitter.Language(tree_sitter_swift.language())
    self.language = swift_language
    # The parser shares the very same Language instance stored on the parser object.
    self.parser = tree_sitter.Parser(swift_language)
|
|
14
|
+
|
|
15
|
+
def _get_docstring(self, node, source: bytes) -> str:
|
|
16
|
+
"""Finds comments immediately preceding the node."""
|
|
17
|
+
docstring = ""
|
|
18
|
+
prev = node.prev_sibling
|
|
19
|
+
comments = []
|
|
20
|
+
while prev and prev.type in ("comment", "line_comment", "block_comment"):
|
|
21
|
+
comment_text = source[prev.start_byte : prev.end_byte].decode(
|
|
22
|
+
"utf-8", errors="replace"
|
|
23
|
+
)
|
|
24
|
+
# Strip comment markers (///, //, /*)
|
|
25
|
+
clean_text = comment_text.strip().lstrip("/").strip()
|
|
26
|
+
comments.append(clean_text)
|
|
27
|
+
prev = prev.prev_sibling
|
|
28
|
+
|
|
29
|
+
if comments:
|
|
30
|
+
docstring = "\n".join(reversed(comments))
|
|
31
|
+
return docstring
|
|
32
|
+
|
|
33
|
+
def _get_signature(self, node, source: bytes) -> str:
|
|
34
|
+
# For Swift, we find body child or child starting with '{'
|
|
35
|
+
body = None
|
|
36
|
+
for child in node.children:
|
|
37
|
+
if child.type in (
|
|
38
|
+
"class_body",
|
|
39
|
+
"struct_body",
|
|
40
|
+
"protocol_body",
|
|
41
|
+
"enum_body",
|
|
42
|
+
"function_body",
|
|
43
|
+
"brace_item_list",
|
|
44
|
+
):
|
|
45
|
+
body = child
|
|
46
|
+
break
|
|
47
|
+
if body:
|
|
48
|
+
end_byte = body.start_byte
|
|
49
|
+
sig_bytes = source[node.start_byte : end_byte]
|
|
50
|
+
sig = sig_bytes.decode("utf-8", errors="replace").strip()
|
|
51
|
+
if sig.endswith("{"):
|
|
52
|
+
sig = sig[:-1].strip()
|
|
53
|
+
return sig
|
|
54
|
+
return (
|
|
55
|
+
source[node.start_byte : node.end_byte]
|
|
56
|
+
.decode("utf-8", errors="replace")
|
|
57
|
+
.split("\n")[0]
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
    """Parse one Swift source file into graph nodes and edges.

    Emits a ``file`` node for the file itself, one node per type declaration
    (``class`` / ``struct`` / ``interface`` / ``enum``) and one per function,
    initializer or deinitializer (``method`` when the enclosing scope is a
    type, ``function`` otherwise), plus ``contains``, ``inherits``,
    ``imports`` and ``calls`` edges. The targets of ``inherits``, ``imports``
    and ``calls`` edges are the names as written in the source; they are not
    resolved to node ids here. Subtrees rooted at a syntax-error node are
    skipped.

    Args:
        file_path: Path of the Swift file to read.
        workspace_dir: Directory ``file_path`` is made relative to; the
            relative path is the file node id and the prefix of symbol ids.

    Returns:
        The populated ``ExtractionResult``, or an empty one when the file
        cannot be read.

    Raises:
        ValueError: If ``file_path`` is not located under ``workspace_dir``.
    """
    try:
        source = file_path.read_bytes()
    except (OSError, ValueError) as e:
        # Best effort: an unreadable file contributes nothing to the graph.
        logger.error("Error reading file %s: %s", file_path, e)
        return ExtractionResult()

    root = self.parser.parse(source).root_node
    rel_path = str(file_path.relative_to(workspace_dir))
    file_node_id = rel_path
    result = ExtractionResult()

    comment_types = (
        "comment",
        "line_comment",
        "block_comment",
        "multiline_comment",
    )
    # Node type -> symbol type. NOTE(review): the tree-sitter-swift grammar
    # appears to report structs and enums as "class_declaration" with a
    # "declaration_kind" field; the struct_/enum_ names are kept for
    # backward compatibility.
    type_kinds = {
        "class_declaration": "class",
        "struct_declaration": "struct",
        "protocol_declaration": "interface",
        "enum_declaration": "enum",
    }
    keyword_kinds = {"struct": "struct", "enum": "enum"}
    function_kinds = (
        "function_declaration",
        "init_declaration",
        "deinit_declaration",
    )
    member_owner_kinds = ("class", "struct", "interface", "enum")
    callee_kinds = ("simple_identifier", "navigation_expression")

    def text_of(n) -> str:
        """Decode the source text spanned by a node."""
        return source[n.start_byte : n.end_byte].decode("utf-8", errors="replace")

    def extract_type_id(n):
        """Return the first ``type_identifier`` text in ``n``'s subtree, or None."""
        if n.type == "type_identifier":
            return text_of(n)
        for sub in n.children:
            found = extract_type_id(sub)
            if found:
                return found
        return None

    def collect_local_bindings(n, bindings):
        """Record ``name -> type name`` for properties and parameters under ``n``."""
        if n.type == "property_declaration":
            var_name = None
            for child in n.children:
                if child.type == "pattern":
                    for gc in child.children:
                        if gc.type == "simple_identifier":
                            var_name = text_of(gc)
            if var_name:
                type_name = None
                # Prefer an explicit annotation ...
                for child in n.children:
                    if child.type == "type_annotation":
                        type_name = extract_type_id(child)
                # ... otherwise use the callee of an initialising call,
                # e.g. ``let x = Foo()``.
                if not type_name:
                    for child in n.children:
                        if child.type == "call_expression":
                            for gc in child.children:
                                if gc.type == "simple_identifier":
                                    type_name = text_of(gc)
                if type_name:
                    bindings[var_name] = type_name
        elif n.type == "parameter":
            identifiers = []
            type_name = None
            seen_colon = False
            for child in n.children:
                if child.type == "simple_identifier" and not seen_colon:
                    identifiers.append(text_of(child))
                elif child.type == ":":
                    seen_colon = True
                elif seen_colon:
                    found = extract_type_id(child)
                    if found:
                        type_name = found
                        break
            if identifiers and type_name:
                # With both an external and an internal label, the last
                # identifier before the colon is the one visible in the body.
                bindings[identifiers[-1]] = type_name

        for child in n.children:
            collect_local_bindings(child, bindings)

    # The root node has no previous sibling, so asking for its docstring
    # directly always yields "". Use the comments that lead the file instead,
    # i.e. those preceding the first non-comment top-level node.
    first_code = next(
        (c for c in root.children if c.type not in comment_types), None
    )
    file_doc = (
        self._get_docstring(first_code, source) if first_code is not None else ""
    )

    result.nodes.append(
        NodeSchema(
            id=file_node_id,
            label=file_path.name,
            type="file",
            source_file=rel_path,
            line_start=1,
            line_end=len(source.splitlines()) or 1,
            signature=f"module {file_path.stem}",
            docstring=file_doc,
        )
    )

    # Stack of (node id, symbol type) for the enclosing scopes; the file
    # entry is never popped, so the stack is never empty.
    scope_stack = [(file_node_id, "file")]

    def walk(node):
        if node.type == "ERROR" or getattr(node, "is_error", False):
            logger.debug("Skipping syntax error node in Swift AST: %s", node)
            return

        node_type = node.type
        pushed_scope = False
        parent_id, parent_type = scope_stack[-1]

        if node_type in type_kinds:
            name_node = node.child_by_field_name("name")
            if name_node is not None:
                class_name = text_of(name_node)
                class_id = f"{rel_path}::{class_name}"

                sym_type = type_kinds[node_type]
                if node_type == "class_declaration":
                    kind_node = node.child_by_field_name("declaration_kind")
                    if kind_node is not None:
                        # Anything other than struct/enum stays "class".
                        sym_type = keyword_kinds.get(text_of(kind_node), "class")

                result.nodes.append(
                    NodeSchema(
                        id=class_id,
                        label=class_name,
                        type=sym_type,
                        source_file=rel_path,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        signature=self._get_signature(node, source),
                        docstring=self._get_docstring(node, source),
                    )
                )
                result.edges.append(
                    EdgeSchema(
                        source=parent_id, target=class_id, relation="contains"
                    )
                )

                # Superclass / protocol conformances.
                for child in node.children:
                    if child.type == "type_inheritance_clause":
                        for sub in child.children:
                            if sub.type == "type_identifier":
                                result.edges.append(
                                    EdgeSchema(
                                        source=class_id,
                                        target=text_of(sub),
                                        relation="inherits",
                                    )
                                )
                    elif child.type == "inheritance_specifier":
                        # NOTE(review): grammar form of one inherited type;
                        # the first type identifier in it is used.
                        base_name = extract_type_id(child)
                        if base_name:
                            result.edges.append(
                                EdgeSchema(
                                    source=class_id,
                                    target=base_name,
                                    relation="inherits",
                                )
                            )

                scope_stack.append((class_id, sym_type))
                pushed_scope = True

        elif node_type in function_kinds:
            if node_type == "init_declaration":
                func_name = "init"
            elif node_type == "deinit_declaration":
                func_name = "deinit"
            else:
                name_node = node.child_by_field_name("name")
                func_name = text_of(name_node) if name_node is not None else None

            if func_name:
                if parent_type in member_owner_kinds:
                    func_id = f"{parent_id}.{func_name}"
                    sym_type = "method"
                else:
                    func_id = f"{rel_path}::{func_name}"
                    sym_type = "function"

                local_bindings = {}
                collect_local_bindings(node, local_bindings)

                result.nodes.append(
                    NodeSchema(
                        id=func_id,
                        label=func_name,
                        type=sym_type,
                        source_file=rel_path,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        signature=self._get_signature(node, source),
                        docstring=self._get_docstring(node, source),
                        local_bindings=local_bindings,
                    )
                )
                result.edges.append(
                    EdgeSchema(
                        source=parent_id, target=func_id, relation="contains"
                    )
                )

                scope_stack.append((func_id, sym_type))
                pushed_scope = True

        elif node_type == "import_declaration":
            # e.g. ``import UIKit`` or ``import class Module.Class``.
            # NOTE(review): "identifier" is accepted because the grammar
            # appears to wrap the dotted import path in such a node.
            path_parts = [
                text_of(child)
                for child in node.children
                if child.type
                in ("simple_identifier", "navigation_expression", "identifier")
            ]
            if path_parts:
                result.edges.append(
                    EdgeSchema(
                        source=file_node_id,
                        target=".".join(path_parts),
                        relation="imports",
                    )
                )

        elif node_type == "call_expression":
            # The callee is the first identifier / member-access child.
            func_node = next(
                (c for c in node.children if c.type in callee_kinds), None
            )
            if func_node is not None:
                result.edges.append(
                    EdgeSchema(
                        source=parent_id,
                        target=text_of(func_node),
                        relation="calls",
                    )
                )

        for child in node.children:
            walk(child)

        if pushed_scope:
            scope_stack.pop()

    walk(root)
    return result
|
codegraph_gen/py.typed
ADDED
|
File without changes
|