codegraph-gen 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import logging
2
+ from pathlib import Path
3
+ import tree_sitter
4
+ import tree_sitter_swift
5
+ from codegraph_gen.parser.base import BaseParser, ExtractionResult, NodeSchema, EdgeSchema
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ class SwiftParser(BaseParser):
11
def __init__(self):
    """Set up the Swift grammar and a tree-sitter parser bound to it."""
    # Build the Language object once and share it between both attributes.
    swift_language = tree_sitter.Language(tree_sitter_swift.language())
    self.language = swift_language
    self.parser = tree_sitter.Parser(swift_language)
14
+
15
+ def _get_docstring(self, node, source: bytes) -> str:
16
+ """Finds comments immediately preceding the node."""
17
+ docstring = ""
18
+ prev = node.prev_sibling
19
+ comments = []
20
+ while prev and prev.type in ("comment", "line_comment", "block_comment"):
21
+ comment_text = source[prev.start_byte : prev.end_byte].decode(
22
+ "utf-8", errors="replace"
23
+ )
24
+ # Strip comment markers (///, //, /*)
25
+ clean_text = comment_text.strip().lstrip("/").strip()
26
+ comments.append(clean_text)
27
+ prev = prev.prev_sibling
28
+
29
+ if comments:
30
+ docstring = "\n".join(reversed(comments))
31
+ return docstring
32
+
33
+ def _get_signature(self, node, source: bytes) -> str:
34
+ # For Swift, we find body child or child starting with '{'
35
+ body = None
36
+ for child in node.children:
37
+ if child.type in (
38
+ "class_body",
39
+ "struct_body",
40
+ "protocol_body",
41
+ "enum_body",
42
+ "function_body",
43
+ "brace_item_list",
44
+ ):
45
+ body = child
46
+ break
47
+ if body:
48
+ end_byte = body.start_byte
49
+ sig_bytes = source[node.start_byte : end_byte]
50
+ sig = sig_bytes.decode("utf-8", errors="replace").strip()
51
+ if sig.endswith("{"):
52
+ sig = sig[:-1].strip()
53
+ return sig
54
+ return (
55
+ source[node.start_byte : node.end_byte]
56
+ .decode("utf-8", errors="replace")
57
+ .split("\n")[0]
58
+ )
59
+
60
def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
    """Parse one Swift file into graph nodes and edges.

    Emits a ``file`` node for the file itself, one node per type declaration
    and per function / initializer / deinitializer, ``contains`` edges from
    the enclosing scope to each of those, ``inherits`` edges for listed
    supertypes, ``imports`` edges from the file node, and ``calls`` edges
    from the innermost enclosing scope to the textual callee.

    Args:
        file_path: Swift source file to read.
        workspace_dir: Root directory; the path of ``file_path`` relative to
            it is used as the file node id and as the prefix of symbol ids.

    Returns:
        The populated ``ExtractionResult``. An empty result is returned when
        the file cannot be read.
    """
    try:
        source = file_path.read_bytes()
    except Exception as e:
        logger.error("Error reading file %s: %s", file_path, e)
        return ExtractionResult()

    try:
        rel_path = str(file_path.relative_to(workspace_dir))
    except ValueError:
        # relative_to() raises when file_path is not under workspace_dir.
        # Keep extracting instead of aborting; ids then use the given path.
        logger.warning(
            "File %s is not inside workspace %s; using its path as given",
            file_path,
            workspace_dir,
        )
        rel_path = str(file_path)

    root = self.parser.parse(source).root_node
    result = ExtractionResult()

    def text_of(n) -> str:
        """Decode the source text spanned by a syntax node."""
        return source[n.start_byte : n.end_byte].decode("utf-8", errors="replace")

    def first_type_identifier(n):
        """Return the text of the first ``type_identifier`` in ``n`` (depth-first)."""
        if n.type == "type_identifier":
            return text_of(n)
        for child in n.children:
            found = first_type_identifier(child)
            if found:
                return found
        return None

    # Add file node
    file_node_id = rel_path
    result.nodes.append(
        NodeSchema(
            id=file_node_id,
            label=file_path.name,
            type="file",
            source_file=rel_path,
            line_start=1,
            line_end=len(source.splitlines()) or 1,
            signature=f"module {file_path.stem}",
            docstring=self._get_docstring(root, source),
        )
    )

    # Stack of (symbol id, symbol type) for the scopes currently being walked.
    scope_stack = [(file_node_id, "file")]

    def current_scope():
        return scope_stack[-1] if scope_stack else (file_node_id, "file")

    # Declaration node type -> symbol type stored on the graph node.
    type_decl_kinds = {
        "class_declaration": "class",
        "struct_declaration": "struct",
        "protocol_declaration": "interface",
        "enum_declaration": "enum",
    }

    def symbol_type_for(decl):
        """Map a type declaration node to its graph symbol type."""
        sym_type = type_decl_kinds[decl.type]
        if decl.type == "class_declaration":
            # NOTE(review): tree-sitter-swift is believed to parse structs and
            # enums as class_declaration with a "declaration_kind" field whose
            # node type is the keyword; confirm against node-types.json.
            kind_node = decl.child_by_field_name("declaration_kind")
            if kind_node is not None and kind_node.type in ("struct", "enum"):
                sym_type = kind_node.type
        return sym_type

    def add_inheritance_edges(decl, class_id):
        """Emit an ``inherits`` edge for every supertype listed on ``decl``."""
        for child in decl.children:
            if child.type == "type_inheritance_clause":
                candidates = child.children
            elif child.type == "inheritance_specifier":
                # NOTE(review): believed to be how tree-sitter-swift exposes
                # each inherited type (type identifier nested inside); confirm.
                candidates = [child]
            else:
                continue
            for sub in candidates:
                parent_name = first_type_identifier(sub)
                if parent_name:
                    result.edges.append(
                        EdgeSchema(
                            source=class_id,
                            target=parent_name,
                            relation="inherits",
                        )
                    )

    def handle_type_declaration(decl):
        """Record a type declaration; return True when a scope was pushed."""
        name_node = decl.child_by_field_name("name")
        if not name_node:
            return False
        class_name = text_of(name_node)
        parent_id = current_scope()[0]
        class_id = f"{rel_path}::{class_name}"
        sym_type = symbol_type_for(decl)

        result.nodes.append(
            NodeSchema(
                id=class_id,
                label=class_name,
                type=sym_type,
                source_file=rel_path,
                line_start=decl.start_point[0] + 1,
                line_end=decl.end_point[0] + 1,
                signature=self._get_signature(decl, source),
                docstring=self._get_docstring(decl, source),
            )
        )
        result.edges.append(
            EdgeSchema(source=parent_id, target=class_id, relation="contains")
        )
        add_inheritance_edges(decl, class_id)

        scope_stack.append((class_id, sym_type))
        return True

    def collect_local_bindings(n, bindings):
        """Fill ``bindings`` with variable-name -> type-name pairs found under ``n``."""
        if n.type == "property_declaration":
            var_name = None
            for child in n.children:
                if child.type == "pattern":
                    for gc in child.children:
                        if gc.type == "simple_identifier":
                            var_name = text_of(gc)
            if var_name:
                type_name = None
                for child in n.children:
                    if child.type == "type_annotation":
                        type_name = first_type_identifier(child)
                if not type_name:
                    # No annotation: use the identifier of a call initializer
                    # (e.g. ``let x = Foo()``) as the type name.
                    for child in n.children:
                        if child.type == "call_expression":
                            for gc in child.children:
                                if gc.type == "simple_identifier":
                                    type_name = text_of(gc)
                if type_name:
                    bindings[var_name] = type_name
        elif n.type == "parameter":
            identifiers = []
            type_name = None
            seen_colon = False
            for child in n.children:
                if child.type == "simple_identifier" and not seen_colon:
                    identifiers.append(text_of(child))
                elif child.type == ":":
                    seen_colon = True
                elif seen_colon:
                    res = first_type_identifier(child)
                    if res:
                        type_name = res
                        break
            if identifiers and type_name:
                # The last identifier before the colon is the one bound.
                bindings[identifiers[-1]] = type_name

        for child in n.children:
            collect_local_bindings(child, bindings)

    def handle_function(decl):
        """Record a function-like declaration; return True when a scope was pushed."""
        func_name = None
        if decl.type == "function_declaration":
            name_node = decl.child_by_field_name("name")
            if name_node:
                func_name = text_of(name_node)
        elif decl.type == "init_declaration":
            func_name = "init"
        elif decl.type == "deinit_declaration":
            func_name = "deinit"

        if not func_name:
            return False

        parent_id, parent_type = current_scope()
        if parent_type in ("class", "struct", "interface", "enum"):
            func_id = f"{parent_id}.{func_name}"
            sym_type = "method"
        else:
            func_id = f"{rel_path}::{func_name}"
            sym_type = "function"

        local_bindings = {}
        collect_local_bindings(decl, local_bindings)

        result.nodes.append(
            NodeSchema(
                id=func_id,
                label=func_name,
                type=sym_type,
                source_file=rel_path,
                line_start=decl.start_point[0] + 1,
                line_end=decl.end_point[0] + 1,
                signature=self._get_signature(decl, source),
                docstring=self._get_docstring(decl, source),
                local_bindings=local_bindings,
            )
        )
        result.edges.append(
            EdgeSchema(source=parent_id, target=func_id, relation="contains")
        )

        scope_stack.append((func_id, sym_type))
        return True

    def handle_import(decl):
        """Emit an ``imports`` edge from the file node to the imported path."""
        # import UIKit or import class Module.Class
        # NOTE(review): tree-sitter-swift is believed to wrap the dotted path
        # in a single "identifier" child; confirm against node-types.json.
        path_parts = [
            text_of(child)
            for child in decl.children
            if child.type in ("identifier", "simple_identifier", "navigation_expression")
        ]
        if path_parts:
            result.edges.append(
                EdgeSchema(
                    source=file_node_id,
                    target=".".join(path_parts),
                    relation="imports",
                )
            )

    def handle_call(call):
        """Emit a ``calls`` edge from the current scope to the textual callee."""
        # The callee is the first child that is a plain or dotted name.
        func_node = None
        for child in call.children:
            if child.type in ("simple_identifier", "navigation_expression"):
                func_node = child
                break
        if func_node:
            result.edges.append(
                EdgeSchema(
                    source=current_scope()[0],
                    target=text_of(func_node),
                    relation="calls",
                )
            )

    def walk(node):
        if node.type == "ERROR" or getattr(node, "is_error", False):
            logger.debug("Skipping syntax error node in Swift AST: %s", node)
            return

        node_type = node.type
        pushed_scope = False

        if node_type in type_decl_kinds:
            pushed_scope = handle_type_declaration(node)
        elif node_type in (
            "function_declaration",
            "init_declaration",
            "deinit_declaration",
        ):
            pushed_scope = handle_function(node)
        elif node_type == "import_declaration":
            handle_import(node)
        elif node_type == "call_expression":
            handle_call(node)

        # Recurse children
        for child in node.children:
            walk(child)

        if pushed_scope:
            scope_stack.pop()

    walk(root)
    return result
codegraph_gen/py.typed ADDED
File without changes