polycodegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph/__init__.py +10 -0
- codegraph/analysis/__init__.py +30 -0
- codegraph/analysis/_common.py +125 -0
- codegraph/analysis/blast_radius.py +63 -0
- codegraph/analysis/cycles.py +79 -0
- codegraph/analysis/dataflow.py +861 -0
- codegraph/analysis/dead_code.py +165 -0
- codegraph/analysis/hotspots.py +68 -0
- codegraph/analysis/infrastructure.py +439 -0
- codegraph/analysis/metrics.py +52 -0
- codegraph/analysis/report.py +222 -0
- codegraph/analysis/roles.py +323 -0
- codegraph/analysis/untested.py +79 -0
- codegraph/cli.py +1506 -0
- codegraph/config.py +64 -0
- codegraph/embed/__init__.py +35 -0
- codegraph/embed/chunker.py +120 -0
- codegraph/embed/embedder.py +113 -0
- codegraph/embed/query.py +181 -0
- codegraph/embed/store.py +360 -0
- codegraph/graph/__init__.py +0 -0
- codegraph/graph/builder.py +212 -0
- codegraph/graph/schema.py +69 -0
- codegraph/graph/store_networkx.py +55 -0
- codegraph/graph/store_sqlite.py +249 -0
- codegraph/mcp_server/__init__.py +6 -0
- codegraph/mcp_server/server.py +933 -0
- codegraph/parsers/__init__.py +0 -0
- codegraph/parsers/base.py +70 -0
- codegraph/parsers/go.py +570 -0
- codegraph/parsers/python.py +1707 -0
- codegraph/parsers/typescript.py +1397 -0
- codegraph/py.typed +0 -0
- codegraph/resolve/__init__.py +4 -0
- codegraph/resolve/calls.py +480 -0
- codegraph/review/__init__.py +31 -0
- codegraph/review/baseline.py +32 -0
- codegraph/review/differ.py +211 -0
- codegraph/review/hook.py +70 -0
- codegraph/review/risk.py +219 -0
- codegraph/review/rules.py +342 -0
- codegraph/viz/__init__.py +17 -0
- codegraph/viz/_style.py +45 -0
- codegraph/viz/dashboard.py +740 -0
- codegraph/viz/diagrams.py +370 -0
- codegraph/viz/explore.py +453 -0
- codegraph/viz/hld.py +683 -0
- codegraph/viz/html.py +115 -0
- codegraph/viz/mermaid.py +111 -0
- codegraph/viz/svg.py +77 -0
- codegraph/web/__init__.py +4 -0
- codegraph/web/server.py +165 -0
- codegraph/web/static/app.css +664 -0
- codegraph/web/static/app.js +919 -0
- codegraph/web/static/index.html +112 -0
- codegraph/web/static/views/architecture.js +1671 -0
- codegraph/web/static/views/graph3d.css +564 -0
- codegraph/web/static/views/graph3d.js +999 -0
- codegraph/web/static/views/graph3d_transform.js +984 -0
- codegraph/workspace/__init__.py +34 -0
- codegraph/workspace/config.py +110 -0
- codegraph/workspace/operations.py +294 -0
- polycodegraph-0.1.0.dist-info/METADATA +687 -0
- polycodegraph-0.1.0.dist-info/RECORD +67 -0
- polycodegraph-0.1.0.dist-info/WHEEL +4 -0
- polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
- polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
|
File without changes
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Base classes and helpers for tree-sitter-based extractors."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import abc
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import tree_sitter
|
|
9
|
+
|
|
10
|
+
from codegraph.graph.schema import Edge, Node
|
|
11
|
+
|
|
12
|
+
# Maps a file extension (e.g. ".go") to the extractor instance registered for
# it. Lookups are done with the lower-cased suffix of the path being parsed.
_EXTRACTOR_REGISTRY: dict[str, ExtractorBase] = {}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ExtractorBase(abc.ABC):
    """Abstract base class for per-language source extractors.

    Concrete subclasses set ``language`` and ``extensions`` and implement
    :meth:`parse_file`.
    """

    # Language key of the extractor (the Go extractor uses "go").
    language: str
    # File suffixes handled by the extractor (the Go extractor uses (".go",)).
    extensions: tuple[str, ...]

    @abc.abstractmethod
    def parse_file(
        self, path: Path, repo_root: Path
    ) -> tuple[list[Node], list[Edge]]:
        """Parse a file and return (nodes, edges)."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@lru_cache(maxsize=16)
def load_parser(language: str) -> tree_sitter.Parser:
    """Build (once per language key) and return a ``tree_sitter.Parser``.

    The result is memoised, so repeated calls with the same *language* hand
    back the same parser object. Unsupported keys raise ``ValueError`` from
    the grammar lookup.
    """
    return tree_sitter.Parser(_get_language(language))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@lru_cache(maxsize=16)
|
|
34
|
+
def _get_language(language: str) -> tree_sitter.Language:
|
|
35
|
+
if language == "python":
|
|
36
|
+
import tree_sitter_python
|
|
37
|
+
return tree_sitter.Language(tree_sitter_python.language())
|
|
38
|
+
elif language == "typescript":
|
|
39
|
+
import tree_sitter_typescript
|
|
40
|
+
return tree_sitter.Language(tree_sitter_typescript.language_typescript())
|
|
41
|
+
elif language == "tsx":
|
|
42
|
+
import tree_sitter_typescript
|
|
43
|
+
return tree_sitter.Language(tree_sitter_typescript.language_tsx())
|
|
44
|
+
elif language in ("javascript", "jsx"):
|
|
45
|
+
import tree_sitter_javascript
|
|
46
|
+
return tree_sitter.Language(tree_sitter_javascript.language())
|
|
47
|
+
elif language == "go":
|
|
48
|
+
import tree_sitter_go
|
|
49
|
+
return tree_sitter.Language(tree_sitter_go.language())
|
|
50
|
+
else:
|
|
51
|
+
raise ValueError(f"Unsupported language: {language}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def node_text(ts_node: tree_sitter.Node, source_bytes: bytes) -> str:
    """Return the source text spanned by *ts_node*.

    The node's byte range is sliced out of *source_bytes* and decoded as
    UTF-8; undecodable bytes become U+FFFD instead of raising.
    """
    begin, end = ts_node.start_byte, ts_node.end_byte
    raw = source_bytes[begin:end]
    return raw.decode("utf-8", errors="replace")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def register_extractor(cls: type[ExtractorBase]) -> type[ExtractorBase]:
    """Class decorator: instantiate *cls* and register it for its extensions.

    One instance is created and shared by every suffix listed in
    ``cls.extensions``; a later registration for the same suffix replaces the
    earlier one. The class itself is returned unchanged.
    """
    extractor = cls()
    _EXTRACTOR_REGISTRY.update(dict.fromkeys(cls.extensions, extractor))
    return cls
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_extractor_for(path: Path) -> ExtractorBase | None:
    """Look up the extractor registered for *path*'s suffix.

    The suffix is lower-cased before the lookup. Returns ``None`` when no
    extractor is registered for it.
    """
    suffix = path.suffix.lower()
    return _EXTRACTOR_REGISTRY.get(suffix)
|
codegraph/parsers/go.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
"""Go (tree-sitter) extractor for codegraph.
|
|
2
|
+
|
|
3
|
+
Emits the same node/edge shape as :mod:`codegraph.parsers.python` so the
|
|
4
|
+
post-build resolver in :mod:`codegraph.resolve.calls` can wire up cross-file
|
|
5
|
+
references uniformly. Tree-sitter Go grammar is loaded lazily via
|
|
6
|
+
``tree-sitter-go``.
|
|
7
|
+
|
|
8
|
+
Node kinds emitted
|
|
9
|
+
------------------
|
|
10
|
+
- ``MODULE`` — one per ``.go`` file. Qualname is the package name from
|
|
11
|
+
``package X``, fallback to file path slug. Files in the same package
|
|
12
|
+
intentionally share a qualname so call-site resolution finds them.
|
|
13
|
+
- ``CLASS`` — Go ``type X struct {...}`` and ``type X interface {...}``.
|
|
14
|
+
(Not really classes, but the post-build resolver treats CLASS as "named
|
|
15
|
+
type" and that's what struct/interface declarations are.)
|
|
16
|
+
- ``FUNCTION``— top-level ``func Foo(...) {...}``.
|
|
17
|
+
- ``METHOD`` — ``func (r *Recv) Foo(...) {...}``. Qualname is
|
|
18
|
+
``module.ReceiverType.Foo``.
|
|
19
|
+
|
|
20
|
+
Edge kinds emitted
|
|
21
|
+
------------------
|
|
22
|
+
- ``DEFINED_IN`` — every function/method/type → its parent module (or
|
|
23
|
+
receiver type, in the case of methods).
|
|
24
|
+
- ``IMPORTS`` — module → ``unresolved::<package-path>`` per import.
|
|
25
|
+
- ``CALLS`` — function/method body → ``unresolved::<target>`` per
|
|
26
|
+
call site. Targets are emitted as the bare identifier (e.g. ``Foo``) or
|
|
27
|
+
dotted selector (e.g. ``pkg.Foo`` / ``r.Foo``); the resolver narrows.
|
|
28
|
+
- ``INHERITS`` — struct → unresolved::EmbeddedType for each embedded
|
|
29
|
+
type field. (Go composition is the closest analog to inheritance.)
|
|
30
|
+
|
|
31
|
+
Limitations (v1)
|
|
32
|
+
----------------
|
|
33
|
+
- Generic type parameters are parsed but stored only as text in metadata;
|
|
34
|
+
the qualname doesn't include them.
|
|
35
|
+
- Interface-satisfaction (does ``*Foo`` implement ``Bar``?) is not detected
|
|
36
|
+
here — that needs a whole-package pass. The resolver layer is where this
|
|
37
|
+
should land, not the parser.
|
|
38
|
+
- ``init()`` functions and ``main()`` get no special treatment yet.
|
|
39
|
+
"""
|
|
40
|
+
from __future__ import annotations
|
|
41
|
+
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
from typing import Any
|
|
44
|
+
|
|
45
|
+
import tree_sitter
|
|
46
|
+
|
|
47
|
+
from codegraph.graph.schema import Edge, EdgeKind, Node, NodeKind, make_node_id
|
|
48
|
+
from codegraph.parsers.base import (
|
|
49
|
+
ExtractorBase,
|
|
50
|
+
load_parser,
|
|
51
|
+
node_text,
|
|
52
|
+
register_extractor,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Helpers
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _file_to_module_qualname_fallback(rel: str) -> str:
|
|
61
|
+
"""Convert ``cmd/foo/bar.go`` → ``cmd.foo.bar`` when no ``package X`` is found.
|
|
62
|
+
|
|
63
|
+
Used only when the file is unparseable enough that we can't read the
|
|
64
|
+
package clause (shouldn't happen for valid Go, but defensive).
|
|
65
|
+
"""
|
|
66
|
+
stem = rel.rsplit(".", 1)[0]
|
|
67
|
+
return stem.replace("/", ".") or "main"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _is_test_file(rel: str) -> bool:
|
|
71
|
+
"""Go convention: ``*_test.go`` is a test file."""
|
|
72
|
+
name = rel.rsplit("/", 1)[-1]
|
|
73
|
+
return name.endswith("_test.go")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _named_children(ts_node: tree_sitter.Node) -> list[tree_sitter.Node]:
|
|
77
|
+
return [c for c in ts_node.children if c.is_named]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _find_child(ts_node: tree_sitter.Node, kind: str) -> tree_sitter.Node | None:
|
|
81
|
+
for c in ts_node.children:
|
|
82
|
+
if c.type == kind:
|
|
83
|
+
return c
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _find_named_descendant(
|
|
88
|
+
ts_node: tree_sitter.Node, kinds: set[str]
|
|
89
|
+
) -> tree_sitter.Node | None:
|
|
90
|
+
"""Depth-first search for the first descendant whose type is in *kinds*."""
|
|
91
|
+
stack: list[tree_sitter.Node] = list(ts_node.children)
|
|
92
|
+
while stack:
|
|
93
|
+
cur = stack.pop()
|
|
94
|
+
if cur.type in kinds:
|
|
95
|
+
return cur
|
|
96
|
+
stack.extend(cur.children)
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _extract_receiver_type(
    method_node: tree_sitter.Node, src: bytes
) -> tuple[str | None, bool]:
    """Pull the receiver type name + pointer-ness out of a method_declaration.

    Pointer, parenthesised and generic wrappers are peeled off so that
    ``(r *Foo)``, ``(r Foo[T])`` and ``(r *Foo[K, V])`` all yield ``Foo``;
    qualified names such as ``pkg.Foo`` are returned as written.

    Args:
        method_node: A ``method_declaration`` node.
        src: The bytes the tree was parsed from.

    Returns:
        ``(type_name, is_pointer)``, or ``(None, False)`` if the receiver
        cannot be located.
    """
    # The first parameter_list child of a method_declaration is the receiver.
    receiver_list = _find_child(method_node, "parameter_list")
    if receiver_list is None:
        return None, False
    receiver = _find_child(receiver_list, "parameter_declaration")
    if receiver is None:
        return None, False
    type_node = receiver.child_by_field_name("type")
    if type_node is None:
        # Some grammars: type lives as the last named child.
        named = _named_children(receiver)
        type_node = named[-1] if named else None
    if type_node is None:
        return None, False

    # Node kinds that may sit directly inside one of the wrapper nodes below.
    inner_kinds = (
        "type_identifier",
        "qualified_type",
        "generic_type",
        "pointer_type",
        "parenthesized_type",
    )
    is_pointer = False
    # Each iteration descends one level, so the loop always terminates.
    while type_node.type in ("pointer_type", "parenthesized_type", "generic_type"):
        if type_node.type == "pointer_type":
            is_pointer = True
        inner: tree_sitter.Node | None = None
        if type_node.type == "generic_type":
            # Keep the base type and drop the type arguments, so the name
            # matches the qualname emitted for the type declaration.
            inner = type_node.child_by_field_name("type")
        if inner is None:
            inner = next(
                (c for c in type_node.children if c.type in inner_kinds), None
            )
        if inner is None:
            # Unexpected shape: fall back to the text of the current node.
            break
        type_node = inner
    return node_text(type_node, src).strip(), is_pointer
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _params_metadata(
    params_node: tree_sitter.Node | None, src: bytes
) -> list[dict[str, Any]]:
    """Extract parameters as ``[{"name": "...", "type": "..."}]``.

    One entry is produced per declared name, so ``a, b int`` yields two
    entries sharing the type ``int``. A parameter without a name yields a
    single entry whose name is ``""``. Variadic parameters are included with
    their type written the Go way, e.g. ``...string``.

    Args:
        params_node: The ``parameter_list`` node, or ``None``.
        src: The bytes the tree was parsed from.

    Returns:
        The parameter entries in declaration order; ``[]`` when
        *params_node* is ``None``.
    """
    if params_node is None:
        return []
    out: list[dict[str, Any]] = []
    for child in params_node.children:
        is_variadic = child.type == "variadic_parameter_declaration"
        if not is_variadic and child.type != "parameter_declaration":
            continue  # punctuation, comments, ...
        type_node = child.child_by_field_name("type")
        type_str = node_text(type_node, src).strip() if type_node else None
        if is_variadic and type_str is not None:
            # The "..." token is not part of the type node's own text.
            type_str = f"...{type_str}"
        names = [
            node_text(sub, src) for sub in child.children if sub.type == "identifier"
        ]
        if not names:
            # Anonymous parameter (just a type, e.g. `func f(int)`)
            out.append({"name": "", "type": type_str})
        else:
            out.extend({"name": n, "type": type_str} for n in names)
    return out
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _signature_text(decl: tree_sitter.Node, src: bytes) -> str:
    """Return the declaration text up to, but excluding, its body block.

    When *decl* has no direct ``block`` child the whole declaration text is
    returned. The result is decoded as UTF-8 (invalid bytes replaced) and
    stripped of surrounding whitespace.
    """
    body = _find_child(decl, "block")
    if body:
        stop = body.start_byte
    else:
        stop = decl.end_byte
    header = src[decl.start_byte:stop]
    return header.decode("utf-8", errors="replace").strip()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Main extractor
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@register_extractor
class GoExtractor(ExtractorBase):
    """Extractor that turns one ``.go`` file into graph nodes and edges.

    Only top-level declarations are visited: imports, functions, methods and
    type declarations. References that cannot be resolved within the file are
    emitted as ``unresolved::<name>`` destinations.
    """

    language = "go"
    extensions = (".go",)

    def parse_file(
        self, path: Path, repo_root: Path
    ) -> tuple[list[Node], list[Edge]]:
        """Parse *path* and return ``(nodes, edges)``.

        Args:
            path: File to parse.
            repo_root: Root that *path* is made relative to for node ids and
                ``file`` attributes.

        Returns:
            The module node followed by one node per function, method and
            type spec, plus their edges. An unreadable file yields
            ``([], [])``.

        Raises:
            ValueError: if *path* is not located under *repo_root*.
        """
        try:
            src = path.read_bytes()
        except OSError:
            # Best effort: an unreadable file contributes nothing.
            return [], []
        rel = path.relative_to(repo_root).as_posix()
        parser = load_parser("go")
        tree = parser.parse(src)
        root = tree.root_node

        nodes: list[Node] = []
        edges: list[Edge] = []

        # 1. Module node — qualname is the package name when we can find it.
        package_name = self._read_package_name(root, src)
        module_qualname = package_name or _file_to_module_qualname_fallback(rel)
        module_kind = NodeKind.TEST if _is_test_file(rel) else NodeKind.MODULE
        module_id = make_node_id(module_kind, module_qualname, rel)
        nodes.append(
            Node(
                id=module_id,
                kind=module_kind,
                name=path.stem,
                qualname=module_qualname,
                file=rel,
                line_start=1,
                line_end=root.end_point[0] + 1,
                language="go",
                metadata={"package": package_name} if package_name else {},
            )
        )

        # 2. Walk top-level declarations. The three declaration handlers
        #    share one signature, so they are dispatched through a table.
        handlers = {
            "function_declaration": self._handle_function,
            "method_declaration": self._handle_method,
            "type_declaration": self._handle_type_decl,
        }
        for child in _named_children(root):
            if child.type == "import_declaration":
                self._handle_imports(child, rel, module_id, src, edges)
                continue
            handler = handlers.get(child.type)
            if handler is not None:
                handler(child, rel, module_qualname, module_id, src, nodes, edges)

        return nodes, edges

    # -----------------------------------------------------------------------
    # Top-level structure
    # -----------------------------------------------------------------------

    def _read_package_name(
        self, root: tree_sitter.Node, src: bytes
    ) -> str | None:
        """Return the name from the file's ``package X`` clause, or ``None``."""
        for child in root.children:
            if child.type != "package_clause":
                continue
            ident = _find_child(child, "package_identifier") or _find_child(
                child, "identifier"
            )
            if ident is not None:
                return node_text(ident, src)
        return None

    def _handle_imports(
        self,
        decl: tree_sitter.Node,
        rel: str,
        module_id: str,
        src: bytes,
        edges: list[Edge],
    ) -> None:
        """Emit one ``IMPORTS`` edge per imported package path.

        Handles four forms:
        - ``import "fmt"``                       (single spec)
        - ``import ( "fmt" "os" )``              (grouped specs)
        - ``import foo "github.com/.../foo"``    (named import)
        - ``import _ "github.com/.../driver"``   (blank import)

        The edge metadata always carries ``package_path``. ``alias`` is added
        when the import has an explicit name (including ``_`` and ``.``) that
        differs from the last path segment.
        """
        for spec in _named_children(decl):
            specs: list[tree_sitter.Node] = []
            if spec.type == "import_spec":
                specs.append(spec)
            elif spec.type == "import_spec_list":
                specs.extend(
                    c for c in _named_children(spec) if c.type == "import_spec"
                )
            for s in specs:
                path_node = _find_child(s, "interpreted_string_literal")
                if path_node is None:
                    path_node = _find_child(s, "raw_string_literal")
                if path_node is None:
                    continue
                pkg_path = node_text(path_node, src).strip("`").strip('"')
                if not pkg_path:
                    continue
                # The grammar exposes the optional import name through the
                # ``name`` field, which also covers ``_`` (blank_identifier)
                # and ``.`` (dot). Fall back to a search by node type when
                # the field is not available.
                alias_node = s.child_by_field_name("name")
                if alias_node is None:
                    alias_node = _find_child(s, "package_identifier") or _find_child(
                        s, "identifier"
                    )
                alias = node_text(alias_node, src) if alias_node else None
                metadata: dict[str, Any] = {"package_path": pkg_path}
                if alias and alias != pkg_path.rsplit("/", 1)[-1]:
                    metadata["alias"] = alias
                edges.append(
                    Edge(
                        src=module_id,
                        dst=f"unresolved::{pkg_path}",
                        kind=EdgeKind.IMPORTS,
                        file=rel,
                        line=s.start_point[0] + 1,
                        metadata=metadata,
                    )
                )

    # -----------------------------------------------------------------------
    # Functions, methods, types
    # -----------------------------------------------------------------------

    def _handle_function(
        self,
        decl: tree_sitter.Node,
        rel: str,
        parent_qualname: str,
        parent_id: str,
        src: bytes,
        nodes: list[Node],
        edges: list[Edge],
    ) -> None:
        """Emit a FUNCTION node, its DEFINED_IN edge and its CALLS edges.

        The qualname is ``<parent_qualname>.<name>``. Declarations without a
        ``name`` field are skipped.
        """
        name_node = decl.child_by_field_name("name")
        if name_node is None:
            return
        name = node_text(name_node, src)
        qualname = f"{parent_qualname}.{name}"
        fn_id = make_node_id(NodeKind.FUNCTION, qualname, rel)
        params_meta = _params_metadata(decl.child_by_field_name("parameters"), src)
        start_line = decl.start_point[0] + 1
        nodes.append(
            Node(
                id=fn_id,
                kind=NodeKind.FUNCTION,
                name=name,
                qualname=qualname,
                file=rel,
                line_start=start_line,
                line_end=decl.end_point[0] + 1,
                signature=_signature_text(decl, src),
                language="go",
                metadata={"params": params_meta},
            )
        )
        edges.append(
            Edge(
                src=fn_id,
                dst=parent_id,
                kind=EdgeKind.DEFINED_IN,
                file=rel,
                line=start_line,
            )
        )
        body = _find_child(decl, "block")
        if body is not None:
            self._collect_calls(body, rel, fn_id, src, edges)

    def _handle_method(
        self,
        decl: tree_sitter.Node,
        rel: str,
        parent_qualname: str,
        parent_id: str,
        src: bytes,
        nodes: list[Node],
        edges: list[Edge],
    ) -> None:
        """Emit a METHOD node, its DEFINED_IN edge and its CALLS edges.

        The qualname is ``<parent_qualname>.<RecvType>.<name>`` when the
        receiver type can be read, else ``<parent_qualname>.<name>``.
        Declarations without a ``name`` field are skipped.
        """
        name_node = decl.child_by_field_name("name")
        if name_node is None:
            return
        name = node_text(name_node, src)
        recv_type, is_pointer = _extract_receiver_type(decl, src)
        # Qualname: prefer module.RecvType.Name so the resolver can stitch
        # the method to its type. Fallback to module.Name when receiver isn't
        # parseable.
        if recv_type:
            recv_type_qualname: str | None = f"{parent_qualname}.{recv_type}"
            qualname = f"{recv_type_qualname}.{name}"
        else:
            recv_type_qualname = None
            qualname = f"{parent_qualname}.{name}"
        method_id = make_node_id(NodeKind.METHOD, qualname, rel)
        params_meta = _params_metadata(decl.child_by_field_name("parameters"), src)
        start_line = decl.start_point[0] + 1
        nodes.append(
            Node(
                id=method_id,
                kind=NodeKind.METHOD,
                name=name,
                qualname=qualname,
                file=rel,
                line_start=start_line,
                line_end=decl.end_point[0] + 1,
                signature=_signature_text(decl, src),
                language="go",
                metadata={
                    "params": params_meta,
                    "receiver": recv_type,
                    "receiver_pointer": is_pointer,
                },
            )
        )
        # Method DEFINED_IN points at the receiver type if we know it, else
        # module. The receiver type may be declared in another file of the
        # package, so it is emitted as an unresolved:: destination for the
        # resolver to match by qualname.
        if recv_type_qualname:
            defined_in = f"unresolved::{recv_type_qualname}"
        else:
            defined_in = parent_id
        edges.append(
            Edge(
                src=method_id,
                dst=defined_in,
                kind=EdgeKind.DEFINED_IN,
                file=rel,
                line=start_line,
            )
        )
        body = _find_child(decl, "block")
        if body is not None:
            self._collect_calls(body, rel, method_id, src, edges)

    def _handle_type_decl(
        self,
        decl: tree_sitter.Node,
        rel: str,
        parent_qualname: str,
        parent_id: str,
        src: bytes,
        nodes: list[Node],
        edges: list[Edge],
    ) -> None:
        """Emit a CLASS node per ``type Foo ...`` spec inside the declaration.

        Every ``type_spec`` child gets a node (with the kind of its underlying
        type stored as ``type_kind`` metadata) and a DEFINED_IN edge to the
        module. Struct types additionally get INHERITS edges for their
        embedded fields. Children of any other kind are ignored.
        """
        for spec in _named_children(decl):
            if spec.type != "type_spec":
                continue
            name_node = spec.child_by_field_name("name") or _find_child(
                spec, "type_identifier"
            )
            if name_node is None:
                continue
            name = node_text(name_node, src)
            qualname = f"{parent_qualname}.{name}"
            type_id = make_node_id(NodeKind.CLASS, qualname, rel)
            inner = spec.child_by_field_name("type")
            inner_kind = inner.type if inner is not None else "unknown"
            metadata: dict[str, Any] = {"type_kind": inner_kind}
            start_line = spec.start_point[0] + 1
            nodes.append(
                Node(
                    id=type_id,
                    kind=NodeKind.CLASS,
                    name=name,
                    qualname=qualname,
                    file=rel,
                    line_start=start_line,
                    line_end=spec.end_point[0] + 1,
                    language="go",
                    metadata=metadata,
                )
            )
            edges.append(
                Edge(
                    src=type_id,
                    dst=parent_id,
                    kind=EdgeKind.DEFINED_IN,
                    file=rel,
                    line=start_line,
                )
            )
            # Embedded fields → INHERITS (Go's composition idiom).
            if inner is not None and inner.type == "struct_type":
                self._collect_embedded_fields(inner, rel, type_id, src, edges)

    def _collect_embedded_fields(
        self,
        struct_node: tree_sitter.Node,
        rel: str,
        type_id: str,
        src: bytes,
        edges: list[Edge],
    ) -> None:
        """Emit an INHERITS edge for every embedded field of a struct.

        An embedded field is a ``field_declaration`` with NO field name —
        only a type. Treat it as composition / pseudo-inheritance. A leading
        ``*`` is removed from the embedded type's text.
        """
        field_list = _find_child(struct_node, "field_declaration_list")
        if field_list is None:
            return
        for field in _named_children(field_list):
            if field.type != "field_declaration":
                continue
            if field.child_by_field_name("name") is not None:
                continue  # explicit-name field, not embedded
            type_node = field.child_by_field_name("type")
            if type_node is None:
                continue
            embedded = node_text(type_node, src).lstrip("*").strip()
            if not embedded:
                continue
            edges.append(
                Edge(
                    src=type_id,
                    dst=f"unresolved::{embedded}",
                    kind=EdgeKind.INHERITS,
                    file=rel,
                    line=field.start_point[0] + 1,
                    metadata={"embedded": True},
                )
            )

    # -----------------------------------------------------------------------
    # Call sites — walk the body, emit CALLS edges per call_expression.
    # -----------------------------------------------------------------------

    def _collect_calls(
        self,
        body: tree_sitter.Node,
        rel: str,
        scope_id: str,
        src: bytes,
        edges: list[Edge],
    ) -> None:
        """Emit a CALLS edge from *scope_id* for each call in *body*.

        Stack-based pre-order walk; edges are appended in source order. The
        walk does not descend into nested function literals or declarations,
        so calls made inside them are not attributed to *scope_id*.

        NOTE(review): this extractor creates no node for a ``func_literal``,
        so calls inside closures currently produce no edge at all — confirm
        whether they should be attributed to the enclosing scope instead.
        """
        # LIFO stack: push children right-to-left so the leftmost is visited
        # first and edges come out in source order.
        stack: list[tree_sitter.Node] = list(reversed(body.children))
        while stack:
            cur = stack.pop()
            if cur.type == "call_expression":
                target = self._call_target_text(cur, src)
                if target:
                    edges.append(
                        Edge(
                            src=scope_id,
                            dst=f"unresolved::{target}",
                            kind=EdgeKind.CALLS,
                            file=rel,
                            line=cur.start_point[0] + 1,
                            metadata={"target_name": target},
                        )
                    )
            # Stop at nested function literals / declarations.
            if cur.type in ("func_literal", "function_declaration", "method_declaration"):
                continue
            stack.extend(reversed(cur.children))

    def _call_target_text(
        self, call: tree_sitter.Node, src: bytes
    ) -> str | None:
        """Best-effort textual rendering of the call target.

        ``Foo()``           → ``Foo``
        ``pkg.Foo()``       → ``pkg.Foo``
        ``r.Method()``      → ``r.Method``
        ``r.M().X()``       → ``X``   (chained — keep the rightmost selector)

        Returns ``None`` for anything else (type conversions, immediately
        invoked function literals, ...).
        """
        fn = call.child_by_field_name("function")
        if fn is None:
            return None
        if fn.type == "identifier":
            return node_text(fn, src)
        if fn.type != "selector_expression":
            # Type conversions, function literals etc. — skip
            return None
        field = fn.child_by_field_name("field")
        if field is None:
            return None
        operand = fn.child_by_field_name("operand")
        if operand is not None and operand.type in ("identifier", "selector_expression"):
            # Simple package or receiver reference — keep the full dotted name.
            return f"{node_text(operand, src)}.{node_text(field, src)}"
        # Chained / complex operand — fall back to the rightmost identifier
        # so the resolver can at least try a tail match.
        return node_text(field, src)
|