codemap-rust 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_rust/__init__.py
ADDED
codemap_rust/indexer.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Rust indexer built on tree-sitter-rust.
|
|
2
|
+
|
|
3
|
+
Notable Rust-specific behaviour: ``impl_item`` blocks (both inherent and
|
|
4
|
+
trait impls) attach their inner functions to the impl'd type's namespace,
|
|
5
|
+
so ``impl User { fn login(&self) {} }`` and ``impl Greeter for User {
|
|
6
|
+
fn hello(&self) {} }`` both produce ``User#xxx().`` SymbolIDs. Methods
|
|
7
|
+
defined directly on a trait (``trait T { fn m(&self); }``) attach to the
|
|
8
|
+
trait the same way.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path, PurePosixPath
|
|
14
|
+
from typing import ClassVar
|
|
15
|
+
|
|
16
|
+
import tree_sitter
|
|
17
|
+
import tree_sitter_rust
|
|
18
|
+
|
|
19
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
20
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
21
|
+
from codemap.indexers.base import IndexContext
|
|
22
|
+
|
|
23
|
+
# Scheme string passed to every SymbolID built by _Visitor._make_id.
SCHEME = "scip-rust"
|
|
24
|
+
# Language tag stamped on each emitted Symbol and listed in RustIndexer.languages.
LANG = "rust"
|
|
25
|
+
|
|
26
|
+
# Rust grammar handle, built once at import time and reused by every
# tree_sitter.Parser that RustIndexer.index_file creates.
_RUST_LANG = tree_sitter.Language(tree_sitter_rust.language())
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RustIndexer:
    """Indexer plugin that extracts symbols from ``.rs`` files.

    The class attributes describe the plugin (name, version, the file
    patterns and languages it handles); ``index_file`` does the work.
    """

    name: ClassVar[str] = "rust"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.rs"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when ``path`` has the exact suffix ``.rs``."""
        return path.suffix == ".rs"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse ``source`` as Rust and return the symbols found in it.

        Args:
            path: Not read by this implementation; the file is identified
                through ``ctx.relative_path`` instead.
            source: Raw file contents.
            ctx: Indexing context; only ``relative_path`` is used.

        Returns:
            An ``IndexResult``. If ``source`` is not valid UTF-8 it carries
            a single ``RS002`` error diagnostic and nothing else. Otherwise
            it carries the visitor's symbols and edges, plus an ``RS001``
            warning when the parse tree's root reports errors.
        """
        # Decode purely as a validity check; the parser is fed the bytes.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_utf8 = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="RS002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_utf8])

        tree = tree_sitter.Parser(_RUST_LANG).parse(source)
        walker = _Visitor(ctx.relative_path)
        walker.visit(tree.root_node)

        # Copy so the warning below does not mutate the visitor's own list.
        collected = list(walker.diagnostics)
        if tree.root_node.has_error:
            parse_warning = Diagnostic(
                severity="warning",
                file=ctx.relative_path,
                range=Range(start_line=1, end_line=1),
                code="RS001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            collected.append(parse_warning)

        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=collected,
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class _Visitor:
    """Collect Rust symbols from a tree-sitter syntax tree.

    Call ``visit`` on the root node; results accumulate in ``symbols``.
    While a trait body or an impl body is being walked, the owning type's
    name sits on ``_type_stack`` so that everything declared inside gets
    that name as a TYPE descriptor in its SymbolID.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        # Nothing in this class appends to ``edges`` or ``diagnostics``.
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []
        # Names of the trait / impl owners whose bodies are being walked.
        self._type_stack: list[str] = []

    def visit(self, node: tree_sitter.Node) -> None:
        """Dispatch on ``node.type``; recurse into children of other nodes.

        Recognised item kinds are handed to their ``_visit_*`` method and
        are not descended into here. Any other node is walked child by
        child.
        """
        kind = node.type
        if kind == "function_item":
            self._visit_function_item(node)
        elif kind == "function_signature_item" and self._type_stack:
            # Body-less signatures are only recorded inside a trait/impl
            # scope; elsewhere they fall through to the generic walk.
            self._visit_function_signature_item(node)
        elif kind == "struct_item":
            self._visit_type_decl(node, rust_kind="struct")
        elif kind == "enum_item":
            self._visit_type_decl(node, rust_kind="enum")
        elif kind == "trait_item":
            self._visit_trait_item(node)
        elif kind == "impl_item":
            self._visit_impl_item(node)
        elif kind == "const_item":
            self._visit_const_or_static(node, rust_kind="const")
        elif kind == "static_item":
            self._visit_const_or_static(node, rust_kind="static")
        else:
            for child in node.children:
                self.visit(child)

    # ---------------------------------------------------- functions

    def _visit_function_item(self, node: tree_sitter.Node) -> None:
        """Record a ``fn`` with a body: a method inside a type scope, else a function."""
        name = self._declared_name(node)
        if name is None:
            return
        symbol_kind: str = "function"
        if self._type_stack:
            symbol_kind = "method"
        # The descriptor is METHOD in both cases; only Symbol.kind differs.
        symbol = Symbol(
            id=self._make_id(name, kind=DescriptorKind.METHOD),
            kind=symbol_kind,  # type: ignore[arg-type]
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            signature=_fn_signature(node, name),
        )
        self.symbols.append(symbol)

    def _visit_function_signature_item(self, node: tree_sitter.Node) -> None:
        """Record a body-less ``fn`` signature as a method tagged ``trait_method``."""
        name = self._declared_name(node)
        if name is None:
            return
        symbol = Symbol(
            id=self._make_id(name, kind=DescriptorKind.METHOD),
            kind="method",
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            signature=_fn_signature(node, name),
            extra={"rust_kind": "trait_method"},
        )
        self.symbols.append(symbol)

    # ------------------------------------------------------ types

    def _visit_type_decl(self, node: tree_sitter.Node, *, rust_kind: str) -> None:
        """Record a struct or enum as a ``class`` symbol; its body is not walked."""
        name = self._declared_name(node)
        if name is None:
            return
        self.symbols.append(self._class_symbol(node, name, rust_kind))

    def _visit_trait_item(self, node: tree_sitter.Node) -> None:
        """Record a trait as a ``class`` symbol, then walk its body under its name."""
        name = self._declared_name(node)
        if name is None:
            return
        self.symbols.append(self._class_symbol(node, name, "trait"))
        body = node.child_by_field_name("body")
        if body is not None:
            self._walk_scoped(name, body)

    def _visit_impl_item(self, node: tree_sitter.Node) -> None:
        """Walk an impl body with the receiver type pushed as the owner.

        No symbol is emitted for the impl block itself.
        """
        owner = _impl_owner_type(node)
        body = node.child_by_field_name("body")
        if owner is not None and body is not None:
            self._walk_scoped(owner, body)
            return
        # Walk anyway to capture function symbols at file scope.
        for child in node.children:
            self.visit(child)

    # ----------------------------------------------- const / static

    def _visit_const_or_static(self, node: tree_sitter.Node, *, rust_kind: str) -> None:
        """Record a ``const`` or ``static`` item as a ``variable`` symbol."""
        name = self._declared_name(node)
        if name is None:
            return
        symbol = Symbol(
            id=self._make_id(name, kind=DescriptorKind.TERM),
            kind="variable",
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            extra={"rust_kind": rust_kind},
        )
        self.symbols.append(symbol)

    # ----------------------------------------------------- helpers

    @staticmethod
    def _declared_name(node: tree_sitter.Node) -> str | None:
        """Return the text of ``node``'s ``name`` field, or None if missing or empty."""
        name_node = node.child_by_field_name("name")
        if name_node is None:
            return None
        return _node_text(name_node) or None

    def _class_symbol(self, node: tree_sitter.Node, name: str, rust_kind: str) -> Symbol:
        """Build the ``class`` symbol shared by structs, enums and traits."""
        return Symbol(
            id=self._make_id(name, kind=DescriptorKind.TYPE),
            kind="class",
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            extra={"rust_kind": rust_kind},
        )

    def _walk_scoped(self, owner: str, body: tree_sitter.Node) -> None:
        """Visit every child of ``body`` while ``owner`` is on the type stack."""
        self._type_stack.append(owner)
        try:
            for member in body.children:
                self.visit(member)
        finally:
            # Always restore the stack, even if a visit raises.
            self._type_stack.pop()

    def _make_id(self, name: str, *, kind: DescriptorKind) -> SymbolID:
        """Build a SymbolID: path namespaces, then enclosing types, then ``name``."""
        enclosing = [
            Descriptor(name=owner, kind=DescriptorKind.TYPE)
            for owner in self._type_stack
        ]
        leaf = Descriptor(name=name, kind=kind)
        chain = (*_path_namespaces(self.relative_path), *enclosing, leaf)
        return SymbolID(scheme=SCHEME, descriptors=chain)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ---------------------------------------------------------------------------
|
|
255
|
+
# Pure helpers
|
|
256
|
+
# ---------------------------------------------------------------------------
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of ``path``, in order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert a node's start/end points into a ``Range``.

    Rows are shifted by one to become line numbers (tree-sitter rows are
    zero-based); columns are passed through unchanged. The end line is
    clamped so it is never smaller than the start line.
    """
    first_row, first_col = node.start_point
    last_row, last_col = node.end_point
    first_line = first_row + 1
    last_line = max(last_row + 1, first_line)
    return Range(
        start_line=first_line,
        start_col=first_col,
        end_line=last_line,
        end_col=last_col,
    )
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _node_text(node: tree_sitter.Node) -> str:
    """Return the node's text decoded as UTF-8, or ``""`` when it has none."""
    if node.text is None:
        return ""
    return node.text.decode("utf-8")
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _fn_signature(node: tree_sitter.Node, name: str) -> str:
    """Render ``fn <name><parameters>[ -> <return type>]`` for a function node.

    The parameter list and return type are copied verbatim from the
    node's ``parameters`` and ``return_type`` fields. A missing parameter
    list is rendered as ``()``; a missing return type is omitted.
    """
    params = node.child_by_field_name("parameters")
    signature = f"fn {name}" + ("()" if params is None else _node_text(params))
    return_type = node.child_by_field_name("return_type")
    if return_type is None:
        return signature
    return f"{signature} -> {_node_text(return_type)}"
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _impl_owner_type(impl_node: tree_sitter.Node) -> str | None:
    """Name the type an impl block belongs to.

    ``impl Foo { ... }`` and ``impl Trait for Foo { ... }`` both yield
    ``"Foo"``: the receiver, never the trait. Returns None when no
    receiver can be found or when it cannot be reduced to a plain name.
    """
    # The ``type`` field is the receiver for inherent and trait impls alike.
    receiver = impl_node.child_by_field_name("type")
    if receiver is None:
        # Fallback: the last type-like child that appears before the
        # ``declaration_list`` (the impl body).
        type_like = ("type_identifier", "generic_type", "scoped_type_identifier")
        for child in impl_node.children:
            if child.type == "declaration_list":
                break
            if child.type in type_like:
                receiver = child
    return None if receiver is None else _unwrap_type(receiver)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _unwrap_type(node: tree_sitter.Node) -> str | None:
    """Reduce a type node to its bare type name.

    * ``type_identifier`` (``Foo``) gives its own text.
    * ``generic_type`` (``Foo<T>``) gives the name of the base type, with
      the generic arguments dropped. The base may itself be path-qualified
      (``models::Foo<T>``), in which case its last segment is used.
    * ``scoped_type_identifier`` (``models::Foo``) gives the last
      ``type_identifier`` segment.

    Returns None for any other node kind, or when no name can be found.
    """
    kind = node.type
    if kind == "type_identifier":
        return _node_text(node)
    if kind == "generic_type":
        for child in node.children:
            if child.type == "type_identifier":
                return _node_text(child)
            if child.type == "scoped_type_identifier":
                # Path-qualified generic base: previously this returned
                # None, so the impl's methods lost their owner and were
                # indexed as free functions.
                return _unwrap_type(child)
        return None
    if kind == "scoped_type_identifier":
        # Scan from the end so the final path segment wins.
        for child in reversed(node.children):
            if child.type == "type_identifier":
                return _node_text(child)
        return None
    return None
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-rust
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Rust indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,rust,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Rust
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0
|
|
15
|
+
Requires-Dist: tree-sitter-rust>=0.23
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# codemap-rust
|
|
22
|
+
|
|
23
|
+
> A Rust indexer for [CodeMap](https://github.com/qxbyte/codemap), shipped
|
|
24
|
+
> as an independent PyPI package.
|
|
25
|
+
|
|
26
|
+
## What it captures
|
|
27
|
+
|
|
28
|
+
Backed by `tree-sitter-rust`:
|
|
29
|
+
|
|
30
|
+
| AST node | Symbol kind |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `function_item` (free) | `function` |
|
|
33
|
+
| `function_item` (inside `impl` or a `trait` body) | `method` (attached to the impl'd type or the trait) |
|
|
34
|
+
| `function_signature_item` (inside `trait`) | `method` (attached to the trait) |
|
|
35
|
+
| `struct_item` | `class` (with `extra.rust_kind=struct`) |
|
|
36
|
+
| `enum_item` | `class` (with `extra.rust_kind=enum`) |
|
|
37
|
+
| `trait_item` | `class` (with `extra.rust_kind=trait`) |
|
|
38
|
+
| `const_item` | `variable` (with `extra.rust_kind=const`) |
|
|
39
|
+
| `static_item` | `variable` (with `extra.rust_kind=static`) |
|
|
40
|
+
|
|
41
|
+
`impl Trait for Type` blocks attach methods to `Type` (the receiver), so
|
|
42
|
+
both inherent and trait impls end up addressable under the same
|
|
43
|
+
`Type#method()` shape.
|
|
44
|
+
|
|
45
|
+
## SymbolID encoding
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
scip-rust . . . src/user.rs/User#login().
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install codemap-rust
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
After install, `codemap doctor` lists `rust` alongside the other
|
|
58
|
+
indexers on identical terms (ADR-004 + ADR-L001).
|
|
59
|
+
|
|
60
|
+
## Limits
|
|
61
|
+
|
|
62
|
+
* Methods on generic types (`impl<T> Foo<T>`) attach to `Foo` without
|
|
63
|
+
the generic parameter — adequate for code search, lossy for type
|
|
64
|
+
checking.
|
|
65
|
+
* Macros are not expanded; their declarations are not captured as
|
|
66
|
+
symbols.
|
|
67
|
+
* `mod` declarations are not turned into namespace prefixes; the file
|
|
68
|
+
path is the only namespace.
|
|
69
|
+
|
|
70
|
+
## License
|
|
71
|
+
|
|
72
|
+
MIT.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
codemap_rust/__init__.py,sha256=_4-DOiixiJTtSOQqMhgEpAtEvZSakhNItaCO3s6W68E,170
|
|
2
|
+
codemap_rust/indexer.py,sha256=8xai62IKM-xYQlCPZ4dvyUjCfLM9htpi7mMFP-SI99U,10950
|
|
3
|
+
codemap_rust-0.1.0.dist-info/METADATA,sha256=1NlLJsHUPjeVmTlgPdFC8fvDd9oCylDQfNtMA0FhSV4,2123
|
|
4
|
+
codemap_rust-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
5
|
+
codemap_rust-0.1.0.dist-info/entry_points.txt,sha256=m3lIP9p77V76YTaKUuBlWkw0QGlfQ1IbatsatAm36V0,51
|
|
6
|
+
codemap_rust-0.1.0.dist-info/RECORD,,
|