codemap-ruby 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Build artifacts
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+ .eggs/
12
+
13
+ # Test / coverage
14
+ .pytest_cache/
15
+ .coverage
16
+ .coverage.*
17
+ htmlcov/
18
+ coverage.xml
19
+ .tox/
20
+ .mypy_cache/
21
+ .ruff_cache/
22
+ .benchmarks/
23
+
24
+ # Virtualenv
25
+ .venv/
26
+ venv/
27
+ env/
28
+
29
+ # uv / pdm lockfiles (commit uv.lock once we settle)
30
+ # uv.lock
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+
38
+ # OS
39
+ .DS_Store
40
+ Thumbs.db
41
+
42
+ # CodeMap own index when dogfooding
43
+ .codemap/
@@ -0,0 +1,65 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-ruby
3
+ Version: 0.1.0a1
4
+ Summary: Ruby indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,ruby,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Ruby
12
+ Classifier: Topic :: Software Development
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: codemap-core<0.2,>=0.1.0a1
15
+ Requires-Dist: tree-sitter-ruby>=0.23
16
+ Requires-Dist: tree-sitter>=0.25
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # codemap-ruby
22
+
23
+ > A Ruby indexer for [CodeMap](https://github.com/qxbyte/codemap),
24
+ > shipped as an independent PyPI package.
25
+
26
+ ## What it captures
27
+
28
+ Backed by `tree-sitter-ruby`:
29
+
30
+ | AST node | Symbol kind |
31
+ |---|---|
32
+ | `class` | `class` (with `extra.ruby_kind=class`) |
33
+ | `module` | `class` (with `extra.ruby_kind=module`) |
34
+ | `method` (inside class/module) | `method` |
35
+ | `method` (top level) | `function` |
36
+ | `singleton_method` | `method` (with `extra.ruby_kind=singleton`) |
37
+ | top-level constant `assignment` | `variable` |
38
+ | constant assignment inside class/module | `field` |
39
+
40
+ Nested class / module declarations are tracked with a class stack:
41
+ `module A; class B; def m; end; end; end` yields the descriptor suffix `A#B#m().` for `m`.
42
+
43
+ ## Install
44
+
45
+ ```bash
46
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-ruby"
47
+ ```
48
+
49
+ ## SymbolID encoding
50
+
51
+ ```
52
+ scip-ruby . . . app/models/user.rb/User#hello().
53
+ ```
54
+
55
+ ## Limits
56
+
57
+ * `attr_accessor` / `attr_reader` / `attr_writer` macros are not
58
+ expanded into individual field symbols.
59
+ * `define_method`, `class_eval`, and other metaprogramming are not
60
+ resolved.
61
+ * Mixin (`include`/`extend`) edges are not yet emitted.
62
+
63
+ ## License
64
+
65
+ MIT.
@@ -0,0 +1,45 @@
1
+ # codemap-ruby
2
+
3
+ > A Ruby indexer for [CodeMap](https://github.com/qxbyte/codemap),
4
+ > shipped as an independent PyPI package.
5
+
6
+ ## What it captures
7
+
8
+ Backed by `tree-sitter-ruby`:
9
+
10
+ | AST node | Symbol kind |
11
+ |---|---|
12
+ | `class` | `class` (with `extra.ruby_kind=class`) |
13
+ | `module` | `class` (with `extra.ruby_kind=module`) |
14
+ | `method` (inside class/module) | `method` |
15
+ | `method` (top level) | `function` |
16
+ | `singleton_method` | `method` (with `extra.ruby_kind=singleton`) |
17
+ | top-level constant `assignment` | `variable` |
18
+ | constant assignment inside class/module | `field` |
19
+
20
+ Nested class / module declarations are tracked with a class stack:
21
+ `module A; class B; def m; end; end; end` yields the descriptor suffix `A#B#m().` for `m`.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-ruby"
27
+ ```
28
+
29
+ ## SymbolID encoding
30
+
31
+ ```
32
+ scip-ruby . . . app/models/user.rb/User#hello().
33
+ ```
34
+
35
+ ## Limits
36
+
37
+ * `attr_accessor` / `attr_reader` / `attr_writer` macros are not
38
+ expanded into individual field symbols.
39
+ * `define_method`, `class_eval`, and other metaprogramming are not
40
+ resolved.
41
+ * Mixin (`include`/`extend`) edges are not yet emitted.
42
+
43
+ ## License
44
+
45
+ MIT.
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.21"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "codemap-ruby"
7
+ version = "0.1.0a1"
8
+ description = "Ruby indexer plugin for CodeMap"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "CodeMap Contributors" }]
13
+ keywords = ["codemap", "ruby", "indexer", "tree-sitter"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Ruby",
18
+ "Topic :: Software Development",
19
+ ]
20
+ dependencies = [
21
+ "codemap-core>=0.1.0a1,<0.2",
22
+ "tree-sitter>=0.25",
23
+ "tree-sitter-ruby>=0.23",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ dev = ["pytest>=8.0"]
28
+
29
+ [project.entry-points."codemap.indexers"]
30
+ ruby = "codemap_ruby:RubyIndexer"
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/qxbyte/codemap"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["src/codemap_ruby"]
@@ -0,0 +1,8 @@
1
+ """Ruby indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_ruby.indexer import RubyIndexer
6
+
7
+ __all__ = ["RubyIndexer"]
8
# Keep in step with ``version`` in pyproject.toml (the distribution is 0.1.0a1).
__version__ = "0.1.0a1"
@@ -0,0 +1,232 @@
1
+ """Ruby indexer built on tree-sitter-ruby.
2
+
3
+ Tracks a class stack so nested modules / classes produce qualified
4
+ symbol IDs (``Outer#Inner#m()``). Top-level ``def`` is a function; ``def``
5
+ inside a class or module body is a method. ``singleton_method`` (``def
6
+ self.x``) is recorded as a method tagged ``extra.ruby_kind="singleton"``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path, PurePosixPath
12
+ from typing import ClassVar
13
+
14
+ import tree_sitter
15
+ import tree_sitter_ruby
16
+
17
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
18
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
19
+ from codemap.indexers.base import IndexContext
20
+
21
+ SCHEME = "scip-ruby"
22
+ LANG = "ruby"
23
+
24
+ _RB_LANG = tree_sitter.Language(tree_sitter_ruby.language())
25
+
26
+
27
class RubyIndexer:
    """CodeMap indexer plugin for Ruby sources, backed by tree-sitter-ruby."""

    name: ClassVar[str] = "ruby"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.rb"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Tell whether *path* carries the ``.rb`` suffix this plugin handles."""
        suffix = path.suffix
        return suffix == ".rb"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the symbols discovered in it.

        *path* is not consulted here; the file is identified through
        ``ctx.relative_path``.

        When *source* is not valid UTF-8 the result carries a single
        ``RB002`` error diagnostic and nothing else. Otherwise the bytes are
        parsed and walked, and an ``RB001`` warning is appended when the
        parsed tree reports an error.
        """
        rel_path = ctx.relative_path

        # The decoded text is thrown away: decoding only checks that the
        # bytes are valid UTF-8 before they are handed to the parser.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            bad_encoding = Diagnostic(
                severity="error",
                file=rel_path,
                code="RB002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[bad_encoding])

        tree = tree_sitter.Parser(_RB_LANG).parse(source)
        root = tree.root_node
        walker = _Visitor(rel_path)
        walker.visit(root)

        collected = list(walker.diagnostics)
        if root.has_error:
            parse_warning = Diagnostic(
                severity="warning",
                file=rel_path,
                range=Range(start_line=1, end_line=1),
                code="RB001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            collected.append(parse_warning)

        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=collected,
        )
77
+
78
+
79
class _Visitor:
    """Collect CodeMap symbols from a tree-sitter Ruby syntax tree.

    A stack of enclosing class / module names is maintained during the walk
    so that every symbol ID is qualified by the types it is declared in.
    Results accumulate on ``symbols``. ``edges`` and ``diagnostics`` are
    exposed for the caller, but nothing in this class appends to them.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []
        # Names of the class / module declarations currently being walked,
        # outermost first.
        self._type_stack: list[str] = []

    def visit(self, node: tree_sitter.Node) -> None:
        """Walk *node* and its descendants in source order, recording symbols.

        ``class`` / ``module`` nodes are handed to ``_visit_type``, which
        walks their bodies itself. ``method`` / ``singleton_method`` nodes
        are recorded without descending into them. ``assignment`` nodes are
        recorded and then walked like any other node.

        The walk uses an explicit stack rather than one Python frame per
        tree level, so a deeply nested expression cannot exhaust the
        interpreter's recursion limit. Recursion still happens once per
        nested class / module, through ``_visit_type``.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            kind = current.type
            if kind == "class":
                self._visit_type(current, ruby_kind="class")
                continue
            if kind == "module":
                self._visit_type(current, ruby_kind="module")
                continue
            if kind == "method":
                self._visit_method(current, singleton=False)
                continue
            if kind == "singleton_method":
                self._visit_method(current, singleton=True)
                continue
            if kind == "assignment":
                self._visit_assignment(current)
            # Push children reversed so they are popped, and therefore
            # processed, in their original left-to-right order.
            pending.extend(reversed(current.children))

    # ----------------------------------------------------- type-level

    def _visit_type(self, node: tree_sitter.Node, *, ruby_kind: str) -> None:
        """Record a ``class`` / ``module`` declaration and walk its body.

        The symbol kind is always ``"class"``; *ruby_kind* is stored under
        ``extra["ruby_kind"]`` to tell the two apart. Declarations for which
        no name can be found are skipped entirely, body included.
        """
        name = _first_constant(node)
        if name is None:
            return
        sid = self._make_id(name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"ruby_kind": ruby_kind},
            )
        )
        self._type_stack.append(name)
        try:
            # Only the statements of ``body_statement`` children are walked;
            # other children of the declaration are not visited.
            for child in node.children:
                if child.type == "body_statement":
                    for grand in child.children:
                        self.visit(grand)
        finally:
            # Always unwind so an exception cannot leave a stale entry
            # qualifying later symbols.
            self._type_stack.pop()

    # ----------------------------------------------------- methods

    def _visit_method(self, node: tree_sitter.Node, *, singleton: bool) -> None:
        """Record a method definition; its body is not walked.

        The symbol kind is ``"method"`` inside a class / module and
        ``"function"`` at top level. Singleton definitions are tagged with
        ``extra["ruby_kind"] = "singleton"``.
        """
        name = _method_name(node)
        if name is None:
            return
        sym_kind: str = "method" if self._type_stack else "function"
        sid = self._make_id(name, kind=DescriptorKind.METHOD)
        extra: dict[str, str] = {}
        if singleton:
            extra["ruby_kind"] = "singleton"
        self.symbols.append(
            Symbol(
                id=sid,
                kind=sym_kind,  # type: ignore[arg-type]
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"def {name}",
                extra=extra,
            )
        )

    # ------------------------------------------------- constants / vars

    def _visit_assignment(self, node: tree_sitter.Node) -> None:
        """Record an assignment whose left-hand side is a constant.

        The symbol kind is ``"field"`` inside a class / module and
        ``"variable"`` at top level. Any other left-hand side is ignored.
        """
        # The left-hand side is taken to be the first child of the node.
        lhs = node.children[0] if node.children else None
        if lhs is None or lhs.type != "constant":
            return
        name = _node_text(lhs)
        if not name:
            return
        sym_kind: str = "field" if self._type_stack else "variable"
        sid = self._make_id(name, kind=DescriptorKind.TERM)
        self.symbols.append(
            Symbol(
                id=sid,
                kind=sym_kind,  # type: ignore[arg-type]
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
            )
        )

    # -------------------------------------------------------- helpers

    def _make_id(self, name: str, *, kind: DescriptorKind) -> SymbolID:
        """Build the symbol ID for *name*.

        Descriptors are, in order: one namespace per path part of the file,
        one type per enclosing class / module, then *name* with *kind*.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(Descriptor(name=t, kind=DescriptorKind.TYPE) for t in self._type_stack)
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
186
+
187
+
188
+ # ---------------------------------------------------------------------------
189
+ # Pure helpers
190
+ # ---------------------------------------------------------------------------
191
+
192
+
193
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Turn each part of *path* into a namespace descriptor, in path order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
195
+
196
+
197
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert *node*'s start / end points into a ``Range``.

    Rows are shifted by one to give 1-based line numbers; columns are passed
    through unchanged. The end line is never reported before the start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
206
+
207
+
208
def _node_text(node: tree_sitter.Node) -> str:
    """Return *node*'s text decoded as UTF-8, or ``""`` when it has none."""
    if node.text is None:
        return ""
    return node.text.decode("utf-8")
210
+
211
+
212
def _first_constant(node: tree_sitter.Node) -> str | None:
    """Return the name declared by a ``class`` / ``module`` node, if any.

    The first direct child that is a ``constant`` wins. For a
    ``scope_resolution`` child (``class A::B``) the trailing constant inside
    it is used instead. ``None`` means no name was found.
    """
    for child in node.children:
        child_type = child.type
        if child_type == "constant":
            return _node_text(child)
        if child_type != "scope_resolution":
            continue
        # nested: `class A::B` — keep only the last constant of the path.
        constants = [part for part in child.children if part.type == "constant"]
        if constants:
            return _node_text(constants[-1])
    return None
225
+
226
+
227
def _method_name(node: tree_sitter.Node) -> str | None:
    """Return the declared name of a ``method`` / ``singleton_method`` node.

    For ``def x`` this is ``x``; for ``def self.x`` and ``def obj.x`` it is
    also ``x``. Names that are not plain identifiers (for example a setter
    such as ``name=`` or an operator such as ``==``) are returned verbatim.
    ``None`` is returned when no name can be found.
    """
    # Prefer the grammar's ``name`` field. Taking the first ``identifier``
    # child instead returns the receiver for ``def obj.x`` and misses any
    # definition whose name node is not an identifier.
    declared = node.child_by_field_name("name")
    if declared is not None:
        # An empty name (e.g. from parser error recovery) is treated as
        # "no name" so callers skip the definition.
        return _node_text(declared) or None
    # Fall back to the first identifier child when the field is absent.
    for child in node.children:
        if child.type == "identifier":
            return _node_text(child)
    return None
File without changes
@@ -0,0 +1,171 @@
1
+ """Unit tests for the Ruby indexer plugin."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import textwrap
6
+ from pathlib import Path, PurePosixPath
7
+
8
+ from codemap_ruby import RubyIndexer
9
+ from codemap_ruby.indexer import SCHEME
10
+
11
+ from codemap.core.models import IndexResult
12
+ from codemap.indexers.base import IndexContext
13
+
14
+
15
def _index(source: str, *, path: str = "app/user.rb") -> IndexResult:
    """Index *source* as though it were the Ruby file at *path*.

    The snippet is dedented and stripped of leading newlines first, so tests
    can pass indented triple-quoted literals.
    """
    ruby_source = textwrap.dedent(source).lstrip("\n")
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath(path),
        language="ruby",
    )
    indexer = RubyIndexer()
    return indexer.index_file(Path(path), ruby_source.encode("utf-8"), context)
26
+
27
+
28
def test_indexer_metadata() -> None:
    """The plugin advertises ``ruby`` and accepts only ``.rb`` paths."""
    indexer = RubyIndexer()
    assert indexer.name == "ruby"
    assert indexer.languages == ["ruby"]
    ruby_file, python_file = Path("a.rb"), Path("a.py")
    assert indexer.supports(ruby_file)
    assert not indexer.supports(python_file)
34
+
35
+
36
def test_scheme_is_consistent() -> None:
    """Every emitted symbol ID starts with the plugin's scheme prefix."""
    r = _index(
        """
        class A; end
        def f; end
        X = 1
        """
    )
    # Guard against a vacuous pass: the loop below proves nothing if the
    # indexer returned no symbols at all.
    assert r.symbols
    prefix = f"{SCHEME} "
    for s in r.symbols:
        assert str(s.id).startswith(prefix)
46
+
47
+
48
def test_class_declaration() -> None:
    """A class is tagged ``ruby_kind=class`` and its method is qualified by it."""
    result = _index(
        """
        class User
          def hello
            @name
          end
        end
        """
    )
    class_symbol = next(filter(lambda sym: sym.kind == "class", result.symbols))
    assert class_symbol.extra.get("ruby_kind") == "class"
    method_ids = [str(sym.id) for sym in result.symbols if sym.kind == "method"]
    assert any("User#hello()." in method_id for method_id in method_ids)
62
+
63
+
64
def test_module_declaration() -> None:
    """A module is reported with kind ``class`` and ``ruby_kind=module``."""
    result = _index(
        """
        module Greeter
          def hello
          end
        end
        """
    )
    module_symbol = next(filter(lambda sym: sym.kind == "class", result.symbols))
    assert module_symbol.extra.get("ruby_kind") == "module"
75
+
76
+
77
def test_nested_module_class_namespacing() -> None:
    """Nested declarations qualify both the inner type and its method."""
    result = _index(
        """
        module Outer
          class Inner
            def m
            end
          end
        end
        """
    )
    rendered = [str(sym.id) for sym in result.symbols]
    for expected in ("Outer#Inner#", "Outer#Inner#m()."):
        assert any(expected in symbol_id for symbol_id in rendered)
91
+
92
+
93
def test_singleton_method() -> None:
    """``def self.x`` yields exactly one symbol tagged ``singleton``."""
    result = _index(
        """
        class User
          def self.create(name)
            new(name)
          end
        end
        """
    )
    tagged = []
    for sym in result.symbols:
        if sym.extra.get("ruby_kind") == "singleton":
            tagged.append(sym)
    assert len(tagged) == 1
    only = tagged[0]
    assert "User#create()." in str(only.id)
106
+
107
+
108
def test_top_level_method_is_function() -> None:
    """A ``def`` outside any class or module is recorded as a function."""
    result = _index(
        """
        def helper(x)
          x + 1
        end
        """
    )
    functions = [sym for sym in result.symbols if sym.kind == "function"]
    assert len(functions) == 1
    function_id = str(functions[0].id)
    assert "helper" in function_id
119
+
120
+
121
def test_top_level_constant_is_variable() -> None:
    """A top-level constant assignment is recorded as a variable."""
    result = _index("MAX = 10")
    variables = [sym for sym in result.symbols if sym.kind == "variable"]
    assert len(variables) == 1
    variable_id = str(variables[0].id)
    assert variable_id.endswith("MAX.")
126
+
127
+
128
def test_constant_inside_class_is_field() -> None:
    """A constant assigned inside a class body is recorded as a field."""
    result = _index(
        """
        class A
          DEFAULT = 1
        end
        """
    )
    field_count = sum(1 for sym in result.symbols if sym.kind == "field")
    assert field_count == 1
138
+
139
+
140
def test_symbol_id_uses_path_namespaces() -> None:
    """The file's path parts appear as namespaces ahead of the type name."""
    result = _index("class A; end", path="app/models/user.rb")
    class_symbol = next(filter(lambda sym: sym.kind == "class", result.symbols))
    expected_id = "scip-ruby . . . app/models/user.rb/A#"
    assert str(class_symbol.id) == expected_id
144
+
145
+
146
def test_empty_file_yields_no_symbols() -> None:
    """Indexing empty source produces an empty symbol list."""
    result = _index("")
    found = result.symbols
    assert found == []
149
+
150
+
151
def test_invalid_utf8_yields_diagnostic() -> None:
    """Bytes that are not valid UTF-8 give no symbols and an ``RB002`` diagnostic."""
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath("bad.rb"),
        language="ruby",
    )
    undecodable = b"\xff\xfe class"
    result = RubyIndexer().index_file(Path("bad.rb"), undecodable, context)
    assert result.symbols == []
    first_diagnostic = result.diagnostics[0]
    assert first_diagnostic.code == "RB002"
164
+
165
+
166
def test_method_outside_type_is_function_not_method() -> None:
    """A bare top-level ``def`` counts as one function and zero methods."""
    result = _index("def f; end")
    kinds = [sym.kind for sym in result.symbols]
    function_total = kinds.count("function")
    method_total = kinds.count("method")
    assert function_total == 1
    assert method_total == 0