codemap-java 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_java/__init__.py
ADDED
codemap_java/indexer.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""Java indexer built on tree-sitter-java.
|
|
2
|
+
|
|
3
|
+
Covers class / interface / enum / record / method / constructor / field
|
|
4
|
+
declarations. The declared package is recorded in the type symbols'
|
|
5
|
+
``extra`` rather than in the SymbolID. Nested types track a class stack to produce the
|
|
6
|
+
correct ``Cls#Inner#m()`` chain.
|
|
7
|
+
|
|
8
|
+
The indexer is single-file by design; cross-file `extends` / `implements`
|
|
9
|
+
resolution lives in a future bridge so the indexer surface stays narrow.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from pathlib import Path, PurePosixPath
|
|
15
|
+
from typing import ClassVar
|
|
16
|
+
|
|
17
|
+
import tree_sitter
|
|
18
|
+
import tree_sitter_java
|
|
19
|
+
|
|
20
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
21
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
22
|
+
from codemap.indexers.base import IndexContext
|
|
23
|
+
|
|
24
|
+
# Scheme component passed to every SymbolID built in this module.
SCHEME = "scip-java"
# Language tag stamped on each emitted Symbol and advertised by JavaIndexer.languages.
LANG = "java"

# Java grammar wrapped as a tree-sitter Language; built once at import time
# and handed to the Parser that index_file creates.
_JAVA_LANG = tree_sitter.Language(tree_sitter_java.language())
|
|
28
|
+
|
|
29
|
+
# tree-sitter node types that the visitor records as type symbols and whose
# bodies it walks for members and nested types.
_TYPE_DECLS = frozenset(
    (
        "class_declaration",
        "interface_declaration",
        "enum_declaration",
        "record_declaration",
    )
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class JavaIndexer:
    """Indexer plugin that extracts Java symbols from one file at a time.

    The class attributes describe the plugin (name, version, glob patterns
    and languages); ``index_file`` does the actual work by parsing the
    source with tree-sitter and delegating the walk to ``_Visitor``.
    """

    name: ClassVar[str] = "java"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.java"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* has exactly the ``.java`` suffix."""
        return path.suffix == ".java"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* as Java and return the symbols found in it.

        Args:
            path: Location of the file; not read by this implementation.
            source: Raw file contents.
            ctx: Indexing context; only ``ctx.relative_path`` is used, as
                the file reference on symbols and diagnostics.

        Returns:
            An ``IndexResult``. If *source* is not valid UTF-8 it carries a
            single ``JAVA002`` error and nothing else. Otherwise it carries
            the visitor's symbols, edges and diagnostics, plus a ``JAVA001``
            warning when the syntax tree contains parse errors.
        """
        rel_path = ctx.relative_path

        # The decoded text is discarded: this is purely an up-front validity
        # check so later per-node decoding cannot fail.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_utf8 = Diagnostic(
                severity="error",
                file=rel_path,
                code="JAVA002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_utf8])

        root = tree_sitter.Parser(_JAVA_LANG).parse(source).root_node
        walker = _Visitor(rel_path)
        walker.visit(root)

        # Copy so the warning below does not mutate the visitor's own list.
        found = [*walker.diagnostics]
        if root.has_error:
            parse_warning = Diagnostic(
                severity="warning",
                file=rel_path,
                range=Range(start_line=1, end_line=1),
                code="JAVA001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            found.append(parse_warning)

        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=found,
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# AST visitor
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class _Visitor:
    """Collect symbols from the tree-sitter syntax tree of one Java file.

    Type declarations push their name onto a class stack while their body
    is walked, so members and nested types receive IDs qualified by every
    enclosing type. Methods, constructors and fields are recorded only
    while inside a type, and the walk never descends into them, so local
    and anonymous classes produce no symbols.

    Results accumulate in ``symbols``; ``edges`` and ``diagnostics`` are
    exposed for the caller but nothing in this class adds to them yet.
    """

    # Node types that can hold the name inside a ``package_declaration``.
    _PACKAGE_NAME_TYPES: ClassVar[tuple[str, ...]] = ("scoped_identifier", "identifier")

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []
        # Names of the type declarations currently being walked, outermost first.
        self._class_stack: list[str] = []
        # Text of the most recently seen package declaration ("" if none).
        self._package: str = ""

    def visit(self, node: tree_sitter.Node) -> None:
        """Dispatch on *node*'s type, recursing into children otherwise.

        Every handled node type returns without visiting its children
        here; type declarations walk their own body in ``_visit_type``.
        """
        if node.type == "package_declaration":
            self._package = self._package_name(node)
            return
        if node.type in _TYPE_DECLS:
            self._visit_type(node)
            return
        # Members are only meaningful inside a type; outside one they fall
        # through to the generic recursion below.
        if node.type == "method_declaration" and self._class_stack:
            self._visit_method(node, is_constructor=False)
            return
        if node.type == "constructor_declaration" and self._class_stack:
            self._visit_method(node, is_constructor=True)
            return
        if node.type == "field_declaration" and self._class_stack:
            self._visit_field(node)
            return
        for child in node.children:
            self.visit(child)

    def _package_name(self, node: tree_sitter.Node) -> str:
        """Return the dotted name of a ``package_declaration``, or "".

        The name is selected by node type rather than by position because
        annotations or comments may precede it, in which case the second
        child is not the name.
        """
        for child in node.children:
            if child.type in self._PACKAGE_NAME_TYPES:
                return _node_text(child)
        return ""

    # ------------------------------------------------------------- types

    def _visit_type(self, node: tree_sitter.Node) -> None:
        """Record a type declaration and walk its body for members.

        Declarations without a usable name are skipped entirely, including
        their body.
        """
        name = _name_child(node)
        if name is None:
            return
        # "class_declaration" -> "class", "enum_declaration" -> "enum", ...
        java_kind = node.type.removesuffix("_declaration")
        sid = self._make_id(name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",  # Symbol schema has no separate interface/enum kind
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                # Plain classes outside any package carry no extra metadata.
                extra={"java_kind": java_kind, "package": self._package}
                if self._package or java_kind != "class"
                else {},
            )
        )
        body = node.child_by_field_name("body")
        if body is None:
            return
        self._class_stack.append(name)
        try:
            for child in body.children:
                self.visit(child)
        finally:
            # Keep the stack balanced even if visiting a member raises.
            self._class_stack.pop()

    # ----------------------------------------------------------- members

    def _visit_method(self, node: tree_sitter.Node, *, is_constructor: bool) -> None:
        """Record a method or constructor declared in the current type.

        Constructors use ``<init>`` as their descriptor name, while the
        signature keeps the declared name.
        """
        name = _name_child(node)
        if name is None:
            return
        descriptor_name = "<init>" if is_constructor else name
        # NOTE(review): overloads get identical descriptors here because
        # parameters are not part of the ID -- confirm whether SymbolID
        # needs a disambiguator.
        sid = self._make_id(descriptor_name, kind=DescriptorKind.METHOD)
        signature = _method_signature(node, name, is_constructor=is_constructor)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="method",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=signature,
            )
        )

    def _visit_field(self, node: tree_sitter.Node) -> None:
        """Record one field symbol per declarator in a field declaration.

        A declaration such as ``int a, b;`` has several
        ``variable_declarator`` children; each becomes its own symbol whose
        range covers just that declarator.
        """
        for child in node.children:
            if child.type != "variable_declarator":
                continue
            name_node = child.child_by_field_name("name")
            if name_node is None:
                continue
            name = _node_text(name_node)
            if not name:
                continue
            sid = self._make_id(name, kind=DescriptorKind.TERM)
            self.symbols.append(
                Symbol(
                    id=sid,
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(child),
                )
            )

    # ----------------------------------------------------------- helpers

    def _make_id(self, name: str, *, kind: DescriptorKind) -> SymbolID:
        """Build the SymbolID for *name* in the current nesting context.

        The descriptor chain is: one NAMESPACE per component of the file
        path, one TYPE per enclosing type on the class stack, then the
        final descriptor for *name* with the given *kind*. The package name
        is not part of the ID.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(
            Descriptor(name=cls, kind=DescriptorKind.TYPE) for cls in self._class_stack
        )
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# ---------------------------------------------------------------------------
|
|
211
|
+
# Pure helpers
|
|
212
|
+
# ---------------------------------------------------------------------------
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Turn each component of *path* into a NAMESPACE descriptor, in order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert *node*'s start and end points into a ``Range``.

    Rows are shifted by one to give line numbers; columns are passed
    through unchanged. The end line is clamped so that it is never smaller
    than the start line.
    """
    first_row, first_col = node.start_point
    last_row, last_col = node.end_point
    first_line = first_row + 1
    last_line = max(last_row + 1, first_line)
    return Range(
        start_line=first_line,
        start_col=first_col,
        end_line=last_line,
        end_col=last_col,
    )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _node_text(node: tree_sitter.Node) -> str:
    """Return *node*'s source text decoded as UTF-8, or "" if it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _name_child(node: tree_sitter.Node) -> str | None:
    """Return the stripped text of *node*'s ``name`` field, if usable.

    Returns None when the field is absent, has no text, or is empty after
    surrounding whitespace is removed.
    """
    ident = node.child_by_field_name("name")
    if ident is None:
        return None
    raw = ident.text
    if raw is None:
        return None
    stripped = raw.decode("utf-8").strip()
    return stripped if stripped else None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _method_signature(
    node: tree_sitter.Node,
    name: str,
    *,
    is_constructor: bool,
) -> str:
    """Render a one-line signature for a method or constructor node.

    Args:
        node: The declaration node; its ``parameters`` and ``type`` fields
            are read.
        name: Name to show in the signature.
        is_constructor: When True the return type is omitted.

    Returns:
        ``name(params)`` for constructors, otherwise ``type name(params)``,
        with the type and its trailing space left out if the node has no
        ``type`` field. A missing parameter list is rendered as ``()``.
    """
    param_list = node.child_by_field_name("parameters")
    rendered_params = "()" if param_list is None else _node_text(param_list)
    if is_constructor:
        return name + rendered_params

    result_type = node.child_by_field_name("type")
    prefix = "" if result_type is None else f"{_node_text(result_type)} "
    return f"{prefix}{name}{rendered_params}"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-java
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: Java indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,java,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Java
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0a1
|
|
15
|
+
Requires-Dist: tree-sitter-java>=0.23
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# codemap-java
|
|
22
|
+
|
|
23
|
+
> A Java indexer for [CodeMap](https://github.com/qxbyte/codemap),
|
|
24
|
+
> distributed as an independent PyPI package.
|
|
25
|
+
|
|
26
|
+
## What it captures
|
|
27
|
+
|
|
28
|
+
Backed by `tree-sitter-java`. Single-file, no cross-file type inference (MVP):
|
|
29
|
+
|
|
30
|
+
| AST node | Symbol kind |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `class_declaration` | `class` |
|
|
33
|
+
| `interface_declaration` | stored as `class` with `extra.java_kind=interface` |
|
|
34
|
+
| `enum_declaration` | stored as `class` with `extra.java_kind=enum` |
|
|
35
|
+
| `record_declaration` | stored as `class` with `extra.java_kind=record` |
|
|
36
|
+
| `method_declaration` (inside type) | `method` |
|
|
37
|
+
| `constructor_declaration` | `method` (SymbolID descriptor is `<init>`; the signature keeps the declared name) |
|
|
38
|
+
| `field_declaration` (inside type) | `field` |
|
|
39
|
+
|
|
40
|
+
Package declarations are captured and stored on type symbols as
|
|
41
|
+
`extra.package`; they are not part of the SymbolID.
|
|
42
|
+
|
|
43
|
+
## SymbolID encoding
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
scip-java . . . src/com/example/Greeter.java/Greeter#hello().
|
|
47
|
+
└────────┘ └────────────────────────────────┘ └──────┘ └─────┘
|
|
48
|
+
scheme file path type method
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-java"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
After installation, `codemap doctor` lists `java` next to the other
|
|
58
|
+
indexers on identical terms — same Indexer Protocol, same entry-point
|
|
59
|
+
group, no main-repo change required (ADR-004 + ADR-L001).
|
|
60
|
+
|
|
61
|
+
## Limits
|
|
62
|
+
|
|
63
|
+
* No `extends` / `implements` edges yet. Easy to add in v0.2.0.
|
|
64
|
+
* No generic-parameter descriptors.
|
|
65
|
+
* No annotation extraction (planned).
|
|
66
|
+
* Anonymous classes are skipped.
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
MIT.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
codemap_java/__init__.py,sha256=CfrzKPuFCV4SA9ly2O8K84G96OIBVTCWrDUFafKIOgY,170
|
|
2
|
+
codemap_java/indexer.py,sha256=exyqk2mvj-BAEli9Fi9beUcQIPu_MTXoL9h_QqLjGX0,8461
|
|
3
|
+
codemap_java-0.1.0a1.dist-info/METADATA,sha256=1KbbOlR8lWtfyAtNtWNxuLlmV7SDtmTWy6hubnh2zsI,2319
|
|
4
|
+
codemap_java-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
5
|
+
codemap_java-0.1.0a1.dist-info/entry_points.txt,sha256=nc_YzUZs5Nwz3H_qPVY1k1n4A1VtpA8F6Tj0AE6XnNc,51
|
|
6
|
+
codemap_java-0.1.0a1.dist-info/RECORD,,
|