codemap-java 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """Java indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_java.indexer import JavaIndexer
6
+
7
+ __all__ = ["JavaIndexer"]
8
+ __version__ = "0.1.0"
@@ -0,0 +1,254 @@
1
+ """Java indexer built on tree-sitter-java.
2
+
3
+ Covers class / interface / enum / record / method / constructor / field
4
+ declarations. The package declaration is recorded in each type symbol's
5
+ ``extra`` metadata (it is not part of the SymbolID). Nested types track a class stack to produce the
6
+ correct ``Cls#Inner#m()`` chain.
7
+
8
+ The indexer is single-file by design; cross-file `extends` / `implements`
9
+ resolution lives in a future bridge so the indexer surface stays narrow.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path, PurePosixPath
15
+ from typing import ClassVar
16
+
17
+ import tree_sitter
18
+ import tree_sitter_java
19
+
20
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
21
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
22
+ from codemap.indexers.base import IndexContext
23
+
24
# Scheme component placed at the front of every SymbolID built by this module.
SCHEME = "scip-java"
# Language tag attached to every Symbol this indexer emits.
LANG = "java"

# Grammar handle, built once at import time and reused for every parser.
_JAVA_LANG = tree_sitter.Language(tree_sitter_java.language())

# Node types that declare a named Java type; each one opens a new level on
# the visitor's class stack.
_TYPE_DECLS = frozenset(
    (
        "class_declaration",
        "interface_declaration",
        "enum_declaration",
        "record_declaration",
    )
)
37
+
38
+
39
class JavaIndexer:
    """Indexer plugin that extracts symbols from a single Java source file.

    The class-level attributes describe the plugin (name, version, file
    globs, languages); ``index_file`` parses one file and returns what the
    visitor collected.
    """

    name: ClassVar[str] = "java"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.java"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return ``True`` when *path* ends in the exact suffix ``.java``."""
        return path.suffix == ".java"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the symbols, edges and diagnostics found.

        Args:
            path: Location of the file; not consulted by this method.
            source: Raw file contents; must be valid UTF-8.
            ctx: Indexing context; only ``relative_path`` is read here.

        Returns:
            An ``IndexResult``. Undecodable input yields a single ``JAVA002``
            error and no symbols. If the parse tree contains errors, a
            ``JAVA001`` warning is appended after the visitor's diagnostics.
        """
        rel_path = ctx.relative_path

        # The decoded text is discarded: this is purely a validity check so
        # that later ``node.text.decode("utf-8")`` calls cannot fail.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            encoding_error = Diagnostic(
                severity="error",
                file=rel_path,
                code="JAVA002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[encoding_error])

        tree = tree_sitter.Parser(_JAVA_LANG).parse(source)
        root = tree.root_node

        walker = _Visitor(rel_path)
        walker.visit(root)

        findings = [*walker.diagnostics]
        if root.has_error:
            # Symbols are still returned; the warning only flags that the
            # result may be partial.
            parse_warning = Diagnostic(
                severity="warning",
                file=rel_path,
                range=Range(start_line=1, end_line=1),
                code="JAVA001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            findings.append(parse_warning)

        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=findings,
        )
89
+
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # AST visitor
93
+ # ---------------------------------------------------------------------------
94
+
95
+
96
class _Visitor:
    """Collect symbols from the syntax tree of one Java file.

    Nodes are walked depth-first. Type declarations push their name onto a
    class stack so that members and nested types get a SymbolID chained
    through every enclosing type. Method, constructor and field declarations
    are recorded only while at least one type is on the stack, and their
    subtrees are not descended into, so anything declared inside a method
    body or a field initializer is not indexed.

    Results accumulate in ``symbols``, ``edges`` and ``diagnostics``; this
    class itself never appends to ``edges`` or ``diagnostics``.
    """

    # Node types that hold the name inside a ``package_declaration``.
    _PACKAGE_NAME_TYPES: ClassVar[frozenset[str]] = frozenset(
        {"identifier", "scoped_identifier"}
    )

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []
        # Names of the enclosing types, outermost first.
        self._class_stack: list[str] = []
        # Dotted package name, or "" until a package declaration is seen.
        self._package: str = ""

    def visit(self, node: tree_sitter.Node) -> None:
        """Dispatch *node* to its handler, or recurse into its children.

        Handled nodes are not recursed into by this method. Member
        declarations met while the class stack is empty fall through to the
        generic recursion instead of being recorded.
        """
        if node.type == "package_declaration":
            self._package = self._package_name(node)
            return
        if node.type in _TYPE_DECLS:
            self._visit_type(node)
            return
        if node.type == "method_declaration" and self._class_stack:
            self._visit_method(node, is_constructor=False)
            return
        if node.type == "constructor_declaration" and self._class_stack:
            self._visit_method(node, is_constructor=True)
            return
        if node.type == "field_declaration" and self._class_stack:
            self._visit_field(node)
            return
        for child in node.children:
            self.visit(child)

    @classmethod
    def _package_name(cls, node: tree_sitter.Node) -> str:
        """Return the dotted name held by a ``package_declaration`` node.

        The name is found by node type rather than by position: annotations
        (or comments) can sit in front of it, so it is not guaranteed to be
        the second child. Returns ``""`` when no name node exists, e.g. in a
        broken parse.
        """
        for child in node.children:
            if child.type in cls._PACKAGE_NAME_TYPES:
                return _node_text(child)
        return ""

    # ------------------------------------------------------------- types

    def _visit_type(self, node: tree_sitter.Node) -> None:
        """Record a class/interface/enum/record and walk its body.

        Every type is stored with kind ``"class"``; the concrete Java kind
        and the package go into ``extra`` whenever a package was declared or
        the type is not a plain class. Unnamed nodes are ignored.
        """
        name = _name_child(node)
        if name is None:
            return
        java_kind = node.type.removesuffix("_declaration")
        sid = self._make_id(name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",  # Symbol schema has no separate interface/enum kind
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"java_kind": java_kind, "package": self._package}
                if self._package or java_kind != "class"
                else {},
            )
        )
        body = node.child_by_field_name("body")
        if body is None:
            return
        self._class_stack.append(name)
        try:
            for child in body.children:
                self.visit(child)
        finally:
            # Always unwind so sibling types are not nested under this one.
            self._class_stack.pop()

    # ----------------------------------------------------------- members

    def _visit_method(self, node: tree_sitter.Node, *, is_constructor: bool) -> None:
        """Record a method or constructor of the innermost enclosing type.

        Constructors use the fixed descriptor name ``<init>`` in the
        SymbolID, while their signature keeps the declared name. The
        SymbolID does not encode parameters, so overloads of one method
        share the same id.
        """
        name = _name_child(node)
        if name is None:
            return
        descriptor_name = "<init>" if is_constructor else name
        sid = self._make_id(descriptor_name, kind=DescriptorKind.METHOD)
        signature = _method_signature(node, name, is_constructor=is_constructor)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="method",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=signature,
            )
        )

    def _visit_field(self, node: tree_sitter.Node) -> None:
        """Record one field symbol per named declarator in *node*.

        A single declaration such as ``int a, b;`` therefore produces two
        symbols; each symbol's range covers its own declarator only.
        """
        for child in node.children:
            if child.type != "variable_declarator":
                continue
            name_node = child.child_by_field_name("name")
            if name_node is None:
                continue
            name = _node_text(name_node)
            if not name:
                continue
            sid = self._make_id(name, kind=DescriptorKind.TERM)
            self.symbols.append(
                Symbol(
                    id=sid,
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(child),
                )
            )

    # ----------------------------------------------------------- helpers

    def _make_id(self, name: str, *, kind: DescriptorKind) -> SymbolID:
        """Build the SymbolID for *name* in the current nesting context.

        The descriptor chain is: one namespace per component of the file
        path, one type per entry on the class stack, then *name* with the
        given *kind*. The package name is not part of the id.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(
            Descriptor(name=cls, kind=DescriptorKind.TYPE) for cls in self._class_stack
        )
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
208
+
209
+
210
+ # ---------------------------------------------------------------------------
211
+ # Pure helpers
212
+ # ---------------------------------------------------------------------------
213
+
214
+
215
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    namespace = DescriptorKind.NAMESPACE
    return [Descriptor(name=segment, kind=namespace) for segment in path.parts]
217
+
218
+
219
def _node_range(node: tree_sitter.Node) -> Range:
    """Build a ``Range`` from the start and end points of *node*.

    Rows are shifted by one to give 1-based line numbers; columns are passed
    through unchanged. The end line is clamped so that it never precedes the
    start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
228
+
229
+
230
+ def _node_text(node: tree_sitter.Node) -> str:
231
+ return node.text.decode("utf-8") if node.text is not None else ""
232
+
233
+
234
+ def _name_child(node: tree_sitter.Node) -> str | None:
235
+ name_node = node.child_by_field_name("name")
236
+ if name_node is None or name_node.text is None:
237
+ return None
238
+ text = _node_text(name_node).strip()
239
+ return text or None
240
+
241
+
242
def _method_signature(
    node: tree_sitter.Node,
    name: str,
    *,
    is_constructor: bool,
) -> str:
    """Render a one-string signature for a method or constructor node.

    The result is ``"<return type> <name><parameters>"``. The return type
    and its trailing space are left out for constructors and for nodes that
    have no ``type`` field. The parameter text is the source text of the
    ``parameters`` field, or ``"()"`` when that field is missing.
    """
    params_node = node.child_by_field_name("parameters")
    params_text = "()" if params_node is None else _node_text(params_node)
    head = f"{name}{params_text}"
    if is_constructor:
        return head
    type_node = node.child_by_field_name("type")
    if type_node is None:
        return head
    return f"{_node_text(type_node)} {head}"
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-java
3
+ Version: 0.1.0a1
4
+ Summary: Java indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,java,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Java
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: codemap-core<0.2,>=0.1.0a1
15
+ Requires-Dist: tree-sitter-java>=0.23
16
+ Requires-Dist: tree-sitter>=0.25
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # codemap-java
22
+
23
+ > A Java indexer for [CodeMap](https://github.com/qxbyte/codemap),
24
+ > distributed as an independent PyPI package.
25
+
26
+ ## What it captures
27
+
28
+ Backed by `tree-sitter-java`. Single-file, no cross-file type inference (MVP):
29
+
30
+ | AST node | Symbol kind |
31
+ |---|---|
32
+ | `class_declaration` | `class` |
33
+ | `interface_declaration` | `interface` (stored as `class` with `extra.java_kind=interface`) |
34
+ | `enum_declaration` | stored as `class` with `extra.java_kind=enum` |
35
+ | `record_declaration` | stored as `class` with `extra.java_kind=record` |
36
+ | `method_declaration` (inside type) | `method` |
37
+ | `constructor_declaration` | `method` (SymbolID descriptor is `<init>`; the signature uses the class name) |
38
+ | `field_declaration` (inside type) | `field` |
39
+
40
+ When a file declares a package, the package name is stored as
41
+ `extra.package` on each type symbol; it is not part of the SymbolID.
42
+
43
+ ## SymbolID encoding
44
+
45
+ ```
46
+ scip-java . . . src/com/example/Greeter.java/Greeter#hello().
47
+ └────────┘ └────────────────────────────────┘ └──────┘ └─────┘
48
+ scheme file path type method
49
+ ```
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-java"
55
+ ```
56
+
57
+ After installation, `codemap doctor` lists `java` next to the other
58
+ indexers on identical terms — same Indexer Protocol, same entry-point
59
+ group, no main-repo change required (ADR-004 + ADR-L001).
60
+
61
+ ## Limits
62
+
63
+ * No `extends` / `implements` edges yet. Easy to add in v0.2.0.
64
+ * No generic-parameter descriptors.
65
+ * No annotation extraction (planned).
66
+ * Anonymous and local classes are skipped: method bodies and field initializers are not traversed.
67
+
68
+ ## License
69
+
70
+ MIT.
@@ -0,0 +1,6 @@
1
+ codemap_java/__init__.py,sha256=CfrzKPuFCV4SA9ly2O8K84G96OIBVTCWrDUFafKIOgY,170
2
+ codemap_java/indexer.py,sha256=exyqk2mvj-BAEli9Fi9beUcQIPu_MTXoL9h_QqLjGX0,8461
3
+ codemap_java-0.1.0a1.dist-info/METADATA,sha256=1KbbOlR8lWtfyAtNtWNxuLlmV7SDtmTWy6hubnh2zsI,2319
4
+ codemap_java-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
5
+ codemap_java-0.1.0a1.dist-info/entry_points.txt,sha256=nc_YzUZs5Nwz3H_qPVY1k1n4A1VtpA8F6Tj0AE6XnNc,51
6
+ codemap_java-0.1.0a1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [codemap.indexers]
2
+ java = codemap_java:JavaIndexer