codemap-cpp 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """C++ indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_cpp.indexer import CppIndexer
6
+
7
# Public API of the package: only the indexer class is exported.
__all__ = ["CppIndexer"]
# NOTE(review): the distribution metadata declares version 0.1.0a1 while this
# string says 0.1.0 — confirm whether the two are meant to match.
__version__ = "0.1.0"
codemap_cpp/indexer.py ADDED
@@ -0,0 +1,376 @@
1
+ """C++ indexer built on tree-sitter-cpp.
2
+
3
+ The visitor recurses into ``namespace_definition`` so that descriptors of
4
+ nested types are prefixed with the containing namespace chain.
5
+ ``template_declaration`` wrappers are unwrapped so the underlying
6
+ function / class / struct surfaces as a normal symbol.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path, PurePosixPath
12
+ from typing import ClassVar
13
+
14
+ import tree_sitter
15
+ import tree_sitter_cpp
16
+
17
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
18
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
19
+ from codemap.indexers.base import IndexContext
20
+
21
# Scheme component passed to every SymbolID built in this module.
SCHEME = "scip-cpp"
# Language tag stored on emitted symbols and advertised by the indexer class.
LANG = "cpp"

# tree-sitter grammar handle for C++, created once when the module is imported.
_CPP_LANG = tree_sitter.Language(tree_sitter_cpp.language())
25
+
26
+
27
class CppIndexer:
    """CodeMap indexer plugin that extracts symbols from C++ source files.

    The class attributes advertise the plugin's name, version, the glob
    patterns it claims and the language tag it produces.
    """

    name: ClassVar[str] = "cpp"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = [
        "*.cpp",
        "*.cc",
        "*.cxx",
        "*.hpp",
        "*.hh",
        "*.hxx",
    ]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* has one of the handled C++ suffixes.

        The comparison is an exact, case-sensitive match on ``path.suffix``.
        """
        handled_suffixes = (".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx")
        return path.suffix in handled_suffixes

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* with tree-sitter and collect its symbols.

        Args:
            path: Location of the file; not read by this method.
            source: Raw file contents.
            ctx: Indexing context; only ``ctx.relative_path`` is used.

        Returns:
            An ``IndexResult``. If *source* is not valid UTF-8 it carries a
            single ``CPP002`` error diagnostic and nothing else. Otherwise it
            carries the visitor's symbols, edges and diagnostics, plus a
            ``CPP001`` warning when the parse tree contains errors.
        """
        # The decoded text is discarded; decoding only validates the bytes so
        # that later per-node decoding cannot fail.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            undecodable = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="CPP002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[undecodable])

        tree = tree_sitter.Parser(_CPP_LANG).parse(source)
        root = tree.root_node
        walker = _Visitor(ctx.relative_path)
        walker.visit_root(root)

        collected = [*walker.diagnostics]
        if root.has_error:
            # A broken parse still yields a partial tree, so keep whatever
            # symbols were found and only warn.
            collected.append(
                Diagnostic(
                    severity="warning",
                    file=ctx.relative_path,
                    range=Range(start_line=1, end_line=1),
                    code="CPP001",
                    message="tree-sitter reported parse errors; symbols may be incomplete",
                    producer=self.name,
                )
            )
        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=collected,
        )
84
+
85
+
86
class _Visitor:
    """Walk a tree-sitter C++ syntax tree and collect CodeMap symbols.

    Results accumulate in ``symbols``. ``edges`` and ``diagnostics`` are
    created empty and are not appended to by this class.

    Besides namespaces, templates, records, enums and functions, the walk
    looks through constructs that only group declarations: preprocessor
    conditionals (so headers wrapped in an include guard are indexed) and
    ``extern "C"`` linkage blocks.
    """

    # Record specifier node type -> value stored in ``extra["cpp_kind"]``.
    _RECORD_KINDS: ClassVar[dict[str, str]] = {
        "class_specifier": "class",
        "struct_specifier": "struct",
        "union_specifier": "union",
    }
    # Preprocessor conditionals; their children are ordinary declarations.
    _PREPROC_TYPES: ClassVar[frozenset[str]] = frozenset(
        {
            "preproc_if",
            "preproc_ifdef",
            "preproc_else",
            "preproc_elif",
            "preproc_elifdef",
        }
    )
    # Nodes at namespace level that are walked through without changing scope.
    _TRANSPARENT_TYPES: ClassVar[frozenset[str]] = _PREPROC_TYPES | {
        "linkage_specification",
        "declaration_list",
    }
    # Declarator nodes that wrap the declarator carrying the member's name.
    _DECLARATOR_WRAPPERS: ClassVar[frozenset[str]] = frozenset(
        {
            "pointer_declarator",
            "reference_declarator",
            "array_declarator",
            "parenthesized_declarator",
        }
    )
    # Declarators that end the unwrapping done by ``_core_declarator``.
    _CORE_DECLARATORS: ClassVar[frozenset[str]] = frozenset(
        {"function_declarator", "field_identifier"}
    )

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit_root(self, root: tree_sitter.Node) -> None:
        """Visit every direct child of *root* with an empty scope."""
        for child in root.children:
            self._visit(child, scope=[])

    def _visit(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Dispatch one namespace-level node; unknown node types are ignored."""
        kind = node.type
        if kind == "namespace_definition":
            self._visit_namespace(node, scope=scope)
        elif kind == "template_declaration":
            # Unwrap: a template is metadata around a single declaration.
            inner = _template_inner(node)
            if inner is not None:
                self._visit(inner, scope=scope)
        elif kind == "function_definition":
            self._emit_function(node, scope=scope, container_kind=None)
        elif kind in self._RECORD_KINDS:
            self._emit_record(node, scope=scope, cpp_kind=self._RECORD_KINDS[kind])
        elif kind == "enum_specifier":
            self._emit_enum(node, scope=scope)
        elif kind in self._TRANSPARENT_TYPES:
            # Include guards, #if blocks and extern "C" blocks only group
            # declarations, so their children belong to the current scope.
            for child in node.children:
                self._visit(child, scope=scope)

    def _visit_namespace(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Recurse into a namespace body with the namespace added to the scope.

        An anonymous namespace adds nothing to the scope. A C++17 nested
        definition such as ``namespace a::b`` adds one descriptor per
        component.
        """
        new_scope = [
            *scope,
            *(
                Descriptor(name=name, kind=DescriptorKind.NAMESPACE)
                for name in self._namespace_names(node)
            ),
        ]
        body = _first_child(node, "declaration_list")
        if body is None:
            return
        for child in body.children:
            self._visit(child, scope=new_scope)

    @classmethod
    def _namespace_names(cls, node: tree_sitter.Node) -> list[str]:
        """Return the namespace name components found under *node*, in order."""
        names: list[str] = []
        for child in node.children:
            if child.type == "namespace_identifier":
                names.append(_node_text(child))
            elif child.type == "nested_namespace_specifier":
                names.extend(cls._namespace_names(child))
        return names

    def _emit_function(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
        container_kind: str | None,
    ) -> None:
        """Emit a symbol for a function definition whose name can be resolved.

        The symbol kind is ``"method"`` when *container_kind* is ``"class"``
        and ``"function"`` otherwise.
        """
        name = _function_declarator_name(node)
        if name is None:
            return
        descriptors = [*scope, Descriptor(name=name, kind=DescriptorKind.METHOD)]
        kind = "method" if container_kind == "class" else "function"
        self.symbols.append(
            Symbol(
                id=self._make_id(descriptors),
                kind=kind,
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"{name}()",
            )
        )

    def _emit_record(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
        cpp_kind: str,
    ) -> None:
        """Emit a class/struct/union symbol and then its members.

        Specifiers without a name or without a body (forward declarations,
        elaborated type references) are skipped.
        """
        name = _first_child_text(node, "type_identifier")
        body = _first_child(node, "field_declaration_list")
        if name is None or body is None:
            return
        descriptors = [*scope, Descriptor(name=name, kind=DescriptorKind.TYPE)]
        self.symbols.append(
            Symbol(
                id=self._make_id(descriptors),
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"cpp_kind": cpp_kind},
            )
        )
        for child in body.children:
            self._visit_member(child, scope=descriptors)

    def _visit_member(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Dispatch one node found inside a record body."""
        kind = node.type
        if kind == "function_definition":
            self._emit_function(node, scope=scope, container_kind="class")
        elif kind == "field_declaration":
            self._emit_nested_types(node, scope=scope)
            self._emit_field(node, scope=scope)
        elif kind == "template_declaration":
            inner = _template_inner(node)
            if inner is not None:
                self._visit_member(inner, scope=scope)
        elif kind in self._RECORD_KINDS or kind == "enum_specifier":
            # Reached when a member template wraps a nested type.
            self._visit(node, scope=scope)
        elif kind in self._PREPROC_TYPES:
            for child in node.children:
                self._visit_member(child, scope=scope)

    def _emit_nested_types(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Emit types defined inside a member declaration.

        A nested ``struct Inner { ... };`` appears as the type of a
        ``field_declaration``. Only definitions are emitted: records are
        filtered by ``_emit_record``, and enums must carry an enumerator list
        so that a reference such as ``enum E e;`` is not mistaken for one.
        """
        for child in node.children:
            if child.type in self._RECORD_KINDS:
                self._visit(child, scope=scope)
            elif (
                child.type == "enum_specifier"
                and _first_child(child, "enumerator_list") is not None
            ):
                self._visit(child, scope=scope)

    @classmethod
    def _core_declarator(cls, node: tree_sitter.Node) -> tree_sitter.Node | None:
        """Strip pointer/reference/array/parenthesis wrappers from *node*.

        Returns *node* itself when it is not a wrapper, the innermost
        ``function_declarator`` or ``field_identifier`` when one is found, and
        ``None`` when a wrapper contains neither.
        """
        current: tree_sitter.Node | None = node
        while current is not None and current.type in cls._DECLARATOR_WRAPPERS:
            current = next(
                (
                    child
                    for child in current.children
                    if child.type in cls._DECLARATOR_WRAPPERS
                    or child.type in cls._CORE_DECLARATORS
                ),
                None,
            )
        return current

    def _emit_field(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Emit one symbol per declarator of a ``field_declaration``.

        A declarator is either a data member (``field_identifier``) or a
        method declaration without body (``function_declarator``). Either may
        be wrapped in pointer, reference, array or parenthesised declarators,
        and one declaration may carry several declarators (``int a, b;``).
        Every emitted symbol uses the range of the whole declaration.
        """
        for child in node.children:
            core = self._core_declarator(child)
            if core is None:
                continue
            if core.type == "function_declarator":
                name = _first_child_text(core, "field_identifier")
                if name is None:
                    name = _first_child_text(core, "identifier")
                if name is None:
                    # e.g. operators, or function-pointer members whose name
                    # is nested deeper inside the declarator.
                    continue
                self.symbols.append(
                    Symbol(
                        id=self._make_id(
                            [*scope, Descriptor(name=name, kind=DescriptorKind.METHOD)]
                        ),
                        kind="method",
                        language=LANG,
                        file=self.relative_path,
                        range=_node_range(node),
                        signature=f"{name}()",
                    )
                )
            elif core.type == "field_identifier":
                self.symbols.append(
                    Symbol(
                        id=self._make_id(
                            [
                                *scope,
                                Descriptor(name=_node_text(core), kind=DescriptorKind.TERM),
                            ]
                        ),
                        kind="field",
                        language=LANG,
                        file=self.relative_path,
                        range=_node_range(node),
                    )
                )

    def _emit_enum(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Emit a named enum and one ``field`` symbol per enumerator."""
        name = _first_child_text(node, "type_identifier")
        if name is None:
            return
        enum_desc = Descriptor(name=name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=self._make_id([*scope, enum_desc]),
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"cpp_kind": "enum"},
            )
        )
        body = _first_child(node, "enumerator_list")
        if body is None:
            return
        for child in body.children:
            if child.type != "enumerator":
                continue
            enum_name = _first_child_text(child, "identifier")
            if enum_name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id(
                        [
                            *scope,
                            enum_desc,
                            Descriptor(name=enum_name, kind=DescriptorKind.TERM),
                        ]
                    ),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(child),
                    extra={"cpp_kind": "enumerator"},
                )
            )

    def _make_id(self, descriptors: list[Descriptor]) -> SymbolID:
        """Build a SymbolID from the file-path namespaces plus *descriptors*."""
        full = list(_path_namespaces(self.relative_path))
        full.extend(descriptors)
        return SymbolID(scheme=SCHEME, descriptors=tuple(full))
288
+
289
+
290
+ # ---------------------------------------------------------------------------
291
+ # Pure helpers
292
+ # ---------------------------------------------------------------------------
293
+
294
+
295
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    return [
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE)
        for segment in path.parts
    ]
297
+
298
+
299
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert a node's start/end points into a ``Range``.

    Rows are shifted by one to give line numbers; columns are passed through
    unchanged. The end line is never smaller than the start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = max(end_row + 1, first_line)
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
308
+
309
+
310
def _node_text(node: tree_sitter.Node) -> str:
    """Return the node's source text decoded as UTF-8, or "" when it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
312
+
313
+
314
def _first_child(node: tree_sitter.Node, kind: str) -> tree_sitter.Node | None:
    """Return the first direct child of *node* whose type equals *kind*, else None."""
    return next(
        (candidate for candidate in node.children if candidate.type == kind),
        None,
    )
319
+
320
+
321
def _first_child_text(node: tree_sitter.Node, kind: str) -> str | None:
    """Return the text of the first direct child of type *kind*, or None if absent."""
    match = _first_child(node, kind)
    if match is None:
        return None
    return _node_text(match)
324
+
325
+
326
# Node types that count as the declaration wrapped by a template_declaration.
_INNER_DECL_TYPES = frozenset(
    (
        "function_definition",
        "class_specifier",
        "struct_specifier",
        "union_specifier",
        "enum_specifier",
        "field_declaration",
        "template_declaration",
    )
)


def _template_inner(node: tree_sitter.Node) -> tree_sitter.Node | None:
    """Return the first child of *node* whose type is in ``_INNER_DECL_TYPES``.

    Returns None when no child has one of those types.
    """
    return next(
        (candidate for candidate in node.children if candidate.type in _INNER_DECL_TYPES),
        None,
    )
344
+
345
+
346
def _function_declarator_name(node: tree_sitter.Node) -> str | None:
    """Return the name declared by the function declarator under *node*.

    When *node* has a direct ``function_declarator`` child, the name comes
    from that declarator's first identifier-like child; a qualified name such
    as ``Foo::bar`` contributes its rightmost component. Otherwise the search
    descends through pointer, reference and parenthesised declarators.
    Returns None when no name is found.
    """
    declarator = _first_child(node, "function_declarator")
    if declarator is not None:
        # The first identifier-like child decides the result, even if a
        # qualified identifier resolves to None.
        for part in declarator.children:
            if part.type == "qualified_identifier":
                return _qualified_name(part)
            if part.type in ("identifier", "field_identifier"):
                return _node_text(part)
        return None

    wrapper_types = (
        "pointer_declarator",
        "reference_declarator",
        "parenthesized_declarator",
    )
    for wrapped in node.children:
        if wrapped.type not in wrapper_types:
            continue
        found = _function_declarator_name(wrapped)
        if found is not None:
            return found
    return None
367
+
368
+
369
def _qualified_name(node: tree_sitter.Node) -> str | None:
    """Return the rightmost plain identifier inside a qualified identifier.

    Nested ``qualified_identifier`` children are searched recursively; a
    nested result replaces the current candidate only when it is non-empty.
    Returns None when no identifier is found.
    """
    last: str | None = None
    for part in node.children:
        part_type = part.type
        if part_type == "qualified_identifier":
            nested = _qualified_name(part)
            if nested:
                last = nested
        elif part_type in ("identifier", "field_identifier"):
            last = _node_text(part)
    return last
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-cpp
3
+ Version: 0.1.0a1
4
+ Summary: C++ language indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: c++,codemap,cpp,indexer,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: C++
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: codemap-core<0.2,>=0.1.0a1
15
+ Requires-Dist: tree-sitter-cpp>=0.23
16
+ Requires-Dist: tree-sitter>=0.25
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # codemap-cpp
22
+
23
+ > A C++ language indexer for [CodeMap](https://github.com/qxbyte/codemap),
24
+ > shipped as an independent PyPI package.
25
+
26
+ ## What it captures
27
+
28
+ Backed by `tree-sitter-cpp`:
29
+
30
+ | AST node | Symbol kind |
31
+ |---|---|
32
+ | `namespace_definition` | namespace prefix (recursed into) |
33
+ | `class_specifier` (named, with body) | `class` (`extra.cpp_kind=class`) |
34
+ | `struct_specifier` (named, with body) | `class` (`extra.cpp_kind=struct`) |
35
+ | `union_specifier` (named, with body) | `class` (`extra.cpp_kind=union`) |
36
+ | `enum_specifier` | `class` (`extra.cpp_kind=enum`); each enumerator is emitted as a `field` (`extra.cpp_kind=enumerator`) |
37
+ | `function_definition` (top-level or in namespace) | `function` |
38
+ | `function_definition` (inside class body) | `method` |
39
+ | `field_declaration` (data member, or method declaration without a body) | `field` for data members, `method` for method declarations |
40
+ | `template_declaration` wrapping any of the above | unwraps to the inner declaration |
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-cpp"
46
+ ```
47
+
48
+ ## File patterns
49
+
50
+ * `*.cpp`, `*.cc`, `*.cxx`, `*.hpp`, `*.hh`, `*.hxx`
51
+
52
+ ## Limits
53
+
54
+ * Out-of-class method definitions (``void Foo::bar() { ... }``) appear as
55
+ free functions, not as members of ``Foo``.
56
+ * `using` declarations and aliases are not emitted as symbols.
57
+ * Macro-only ``#define`` constants are not captured (see ``codemap-c``).
58
+
59
+ ## License
60
+
61
+ MIT.
@@ -0,0 +1,6 @@
1
+ codemap_cpp/__init__.py,sha256=GLztjsNZ4HpsUwDHf11o1_pAsCBKxTf_rh2VrlJOGLo,166
2
+ codemap_cpp/indexer.py,sha256=VPxNyOWFYzAbKord7xbadTx3z-BCB6BwhJVk9HXQnv4,12638
3
+ codemap_cpp-0.1.0a1.dist-info/METADATA,sha256=1XQxl_hlVHBoyQSbT4sbTom3unqWxF8oIGS4I8RYN8E,1964
4
+ codemap_cpp-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
5
+ codemap_cpp-0.1.0a1.dist-info/entry_points.txt,sha256=YcM1pWI1dFOXcZkJ3CVjZGZdj1DdtTgH5dOYSRoJkyc,48
6
+ codemap_cpp-0.1.0a1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [codemap.indexers]
2
+ cpp = codemap_cpp:CppIndexer