codemap-cpp 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_cpp/__init__.py
ADDED
codemap_cpp/indexer.py
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""C++ indexer built on tree-sitter-cpp.
|
|
2
|
+
|
|
3
|
+
The visitor recurses into ``namespace_definition`` so that descriptors of
|
|
4
|
+
nested types are prefixed with the containing namespace chain.
|
|
5
|
+
``template_declaration`` wrappers are unwrapped so the underlying
|
|
6
|
+
function / class / struct surfaces as a normal symbol.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path, PurePosixPath
|
|
12
|
+
from typing import ClassVar
|
|
13
|
+
|
|
14
|
+
import tree_sitter
|
|
15
|
+
import tree_sitter_cpp
|
|
16
|
+
|
|
17
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
18
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
19
|
+
from codemap.indexers.base import IndexContext
|
|
20
|
+
|
|
21
|
+
# Scheme string stamped on every SymbolID this indexer produces.
SCHEME = "scip-cpp"
# Language tag attached to each emitted Symbol and advertised by the indexer.
LANG = "cpp"

# Grammar handle built once at import time; a parser is created from it per file.
_CPP_LANG = tree_sitter.Language(tree_sitter_cpp.language())
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CppIndexer:
    """CodeMap indexer plugin that extracts symbols from C++ sources.

    Parsing is done with tree-sitter; symbol extraction is delegated to
    ``_Visitor``.
    """

    name: ClassVar[str] = "cpp"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = [
        "*.cpp",
        "*.cc",
        "*.cxx",
        "*.hpp",
        "*.hh",
        "*.hxx",
    ]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* has one of the handled C++ suffixes.

        The comparison is case-sensitive and mirrors ``file_patterns``.
        """
        return path.suffix in (".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx")

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the symbols, edges and diagnostics found.

        Args:
            path: Not read by this implementation; ``ctx.relative_path`` is
                what gets recorded on symbols and diagnostics.
            source: Raw file contents.
            ctx: Indexing context supplying ``relative_path``.

        Returns:
            An ``IndexResult``. If *source* is not valid UTF-8 it carries a
            single ``CPP002`` error and nothing else. Otherwise it carries
            the visitor's output, plus a ``CPP001`` warning when the parse
            tree contains errors.
        """
        # The decoded text is discarded; this only validates the encoding
        # before the bytes are handed to the parser.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_encoding = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="CPP002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_encoding])

        tree = tree_sitter.Parser(_CPP_LANG).parse(source)
        visitor = _Visitor(ctx.relative_path)
        visitor.visit_root(tree.root_node)

        diagnostics = [*visitor.diagnostics]
        if tree.root_node.has_error:
            # Symbols collected so far are still returned alongside the warning.
            parse_warning = Diagnostic(
                severity="warning",
                file=ctx.relative_path,
                range=Range(start_line=1, end_line=1),
                code="CPP001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            diagnostics.append(parse_warning)

        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=diagnostics,
        )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class _Visitor:
    """Collect CodeMap symbols from a tree-sitter C++ syntax tree.

    Symbols accumulate in ``symbols``. ``edges`` and ``diagnostics`` are
    created empty and are not appended to by this class.
    """

    # tree-sitter node type -> value stored in ``Symbol.extra["cpp_kind"]``.
    _RECORD_KINDS: ClassVar[dict[str, str]] = {
        "class_specifier": "class",
        "struct_specifier": "struct",
        "union_specifier": "union",
    }

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit_root(self, root: tree_sitter.Node) -> None:
        """Visit every direct child of *root* with an empty scope."""
        for child in root.children:
            self._visit(child, scope=[])

    def _visit(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Dispatch *node* to the matching handler; other node types are ignored.

        Args:
            node: Syntax node to inspect.
            scope: Descriptors of the enclosing namespaces/types.
        """
        kind = node.type
        if kind == "namespace_definition":
            self._visit_namespace(node, scope=scope)
        elif kind == "template_declaration":
            # Unwrap: a template is metadata around a single declaration.
            inner = _template_inner(node)
            if inner is not None:
                self._visit(inner, scope=scope)
        elif kind == "function_definition":
            self._emit_function(node, scope=scope, container_kind=None)
        elif kind in self._RECORD_KINDS:
            self._emit_record(node, scope=scope, cpp_kind=self._RECORD_KINDS[kind])
        elif kind == "enum_specifier":
            self._emit_enum(node, scope=scope)

    @staticmethod
    def _namespace_names(node: tree_sitter.Node) -> list[str]:
        """Return the namespace names declared by *node*, outermost first.

        Collects direct ``namespace_identifier`` children and descends into
        ``nested_namespace_specifier`` children, which is how tree-sitter-cpp
        represents the C++17 form ``namespace a::b { ... }``. An anonymous
        namespace yields an empty list.
        """
        names: list[str] = []
        for child in node.children:
            if child.type == "namespace_identifier":
                names.append(_node_text(child))
            elif child.type == "nested_namespace_specifier":
                names.extend(_Visitor._namespace_names(child))
        return names

    def _visit_namespace(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Visit the body of a namespace with the scope extended by its name(s).

        Every component of a nested namespace name contributes one NAMESPACE
        descriptor, so symbols inside ``namespace a::b`` are prefixed with
        both ``a`` and ``b``. A namespace without a ``declaration_list`` body
        is skipped.
        """
        new_scope = list(scope)
        new_scope.extend(
            Descriptor(name=name, kind=DescriptorKind.NAMESPACE)
            for name in self._namespace_names(node)
        )
        body = _first_child(node, "declaration_list")
        if body is None:
            return
        for child in body.children:
            self._visit(child, scope=new_scope)

    def _emit_function(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
        container_kind: str | None,
    ) -> None:
        """Record a function definition; skipped when no name can be resolved.

        Args:
            node: The ``function_definition`` node.
            scope: Descriptors of the enclosing namespaces/types.
            container_kind: ``"class"`` when the definition sits in a record
                body (symbol kind ``method``); anything else yields
                ``function``.
        """
        name = _function_declarator_name(node)
        if name is None:
            return
        descriptors = [*scope, Descriptor(name=name, kind=DescriptorKind.METHOD)]
        kind = "method" if container_kind == "class" else "function"
        self.symbols.append(
            Symbol(
                id=self._make_id(descriptors),
                kind=kind,
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"{name}()",
            )
        )

    def _emit_record(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
        cpp_kind: str,
    ) -> None:
        """Record a named class/struct/union that has a body, then its members.

        Records lacking either a ``type_identifier`` or a
        ``field_declaration_list`` are ignored. The symbol kind is always
        ``class``; *cpp_kind* is preserved in ``extra["cpp_kind"]``.
        """
        name = _first_child_text(node, "type_identifier")
        body = _first_child(node, "field_declaration_list")
        if name is None or body is None:
            return
        descriptors = [*scope, Descriptor(name=name, kind=DescriptorKind.TYPE)]
        self.symbols.append(
            Symbol(
                id=self._make_id(descriptors),
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"cpp_kind": cpp_kind},
            )
        )
        for child in body.children:
            member = child
            if child.type == "template_declaration":
                # Member templates are unwrapped one level only.
                member = _template_inner(child)
                if member is None:
                    continue
            if member.type == "function_definition":
                self._emit_function(member, scope=descriptors, container_kind="class")
            elif member.type == "field_declaration":
                self._emit_field(member, scope=descriptors)

    def _emit_field(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Record a ``field_declaration`` as a method declaration or data member.

        A declaration carrying a function declarator (directly, or wrapped in
        pointer/reference/parenthesized declarators, as in ``int* f();``) is
        emitted as a ``method``. Otherwise a direct ``field_identifier``
        child is emitted as a ``field``. Declarations matching neither shape
        are skipped.
        """
        # Shares the name lookup used for definitions, so declarations whose
        # return type adds a pointer/reference declarator are not lost.
        method_name = _function_declarator_name(node)
        if method_name is not None:
            self.symbols.append(
                Symbol(
                    id=self._make_id(
                        [*scope, Descriptor(name=method_name, kind=DescriptorKind.METHOD)]
                    ),
                    kind="method",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                    signature=f"{method_name}()",
                )
            )
            return
        if _first_child(node, "function_declarator") is not None:
            # A function declarator whose name could not be resolved is
            # skipped rather than reported as a data member.
            return
        field_name = _first_child_text(node, "field_identifier")
        if field_name is None:
            return
        self.symbols.append(
            Symbol(
                id=self._make_id([*scope, Descriptor(name=field_name, kind=DescriptorKind.TERM)]),
                kind="field",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
            )
        )

    def _emit_enum(
        self,
        node: tree_sitter.Node,
        *,
        scope: list[Descriptor],
    ) -> None:
        """Record a named enum and each enumerator in its ``enumerator_list``.

        The enum is emitted as kind ``class`` with ``cpp_kind="enum"``; each
        enumerator is emitted as kind ``field`` with
        ``cpp_kind="enumerator"``, nested under the enum's descriptor.
        Unnamed enums are ignored entirely.
        """
        name = _first_child_text(node, "type_identifier")
        if name is None:
            return
        enum_path = [*scope, Descriptor(name=name, kind=DescriptorKind.TYPE)]
        self.symbols.append(
            Symbol(
                id=self._make_id(enum_path),
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"cpp_kind": "enum"},
            )
        )
        body = _first_child(node, "enumerator_list")
        if body is None:
            return
        for child in body.children:
            if child.type != "enumerator":
                continue
            enum_name = _first_child_text(child, "identifier")
            if enum_name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id(
                        [*enum_path, Descriptor(name=enum_name, kind=DescriptorKind.TERM)]
                    ),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(child),
                    extra={"cpp_kind": "enumerator"},
                )
            )

    def _make_id(self, descriptors: list[Descriptor]) -> SymbolID:
        """Build a SymbolID from the file-path namespaces followed by *descriptors*."""
        full = list(_path_namespaces(self.relative_path))
        full.extend(descriptors)
        return SymbolID(scheme=SCHEME, descriptors=tuple(full))
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# ---------------------------------------------------------------------------
|
|
291
|
+
# Pure helpers
|
|
292
|
+
# ---------------------------------------------------------------------------
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Map each component of *path*, in order, to a NAMESPACE descriptor."""
    namespaces: list[Descriptor] = []
    for component in path.parts:
        namespaces.append(Descriptor(name=component, kind=DescriptorKind.NAMESPACE))
    return namespaces
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert *node*'s start/end points into a ``Range``.

    Rows are shifted by one to form line numbers; columns are passed through
    unchanged. The end line is never reported as smaller than the start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _node_text(node: tree_sitter.Node) -> str:
|
|
311
|
+
return node.text.decode("utf-8") if node.text is not None else ""
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _first_child(node: tree_sitter.Node, kind: str) -> tree_sitter.Node | None:
|
|
315
|
+
for child in node.children:
|
|
316
|
+
if child.type == kind:
|
|
317
|
+
return child
|
|
318
|
+
return None
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _first_child_text(node: tree_sitter.Node, kind: str) -> str | None:
    """Return the text of the first direct child of type *kind*, or ``None``."""
    match = _first_child(node, kind)
    if match is None:
        return None
    return _node_text(match)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
_INNER_DECL_TYPES = frozenset(
|
|
327
|
+
{
|
|
328
|
+
"function_definition",
|
|
329
|
+
"class_specifier",
|
|
330
|
+
"struct_specifier",
|
|
331
|
+
"union_specifier",
|
|
332
|
+
"enum_specifier",
|
|
333
|
+
"field_declaration",
|
|
334
|
+
"template_declaration",
|
|
335
|
+
}
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _template_inner(node: tree_sitter.Node) -> tree_sitter.Node | None:
|
|
340
|
+
for child in node.children:
|
|
341
|
+
if child.type in _INNER_DECL_TYPES:
|
|
342
|
+
return child
|
|
343
|
+
return None
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _function_declarator_name(node: tree_sitter.Node) -> str | None:
    """Resolve the function name declared beneath *node*, if any.

    When *node* has a direct ``function_declarator`` child, the name comes
    from that declarator's first ``identifier``, ``field_identifier`` or
    ``qualified_identifier`` child (the latter reduced to its rightmost
    component). Otherwise pointer, reference and parenthesized declarator
    children are searched recursively, in order, and the first name found
    wins. Returns ``None`` when nothing matches.
    """
    fn_decl = _first_child(node, "function_declarator")
    if fn_decl is not None:
        # function_declarator's first identifier is the function name.
        for part in fn_decl.children:
            part_type = part.type
            if part_type == "qualified_identifier":
                # ``Foo::bar`` — take the rightmost component.
                return _qualified_name(part)
            if part_type in ("identifier", "field_identifier"):
                return _node_text(part)
        return None

    wrapper_types = (
        "pointer_declarator",
        "reference_declarator",
        "parenthesized_declarator",
    )
    # Lazy generator: later wrappers are not searched once a name is found.
    found_names = (
        _function_declarator_name(wrapped)
        for wrapped in node.children
        if wrapped.type in wrapper_types
    )
    return next((found for found in found_names if found is not None), None)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _qualified_name(node: tree_sitter.Node) -> str | None:
    """Return the rightmost name component of a ``qualified_identifier``.

    Direct ``identifier`` / ``field_identifier`` children overwrite the
    running result; nested ``qualified_identifier`` children are resolved
    recursively and only overwrite it when they yield a non-empty name.
    Returns ``None`` if no component is found.
    """
    last: str | None = None
    for part in node.children:
        part_type = part.type
        if part_type == "qualified_identifier":
            nested = _qualified_name(part)
            if nested:
                last = nested
        elif part_type in ("identifier", "field_identifier"):
            last = _node_text(part)
    return last
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-cpp
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: C++ language indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: c++,codemap,cpp,indexer,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: C++
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0a1
|
|
15
|
+
Requires-Dist: tree-sitter-cpp>=0.23
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# codemap-cpp
|
|
22
|
+
|
|
23
|
+
> A C++ language indexer for [CodeMap](https://github.com/qxbyte/codemap),
|
|
24
|
+
> shipped as an independent PyPI package.
|
|
25
|
+
|
|
26
|
+
## What it captures
|
|
27
|
+
|
|
28
|
+
Backed by `tree-sitter-cpp`:
|
|
29
|
+
|
|
30
|
+
| AST node | Symbol kind |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `namespace_definition` | namespace prefix (recursed into) |
|
|
33
|
+
| `class_specifier` (named, with body) | `class` (`extra.cpp_kind=class`) |
|
|
34
|
+
| `struct_specifier` (named, with body) | `class` (`extra.cpp_kind=struct`) |
|
|
35
|
+
| `union_specifier` (named, with body) | `class` (`extra.cpp_kind=union`) |
|
|
36
|
+
| `enum_specifier` (named) | `class` (`extra.cpp_kind=enum`); each `enumerator` inside it is a `field` (`extra.cpp_kind=enumerator`) |
|
|
37
|
+
| `function_definition` (top-level or in namespace) | `function` |
|
|
38
|
+
| `function_definition` (inside class body) | `method` |
|
|
39
|
+
| `field_declaration` (data member) | `field` |
| `field_declaration` (method declaration without a body) | `method` |
|
|
40
|
+
| `template_declaration` wrapping any of the above | unwraps to the inner declaration |
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-cpp"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## File patterns
|
|
49
|
+
|
|
50
|
+
* `*.cpp`, `*.cc`, `*.cxx`, `*.hpp`, `*.hh`, `*.hxx`
|
|
51
|
+
|
|
52
|
+
## Limits
|
|
53
|
+
|
|
54
|
+
* Out-of-class method definitions (``void Foo::bar() { ... }``) appear as
|
|
55
|
+
free functions, not as members of ``Foo``.
|
|
56
|
+
* `using` declarations and aliases are not emitted as symbols.
|
|
57
|
+
* Macro-only ``#define`` constants are not captured (see ``codemap-c``).
|
|
58
|
+
|
|
59
|
+
## License
|
|
60
|
+
|
|
61
|
+
MIT.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
codemap_cpp/__init__.py,sha256=GLztjsNZ4HpsUwDHf11o1_pAsCBKxTf_rh2VrlJOGLo,166
|
|
2
|
+
codemap_cpp/indexer.py,sha256=VPxNyOWFYzAbKord7xbadTx3z-BCB6BwhJVk9HXQnv4,12638
|
|
3
|
+
codemap_cpp-0.1.0a1.dist-info/METADATA,sha256=1XQxl_hlVHBoyQSbT4sbTom3unqWxF8oIGS4I8RYN8E,1964
|
|
4
|
+
codemap_cpp-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
5
|
+
codemap_cpp-0.1.0a1.dist-info/entry_points.txt,sha256=YcM1pWI1dFOXcZkJ3CVjZGZdj1DdtTgH5dOYSRoJkyc,48
|
|
6
|
+
codemap_cpp-0.1.0a1.dist-info/RECORD,,
|