codemap-c 0.1.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap_c-0.1.0a1/.gitignore +43 -0
- codemap_c-0.1.0a1/PKG-INFO +63 -0
- codemap_c-0.1.0a1/README.md +43 -0
- codemap_c-0.1.0a1/pyproject.toml +36 -0
- codemap_c-0.1.0a1/src/codemap_c/__init__.py +8 -0
- codemap_c-0.1.0a1/src/codemap_c/indexer.py +323 -0
- codemap_c-0.1.0a1/tests/__init__.py +0 -0
- codemap_c-0.1.0a1/tests/test_indexer.py +180 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Build artifacts
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Test / coverage
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
.coverage.*
|
|
17
|
+
htmlcov/
|
|
18
|
+
coverage.xml
|
|
19
|
+
.tox/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.benchmarks/
|
|
23
|
+
|
|
24
|
+
# Virtualenv
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
env/
|
|
28
|
+
|
|
29
|
+
# uv / pdm lockfiles (commit uv.lock once we settle)
|
|
30
|
+
# uv.lock
|
|
31
|
+
|
|
32
|
+
# IDE
|
|
33
|
+
.idea/
|
|
34
|
+
.vscode/
|
|
35
|
+
*.swp
|
|
36
|
+
*.swo
|
|
37
|
+
|
|
38
|
+
# OS
|
|
39
|
+
.DS_Store
|
|
40
|
+
Thumbs.db
|
|
41
|
+
|
|
42
|
+
# CodeMap own index when dogfooding
|
|
43
|
+
.codemap/
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-c
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: C language indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: c,codemap,indexer,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: C
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0a1
|
|
15
|
+
Requires-Dist: tree-sitter-c>=0.24
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# codemap-c
|
|
22
|
+
|
|
23
|
+
> A C language indexer for [CodeMap](https://github.com/qxbyte/codemap),
|
|
24
|
+
> shipped as an independent PyPI package.
|
|
25
|
+
|
|
26
|
+
## What it captures
|
|
27
|
+
|
|
28
|
+
Backed by `tree-sitter-c`:
|
|
29
|
+
|
|
30
|
+
| AST node | Symbol kind |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `function_definition` | `function` |
|
|
33
|
+
| `struct_specifier` (named, with body) | `class` (`extra.c_kind=struct`) |
|
|
34
|
+
| `union_specifier` (named, with body) | `class` (`extra.c_kind=union`) |
|
|
35
|
+
| `enum_specifier` (named) | `class` (`extra.c_kind=enum`) |
|
|
36
|
+
| `type_definition` (`typedef`) | `class` (`extra.c_kind=typedef`) |
|
|
37
|
+
| `preproc_def` / `preproc_function_def` (`#define X ...`, `#define F(x) ...`) | `variable` (`extra.c_kind=macro` / `macro_fn`) |
|
|
38
|
+
| Top-level `declaration` with initializer | `variable` |
|
|
39
|
+
| `field_declaration` inside a struct/union body | `field` (attached to the parent) |
|
|
40
|
+
|
|
41
|
+
Function bodies are not walked for inner declarations — locals are not
|
|
42
|
+
part of a file's top-level interface.
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-c"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## File patterns
|
|
51
|
+
|
|
52
|
+
* `*.c`, `*.h`
|
|
53
|
+
|
|
54
|
+
## Limits
|
|
55
|
+
|
|
56
|
+
* Preprocessor conditionals (`#ifdef`/`#if`) parse but no branch selection
|
|
57
|
+
is performed — both arms contribute symbols.
|
|
58
|
+
* Function declarations (prototypes) are not emitted; only definitions.
|
|
59
|
+
* Forward struct declarations without a body are skipped.
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
MIT.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# codemap-c
|
|
2
|
+
|
|
3
|
+
> A C language indexer for [CodeMap](https://github.com/qxbyte/codemap),
|
|
4
|
+
> shipped as an independent PyPI package.
|
|
5
|
+
|
|
6
|
+
## What it captures
|
|
7
|
+
|
|
8
|
+
Backed by `tree-sitter-c`:
|
|
9
|
+
|
|
10
|
+
| AST node | Symbol kind |
|
|
11
|
+
|---|---|
|
|
12
|
+
| `function_definition` | `function` |
|
|
13
|
+
| `struct_specifier` (named, with body) | `class` (`extra.c_kind=struct`) |
|
|
14
|
+
| `union_specifier` (named, with body) | `class` (`extra.c_kind=union`) |
|
|
15
|
+
| `enum_specifier` (named) | `class` (`extra.c_kind=enum`) |
|
|
16
|
+
| `type_definition` (`typedef`) | `class` (`extra.c_kind=typedef`) |
|
|
17
|
+
| `preproc_def` / `preproc_function_def` (`#define X ...`, `#define F(x) ...`) | `variable` (`extra.c_kind=macro` / `macro_fn`) |
|
|
18
|
+
| Top-level `declaration` with initializer | `variable` |
|
|
19
|
+
| `field_declaration` inside a struct/union body | `field` (attached to the parent) |
|
|
20
|
+
|
|
21
|
+
Function bodies are not walked for inner declarations — locals are not
|
|
22
|
+
part of a file's top-level interface.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-c"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## File patterns
|
|
31
|
+
|
|
32
|
+
* `*.c`, `*.h`
|
|
33
|
+
|
|
34
|
+
## Limits
|
|
35
|
+
|
|
36
|
+
* Preprocessor conditionals (`#ifdef`/`#if`) parse but no branch selection
|
|
37
|
+
is performed — both arms contribute symbols.
|
|
38
|
+
* Function declarations (prototypes) are not emitted; only definitions.
|
|
39
|
+
* Forward struct declarations without a body are skipped.
|
|
40
|
+
|
|
41
|
+
## License
|
|
42
|
+
|
|
43
|
+
MIT.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.21"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "codemap-c"
|
|
7
|
+
version = "0.1.0a1"
|
|
8
|
+
description = "C language indexer plugin for CodeMap"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "CodeMap Contributors" }]
|
|
13
|
+
keywords = ["codemap", "c", "indexer", "tree-sitter"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Programming Language :: C",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Topic :: Software Development",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"codemap-core>=0.1.0a1,<0.2",
|
|
22
|
+
"tree-sitter>=0.25",
|
|
23
|
+
"tree-sitter-c>=0.24",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = ["pytest>=8.0"]
|
|
28
|
+
|
|
29
|
+
[project.entry-points."codemap.indexers"]
|
|
30
|
+
c = "codemap_c:CIndexer"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/qxbyte/codemap"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["src/codemap_c"]
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
"""C indexer built on tree-sitter-c.
|
|
2
|
+
|
|
3
|
+
Top-level declarations only (function bodies are not walked for inner
|
|
4
|
+
state). Captured node kinds:
|
|
5
|
+
|
|
6
|
+
* ``function_definition`` → ``function``
|
|
7
|
+
* ``struct_specifier`` / ``union_specifier`` → ``class`` (with body)
|
|
8
|
+
* ``enum_specifier`` → ``class``
|
|
9
|
+
* ``type_definition`` (``typedef``) → ``class``
|
|
10
|
+
* ``preproc_def`` / ``preproc_function_def`` → ``variable`` (macro)
|
|
11
|
+
* top-level ``declaration`` with initializer → ``variable``
|
|
12
|
+
* ``field_declaration`` inside struct/union body → ``field``
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from pathlib import Path, PurePosixPath
|
|
18
|
+
from typing import ClassVar
|
|
19
|
+
|
|
20
|
+
import tree_sitter
|
|
21
|
+
import tree_sitter_c
|
|
22
|
+
|
|
23
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
24
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
25
|
+
from codemap.indexers.base import IndexContext
|
|
26
|
+
|
|
27
|
+
SCHEME = "scip-c"
|
|
28
|
+
LANG = "c"
|
|
29
|
+
|
|
30
|
+
_C_LANG = tree_sitter.Language(tree_sitter_c.language())
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CIndexer:
    """CodeMap indexer plugin for C sources, parsed with tree-sitter-c.

    ``index_file`` validates the bytes as UTF-8, parses them, hands the
    syntax tree to ``_Visitor`` and returns whatever it collected plus a
    ``C001`` warning when the parse tree contains errors.
    """

    name: ClassVar[str] = "c"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.c", "*.h"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* has a ``.c`` or ``.h`` suffix (case-sensitive)."""
        suffix = path.suffix
        return suffix == ".c" or suffix == ".h"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Index one C file.

        Args:
            path: Location of the file; not read by this method.
            source: Raw file contents.
            ctx: Indexing context; only ``relative_path`` is used here.

        Returns:
            An ``IndexResult`` holding only a ``C002`` error diagnostic when
            *source* is not valid UTF-8; otherwise the visitor's symbols and
            edges, with a ``C001`` warning appended to its diagnostics if
            tree-sitter flagged parse errors.
        """
        # The decoded text is discarded: decoding is only a validity check,
        # tree-sitter is fed the raw bytes below.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_encoding = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="C002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_encoding])

        tree = tree_sitter.Parser(_C_LANG).parse(source)
        collector = _Visitor(ctx.relative_path)
        collector.visit_root(tree.root_node)

        # Copy so the warning below does not mutate the visitor's own list.
        found = [*collector.diagnostics]
        if tree.root_node.has_error:
            parse_warning = Diagnostic(
                severity="warning",
                file=ctx.relative_path,
                range=Range(start_line=1, end_line=1),
                code="C001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            found.append(parse_warning)

        return IndexResult(
            symbols=collector.symbols,
            edges=collector.edges,
            diagnostics=found,
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class _Visitor:
    """Collect symbols from the top-level items of one parsed C file.

    Results accumulate in ``symbols``; ``edges`` and ``diagnostics`` exist
    for the caller but nothing in this class appends to them. Function
    bodies are never descended into, so locals do not surface.

    NOTE(review): a ``struct``/``union``/``enum`` specifier that is nested in
    a ``typedef`` or in a variable declaration is not visited; only
    specifiers that are top-level items themselves are emitted.
    """

    # Preprocessor conditionals wrap ordinary top-level items (include guards
    # being the common case), so their children are visited as top-level
    # items too. No branch selection is attempted: every arm contributes.
    _PREPROC_CONTAINERS: ClassVar[frozenset[str]] = frozenset(
        {
            "preproc_if",
            "preproc_ifdef",
            "preproc_elif",
            "preproc_elifdef",
            "preproc_else",
        }
    )

    # Node types that wrap the declared name (``*p``, ``a[4]``, ``(*cb)(int)``,
    # ``x = 1``). In each of them the inner declarator precedes array sizes,
    # parameter lists and initializers.
    _DECLARATOR_WRAPPERS: ClassVar[frozenset[str]] = frozenset(
        {
            "init_declarator",
            "pointer_declarator",
            "array_declarator",
            "function_declarator",
            "parenthesized_declarator",
            "attributed_declarator",
        }
    )

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Start an empty collection for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit_root(self, root: tree_sitter.Node) -> None:
        """Visit every direct child of the translation-unit node *root*."""
        for child in root.children:
            self._visit_top_level(child)

    def _visit_top_level(self, node: tree_sitter.Node) -> None:
        """Dispatch one top-level item to its emitter; unknown kinds are ignored."""
        kind = node.type
        if kind == "function_definition":
            self._emit_function(node)
        elif kind == "preproc_def":
            self._emit_macro(node, function_like=False)
        elif kind == "preproc_function_def":
            self._emit_macro(node, function_like=True)
        elif kind == "type_definition":
            self._emit_typedef(node)
        elif kind == "struct_specifier":
            self._emit_record(node, c_kind="struct")
        elif kind == "union_specifier":
            self._emit_record(node, c_kind="union")
        elif kind == "enum_specifier":
            self._emit_enum(node)
        elif kind == "declaration":
            self._emit_declaration(node)
        elif kind in self._PREPROC_CONTAINERS:
            # Previously everything inside ``#if``/``#ifdef``/``#ifndef`` was
            # dropped, which emptied any header with an include guard. The
            # condition and directive tokens match no branch above and are
            # therefore skipped by the recursion.
            for child in node.children:
                self._visit_top_level(child)

    @classmethod
    def _declarator_name(cls, node: tree_sitter.Node, leaf_kind: str) -> str | None:
        """Return the name declared by *node*, unwrapping declarator wrappers.

        Args:
            node: A declarator (or ``init_declarator``) node.
            leaf_kind: Node type that carries the name: ``identifier``,
                ``field_identifier`` or ``type_identifier``.

        Returns:
            The decoded name, or ``None`` when *node* is neither a
            *leaf_kind* node nor a wrapper that leads to one.
        """
        if node.type == leaf_kind:
            raw = node.text
            return raw.decode("utf-8") if raw is not None else None
        if node.type not in cls._DECLARATOR_WRAPPERS:
            return None
        for child in node.children:
            if child.type == leaf_kind or child.type in cls._DECLARATOR_WRAPPERS:
                # Commit to the first candidate: later identifiers are array
                # sizes or initializer values, not the declared name.
                return cls._declarator_name(child, leaf_kind)
        return None

    def _emit_function(self, node: tree_sitter.Node) -> None:
        """Emit a ``function`` symbol for a ``function_definition`` node."""
        name = _function_declarator_name(node)
        if name is None:
            return
        sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.METHOD)])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="function",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"{name}()",
            )
        )

    def _emit_macro(self, node: tree_sitter.Node, *, function_like: bool) -> None:
        """Emit a ``variable`` symbol for a ``#define``.

        ``extra["c_kind"]`` is ``"macro_fn"`` when *function_like* is true,
        else ``"macro"``.
        """
        name = _first_child_text(node, "identifier")
        if name is None:
            return
        sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.TERM)])
        extra = {"c_kind": "macro_fn" if function_like else "macro"}
        self.symbols.append(
            Symbol(
                id=sid,
                kind="variable",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra=extra,
            )
        )

    def _emit_typedef(self, node: tree_sitter.Node) -> None:
        """Emit one ``class`` symbol per alias introduced by a ``typedef``.

        Only the ``declarator`` children are inspected, so the aliased type
        (which may itself be a ``type_identifier``) is never mistaken for the
        new name, and pointer / array / function-pointer aliases resolve to
        the identifier they actually declare.
        """
        for declarator in node.children_by_field_name("declarator"):
            name = self._declarator_name(declarator, "type_identifier")
            if name is None:
                continue
            sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.TYPE)])
            self.symbols.append(
                Symbol(
                    id=sid,
                    kind="class",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                    extra={"c_kind": "typedef"},
                )
            )

    def _emit_record(self, node: tree_sitter.Node, *, c_kind: str) -> None:
        """Emit a ``class`` symbol for a named struct/union plus its fields.

        Anonymous records and forward declarations (no body) are skipped.
        """
        name = _first_child_text(node, "type_identifier")
        body = _first_child(node, "field_declaration_list")
        if name is None or body is None:
            return
        record_desc = Descriptor(name=name, kind=DescriptorKind.TYPE)
        sid = self._make_id([record_desc])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"c_kind": c_kind},
            )
        )
        for child in body.children:
            if child.type == "field_declaration":
                self._emit_field(child, parent=record_desc)

    def _emit_enum(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for a named enum and a ``field`` per enumerator."""
        name = _first_child_text(node, "type_identifier")
        if name is None:
            return
        enum_desc = Descriptor(name=name, kind=DescriptorKind.TYPE)
        sid = self._make_id([enum_desc])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"c_kind": "enum"},
            )
        )
        body = _first_child(node, "enumerator_list")
        if body is None:
            return
        for child in body.children:
            if child.type != "enumerator":
                continue
            enum_name = _first_child_text(child, "identifier")
            if enum_name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id(
                        [
                            enum_desc,
                            Descriptor(name=enum_name, kind=DescriptorKind.TERM),
                        ]
                    ),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(child),
                    extra={"c_kind": "enumerator"},
                )
            )

    def _emit_field(
        self,
        node: tree_sitter.Node,
        *,
        parent: Descriptor,
    ) -> None:
        """Emit a ``field`` symbol for every declarator of a ``field_declaration``.

        Handles ``int x, y;`` (two fields) as well as pointer, array and
        function-pointer members, whose name sits below a wrapper node.
        Every emitted field carries the range of the whole declaration.
        """
        for declarator in node.children_by_field_name("declarator"):
            name = self._declarator_name(declarator, "field_identifier")
            if name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id([parent, Descriptor(name=name, kind=DescriptorKind.TERM)]),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                )
            )

    def _emit_declaration(self, node: tree_sitter.Node) -> None:
        """Emit a ``variable`` symbol for each initialized declarator.

        Only ``init_declarator`` children count (a definition with a value),
        not a bare ``extern`` declaration or prototype. The name is taken
        from the declarator side, so ``int *p = q;`` yields ``p`` rather
        than the initializer ``q``.
        """
        for child in node.children:
            if child.type != "init_declarator":
                continue
            name = self._declarator_name(child, "identifier")
            if name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id([Descriptor(name=name, kind=DescriptorKind.TERM)]),
                    kind="variable",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                )
            )

    def _make_id(self, descriptors: list[Descriptor]) -> SymbolID:
        """Build a ``SymbolID`` from the file's path namespaces plus *descriptors*."""
        full = list(_path_namespaces(self.relative_path))
        full.extend(descriptors)
        return SymbolID(scheme=SCHEME, descriptors=tuple(full))
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# ---------------------------------------------------------------------------
|
|
275
|
+
# Pure helpers
|
|
276
|
+
# ---------------------------------------------------------------------------
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Turn each component of *path* into a ``NAMESPACE`` descriptor, in order."""
    namespaces: list[Descriptor] = []
    for component in path.parts:
        namespaces.append(Descriptor(name=component, kind=DescriptorKind.NAMESPACE))
    return namespaces
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert *node*'s start/end points into a ``Range``.

    Rows are shifted by one to give 1-based line numbers; columns are passed
    through unchanged. The end line is clamped so it is never before the
    start line.
    """
    start_row, start_column = node.start_point
    end_row, end_column = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_column,
        end_line=last_line,
        end_col=end_column,
    )
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _node_text(node: tree_sitter.Node) -> str:
    """Return *node*'s source text decoded as UTF-8, or ``""`` when it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _first_child(node: tree_sitter.Node, kind: str) -> tree_sitter.Node | None:
    """Return the first direct child of *node* whose type equals *kind*, else ``None``."""
    return next(
        (candidate for candidate in node.children if candidate.type == kind),
        None,
    )
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _first_child_text(node: tree_sitter.Node, kind: str) -> str | None:
    """Return the text of *node*'s first direct child of type *kind*.

    Returns ``None`` when no such child exists.
    """
    match = _first_child(node, kind)
    if match is None:
        return None
    return _node_text(match)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _function_declarator_name(node: tree_sitter.Node) -> str | None:
    """Return the name of the function declared under *node*, if any.

    ``int add(int, int) { ... }`` → ``add``. The ``function_declarator`` may
    be wrapped in pointers or parentheses (``int *foo(...)``), and it may
    itself wrap a further declarator when the function returns a function
    pointer (``int (*pick(void))(int) { ... }`` → ``pick``), so both
    directions are unwrapped recursively.

    Args:
        node: A ``function_definition`` node or one of its declarator
            descendants.

    Returns:
        The function name, or ``None`` when no identifier can be reached
        through ``function_declarator`` / ``pointer_declarator`` /
        ``parenthesized_declarator`` nodes.
    """
    declarator = _first_child(node, "function_declarator")
    if declarator is not None:
        name = _first_child_text(declarator, "identifier")
        if name is not None:
            return name
        # The declarator has no direct identifier, so the name lives in a
        # nested declarator. Keep searching inside it instead of giving up,
        # which used to drop the function entirely.
        node = declarator
    for child in node.children:
        # Parameter lists are not wrappers, so parameter names are never
        # mistaken for the function name.
        if child.type in {"pointer_declarator", "parenthesized_declarator"}:
            name = _function_declarator_name(child)
            if name is not None:
                return name
    return None
|
|
File without changes
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Unit tests for the C indexer plugin."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import textwrap
|
|
6
|
+
from pathlib import Path, PurePosixPath
|
|
7
|
+
|
|
8
|
+
from codemap_c import CIndexer
|
|
9
|
+
from codemap_c.indexer import SCHEME
|
|
10
|
+
|
|
11
|
+
from codemap.core.models import IndexResult
|
|
12
|
+
from codemap.indexers.base import IndexContext
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _index(source: str, *, path: str = "src/lib.c") -> IndexResult:
    """Index *source* with a fresh ``CIndexer`` as if it were the file *path*.

    The text is dedented and leading newlines are stripped before it is
    encoded as UTF-8.
    """
    payload = textwrap.dedent(source).lstrip("\n").encode("utf-8")
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath(path),
        language="c",
    )
    return CIndexer().index_file(Path(path), payload, context)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_indexer_metadata() -> None:
    """The plugin advertises the ``c`` language and accepts only .c/.h files."""
    indexer = CIndexer()
    assert indexer.name == "c"
    assert indexer.languages == ["c"]
    for accepted in ("a.c", "a.h"):
        assert indexer.supports(Path(accepted))
    assert not indexer.supports(Path("a.py"))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_scheme_is_consistent() -> None:
    """Every emitted symbol id is rendered with the module's scheme prefix."""
    result = _index("int x = 1;\nint f(void) { return 0; }")
    expected_prefix = f"{SCHEME} "
    assert all(str(symbol.id).startswith(expected_prefix) for symbol in result.symbols)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_function_definition() -> None:
    """A single function definition yields one ``function`` symbol."""
    result = _index("int add(int a, int b) { return a + b; }")
    functions = [symbol for symbol in result.symbols if symbol.kind == "function"]
    assert len(functions) == 1
    only = functions[0]
    assert only.id.descriptors[-1].name == "add"
    assert only.signature == "add()"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_struct_with_fields() -> None:
    """A named struct yields one ``class`` symbol and a ``field`` per member."""
    result = _index(
        """
        struct Point {
            int x;
            int y;
        };
        """
    )
    records = []
    members = []
    for symbol in result.symbols:
        if symbol.kind == "class":
            records.append(symbol)
        if symbol.kind == "field":
            members.append(symbol)
    assert len(records) == 1
    assert records[0].extra["c_kind"] == "struct"
    member_names = {member.id.descriptors[-1].name for member in members}
    assert member_names == {"x", "y"}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_union_with_fields() -> None:
    """A named union yields one ``class`` symbol tagged ``union`` and two fields."""
    result = _index(
        """
        union U {
            int i;
            float f;
        };
        """
    )
    unions = [symbol for symbol in result.symbols if symbol.kind == "class"]
    assert len(unions) == 1
    assert unions[0].extra["c_kind"] == "union"
    field_count = sum(1 for symbol in result.symbols if symbol.kind == "field")
    assert field_count == 2
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_enum_with_enumerators() -> None:
    """An enum is a ``class`` symbol and each enumerator is a ``field``."""
    result = _index("enum Color { RED, GREEN, BLUE };")
    enums = [symbol for symbol in result.symbols if symbol.kind == "class"]
    enumerators = [symbol for symbol in result.symbols if symbol.kind == "field"]
    assert enums[0].extra["c_kind"] == "enum"
    enumerator_names = {item.id.descriptors[-1].name for item in enumerators}
    assert enumerator_names == {"RED", "GREEN", "BLUE"}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_typedef_int_alias() -> None:
    """A typedef of a primitive type yields one ``class`` symbol named after the alias."""
    result = _index("typedef int MyInt;")
    aliases = [symbol for symbol in result.symbols if symbol.kind == "class"]
    assert len(aliases) == 1
    alias = aliases[0]
    assert alias.extra["c_kind"] == "typedef"
    assert alias.id.descriptors[-1].name == "MyInt"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_object_macro() -> None:
    """An object-like ``#define`` yields one ``variable`` symbol tagged ``macro``."""
    result = _index("#define MAX 10")
    defines = [symbol for symbol in result.symbols if symbol.kind == "variable"]
    assert len(defines) == 1
    define = defines[0]
    assert define.extra["c_kind"] == "macro"
    assert define.id.descriptors[-1].name == "MAX"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_function_like_macro() -> None:
    """A function-like ``#define`` is tagged ``macro_fn``."""
    result = _index("#define SQUARE(x) ((x)*(x))")
    defines = [symbol for symbol in result.symbols if symbol.kind == "variable"]
    first = defines[0]
    assert first.extra["c_kind"] == "macro_fn"
    assert first.id.descriptors[-1].name == "SQUARE"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_top_level_variable_with_initializer() -> None:
    """An initialized file-scope variable yields one ``variable`` symbol."""
    result = _index("static int counter = 0;")
    variables = [symbol for symbol in result.symbols if symbol.kind == "variable"]
    assert len(variables) == 1
    assert variables[0].id.descriptors[-1].name == "counter"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_function_bodies_are_opaque() -> None:
    """Locals declared inside a function body must not surface."""
    result = _index(
        """
        int compute(void) {
            int local_a = 1;
            int local_b = 2;
            return local_a + local_b;
        }
        """
    )
    emitted_names = {symbol.id.descriptors[-1].name for symbol in result.symbols}
    assert emitted_names == {"compute"}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_symbol_id_uses_path_namespaces() -> None:
    """The rendered symbol id embeds the file's relative path before the name."""
    result = _index("int x = 1;", path="src/util/math.c")
    variables = (symbol for symbol in result.symbols if symbol.kind == "variable")
    first_variable = next(variables)
    assert str(first_variable.id) == "scip-c . . . src/util/math.c/x."
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_empty_file_yields_no_symbols() -> None:
    """Indexing an empty source produces an empty symbol list."""
    result = _index("")
    assert result.symbols == []
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_invalid_utf8_yields_diagnostic() -> None:
    """Bytes that are not UTF-8 produce no symbols and a leading ``C002`` diagnostic."""
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath("bad.c"),
        language="c",
    )
    result = CIndexer().index_file(Path("bad.c"), b"\xff\xfe int", context)
    assert result.symbols == []
    assert result.diagnostics[0].code == "C002"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_realistic_translation_unit() -> None:
    """A small mixed file surfaces the expected names under each symbol kind."""
    result = _index(
        """
        #include <stdio.h>
        #define MAX 100
        typedef struct { int x; } Box;
        struct Inner { int v; };
        enum Mode { ON, OFF };
        static int counter = 0;
        int process(int n) { return n + counter; }
        """
    )
    names_by_kind: dict[str, set[str]] = {}
    for symbol in result.symbols:
        leaf_name = symbol.id.descriptors[-1].name
        names_by_kind.setdefault(symbol.kind, set()).add(leaf_name)
    assert "process" in names_by_kind["function"]
    assert names_by_kind["variable"] >= {"MAX", "counter"}
    assert names_by_kind["class"] >= {"Box", "Inner", "Mode"}
    # Fields inside anonymous struct under a typedef are not addressable
    # by C name scoping; only named-struct fields surface.
    assert names_by_kind["field"] >= {"v", "ON", "OFF"}
|