codemap-c 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_c/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """C indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_c.indexer import CIndexer
6
+
7
+ __all__ = ["CIndexer"]
8
+ __version__ = "0.1.0"
codemap_c/indexer.py ADDED
@@ -0,0 +1,323 @@
1
+ """C indexer built on tree-sitter-c.
2
+
3
+ Top-level declarations only (function bodies are not walked for inner
4
+ state). Captured node kinds:
5
+
6
+ * ``function_definition`` → ``function``
7
+ * ``struct_specifier`` / ``union_specifier`` → ``class`` (with body)
8
+ * ``enum_specifier`` → ``class``
9
+ * ``type_definition`` (``typedef``) → ``class``
10
+ * ``preproc_def`` / ``preproc_function_def`` → ``variable`` (macro)
11
+ * top-level ``declaration`` with initializer → ``variable``
12
+ * ``field_declaration`` inside struct/union body → ``field``
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from pathlib import Path, PurePosixPath
18
+ from typing import ClassVar
19
+
20
+ import tree_sitter
21
+ import tree_sitter_c
22
+
23
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
24
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
25
+ from codemap.indexers.base import IndexContext
26
+
27
+ SCHEME = "scip-c"
28
+ LANG = "c"
29
+
30
+ _C_LANG = tree_sitter.Language(tree_sitter_c.language())
31
+
32
+
33
class CIndexer:
    """CodeMap indexer plugin for C sources (``*.c``) and headers (``*.h``).

    Each file is parsed with tree-sitter-c and handed to ``_Visitor``, whose
    collected symbols, edges and diagnostics make up the ``IndexResult``.
    """

    name: ClassVar[str] = "c"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.c", "*.h"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return ``True`` when *path* ends in ``.c`` or ``.h`` (case-sensitive)."""
        suffix = path.suffix
        return suffix == ".c" or suffix == ".h"

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Index one C file and return what was found.

        Args:
            path: Location of the file; not consulted by this method.
            source: Raw file contents.
            ctx: Indexing context; only ``ctx.relative_path`` is read.

        Returns:
            An ``IndexResult``. Input that is not valid UTF-8 yields a single
            ``C002`` error diagnostic and nothing else. Otherwise the result
            carries the visitor's symbols and edges, plus a ``C001`` warning
            when the parsed tree reports errors.
        """
        # Decode purely as a validity probe; the parser consumes the bytes.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            undecodable = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="C002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[undecodable])

        tree = tree_sitter.Parser(_C_LANG).parse(source)
        walker = _Visitor(ctx.relative_path)
        walker.visit_root(tree.root_node)

        collected = [*walker.diagnostics]
        if tree.root_node.has_error:
            # Symbols gathered so far are still returned; the warning only
            # signals that the list may be incomplete.
            parse_warning = Diagnostic(
                severity="warning",
                file=ctx.relative_path,
                range=Range(start_line=1, end_line=1),
                code="C001",
                message="tree-sitter reported parse errors; symbols may be incomplete",
                producer=self.name,
            )
            collected.append(parse_warning)

        return IndexResult(
            symbols=walker.symbols,
            edges=walker.edges,
            diagnostics=collected,
        )
83
+
84
+
85
class _Visitor:
    """Collect symbols from the top-level nodes of one parsed C file.

    Results accumulate in ``symbols``, ``edges`` and ``diagnostics``. Function
    bodies are never entered, but preprocessor conditionals and linkage
    blocks are, so that declarations behind include guards are indexed.
    """

    # Nodes that only group other items. They are descended into without any
    # branch selection, so every arm of an ``#if``/``#else`` contributes.
    _CONTAINER_KINDS: ClassVar[frozenset[str]] = frozenset(
        {
            "preproc_if",
            "preproc_ifdef",
            "preproc_else",
            "preproc_elif",
            "preproc_elifdef",
            "linkage_specification",
            "declaration_list",
        }
    )

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Start an empty collection for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit_root(self, root: tree_sitter.Node) -> None:
        """Dispatch every direct child of the translation unit *root*."""
        for child in root.children:
            self._visit_top_level(child)

    def _visit_top_level(self, node: tree_sitter.Node) -> None:
        """Emit symbols for *node*, recursing through grouping nodes only."""
        kind = node.type
        if kind == "function_definition":
            self._emit_function(node)
        elif kind == "preproc_def":
            self._emit_macro(node, function_like=False)
        elif kind == "preproc_function_def":
            self._emit_macro(node, function_like=True)
        elif kind == "type_definition":
            self._emit_typedef(node)
        elif kind == "struct_specifier":
            self._emit_record(node, c_kind="struct")
        elif kind == "union_specifier":
            self._emit_record(node, c_kind="union")
        elif kind == "enum_specifier":
            self._emit_enum(node)
        elif kind == "declaration":
            self._emit_declaration(node)
        elif kind in self._CONTAINER_KINDS:
            # Include guards wrap an entire header in ``preproc_ifdef``;
            # without this recursion such files would yield no symbols.
            for child in node.children:
                self._visit_top_level(child)

    def _emit_function(self, node: tree_sitter.Node) -> None:
        """Emit a ``function`` symbol for a ``function_definition`` node."""
        name = _function_declarator_name(node)
        if name is None:
            return
        sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.METHOD)])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="function",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                signature=f"{name}()",
            )
        )

    def _emit_macro(self, node: tree_sitter.Node, *, function_like: bool) -> None:
        """Emit a ``variable`` symbol for a ``#define``.

        ``extra["c_kind"]`` is ``"macro_fn"`` when *function_like* is true and
        ``"macro"`` otherwise.
        """
        name = _first_child_text(node, "identifier")
        if name is None:
            return
        sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.TERM)])
        extra = {"c_kind": "macro_fn" if function_like else "macro"}
        self.symbols.append(
            Symbol(
                id=sid,
                kind="variable",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra=extra,
            )
        )

    def _emit_typedef(self, node: tree_sitter.Node) -> None:
        """Emit one ``class`` symbol per name introduced by a ``typedef``.

        The alias is read from the ``declarator`` field(s) and unwrapped
        through pointer/array/function declarators, so ``typedef foo *bar;``
        yields ``bar`` rather than the aliased type ``foo``.
        """
        if node.children_by_field_name("declarator"):
            names = self._declared_names(node, "type_identifier")
        else:
            # No declarator field exposed: keep the previous heuristic of
            # taking the last direct ``type_identifier`` child.
            last: str | None = None
            for child in node.children:
                if child.type == "type_identifier":
                    last = _node_text(child)
            names = [] if last is None else [last]
        for name in names:
            sid = self._make_id([Descriptor(name=name, kind=DescriptorKind.TYPE)])
            self.symbols.append(
                Symbol(
                    id=sid,
                    kind="class",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                    extra={"c_kind": "typedef"},
                )
            )

    def _emit_record(self, node: tree_sitter.Node, *, c_kind: str) -> None:
        """Emit a ``class`` symbol for a named struct/union with a body, plus
        one ``field`` symbol per declared member.
        """
        # Skip anonymous structs / forward declarations without a body.
        name = _first_child_text(node, "type_identifier")
        body = _first_child(node, "field_declaration_list")
        if name is None or body is None:
            return
        record_desc = Descriptor(name=name, kind=DescriptorKind.TYPE)
        sid = self._make_id([record_desc])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"c_kind": c_kind},
            )
        )
        for member in self._members(body, "field_declaration"):
            self._emit_field(member, parent=record_desc)

    def _emit_enum(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for a named enum and a ``field`` symbol for
        each of its enumerators (when a body is present).
        """
        name = _first_child_text(node, "type_identifier")
        if name is None:
            return
        enum_desc = Descriptor(name=name, kind=DescriptorKind.TYPE)
        sid = self._make_id([enum_desc])
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"c_kind": "enum"},
            )
        )
        body = _first_child(node, "enumerator_list")
        if body is None:
            return
        for member in self._members(body, "enumerator"):
            enum_name = _first_child_text(member, "identifier")
            if enum_name is None:
                continue
            self.symbols.append(
                Symbol(
                    id=self._make_id(
                        [
                            enum_desc,
                            Descriptor(name=enum_name, kind=DescriptorKind.TERM),
                        ]
                    ),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(member),
                    extra={"c_kind": "enumerator"},
                )
            )

    def _emit_field(
        self,
        node: tree_sitter.Node,
        *,
        parent: Descriptor,
    ) -> None:
        """Emit a ``field`` symbol for every name in a ``field_declaration``.

        Handles ``int a, b;`` as well as pointer, array and function-pointer
        members, whose identifier is nested inside a wrapping declarator.
        """
        names = self._declared_names(node, "field_identifier")
        if not names:
            # No declarator field exposed: fall back to a direct child.
            direct = _first_child_text(node, "field_identifier")
            names = [] if direct is None else [direct]
        for name in names:
            self.symbols.append(
                Symbol(
                    id=self._make_id([parent, Descriptor(name=name, kind=DescriptorKind.TERM)]),
                    kind="field",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                )
            )

    def _emit_declaration(self, node: tree_sitter.Node) -> None:
        """Emit a ``variable`` symbol for each initialised declarator."""
        # Only emit when there's an init_declarator (i.e. a definition with
        # a value), not a bare extern prototype.
        for child in node.children:
            if child.type != "init_declarator":
                continue
            # The identifier may sit below pointer/array/function wrappers
            # (``int *p = 0;``, ``int a[] = {1};``), so unwrap to reach it.
            leaf = self._declarator_leaf(child, "identifier")
            if leaf is None:
                continue
            name = _node_text(leaf)
            self.symbols.append(
                Symbol(
                    id=self._make_id([Descriptor(name=name, kind=DescriptorKind.TERM)]),
                    kind="variable",
                    language=LANG,
                    file=self.relative_path,
                    range=_node_range(node),
                )
            )

    def _members(self, body: tree_sitter.Node, kind: str) -> list[tree_sitter.Node]:
        """Return descendants of *body* of type *kind*, in source order,
        looking through grouping nodes such as preprocessor conditionals.
        """
        found: list[tree_sitter.Node] = []
        for child in body.children:
            if child.type == kind:
                found.append(child)
            elif child.type in self._CONTAINER_KINDS:
                found.extend(self._members(child, kind))
        return found

    def _declared_names(self, node: tree_sitter.Node, leaf_kind: str) -> list[str]:
        """Return the text of the *leaf_kind* node under each ``declarator``
        field of *node*; declarators without such a leaf are skipped.
        """
        names: list[str] = []
        for declarator in node.children_by_field_name("declarator"):
            leaf = self._declarator_leaf(declarator, leaf_kind)
            if leaf is not None:
                names.append(_node_text(leaf))
        return names

    @staticmethod
    def _declarator_leaf(node: tree_sitter.Node, leaf_kind: str) -> tree_sitter.Node | None:
        """Descend through nested declarators to the node of type *leaf_kind*.

        Follows the ``declarator`` field where present. Wrappers that expose
        no such field fall back to their first named child that is either the
        leaf itself or another ``*_declarator``. Returns ``None`` when the
        chain ends without reaching a leaf.
        """
        current: tree_sitter.Node | None = node
        while current is not None and current.type != leaf_kind:
            inner = current.child_by_field_name("declarator")
            if inner is None:
                inner = next(
                    (
                        child
                        for child in current.named_children
                        if child.type == leaf_kind or child.type.endswith("_declarator")
                    ),
                    None,
                )
            current = inner
        return current

    def _make_id(self, descriptors: list[Descriptor]) -> SymbolID:
        """Build a ``SymbolID`` from the file's path namespaces followed by
        *descriptors*.
        """
        full = list(_path_namespaces(self.relative_path))
        full.extend(descriptors)
        return SymbolID(scheme=SCHEME, descriptors=tuple(full))
272
+
273
+
274
+ # ---------------------------------------------------------------------------
275
+ # Pure helpers
276
+ # ---------------------------------------------------------------------------
277
+
278
+
279
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one ``NAMESPACE`` descriptor per component of *path*, in order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
281
+
282
+
283
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert a node's start/end points into a ``Range``.

    Rows are shifted up by one to give line numbers; columns are passed
    through unchanged. The end line is clamped so it never precedes the
    start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
292
+
293
+
294
def _node_text(node: tree_sitter.Node) -> str:
    """Return the node's source text decoded as UTF-8, or ``""`` when the
    node carries no text.
    """
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
296
+
297
+
298
def _first_child(node: tree_sitter.Node, kind: str) -> tree_sitter.Node | None:
    """Return the first direct child of *node* whose type equals *kind*,
    or ``None`` when there is no such child.
    """
    return next((candidate for candidate in node.children if candidate.type == kind), None)
303
+
304
+
305
def _first_child_text(node: tree_sitter.Node, kind: str) -> str | None:
    """Return the text of the first direct child of type *kind*, or ``None``
    when *node* has no such child.
    """
    match = _first_child(node, kind)
    if match is None:
        return None
    return _node_text(match)
308
+
309
+
310
def _function_declarator_name(node: tree_sitter.Node) -> str | None:
    """Return the name of the function declared under *node*, or ``None``.

    ``int add(int, int) { ... }`` → ``add``. The ``function_declarator`` may
    be wrapped in pointers or parentheses (``int *foo(...)``), and for a
    function returning a function pointer (``int (*get(void))(int)``) the
    outer ``function_declarator`` itself wraps the one that carries the
    name, so both directions are unwrapped recursively.
    """
    declarator = _first_child(node, "function_declarator")
    if declarator is not None:
        name = _first_child_text(declarator, "identifier")
        if name is not None:
            return name
        # No direct identifier: the real name sits deeper inside this
        # declarator (function returning a function pointer).
        return _function_declarator_name(declarator)
    # Walk through pointer_declarator / parenthesized_declarator wrappers.
    for child in node.children:
        if child.type in {"pointer_declarator", "parenthesized_declarator"}:
            name = _function_declarator_name(child)
            if name is not None:
                return name
    return None
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-c
3
+ Version: 0.1.0
4
+ Summary: C language indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: c,codemap,indexer,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: C
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: codemap-core<0.2,>=0.1.0
15
+ Requires-Dist: tree-sitter-c>=0.24
16
+ Requires-Dist: tree-sitter>=0.25
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # codemap-c
22
+
23
+ > A C language indexer for [CodeMap](https://github.com/qxbyte/codemap),
24
+ > shipped as an independent PyPI package.
25
+
26
+ ## What it captures
27
+
28
+ Backed by `tree-sitter-c`:
29
+
30
+ | AST node | Symbol kind |
31
+ |---|---|
32
+ | `function_definition` | `function` |
33
+ | `struct_specifier` (named, with body) | `class` (`extra.c_kind=struct`) |
34
+ | `union_specifier` (named, with body) | `class` (`extra.c_kind=union`) |
35
+ | `enum_specifier` (named) | `class` (`extra.c_kind=enum`); each enumerator → `field` (`extra.c_kind=enumerator`) |
36
+ | `type_definition` (`typedef`) | `class` (`extra.c_kind=typedef`) |
37
+ | `preproc_def` / `preproc_function_def` (`#define X ...`, `#define F(x) ...`) | `variable` (`extra.c_kind=macro` / `macro_fn`) |
38
+ | Top-level `declaration` with initializer | `variable` |
39
+ | `field_declaration` inside a struct/union body | `field` (attached to the parent) |
40
+
41
+ Function bodies are not walked for inner declarations — locals are not
42
+ part of a file's top-level interface.
43
+
44
+ ## Install
45
+
46
+ ```bash
47
+ pip install codemap-c
48
+ ```
49
+
50
+ ## File patterns
51
+
52
+ * `*.c`, `*.h`
53
+
54
+ ## Limits
55
+
56
+ * Preprocessor conditionals (`#ifdef`/`#if`) parse but no branch selection
57
+ is performed — both arms contribute symbols.
58
+ * Function declarations (prototypes) are not emitted; only definitions.
59
+ * Forward struct declarations without a body are skipped.
60
+
61
+ ## License
62
+
63
+ MIT.
@@ -0,0 +1,6 @@
1
+ codemap_c/__init__.py,sha256=hgLYmdY2O_TPNp3vi86rtSY-P5kBdtJTBKpzuYYgX5k,158
2
+ codemap_c/indexer.py,sha256=ZC998cfrNqu14gUXAL3Z0HSez2m0wi-QGjOCcDg7UEY,11289
3
+ codemap_c-0.1.0.dist-info/METADATA,sha256=zwZvPBqFgyNo_0GL_H0ZZ25_VPPUg9qPaj2QMrQgGZ0,1860
4
+ codemap_c-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ codemap_c-0.1.0.dist-info/entry_points.txt,sha256=jZge3YIqvMFCL4eKJdqvNiZHaFcDAt5q3vLuJwXMW1g,42
6
+ codemap_c-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [codemap.indexers]
2
+ c = codemap_c:CIndexer