codemap-sql 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """SQL indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_sql.indexer import SqlIndexer
6
+
7
__all__ = ["SqlIndexer"]
# Must match the distribution version declared in the package metadata
# (0.1.0a1); the previous value "0.1.0" hid the alpha pre-release marker from
# anything that inspects the package version at runtime.
__version__ = "0.1.0a1"
codemap_sql/indexer.py ADDED
@@ -0,0 +1,236 @@
1
+ """SQL indexer built on tree-sitter-sql.
2
+
3
+ Only DDL statements (CREATE TABLE / VIEW / INDEX) produce symbols. The
4
+ table column list is walked to emit field symbols attached to the
5
+ parent table, so callers can answer "which table has column X" and the
6
+ http_route / asset bridges can cross-reference application code against
7
+ schema state.
8
+
9
+ Query statements (SELECT / INSERT / UPDATE / DELETE) are intentionally
10
+ ignored — they are not durable schema artefacts.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path, PurePosixPath
16
+ from typing import ClassVar
17
+
18
+ import tree_sitter
19
+ import tree_sitter_sql
20
+
21
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
22
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
23
+ from codemap.indexers.base import IndexContext
24
+
25
# Scheme component of every SymbolID built by this module.
SCHEME = "scip-sql"
# Language tag stamped on emitted symbols and advertised by the indexer.
LANG = "sql"

# Grammar handle, created once at import time and passed to every parser.
_SQL_LANG = tree_sitter.Language(tree_sitter_sql.language())
29
+
30
+
31
class SqlIndexer:
    """Indexer plugin that extracts schema symbols from SQL DDL files.

    Parsing is delegated to tree-sitter with the SQL grammar; the resulting
    tree is handed to ``_Visitor``, which produces the symbols.
    """

    name: ClassVar[str] = "sql"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.sql", "*.ddl"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* ends in ``.sql`` or ``.ddl``.

        The extension is compared case-insensitively so files such as
        ``SCHEMA.SQL`` are accepted as well.
        """
        return path.suffix.lower() in {".sql", ".ddl"}

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the DDL symbols it declares.

        Args:
            path: Location of the file. Not used here; every result is keyed
                on ``ctx.relative_path`` instead.
            source: Raw file contents.
            ctx: Indexing context supplying ``relative_path``.

        Returns:
            An ``IndexResult`` holding the visitor's symbols, edges and
            diagnostics. If *source* is not valid UTF-8 the result contains
            only a single ``SQL002`` error diagnostic. If the parse tree
            contains errors, a ``SQL001`` warning is appended and whatever
            symbols were found are still returned.
        """
        # Validation only: the decoded text is discarded and the parser is
        # fed the raw bytes.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="SQL002",
                        message=f"not valid UTF-8: {exc}",
                        producer=self.name,
                    )
                ]
            )

        parser = tree_sitter.Parser(_SQL_LANG)
        tree = parser.parse(source)
        visitor = _Visitor(ctx.relative_path)
        visitor.visit(tree.root_node)

        diagnostics = list(visitor.diagnostics)
        if tree.root_node.has_error:
            # A single file-level warning pinned to line 1; individual error
            # nodes are not located.
            diagnostics.append(
                Diagnostic(
                    severity="warning",
                    file=ctx.relative_path,
                    range=Range(start_line=1, end_line=1),
                    code="SQL001",
                    message="tree-sitter reported parse errors; symbols may be incomplete",
                    producer=self.name,
                )
            )
        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=diagnostics,
        )
81
+
82
+
83
class _Visitor:
    """Collect symbols from a tree-sitter SQL parse tree.

    Results accumulate on ``symbols``, ``edges`` and ``diagnostics``. The
    handlers in this class only ever append to ``symbols``; the other two
    lists stay empty.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        """Create an empty visitor for the file at *relative_path*."""
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit(self, node: tree_sitter.Node) -> None:
        """Walk the tree under *node* and dispatch each DDL statement found.

        ``create_table`` / ``create_view`` / ``create_index`` nodes are handed
        to their handler and not descended into any further; every other node
        is only traversed. The walk is pre-order, left to right, and uses an
        explicit stack instead of recursion so that a deeply nested parse
        tree cannot raise ``RecursionError``.
        """
        handlers = {
            "create_table": self._visit_create_table,
            "create_view": self._visit_create_view,
            "create_index": self._visit_create_index,
        }
        pending = [node]
        while pending:
            current = pending.pop()
            handler = handlers.get(current.type)
            if handler is not None:
                handler(current)
                continue
            # Reversed so the leftmost child is popped first, which keeps the
            # emitted symbols in source order.
            pending.extend(reversed(current.children))

    # -------------------------------------------------------- types

    def _visit_create_table(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for the table plus one field per column.

        Nothing is emitted when no target name can be found.
        """
        name = _object_name(node)
        if name is None:
            return
        sid = self._make_id([], name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"sql_kind": "table"},
            )
        )
        # Walk column_definitions for column names.
        for child in node.children:
            if child.type != "column_definitions":
                continue
            for sub in child.children:
                if sub.type == "column_definition":
                    self._emit_column(name, sub)

    def _emit_column(self, table_name: str, node: tree_sitter.Node) -> None:
        """Emit a ``field`` symbol for one column, nested under *table_name*.

        The column name is the first ``identifier`` child of *node*; the
        column is skipped when there is none or its text is empty.
        """
        col_name = None
        for child in node.children:
            if child.type == "identifier":
                col_name = _node_text(child)
                break
        if not col_name:
            return
        sid = self._make_id([table_name], col_name, kind=DescriptorKind.TERM)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="field",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
            )
        )

    def _visit_create_view(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for the view; skip it when unnamed."""
        name = _object_name(node)
        if name is None:
            return
        sid = self._make_id([], name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"sql_kind": "view"},
            )
        )

    def _visit_create_index(self, node: tree_sitter.Node) -> None:
        """Emit a ``variable`` symbol for the index; skip it when unnamed."""
        # create_index > identifier (index name) is the first identifier.
        name = None
        for child in node.children:
            if child.type == "identifier":
                name = _node_text(child)
                break
        if name is None:
            return
        sid = self._make_id([], name, kind=DescriptorKind.TERM)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="variable",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"sql_kind": "index"},
            )
        )

    # ---------------------------------------------------- helpers

    def _make_id(
        self,
        type_chain: list[str],
        name: str,
        *,
        kind: DescriptorKind,
    ) -> SymbolID:
        """Build the SymbolID for *name* declared in this visitor's file.

        The descriptor sequence is: one namespace per component of the
        relative path, then one TYPE descriptor per entry of *type_chain*
        (the enclosing objects, outermost first), then *name* with *kind*.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(Descriptor(name=t, kind=DescriptorKind.TYPE) for t in type_chain)
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Pure helpers
202
+ # ---------------------------------------------------------------------------
203
+
204
+
205
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    return [
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE)
        for segment in path.parts
    ]
207
+
208
+
209
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert the span of *node* into a ``Range``.

    Rows are shifted by one (tree-sitter points are zero-based) while columns
    are passed through unchanged. The end line is never reported as earlier
    than the start line.
    """
    start_row, start_column = node.start_point
    end_row, end_column = node.end_point
    first_line = start_row + 1
    last_line = max(end_row + 1, first_line)
    return Range(
        start_line=first_line,
        start_col=start_column,
        end_line=last_line,
        end_col=end_column,
    )
218
+
219
+
220
def _node_text(node: tree_sitter.Node) -> str:
    """Return the UTF-8 decoded text of *node*, or "" when it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
222
+
223
+
224
def _object_name(node: tree_sitter.Node) -> str | None:
    """Return the target name of a CREATE TABLE / VIEW statement node.

    The name is read from the first ``object_reference`` child that contains
    at least one ``identifier``; ``None`` is returned when there is no such
    child.
    """
    for reference in node.children:
        if reference.type != "object_reference":
            continue
        identifiers = [
            part for part in reference.children if part.type == "identifier"
        ]
        if identifiers:
            # Last identifier wins, so schema-qualified `db.users` → "users".
            return _node_text(identifiers[-1])
    return None
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-sql
3
+ Version: 0.1.0a1
4
+ Summary: SQL indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,sql,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: SQL
12
+ Classifier: Topic :: Database
13
+ Classifier: Topic :: Software Development
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: codemap-core<0.2,>=0.1.0a1
16
+ Requires-Dist: tree-sitter-sql>=0.3
17
+ Requires-Dist: tree-sitter>=0.25
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=8.0; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # codemap-sql
23
+
24
+ > A SQL indexer for [CodeMap](https://github.com/qxbyte/codemap), shipped
25
+ > as an independent PyPI package.
26
+
27
+ Indexes DDL (schema) statements so AI agents can answer "which table has
28
+ column X" and similar structural questions without scanning every
29
+ migration file. Query (`SELECT`/`INSERT`/...) statements are ignored —
30
+ they are not durable schema artefacts.
31
+
32
+ ## What it captures
33
+
34
+ Backed by `tree-sitter-sql`:
35
+
36
+ | AST node | Symbol kind |
37
+ |---|---|
38
+ | `create_table` | `class` (with `extra.sql_kind=table`) |
39
+ | `create_view` | `class` (with `extra.sql_kind=view`) |
40
+ | `create_index` | `variable` (with `extra.sql_kind=index`) |
41
+ | `column_definition` inside `create_table` | `field` (attached to the table) |
42
+
43
+ Asset-style scheme: this plugin uses ``scip-sql`` so a downstream
44
+ ``http_route`` / ``orm_mapping`` bridge can cross-reference these
45
+ symbols against application code that talks to them.
46
+
47
+ ## SymbolID encoding
48
+
49
+ ```
50
+ scip-sql . . . db/migrations/0001_init.sql/users#
51
+ scip-sql . . . db/migrations/0001_init.sql/users#email.
52
+ scip-sql . . . db/migrations/0001_init.sql/idx_users_email.
53
+ ```
54
+
55
+ ## Install
56
+
57
+ ```bash
58
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-sql"
59
+ ```
60
+
61
+ ## Limits
62
+
63
+ * Only DDL is indexed; `SELECT` / `INSERT` / `UPDATE` / `DELETE` are
64
+ walked past silently.
65
+ * `CREATE PROCEDURE` / `CREATE FUNCTION` / `CREATE TRIGGER` are not
66
+ yet captured — the grammar reports them under generic statement nodes.
67
+ * Dialect-specific features (e.g. PostgreSQL extensions) may not be
68
+ parsed.
69
+ * `ALTER TABLE` statements that add columns are not tracked.
70
+
71
+ ## License
72
+
73
+ MIT.
@@ -0,0 +1,6 @@
1
+ codemap_sql/__init__.py,sha256=4-POuSiPhVHBDS9fcXyscJxoPNukyNgIcN2hXs-k2EM,166
2
+ codemap_sql/indexer.py,sha256=JCHXEFqNSbEi5eJEM5qWnIlm9qe0RoffBWOid1gSf38,7642
3
+ codemap_sql-0.1.0a1.dist-info/METADATA,sha256=kKvQortL08XDPLPpLTQSxd4SCA4aVagCnak9jtoA2SY,2305
4
+ codemap_sql-0.1.0a1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
5
+ codemap_sql-0.1.0a1.dist-info/entry_points.txt,sha256=K6H8ueXSVLxVx_BY3L4br7aAB_Fb0W11TwyOpe39IiQ,48
6
+ codemap_sql-0.1.0a1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [codemap.indexers]
2
+ sql = codemap_sql:SqlIndexer