codemap-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codemap_sql/__init__.py
ADDED
codemap_sql/indexer.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""SQL indexer built on tree-sitter-sql.
|
|
2
|
+
|
|
3
|
+
Only DDL statements (CREATE TABLE / VIEW / INDEX) produce symbols. The
|
|
4
|
+
table column list is walked to emit field symbols attached to the
|
|
5
|
+
parent table, so callers can answer "which table has column X" and the
|
|
6
|
+
http_route / asset bridges can cross-reference application code against
|
|
7
|
+
schema state.
|
|
8
|
+
|
|
9
|
+
Query statements (SELECT / INSERT / UPDATE / DELETE) are intentionally
|
|
10
|
+
ignored — they are not durable schema artefacts.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from pathlib import Path, PurePosixPath
|
|
16
|
+
from typing import ClassVar
|
|
17
|
+
|
|
18
|
+
import tree_sitter
|
|
19
|
+
import tree_sitter_sql
|
|
20
|
+
|
|
21
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
22
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
23
|
+
from codemap.indexers.base import IndexContext
|
|
24
|
+
|
|
25
|
+
# Scheme component placed on every SymbolID this module builds.
SCHEME: str = "scip-sql"
# Language tag stamped on each emitted Symbol and advertised by the indexer.
LANG: str = "sql"

# Grammar handle, built once at import time and handed to each new Parser.
_SQL_LANG: tree_sitter.Language = tree_sitter.Language(tree_sitter_sql.language())
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SqlIndexer:
    """CodeMap indexer plugin that extracts schema symbols from SQL files.

    The source is parsed with tree-sitter (``tree_sitter_sql`` grammar) and
    the resulting tree is handed to ``_Visitor``, whose collected symbols,
    edges and diagnostics are returned to the caller.
    """

    name: ClassVar[str] = "sql"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.sql", "*.ddl"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when *path* ends in ``.sql`` or ``.ddl``.

        The extension is compared case-insensitively so that files such as
        ``SCHEMA.SQL`` are accepted too.
        """
        return path.suffix.lower() in {".sql", ".ddl"}

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse *source* and return the schema symbols found in it.

        Args:
            path: Location of the file. Not read here; all reported
                locations use ``ctx.relative_path``.
            source: Raw file contents.
            ctx: Indexing context supplying ``relative_path``.

        Returns:
            An ``IndexResult``. When *source* is not valid UTF-8 it carries
            only an ``SQL002`` error diagnostic and nothing is parsed.
            Otherwise it carries the visitor's symbols and edges, plus an
            ``SQL001`` warning when the parse tree contains errors.
        """
        # Decode purely as a validity check; the parser consumes raw bytes.
        try:
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            invalid_encoding = Diagnostic(
                severity="error",
                file=ctx.relative_path,
                code="SQL002",
                message=f"not valid UTF-8: {exc}",
                producer=self.name,
            )
            return IndexResult(diagnostics=[invalid_encoding])

        tree = tree_sitter.Parser(_SQL_LANG).parse(source)
        visitor = _Visitor(ctx.relative_path)
        visitor.visit(tree.root_node)

        diagnostics = list(visitor.diagnostics)
        if tree.root_node.has_error:
            # Symbols are still returned on a best-effort basis; the warning
            # is pinned to line 1 rather than to the offending node.
            diagnostics.append(
                Diagnostic(
                    severity="warning",
                    file=ctx.relative_path,
                    range=Range(start_line=1, end_line=1),
                    code="SQL001",
                    message="tree-sitter reported parse errors; symbols may be incomplete",
                    producer=self.name,
                )
            )
        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=diagnostics,
        )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class _Visitor:
    """Walk a tree-sitter SQL syntax tree and collect schema symbols.

    ``create_table`` and ``create_view`` nodes become ``class`` symbols,
    each ``column_definition`` of a table becomes a ``field`` symbol nested
    under that table, and ``create_index`` nodes become ``variable``
    symbols. Every other node is only traversed while searching for those
    three statement kinds.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        # Recorded on every symbol and used as the namespace prefix of
        # every SymbolID.
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        # Nothing in this class appends to the two lists below; they exist
        # so the caller can read them alongside ``symbols``.
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

    def visit(self, node: tree_sitter.Node) -> None:
        """Dispatch every DDL statement at or below *node*, in document order.

        The walk uses an explicit stack instead of recursion so that very
        deep trees cannot exhaust the interpreter's recursion limit. The
        subtree of a handled statement is left to its handler and is not
        descended into again.
        """
        handlers = {
            "create_table": self._visit_create_table,
            "create_view": self._visit_create_view,
            "create_index": self._visit_create_index,
        }
        pending = [node]
        while pending:
            current = pending.pop()
            handler = handlers.get(current.type)
            if handler is not None:
                handler(current)
                continue
            # Push in reverse so the left-most child is popped first, which
            # keeps the pre-order (source order) traversal.
            pending.extend(reversed(current.children))

    # -------------------------------------------------------- types

    def _visit_create_table(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for the table and a ``field`` per column."""
        name = _object_name(node)
        # A missing or empty name would produce a nameless symbol; skip it.
        if not name:
            return
        self._emit_relation(node, name, sql_kind="table")
        # Walk column_definitions for column names.
        for child in node.children:
            if child.type != "column_definitions":
                continue
            for sub in child.children:
                if sub.type == "column_definition":
                    self._emit_column(name, sub)

    def _emit_column(self, table_name: str, node: tree_sitter.Node) -> None:
        """Emit a ``field`` symbol named after the column's first identifier."""
        col_name = next(
            (_node_text(child) for child in node.children if child.type == "identifier"),
            None,
        )
        if not col_name:
            return
        sid = self._make_id([table_name], col_name, kind=DescriptorKind.TERM)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="field",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
            )
        )

    def _visit_create_view(self, node: tree_sitter.Node) -> None:
        """Emit a ``class`` symbol for the view."""
        name = _object_name(node)
        # A missing or empty name would produce a nameless symbol; skip it.
        if not name:
            return
        self._emit_relation(node, name, sql_kind="view")

    def _visit_create_index(self, node: tree_sitter.Node) -> None:
        """Emit a ``variable`` symbol for the index."""
        # create_index > identifier (index name) is the first identifier.
        name = next(
            (_node_text(child) for child in node.children if child.type == "identifier"),
            None,
        )
        # A missing or empty name would produce a nameless symbol; skip it.
        if not name:
            return
        sid = self._make_id([], name, kind=DescriptorKind.TERM)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="variable",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"sql_kind": "index"},
            )
        )

    # ---------------------------------------------------- helpers

    def _emit_relation(self, node: tree_sitter.Node, name: str, *, sql_kind: str) -> None:
        """Append the ``class`` symbol shared by tables and views."""
        sid = self._make_id([], name, kind=DescriptorKind.TYPE)
        self.symbols.append(
            Symbol(
                id=sid,
                kind="class",
                language=LANG,
                file=self.relative_path,
                range=_node_range(node),
                extra={"sql_kind": sql_kind},
            )
        )

    def _make_id(
        self,
        type_chain: list[str],
        name: str,
        *,
        kind: DescriptorKind,
    ) -> SymbolID:
        """Build a SymbolID for *name*.

        The descriptors are, in order: one namespace per component of the
        file's relative path, one TYPE descriptor per entry of *type_chain*
        (the enclosing objects, outermost first), and finally *name* with
        the requested *kind*.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        descriptors.extend(Descriptor(name=t, kind=DescriptorKind.TYPE) for t in type_chain)
        descriptors.append(Descriptor(name=name, kind=kind))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
# Pure helpers
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    namespace = DescriptorKind.NAMESPACE
    return [Descriptor(name=segment, kind=namespace) for segment in path.parts]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert *node*'s start/end points into a ``Range``.

    Rows are shifted by one to give 1-based line numbers; columns are passed
    through unchanged. The end line is never allowed to precede the start
    line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = end_row + 1
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _node_text(node: tree_sitter.Node) -> str:
    """Return *node*'s text decoded as UTF-8, or ``""`` when it has none."""
    raw = node.text
    if raw is None:
        return ""
    return raw.decode("utf-8")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _object_name(node: tree_sitter.Node) -> str | None:
    """Return the target name of a CREATE TABLE / VIEW statement.

    The name is read from the first direct ``object_reference`` child that
    contains at least one ``identifier``. ``None`` is returned when no such
    child exists.
    """
    for reference in node.children:
        if reference.type == "object_reference":
            identifiers = [part for part in reference.children if part.type == "identifier"]
            if identifiers:
                # The last identifier wins, so schema-qualified `db.users`
                # resolves to "users".
                return _node_text(identifiers[-1])
    return None
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-sql
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SQL indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,sql,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: SQL
|
|
12
|
+
Classifier: Topic :: Database
|
|
13
|
+
Classifier: Topic :: Software Development
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: codemap-core<0.2,>=0.1.0
|
|
16
|
+
Requires-Dist: tree-sitter-sql>=0.3
|
|
17
|
+
Requires-Dist: tree-sitter>=0.25
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# codemap-sql
|
|
23
|
+
|
|
24
|
+
> A SQL indexer for [CodeMap](https://github.com/qxbyte/codemap), shipped
|
|
25
|
+
> as an independent PyPI package.
|
|
26
|
+
|
|
27
|
+
Indexes DDL (schema) statements so AI agents can answer "which table has
|
|
28
|
+
column X" and similar structural questions without scanning every
|
|
29
|
+
migration file. Query (`SELECT`/`INSERT`/...) statements are ignored —
|
|
30
|
+
they are not durable schema artefacts.
|
|
31
|
+
|
|
32
|
+
## What it captures
|
|
33
|
+
|
|
34
|
+
Backed by `tree-sitter-sql`:
|
|
35
|
+
|
|
36
|
+
| AST node | Symbol kind |
|
|
37
|
+
|---|---|
|
|
38
|
+
| `create_table` | `class` (with `extra.sql_kind=table`) |
|
|
39
|
+
| `create_view` | `class` (with `extra.sql_kind=view`) |
|
|
40
|
+
| `create_index` | `variable` (with `extra.sql_kind=index`) |
|
|
41
|
+
| `column_definition` inside `create_table` | `field` (attached to the table) |
|
|
42
|
+
|
|
43
|
+
Asset-style scheme: this plugin uses ``scip-sql`` so a downstream
|
|
44
|
+
``http_route`` / ``orm_mapping`` bridge can cross-reference these
|
|
45
|
+
symbols against application code that talks to them.
|
|
46
|
+
|
|
47
|
+
## SymbolID encoding
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
scip-sql . . . db/migrations/0001_init.sql/users#
|
|
51
|
+
scip-sql . . . db/migrations/0001_init.sql/users#email.
|
|
52
|
+
scip-sql . . . db/migrations/0001_init.sql/idx_users_email.
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Install
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install codemap-sql
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Limits
|
|
62
|
+
|
|
63
|
+
* Only DDL is indexed; `SELECT` / `INSERT` / `UPDATE` / `DELETE` are
|
|
64
|
+
walked past silently.
|
|
65
|
+
* `CREATE PROCEDURE` / `CREATE FUNCTION` / `CREATE TRIGGER` are not
|
|
66
|
+
yet captured — the grammar reports them under generic statement nodes.
|
|
67
|
+
* Dialect-specific features (e.g. PostgreSQL extensions) may not be
|
|
68
|
+
parsed.
|
|
69
|
+
* `ALTER TABLE` statements that add columns are not tracked.
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
codemap_sql/__init__.py,sha256=4-POuSiPhVHBDS9fcXyscJxoPNukyNgIcN2hXs-k2EM,166
|
|
2
|
+
codemap_sql/indexer.py,sha256=JCHXEFqNSbEi5eJEM5qWnIlm9qe0RoffBWOid1gSf38,7642
|
|
3
|
+
codemap_sql-0.1.0.dist-info/METADATA,sha256=UZk3KBU6GnpCZAJ99yd2vZX_LUeUhw-G5RwhdGWm-NQ,2236
|
|
4
|
+
codemap_sql-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
5
|
+
codemap_sql-0.1.0.dist-info/entry_points.txt,sha256=K6H8ueXSVLxVx_BY3L4br7aAB_Fb0W11TwyOpe39IiQ,48
|
|
6
|
+
codemap_sql-0.1.0.dist-info/RECORD,,
|