codemap-sql 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Build artifacts
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+ .eggs/
12
+
13
+ # Test / coverage
14
+ .pytest_cache/
15
+ .coverage
16
+ .coverage.*
17
+ htmlcov/
18
+ coverage.xml
19
+ .tox/
20
+ .mypy_cache/
21
+ .ruff_cache/
22
+ .benchmarks/
23
+
24
+ # Virtualenv
25
+ .venv/
26
+ venv/
27
+ env/
28
+
29
+ # uv / pdm lockfiles (commit uv.lock once we settle)
30
+ # uv.lock
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+
38
+ # OS
39
+ .DS_Store
40
+ Thumbs.db
41
+
42
+ # CodeMap own index when dogfooding
43
+ .codemap/
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-sql
3
+ Version: 0.1.0a1
4
+ Summary: SQL indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,sql,tree-sitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: SQL
12
+ Classifier: Topic :: Database
13
+ Classifier: Topic :: Software Development
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: codemap-core<0.2,>=0.1.0a1
16
+ Requires-Dist: tree-sitter-sql>=0.3
17
+ Requires-Dist: tree-sitter>=0.25
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=8.0; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # codemap-sql
23
+
24
+ > A SQL indexer for [CodeMap](https://github.com/qxbyte/codemap), shipped
25
+ > as an independent PyPI package.
26
+
27
+ Indexes DDL (schema) statements so AI agents can answer "which table has
28
+ column X" and similar structural questions without scanning every
29
+ migration file. Query (`SELECT`/`INSERT`/...) statements are ignored —
30
+ they are not durable schema artefacts.
31
+
32
+ ## What it captures
33
+
34
+ Backed by `tree-sitter-sql`:
35
+
36
+ | AST node | Symbol kind |
37
+ |---|---|
38
+ | `create_table` | `class` (with `extra.sql_kind=table`) |
39
+ | `create_view` | `class` (with `extra.sql_kind=view`) |
40
+ | `create_index` | `variable` (with `extra.sql_kind=index`) |
41
+ | `column_definition` inside `create_table` | `field` (attached to the table) |
42
+
43
+ Asset-style scheme: this plugin uses ``scip-sql`` so a downstream
44
+ ``http_route`` / ``orm_mapping`` bridge can cross-reference these
45
+ symbols against application code that talks to them.
46
+
47
+ ## SymbolID encoding
48
+
49
+ ```
50
+ scip-sql . . . db/migrations/0001_init.sql/users#
51
+ scip-sql . . . db/migrations/0001_init.sql/users#email.
52
+ scip-sql . . . db/migrations/0001_init.sql/idx_users_email.
53
+ ```
54
+
55
+ ## Install
56
+
57
+ ```bash
58
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-sql"
59
+ ```
60
+
61
+ ## Limits
62
+
63
+ * Only DDL is indexed; `SELECT` / `INSERT` / `UPDATE` / `DELETE` are
64
+ walked past silently.
65
+ * `CREATE PROCEDURE` / `CREATE FUNCTION` / `CREATE TRIGGER` are not
66
+ yet captured — the grammar reports them under generic statement nodes.
67
+ * Dialect-specific features (e.g. PostgreSQL extensions) may not be
68
+ parsed.
69
+ * `ALTER TABLE` statements that add columns are not tracked.
70
+
71
+ ## License
72
+
73
+ MIT.
@@ -0,0 +1,52 @@
1
+ # codemap-sql
2
+
3
+ > A SQL indexer for [CodeMap](https://github.com/qxbyte/codemap), shipped
4
+ > as an independent PyPI package.
5
+
6
+ Indexes DDL (schema) statements so AI agents can answer "which table has
7
+ column X" and similar structural questions without scanning every
8
+ migration file. Query (`SELECT`/`INSERT`/...) statements are ignored —
9
+ they are not durable schema artefacts.
10
+
11
+ ## What it captures
12
+
13
+ Backed by `tree-sitter-sql`:
14
+
15
+ | AST node | Symbol kind |
16
+ |---|---|
17
+ | `create_table` | `class` (with `extra.sql_kind=table`) |
18
+ | `create_view` | `class` (with `extra.sql_kind=view`) |
19
+ | `create_index` | `variable` (with `extra.sql_kind=index`) |
20
+ | `column_definition` inside `create_table` | `field` (attached to the table) |
21
+
22
+ Asset-style scheme: this plugin uses ``scip-sql`` so a downstream
23
+ ``http_route`` / ``orm_mapping`` bridge can cross-reference these
24
+ symbols against application code that talks to them.
25
+
26
+ ## SymbolID encoding
27
+
28
+ ```
29
+ scip-sql . . . db/migrations/0001_init.sql/users#
30
+ scip-sql . . . db/migrations/0001_init.sql/users#email.
31
+ scip-sql . . . db/migrations/0001_init.sql/idx_users_email.
32
+ ```
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install "git+https://github.com/qxbyte/codemap.git#subdirectory=plugins/codemap-sql"
38
+ ```
39
+
40
+ ## Limits
41
+
42
+ * Only DDL is indexed; `SELECT` / `INSERT` / `UPDATE` / `DELETE` are
43
+ walked past silently.
44
+ * `CREATE PROCEDURE` / `CREATE FUNCTION` / `CREATE TRIGGER` are not
45
+ yet captured — the grammar reports them under generic statement nodes.
46
+ * Dialect-specific features (e.g. PostgreSQL extensions) may not be
47
+ parsed.
48
+ * `ALTER TABLE` statements that add columns are not tracked.
49
+
50
+ ## License
51
+
52
+ MIT.
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.21"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "codemap-sql"
7
+ version = "0.1.0a1"
8
+ description = "SQL indexer plugin for CodeMap"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "CodeMap Contributors" }]
13
+ keywords = ["codemap", "sql", "indexer", "tree-sitter"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: SQL",
18
+ "Topic :: Software Development",
19
+ "Topic :: Database",
20
+ ]
21
+ dependencies = [
22
+ "codemap-core>=0.1.0a1,<0.2",
23
+ "tree-sitter>=0.25",
24
+ "tree-sitter-sql>=0.3",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ dev = ["pytest>=8.0"]
29
+
30
+ [project.entry-points."codemap.indexers"]
31
+ sql = "codemap_sql:SqlIndexer"
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/qxbyte/codemap"
35
+
36
+ [tool.hatch.build.targets.wheel]
37
+ packages = ["src/codemap_sql"]
@@ -0,0 +1,8 @@
1
+ """SQL indexer plugin for CodeMap."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_sql.indexer import SqlIndexer
6
+
7
__all__ = ["SqlIndexer"]
# Must match `version` in pyproject.toml: the distribution is published as
# the pre-release 0.1.0a1, so reporting the final "0.1.0" here would make
# the importable version disagree with the installed package metadata.
__version__ = "0.1.0a1"
@@ -0,0 +1,236 @@
1
+ """SQL indexer built on tree-sitter-sql.
2
+
3
+ Only DDL statements (CREATE TABLE / VIEW / INDEX) produce symbols. The
4
+ table column list is walked to emit field symbols attached to the
5
+ parent table, so callers can answer "which table has column X" and the
6
+ http_route / asset bridges can cross-reference application code against
7
+ schema state.
8
+
9
+ Query statements (SELECT / INSERT / UPDATE / DELETE) are intentionally
10
+ ignored — they are not durable schema artefacts.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path, PurePosixPath
16
+ from typing import ClassVar
17
+
18
+ import tree_sitter
19
+ import tree_sitter_sql
20
+
21
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
22
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
23
+ from codemap.indexers.base import IndexContext
24
+
25
# SymbolID scheme stamped on every symbol id this plugin builds.
SCHEME = "scip-sql"
# Language tag recorded on each Symbol and advertised by SqlIndexer.languages.
LANG = "sql"

# Grammar handle created once at import time and handed to each parser.
_SQL_LANG = tree_sitter.Language(tree_sitter_sql.language())
29
+
30
+
31
class SqlIndexer:
    """CodeMap indexer plugin that extracts schema symbols from SQL files.

    A file is parsed with the tree-sitter SQL grammar and the resulting tree
    is handed to ``_Visitor``, which collects the symbols. Problems are
    reported as diagnostics instead of being raised: ``SQL002`` (error) when
    the input is not valid UTF-8 and ``SQL001`` (warning) when the parse
    tree contains errors.
    """

    name: ClassVar[str] = "sql"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.sql", "*.ddl"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return whether *path* has a ``.sql`` or ``.ddl`` suffix.

        The suffix is compared case-insensitively so that files such as
        ``SCHEMA.SQL`` are accepted too.
        """
        return path.suffix.lower() in {".sql", ".ddl"}

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Index one SQL file and return its symbols, edges and diagnostics.

        Args:
            path: Location of the file. It is not read here; every reported
                location uses ``ctx.relative_path``.
            source: Raw file contents.
            ctx: Indexing context; only ``relative_path`` is used.

        Returns:
            An ``IndexResult``. When *source* is not valid UTF-8 it contains
            just one ``SQL002`` error diagnostic. Otherwise it carries the
            visitor's symbols and edges, plus a ``SQL001`` warning if the
            parse tree reports errors.
        """
        try:
            # Decoded purely as a validity check; the parser is fed the raw bytes.
            source.decode("utf-8")
        except UnicodeDecodeError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="SQL002",
                        message=f"not valid UTF-8: {exc}",
                        producer=self.name,
                    )
                ]
            )
        parser = tree_sitter.Parser(_SQL_LANG)
        tree = parser.parse(source)
        visitor = _Visitor(ctx.relative_path)
        visitor.visit(tree.root_node)
        diagnostics = list(visitor.diagnostics)
        if tree.root_node.has_error:
            # Symbols found so far are still returned; the warning only flags
            # that the result may be incomplete. It is pinned to line 1
            # rather than to the failing node.
            diagnostics.append(
                Diagnostic(
                    severity="warning",
                    file=ctx.relative_path,
                    range=Range(start_line=1, end_line=1),
                    code="SQL001",
                    message="tree-sitter reported parse errors; symbols may be incomplete",
                    producer=self.name,
                )
            )
        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=diagnostics,
        )
81
+
82
+
83
+ class _Visitor:
84
+ def __init__(self, relative_path: PurePosixPath) -> None:
85
+ self.relative_path = relative_path
86
+ self.symbols: list[Symbol] = []
87
+ self.edges: list[Edge] = []
88
+ self.diagnostics: list[Diagnostic] = []
89
+
90
+ def visit(self, node: tree_sitter.Node) -> None:
91
+ kind = node.type
92
+ if kind == "create_table":
93
+ self._visit_create_table(node)
94
+ return
95
+ if kind == "create_view":
96
+ self._visit_create_view(node)
97
+ return
98
+ if kind == "create_index":
99
+ self._visit_create_index(node)
100
+ return
101
+ for child in node.children:
102
+ self.visit(child)
103
+
104
+ # -------------------------------------------------------- types
105
+
106
+ def _visit_create_table(self, node: tree_sitter.Node) -> None:
107
+ name = _object_name(node)
108
+ if name is None:
109
+ return
110
+ sid = self._make_id([], name, kind=DescriptorKind.TYPE)
111
+ self.symbols.append(
112
+ Symbol(
113
+ id=sid,
114
+ kind="class",
115
+ language=LANG,
116
+ file=self.relative_path,
117
+ range=_node_range(node),
118
+ extra={"sql_kind": "table"},
119
+ )
120
+ )
121
+ # Walk column_definitions for column names.
122
+ for child in node.children:
123
+ if child.type != "column_definitions":
124
+ continue
125
+ for sub in child.children:
126
+ if sub.type == "column_definition":
127
+ self._emit_column(name, sub)
128
+
129
+ def _emit_column(self, table_name: str, node: tree_sitter.Node) -> None:
130
+ col_name = None
131
+ for child in node.children:
132
+ if child.type == "identifier":
133
+ col_name = _node_text(child)
134
+ break
135
+ if not col_name:
136
+ return
137
+ sid = self._make_id([table_name], col_name, kind=DescriptorKind.TERM)
138
+ self.symbols.append(
139
+ Symbol(
140
+ id=sid,
141
+ kind="field",
142
+ language=LANG,
143
+ file=self.relative_path,
144
+ range=_node_range(node),
145
+ )
146
+ )
147
+
148
+ def _visit_create_view(self, node: tree_sitter.Node) -> None:
149
+ name = _object_name(node)
150
+ if name is None:
151
+ return
152
+ sid = self._make_id([], name, kind=DescriptorKind.TYPE)
153
+ self.symbols.append(
154
+ Symbol(
155
+ id=sid,
156
+ kind="class",
157
+ language=LANG,
158
+ file=self.relative_path,
159
+ range=_node_range(node),
160
+ extra={"sql_kind": "view"},
161
+ )
162
+ )
163
+
164
+ def _visit_create_index(self, node: tree_sitter.Node) -> None:
165
+ # create_index > identifier (index name) is the first identifier.
166
+ name = None
167
+ for child in node.children:
168
+ if child.type == "identifier":
169
+ name = _node_text(child)
170
+ break
171
+ if name is None:
172
+ return
173
+ sid = self._make_id([], name, kind=DescriptorKind.TERM)
174
+ self.symbols.append(
175
+ Symbol(
176
+ id=sid,
177
+ kind="variable",
178
+ language=LANG,
179
+ file=self.relative_path,
180
+ range=_node_range(node),
181
+ extra={"sql_kind": "index"},
182
+ )
183
+ )
184
+
185
+ # ---------------------------------------------------- helpers
186
+
187
+ def _make_id(
188
+ self,
189
+ type_chain: list[str],
190
+ name: str,
191
+ *,
192
+ kind: DescriptorKind,
193
+ ) -> SymbolID:
194
+ descriptors = list(_path_namespaces(self.relative_path))
195
+ descriptors.extend(Descriptor(name=t, kind=DescriptorKind.TYPE) for t in type_chain)
196
+ descriptors.append(Descriptor(name=name, kind=kind))
197
+ return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Pure helpers
202
+ # ---------------------------------------------------------------------------
203
+
204
+
205
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Return one NAMESPACE descriptor per component of *path*, in order."""
    namespaces: list[Descriptor] = []
    for segment in path.parts:
        namespaces.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    return namespaces
207
+
208
+
209
def _node_range(node: tree_sitter.Node) -> Range:
    """Convert a node's start/end points into a ``Range``.

    Rows are shifted by one to produce line numbers, while columns are
    passed through unchanged. The end line is never allowed to precede the
    start line.
    """
    start_row, start_col = node.start_point
    end_row, end_col = node.end_point
    first_line = start_row + 1
    last_line = max(end_row + 1, first_line)
    return Range(
        start_line=first_line,
        start_col=start_col,
        end_line=last_line,
        end_col=end_col,
    )
218
+
219
+
220
+ def _node_text(node: tree_sitter.Node) -> str:
221
+ return node.text.decode("utf-8") if node.text is not None else ""
222
+
223
+
224
def _object_name(node: tree_sitter.Node) -> str | None:
    """Return the target name of a CREATE TABLE / VIEW node, if any.

    The name is the last ``identifier`` inside an ``object_reference``
    child, so a schema-qualified ``db.users`` yields ``"users"``. An
    ``object_reference`` without identifiers is skipped in favour of the
    next one; ``None`` is returned when none of them provides a name.
    """
    for reference in node.children:
        if reference.type == "object_reference":
            identifiers = [part for part in reference.children if part.type == "identifier"]
            if identifiers:
                return _node_text(identifiers[-1])
    return None
File without changes
@@ -0,0 +1,155 @@
1
+ """Unit tests for the SQL indexer plugin."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import textwrap
6
+ from pathlib import Path, PurePosixPath
7
+
8
+ from codemap_sql import SqlIndexer
9
+ from codemap_sql.indexer import SCHEME
10
+
11
+ from codemap.core.models import IndexResult
12
+ from codemap.indexers.base import IndexContext
13
+
14
+
15
def _index(source: str, *, path: str = "schema.sql") -> IndexResult:
    """Run ``SqlIndexer`` over an inline SQL snippet as if it lived at *path*.

    The snippet is dedented and stripped of leading newlines first, so
    callers can pass indented triple-quoted literals.
    """
    sql_bytes = textwrap.dedent(source).lstrip("\n").encode("utf-8")
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath(path),
        language="sql",
    )
    indexer = SqlIndexer()
    return indexer.index_file(Path(path), sql_bytes, context)
26
+
27
+
28
def test_indexer_metadata() -> None:
    """The indexer advertises its name and language and filters by suffix."""
    indexer = SqlIndexer()
    assert indexer.name == "sql"
    assert indexer.languages == ["sql"]
    for accepted in ("a.sql", "a.ddl"):
        assert indexer.supports(Path(accepted))
    assert not indexer.supports(Path("a.py"))
35
+
36
+
37
def test_scheme_is_consistent() -> None:
    """Every emitted symbol id starts with the plugin's scheme."""
    r = _index("CREATE TABLE u (id INT);")
    # Without this guard the loop below would pass vacuously if the indexer
    # emitted no symbols at all.
    assert r.symbols
    for s in r.symbols:
        assert str(s.id).startswith(f"{SCHEME} ")
41
+
42
+
43
def test_create_table_with_columns() -> None:
    """A CREATE TABLE yields one table symbol plus one field per column."""
    result = _index(
        """
        CREATE TABLE users (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            email TEXT UNIQUE
        );
        """
    )
    table_symbols = [sym for sym in result.symbols if sym.kind == "class"]
    assert len(table_symbols) == 1
    table = table_symbols[0]
    assert table.extra.get("sql_kind") == "table"
    assert "users#" in str(table.id)

    column_names = {
        sym.id.descriptors[-1].name for sym in result.symbols if sym.kind == "field"
    }
    assert column_names == {"id", "name", "email"}
61
+
62
+
63
def test_create_view() -> None:
    """A CREATE VIEW yields a class symbol tagged as a view."""
    result = _index(
        """
        CREATE VIEW active_users AS
        SELECT * FROM users WHERE active = 1;
        """
    )
    class_symbols = (sym for sym in result.symbols if sym.kind == "class")
    view = next(class_symbols)
    assert view.extra.get("sql_kind") == "view"
    assert "active_users#" in str(view.id)
73
+
74
+
75
def test_create_index() -> None:
    """A CREATE INDEX yields exactly one variable symbol tagged as an index."""
    result = _index("CREATE INDEX idx_users_email ON users(email);")
    index_symbols = [sym for sym in result.symbols if sym.kind == "variable"]
    assert len(index_symbols) == 1
    only = index_symbols[0]
    assert only.extra.get("sql_kind") == "index"
    assert "idx_users_email." in str(only.id)
81
+
82
+
83
def test_columns_attached_to_table() -> None:
    """Each column symbol id is nested under its owning table."""
    r = _index(
        """
        CREATE TABLE products (
            sku TEXT PRIMARY KEY,
            price DECIMAL
        );
        """
    )
    fields = [s for s in r.symbols if s.kind == "field"]
    # Guard against a vacuous pass: if no column symbols were emitted, the
    # loop below would not execute a single assertion.
    assert len(fields) == 2
    for f in fields:
        assert "products#" in str(f.id)
95
+
96
+
97
def test_multiple_tables_in_one_file() -> None:
    """Two CREATE TABLE statements in one file yield two table symbols."""
    result = _index(
        """
        CREATE TABLE a (x INT);
        CREATE TABLE b (y INT);
        """
    )
    table_names = {
        sym.id.descriptors[-1].name for sym in result.symbols if sym.kind == "class"
    }
    assert table_names == {"a", "b"}
106
+
107
+
108
def test_select_statements_ignored() -> None:
    """SELECT and INSERT statements contribute no extra table symbols."""
    result = _index(
        """
        CREATE TABLE u (id INT);
        SELECT * FROM u WHERE id = 1;
        INSERT INTO u VALUES (1);
        """
    )
    # The only class symbol should be `u` from the CREATE TABLE.
    class_count = sum(1 for sym in result.symbols if sym.kind == "class")
    assert class_count == 1
119
+
120
+
121
def test_symbol_id_uses_path_namespaces() -> None:
    """The file's relative path appears as the namespace part of the id."""
    result = _index("CREATE TABLE u (id INT);", path="db/migrations/0001.sql")
    class_symbols = (sym for sym in result.symbols if sym.kind == "class")
    table = next(class_symbols)
    expected = "scip-sql . . . db/migrations/0001.sql/u#"
    assert str(table.id) == expected
125
+
126
+
127
def test_lowercase_keywords_supported() -> None:
    """Lowercase SQL keywords are recognised just like uppercase ones."""
    result = _index("create table u (id int);")
    class_count = sum(1 for sym in result.symbols if sym.kind == "class")
    assert class_count == 1
131
+
132
+
133
def test_empty_file_yields_no_symbols() -> None:
    """Indexing empty input produces an empty symbol list."""
    assert _index("").symbols == []
136
+
137
+
138
def test_invalid_utf8_yields_diagnostic() -> None:
    """Bytes that are not valid UTF-8 yield no symbols and an SQL002 diagnostic."""
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath("bad.sql"),
        language="sql",
    )
    undecodable = b"\xff\xfe CREATE"
    result = SqlIndexer().index_file(Path("bad.sql"), undecodable, context)
    assert result.symbols == []
    first_diagnostic = result.diagnostics[0]
    assert first_diagnostic.code == "SQL002"
151
+
152
+
153
def test_ddl_extension_supported() -> None:
    """A file with the `.ddl` extension is indexed like a `.sql` file."""
    result = _index("CREATE TABLE t (id INT);", path="schema.ddl")
    kinds = {sym.kind for sym in result.symbols}
    assert "class" in kinds