brickql 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brickql-0.1.7/LICENSE +21 -0
- brickql-0.1.7/PKG-INFO +636 -0
- brickql-0.1.7/README.md +569 -0
- brickql-0.1.7/brickql/__init__.py +218 -0
- brickql-0.1.7/brickql/compile/__init__.py +14 -0
- brickql-0.1.7/brickql/compile/base.py +112 -0
- brickql-0.1.7/brickql/compile/builder.py +214 -0
- brickql-0.1.7/brickql/compile/clause_builders.py +196 -0
- brickql-0.1.7/brickql/compile/context.py +26 -0
- brickql-0.1.7/brickql/compile/expression_builder.py +247 -0
- brickql-0.1.7/brickql/compile/mysql.py +74 -0
- brickql-0.1.7/brickql/compile/postgres.py +72 -0
- brickql-0.1.7/brickql/compile/registry.py +167 -0
- brickql-0.1.7/brickql/compile/sqlite.py +30 -0
- brickql-0.1.7/brickql/errors.py +181 -0
- brickql-0.1.7/brickql/policy/__init__.py +5 -0
- brickql-0.1.7/brickql/policy/engine.py +325 -0
- brickql-0.1.7/brickql/prompt/__init__.py +5 -0
- brickql-0.1.7/brickql/prompt/builder.py +298 -0
- brickql-0.1.7/brickql/schema/__init__.py +42 -0
- brickql-0.1.7/brickql/schema/column_reference.py +99 -0
- brickql-0.1.7/brickql/schema/context.py +26 -0
- brickql-0.1.7/brickql/schema/converters.py +188 -0
- brickql-0.1.7/brickql/schema/dialect.py +357 -0
- brickql-0.1.7/brickql/schema/expressions.py +184 -0
- brickql-0.1.7/brickql/schema/operands.py +179 -0
- brickql-0.1.7/brickql/schema/query_plan.py +332 -0
- brickql-0.1.7/brickql/schema/snapshot.py +142 -0
- brickql-0.1.7/brickql/validate/__init__.py +5 -0
- brickql-0.1.7/brickql/validate/dialect_validator.py +85 -0
- brickql-0.1.7/brickql/validate/operand_validator.py +274 -0
- brickql-0.1.7/brickql/validate/schema_validator.py +82 -0
- brickql-0.1.7/brickql/validate/semantic_validator.py +60 -0
- brickql-0.1.7/brickql/validate/validator.py +196 -0
- brickql-0.1.7/brickql.egg-info/PKG-INFO +636 -0
- brickql-0.1.7/brickql.egg-info/SOURCES.txt +44 -0
- brickql-0.1.7/brickql.egg-info/dependency_links.txt +1 -0
- brickql-0.1.7/brickql.egg-info/requires.txt +25 -0
- brickql-0.1.7/brickql.egg-info/top_level.txt +1 -0
- brickql-0.1.7/pyproject.toml +165 -0
- brickql-0.1.7/setup.cfg +4 -0
- brickql-0.1.7/tests/test_compile.py +423 -0
- brickql-0.1.7/tests/test_converters.py +473 -0
- brickql-0.1.7/tests/test_policy.py +410 -0
- brickql-0.1.7/tests/test_prompt.py +260 -0
- brickql-0.1.7/tests/test_validate.py +645 -0
brickql-0.1.7/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 brickQL Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
brickql-0.1.7/PKG-INFO
ADDED
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: brickql
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: Policy-driven, SQL-standard-aligned query orchestration framework for LLMs.
|
|
5
|
+
License: MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2026 brickQL Contributors
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
|
|
27
|
+
Project-URL: Homepage, https://brickql.org
|
|
28
|
+
Project-URL: Documentation, https://brickql.org
|
|
29
|
+
Project-URL: Source, https://github.com/flyingraptor/BrickQL
|
|
30
|
+
Project-URL: Bug Tracker, https://github.com/flyingraptor/BrickQL/issues
|
|
31
|
+
Keywords: sql,llm,query,orchestration,policy,ai,nlp,rag,pydantic,sqlite,postgresql
|
|
32
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Classifier: Intended Audience :: Developers
|
|
39
|
+
Classifier: Topic :: Database
|
|
40
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
41
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
42
|
+
Classifier: Development Status :: 4 - Beta
|
|
43
|
+
Requires-Python: >=3.10
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
License-File: LICENSE
|
|
46
|
+
Requires-Dist: pydantic>=2.12
|
|
47
|
+
Provides-Extra: postgres
|
|
48
|
+
Requires-Dist: psycopg[binary]>=3.3; extra == "postgres"
|
|
49
|
+
Provides-Extra: mysql
|
|
50
|
+
Requires-Dist: pymysql>=1.1; extra == "mysql"
|
|
51
|
+
Provides-Extra: sqlalchemy
|
|
52
|
+
Requires-Dist: sqlalchemy>=2.0; extra == "sqlalchemy"
|
|
53
|
+
Provides-Extra: examples
|
|
54
|
+
Requires-Dist: langchain>=1.2.10; extra == "examples"
|
|
55
|
+
Requires-Dist: langchain-ollama>=1.0.1; extra == "examples"
|
|
56
|
+
Provides-Extra: dev
|
|
57
|
+
Requires-Dist: pytest>=9.0; extra == "dev"
|
|
58
|
+
Requires-Dist: pytest-docker>=3.2; extra == "dev"
|
|
59
|
+
Requires-Dist: psycopg[binary]>=3.3; extra == "dev"
|
|
60
|
+
Requires-Dist: sqlalchemy>=2.0; extra == "dev"
|
|
61
|
+
Requires-Dist: ruff>=0.15; extra == "dev"
|
|
62
|
+
Requires-Dist: mypy>=1.19; extra == "dev"
|
|
63
|
+
Requires-Dist: pydantic>=2.12; extra == "dev"
|
|
64
|
+
Requires-Dist: langchain>=1.2.10; extra == "dev"
|
|
65
|
+
Requires-Dist: langchain-ollama>=1.0.1; extra == "dev"
|
|
66
|
+
Dynamic: license-file
|
|
67
|
+
|
|
68
|
+
<p align="center">
|
|
69
|
+
<img src="logo.png" alt="brickQL" width="420" />
|
|
70
|
+
</p>
|
|
71
|
+
|
|
72
|
+
# Text to brickQL
|
|
73
|
+
|
|
74
|
+
**Policy-driven, SQL-standard-aligned query orchestration for LLMs.**
|
|
75
|
+
|
|
76
|
+
> The Building Blocks of Safe SQL.
|
|
77
|
+
|
|
78
|
+
brickQL separates concerns cleanly: the LLM outputs a structured **QueryPlan (JSON)**; brickQL validates it against your schema, enforces policy rules, and compiles it to safe, parameterized SQL. Raw SQL is never generated by the LLM.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Why not Text-to-SQL?
|
|
83
|
+
|
|
84
|
+
Text-to-SQL is a well-established approach: feed the LLM a natural-language question and a schema, and let it write the SQL directly. It works well for simple queries and controlled environments, but breaks down as soon as real-world constraints appear:
|
|
85
|
+
|
|
86
|
+
| Challenge | Why it hurts | How brickQL fixes it |
|
|
87
|
+
|---|---|---|
|
|
88
|
+
| **Hallucinated syntax** | LLMs invent column names, functions, or dialect-specific constructs that don't exist in your database, causing runtime errors that are hard to debug at scale. | The LLM outputs a typed, structured `QueryPlan` JSON. Free-form SQL is structurally impossible. brickQL compiles it to parameterized SQL with no string interpolation. |
|
|
89
|
+
| **No policy enforcement** | There is no layer between the generated SQL and the database. Row-level filters, param-bound column constraints, and column allowlists must be bolted on externally, and can silently fail. | Per-table param-bound column enforcement, column allowlists (RBAC), and denied columns. Any column can be bound to a runtime parameter; the LLM cannot bypass the predicate. |
|
|
90
|
+
| **Prompt-injection surface** | The LLM sees and reasons over raw SQL strings. Malicious content in user input or database values can redirect the query, exfiltrate data, or trigger destructive operations. | Implements Plan-Then-Execute: the LLM commits to a structured `QueryPlan` before any data is returned. Database contents can never inject new instructions. |
|
|
91
|
+
| **Non-deterministic repairs** | When a query fails, the LLM must re-generate free-form SQL — each attempt is a new coin flip with the same attack surface. | All errors are machine-readable: a typed exception hierarchy with `code` and `details` fields. `to_error_response()` feeds structured error context back to the LLM for targeted repair. |
|
|
92
|
+
| **Dialect fragility** | SQL is not a single language. A query that works on PostgreSQL may silently mis-behave on SQLite or MySQL; the LLM has no mechanism to stay within a safe dialect subset. | Opt in to only the SQL features you need via `DialectProfile`. A fluent builder with dependency enforcement at `build()` time. |
|
|
93
|
+
| **Target DB awareness** | The LLM must know which database it's targeting and produce the correct syntax — date functions, quoting, pagination, and type casting all differ across engines. | Built-in compilers for SQLite, PostgreSQL, and MySQL. Add any other target by registering a `SQLCompiler` subclass with one decorator, no core changes. |
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## How it works
|
|
98
|
+
|
|
99
|
+
<p align="center">
|
|
100
|
+
<img src="docs/how-it-works.png" alt="brickQL flow diagram" width="500" />
|
|
101
|
+
</p>
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Security model
|
|
106
|
+
|
|
107
|
+
brickQL implements the design patterns recommended for SQL agents in [*Design Patterns for Securing LLM Agents against Prompt Injections*](https://arxiv.org/abs/2506.08837) (Beurer-Kellner et al., 2025).
|
|
108
|
+
|
|
109
|
+
Among its ten case studies, §4.2 examines SQL agents under a threat model where the attacker can control the input query or the database content, with goals ranging from unauthorized data extraction to remote code execution. That case study concludes that the **Plan-Then-Execute** pattern is the correct baseline, and Appendix A identifies **strict output formatting** and **least-privilege access control** as mandatory best practices across all agent types. brickQL maps each of these directly to code:
|
|
110
|
+
|
|
111
|
+
| Paper recommendation | brickQL implementation |
|
|
112
|
+
|---|---|
|
|
113
|
+
| **Plan-Then-Execute** - LLM commits to a query plan *before* any database data is returned to it, so database contents can never inject new instructions | The LLM outputs a `QueryPlan` JSON; brickQL validates and compiles it to SQL without ever feeding query results back to the LLM |
|
|
114
|
+
| **Strict output formatting** - constrain the LLM to a well-specified format rather than free-form SQL | `QueryPlan` is a typed Pydantic model; free-form SQL is structurally impossible |
|
|
115
|
+
| **Least-privilege access control** - restrict tables, columns, and operations to exactly what the role needs | `DialectProfile` allowlists tables and SQL features; `PolicyConfig` / `TablePolicy` enforce per-table column allowlists, deny lists, and param-bound columns |
|
|
116
|
+
| **Parameterized execution** - prevent SQL injection from literal values in the plan | All `{"value": …}` operands are compiled to named placeholders; no string interpolation occurs anywhere in the compilation path |
|
|
117
|
+
|
|
118
|
+
The OR-bypass hardening in `PolicyEngine._where_satisfies_param` (which ensures a param-bound column cannot be satisfied by placing the required predicate inside an `OR` branch) and `to_error_response()` (which serializes error context to a self-contained JSON string before it is embedded in an LLM repair prompt, preventing plan content from injecting new instructions) are direct responses to security risks identified through the paper's threat model.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Installation
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Core library (SQLite only)
|
|
126
|
+
pip install brickql
|
|
127
|
+
|
|
128
|
+
# With PostgreSQL driver (psycopg v3)
|
|
129
|
+
pip install "brickql[postgres]"
|
|
130
|
+
|
|
131
|
+
# With MySQL driver (PyMySQL)
|
|
132
|
+
pip install "brickql[mysql]"
|
|
133
|
+
|
|
134
|
+
# With SQLAlchemy schema reflector
|
|
135
|
+
pip install "brickql[sqlalchemy]"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Requires Python ≥ 3.10.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Quick start
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
import brickql
|
|
146
|
+
from brickql import SchemaSnapshot, DialectProfile, PolicyConfig, TablePolicy
|
|
147
|
+
|
|
148
|
+
# 1. Load your schema snapshot (describes tables, columns, relationships)
|
|
149
|
+
import json
|
|
150
|
+
snapshot = SchemaSnapshot.model_validate(json.loads(open("schema.json").read()))
|
|
151
|
+
|
|
152
|
+
# 2. Choose a dialect profile (compose exactly the features you need)
|
|
153
|
+
dialect = (
|
|
154
|
+
DialectProfile.builder(["employees", "departments"], target="postgres")
|
|
155
|
+
.joins()
|
|
156
|
+
.aggregations()
|
|
157
|
+
.build()
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# 3. Configure policy (tenant isolation, row limits)
|
|
161
|
+
policy = PolicyConfig(
|
|
162
|
+
inject_missing_params=True, # auto-inject tenant_id predicates
|
|
163
|
+
default_limit=100,
|
|
164
|
+
tables={
|
|
165
|
+
"employees": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
166
|
+
"departments": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
167
|
+
},
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# 4. Compile the LLM's QueryPlan JSON
|
|
171
|
+
plan_json = llm_response # {"SELECT": [...], "FROM": {...}, "JOIN": [...], ...}
|
|
172
|
+
|
|
173
|
+
compiled = brickql.validate_and_compile(plan_json, snapshot, dialect, policy)
|
|
174
|
+
|
|
175
|
+
# 5. Execute with your own connection - brickQL does not execute queries
|
|
176
|
+
cursor.execute(compiled.sql, compiled.merge_runtime_params({"TENANT": tenant_id}))
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Key concepts
|
|
182
|
+
|
|
183
|
+
### QueryPlan JSON
|
|
184
|
+
|
|
185
|
+
The only output the LLM must produce. A structured, SQL-grammar-aligned JSON object - never raw SQL.
|
|
186
|
+
|
|
187
|
+
```json
|
|
188
|
+
{
|
|
189
|
+
"SELECT": [
|
|
190
|
+
{"expr": {"col": "employees.first_name"}},
|
|
191
|
+
{"expr": {"col": "departments.name"}, "alias": "dept"}
|
|
192
|
+
],
|
|
193
|
+
"FROM": {"table": "employees"},
|
|
194
|
+
"JOIN": [{"rel": "departments__employees", "type": "LEFT"}],
|
|
195
|
+
"WHERE": {"EQ": [{"col": "employees.active"}, {"value": true}]},
|
|
196
|
+
"LIMIT": {"value": 50}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Each expression field (`expr`, `GROUP_BY` items, `ORDER_BY` items, window `partition_by`) is parsed into a **typed operand** automatically by Pydantic:
|
|
201
|
+
|
|
202
|
+
| JSON shape | Python type |
|
|
203
|
+
|---|---|
|
|
204
|
+
| `{"col": "t.col"}` | `ColumnOperand(col="t.col")` |
|
|
205
|
+
| `{"value": 42}` | `ValueOperand(value=42)` |
|
|
206
|
+
| `{"param": "TENANT"}` | `ParamOperand(param="TENANT")` |
|
|
207
|
+
| `{"func": "COUNT", "args": [...]}` | `FuncOperand(func="COUNT", args=[...])` |
|
|
208
|
+
| `{"case": {"when": [...], "else": ...}}` | `CaseOperand(case=CaseBody(...))` |
|
|
209
|
+
|
|
210
|
+
These types are importable if you need to inspect or construct plans programmatically:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from brickql import ColumnOperand, ValueOperand, ParamOperand, FuncOperand, CaseOperand, Operand
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### SchemaSnapshot
|
|
217
|
+
|
|
218
|
+
Describes your database structure: tables, columns (name, type, nullability), and named relationships. It is purely structural - no policy or access-control concerns. Loaded once at startup and shared across requests.
|
|
219
|
+
|
|
220
|
+
Both `TableInfo` and `ColumnInfo` accept an optional `description` field. When present, descriptions are included in the LLM system prompt so the model can make better join and filter decisions without guessing from column names alone.
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
snapshot = SchemaSnapshot.model_validate({
|
|
224
|
+
"tables": [
|
|
225
|
+
{
|
|
226
|
+
"name": "employees",
|
|
227
|
+
"description": "One row per employee. Joined to departments via department_id.",
|
|
228
|
+
"columns": [
|
|
229
|
+
{"name": "employee_id", "type": "INTEGER", "nullable": False},
|
|
230
|
+
{"name": "tenant_id", "type": "TEXT", "nullable": False},
|
|
231
|
+
{"name": "status", "type": "TEXT", "nullable": True,
|
|
232
|
+
"description": "Employment status. Values: ACTIVE, TERMINATED, ON_LEAVE."},
|
|
233
|
+
],
|
|
234
|
+
"relationships": ["departments__employees"]
|
|
235
|
+
}
|
|
236
|
+
],
|
|
237
|
+
"relationships": [
|
|
238
|
+
{"key": "departments__employees", "from_table": "employees",
|
|
239
|
+
"from_col": "department_id", "to_table": "departments", "to_col": "department_id"}
|
|
240
|
+
]
|
|
241
|
+
})
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
> **Note** - `tenant_id` is just a regular column in the snapshot. Which columns
|
|
245
|
+
> require runtime parameters and what those params are named is configured in
|
|
246
|
+
> `PolicyConfig` via `TablePolicy`, not in the schema.
|
|
247
|
+
|
|
248
|
+
#### Reflecting a schema from a live database
|
|
249
|
+
|
|
250
|
+
Use `schema_from_sqlalchemy` to populate a `SchemaSnapshot` directly from an existing database instead of writing the JSON by hand:
|
|
251
|
+
|
|
252
|
+
```python
|
|
253
|
+
from sqlalchemy import create_engine
|
|
254
|
+
from brickql import schema_from_sqlalchemy
|
|
255
|
+
|
|
256
|
+
engine = create_engine("postgresql+psycopg://user:pass@localhost:5432/mydb")
|
|
257
|
+
snapshot = schema_from_sqlalchemy(engine)
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
`schema_from_sqlalchemy` requires the `sqlalchemy` optional dependency:
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
pip install "brickql[sqlalchemy]"
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
The reflected snapshot is a starting point - add `description` fields and manually define any relationships that naming heuristics cannot detect, then save it to a JSON file for inspection and version control.
|
|
267
|
+
|
|
268
|
+
### DialectProfile - builder
|
|
269
|
+
|
|
270
|
+
Compose exactly the SQL features you need. Each method is independent - no hidden stacking, no implicit dependencies:
|
|
271
|
+
|
|
272
|
+
| Builder method | SQL capabilities unlocked | Requires |
|
|
273
|
+
|---|---|---|
|
|
274
|
+
| *(base)* | Single-table `SELECT` / `WHERE` / `LIMIT` | - |
|
|
275
|
+
| `.joins(max_join_depth=2)` | `JOIN` (inner, left, self-referential, many-to-many), `ORDER BY`, `OFFSET`, `ILIKE` | - |
|
|
276
|
+
| `.aggregations()` | `GROUP BY` / `HAVING` / `COUNT` `SUM` `AVG` `MIN` `MAX` / `CASE` | - |
|
|
277
|
+
| `.scalar_functions(*funcs)` | Additional scalar functions by name (e.g. `DATE_PART`, `COALESCE`) | - |
|
|
278
|
+
| `.subqueries()` | `EXISTS`, correlated and derived-table subqueries | - |
|
|
279
|
+
| `.ctes()` | `WITH` / `WITH RECURSIVE` - CTEs (Common Table Expressions: named temporary result sets scoped to the query) | **`.subqueries()`** |
|
|
280
|
+
| `.set_operations()` | `UNION` / `UNION ALL` / `INTERSECT` / `EXCEPT` | - |
|
|
281
|
+
| `.window_functions()` | `ROW_NUMBER`, `RANK`, `LAG`, `LEAD`, `OVER`, `PARTITION BY` + aggregate window functions | **`.aggregations()`** |
|
|
282
|
+
|
|
283
|
+
`.scalar_functions()` is additive and can be chained with any other method:
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
profile = (
|
|
287
|
+
DialectProfile.builder(tables, target="postgres")
|
|
288
|
+
.aggregations()
|
|
289
|
+
.scalar_functions("DATE_PART", "COALESCE")
|
|
290
|
+
.ctes()
|
|
291
|
+
.subqueries()
|
|
292
|
+
.build()
|
|
293
|
+
)
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Dependencies are enforced at `build()` time with a `ProfileConfigError` and a clear message.
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
# Joins + aggregations only
|
|
300
|
+
profile = (
|
|
301
|
+
DialectProfile.builder(tables, target="postgres")
|
|
302
|
+
.joins(max_join_depth=2)
|
|
303
|
+
.aggregations()
|
|
304
|
+
.build()
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# Window functions without join support
|
|
308
|
+
profile = (
|
|
309
|
+
DialectProfile.builder(tables, target="sqlite")
|
|
310
|
+
.aggregations()
|
|
311
|
+
.window_functions()
|
|
312
|
+
.build()
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
# Everything
|
|
316
|
+
profile = (
|
|
317
|
+
DialectProfile.builder(tables)
|
|
318
|
+
.joins()
|
|
319
|
+
.aggregations()
|
|
320
|
+
.subqueries()
|
|
321
|
+
.ctes()
|
|
322
|
+
.set_operations()
|
|
323
|
+
.window_functions()
|
|
324
|
+
.build()
|
|
325
|
+
)
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### PolicyConfig and TablePolicy
|
|
329
|
+
|
|
330
|
+
`PolicyConfig` controls the overall request policy. `TablePolicy` configures
|
|
331
|
+
per-table rules - each table can have its own param-bound columns, a positive
|
|
332
|
+
column allowlist, and/or a denied column list.
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
from brickql import PolicyConfig, TablePolicy
|
|
336
|
+
|
|
337
|
+
policy = PolicyConfig(
|
|
338
|
+
inject_missing_params=True,
|
|
339
|
+
default_limit=200,
|
|
340
|
+
tables={
|
|
341
|
+
"companies": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
342
|
+
"departments": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
343
|
+
"employees": TablePolicy(
|
|
344
|
+
param_bound_columns={"tenant_id": "TENANT"},
|
|
345
|
+
denied_columns=["salary"],
|
|
346
|
+
),
|
|
347
|
+
"projects": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
348
|
+
},
|
|
349
|
+
)
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
#### Column allowlist - RBAC pattern
|
|
353
|
+
|
|
354
|
+
`allowed_columns` is a **positive allowlist**: when non-empty, only the listed
|
|
355
|
+
columns may appear in any plan referencing that table. This maps directly to
|
|
356
|
+
RBAC grant patterns where a role should see only a specific subset of columns,
|
|
357
|
+
without having to enumerate every other column in a blocklist.
|
|
358
|
+
|
|
359
|
+
```python
|
|
360
|
+
analyst_policy = PolicyConfig(
|
|
361
|
+
inject_missing_params=True,
|
|
362
|
+
tables={
|
|
363
|
+
"employees": TablePolicy(
|
|
364
|
+
param_bound_columns={"tenant_id": "TENANT"},
|
|
365
|
+
allowed_columns=[
|
|
366
|
+
"employee_id", "first_name", "last_name",
|
|
367
|
+
"department_id", "hire_date", "active",
|
|
368
|
+
],
|
|
369
|
+
),
|
|
370
|
+
},
|
|
371
|
+
)
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
`denied_columns` (per-table or global) is subtracted from `allowed_columns`
|
|
375
|
+
when both are set, so you can always enforce a hard blocklist on top. An empty
|
|
376
|
+
`allowed_columns` (the default) means all snapshot columns are permitted,
|
|
377
|
+
subject only to `denied_columns`.
|
|
378
|
+
|
|
379
|
+
Different tables can use **different param names**:
|
|
380
|
+
|
|
381
|
+
```python
|
|
382
|
+
policy = PolicyConfig(
|
|
383
|
+
tables={
|
|
384
|
+
"employees": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
|
|
385
|
+
"audit_log": TablePolicy(param_bound_columns={"org_id": "ORG"}),
|
|
386
|
+
}
|
|
387
|
+
)
|
|
388
|
+
params = compiled.merge_runtime_params({"TENANT": "acme", "ORG": "acme-org-42"})
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
### CompiledSQL
|
|
392
|
+
|
|
393
|
+
The output of `validate_and_compile`. Contains the parameterized SQL string and a `params` dict. Runtime parameters (e.g. `TENANT`) are merged in before execution:
|
|
394
|
+
|
|
395
|
+
```python
|
|
396
|
+
sql_params = compiled.merge_runtime_params({"TENANT": "acme"})
|
|
397
|
+
cursor.execute(compiled.sql, sql_params)
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
---
|
|
401
|
+
|
|
402
|
+
## Prompting the LLM
|
|
403
|
+
|
|
404
|
+
```python
|
|
405
|
+
components = brickql.get_prompt_components(
|
|
406
|
+
snapshot=snapshot,
|
|
407
|
+
dialect=dialect,
|
|
408
|
+
question="List the top 5 highest-paid employees in Engineering",
|
|
409
|
+
policy_summary='Always filter by tenant_id using {"param": "TENANT"}.',
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
# Send to your LLM
|
|
413
|
+
response = llm.chat(system=components.system_prompt, user=components.user_prompt)
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
---
|
|
417
|
+
|
|
418
|
+
## Error handling
|
|
419
|
+
|
|
420
|
+
All errors are subclasses of `brickQLError`. `ParseError` and `PolicyViolationError` both expose `to_error_response()` which returns a ready-to-embed JSON string for LLM repair loops.
|
|
421
|
+
|
|
422
|
+
```python
|
|
423
|
+
from brickql import ParseError, PolicyViolationError, ValidationError, CompilationError
|
|
424
|
+
|
|
425
|
+
try:
|
|
426
|
+
compiled = brickql.validate_and_compile(plan_json, snapshot, dialect, policy)
|
|
427
|
+
except ParseError as e:
|
|
428
|
+
# Malformed JSON - e.to_error_response() returns a JSON string for LLM repair
|
|
429
|
+
pass
|
|
430
|
+
except PolicyViolationError as e:
|
|
431
|
+
# Policy rule violated (denied column, missing param, disallowed table)
|
|
432
|
+
# e.to_error_response() returns a JSON string with code + details for LLM repair
|
|
433
|
+
pass
|
|
434
|
+
except ValidationError as e:
|
|
435
|
+
# Schema or dialect rule violated
|
|
436
|
+
pass
|
|
437
|
+
except CompilationError as e:
|
|
438
|
+
raise
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
---
|
|
442
|
+
|
|
443
|
+
## Extensibility
|
|
444
|
+
|
|
445
|
+
### Adding a new dialect
|
|
446
|
+
|
|
447
|
+
Register a custom `SQLCompiler` subclass once; `validate_and_compile` picks it up automatically for any `DialectProfile` with that target:
|
|
448
|
+
|
|
449
|
+
```python
|
|
450
|
+
from brickql.compile.base import SQLCompiler
|
|
451
|
+
from brickql.compile.registry import CompilerFactory
|
|
452
|
+
|
|
453
|
+
@CompilerFactory.register("mysql")
|
|
454
|
+
class MySQLCompiler(SQLCompiler):
|
|
455
|
+
@property
|
|
456
|
+
def dialect_name(self) -> str:
|
|
457
|
+
return "mysql"
|
|
458
|
+
|
|
459
|
+
def param_placeholder(self, name: str) -> str:
|
|
460
|
+
return f"%({name})s"
|
|
461
|
+
|
|
462
|
+
def like_operator(self, op: str) -> str:
|
|
463
|
+
return op
|
|
464
|
+
|
|
465
|
+
def quote_identifier(self, name: str) -> str:
|
|
466
|
+
return f"`{name}`"
|
|
467
|
+
|
|
468
|
+
# Now you can use target="mysql" in DialectProfile.builder(...)
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
#### Customising function compilation per dialect
|
|
472
|
+
|
|
473
|
+
Override `build_func_call` to control how specific functions are rendered for your dialect - inline literal args, add type casts, rename functions, etc. The default renders `FUNC(arg1, arg2, …)`:
|
|
474
|
+
|
|
475
|
+
```python
|
|
476
|
+
from typing import Any, Callable
|
|
477
|
+
|
|
478
|
+
@CompilerFactory.register("mysql")
|
|
479
|
+
class MySQLCompiler(SQLCompiler):
|
|
480
|
+
# ... required abstract methods ...
|
|
481
|
+
|
|
482
|
+
def build_func_call(
|
|
483
|
+
self,
|
|
484
|
+
func_name: str,
|
|
485
|
+
args: list[Any],
|
|
486
|
+
build_arg: Callable[[Any], str],
|
|
487
|
+
) -> str:
|
|
488
|
+
if func_name.upper() == "DATE_PART":
|
|
489
|
+
# MySQL uses YEAR(col) instead of DATE_PART('year', col)
|
|
490
|
+
return f"YEAR({build_arg(args[1])})"
|
|
491
|
+
return super().build_func_call(func_name, args, build_arg)
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
`build_arg` is a callback that compiles a single typed `Operand` to SQL, so the full operand chain (column quoting, param binding, nested functions) works correctly for any arg you forward.
|
|
495
|
+
|
|
496
|
+
### Adding a new operator
|
|
497
|
+
|
|
498
|
+
Register a rendering handler; `OperatorRegistry` wires it in without touching the built-in `PredicateBuilder`:
|
|
499
|
+
|
|
500
|
+
```python
|
|
501
|
+
from brickql.compile.registry import OperatorRegistry
|
|
502
|
+
|
|
503
|
+
@OperatorRegistry.register("REGEXP")
|
|
504
|
+
def _regexp_handler(op, args, build_operand):
|
|
505
|
+
left = build_operand(args[0])
|
|
506
|
+
right = build_operand(args[1])
|
|
507
|
+
return f"{left} REGEXP {right}"
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
---
|
|
511
|
+
|
|
512
|
+
## Known limitations
|
|
513
|
+
|
|
514
|
+
| Limitation | Workaround |
|
|
515
|
+
|---|---|
|
|
516
|
+
| **Scalar subqueries in comparison operators** - `salary > (SELECT AVG(salary) …)` is not a supported operand type. | Use a window-function CTE: compute `AVG(salary) OVER ()` inside the CTE so every row carries the aggregate, then filter on that result column in the outer query. See the example below. |
|
|
517
|
+
| **JOIN alias column references** - column references in SELECT / WHERE must use the original table name, not a JOIN alias. The exception is CTE names, which can be used as table qualifiers. | Use the real table name in all column references; aliases are only for output renaming. |
|
|
518
|
+
|
|
519
|
+
**Scalar subquery workaround - window-function CTE**
|
|
520
|
+
|
|
521
|
+
Goal: *list employees whose salary is above the overall average.*
|
|
522
|
+
|
|
523
|
+
```json
|
|
524
|
+
{
|
|
525
|
+
"CTE": [{
|
|
526
|
+
"name": "emp_with_avg",
|
|
527
|
+
"query": {
|
|
528
|
+
"SELECT": [
|
|
529
|
+
{"expr": {"col": "employees.first_name"}},
|
|
530
|
+
{"expr": {"col": "employees.last_name"}},
|
|
531
|
+
{"expr": {"col": "employees.salary"}},
|
|
532
|
+
{
|
|
533
|
+
"expr": {"func": "AVG", "args": [{"col": "employees.salary"}]},
|
|
534
|
+
"alias": "avg_sal",
|
|
535
|
+
"over": {"partition_by": []}
|
|
536
|
+
}
|
|
537
|
+
],
|
|
538
|
+
"FROM": {"table": "employees"},
|
|
539
|
+
"WHERE": {"EQ": [{"col": "employees.tenant_id"}, {"param": "TENANT"}]}
|
|
540
|
+
}
|
|
541
|
+
}],
|
|
542
|
+
"SELECT": [
|
|
543
|
+
{"expr": {"col": "emp_with_avg.first_name"}},
|
|
544
|
+
{"expr": {"col": "emp_with_avg.last_name"}},
|
|
545
|
+
{"expr": {"col": "emp_with_avg.salary"}}
|
|
546
|
+
],
|
|
547
|
+
"FROM": {"table": "emp_with_avg"},
|
|
548
|
+
"WHERE": {"GT": [{"col": "emp_with_avg.salary"}, {"col": "emp_with_avg.avg_sal"}]},
|
|
549
|
+
"LIMIT": {"value": 50}
|
|
550
|
+
}
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
Dialect requirements: `.aggregations()`, `.subqueries()`, `.ctes()`, `.window_functions()`.
|
|
554
|
+
|
|
555
|
+
---
|
|
556
|
+
|
|
557
|
+
## Development
|
|
558
|
+
|
|
559
|
+
```bash
|
|
560
|
+
# Set up virtual environment and install all dev dependencies
|
|
561
|
+
make install
|
|
562
|
+
|
|
563
|
+
# Lint
|
|
564
|
+
make lint
|
|
565
|
+
|
|
566
|
+
# Auto-format
|
|
567
|
+
make fmt
|
|
568
|
+
|
|
569
|
+
# Type check
|
|
570
|
+
make typecheck
|
|
571
|
+
|
|
572
|
+
# Unit tests only (no database required)
|
|
573
|
+
make test-unit
|
|
574
|
+
|
|
575
|
+
# SQLite integration tests only (in-memory, no Docker)
|
|
576
|
+
make test-integration-sqlite
|
|
577
|
+
|
|
578
|
+
# PostgreSQL integration tests only (starts and stops Docker automatically)
|
|
579
|
+
make test-integration-postgres
|
|
580
|
+
|
|
581
|
+
# All tests - unit + SQLite + PostgreSQL + MySQL (requires Docker)
|
|
582
|
+
make test
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
---
|
|
586
|
+
|
|
587
|
+
## Repository layout
|
|
588
|
+
|
|
589
|
+
```
|
|
590
|
+
brickql/
|
|
591
|
+
schema/
|
|
592
|
+
expressions.py # Operator/operand enums and frozenset constants
|
|
593
|
+
operands.py # Typed operand models (ColumnOperand, ValueOperand, …) + Operand union
|
|
594
|
+
query_plan.py # QueryPlan Pydantic model + domain methods (collect_col_refs, …)
|
|
595
|
+
snapshot.py # SchemaSnapshot, TableInfo, ColumnInfo, RelationshipInfo
|
|
596
|
+
dialect.py # DialectProfile + DialectProfileBuilder (fluent API)
|
|
597
|
+
column_reference.py # ColumnReference - parse + validate table.column strings
|
|
598
|
+
context.py # ValidationContext value object (snapshot + dialect)
|
|
599
|
+
converters.py # schema_from_sqlalchemy() - reflect a live DB into SchemaSnapshot
|
|
600
|
+
validate/
|
|
601
|
+
validator.py # PlanValidator - orchestrates all sub-validators
|
|
602
|
+
dialect_validator.py # Feature-flag checks (CTE, subquery, join depth, window)
|
|
603
|
+
schema_validator.py # Table / column existence, JOIN relationship keys
|
|
604
|
+
semantic_validator.py # HAVING/GROUP_BY pairing, LIMIT range
|
|
605
|
+
operand_validator.py # OperandValidator + PredicateValidator (mutually recursive)
|
|
606
|
+
policy/
|
|
607
|
+
engine.py # PolicyEngine, PolicyConfig, TablePolicy
|
|
608
|
+
compile/
|
|
609
|
+
base.py # SQLCompiler ABC + CompiledSQL dataclass
|
|
610
|
+
registry.py # CompilerFactory + OperatorRegistry (OCP extension points)
|
|
611
|
+
context.py # CompilationContext value object (compiler + snapshot)
|
|
612
|
+
expression_builder.py # RuntimeContext + OperandBuilder + PredicateBuilder
|
|
613
|
+
clause_builders.py # SelectClause / From / Join / Window / CTE / SetOp builders
|
|
614
|
+
builder.py # QueryBuilder - orchestrates all sub-builders
|
|
615
|
+
postgres.py # PostgresCompiler (%(name)s placeholders, ILIKE, DATE_PART specialisation)
|
|
616
|
+
sqlite.py # SQLiteCompiler (:name placeholders, LIKE fallback)
|
|
617
|
+
mysql.py # MySQLCompiler (%(name)s placeholders, backtick identifiers, EXTRACT)
|
|
618
|
+
prompt/
|
|
619
|
+
builder.py # PromptBuilder + PromptComponents
|
|
620
|
+
errors.py # Exception hierarchy (brickQLError and subclasses)
|
|
621
|
+
docs/
|
|
622
|
+
how-it-works.mmd # Simple end-to-end flow (Mermaid)
|
|
623
|
+
how-it-works.excalidraw # Visual flow diagram (Excalidraw)
|
|
624
|
+
tests/
|
|
625
|
+
fixtures/ # schema.json, ddl_sqlite.sql, ddl_postgres.sql, ddl_mysql.sql
|
|
626
|
+
integration/ # SQLite (in-memory), PostgreSQL, and MySQL (Docker) integration tests
|
|
627
|
+
docker-compose.yml # PostgreSQL and MySQL services for integration tests
|
|
628
|
+
pyproject.toml # Package metadata, dependencies, ruff, mypy config
|
|
629
|
+
Makefile # Development task runner
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
---
|
|
633
|
+
|
|
634
|
+
## License
|
|
635
|
+
|
|
636
|
+
MIT - see [LICENSE](LICENSE).
|