brickql 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. brickql-0.1.7/LICENSE +21 -0
  2. brickql-0.1.7/PKG-INFO +636 -0
  3. brickql-0.1.7/README.md +569 -0
  4. brickql-0.1.7/brickql/__init__.py +218 -0
  5. brickql-0.1.7/brickql/compile/__init__.py +14 -0
  6. brickql-0.1.7/brickql/compile/base.py +112 -0
  7. brickql-0.1.7/brickql/compile/builder.py +214 -0
  8. brickql-0.1.7/brickql/compile/clause_builders.py +196 -0
  9. brickql-0.1.7/brickql/compile/context.py +26 -0
  10. brickql-0.1.7/brickql/compile/expression_builder.py +247 -0
  11. brickql-0.1.7/brickql/compile/mysql.py +74 -0
  12. brickql-0.1.7/brickql/compile/postgres.py +72 -0
  13. brickql-0.1.7/brickql/compile/registry.py +167 -0
  14. brickql-0.1.7/brickql/compile/sqlite.py +30 -0
  15. brickql-0.1.7/brickql/errors.py +181 -0
  16. brickql-0.1.7/brickql/policy/__init__.py +5 -0
  17. brickql-0.1.7/brickql/policy/engine.py +325 -0
  18. brickql-0.1.7/brickql/prompt/__init__.py +5 -0
  19. brickql-0.1.7/brickql/prompt/builder.py +298 -0
  20. brickql-0.1.7/brickql/schema/__init__.py +42 -0
  21. brickql-0.1.7/brickql/schema/column_reference.py +99 -0
  22. brickql-0.1.7/brickql/schema/context.py +26 -0
  23. brickql-0.1.7/brickql/schema/converters.py +188 -0
  24. brickql-0.1.7/brickql/schema/dialect.py +357 -0
  25. brickql-0.1.7/brickql/schema/expressions.py +184 -0
  26. brickql-0.1.7/brickql/schema/operands.py +179 -0
  27. brickql-0.1.7/brickql/schema/query_plan.py +332 -0
  28. brickql-0.1.7/brickql/schema/snapshot.py +142 -0
  29. brickql-0.1.7/brickql/validate/__init__.py +5 -0
  30. brickql-0.1.7/brickql/validate/dialect_validator.py +85 -0
  31. brickql-0.1.7/brickql/validate/operand_validator.py +274 -0
  32. brickql-0.1.7/brickql/validate/schema_validator.py +82 -0
  33. brickql-0.1.7/brickql/validate/semantic_validator.py +60 -0
  34. brickql-0.1.7/brickql/validate/validator.py +196 -0
  35. brickql-0.1.7/brickql.egg-info/PKG-INFO +636 -0
  36. brickql-0.1.7/brickql.egg-info/SOURCES.txt +44 -0
  37. brickql-0.1.7/brickql.egg-info/dependency_links.txt +1 -0
  38. brickql-0.1.7/brickql.egg-info/requires.txt +25 -0
  39. brickql-0.1.7/brickql.egg-info/top_level.txt +1 -0
  40. brickql-0.1.7/pyproject.toml +165 -0
  41. brickql-0.1.7/setup.cfg +4 -0
  42. brickql-0.1.7/tests/test_compile.py +423 -0
  43. brickql-0.1.7/tests/test_converters.py +473 -0
  44. brickql-0.1.7/tests/test_policy.py +410 -0
  45. brickql-0.1.7/tests/test_prompt.py +260 -0
  46. brickql-0.1.7/tests/test_validate.py +645 -0
brickql-0.1.7/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 brickQL Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
brickql-0.1.7/PKG-INFO ADDED
@@ -0,0 +1,636 @@
1
+ Metadata-Version: 2.4
2
+ Name: brickql
3
+ Version: 0.1.7
4
+ Summary: Policy-driven, SQL-standard-aligned query orchestration framework for LLMs.
5
+ License: MIT License
6
+
7
+ Copyright (c) 2026 brickQL Contributors
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+ Project-URL: Homepage, https://brickql.org
28
+ Project-URL: Documentation, https://brickql.org
29
+ Project-URL: Source, https://github.com/flyingraptor/BrickQL
30
+ Project-URL: Bug Tracker, https://github.com/flyingraptor/BrickQL/issues
31
+ Keywords: sql,llm,query,orchestration,policy,ai,nlp,rag,pydantic,sqlite,postgresql
32
+ Classifier: Programming Language :: Python :: 3
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Operating System :: OS Independent
38
+ Classifier: Intended Audience :: Developers
39
+ Classifier: Topic :: Database
40
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
41
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
42
+ Classifier: Development Status :: 4 - Beta
43
+ Requires-Python: >=3.10
44
+ Description-Content-Type: text/markdown
45
+ License-File: LICENSE
46
+ Requires-Dist: pydantic>=2.12
47
+ Provides-Extra: postgres
48
+ Requires-Dist: psycopg[binary]>=3.3; extra == "postgres"
49
+ Provides-Extra: mysql
50
+ Requires-Dist: pymysql>=1.1; extra == "mysql"
51
+ Provides-Extra: sqlalchemy
52
+ Requires-Dist: sqlalchemy>=2.0; extra == "sqlalchemy"
53
+ Provides-Extra: examples
54
+ Requires-Dist: langchain>=1.2.10; extra == "examples"
55
+ Requires-Dist: langchain-ollama>=1.0.1; extra == "examples"
56
+ Provides-Extra: dev
57
+ Requires-Dist: pytest>=9.0; extra == "dev"
58
+ Requires-Dist: pytest-docker>=3.2; extra == "dev"
59
+ Requires-Dist: psycopg[binary]>=3.3; extra == "dev"
60
+ Requires-Dist: sqlalchemy>=2.0; extra == "dev"
61
+ Requires-Dist: ruff>=0.15; extra == "dev"
62
+ Requires-Dist: mypy>=1.19; extra == "dev"
63
+ Requires-Dist: pydantic>=2.12; extra == "dev"
64
+ Requires-Dist: langchain>=1.2.10; extra == "dev"
65
+ Requires-Dist: langchain-ollama>=1.0.1; extra == "dev"
66
+ Dynamic: license-file
67
+
68
+ <p align="center">
69
+ <img src="logo.png" alt="brickQL" width="420" />
70
+ </p>
71
+
72
+ # Text to brickQL
73
+
74
+ **Policy-driven, SQL-standard-aligned query orchestration for LLMs.**
75
+
76
+ > The Building Blocks of Safe SQL.
77
+
78
+ brickQL separates concerns cleanly: the LLM outputs a structured **QueryPlan (JSON)**; brickQL validates it against your schema, enforces policy rules, and compiles it to safe, parameterized SQL. Raw SQL is never generated by the LLM.
79
+
80
+ ---
81
+
82
+ ## Why not Text-to-SQL?
83
+
84
+ Text-to-SQL is a well-established approach: feed the LLM a natural-language question and a schema, and let it write the SQL directly. It works well for simple queries and controlled environments, but breaks down as soon as real-world constraints appear:
85
+
86
+ | Challenge | Why it hurts | How brickQL fixes it |
87
+ |---|---|---|
88
+ | **Hallucinated syntax** | LLMs invent column names, functions, or dialect-specific constructs that don't exist in your database, causing runtime errors that are hard to debug at scale. | The LLM outputs a typed, structured `QueryPlan` JSON. Free-form SQL is structurally impossible. brickQL compiles it to parameterized SQL with no string interpolation. |
89
+ | **No policy enforcement** | There is no layer between the generated SQL and the database. Row-level filters, param-bound column constraints, and column allowlists must be bolted on externally, and can silently fail. | Per-table param-bound column enforcement, column allowlists (RBAC), and denied columns. Any column can be bound to a runtime parameter; the LLM cannot bypass the predicate. |
90
+ | **Prompt-injection surface** | The LLM sees and reasons over raw SQL strings. Malicious content in user input or database values can redirect the query, exfiltrate data, or trigger destructive operations. | Implements Plan-Then-Execute: the LLM commits to a structured `QueryPlan` before any data is returned. Database contents can never inject new instructions. |
91
+ | **Non-deterministic repairs** | When a query fails, the LLM must re-generate free-form SQL — each attempt is a new coin flip with the same attack surface. | All errors are machine-readable: a typed exception hierarchy with `code` and `details` fields. `to_error_response()` feeds structured error context back to the LLM for targeted repair. |
92
+ | **Dialect fragility** | SQL is not a single language. A query that works on PostgreSQL may silently mis-behave on SQLite or MySQL; the LLM has no mechanism to stay within a safe dialect subset. | Opt in to only the SQL features you need via `DialectProfile`. A fluent builder with dependency enforcement at `build()` time. |
93
+ | **Target DB awareness** | The LLM must know which database it's targeting and produce the correct syntax — date functions, quoting, pagination, and type casting all differ across engines. | Built-in compilers for SQLite, PostgreSQL, and MySQL. Add any other target by registering a `SQLCompiler` subclass with one decorator — no core changes. |
94
+
95
+ ---
96
+
97
+ ## How it works
98
+
99
+ <p align="center">
100
+ <img src="docs/how-it-works.png" alt="brickQL flow diagram" width="500" />
101
+ </p>
102
+
103
+ ---
104
+
105
+ ## Security model
106
+
107
+ brickQL implements the design patterns recommended for SQL agents in [*Design Patterns for Securing LLM Agents against Prompt Injections*](https://arxiv.org/abs/2506.08837) (Beurer-Kellner et al., 2025).
108
+
109
+ Among its ten case studies, §4.2 examines SQL agents under a threat model where the attacker can control the input query or the database content, with goals ranging from unauthorized data extraction to remote code execution. That case study concludes that the **Plan-Then-Execute** pattern is the correct baseline, and Appendix A identifies **strict output formatting** and **least-privilege access control** as mandatory best practices across all agent types. brickQL maps each of these directly to code:
110
+
111
+ | Paper recommendation | brickQL implementation |
112
+ |---|---|
113
+ | **Plan-Then-Execute** - LLM commits to a query plan *before* any database data is returned to it, so database contents can never inject new instructions | The LLM outputs a `QueryPlan` JSON; brickQL validates and compiles it to SQL without ever feeding query results back to the LLM |
114
+ | **Strict output formatting** - constrain the LLM to a well-specified format rather than free-form SQL | `QueryPlan` is a typed Pydantic model; free-form SQL is structurally impossible |
115
+ | **Least-privilege access control** - restrict tables, columns, and operations to exactly what the role needs | `DialectProfile` allowlists tables and SQL features; `PolicyConfig` / `TablePolicy` enforce per-table column allowlists, deny lists, and param-bound columns |
116
+ | **Parameterized execution** - prevent SQL injection from literal values in the plan | All `{"value": …}` operands are compiled to named placeholders; no string interpolation occurs anywhere in the compilation path |
117
+
118
+ The OR-bypass hardening in `PolicyEngine._where_satisfies_param` (which ensures a param-bound column cannot be satisfied by placing the required predicate inside an `OR` branch) and `to_error_response()` (which serializes error context to a self-contained JSON string before it is embedded in an LLM repair prompt, preventing plan content from injecting new instructions) are direct responses to security risks identified through the paper's threat model.
119
+
120
+ ---
121
+
122
+ ## Installation
123
+
124
+ ```bash
125
+ # Core library (SQLite only)
126
+ pip install brickql
127
+
128
+ # With PostgreSQL driver (psycopg v3)
129
+ pip install "brickql[postgres]"
130
+
131
+ # With MySQL driver (PyMySQL)
132
+ pip install "brickql[mysql]"
133
+
134
+ # With SQLAlchemy schema reflector
135
+ pip install "brickql[sqlalchemy]"
136
+ ```
137
+
138
+ Requires Python ≥ 3.10.
139
+
140
+ ---
141
+
142
+ ## Quick start
143
+
144
+ ```python
145
+ import brickql
146
+ from brickql import SchemaSnapshot, DialectProfile, PolicyConfig, TablePolicy
147
+
148
+ # 1. Load your schema snapshot (describes tables, columns, relationships)
149
+ import json
150
+ snapshot = SchemaSnapshot.model_validate(json.loads(open("schema.json").read()))
151
+
152
+ # 2. Choose a dialect profile (compose exactly the features you need)
153
+ dialect = (
154
+ DialectProfile.builder(["employees", "departments"], target="postgres")
155
+ .joins()
156
+ .aggregations()
157
+ .build()
158
+ )
159
+
160
+ # 3. Configure policy (tenant isolation, row limits)
161
+ policy = PolicyConfig(
162
+ inject_missing_params=True, # auto-inject tenant_id predicates
163
+ default_limit=100,
164
+ tables={
165
+ "employees": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
166
+ "departments": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
167
+ },
168
+ )
169
+
170
+ # 4. Compile the LLM's QueryPlan JSON
171
+ plan_json = llm_response # {"SELECT": [...], "FROM": {...}, "JOIN": [...], ...}
172
+
173
+ compiled = brickql.validate_and_compile(plan_json, snapshot, dialect, policy)
174
+
175
+ # 5. Execute with your own connection - brickQL does not execute queries
176
+ cursor.execute(compiled.sql, compiled.merge_runtime_params({"TENANT": tenant_id}))
177
+ ```
178
+
179
+ ---
180
+
181
+ ## Key concepts
182
+
183
+ ### QueryPlan JSON
184
+
185
+ The only output the LLM must produce. A structured, SQL-grammar-aligned JSON object - never raw SQL.
186
+
187
+ ```json
188
+ {
189
+ "SELECT": [
190
+ {"expr": {"col": "employees.first_name"}},
191
+ {"expr": {"col": "departments.name"}, "alias": "dept"}
192
+ ],
193
+ "FROM": {"table": "employees"},
194
+ "JOIN": [{"rel": "departments__employees", "type": "LEFT"}],
195
+ "WHERE": {"EQ": [{"col": "employees.active"}, {"value": true}]},
196
+ "LIMIT": {"value": 50}
197
+ }
198
+ ```
199
+
200
+ Each expression field (`expr`, `GROUP_BY` items, `ORDER_BY` items, window `partition_by`) is parsed into a **typed operand** automatically by Pydantic:
201
+
202
+ | JSON shape | Python type |
203
+ |---|---|
204
+ | `{"col": "t.col"}` | `ColumnOperand(col="t.col")` |
205
+ | `{"value": 42}` | `ValueOperand(value=42)` |
206
+ | `{"param": "TENANT"}` | `ParamOperand(param="TENANT")` |
207
+ | `{"func": "COUNT", "args": [...]}` | `FuncOperand(func="COUNT", args=[...])` |
208
+ | `{"case": {"when": [...], "else": ...}}` | `CaseOperand(case=CaseBody(...))` |
209
+
210
+ These types are importable if you need to inspect or construct plans programmatically:
211
+
212
+ ```python
213
+ from brickql import ColumnOperand, ValueOperand, ParamOperand, FuncOperand, CaseOperand, Operand
214
+ ```
215
+
216
+ ### SchemaSnapshot
217
+
218
+ Describes your database structure: tables, columns (name, type, nullability), and named relationships. It is purely structural - no policy or access-control concerns. Loaded once at startup and shared across requests.
219
+
220
+ Both `TableInfo` and `ColumnInfo` accept an optional `description` field. When present, descriptions are included in the LLM system prompt so the model can make better join and filter decisions without guessing from column names alone.
221
+
222
+ ```python
223
+ snapshot = SchemaSnapshot.model_validate({
224
+ "tables": [
225
+ {
226
+ "name": "employees",
227
+ "description": "One row per employee. Joined to departments via department_id.",
228
+ "columns": [
229
+ {"name": "employee_id", "type": "INTEGER", "nullable": False},
230
+ {"name": "tenant_id", "type": "TEXT", "nullable": False},
231
+ {"name": "status", "type": "TEXT", "nullable": True,
232
+ "description": "Employment status. Values: ACTIVE, TERMINATED, ON_LEAVE."},
233
+ ],
234
+ "relationships": ["departments__employees"]
235
+ }
236
+ ],
237
+ "relationships": [
238
+ {"key": "departments__employees", "from_table": "employees",
239
+ "from_col": "department_id", "to_table": "departments", "to_col": "department_id"}
240
+ ]
241
+ })
242
+ ```
243
+
244
+ > **Note** - `tenant_id` is just a regular column in the snapshot. Which columns
245
+ > require runtime parameters and what those params are named is configured in
246
+ > `PolicyConfig` via `TablePolicy`, not in the schema.
247
+
248
+ #### Reflecting a schema from a live database
249
+
250
+ Use `schema_from_sqlalchemy` to populate a `SchemaSnapshot` directly from an existing database instead of writing the JSON by hand:
251
+
252
+ ```python
253
+ from sqlalchemy import create_engine
254
+ from brickql import schema_from_sqlalchemy
255
+
256
+ engine = create_engine("postgresql+psycopg://user:pass@localhost:5432/mydb")
257
+ snapshot = schema_from_sqlalchemy(engine)
258
+ ```
259
+
260
+ `schema_from_sqlalchemy` requires the `sqlalchemy` optional dependency:
261
+
262
+ ```bash
263
+ pip install "brickql[sqlalchemy]"
264
+ ```
265
+
266
+ The reflected snapshot is a starting point - add `description` fields and manually define any relationships that naming heuristics cannot detect, then save it to a JSON file for inspection and version control.
267
+
268
+ ### DialectProfile - builder
269
+
270
+ Compose exactly the SQL features you need. Most methods are independent; the few inter-feature dependencies are explicit — listed in the **Requires** column below and enforced at `build()` time:
271
+
272
+ | Builder method | SQL capabilities unlocked | Requires |
273
+ |---|---|---|
274
+ | *(base)* | Single-table `SELECT` / `WHERE` / `LIMIT` | - |
275
+ | `.joins(max_join_depth=2)` | `JOIN` (inner, left, self-referential, many-to-many), `ORDER BY`, `OFFSET`, `ILIKE` | - |
276
+ | `.aggregations()` | `GROUP BY` / `HAVING` / `COUNT` `SUM` `AVG` `MIN` `MAX` / `CASE` | - |
277
+ | `.scalar_functions(*funcs)` | Additional scalar functions by name (e.g. `DATE_PART`, `COALESCE`) | - |
278
+ | `.subqueries()` | `EXISTS`, correlated and derived-table subqueries | - |
279
+ | `.ctes()` | `WITH` / `WITH RECURSIVE` - CTEs (Common Table Expressions: named temporary result sets scoped to the query) | **`.subqueries()`** |
280
+ | `.set_operations()` | `UNION` / `UNION ALL` / `INTERSECT` / `EXCEPT` | - |
281
+ | `.window_functions()` | `ROW_NUMBER`, `RANK`, `LAG`, `LEAD`, `OVER`, `PARTITION BY` + aggregate window functions | **`.aggregations()`** |
282
+
283
+ `.scalar_functions()` is additive and can be chained with any other method:
284
+
285
+ ```python
286
+ profile = (
287
+ DialectProfile.builder(tables, target="postgres")
288
+ .aggregations()
289
+ .scalar_functions("DATE_PART", "COALESCE")
290
+ .ctes()
291
+ .subqueries()
292
+ .build()
293
+ )
294
+ ```
295
+
296
+ Dependencies are enforced at `build()` time with a `ProfileConfigError` and a clear message.
297
+
298
+ ```python
299
+ # Joins + aggregations only
300
+ profile = (
301
+ DialectProfile.builder(tables, target="postgres")
302
+ .joins(max_join_depth=2)
303
+ .aggregations()
304
+ .build()
305
+ )
306
+
307
+ # Window functions without join support
308
+ profile = (
309
+ DialectProfile.builder(tables, target="sqlite")
310
+ .aggregations()
311
+ .window_functions()
312
+ .build()
313
+ )
314
+
315
+ # Everything
316
+ profile = (
317
+ DialectProfile.builder(tables)
318
+ .joins()
319
+ .aggregations()
320
+ .subqueries()
321
+ .ctes()
322
+ .set_operations()
323
+ .window_functions()
324
+ .build()
325
+ )
326
+ ```
327
+
328
+ ### PolicyConfig and TablePolicy
329
+
330
+ `PolicyConfig` controls the overall request policy. `TablePolicy` configures
331
+ per-table rules - each table can have its own param-bound columns, a positive
332
+ column allowlist, and/or a denied column list.
333
+
334
+ ```python
335
+ from brickql import PolicyConfig, TablePolicy
336
+
337
+ policy = PolicyConfig(
338
+ inject_missing_params=True,
339
+ default_limit=200,
340
+ tables={
341
+ "companies": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
342
+ "departments": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
343
+ "employees": TablePolicy(
344
+ param_bound_columns={"tenant_id": "TENANT"},
345
+ denied_columns=["salary"],
346
+ ),
347
+ "projects": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
348
+ },
349
+ )
350
+ ```
351
+
352
+ #### Column allowlist - RBAC pattern
353
+
354
+ `allowed_columns` is a **positive allowlist**: when non-empty, only the listed
355
+ columns may appear in any plan referencing that table. This maps directly to
356
+ RBAC grant patterns where a role should see only a specific subset of columns,
357
+ without having to enumerate every other column in a blocklist.
358
+
359
+ ```python
360
+ analyst_policy = PolicyConfig(
361
+ inject_missing_params=True,
362
+ tables={
363
+ "employees": TablePolicy(
364
+ param_bound_columns={"tenant_id": "TENANT"},
365
+ allowed_columns=[
366
+ "employee_id", "first_name", "last_name",
367
+ "department_id", "hire_date", "active",
368
+ ],
369
+ ),
370
+ },
371
+ )
372
+ ```
373
+
374
+ `denied_columns` (per-table or global) is subtracted from `allowed_columns`
375
+ when both are set, so you can always enforce a hard blocklist on top. An empty
376
+ `allowed_columns` (the default) means all snapshot columns are permitted,
377
+ subject only to `denied_columns`.
378
+
379
+ Different tables can use **different param names**:
380
+
381
+ ```python
382
+ policy = PolicyConfig(
383
+ tables={
384
+ "employees": TablePolicy(param_bound_columns={"tenant_id": "TENANT"}),
385
+ "audit_log": TablePolicy(param_bound_columns={"org_id": "ORG"}),
386
+ }
387
+ )
388
+ params = compiled.merge_runtime_params({"TENANT": "acme", "ORG": "acme-org-42"})
389
+ ```
390
+
391
+ ### CompiledSQL
392
+
393
+ The output of `validate_and_compile`. Contains the parameterized SQL string and a `params` dict. Runtime parameters (e.g. `TENANT`) are merged in before execution:
394
+
395
+ ```python
396
+ sql_params = compiled.merge_runtime_params({"TENANT": "acme"})
397
+ cursor.execute(compiled.sql, sql_params)
398
+ ```
399
+
400
+ ---
401
+
402
+ ## Prompting the LLM
403
+
404
+ ```python
405
+ components = brickql.get_prompt_components(
406
+ snapshot=snapshot,
407
+ dialect=dialect,
408
+ question="List the top 5 highest-paid employees in Engineering",
409
+ policy_summary='Always filter by tenant_id using {"param": "TENANT"}.',
410
+ )
411
+
412
+ # Send to your LLM
413
+ response = llm.chat(system=components.system_prompt, user=components.user_prompt)
414
+ ```
415
+
416
+ ---
417
+
418
+ ## Error handling
419
+
420
+ All errors are subclasses of `brickQLError`. `ParseError` and `PolicyViolationError` both expose `to_error_response()` which returns a ready-to-embed JSON string for LLM repair loops.
421
+
422
+ ```python
423
+ from brickql import ParseError, PolicyViolationError, ValidationError, CompilationError
424
+
425
+ try:
426
+ compiled = brickql.validate_and_compile(plan_json, snapshot, dialect, policy)
427
+ except ParseError as e:
428
+ # Malformed JSON - e.to_error_response() returns a JSON string for LLM repair
429
+ pass
430
+ except PolicyViolationError as e:
431
+ # Policy rule violated (denied column, missing param, disallowed table)
432
+ # e.to_error_response() returns a JSON string with code + details for LLM repair
433
+ pass
434
+ except ValidationError as e:
435
+ # Schema or dialect rule violated
436
+ pass
437
+ except CompilationError as e:
438
+ raise
439
+ ```
440
+
441
+ ---
442
+
443
+ ## Extensibility
444
+
445
+ ### Adding a new dialect
446
+
447
+ Register a custom `SQLCompiler` subclass once; `validate_and_compile` picks it up automatically for any `DialectProfile` with that target:
448
+
449
+ ```python
450
+ from brickql.compile.base import SQLCompiler
451
+ from brickql.compile.registry import CompilerFactory
452
+
453
+ @CompilerFactory.register("mysql")
454
+ class MySQLCompiler(SQLCompiler):
455
+ @property
456
+ def dialect_name(self) -> str:
457
+ return "mysql"
458
+
459
+ def param_placeholder(self, name: str) -> str:
460
+ return f"%({name})s"
461
+
462
+ def like_operator(self, op: str) -> str:
463
+ return op
464
+
465
+ def quote_identifier(self, name: str) -> str:
466
+ return f"`{name}`"
467
+
468
+ # Now you can use target="mysql" in DialectProfile.builder(...)
469
+ ```
470
+
471
+ #### Customising function compilation per dialect
472
+
473
+ Override `build_func_call` to control how specific functions are rendered for your dialect - inline literal args, add type casts, rename functions, etc. The default renders `FUNC(arg1, arg2, …)`:
474
+
475
+ ```python
476
+ from typing import Any, Callable
477
+
478
+ @CompilerFactory.register("mysql")
479
+ class MySQLCompiler(SQLCompiler):
480
+ # ... required abstract methods ...
481
+
482
+ def build_func_call(
483
+ self,
484
+ func_name: str,
485
+ args: list[Any],
486
+ build_arg: Callable[[Any], str],
487
+ ) -> str:
488
+ if func_name.upper() == "DATE_PART":
489
+ # MySQL uses YEAR(col) instead of DATE_PART('year', col)
490
+ return f"YEAR({build_arg(args[1])})"
491
+ return super().build_func_call(func_name, args, build_arg)
492
+ ```
493
+
494
+ `build_arg` is a callback that compiles a single typed `Operand` to SQL, so the full operand chain (column quoting, param binding, nested functions) works correctly for any arg you forward.
495
+
496
+ ### Adding a new operator
497
+
498
+ Register a rendering handler; `OperatorRegistry` wires it in without touching the built-in `PredicateBuilder`:
499
+
500
+ ```python
501
+ from brickql.compile.registry import OperatorRegistry
502
+
503
+ @OperatorRegistry.register("REGEXP")
504
+ def _regexp_handler(op, args, build_operand):
505
+ left = build_operand(args[0])
506
+ right = build_operand(args[1])
507
+ return f"{left} REGEXP {right}"
508
+ ```
509
+
510
+ ---
511
+
512
+ ## Known limitations
513
+
514
+ | Limitation | Workaround |
515
+ |---|---|
516
+ | **Scalar subqueries in comparison operators** - `salary > (SELECT AVG(salary) …)` is not a supported operand type. | Use a window-function CTE: compute `AVG(salary) OVER ()` inside the CTE so every row carries the aggregate, then filter on that result column in the outer query. See the example below. |
517
+ | **JOIN alias column references** - column references in SELECT / WHERE must use the original table name, not a JOIN alias. The exception is CTE names, which can be used as table qualifiers. | Use the real table name in all column references; aliases are only for output renaming. |
518
+
519
+ **Scalar subquery workaround - window-function CTE**
520
+
521
+ Goal: *list employees whose salary is above the overall average.*
522
+
523
+ ```json
524
+ {
525
+ "CTE": [{
526
+ "name": "emp_with_avg",
527
+ "query": {
528
+ "SELECT": [
529
+ {"expr": {"col": "employees.first_name"}},
530
+ {"expr": {"col": "employees.last_name"}},
531
+ {"expr": {"col": "employees.salary"}},
532
+ {
533
+ "expr": {"func": "AVG", "args": [{"col": "employees.salary"}]},
534
+ "alias": "avg_sal",
535
+ "over": {"partition_by": []}
536
+ }
537
+ ],
538
+ "FROM": {"table": "employees"},
539
+ "WHERE": {"EQ": [{"col": "employees.tenant_id"}, {"param": "TENANT"}]}
540
+ }
541
+ }],
542
+ "SELECT": [
543
+ {"expr": {"col": "emp_with_avg.first_name"}},
544
+ {"expr": {"col": "emp_with_avg.last_name"}},
545
+ {"expr": {"col": "emp_with_avg.salary"}}
546
+ ],
547
+ "FROM": {"table": "emp_with_avg"},
548
+ "WHERE": {"GT": [{"col": "emp_with_avg.salary"}, {"col": "emp_with_avg.avg_sal"}]},
549
+ "LIMIT": {"value": 50}
550
+ }
551
+ ```
552
+
553
+ Dialect requirements: `.aggregations()`, `.subqueries()`, `.ctes()`, `.window_functions()`.
554
+
555
+ ---
556
+
557
+ ## Development
558
+
559
+ ```bash
560
+ # Set up virtual environment and install all dev dependencies
561
+ make install
562
+
563
+ # Lint
564
+ make lint
565
+
566
+ # Auto-format
567
+ make fmt
568
+
569
+ # Type check
570
+ make typecheck
571
+
572
+ # Unit tests only (no database required)
573
+ make test-unit
574
+
575
+ # SQLite integration tests only (in-memory, no Docker)
576
+ make test-integration-sqlite
577
+
578
+ # PostgreSQL integration tests only (starts and stops Docker automatically)
579
+ make test-integration-postgres
580
+
581
+ # All tests - unit + SQLite + PostgreSQL + MySQL (requires Docker)
582
+ make test
583
+ ```
584
+
585
+ ---
586
+
587
+ ## Repository layout
588
+
589
+ ```
590
+ brickql/
591
+ schema/
592
+ expressions.py # Operator/operand enums and frozenset constants
593
+ operands.py # Typed operand models (ColumnOperand, ValueOperand, …) + Operand union
594
+ query_plan.py # QueryPlan Pydantic model + domain methods (collect_col_refs, …)
595
+ snapshot.py # SchemaSnapshot, TableInfo, ColumnInfo, RelationshipInfo
596
+ dialect.py # DialectProfile + DialectProfileBuilder (fluent API)
597
+ column_reference.py # ColumnReference - parse + validate table.column strings
598
+ context.py # ValidationContext value object (snapshot + dialect)
599
+ converters.py # schema_from_sqlalchemy() - reflect a live DB into SchemaSnapshot
600
+ validate/
601
+ validator.py # PlanValidator - orchestrates all sub-validators
602
+ dialect_validator.py # Feature-flag checks (CTE, subquery, join depth, window)
603
+ schema_validator.py # Table / column existence, JOIN relationship keys
604
+ semantic_validator.py # HAVING/GROUP_BY pairing, LIMIT range
605
+ operand_validator.py # OperandValidator + PredicateValidator (mutually recursive)
606
+ policy/
607
+ engine.py # PolicyEngine, PolicyConfig, TablePolicy
608
+ compile/
609
+ base.py # SQLCompiler ABC + CompiledSQL dataclass
610
+ registry.py # CompilerFactory + OperatorRegistry (OCP extension points)
611
+ context.py # CompilationContext value object (compiler + snapshot)
612
+ expression_builder.py # RuntimeContext + OperandBuilder + PredicateBuilder
613
+ clause_builders.py # SelectClause / From / Join / Window / CTE / SetOp builders
614
+ builder.py # QueryBuilder - orchestrates all sub-builders
615
+ postgres.py # PostgresCompiler (%(name)s placeholders, ILIKE, DATE_PART specialisation)
616
+ sqlite.py # SQLiteCompiler (:name placeholders, LIKE fallback)
617
+ mysql.py # MySQLCompiler (%(name)s placeholders, backtick identifiers, EXTRACT)
618
+ prompt/
619
+ builder.py # PromptBuilder + PromptComponents
620
+ errors.py # Exception hierarchy (brickQLError and subclasses)
621
+ docs/
622
+ how-it-works.mmd # Simple end-to-end flow (Mermaid)
623
+ how-it-works.excalidraw # Visual flow diagram (Excalidraw)
624
+ tests/
625
+ fixtures/ # schema.json, ddl_sqlite.sql, ddl_postgres.sql, ddl_mysql.sql
626
+ integration/ # SQLite (in-memory), PostgreSQL, and MySQL (Docker) integration tests
627
+ docker-compose.yml # PostgreSQL and MySQL services for integration tests
628
+ pyproject.toml # Package metadata, dependencies, ruff, mypy config
629
+ Makefile # Development task runner
630
+ ```
631
+
632
+ ---
633
+
634
+ ## License
635
+
636
+ MIT - see [LICENSE](LICENSE).