aetherdialect 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {aetherdialect-0.1.0/src/aetherdialect.egg-info → aetherdialect-0.1.1}/PKG-INFO +24 -26
  2. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/README.md +20 -23
  3. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/pyproject.toml +15 -4
  4. {aetherdialect-0.1.0 → aetherdialect-0.1.1/src/aetherdialect.egg-info}/PKG-INFO +24 -26
  5. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/aetherdialect.egg-info/SOURCES.txt +2 -2
  6. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/aetherdialect.egg-info/requires.txt +3 -2
  7. aetherdialect-0.1.1/src/text2sql/__init__.py +10 -0
  8. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/config.py +253 -97
  9. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/contracts_base.py +1132 -952
  10. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/contracts_core.py +735 -180
  11. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/core_utils.py +936 -834
  12. aetherdialect-0.1.1/src/text2sql/dialect.py +2307 -0
  13. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/expansion_ops.py +1593 -1218
  14. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/intent_expr.py +514 -317
  15. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/intent_process.py +670 -602
  16. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/intent_repair.py +931 -475
  17. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/intent_resolve.py +230 -151
  18. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/live_testing.py +500 -368
  19. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/main_execution.py +142 -155
  20. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/pipeline.py +780 -397
  21. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/qsim_ops.py +1261 -1286
  22. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/qsim_sample.py +793 -609
  23. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/qsim_struct.py +525 -569
  24. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/schema.py +471 -224
  25. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/schema_profiling.py +589 -535
  26. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/simulator.py +271 -233
  27. aetherdialect-0.1.1/src/text2sql/sql_gen.py +2288 -0
  28. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/templates.py +297 -311
  29. aetherdialect-0.1.1/src/text2sql/text2sql.py +856 -0
  30. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/utils.py +115 -106
  31. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/validation_agg.py +1025 -1033
  32. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/validation_execute.py +174 -152
  33. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/validation_schema.py +775 -193
  34. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/text2sql/validation_semantic.py +2109 -2122
  35. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_config.py +51 -65
  36. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_contracts.py +121 -113
  37. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_core_utils.py +27 -28
  38. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_dialect.py +145 -91
  39. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_expansion_ops.py +92 -78
  40. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_intent_expr.py +138 -59
  41. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_intent_process.py +365 -172
  42. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_intent_repair.py +257 -74
  43. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_intent_resolve.py +73 -100
  44. aetherdialect-0.1.1/tests/test_join_bool_cte_matrix.py +383 -0
  45. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_live_testing.py +102 -42
  46. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_main_execution.py +275 -277
  47. aetherdialect-0.1.1/tests/test_pipeline_targeted.py +136 -0
  48. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_pipeline_units.py +517 -36
  49. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_qsim_ops.py +7 -8
  50. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_qsim_sample.py +3 -6
  51. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_qsim_struct.py +2 -4
  52. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_schema.py +46 -14
  53. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_schema_profiling.py +109 -72
  54. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_simulator.py +29 -6
  55. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_simulator_pipeline.py +286 -248
  56. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_sql_gen.py +488 -59
  57. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_templates.py +191 -143
  58. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_text2sql.py +237 -240
  59. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_utils.py +39 -42
  60. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_validation_agg.py +7 -14
  61. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_validation_execute.py +72 -86
  62. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_validation_schema.py +126 -8
  63. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/tests/test_validation_semantic.py +30 -59
  64. aetherdialect-0.1.0/src/text2sql/__init__.py +0 -7
  65. aetherdialect-0.1.0/src/text2sql/dialect.py +0 -1134
  66. aetherdialect-0.1.0/src/text2sql/expansion_rules.py +0 -496
  67. aetherdialect-0.1.0/src/text2sql/sql_gen.py +0 -1537
  68. aetherdialect-0.1.0/src/text2sql/text2sql.py +0 -726
  69. aetherdialect-0.1.0/tests/test_expansion_rules.py +0 -434
  70. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/LICENSE +0 -0
  71. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/setup.cfg +0 -0
  72. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/aetherdialect.egg-info/dependency_links.txt +0 -0
  73. {aetherdialect-0.1.0 → aetherdialect-0.1.1}/src/aetherdialect.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aetherdialect
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Deterministic, validation-first Text-to-SQL system for business databases
5
5
  Author-email: Akul Ameya <akul.ameya@gmail.com>
6
6
  License: MIT
@@ -10,14 +10,15 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: jsonschema<5,>=4.0
12
12
  Requires-Dist: openai<3,>=2.0.0
13
- Requires-Dist: sqlglot<30,>=29.0
14
13
  Requires-Dist: platformdirs<5,>=2.0.0
15
14
  Requires-Dist: python-dotenv<2,>=1.0.0
15
+ Requires-Dist: SQLAlchemy<3,>=2.0
16
16
  Provides-Extra: databricks
17
+ Requires-Dist: sqlglot<30,>=29.0; extra == "databricks"
17
18
  Requires-Dist: pyspark<4,>=3.3; extra == "databricks"
18
19
  Requires-Dist: databricks-sql-connector<4,>=3.0; extra == "databricks"
20
+ Requires-Dist: databricks-sqlalchemy<3,>=2.0; extra == "databricks"
19
21
  Provides-Extra: postgresql
20
- Requires-Dist: SQLAlchemy<3,>=2.0; extra == "postgresql"
21
22
  Requires-Dist: psycopg2-binary<3,>=2.9; extra == "postgresql"
22
23
  Requires-Dist: pglast<8,>=5.0; extra == "postgresql"
23
24
  Provides-Extra: dev
@@ -44,14 +45,15 @@ pip install "text2sql[databricks]"
44
45
  pip install "text2sql[postgresql,databricks]"
45
46
  ```
46
47
 
47
- Requires Python ≥ 3.10 and an [OpenAI API key](https://platform.openai.com/api-keys).
48
+ Requires Python ≥ 3.10 and either an [OpenAI API key](https://platform.openai.com/api-keys) or Azure OpenAI credentials.
48
49
 
49
- | Extra | Brings in | Use when |
50
- | ------------ | ---------------------------------------------- | --------------------------- |
51
- | `postgresql` | SQLAlchemy, PostgreSQL driver, `pglast` | `engine="postgresql"` |
52
- | `databricks` | PySpark, Databricks SQL connector | `engine="databricks"` |
50
+ | Extra | Brings in | Use when |
51
+ | ------------ | ------------------------------------------------------------------------- | --------------------- |
52
+ | (base) | **SQLAlchemy** (shared introspection / execution interface) | Always installed |
53
+ | `postgresql` | PostgreSQL driver (`psycopg2-binary`), **`pglast`** | `engine="postgresql"` |
54
+ | `databricks` | PySpark, Databricks SQL connector, **`databricks-sqlalchemy`**, `sqlglot` | `engine="databricks"` |
53
55
 
54
- **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect. The base package already depends on `sqlglot`; `pglast` is installed with the `postgresql` extra.
56
+ **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect.
55
57
 
56
58
  ---
57
59
 
@@ -91,7 +93,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
91
93
  - **Determinism over creativity** — prefer a correct, boring plan to a novel one.
92
94
  - **Correct joins over clever SQL** — join paths come from **foreign keys** and precomputed paths, not free-form guessing.
93
95
  - **Validate before execute** — schema checks, intent consistency, join shape, and dialect-safe **read-only** `SELECT` rules.
94
- - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, generate or repair SQL; everything else is rules and stores.
96
+ - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, and repair SQL; everything else is rules and stores.
95
97
  - **Safe defaults for non-analysts** — narrow allowed SQL; you still choose **database credentials** (read-only vs write-capable is outside this library).
96
98
 
97
99
  ---
@@ -100,28 +102,28 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
100
102
 
101
103
  **Backends**
102
104
 
103
- - PostgreSQL via SQLAlchemy.
104
- - Databricks via Unity Catalog introspection (with optional DDL file fallback when the catalog is empty).
105
+ - PostgreSQL
106
+ - Databricks
105
107
 
106
108
  **Schema**
107
109
 
108
110
  - Load from **live introspection** (primary).
109
111
  - Optional **`CREATE TABLE` file** as extra or fallback (especially when you cannot reach all metadata from the driver).
110
112
  - Cached schema snapshot per connection fingerprint so restarts avoid re-reflecting unchanged databases.
111
- - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling.
112
- - Optional **human notes** (plain text) fed once when the schema graph is built: richer **descriptions**, **roles**, and optional **sensitivity** labels — without renaming tables or inventing columns.
113
+ - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling — all assigned when the graph is built (reflection, DDL, profiling, and optional notes).
114
+ - Optional **human notes** (plain text), via `Text2SQL(..., notes_file=...)` (see **[USAGE.md](USAGE.md)**): read once when the schema graph is **first** created (not on cache load). The LLM uses them to refine **table and column roles**, natural-language **descriptions**, and optional **sensitivity** labels — without renaming tables or inventing columns.
113
115
  - Optional **deny lists** for tables or columns so they stay out of prompts and can be stripped from intents.
114
116
 
115
117
  **Intent / SQL shape (analytical subset)**
116
118
 
117
119
  - **Queries:** `SELECT` only (enforced with pattern checks and dialect parsing). **CTEs** reuse the same intent model as the outer query.
118
- - **Joins:** only along **FK-backed paths**; join type for injected joins is chosen **deterministically** from table roles (e.g. dimension side as `LEFT` where applicable).
120
+ - **Joins:** related tables are wired using the schema’s relationships; when more than one valid path could link the same tables, one coherent path is chosen for the whole query, and join style follows table roles (e.g. `LEFT` toward dimensions where that fits). Self-joins use **CTEs** instead of repeating the same base table in one `FROM` chain.
119
121
  - **Select list:** bare columns, **aggregates** (`COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, etc.), **arithmetic and string expressions** where the schema allows them, **`DISTINCT`**, and **scalar functions** subject to column metadata.
120
122
  - **Filters / boolean logic:** comparisons, **`AND` / `OR`**, **`IN`**, **`LIKE`**; **`ILIKE` / `NOT ILIKE` on PostgreSQL only** (intent stays dialect-agnostic; SQL rendering differs). Null / boolean value normalization in the repair chain.
121
123
  - **`BETWEEN`** in intent is **decomposed** into a pair of comparable predicates.
122
124
  - **Grouping / ordering:** **`GROUP BY`**, **`HAVING`** (aggregate-aware), **`ORDER BY`**, **`LIMIT`**; rules tie **grain** (row-level vs grouped) to aggregates and grouped columns.
123
125
  - **Dates:** structured **`date_window`** (anchor unit + offset) and **date-difference** filters between columns where supported.
124
- - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
126
+ - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, `LAG`, `LEAD`, `FIRST_VALUE`, `LAST_VALUE`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
125
127
  - **`CASE` / `WHEN`:** only in the **select list** in the intent model (not in `WHERE` / `HAVING`).
126
128
  - **Arrays / lists:** membership-style filters; SQL uses dialect-appropriate forms; optional **UNNEST / EXPLODE-style** expansion in CTE select lists for typed array columns.
127
129
  - **Metadata:** **UNIQUE** (and related) when reflection or DDL exposes it, for ranking “human readable” identifiers.
@@ -137,7 +139,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
137
139
  ## What it is not
138
140
 
139
141
  - Not a **full SQL** or **stored-procedure** generator: no `UNION`/`INTERSECT`/`EXCEPT`, no correlated subqueries, no `EXISTS`, no `LATERAL`, no **DML/DDL**, no arbitrary **RIGHT/FULL OUTER** join policy in the constrained path.
140
- - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` / `EXPLAIN` as you prefer).
142
+ - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` and `EXPLAIN`).
141
143
  - Not **schema-agnostic**: quality depends on **FKs**, sensible types, and optional notes for domain language.
142
144
 
143
145
  ---
@@ -145,31 +147,27 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
145
147
  ## How a question becomes SQL
146
148
 
147
149
  1. **Template match** — if a trusted pattern fits, reuse parameterized SQL (often **no** SQL LLM call).
148
- 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain** (see below).
150
+ 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain**.
149
151
  3. **Join resolution** — choose among valid **FK paths** for the intent’s tables; disambiguation may use the LLM when multiple paths tie.
150
- 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and optionally **`EXPLAIN`** when an engine is available.
152
+ 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and **`EXPLAIN`**.
151
153
  5. **Execute** (where the mode allows) and **learn** — accept → promote template trust; reject → record negative pattern.
152
154
 
153
155
  ---
154
156
 
155
157
  ## Validation (layers)
156
158
 
157
- - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (`pglast` on PostgreSQL, **`sqlglot` (Spark)** on Databricks). When a live **engine** is passed in, **`EXPLAIN`** can be used as an extra executability check.
159
+ - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (**`pglast`** or **`sqlglot`**). **`EXPLAIN`** is used as an extra executability check.
158
160
  - **Schema vs intent** — tables/columns/CTEs, **selectability**, access and sensitivity policy, window / CASE / array shapes, filter and HAVING ops per column, aggregate roles in select/HAVING/ORDER BY, scalar function typing, filter value types vs column types, null/date-window/date-diff rules.
159
- - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment (one of many cross-checks, not the only story).
161
+ - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment.
160
162
  - **Joins** — paths must match the FK graph; guarded path avoids ad-hoc join guessing.
161
163
 
162
- ## Deterministic repairs (after intent parse)
163
-
164
- Applied in order (high level): `COUNT(*)` normalization; CTE naming and output aliases; qualify CTE outputs; sanitize table names; grain rules for grouped CTE usage and **grain consistency**; strip redundant `GROUP BY`; normalize filters/HAVING; null-equality fixes; strip join-condition leakage into filters; per-CTE sort order; simplify expressions; `IN` value normalization; date-diff classification and raw-value fixes; **`BETWEEN` → paired predicates**; auto-repair filters/HAVING; strip impossible HAVING; FK filter type repair; filter value case / enum alignment; boolean and null filter values; expand FK selects to descriptive columns; deduplicate contradictory filters; redundant PK re-qualification; **window**, **CASE**, and **array** intent repairs; sensitivity and access policy enforcement on the intent.
165
-
166
164
  ---
167
165
 
168
166
  ## Learning and reuse
169
167
 
170
168
  - **Accepted templates** — intent fingerprint, parameterized SQL, optional example question, **trust** that rises with validation and falls with rejection.
171
169
  - **Rejected templates** — categorized failures so similar bad intents are discouraged.
172
- - Persistence is under a **per-connection artifact directory** (see USAGE); you can back it up or reset it by removing that directory.
170
+ - Persistence is under a **per-connection artifact directory** (see **[USAGE.md](USAGE.md)**); you can back it up or reset it by removing that directory.
173
171
 
174
172
  ---
175
173
 
@@ -9,14 +9,15 @@ pip install "text2sql[databricks]"
9
9
  pip install "text2sql[postgresql,databricks]"
10
10
  ```
11
11
 
12
- Requires Python ≥ 3.10 and an [OpenAI API key](https://platform.openai.com/api-keys).
12
+ Requires Python ≥ 3.10 and either an [OpenAI API key](https://platform.openai.com/api-keys) or Azure OpenAI credentials.
13
13
 
14
- | Extra | Brings in | Use when |
15
- | ------------ | ---------------------------------------------- | --------------------------- |
16
- | `postgresql` | SQLAlchemy, PostgreSQL driver, `pglast` | `engine="postgresql"` |
17
- | `databricks` | PySpark, Databricks SQL connector | `engine="databricks"` |
14
+ | Extra | Brings in | Use when |
15
+ | ------------ | ------------------------------------------------------------------------- | --------------------- |
16
+ | (base) | **SQLAlchemy** (shared introspection / execution interface) | Always installed |
17
+ | `postgresql` | PostgreSQL driver (`psycopg2-binary`), **`pglast`** | `engine="postgresql"` |
18
+ | `databricks` | PySpark, Databricks SQL connector, **`databricks-sqlalchemy`**, `sqlglot` | `engine="databricks"` |
18
19
 
19
- **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect. The base package already depends on `sqlglot`; `pglast` is installed with the `postgresql` extra.
20
+ **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect.
20
21
 
21
22
  ---
22
23
 
@@ -56,7 +57,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
56
57
  - **Determinism over creativity** — prefer a correct, boring plan to a novel one.
57
58
  - **Correct joins over clever SQL** — join paths come from **foreign keys** and precomputed paths, not free-form guessing.
58
59
  - **Validate before execute** — schema checks, intent consistency, join shape, and dialect-safe **read-only** `SELECT` rules.
59
- - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, generate or repair SQL; everything else is rules and stores.
60
+ - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, and repair SQL; everything else is rules and stores.
60
61
  - **Safe defaults for non-analysts** — narrow allowed SQL; you still choose **database credentials** (read-only vs write-capable is outside this library).
61
62
 
62
63
  ---
@@ -65,28 +66,28 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
65
66
 
66
67
  **Backends**
67
68
 
68
- - PostgreSQL via SQLAlchemy.
69
- - Databricks via Unity Catalog introspection (with optional DDL file fallback when the catalog is empty).
69
+ - PostgreSQL
70
+ - Databricks
70
71
 
71
72
  **Schema**
72
73
 
73
74
  - Load from **live introspection** (primary).
74
75
  - Optional **`CREATE TABLE` file** as extra or fallback (especially when you cannot reach all metadata from the driver).
75
76
  - Cached schema snapshot per connection fingerprint so restarts avoid re-reflecting unchanged databases.
76
- - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling.
77
- - Optional **human notes** (plain text) fed once when the schema graph is built: richer **descriptions**, **roles**, and optional **sensitivity** labels — without renaming tables or inventing columns.
77
+ - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling — all assigned when the graph is built (reflection, DDL, profiling, and optional notes).
78
+ - Optional **human notes** (plain text), via `Text2SQL(..., notes_file=...)` (see **[USAGE.md](USAGE.md)**): read once when the schema graph is **first** created (not on cache load). The LLM uses them to refine **table and column roles**, natural-language **descriptions**, and optional **sensitivity** labels — without renaming tables or inventing columns.
78
79
  - Optional **deny lists** for tables or columns so they stay out of prompts and can be stripped from intents.
79
80
 
80
81
  **Intent / SQL shape (analytical subset)**
81
82
 
82
83
  - **Queries:** `SELECT` only (enforced with pattern checks and dialect parsing). **CTEs** reuse the same intent model as the outer query.
83
- - **Joins:** only along **FK-backed paths**; join type for injected joins is chosen **deterministically** from table roles (e.g. dimension side as `LEFT` where applicable).
84
+ - **Joins:** related tables are wired using the schema’s relationships; when more than one valid path could link the same tables, one coherent path is chosen for the whole query, and join style follows table roles (e.g. `LEFT` toward dimensions where that fits). Self-joins use **CTEs** instead of repeating the same base table in one `FROM` chain.
84
85
  - **Select list:** bare columns, **aggregates** (`COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, etc.), **arithmetic and string expressions** where the schema allows them, **`DISTINCT`**, and **scalar functions** subject to column metadata.
85
86
  - **Filters / boolean logic:** comparisons, **`AND` / `OR`**, **`IN`**, **`LIKE`**; **`ILIKE` / `NOT ILIKE` on PostgreSQL only** (intent stays dialect-agnostic; SQL rendering differs). Null / boolean value normalization in the repair chain.
86
87
  - **`BETWEEN`** in intent is **decomposed** into a pair of comparable predicates.
87
88
  - **Grouping / ordering:** **`GROUP BY`**, **`HAVING`** (aggregate-aware), **`ORDER BY`**, **`LIMIT`**; rules tie **grain** (row-level vs grouped) to aggregates and grouped columns.
88
89
  - **Dates:** structured **`date_window`** (anchor unit + offset) and **date-difference** filters between columns where supported.
89
- - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
90
+ - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, `LAG`, `LEAD`, `FIRST_VALUE`, `LAST_VALUE`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
90
91
  - **`CASE` / `WHEN`:** only in the **select list** in the intent model (not in `WHERE` / `HAVING`).
91
92
  - **Arrays / lists:** membership-style filters; SQL uses dialect-appropriate forms; optional **UNNEST / EXPLODE-style** expansion in CTE select lists for typed array columns.
92
93
  - **Metadata:** **UNIQUE** (and related) when reflection or DDL exposes it, for ranking “human readable” identifiers.
@@ -102,7 +103,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
102
103
  ## What it is not
103
104
 
104
105
  - Not a **full SQL** or **stored-procedure** generator: no `UNION`/`INTERSECT`/`EXCEPT`, no correlated subqueries, no `EXISTS`, no `LATERAL`, no **DML/DDL**, no arbitrary **RIGHT/FULL OUTER** join policy in the constrained path.
105
- - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` / `EXPLAIN` as you prefer).
106
+ - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` and `EXPLAIN`).
106
107
  - Not **schema-agnostic**: quality depends on **FKs**, sensible types, and optional notes for domain language.
107
108
 
108
109
  ---
@@ -110,31 +111,27 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
110
111
  ## How a question becomes SQL
111
112
 
112
113
  1. **Template match** — if a trusted pattern fits, reuse parameterized SQL (often **no** SQL LLM call).
113
- 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain** (see below).
114
+ 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain**.
114
115
  3. **Join resolution** — choose among valid **FK paths** for the intent’s tables; disambiguation may use the LLM when multiple paths tie.
115
- 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and optionally **`EXPLAIN`** when an engine is available.
116
+ 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and **`EXPLAIN`**.
116
117
  5. **Execute** (where the mode allows) and **learn** — accept → promote template trust; reject → record negative pattern.
117
118
 
118
119
  ---
119
120
 
120
121
  ## Validation (layers)
121
122
 
122
- - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (`pglast` on PostgreSQL, **`sqlglot` (Spark)** on Databricks). When a live **engine** is passed in, **`EXPLAIN`** can be used as an extra executability check.
123
+ - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (**`pglast`** or **`sqlglot`**). **`EXPLAIN`** is used as an extra executability check.
123
124
  - **Schema vs intent** — tables/columns/CTEs, **selectability**, access and sensitivity policy, window / CASE / array shapes, filter and HAVING ops per column, aggregate roles in select/HAVING/ORDER BY, scalar function typing, filter value types vs column types, null/date-window/date-diff rules.
124
- - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment (one of many cross-checks, not the only story).
125
+ - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment.
125
126
  - **Joins** — paths must match the FK graph; guarded path avoids ad-hoc join guessing.
126
127
 
127
- ## Deterministic repairs (after intent parse)
128
-
129
- Applied in order (high level): `COUNT(*)` normalization; CTE naming and output aliases; qualify CTE outputs; sanitize table names; grain rules for grouped CTE usage and **grain consistency**; strip redundant `GROUP BY`; normalize filters/HAVING; null-equality fixes; strip join-condition leakage into filters; per-CTE sort order; simplify expressions; `IN` value normalization; date-diff classification and raw-value fixes; **`BETWEEN` → paired predicates**; auto-repair filters/HAVING; strip impossible HAVING; FK filter type repair; filter value case / enum alignment; boolean and null filter values; expand FK selects to descriptive columns; deduplicate contradictory filters; redundant PK re-qualification; **window**, **CASE**, and **array** intent repairs; sensitivity and access policy enforcement on the intent.
130
-
131
128
  ---
132
129
 
133
130
  ## Learning and reuse
134
131
 
135
132
  - **Accepted templates** — intent fingerprint, parameterized SQL, optional example question, **trust** that rises with validation and falls with rejection.
136
133
  - **Rejected templates** — categorized failures so similar bad intents are discouraged.
137
- - Persistence is under a **per-connection artifact directory** (see USAGE); you can back it up or reset it by removing that directory.
134
+ - Persistence is under a **per-connection artifact directory** (see **[USAGE.md](USAGE.md)**); you can back it up or reset it by removing that directory.
138
135
 
139
136
  ---
140
137
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aetherdialect"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "Deterministic, validation-first Text-to-SQL system for business databases"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -13,15 +13,19 @@ authors = [{name = "Akul Ameya", email = "akul.ameya@gmail.com"}]
13
13
  dependencies = [
14
14
  "jsonschema>=4.0,<5",
15
15
  "openai>=2.0.0,<3",
16
- "sqlglot>=29.0,<30",
17
16
  "platformdirs>=2.0.0,<5",
18
17
  "python-dotenv>=1.0.0,<2",
18
+ "SQLAlchemy>=2.0,<3",
19
19
  ]
20
20
 
21
21
  [project.optional-dependencies]
22
- databricks = ["pyspark>=3.3,<4", "databricks-sql-connector>=3.0,<4"]
22
+ databricks = [
23
+ "sqlglot>=29.0,<30",
24
+ "pyspark>=3.3,<4",
25
+ "databricks-sql-connector>=3.0,<4",
26
+ "databricks-sqlalchemy>=2.0,<3",
27
+ ]
23
28
  postgresql = [
24
- "SQLAlchemy>=2.0,<3",
25
29
  "psycopg2-binary>=2.9,<3",
26
30
  "pglast>=5.0,<8",
27
31
  ]
@@ -62,12 +66,19 @@ quote-style = "double"
62
66
  indent-style = "space"
63
67
  line-ending = "auto"
64
68
 
69
+ [tool.docformatter]
70
+ wrap-summaries = 72
71
+ wrap-descriptions = 72
72
+ style = "google"
73
+ force-wrap = true
74
+
65
75
  [tool.mypy]
66
76
  python_version = "3.10"
67
77
  strict = true
68
78
 
69
79
  [tool.pytest.ini_options]
70
80
  testpaths = ["tests", "live_tests"]
81
+ pythonpath = ["src"]
71
82
  markers = [
72
83
  "live: marks tests that require a live LLM and database connection (deselect with '-m \"not live\"')",
73
84
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aetherdialect
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Deterministic, validation-first Text-to-SQL system for business databases
5
5
  Author-email: Akul Ameya <akul.ameya@gmail.com>
6
6
  License: MIT
@@ -10,14 +10,15 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: jsonschema<5,>=4.0
12
12
  Requires-Dist: openai<3,>=2.0.0
13
- Requires-Dist: sqlglot<30,>=29.0
14
13
  Requires-Dist: platformdirs<5,>=2.0.0
15
14
  Requires-Dist: python-dotenv<2,>=1.0.0
15
+ Requires-Dist: SQLAlchemy<3,>=2.0
16
16
  Provides-Extra: databricks
17
+ Requires-Dist: sqlglot<30,>=29.0; extra == "databricks"
17
18
  Requires-Dist: pyspark<4,>=3.3; extra == "databricks"
18
19
  Requires-Dist: databricks-sql-connector<4,>=3.0; extra == "databricks"
20
+ Requires-Dist: databricks-sqlalchemy<3,>=2.0; extra == "databricks"
19
21
  Provides-Extra: postgresql
20
- Requires-Dist: SQLAlchemy<3,>=2.0; extra == "postgresql"
21
22
  Requires-Dist: psycopg2-binary<3,>=2.9; extra == "postgresql"
22
23
  Requires-Dist: pglast<8,>=5.0; extra == "postgresql"
23
24
  Provides-Extra: dev
@@ -44,14 +45,15 @@ pip install "text2sql[databricks]"
44
45
  pip install "aetherdialect[postgresql,databricks]"
45
46
  ```
46
47
 
47
- Requires Python ≥ 3.10 and an [OpenAI API key](https://platform.openai.com/api-keys).
48
+ Requires Python ≥ 3.10 and either an [OpenAI API key](https://platform.openai.com/api-keys) or Azure OpenAI credentials.
48
49
 
49
- | Extra | Brings in | Use when |
50
- | ------------ | ---------------------------------------------- | --------------------------- |
51
- | `postgresql` | SQLAlchemy, PostgreSQL driver, `pglast` | `engine="postgresql"` |
52
- | `databricks` | PySpark, Databricks SQL connector | `engine="databricks"` |
50
+ | Extra | Brings in | Use when |
51
+ | ------------ | ------------------------------------------------------------------------- | --------------------- |
52
+ | (base) | **SQLAlchemy** (shared introspection / execution interface) | Always installed |
53
+ | `postgresql` | PostgreSQL driver (`psycopg2-binary`), **`pglast`** | `engine="postgresql"` |
54
+ | `databricks` | PySpark, Databricks SQL connector, **`databricks-sqlalchemy`**, `sqlglot` | `engine="databricks"` |
53
55
 
54
- **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect. The base package already depends on `sqlglot`; `pglast` is installed with the `postgresql` extra.
56
+ **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect.
55
57
 
56
58
  ---
57
59
 
@@ -91,7 +93,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
91
93
  - **Determinism over creativity** — prefer a correct, boring plan to a novel one.
92
94
  - **Correct joins over clever SQL** — join paths come from **foreign keys** and precomputed paths, not free-form guessing.
93
95
  - **Validate before execute** — schema checks, intent consistency, join shape, and dialect-safe **read-only** `SELECT` rules.
94
- - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, generate or repair SQL; everything else is rules and stores.
96
+ - **Minimal LLM surface** — parse intent, resolve ambiguous joins when needed, repair SQL; everything else is rules and stores.
95
97
  - **Safe defaults for non-analysts** — narrow allowed SQL; you still choose **database credentials** (read-only vs write-capable is outside this library).
96
98
 
97
99
  ---
@@ -100,28 +102,28 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
100
102
 
101
103
  **Backends**
102
104
 
103
- - PostgreSQL via SQLAlchemy.
104
- - Databricks via Unity Catalog introspection (with optional DDL file fallback when the catalog is empty).
105
+ - PostgreSQL
106
+ - Databricks
105
107
 
106
108
  **Schema**
107
109
 
108
110
  - Load from **live introspection** (primary).
109
111
  - Optional **`CREATE TABLE` file** as extra or fallback (especially when you cannot reach all metadata from the driver).
110
112
  - Cached schema snapshot per connection fingerprint so restarts avoid re-reflecting unchanged databases.
111
- - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling.
112
- - Optional **human notes** (plain text) fed once when the schema graph is built: richer **descriptions**, **roles**, and optional **sensitivity** labels — without renaming tables or inventing columns.
113
+ - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling — all assigned when the graph is built (reflection, DDL, profiling, and optional notes).
114
+ - Optional **human notes** (plain text), via `Text2SQL(..., notes_file=...)` (see **[USAGE.md](USAGE.md)**): read once when the schema graph is **first** created (not on cache load). The LLM uses them to refine **table and column roles**, natural-language **descriptions**, and optional **sensitivity** labels — without renaming tables or inventing columns.
113
115
  - Optional **deny lists** for tables or columns so they stay out of prompts and can be stripped from intents.
114
116
 
115
117
  **Intent / SQL shape (analytical subset)**
116
118
 
117
119
  - **Queries:** `SELECT` only (enforced with pattern checks and dialect parsing). **CTEs** reuse the same intent model as the outer query.
118
- - **Joins:** only along **FK-backed paths**; join type for injected joins is chosen **deterministically** from table roles (e.g. dimension side as `LEFT` where applicable).
120
+ - **Joins:** related tables are wired using the schema’s relationships; when more than one valid path could link the same tables, one coherent path is chosen for the whole query, and join style follows table roles (e.g. `LEFT` toward dimensions where that fits). Self-joins use **CTEs** instead of repeating the same base table in one `FROM` chain.
119
121
  - **Select list:** bare columns, **aggregates** (`COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, etc.), **arithmetic and string expressions** where the schema allows them, **`DISTINCT`**, and **scalar functions** subject to column metadata.
120
122
  - **Filters / boolean logic:** comparisons, **`AND` / `OR`**, **`IN`**, **`LIKE`**; **`ILIKE` / `NOT ILIKE` on PostgreSQL only** (intent stays dialect-agnostic; SQL rendering differs). Null / boolean value normalization in the repair chain.
121
123
  - **`BETWEEN`** in intent is **decomposed** into a pair of comparable predicates.
122
124
  - **Grouping / ordering:** **`GROUP BY`**, **`HAVING`** (aggregate-aware), **`ORDER BY`**, **`LIMIT`**; rules tie **grain** (row-level vs grouped) to aggregates and grouped columns.
123
125
  - **Dates:** structured **`date_window`** (anchor unit + offset) and **date-difference** filters between columns where supported.
124
- - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
126
+ - **Windows:** `ROW_NUMBER`, `RANK`, `DENSE_RANK`, `LAG`, `LEAD`, `FIRST_VALUE`, `LAST_VALUE`, and windowed `SUM` / `AVG` on select columns (main query and CTEs).
125
127
  - **`CASE` / `WHEN`:** only in the **select list** in the intent model (not in `WHERE` / `HAVING`).
126
128
  - **Arrays / lists:** membership-style filters; SQL uses dialect-appropriate forms; optional **UNNEST / EXPLODE-style** expansion in CTE select lists for typed array columns.
127
129
  - **Metadata:** **UNIQUE** (and related) when reflection or DDL exposes it, for ranking “human readable” identifiers.
@@ -137,7 +139,7 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
137
139
  ## What it is not
138
140
 
139
141
  - Not a **full SQL** or **stored-procedure** generator: no `UNION`/`INTERSECT`/`EXCEPT`, no correlated subqueries, no `EXISTS`, no `LATERAL`, no **DML/DDL**, no arbitrary **RIGHT/FULL OUTER** join policy in the constrained path.
140
- - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` / `EXPLAIN` as you prefer).
142
+ - Not a substitute for **database security**: use credentials with **least privilege** (`SELECT` and `EXPLAIN`).
141
143
  - Not **schema-agnostic**: quality depends on **FKs**, sensible types, and optional notes for domain language.
142
144
 
143
145
  ---
@@ -145,31 +147,27 @@ A **validation-first** text-to-SQL layer for **PostgreSQL** and **Databricks**.
145
147
  ## How a question becomes SQL
146
148
 
147
149
  1. **Template match** — if a trusted pattern fits, reuse parameterized SQL (often **no** SQL LLM call).
148
- 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain** (see below).
150
+ 2. **Intent parse** — structured intent from the question + schema summary, then a long **deterministic repair chain**.
149
151
  3. **Join resolution** — choose among valid **FK paths** for the intent’s tables; disambiguation may use the LLM when multiple paths tie.
150
- 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and optionally **`EXPLAIN`** when an engine is available.
152
+ 4. **SQL generation & validation** — deterministic skeleton, injected joins, LLM fill/repair **under constraints**, then **semantic validation**, **`SELECT`-only / forbidden-pattern checks**, **dialect AST** validation (**`pglast`** or **`sqlglot`**), and **`EXPLAIN`**.
151
153
  5. **Execute** (where the mode allows) and **learn** — accept → promote template trust; reject → record negative pattern.
152
154
 
153
155
  ---
154
156
 
155
157
  ## Validation (layers)
156
158
 
157
- - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (`pglast` on PostgreSQL, **`sqlglot` (Spark)** on Databricks). When a live **engine** is passed in, **`EXPLAIN`** can be used as an extra executability check.
159
+ - **Safety / shape** — `SELECT`-only enforcement, configurable **forbidden SQL** substrings, then **dialect `ast_validate`** (**`pglast`** or **`sqlglot`**). **`EXPLAIN`** is used as an extra executability check.
158
160
  - **Schema vs intent** — tables/columns/CTEs, **selectability**, access and sensitivity policy, window / CASE / array shapes, filter and HAVING ops per column, aggregate roles in select/HAVING/ORDER BY, scalar function typing, filter value types vs column types, null/date-window/date-diff rules.
159
- - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment (one of many cross-checks, not the only story).
161
+ - **Semantic consistency** — grouped queries require proper aggregation; contradictions and impossible HAVING; **grain** alignment.
160
162
  - **Joins** — paths must match the FK graph; the guarded path avoids ad-hoc join guessing.
161
163
 
162
- ## Deterministic repairs (after intent parse)
163
-
164
- Applied in order (high level): `COUNT(*)` normalization; CTE naming and output aliases; qualify CTE outputs; sanitize table names; grain rules for grouped CTE usage and **grain consistency**; strip redundant `GROUP BY`; normalize filters/HAVING; null-equality fixes; strip join-condition leakage into filters; per-CTE sort order; simplify expressions; `IN` value normalization; date-diff classification and raw-value fixes; **`BETWEEN` → paired predicates**; auto-repair filters/HAVING; strip impossible HAVING; FK filter type repair; filter value case / enum alignment; boolean and null filter values; expand FK selects to descriptive columns; deduplicate contradictory filters; redundant PK re-qualification; **window**, **CASE**, and **array** intent repairs; sensitivity and access policy enforcement on the intent.
165
-
166
164
  ---
167
165
 
168
166
  ## Learning and reuse
169
167
 
170
168
  - **Accepted templates** — intent fingerprint, parameterized SQL, optional example question, **trust** that rises with validation and falls with rejection.
171
169
  - **Rejected templates** — categorized failures so similar bad intents are discouraged.
172
- - Persistence is under a **per-connection artifact directory** (see USAGE); you can back it up or reset it by removing that directory.
170
+ - Persistence is under a **per-connection artifact directory** (see **[USAGE.md](USAGE.md)**); you can back it up or reset it by removing that directory.
173
171
 
174
172
  ---
175
173
 
@@ -13,7 +13,6 @@ src/text2sql/contracts_core.py
13
13
  src/text2sql/core_utils.py
14
14
  src/text2sql/dialect.py
15
15
  src/text2sql/expansion_ops.py
16
- src/text2sql/expansion_rules.py
17
16
  src/text2sql/intent_expr.py
18
17
  src/text2sql/intent_process.py
19
18
  src/text2sql/intent_repair.py
@@ -40,13 +39,14 @@ tests/test_contracts.py
40
39
  tests/test_core_utils.py
41
40
  tests/test_dialect.py
42
41
  tests/test_expansion_ops.py
43
- tests/test_expansion_rules.py
44
42
  tests/test_intent_expr.py
45
43
  tests/test_intent_process.py
46
44
  tests/test_intent_repair.py
47
45
  tests/test_intent_resolve.py
46
+ tests/test_join_bool_cte_matrix.py
48
47
  tests/test_live_testing.py
49
48
  tests/test_main_execution.py
49
+ tests/test_pipeline_targeted.py
50
50
  tests/test_pipeline_units.py
51
51
  tests/test_qsim_ops.py
52
52
  tests/test_qsim_sample.py
@@ -1,12 +1,14 @@
1
1
  jsonschema<5,>=4.0
2
2
  openai<3,>=2.0.0
3
- sqlglot<30,>=29.0
4
3
  platformdirs<5,>=2.0.0
5
4
  python-dotenv<2,>=1.0.0
5
+ SQLAlchemy<3,>=2.0
6
6
 
7
7
  [databricks]
8
+ sqlglot<30,>=29.0
8
9
  pyspark<4,>=3.3
9
10
  databricks-sql-connector<4,>=3.0
11
+ databricks-sqlalchemy<3,>=2.0
10
12
 
11
13
  [dev]
12
14
  pytest>=8.0
@@ -21,6 +23,5 @@ black<25,>=24
21
23
  docformatter<2,>=1.7
22
24
 
23
25
  [postgresql]
24
- SQLAlchemy<3,>=2.0
25
26
  psycopg2-binary<3,>=2.9
26
27
  pglast<8,>=5.0
@@ -0,0 +1,10 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from .text2sql import Text2SQL
4
+
5
+ try:
6
+ __version__ = version("aetherdialect")
7
+ except PackageNotFoundError:
8
+ __version__ = "0.0.0+dev"
9
+
10
+ __all__ = ["Text2SQL", "__version__"]