aetherdialect 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aetherdialect-0.1.7/PKG-INFO +99 -0
- aetherdialect-0.1.7/README.md +63 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/pyproject.toml +92 -93
- aetherdialect-0.1.7/src/aetherdialect/__init__.py +62 -0
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_config.py +656 -199
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_contracts_base.py +1096 -106
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_contracts_core.py +552 -17
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_core_utils.py +490 -130
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_dialect.py +307 -54
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_expansion_ops.py +317 -13
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_intent_expr.py +113 -11
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_intent_process.py +1339 -191
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_intent_repair.py +186 -30
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_intent_resolve.py +285 -6
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_live_testing.py +193 -120
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_main_execution.py +2116 -404
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_pipeline.py +656 -294
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_qsim_ops.py +1641 -1309
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_schema.py +799 -299
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_schema_profiling.py +536 -112
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_seed_warmup.py +747 -95
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_sql_gen.py +61 -14
- aetherdialect-0.1.7/src/aetherdialect/_sql_to_intent.py +3173 -0
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_templates.py +1195 -156
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_utils.py +714 -78
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_validation_agg.py +1095 -1092
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_validation_execute.py +68 -19
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_validation_schema.py +139 -108
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_validation_semantic.py +612 -148
- aetherdialect-0.1.7/src/aetherdialect/text2sql.py +763 -0
- aetherdialect-0.1.7/src/aetherdialect.egg-info/PKG-INFO +99 -0
- aetherdialect-0.1.7/src/aetherdialect.egg-info/SOURCES.txt +78 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/src/aetherdialect.egg-info/requires.txt +0 -1
- aetherdialect-0.1.7/src/aetherdialect.egg-info/top_level.txt +1 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_artifact_lock.py +72 -72
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_bool_op_combinations.py +634 -634
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_config.py +199 -25
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_contracts.py +218 -63
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_core_utils.py +2089 -2017
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_dialect.py +823 -853
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_expansion_ops.py +1797 -1778
- aetherdialect-0.1.7/tests/test_imports.py +77 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_intent_expr.py +32 -20
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_intent_process.py +2517 -2218
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_intent_repair.py +147 -10
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_intent_resolve.py +196 -9
- aetherdialect-0.1.7/tests/test_literal_attribution.py +118 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_live_testing.py +994 -990
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_main_execution.py +385 -97
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_migration_diff_driven.py +428 -386
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_phase_c_repairs.py +152 -152
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_pipeline.py +3490 -3279
- aetherdialect-0.1.7/tests/test_programmatic_session_contract.py +264 -0
- aetherdialect-0.1.7/tests/test_public_api_surface.py +218 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_qsim.py +1631 -1631
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_qsim_ops.py +4 -4
- aetherdialect-0.1.7/tests/test_retryable.py +30 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema.py +311 -148
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_cache_probe.py +314 -314
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_diff_apply.py +367 -367
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_diff_renames.py +517 -517
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_inference_paths.py +4 -4
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_profiling.py +324 -23
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_schema_scope_change.py +282 -283
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_seed_warmup.py +126 -40
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_sql_gen.py +113 -35
- aetherdialect-0.1.7/tests/test_sql_to_intent.py +515 -0
- aetherdialect-0.1.7/tests/test_step26_checklist.py +479 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_templates.py +438 -320
- aetherdialect-0.1.7/tests/test_text2sql.py +441 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_utils.py +2334 -2214
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_validation_agg.py +1165 -1165
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_validation_execute.py +31 -7
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_validation_schema.py +8 -6
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_validation_semantic.py +222 -40
- aetherdialect-0.1.6/PKG-INFO +0 -271
- aetherdialect-0.1.6/README.md +0 -234
- aetherdialect-0.1.6/src/aetherdialect.egg-info/PKG-INFO +0 -271
- aetherdialect-0.1.6/src/aetherdialect.egg-info/SOURCES.txt +0 -70
- aetherdialect-0.1.6/src/aetherdialect.egg-info/top_level.txt +0 -1
- aetherdialect-0.1.6/src/text2sql/__init__.py +0 -29
- aetherdialect-0.1.6/src/text2sql/text2sql.py +0 -501
- aetherdialect-0.1.6/tests/test_text2sql.py +0 -189
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/LICENSE +0 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/setup.cfg +0 -0
- {aetherdialect-0.1.6/src/text2sql → aetherdialect-0.1.7/src/aetherdialect}/_qsim.py +0 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/src/aetherdialect.egg-info/dependency_links.txt +0 -0
- {aetherdialect-0.1.6 → aetherdialect-0.1.7}/tests/test_notebook_export_signature.py +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: aetherdialect
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: Deterministic, validation-first Text-to-SQL system for business databases
|
|
5
|
+
Author-email: Akul Ameya <akul.ameya@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/dkecompany/aether-dialect
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas<3,>=2.0
|
|
12
|
+
Requires-Dist: packaging<25,>=23.0
|
|
13
|
+
Requires-Dist: jsonschema<5,>=4.0
|
|
14
|
+
Requires-Dist: openai<3,>=2.0.0
|
|
15
|
+
Requires-Dist: platformdirs<5,>=2.0.0
|
|
16
|
+
Requires-Dist: sqlglot<30,>=29.0
|
|
17
|
+
Requires-Dist: SQLAlchemy<3,>=2.0
|
|
18
|
+
Provides-Extra: databricks
|
|
19
|
+
Requires-Dist: databricks-sql-connector<4,>=3.0; extra == "databricks"
|
|
20
|
+
Requires-Dist: databricks-sqlalchemy<3,>=2.0; extra == "databricks"
|
|
21
|
+
Provides-Extra: postgresql
|
|
22
|
+
Requires-Dist: pglast<8,>=5.0; extra == "postgresql"
|
|
23
|
+
Requires-Dist: psycopg2-binary<3,>=2.9; extra == "postgresql"
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
27
|
+
Requires-Dist: vulture<3,>=2.11; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
29
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
30
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
31
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
33
|
+
Requires-Dist: black<25,>=24; extra == "dev"
|
|
34
|
+
Requires-Dist: docformatter<2,>=1.7; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# AetherDialect — validation-first Text-to-SQL for PostgreSQL and Databricks
|
|
38
|
+
|
|
39
|
+
`aetherdialect` turns analytical questions into read-only `SELECT` pipelines: a structured intent representation, multi-stage validation (including dialect `EXPLAIN`), template reuse from accepted answers, and bounded learning from rejections. The language model fills bounded slots in that intent; it does not author unconstrained SQL.
|
|
40
|
+
|
|
41
|
+
## Why this exists
|
|
42
|
+
|
|
43
|
+
Teams need answers from relational data without shipping opaque generated SQL. AetherDialect targets analysts and integrators who want a **repeatable** path from question to result: the same question can return cached SQL with no model round-trip, schema drift surfaces as an explicit migration stop instead of silent breakage, and every generated statement is checked against the catalog and engine before it runs.
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install aetherdialect
|
|
49
|
+
pip install "aetherdialect[postgresql]"
|
|
50
|
+
pip install "aetherdialect[databricks]"
|
|
51
|
+
pip install "aetherdialect[postgresql,databricks]"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Requires Python 3.10 or newer. Configure the LLM and database via a TOML `config_file` (recommended) and/or process environment; the full key list lives in the [API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md).
|
|
55
|
+
|
|
56
|
+
## Quick start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from aetherdialect import SchemaContext, Text2SQL
|
|
60
|
+
|
|
61
|
+
t2s = Text2SQL(
|
|
62
|
+
SchemaContext(),
|
|
63
|
+
artifacts_dir="./my_run",
|
|
64
|
+
config_file="./aetherdialect.toml",
|
|
65
|
+
)
|
|
66
|
+
t2s.run_interactive()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
`run_interactive` prompts once per invocation; call it again for another question. For programmatic UIs, use `Text2SQL.session()` or `Text2SQL.asession()` and drive `SessionStep` objects — see the [Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md).
|
|
70
|
+
|
|
71
|
+
`dry_run_warmup` exercises a newline-delimited seed question file through validation and execution without persisting templates; see the [User guide — Seed warmup](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md#seed-warmup).
|
|
72
|
+
|
|
73
|
+
## What makes this different
|
|
74
|
+
|
|
75
|
+
- Constant-learning cache: exact `q_norm` reuse returns SQL with zero LLM calls; near-paraphrases (token-level Levenshtein distance of at most 2) reuse the same template with one bounded LLM call that only extracts parameters. ([How it works](https://github.com/dkecompany/aether-dialect/blob/main/docs/HOW_IT_WORKS.md))
|
|
76
|
+
|
|
77
|
+
- Schema overrides are a JSON file you read, edit, and version. Every override (descriptions, roles, sensitivity, added or suppressed foreign keys, primary key endorsements) is replayed on every cache invalidation. ([API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md))
|
|
78
|
+
|
|
79
|
+
- Migration is never silent. When the catalog changes structurally, the engine writes a `schema_migration_map.json` skeleton and stops. You decide the action; it resumes. ([User guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md))
|
|
80
|
+
|
|
81
|
+
- Generated SQL passes through four validation layers (intent JSON, dialect AST, schema/catalog alignment, dialect EXPLAIN). The LLM never emits raw SQL; it fills bounded slots in a structured intent IR. ([Security](https://github.com/dkecompany/aether-dialect/blob/main/docs/SECURITY.md))
|
|
82
|
+
|
|
83
|
+
- Reader / writer split is built in. Many readers can ask questions; the engine drains `write_queue.jsonl` at the **start of every writer-mode turn** under the artifacts lock so learning persists without readers touching the partitioned template store files. ([Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md))
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
| Doc | When to read it |
|
|
88
|
+
| ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
89
|
+
| [User guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md) | Install, first run, asking questions, notes, overrides, migration, seed warmup and dry-run warmup, pitfalls. |
|
|
90
|
+
| [Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md) | Embedding patterns, sessions, multi-turn relay, threading, reader/writer split and queue, audit and diagnostics, cache reset. |
|
|
91
|
+
| [API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md) | Types, `config_file` TOML schema, methods, schema overrides JSON, diagnostic codes, exceptions. |
|
|
92
|
+
| [How it works](https://github.com/dkecompany/aether-dialect/blob/main/docs/HOW_IT_WORKS.md) | Architecture diagrams, schema build, engine storage, question pipeline, migration, overrides, validation, learning model, configuration, observability, warmup/QSim, offline-mock design pointer. |
|
|
93
|
+
| [Offline testing and mock LLM (design)](https://github.com/dkecompany/aether-dialect/blob/main/docs/OFFLINE_AND_MOCK_LLM.md) | Planned mock LLM provider and fixture workflow for hermetic tests; links to `dev_workspace/mock.txt` (not on PyPI). |
|
|
94
|
+
| [Security](https://github.com/dkecompany/aether-dialect/blob/main/docs/SECURITY.md) | Threat model, LLM context inventory, on-disk inventory, sensitivity model, deny lists, raw-SQL impossibility, EXPLAIN gate, network. |
|
|
95
|
+
| [Support matrix](https://github.com/dkecompany/aether-dialect/blob/main/docs/SUPPORT_MATRIX.md) | Per-engine table, IR-unsupported constructs and reformulations. |
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
See [LICENSE](https://github.com/dkecompany/aether-dialect/blob/main/LICENSE).
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# AetherDialect — validation-first Text-to-SQL for PostgreSQL and Databricks
|
|
2
|
+
|
|
3
|
+
`aetherdialect` turns analytical questions into read-only `SELECT` pipelines: a structured intent representation, multi-stage validation (including dialect `EXPLAIN`), template reuse from accepted answers, and bounded learning from rejections. The language model fills bounded slots in that intent; it does not author unconstrained SQL.
|
|
4
|
+
|
|
5
|
+
## Why this exists
|
|
6
|
+
|
|
7
|
+
Teams need answers from relational data without shipping opaque generated SQL. AetherDialect targets analysts and integrators who want a **repeatable** path from question to result: the same question can return cached SQL with no model round-trip, schema drift surfaces as an explicit migration stop instead of silent breakage, and every generated statement is checked against the catalog and engine before it runs.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install aetherdialect
|
|
13
|
+
pip install "aetherdialect[postgresql]"
|
|
14
|
+
pip install "aetherdialect[databricks]"
|
|
15
|
+
pip install "aetherdialect[postgresql,databricks]"
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Requires Python 3.10 or newer. Configure the LLM and database via a TOML `config_file` (recommended) and/or process environment; the full key list lives in the [API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md).
|
|
19
|
+
|
|
20
|
+
## Quick start
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from aetherdialect import SchemaContext, Text2SQL
|
|
24
|
+
|
|
25
|
+
t2s = Text2SQL(
|
|
26
|
+
SchemaContext(),
|
|
27
|
+
artifacts_dir="./my_run",
|
|
28
|
+
config_file="./aetherdialect.toml",
|
|
29
|
+
)
|
|
30
|
+
t2s.run_interactive()
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`run_interactive` prompts once per invocation; call it again for another question. For programmatic UIs, use `Text2SQL.session()` or `Text2SQL.asession()` and drive `SessionStep` objects — see the [Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md).
|
|
34
|
+
|
|
35
|
+
`dry_run_warmup` exercises a newline-delimited seed question file through validation and execution without persisting templates; see the [User guide — Seed warmup](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md#seed-warmup).
|
|
36
|
+
|
|
37
|
+
## What makes this different
|
|
38
|
+
|
|
39
|
+
- Constant-learning cache: exact `q_norm` reuse returns SQL with zero LLM calls; near-paraphrases (token-level Levenshtein distance of at most 2) reuse the same template with one bounded LLM call that only extracts parameters. ([How it works](https://github.com/dkecompany/aether-dialect/blob/main/docs/HOW_IT_WORKS.md))
|
|
40
|
+
|
|
41
|
+
- Schema overrides are a JSON file you read, edit, and version. Every override (descriptions, roles, sensitivity, added or suppressed foreign keys, primary key endorsements) is replayed on every cache invalidation. ([API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md))
|
|
42
|
+
|
|
43
|
+
- Migration is never silent. When the catalog changes structurally, the engine writes a `schema_migration_map.json` skeleton and stops. You decide the action; it resumes. ([User guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md))
|
|
44
|
+
|
|
45
|
+
- Generated SQL passes through four validation layers (intent JSON, dialect AST, schema/catalog alignment, dialect EXPLAIN). The LLM never emits raw SQL; it fills bounded slots in a structured intent IR. ([Security](https://github.com/dkecompany/aether-dialect/blob/main/docs/SECURITY.md))
|
|
46
|
+
|
|
47
|
+
- Reader / writer split is built in. Many readers can ask questions; the engine drains `write_queue.jsonl` at the **start of every writer-mode turn** under the artifacts lock so learning persists without readers touching the partitioned template store files. ([Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md))
|
|
48
|
+
|
|
49
|
+
## Documentation
|
|
50
|
+
|
|
51
|
+
| Doc | When to read it |
|
|
52
|
+
| ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
53
|
+
| [User guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/USER_GUIDE.md) | Install, first run, asking questions, notes, overrides, migration, seed warmup and dry-run warmup, pitfalls. |
|
|
54
|
+
| [Integrator guide](https://github.com/dkecompany/aether-dialect/blob/main/docs/INTEGRATOR_GUIDE.md) | Embedding patterns, sessions, multi-turn relay, threading, reader/writer split and queue, audit and diagnostics, cache reset. |
|
|
55
|
+
| [API reference](https://github.com/dkecompany/aether-dialect/blob/main/docs/API_REFERENCE.md) | Types, `config_file` TOML schema, methods, schema overrides JSON, diagnostic codes, exceptions. |
|
|
56
|
+
| [How it works](https://github.com/dkecompany/aether-dialect/blob/main/docs/HOW_IT_WORKS.md) | Architecture diagrams, schema build, engine storage, question pipeline, migration, overrides, validation, learning model, configuration, observability, warmup/QSim, offline-mock design pointer. |
|
|
57
|
+
| [Offline testing and mock LLM (design)](https://github.com/dkecompany/aether-dialect/blob/main/docs/OFFLINE_AND_MOCK_LLM.md) | Planned mock LLM provider and fixture workflow for hermetic tests; links to `dev_workspace/mock.txt` (not on PyPI). |
|
|
58
|
+
| [Security](https://github.com/dkecompany/aether-dialect/blob/main/docs/SECURITY.md) | Threat model, LLM context inventory, on-disk inventory, sensitivity model, deny lists, raw-SQL impossibility, EXPLAIN gate, network. |
|
|
59
|
+
| [Support matrix](https://github.com/dkecompany/aether-dialect/blob/main/docs/SUPPORT_MATRIX.md) | Per-engine table, IR-unsupported constructs and reformulations. |
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
See [LICENSE](https://github.com/dkecompany/aether-dialect/blob/main/LICENSE).
|
|
@@ -1,93 +1,92 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools>=68", "wheel"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
|
-
[project]
|
|
6
|
-
name = "aetherdialect"
|
|
7
|
-
version = "0.1.6"
|
|
8
|
-
description = "Deterministic, validation-first Text-to-SQL system for business databases"
|
|
9
|
-
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.10"
|
|
11
|
-
license = {text = "MIT"}
|
|
12
|
-
authors = [{name = "Akul Ameya", email = "akul.ameya@gmail.com"}]
|
|
13
|
-
dependencies = [
|
|
14
|
-
"pandas>=2.0,<3",
|
|
15
|
-
"packaging>=23.0,<25",
|
|
16
|
-
"jsonschema>=4.0,<5",
|
|
17
|
-
"openai>=2.0.0,<3",
|
|
18
|
-
"platformdirs>=2.0.0,<5",
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
[
|
|
25
|
-
databricks
|
|
26
|
-
"databricks-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"pytest>=
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
"__pycache__",
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
[
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
wrap-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
[
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
[
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
Homepage = "https://github.com/akul-ameya/aetherdialect"
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "aetherdialect"
|
|
7
|
+
version = "0.1.7"
|
|
8
|
+
description = "Deterministic, validation-first Text-to-SQL system for business databases"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "Akul Ameya", email = "akul.ameya@gmail.com"}]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"pandas>=2.0,<3",
|
|
15
|
+
"packaging>=23.0,<25",
|
|
16
|
+
"jsonschema>=4.0,<5",
|
|
17
|
+
"openai>=2.0.0,<3",
|
|
18
|
+
"platformdirs>=2.0.0,<5",
|
|
19
|
+
"sqlglot>=29.0,<30",
|
|
20
|
+
"SQLAlchemy>=2.0,<3",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
databricks = [
|
|
25
|
+
"databricks-sql-connector>=3.0,<4",
|
|
26
|
+
"databricks-sqlalchemy>=2.0,<3",
|
|
27
|
+
]
|
|
28
|
+
postgresql = [
|
|
29
|
+
"pglast>=5.0,<8",
|
|
30
|
+
"psycopg2-binary>=2.9,<3",
|
|
31
|
+
]
|
|
32
|
+
dev = [
|
|
33
|
+
"pytest>=8.0",
|
|
34
|
+
"pytest-cov>=5.0",
|
|
35
|
+
"vulture>=2.11,<3",
|
|
36
|
+
"ruff>=0.4",
|
|
37
|
+
"mypy>=1.10",
|
|
38
|
+
"twine>=5.0",
|
|
39
|
+
"build>=1.0",
|
|
40
|
+
"pre-commit>=3.0",
|
|
41
|
+
"black>=24,<25",
|
|
42
|
+
"docformatter>=1.7,<2",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
line-length = 120
|
|
47
|
+
target-version = "py310"
|
|
48
|
+
src = ["src", "tests", "live_tests"]
|
|
49
|
+
exclude = [
|
|
50
|
+
"dev_workspace",
|
|
51
|
+
".venv",
|
|
52
|
+
"venv",
|
|
53
|
+
".pytest_cache",
|
|
54
|
+
"__pycache__",
|
|
55
|
+
"src/aetherdialect/__pycache__",
|
|
56
|
+
"tests/__pycache__",
|
|
57
|
+
"live_tests/__pycache__"
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint]
|
|
61
|
+
select = ["E", "F", "I", "B", "UP"]
|
|
62
|
+
ignore = ["E501"]
|
|
63
|
+
|
|
64
|
+
[tool.ruff.format]
|
|
65
|
+
quote-style = "double"
|
|
66
|
+
indent-style = "space"
|
|
67
|
+
line-ending = "auto"
|
|
68
|
+
|
|
69
|
+
[tool.docformatter]
|
|
70
|
+
wrap-summaries = 72
|
|
71
|
+
wrap-descriptions = 72
|
|
72
|
+
style = "google"
|
|
73
|
+
|
|
74
|
+
[tool.mypy]
|
|
75
|
+
python_version = "3.10"
|
|
76
|
+
strict = true
|
|
77
|
+
|
|
78
|
+
[tool.pytest.ini_options]
|
|
79
|
+
testpaths = ["tests", "live_tests"]
|
|
80
|
+
pythonpath = ["src"]
|
|
81
|
+
markers = [
|
|
82
|
+
"live: integration test against a real database and LLM",
|
|
83
|
+
"live_no_llm: integration test against a real database without LLM calls",
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
[tool.setuptools.packages.find]
|
|
87
|
+
where = ["src"]
|
|
88
|
+
include = ["aetherdialect*"]
|
|
89
|
+
exclude = ["__pycache__"]
|
|
90
|
+
|
|
91
|
+
[project.urls]
|
|
92
|
+
Homepage = "https://github.com/dkecompany/aether-dialect"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Stable import surface for the Text2SQL package."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from ._config import LlmExecutionConfig, load_runtime_config
|
|
6
|
+
from ._contracts_base import (
|
|
7
|
+
AuditEvent,
|
|
8
|
+
ConfigError,
|
|
9
|
+
ConnectionError,
|
|
10
|
+
DatabasePingFailed,
|
|
11
|
+
Diagnostic,
|
|
12
|
+
LlmTransientFailure,
|
|
13
|
+
MigrationPendingError,
|
|
14
|
+
MigrationPreview,
|
|
15
|
+
RetryableError,
|
|
16
|
+
RuntimeConfig,
|
|
17
|
+
SchemaAccessError,
|
|
18
|
+
SchemaContext,
|
|
19
|
+
SessionActiveError,
|
|
20
|
+
SessionStep,
|
|
21
|
+
StatementTimeoutError,
|
|
22
|
+
)
|
|
23
|
+
from ._main_execution import PipelineSession
|
|
24
|
+
from .text2sql import (
|
|
25
|
+
AsyncPipelineSession,
|
|
26
|
+
ConfigSnapshot,
|
|
27
|
+
QSimSummarySnapshot,
|
|
28
|
+
SchemaStatsSnapshot,
|
|
29
|
+
SeedWarmupSummarySnapshot,
|
|
30
|
+
Text2SQL,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
__version__ = version("aetherdialect")
|
|
35
|
+
except PackageNotFoundError:
|
|
36
|
+
__version__ = "0.0.0+dev"
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"AsyncPipelineSession",
|
|
40
|
+
"AuditEvent",
|
|
41
|
+
"PipelineSession",
|
|
42
|
+
"ConfigError",
|
|
43
|
+
"ConfigSnapshot",
|
|
44
|
+
"DatabasePingFailed",
|
|
45
|
+
"Diagnostic",
|
|
46
|
+
"LlmExecutionConfig",
|
|
47
|
+
"LlmTransientFailure",
|
|
48
|
+
"MigrationPendingError",
|
|
49
|
+
"MigrationPreview",
|
|
50
|
+
"QSimSummarySnapshot",
|
|
51
|
+
"RetryableError",
|
|
52
|
+
"RuntimeConfig",
|
|
53
|
+
"SchemaAccessError",
|
|
54
|
+
"SchemaContext",
|
|
55
|
+
"SchemaStatsSnapshot",
|
|
56
|
+
"SeedWarmupSummarySnapshot",
|
|
57
|
+
"SessionActiveError",
|
|
58
|
+
"SessionStep",
|
|
59
|
+
"StatementTimeoutError",
|
|
60
|
+
"Text2SQL",
|
|
61
|
+
"__version__",
|
|
62
|
+
]
|