aetherdialect 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {aetherdialect-0.1.3/src/aetherdialect.egg-info → aetherdialect-0.1.4}/PKG-INFO +16 -15
  2. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/README.md +234 -234
  3. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/pyproject.toml +2 -1
  4. {aetherdialect-0.1.3 → aetherdialect-0.1.4/src/aetherdialect.egg-info}/PKG-INFO +16 -15
  5. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/aetherdialect.egg-info/SOURCES.txt +3 -8
  6. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/aetherdialect.egg-info/requires.txt +1 -0
  7. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_config.py +2180 -1974
  8. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_contracts_base.py +2262 -1738
  9. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_contracts_core.py +3820 -3335
  10. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_core_utils.py +1927 -1912
  11. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_dialect.py +4297 -4025
  12. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_expansion_ops.py +2409 -2035
  13. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_intent_expr.py +2903 -2733
  14. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_intent_process.py +2958 -3267
  15. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_intent_repair.py +4028 -3636
  16. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_intent_resolve.py +2680 -2173
  17. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_live_testing.py +58 -38
  18. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_main_execution.py +2767 -2789
  19. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_pipeline.py +3127 -3253
  20. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_qsim.py +33 -5
  21. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_schema.py +6287 -4742
  22. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_schema_profiling.py +3085 -2961
  23. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_seed_warmup.py +10 -5
  24. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_sql_gen.py +3618 -3398
  25. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_templates.py +1777 -2705
  26. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_utils.py +1189 -1203
  27. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_validation_execute.py +1649 -1542
  28. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_validation_schema.py +197 -80
  29. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_validation_semantic.py +2526 -2496
  30. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/text2sql.py +494 -418
  31. aetherdialect-0.1.4/tests/test_bool_op_combinations.py +634 -0
  32. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_config.py +3 -3
  33. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_contracts.py +2654 -2646
  34. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_core_utils.py +89 -39
  35. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_dialect.py +41 -2
  36. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_expansion_ops.py +755 -12
  37. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_intent_expr.py +3928 -3944
  38. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_intent_process.py +2228 -2076
  39. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_intent_repair.py +4960 -4922
  40. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_intent_resolve.py +2806 -2546
  41. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_live_testing.py +991 -991
  42. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_main_execution.py +4 -4
  43. aetherdialect-0.1.4/tests/test_notebook_export_signature.py +27 -0
  44. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_pipeline.py +3295 -3170
  45. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_qsim.py +1 -0
  46. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema.py +2880 -2627
  47. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema_cache_probe.py +314 -314
  48. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema_diff_apply.py +25 -4
  49. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema_diff_renames.py +5 -3
  50. aetherdialect-0.1.4/tests/test_schema_inference_paths.py +168 -0
  51. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema_profiling.py +1615 -1663
  52. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_schema_scope_change.py +37 -3
  53. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_seed_warmup.py +8 -8
  54. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_sql_gen.py +3194 -3033
  55. aetherdialect-0.1.4/tests/test_templates.py +321 -0
  56. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_text2sql.py +43 -0
  57. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_utils.py +13 -13
  58. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_validation_execute.py +1049 -905
  59. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_validation_schema.py +2219 -2169
  60. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_validation_semantic.py +3092 -3109
  61. aetherdialect-0.1.3/tests/test_join_bool_cte_matrix.py +0 -383
  62. aetherdialect-0.1.3/tests/test_pipeline_session.py +0 -159
  63. aetherdialect-0.1.3/tests/test_pipeline_targeted.py +0 -135
  64. aetherdialect-0.1.3/tests/test_pipeline_units.py +0 -1723
  65. aetherdialect-0.1.3/tests/test_qsim_sample.py +0 -799
  66. aetherdialect-0.1.3/tests/test_qsim_struct.py +0 -807
  67. aetherdialect-0.1.3/tests/test_simulator.py +0 -553
  68. aetherdialect-0.1.3/tests/test_simulator_pipeline.py +0 -286
  69. aetherdialect-0.1.3/tests/test_templates.py +0 -2059
  70. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/LICENSE +0 -0
  71. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/setup.cfg +0 -0
  72. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/aetherdialect.egg-info/dependency_links.txt +0 -0
  73. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/aetherdialect.egg-info/top_level.txt +0 -0
  74. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/__init__.py +0 -0
  75. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_qsim_ops.py +0 -0
  76. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/src/text2sql/_validation_agg.py +0 -0
  77. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_artifact_lock.py +0 -0
  78. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_migration_diff_driven.py +0 -0
  79. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_phase_c_repairs.py +0 -0
  80. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_qsim_ops.py +0 -0
  81. {aetherdialect-0.1.3 → aetherdialect-0.1.4}/tests/test_validation_agg.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aetherdialect
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Deterministic, validation-first Text-to-SQL system for business databases
5
5
  Author-email: Akul Ameya <akul.ameya@gmail.com>
6
6
  License: MIT
@@ -8,6 +8,7 @@ Project-URL: Homepage, https://github.com/akul-ameya/aetherdialect
8
8
  Requires-Python: >=3.10
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
+ Requires-Dist: pandas<3,>=2.0
11
12
  Requires-Dist: packaging<25,>=23.0
12
13
  Requires-Dist: jsonschema<5,>=4.0
13
14
  Requires-Dist: openai<3,>=2.0.0
@@ -38,9 +39,9 @@ Dynamic: license-file
38
39
 
39
40
  This library turns **analytical questions** into **read-only `SELECT`** pipelines on **PostgreSQL** or **Databricks**: structured intent, heavy validation (including dialect AST and `EXPLAIN`), optional **template reuse** from accepted answers, and **negative memory** from rejections. When you construct **`Text2SQL`**, it checks **database connectivity**, **LLM reachability**, and whether **on-disk artifacts** still match the live schema.
40
41
 
41
- **Practical tips:** Questions resolve more reliably when you state intent explicitly—entities, grain, filters, time scope, and ordering—instead of leaving those details implied. The same goes for optional domain notes (`SchemaContext.notes_path`, see **[API_REFERENCE.md](API_REFERENCE.md)**): richer notes and clearer questions generally improve routing speed and SQL quality.
42
+ **Practical tips:** Questions resolve more reliably when you state intent explicitly—entities, grain, filters, time scope, and ordering—instead of leaving those details implied. The same goes for optional domain notes (`SchemaContext.notes_file`, see **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**): richer notes and clearer questions generally improve routing speed and SQL quality.
42
43
 
43
- **Internals:** for how the schema graph is built, how columns become visible to the LLM, how joins are picked, and how stored artifacts migrate when the schema drifts, see **[OVERVIEW.md](OVERVIEW.md)**.
44
+ **Internals:** for how the schema graph is built, how columns become visible to the LLM, how joins are picked, and how stored artifacts migrate when the schema drifts, see **[OVERVIEW.md](https://github.com/akul-ameya/aetherdialect/blob/main/OVERVIEW.md)**.
44
45
 
45
46
  ## Installation
46
47
 
@@ -51,13 +52,13 @@ pip install "aetherdialect[databricks]"
51
52
  pip install "aetherdialect[postgresql,databricks]"
52
53
  ```
53
54
 
54
- Requires Python ≥ 3.10 and either an [OpenAI API key](https://platform.openai.com/api-keys) or Azure OpenAI credentials. Construction verifies LLM connectivity for **each distinct** model or deployment the run uses; see **[API_REFERENCE.md](API_REFERENCE.md)** for required variables, optional deployment-name overrides on Azure, and Databricks SQL warehouse vs PySpark.
55
+ Requires Python ≥ 3.10 and either an [OpenAI API key](https://platform.openai.com/api-keys) or Azure OpenAI credentials. Construction verifies LLM connectivity for **each distinct** model or deployment the run uses; see **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)** for required variables, optional deployment-name overrides on Azure, and Databricks SQL warehouse vs PySpark.
55
56
 
56
57
  | Extra | Brings in | Use when |
57
58
  | ------------ | ------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------- |
58
59
  | (base) | **SQLAlchemy** (shared introspection / execution interface) | Always installed |
59
- | `postgresql` | PostgreSQL driver (`psycopg2-binary`), **`pglast`** | PostgreSQL via `PG*` / `POSTGRES_*` env (see **API_REFERENCE.md**) |
60
- | `databricks` | Databricks SQL connector (preferred), PySpark (fallback), **`databricks-sqlalchemy`**, `sqlglot` | Databricks via `DATABRICKS_*` / related aliases (see **API_REFERENCE.md**) |
60
+ | `postgresql` | PostgreSQL driver (`psycopg2-binary`), **`pglast`** | PostgreSQL via `PG*` / `POSTGRES_*` env (see **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**) |
61
+ | `databricks` | Databricks SQL connector (preferred), PySpark (fallback), **`databricks-sqlalchemy`**, `sqlglot` | Databricks via `DATABRICKS_*` / related aliases (see **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**) |
61
62
 
62
63
  **SQL parsing for validation:** PostgreSQL uses **`pglast`** for structural AST checks (join pairs, CTE bodies, `ast_validate`). Databricks / Spark SQL uses **`sqlglot`** with the **Spark** dialect.
63
64
 
@@ -77,9 +78,9 @@ t2s = Text2SQL(
77
78
  t2s.run_interactive()
78
79
  ```
79
80
 
80
- Set database and LLM variables in the process environment or in **`env_file`**. The full matrix is in **[API_REFERENCE.md](API_REFERENCE.md)**. Pass **`artifacts_dir=`** so artifacts are written under `<root>/text2sql`; when omitted, a platform user-data directory is used.
81
+ Set database and LLM variables in the process environment or in **`env_file`**. The full matrix is in **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**. Pass **`artifacts_dir=`** so artifacts are written under `<root>/text2sql`; when omitted, a platform user-data directory is used.
81
82
 
82
- **Interactive two ways:** **`run_interactive()`** is a stdin loop. For your own UI or protocol, use **`Text2SQL.pipeline_session()`** with **`PipelineSession.ask`** and **`PipelineSession.step`**, which return **`SessionStep`** until **`done`** is true. Details are in **[API_REFERENCE.md](API_REFERENCE.md)**.
83
+ **Interactive two ways:** **`run_interactive()`** is a stdin loop. For your own UI or protocol, use **`Text2SQL.pipeline_session()`** with **`PipelineSession.ask`** and **`PipelineSession.step`**, which return **`SessionStep`** until **`done`** is true. Details are in **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**.
83
84
 
84
85
  ---
85
86
 
@@ -118,7 +119,7 @@ A **validation-first** layer for **stable business schemas** and **repeated anal
118
119
 
119
120
  ## LLM: three fixed models
120
121
 
121
- The library uses **three** named models internally. On **Azure OpenAI** you expose **three deployments** whose default names match those internal names, or you map each name to your deployment with optional env vars (**[API_REFERENCE.md](API_REFERENCE.md)** lists the exact strings and variables).
122
+ The library uses **three** named models internally. On **Azure OpenAI** you expose **three deployments** whose default names match those internal names, or you map each name to your deployment with optional env vars (**[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)** lists the exact strings and variables).
122
123
 
123
124
  When `Text2SQL` is constructed, a **short completion** is sent once per **distinct** configured model name (OpenAI) or deployment name (Azure), so bad keys, endpoints, or deployment maps fail immediately with **`ConfigError`** instead of halfway through a session.
124
125
 
@@ -132,7 +133,7 @@ Treat credentials as you would for any read-only analyst account.
132
133
 
133
134
  - The engine needs to **reflect** the tables or views in your **`SchemaContext`**, run **`SELECT`** (and **`EXPLAIN`**) on generated queries, and execute the paths you enable (interactive display, warmup, etc.).
134
135
  - **Least privilege** is recommended: a role limited to **`SELECT`** (and whatever your database requires for **`EXPLAIN`**) on the objects you include. The library enforces an analytical **`SELECT`**-only policy in generated SQL, but that is **not** a substitute for database- and network-level security.
135
- - **Scope** matters: allow/deny lists and `include` settings restrict what is visible; they also feed **fingerprinting** so template stores stay aligned when you change scope (**[API_REFERENCE.md](API_REFERENCE.md)**).
136
+ - **Scope** matters: allow/deny lists and `include` settings restrict what is visible; they also feed **fingerprinting** so template stores stay aligned when you change scope (**[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**).
136
137
 
137
138
  ---
138
139
 
@@ -140,7 +141,7 @@ Treat credentials as you would for any read-only analyst account.
140
141
 
141
142
  Everything learned or cached for a connection “shape” lives under **`artifacts_dir`** (see Quickstart): resolved to **`<root>/text2sql`**, or a platform user-data directory if you omit **`artifacts_dir`**. That folder holds the **schema snapshot**, **template store**, **QSim skeletons**, seed-warmup cache, and a small **manifest** of fingerprints—not your raw database.
142
143
 
143
- Each time **`Text2SQL(...)`** runs, the **live** schema graph is compared to the **stored** manifest. The outcome is one of four **migration tiers** (construction step 6 and the table below; for a conceptual walkthrough see **[OVERVIEW.md § Migration tiers](OVERVIEW.md#5-migration-tiers-what-happens-when-your-schema-changes)**):
144
+ Each time **`Text2SQL(...)`** runs, the **live** schema graph is compared to the **stored** manifest. The outcome is one of four **migration tiers** (construction step 6 and the table below; for a conceptual walkthrough see **[OVERVIEW.md § Migration tiers](https://github.com/akul-ameya/aetherdialect/blob/main/OVERVIEW.md#5-migration-tiers-what-happens-when-your-schema-changes)**):
144
145
 
145
146
  | Tier | What it means for you |
146
147
  | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -161,7 +162,7 @@ After construction, the migration outcome is printed when non-trivial; use **`Te
161
162
  ## How to improve results (without touching code)
162
163
 
163
164
  - **Schema quality:** **declared foreign keys** in the database, sensible types, and a stable star/snowflake-style layout are ideal. If production metadata is thin, the graph still gains **inferred FK-style links** from naming conventions where those rules apply, plus **semantic join neighbors** from profiled column value overlap—so imperfect warehouses get extra join signal, not only whatever the catalog declared.
164
- - **Domain language:** optional **notes** file (`SchemaContext.notes_path`) and concrete questions (entities, time range, grain) improve routing and SQL quality; see the opening paragraph of this README.
165
+ - **Domain language:** optional **notes** file (`SchemaContext.notes_file`) and concrete questions (entities, time range, grain) improve routing and SQL quality; see the opening paragraph of this README.
165
166
  - **Scope:** use **allow/deny** and **`include`** deliberately so the graph matches how analysts think about the warehouse; changing scope changes fingerprints and can trigger migration.
166
167
  - **Operational learning:** accept good SQL; when you reject, answer the **“what was wrong?”** prompt when it appears—the reason improves negative learning (see above). Use **seed warmup** or **QSim** to broaden template coverage in a controlled way.
167
168
 
@@ -191,7 +192,7 @@ After construction, the migration outcome is printed when non-trivial; use **`Te
191
192
  - Cached schema snapshot per connection fingerprint so restarts avoid re-reflecting unchanged databases.
192
193
  - **Table roles** (e.g. fact vs dimension), **column roles** (measure, categorical, temporal, identifier, etc.), **filter / aggregation / HAVING** allowances per column, **value domains** from profiling — all assigned when the graph is built (reflection, DDL, profiling, and optional notes).
193
194
  - Profiling captures the **mode frequency** for each column. When one value occupies ≥99% of the non-null distribution (a sentinel like `0`, `-1`, or `'Unknown'`) the column is hidden from the LLM by the same gate as columns that are ≥99% null — sentinel-dominated columns carry no useful filter or grouping signal.
194
- - Optional **human notes** (plain text), via **`SchemaContext.notes_path`** (see **[API_REFERENCE.md](API_REFERENCE.md)**): merged when the graph is built or when notes change; if the cache already contains notes and you omit `notes_path` on a later run, cached roles and hints are kept.
195
+ - Optional **human notes** (plain text), via **`SchemaContext.notes_file`** (see **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**): merged when the graph is built or when notes change; if the cache already contains notes and you omit `notes_file` on a later run, cached roles and hints are kept.
195
196
  - Optional **`deny_columns`** and **`allow_objects`** in **`SchemaContext`**; they participate in **scope hashing** so template stores reconcile when scope changes. Each `deny_columns` entry is either a qualified **`"table.column"`** (denies that exact column) or a bare **`"column"`** name (denies that column name on every table where it appears — qualify if you want one-table scope). Denied columns are hidden from the LLM context and rejected anywhere they would appear in the IR (bare select, filter, `GROUP BY`, `HAVING`, `ORDER BY`, aggregate).
196
197
  - Optional **`allow_columns`** in **`SchemaContext`** complements `deny_columns`: when non-empty, only the listed columns survive reflection. Same grammar as `deny_columns` (qualified or bare). **Pragmatic auto-include**: primary key columns and any column appearing in a foreign key edge (source or destination) are always retained so the join graph survives a narrow allow list. Participates in scope hashing.
197
198
  - Per-column **`sensitivity`** tag on `ColumnMetadata` accepts `"pii"` or `"restricted"`. Both hide the column from the LLM context. `"pii"` additionally rejects bare select-list projection and `GROUP BY` references; aggregates and equality filters remain available. `"restricted"` hides from the LLM only — IR references that survive other validators are permitted.
@@ -212,7 +213,7 @@ After construction, the migration outcome is printed when non-trivial; use **`Te
212
213
 
213
214
  **Operational modes**
214
215
 
215
- - **Interactive** — ask questions, accept/reject, results export; via **`run_interactive()`** or a programmatic **`PipelineSession`** (see Quickstart above and **[API_REFERENCE.md](API_REFERENCE.md)**).
216
+ - **Interactive** — ask questions, accept/reject, results export; via **`run_interactive()`** or a programmatic **`PipelineSession`** (see Quickstart above and **[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**).
216
217
  - **Seed warmup** — seed questions → gold intents → **deterministic expansion** (many operators, deduplicated) → validate/execute → NL question generation for new templates.
217
218
  - **QSim** — reproducible synthetic questions from schema and profiles (seeded randomness).
218
219
 
@@ -250,7 +251,7 @@ After construction, the migration outcome is printed when non-trivial; use **`Te
250
251
  - **Accepted templates** — intent fingerprint, parameterized SQL, optional example question, **trust** that rises with validation and falls with rejection.
251
252
  - **Rejected templates** (“negative memory”) — failures are stored with **categories** (and optional user **rejection reasons** when collected) so similar bad intents are discouraged on later turns.
252
253
  - **Loader reconciliation** — when you open an existing template file, rows that no longer match the current graph (missing tables, columns, or join segments) are pruned, negative memory for removed rejects is cleared, and stale failure-log rows from older hashes are filtered before the store is saved for the current scope. Large fingerprint jumps are handled by the **migration** path above, not only this incremental prune.
253
- - Persistence lives next to the manifest under your **`artifacts_dir`** tree (**[API_REFERENCE.md](API_REFERENCE.md)**); back it up or reset it as described under **Artifacts and migration**.
254
+ - Persistence lives next to the manifest under your **`artifacts_dir`** tree (**[API_REFERENCE.md](https://github.com/akul-ameya/aetherdialect/blob/main/API_REFERENCE.md)**); back it up or reset it as described under **Artifacts and migration**.
254
255
 
255
256
  ---
256
257