aetherdialect 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. {aetherdialect-0.1.4/src/aetherdialect.egg-info → aetherdialect-0.1.6}/PKG-INFO +1 -1
  2. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/pyproject.toml +1 -1
  3. {aetherdialect-0.1.4 → aetherdialect-0.1.6/src/aetherdialect.egg-info}/PKG-INFO +1 -1
  4. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_config.py +6 -2
  5. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_contracts_base.py +38 -1
  6. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_contracts_core.py +0 -21
  7. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_core_utils.py +24 -1
  8. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_dialect.py +392 -34
  9. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_expr.py +46 -6
  10. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_process.py +4 -4
  11. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_repair.py +26 -0
  12. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_live_testing.py +26 -9
  13. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_main_execution.py +105 -35
  14. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_pipeline.py +90 -75
  15. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_schema.py +26 -39
  16. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_schema_profiling.py +92 -25
  17. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_seed_warmup.py +1 -2
  18. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_templates.py +15 -44
  19. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_semantic.py +3 -0
  20. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/text2sql.py +37 -30
  21. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_config.py +10 -0
  22. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_contracts.py +28 -9
  23. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_core_utils.py +19 -1
  24. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_expr.py +12 -0
  25. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_process.py +2218 -2228
  26. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_live_testing.py +990 -991
  27. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_main_execution.py +122 -0
  28. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_migration_diff_driven.py +386 -387
  29. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_pipeline.py +3279 -3295
  30. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema.py +49 -1
  31. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_profiling.py +150 -1
  32. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_seed_warmup.py +0 -2
  33. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_templates.py +320 -321
  34. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_utils.py +0 -1
  35. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/LICENSE +0 -0
  36. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/README.md +0 -0
  37. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/setup.cfg +0 -0
  38. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/SOURCES.txt +0 -0
  39. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/dependency_links.txt +0 -0
  40. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/requires.txt +0 -0
  41. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/top_level.txt +0 -0
  42. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/__init__.py +0 -0
  43. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_expansion_ops.py +0 -0
  44. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_resolve.py +0 -0
  45. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_qsim.py +0 -0
  46. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_qsim_ops.py +0 -0
  47. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_sql_gen.py +0 -0
  48. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_utils.py +0 -0
  49. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_agg.py +0 -0
  50. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_execute.py +0 -0
  51. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_schema.py +0 -0
  52. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_artifact_lock.py +0 -0
  53. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_bool_op_combinations.py +0 -0
  54. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_dialect.py +0 -0
  55. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_expansion_ops.py +0 -0
  56. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_repair.py +0 -0
  57. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_resolve.py +0 -0
  58. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_notebook_export_signature.py +0 -0
  59. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_phase_c_repairs.py +0 -0
  60. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_qsim.py +0 -0
  61. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_qsim_ops.py +0 -0
  62. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_cache_probe.py +0 -0
  63. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_diff_apply.py +0 -0
  64. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_diff_renames.py +0 -0
  65. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_inference_paths.py +0 -0
  66. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_scope_change.py +0 -0
  67. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_sql_gen.py +0 -0
  68. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_text2sql.py +0 -0
  69. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_agg.py +0 -0
  70. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_execute.py +0 -0
  71. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_schema.py +0 -0
  72. {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_semantic.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aetherdialect
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Deterministic, validation-first Text-to-SQL system for business databases
5
5
  Author-email: Akul Ameya <akul.ameya@gmail.com>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aetherdialect"
7
- version = "0.1.4"
7
+ version = "0.1.6"
8
8
  description = "Deterministic, validation-first Text-to-SQL system for business databases"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aetherdialect
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Deterministic, validation-first Text-to-SQL system for business databases
5
5
  Author-email: Akul Ameya <akul.ameya@gmail.com>
6
6
  License: MIT
@@ -197,11 +197,13 @@ VALID_SCALAR_FUNCTIONS = {
197
197
  "date_part",
198
198
  "extract",
199
199
  "coalesce",
200
+ "concat",
200
201
  "year",
201
202
  "month",
202
203
  "day",
203
204
  }
204
- SCALAR_FUNCTIONS_STRING = {"upper", "lower", "trim", "ltrim", "rtrim", "length"}
205
+ SCALAR_FUNCTIONS_STRING = {"upper", "lower", "trim", "ltrim", "rtrim", "length", "concat"}
206
+ SCALAR_FUNCTIONS_VARIADIC = frozenset({"concat"})
205
207
  SCALAR_FUNCTIONS_NUMERIC = {"abs", "round", "floor", "ceil"}
206
208
  SCALAR_FUNCTIONS_TEMPORAL = {
207
209
  "date_trunc",
@@ -215,6 +217,7 @@ SCALAR_FUNCTIONS_LEADING_ARG = {"date_trunc", "date_part", "extract"}
215
217
  DISALLOWED_EXTRACT_UNITS = {"epoch"}
216
218
  VALID_GRAINS = {"scalar", "grouped", "row_level"}
217
219
  VALID_EXPECTED_ROWS = {"one", "few", "many"}
220
+ REGISTRY_TOKEN_PATTERN = r"^[wc]\d+$"
218
221
  VALID_HAVING_OPS = {"=", "!=", "<", "<=", ">", ">=", "in", "not in", "between"}
219
222
  DATABRICKS_TABLE_QUALIFY_SKIP_IDENTIFIERS: frozenset[str] = frozenset(
220
223
  {
@@ -1292,6 +1295,7 @@ INTENT_CRITICAL_RULES: tuple[str, ...] = (
1292
1295
  "Do not embed COUNT(*) inside arithmetic subexpressions—use COUNT(*) only as a top-level aggregate where appropriate.",
1293
1296
  "Arithmetic combines expressions with +, -, *, /; aggregations may wrap arithmetic (e.g. SUM(tbl_a.col_a * tbl_a.col_b)). "
1294
1297
  "Subtract date columns directly (tbl_a.date_a - tbl_a.date_b) for day differences.",
1298
+ "String concatenation uses CONCAT(expr1, ' ', expr2, ...) in expr strings; do not use the SQL || operator (pipe-pipe).",
1295
1299
  "Apply scalar functions such as ROUND after aggregates when needed (e.g. ROUND(SUM(tbl_a.col_a), 2)).",
1296
1300
  "Use exact identifiers from the provided schema text; never leave synthetic shape tokens from this prompt "
1297
1301
  "(tbl_a, tbl_b, col_a, date_a, date_b), generic instructional tokens (table_N, column_N), or angle-bracket markup in expressions.",
@@ -1734,7 +1738,7 @@ CASE_RESULT_BARE_LABEL_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
1734
1738
  CASE_RESULT_REGISTRY_TOKEN_RE = re.compile(r"^[wc]\d{2}$")
1735
1739
 
1736
1740
  ROLE_VALUE_TYPE_COMPAT: dict[str, frozenset[str]] = {
1737
- "boolean": frozenset({"boolean"}),
1741
+ "boolean": frozenset({"boolean", "integer", "string"}),
1738
1742
  "numeric_measure": frozenset({"integer", "number"}),
1739
1743
  "numeric_categorical": frozenset({"integer", "number"}),
1740
1744
  "temporal": frozenset({"date"}),
@@ -782,6 +782,42 @@ class FKEdge:
782
782
  self.inference_tag = coerce_inference_tag(self.inference_tag)
783
783
 
784
784
 
785
+ @dataclass
786
+ class CatalogTableStructuralConstraints:
787
+ """
788
+ Catalog-sourced primary-key column names, foreign-key edges, and single-column unique names for one table.
789
+
790
+ Each :class:`FKEdge` carries ``src_table`` equal to the referencing table so the bundle can be converted into ``tables_meta`` foreign-key dicts without losing the child table identity.
791
+ """
792
+
793
+ primary_keys: list[str] = field(default_factory=list)
794
+ foreign_keys: list[FKEdge] = field(default_factory=list)
795
+ unique_columns: list[str] = field(default_factory=list)
796
+
797
+
798
+ @dataclass
799
+ class CatalogStructuralConstraintsIndex:
800
+ """
801
+ Per-table structural constraint bundles keyed by lowercased relation name within one catalog schema.
802
+
803
+ When ``tables`` is empty the caller should treat catalog reflection as unavailable and continue with DDL-based parsing.
804
+ """
805
+
806
+ tables: dict[str, CatalogTableStructuralConstraints] = field(default_factory=dict)
807
+
808
+ @classmethod
809
+ def empty(cls) -> CatalogStructuralConstraintsIndex:
810
+ """
811
+ Construct an empty index for failed information_schema queries.
812
+
813
+ Returns:
814
+
815
+ Empty :class:`CatalogStructuralConstraintsIndex` instance.
816
+ """
817
+
818
+ return cls(tables={})
819
+
820
+
785
821
  @dataclass
786
822
  class ValueDomain:
787
823
  """Value domain for sampling concrete values during question generation."""
@@ -1521,6 +1557,7 @@ class SchemaGraph:
1521
1557
  continue
1522
1558
  pk_marker = " [PK]" if c.is_primary_key else ""
1523
1559
  fk_marker = f" [FK->{c.fk_target[0]}.{c.fk_target[1]}]" if c.fk_target else ""
1560
+ unique_marker = " [UNIQUE]" if (c.is_unique and not c.is_primary_key) else ""
1524
1561
  role_tag = ""
1525
1562
  if c.role and c.role not in _SKIP_ROLE_TAGS:
1526
1563
  role_tag = f" [{c.role}]"
@@ -1534,7 +1571,7 @@ class SchemaGraph:
1534
1571
  if vt.lower() == "boolean" and c.boolean_truth_value:
1535
1572
  truth_tag = f" truth_value={c.boolean_truth_value}"
1536
1573
  filter_tag = " [filter]" if c.is_filterable else ""
1537
- out.append(f" {c.name}: {vt}{truth_tag}{pk_marker}{fk_marker}{role_tag}{hint_tag}{filter_tag}")
1574
+ out.append(f" {c.name}: {vt}{truth_tag}{pk_marker}{fk_marker}{unique_marker}{role_tag}{hint_tag}{filter_tag}")
1538
1575
  if self.enum_values:
1539
1576
  out.append("")
1540
1577
  out.append("ENUM TYPES:")
@@ -2526,9 +2526,6 @@ class RuntimeIntent:
2526
2526
  distinct_select_index: int = -1
2527
2527
  extra_tables: set[str] = field(default_factory=set)
2528
2528
  sql_param: str = ""
2529
- sql_display_param: str = ""
2530
- sql_substituted: str = ""
2531
- deterministic_sql: str = ""
2532
2529
  sql_shape: SQLShape | None = None
2533
2530
  schema_invalid: bool = False
2534
2531
 
@@ -2614,9 +2611,6 @@ class RuntimeIntent:
2614
2611
  distinct_select_index=int(d.get("distinct_select_index", -1)),
2615
2612
  extra_tables=set(d.get("extra_tables", [])),
2616
2613
  sql_param=d.get("sql_param", ""),
2617
- sql_display_param=d.get("sql_display_param", ""),
2618
- sql_substituted=d.get("sql_substituted", ""),
2619
- deterministic_sql=d.get("deterministic_sql", ""),
2620
2614
  sql_shape=(SQLShape.from_dict(d["sql_shape"]) if d.get("sql_shape") else None),
2621
2615
  schema_invalid=d.get("schema_invalid", False),
2622
2616
  )
@@ -2651,9 +2645,6 @@ class RuntimeIntent:
2651
2645
  "distinct_select_index": self.distinct_select_index,
2652
2646
  "extra_tables": sorted(self.extra_tables),
2653
2647
  "sql_param": self.sql_param,
2654
- "sql_display_param": self.sql_display_param,
2655
- "sql_substituted": self.sql_substituted,
2656
- "deterministic_sql": self.deterministic_sql,
2657
2648
  "sql_shape": self.sql_shape.to_dict() if self.sql_shape else None,
2658
2649
  "schema_invalid": self.schema_invalid,
2659
2650
  }
@@ -3510,7 +3501,6 @@ class Template:
3510
3501
  intent_key: str
3511
3502
  tables_used: list[str]
3512
3503
  sql_param: str
3513
- sql_display_param: str
3514
3504
  sql_fp: str
3515
3505
  shape: SQLShape
3516
3506
  colmap_sig: str
@@ -3520,9 +3510,6 @@ class Template:
3520
3510
  source: str = "human"
3521
3511
  trust_level: int = 1
3522
3512
  structural_defaults: dict[str, str | int | float] = field(default_factory=dict)
3523
- deterministic_sql: str = ""
3524
- aliased_sql: str = ""
3525
- execution_sql: str = ""
3526
3513
  display_alias_map: dict[str, str] = field(default_factory=dict)
3527
3514
  feedback_by_question: dict[str, FeedbackCounts] = field(default_factory=dict)
3528
3515
 
@@ -3591,7 +3578,6 @@ class Template:
3591
3578
  intent_key=d.get("intent_key", ""),
3592
3579
  tables_used=d.get("tables_used", []),
3593
3580
  sql_param=d.get("sql_param", ""),
3594
- sql_display_param=d.get("sql_display_param", ""),
3595
3581
  sql_fp=d.get("sql_fp", ""),
3596
3582
  shape=shape,
3597
3583
  colmap_sig=d.get("colmap_sig", ""),
@@ -3601,9 +3587,6 @@ class Template:
3601
3587
  source=d.get("source", "human"),
3602
3588
  trust_level=d.get("trust_level", 1),
3603
3589
  structural_defaults=d.get("structural_defaults", {}),
3604
- deterministic_sql=d.get("deterministic_sql", ""),
3605
- aliased_sql=d.get("aliased_sql", ""),
3606
- execution_sql=d.get("execution_sql") or d.get("spark_sql_param", ""),
3607
3590
  display_alias_map=dict(d.get("display_alias_map") or {}),
3608
3591
  feedback_by_question=feedback_by_question,
3609
3592
  )
@@ -3628,7 +3611,6 @@ class Template:
3628
3611
  "intent_key": self.intent_key,
3629
3612
  "tables_used": self.tables_used,
3630
3613
  "sql_param": self.sql_param,
3631
- "sql_display_param": self.sql_display_param,
3632
3614
  "sql_fp": self.sql_fp,
3633
3615
  "shape": self.shape.to_dict(),
3634
3616
  "colmap_sig": self.colmap_sig,
@@ -3638,9 +3620,6 @@ class Template:
3638
3620
  "source": self.source,
3639
3621
  "trust_level": self.trust_level,
3640
3622
  "structural_defaults": self.structural_defaults,
3641
- "deterministic_sql": self.deterministic_sql,
3642
- "aliased_sql": self.aliased_sql,
3643
- "execution_sql": self.execution_sql,
3644
3623
  "display_alias_map": self.display_alias_map,
3645
3624
  "feedback_by_question": {q: c.to_dict() for q, c in self.feedback_by_question.items()},
3646
3625
  }
@@ -80,6 +80,29 @@ def notify(message: str) -> None:
80
80
  print(message, file=sys.stdout, flush=True)
81
81
 
82
82
 
83
+ _progress_depth = 0
84
+
85
+
86
+ def progress(message: str) -> None:
87
+ """Print a status line only when :func:`progress_enabled` is active (interactive CLI)."""
88
+
89
+ if _progress_depth <= 0:
90
+ return
91
+ print(message, file=sys.stdout, flush=True)
92
+
93
+
94
+ @contextmanager
95
+ def progress_enabled() -> Iterator[None]:
96
+ """Enable :func:`progress` writes for the duration of the block (supports nesting)."""
97
+
98
+ global _progress_depth
99
+ _progress_depth += 1
100
+ try:
101
+ yield
102
+ finally:
103
+ _progress_depth -= 1
104
+
105
+
83
106
  def result(message: str) -> None:
84
107
  """Print a query-result line to stdout with no prefix."""
85
108
 
@@ -1182,7 +1205,7 @@ def ask_user_choice(prompt: str, options: list[str], silent_no: bool = False) ->
1182
1205
  ``"y"``, ``"n"``, or ``None`` on EOF/invalid.
1183
1206
  """
1184
1207
  options_display = "/".join(options)
1185
- print(f"{prompt} [{options_display}]: ", end="")
1208
+ print(f"{prompt} ({options_display}): ", end="", flush=True)
1186
1209
  try:
1187
1210
  user_input = input().strip()
1188
1211
  except (EOFError, KeyboardInterrupt):