aetherdialect 0.1.4__tar.gz → 0.1.6__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {aetherdialect-0.1.4/src/aetherdialect.egg-info → aetherdialect-0.1.6}/PKG-INFO +1 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/pyproject.toml +1 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6/src/aetherdialect.egg-info}/PKG-INFO +1 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_config.py +6 -2
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_contracts_base.py +38 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_contracts_core.py +0 -21
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_core_utils.py +24 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_dialect.py +392 -34
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_expr.py +46 -6
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_process.py +4 -4
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_repair.py +26 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_live_testing.py +26 -9
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_main_execution.py +105 -35
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_pipeline.py +90 -75
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_schema.py +26 -39
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_schema_profiling.py +92 -25
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_seed_warmup.py +1 -2
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_templates.py +15 -44
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_semantic.py +3 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/text2sql.py +37 -30
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_config.py +10 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_contracts.py +28 -9
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_core_utils.py +19 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_expr.py +12 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_process.py +2218 -2228
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_live_testing.py +990 -991
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_main_execution.py +122 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_migration_diff_driven.py +386 -387
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_pipeline.py +3279 -3295
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema.py +49 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_profiling.py +150 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_seed_warmup.py +0 -2
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_templates.py +320 -321
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_utils.py +0 -1
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/LICENSE +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/README.md +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/setup.cfg +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/SOURCES.txt +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/dependency_links.txt +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/requires.txt +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/aetherdialect.egg-info/top_level.txt +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/__init__.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_expansion_ops.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_intent_resolve.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_qsim.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_qsim_ops.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_sql_gen.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_utils.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_agg.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_execute.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/src/text2sql/_validation_schema.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_artifact_lock.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_bool_op_combinations.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_dialect.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_expansion_ops.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_repair.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_intent_resolve.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_notebook_export_signature.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_phase_c_repairs.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_qsim.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_qsim_ops.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_cache_probe.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_diff_apply.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_diff_renames.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_inference_paths.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_schema_scope_change.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_sql_gen.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_text2sql.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_agg.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_execute.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_schema.py +0 -0
- {aetherdialect-0.1.4 → aetherdialect-0.1.6}/tests/test_validation_semantic.py +0 -0
|
@@ -197,11 +197,13 @@ VALID_SCALAR_FUNCTIONS = {
|
|
|
197
197
|
"date_part",
|
|
198
198
|
"extract",
|
|
199
199
|
"coalesce",
|
|
200
|
+
"concat",
|
|
200
201
|
"year",
|
|
201
202
|
"month",
|
|
202
203
|
"day",
|
|
203
204
|
}
|
|
204
|
-
SCALAR_FUNCTIONS_STRING = {"upper", "lower", "trim", "ltrim", "rtrim", "length"}
|
|
205
|
+
SCALAR_FUNCTIONS_STRING = {"upper", "lower", "trim", "ltrim", "rtrim", "length", "concat"}
|
|
206
|
+
SCALAR_FUNCTIONS_VARIADIC = frozenset({"concat"})
|
|
205
207
|
SCALAR_FUNCTIONS_NUMERIC = {"abs", "round", "floor", "ceil"}
|
|
206
208
|
SCALAR_FUNCTIONS_TEMPORAL = {
|
|
207
209
|
"date_trunc",
|
|
@@ -215,6 +217,7 @@ SCALAR_FUNCTIONS_LEADING_ARG = {"date_trunc", "date_part", "extract"}
|
|
|
215
217
|
DISALLOWED_EXTRACT_UNITS = {"epoch"}
|
|
216
218
|
VALID_GRAINS = {"scalar", "grouped", "row_level"}
|
|
217
219
|
VALID_EXPECTED_ROWS = {"one", "few", "many"}
|
|
220
|
+
REGISTRY_TOKEN_PATTERN = r"^[wc]\d+$"
|
|
218
221
|
VALID_HAVING_OPS = {"=", "!=", "<", "<=", ">", ">=", "in", "not in", "between"}
|
|
219
222
|
DATABRICKS_TABLE_QUALIFY_SKIP_IDENTIFIERS: frozenset[str] = frozenset(
|
|
220
223
|
{
|
|
@@ -1292,6 +1295,7 @@ INTENT_CRITICAL_RULES: tuple[str, ...] = (
|
|
|
1292
1295
|
"Do not embed COUNT(*) inside arithmetic subexpressions—use COUNT(*) only as a top-level aggregate where appropriate.",
|
|
1293
1296
|
"Arithmetic combines expressions with +, -, *, /; aggregations may wrap arithmetic (e.g. SUM(tbl_a.col_a * tbl_a.col_b)). "
|
|
1294
1297
|
"Subtract date columns directly (tbl_a.date_a - tbl_a.date_b) for day differences.",
|
|
1298
|
+
"String concatenation uses CONCAT(expr1, ' ', expr2, ...) in expr strings; do not use the SQL || operator (pipe-pipe).",
|
|
1295
1299
|
"Apply scalar functions such as ROUND after aggregates when needed (e.g. ROUND(SUM(tbl_a.col_a), 2)).",
|
|
1296
1300
|
"Use exact identifiers from the provided schema text; never leave synthetic shape tokens from this prompt "
|
|
1297
1301
|
"(tbl_a, tbl_b, col_a, date_a, date_b), generic instructional tokens (table_N, column_N), or angle-bracket markup in expressions.",
|
|
@@ -1734,7 +1738,7 @@ CASE_RESULT_BARE_LABEL_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
|
1734
1738
|
CASE_RESULT_REGISTRY_TOKEN_RE = re.compile(r"^[wc]\d{2}$")
|
|
1735
1739
|
|
|
1736
1740
|
ROLE_VALUE_TYPE_COMPAT: dict[str, frozenset[str]] = {
|
|
1737
|
-
"boolean": frozenset({"boolean"}),
|
|
1741
|
+
"boolean": frozenset({"boolean", "integer", "string"}),
|
|
1738
1742
|
"numeric_measure": frozenset({"integer", "number"}),
|
|
1739
1743
|
"numeric_categorical": frozenset({"integer", "number"}),
|
|
1740
1744
|
"temporal": frozenset({"date"}),
|
|
@@ -782,6 +782,42 @@ class FKEdge:
|
|
|
782
782
|
self.inference_tag = coerce_inference_tag(self.inference_tag)
|
|
783
783
|
|
|
784
784
|
|
|
785
|
+
@dataclass
|
|
786
|
+
class CatalogTableStructuralConstraints:
|
|
787
|
+
"""
|
|
788
|
+
Catalog-sourced primary-key column names, foreign-key edges, and single-column unique names for one table.
|
|
789
|
+
|
|
790
|
+
Each :class:`FKEdge` carries ``src_table`` equal to the referencing table so the bundle can be converted into ``tables_meta`` foreign-key dicts without losing the child table identity.
|
|
791
|
+
"""
|
|
792
|
+
|
|
793
|
+
primary_keys: list[str] = field(default_factory=list)
|
|
794
|
+
foreign_keys: list[FKEdge] = field(default_factory=list)
|
|
795
|
+
unique_columns: list[str] = field(default_factory=list)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
@dataclass
|
|
799
|
+
class CatalogStructuralConstraintsIndex:
|
|
800
|
+
"""
|
|
801
|
+
Per-table structural constraint bundles keyed by lowercased relation name within one catalog schema.
|
|
802
|
+
|
|
803
|
+
When ``tables`` is empty the caller should treat catalog reflection as unavailable and continue with DDL-based parsing.
|
|
804
|
+
"""
|
|
805
|
+
|
|
806
|
+
tables: dict[str, CatalogTableStructuralConstraints] = field(default_factory=dict)
|
|
807
|
+
|
|
808
|
+
@classmethod
|
|
809
|
+
def empty(cls) -> CatalogStructuralConstraintsIndex:
|
|
810
|
+
"""
|
|
811
|
+
Construct an empty index for failed information_schema queries.
|
|
812
|
+
|
|
813
|
+
Returns:
|
|
814
|
+
|
|
815
|
+
Empty :class:`CatalogStructuralConstraintsIndex` instance.
|
|
816
|
+
"""
|
|
817
|
+
|
|
818
|
+
return cls(tables={})
|
|
819
|
+
|
|
820
|
+
|
|
785
821
|
@dataclass
|
|
786
822
|
class ValueDomain:
|
|
787
823
|
"""Value domain for sampling concrete values during question generation."""
|
|
@@ -1521,6 +1557,7 @@ class SchemaGraph:
|
|
|
1521
1557
|
continue
|
|
1522
1558
|
pk_marker = " [PK]" if c.is_primary_key else ""
|
|
1523
1559
|
fk_marker = f" [FK->{c.fk_target[0]}.{c.fk_target[1]}]" if c.fk_target else ""
|
|
1560
|
+
unique_marker = " [UNIQUE]" if (c.is_unique and not c.is_primary_key) else ""
|
|
1524
1561
|
role_tag = ""
|
|
1525
1562
|
if c.role and c.role not in _SKIP_ROLE_TAGS:
|
|
1526
1563
|
role_tag = f" [{c.role}]"
|
|
@@ -1534,7 +1571,7 @@ class SchemaGraph:
|
|
|
1534
1571
|
if vt.lower() == "boolean" and c.boolean_truth_value:
|
|
1535
1572
|
truth_tag = f" truth_value={c.boolean_truth_value}"
|
|
1536
1573
|
filter_tag = " [filter]" if c.is_filterable else ""
|
|
1537
|
-
out.append(f" {c.name}: {vt}{truth_tag}{pk_marker}{fk_marker}{role_tag}{hint_tag}{filter_tag}")
|
|
1574
|
+
out.append(f" {c.name}: {vt}{truth_tag}{pk_marker}{fk_marker}{unique_marker}{role_tag}{hint_tag}{filter_tag}")
|
|
1538
1575
|
if self.enum_values:
|
|
1539
1576
|
out.append("")
|
|
1540
1577
|
out.append("ENUM TYPES:")
|
|
@@ -2526,9 +2526,6 @@ class RuntimeIntent:
|
|
|
2526
2526
|
distinct_select_index: int = -1
|
|
2527
2527
|
extra_tables: set[str] = field(default_factory=set)
|
|
2528
2528
|
sql_param: str = ""
|
|
2529
|
-
sql_display_param: str = ""
|
|
2530
|
-
sql_substituted: str = ""
|
|
2531
|
-
deterministic_sql: str = ""
|
|
2532
2529
|
sql_shape: SQLShape | None = None
|
|
2533
2530
|
schema_invalid: bool = False
|
|
2534
2531
|
|
|
@@ -2614,9 +2611,6 @@ class RuntimeIntent:
|
|
|
2614
2611
|
distinct_select_index=int(d.get("distinct_select_index", -1)),
|
|
2615
2612
|
extra_tables=set(d.get("extra_tables", [])),
|
|
2616
2613
|
sql_param=d.get("sql_param", ""),
|
|
2617
|
-
sql_display_param=d.get("sql_display_param", ""),
|
|
2618
|
-
sql_substituted=d.get("sql_substituted", ""),
|
|
2619
|
-
deterministic_sql=d.get("deterministic_sql", ""),
|
|
2620
2614
|
sql_shape=(SQLShape.from_dict(d["sql_shape"]) if d.get("sql_shape") else None),
|
|
2621
2615
|
schema_invalid=d.get("schema_invalid", False),
|
|
2622
2616
|
)
|
|
@@ -2651,9 +2645,6 @@ class RuntimeIntent:
|
|
|
2651
2645
|
"distinct_select_index": self.distinct_select_index,
|
|
2652
2646
|
"extra_tables": sorted(self.extra_tables),
|
|
2653
2647
|
"sql_param": self.sql_param,
|
|
2654
|
-
"sql_display_param": self.sql_display_param,
|
|
2655
|
-
"sql_substituted": self.sql_substituted,
|
|
2656
|
-
"deterministic_sql": self.deterministic_sql,
|
|
2657
2648
|
"sql_shape": self.sql_shape.to_dict() if self.sql_shape else None,
|
|
2658
2649
|
"schema_invalid": self.schema_invalid,
|
|
2659
2650
|
}
|
|
@@ -3510,7 +3501,6 @@ class Template:
|
|
|
3510
3501
|
intent_key: str
|
|
3511
3502
|
tables_used: list[str]
|
|
3512
3503
|
sql_param: str
|
|
3513
|
-
sql_display_param: str
|
|
3514
3504
|
sql_fp: str
|
|
3515
3505
|
shape: SQLShape
|
|
3516
3506
|
colmap_sig: str
|
|
@@ -3520,9 +3510,6 @@ class Template:
|
|
|
3520
3510
|
source: str = "human"
|
|
3521
3511
|
trust_level: int = 1
|
|
3522
3512
|
structural_defaults: dict[str, str | int | float] = field(default_factory=dict)
|
|
3523
|
-
deterministic_sql: str = ""
|
|
3524
|
-
aliased_sql: str = ""
|
|
3525
|
-
execution_sql: str = ""
|
|
3526
3513
|
display_alias_map: dict[str, str] = field(default_factory=dict)
|
|
3527
3514
|
feedback_by_question: dict[str, FeedbackCounts] = field(default_factory=dict)
|
|
3528
3515
|
|
|
@@ -3591,7 +3578,6 @@ class Template:
|
|
|
3591
3578
|
intent_key=d.get("intent_key", ""),
|
|
3592
3579
|
tables_used=d.get("tables_used", []),
|
|
3593
3580
|
sql_param=d.get("sql_param", ""),
|
|
3594
|
-
sql_display_param=d.get("sql_display_param", ""),
|
|
3595
3581
|
sql_fp=d.get("sql_fp", ""),
|
|
3596
3582
|
shape=shape,
|
|
3597
3583
|
colmap_sig=d.get("colmap_sig", ""),
|
|
@@ -3601,9 +3587,6 @@ class Template:
|
|
|
3601
3587
|
source=d.get("source", "human"),
|
|
3602
3588
|
trust_level=d.get("trust_level", 1),
|
|
3603
3589
|
structural_defaults=d.get("structural_defaults", {}),
|
|
3604
|
-
deterministic_sql=d.get("deterministic_sql", ""),
|
|
3605
|
-
aliased_sql=d.get("aliased_sql", ""),
|
|
3606
|
-
execution_sql=d.get("execution_sql") or d.get("spark_sql_param", ""),
|
|
3607
3590
|
display_alias_map=dict(d.get("display_alias_map") or {}),
|
|
3608
3591
|
feedback_by_question=feedback_by_question,
|
|
3609
3592
|
)
|
|
@@ -3628,7 +3611,6 @@ class Template:
|
|
|
3628
3611
|
"intent_key": self.intent_key,
|
|
3629
3612
|
"tables_used": self.tables_used,
|
|
3630
3613
|
"sql_param": self.sql_param,
|
|
3631
|
-
"sql_display_param": self.sql_display_param,
|
|
3632
3614
|
"sql_fp": self.sql_fp,
|
|
3633
3615
|
"shape": self.shape.to_dict(),
|
|
3634
3616
|
"colmap_sig": self.colmap_sig,
|
|
@@ -3638,9 +3620,6 @@ class Template:
|
|
|
3638
3620
|
"source": self.source,
|
|
3639
3621
|
"trust_level": self.trust_level,
|
|
3640
3622
|
"structural_defaults": self.structural_defaults,
|
|
3641
|
-
"deterministic_sql": self.deterministic_sql,
|
|
3642
|
-
"aliased_sql": self.aliased_sql,
|
|
3643
|
-
"execution_sql": self.execution_sql,
|
|
3644
3623
|
"display_alias_map": self.display_alias_map,
|
|
3645
3624
|
"feedback_by_question": {q: c.to_dict() for q, c in self.feedback_by_question.items()},
|
|
3646
3625
|
}
|
|
@@ -80,6 +80,29 @@ def notify(message: str) -> None:
|
|
|
80
80
|
print(message, file=sys.stdout, flush=True)
|
|
81
81
|
|
|
82
82
|
|
|
83
|
+
_progress_depth = 0
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def progress(message: str) -> None:
|
|
87
|
+
"""Print a status line only when :func:`progress_enabled` is active (interactive CLI)."""
|
|
88
|
+
|
|
89
|
+
if _progress_depth <= 0:
|
|
90
|
+
return
|
|
91
|
+
print(message, file=sys.stdout, flush=True)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@contextmanager
|
|
95
|
+
def progress_enabled() -> Iterator[None]:
|
|
96
|
+
"""Enable :func:`progress` writes for the duration of the block (supports nesting)."""
|
|
97
|
+
|
|
98
|
+
global _progress_depth
|
|
99
|
+
_progress_depth += 1
|
|
100
|
+
try:
|
|
101
|
+
yield
|
|
102
|
+
finally:
|
|
103
|
+
_progress_depth -= 1
|
|
104
|
+
|
|
105
|
+
|
|
83
106
|
def result(message: str) -> None:
|
|
84
107
|
"""Print a query-result line to stdout with no prefix."""
|
|
85
108
|
|
|
@@ -1182,7 +1205,7 @@ def ask_user_choice(prompt: str, options: list[str], silent_no: bool = False) ->
|
|
|
1182
1205
|
``"y"``, ``"n"``, or ``None`` on EOF/invalid.
|
|
1183
1206
|
"""
|
|
1184
1207
|
options_display = "/".join(options)
|
|
1185
|
-
print(f"{prompt}
|
|
1208
|
+
print(f"{prompt} ({options_display}): ", end="", flush=True)
|
|
1186
1209
|
try:
|
|
1187
1210
|
user_input = input().strip()
|
|
1188
1211
|
except (EOFError, KeyboardInterrupt):
|