PyPI - aetherdialect - Versions diffs - 0.1.0__py3-none-any.whl - Mend

aetherdialect 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

aetherdialect-0.1.0.dist-info/METADATA +197 -0
aetherdialect-0.1.0.dist-info/RECORD +34 -0
aetherdialect-0.1.0.dist-info/WHEEL +5 -0
aetherdialect-0.1.0.dist-info/licenses/LICENSE +7 -0
aetherdialect-0.1.0.dist-info/top_level.txt +1 -0
text2sql/__init__.py +7 -0
text2sql/config.py +1063 -0
text2sql/contracts_base.py +952 -0
text2sql/contracts_core.py +1890 -0
text2sql/core_utils.py +834 -0
text2sql/dialect.py +1134 -0
text2sql/expansion_ops.py +1218 -0
text2sql/expansion_rules.py +496 -0
text2sql/intent_expr.py +1759 -0
text2sql/intent_process.py +2133 -0
text2sql/intent_repair.py +1733 -0
text2sql/intent_resolve.py +1292 -0
text2sql/live_testing.py +1117 -0
text2sql/main_execution.py +799 -0
text2sql/pipeline.py +1662 -0
text2sql/qsim_ops.py +1286 -0
text2sql/qsim_sample.py +609 -0
text2sql/qsim_struct.py +569 -0
text2sql/schema.py +973 -0
text2sql/schema_profiling.py +2075 -0
text2sql/simulator.py +970 -0
text2sql/sql_gen.py +1537 -0
text2sql/templates.py +1037 -0
text2sql/text2sql.py +726 -0
text2sql/utils.py +973 -0
text2sql/validation_agg.py +1033 -0
text2sql/validation_execute.py +1092 -0
text2sql/validation_schema.py +1847 -0
text2sql/validation_semantic.py +2122 -0

text2sql/contracts_base.py ADDED Viewed

@@ -0,0 +1,952 @@
+"""Base schema and utility contracts for the text-to-SQL pipeline.
+Defines the foundational dataclasses and enums shared across all pipeline stages: schema representation (ColumnMetadata, TableMetadata, SchemaGraph), intent validation results, SQL shape comparison, QSim skeleton structures, and result containers for query plans, validation, and template management.
+Also provides type utility helpers for mapping raw SQL data types to the standardized value_type vocabulary used throughout the pipeline.
+"""
+from __future__ import annotations
+import re
+from dataclasses import asdict, dataclass, field
+from enum import Enum
+from typing import Any
+from .config import (
+    BOOLEAN_VALUE_PATTERNS,
+    COLUMN_TYPE_TO_VALUE_TYPE,
+    DATE_TYPE_TOKENS,
+    EXCLUDED_FILTER_PATTERNS,
+    NUMERIC_TYPE_TOKENS,
+    STRING_TYPE_TOKENS,
+    normalize_column_type,
+)
+from .core_utils import issue_sig
+def _is_numeric_type(data_type: str) -> bool:
+    """Return True if data type string contains a numeric token.
+    Args:
+        data_type: Raw SQL data type string.
+    Returns:
+        True if any numeric token is found in the lowercased type string.
+    """
+    dt = data_type.lower()
+    return any(t in dt for t in NUMERIC_TYPE_TOKENS)
+def is_string_type(data_type: str) -> bool:
+    """Return True if data type string contains a string/text token.
+    Args:
+        data_type: Raw SQL data type string.
+    Returns:
+        True if any string/text token is found in the lowercased type string.
+    """
+    dt = data_type.lower()
+    return any(t in dt for t in STRING_TYPE_TOKENS)
+def _is_date_type(data_type: str) -> bool:
+    """Return True if data type string contains a temporal token.
+    Args:
+        data_type: Raw SQL data type string.
+    Returns:
+        True if any temporal token is found in the lowercased type string.
+    """
+    dt = data_type.lower()
+    return any(t in dt for t in DATE_TYPE_TOKENS)
+def _data_type_to_value_type(data_type: str) -> str:
+    """Map raw SQL data type to standardized value type.
+    Args:
+        data_type: Raw SQL data type string such as 'varchar(255)' or 'integer'.
+    Returns:
+        One of 'integer', 'number', 'string', 'date', or 'boolean'.
+    """
+    normalized = normalize_column_type(data_type)
+    vt = COLUMN_TYPE_TO_VALUE_TYPE.get(normalized)
+    if vt:
+        return vt
+    if _is_numeric_type(data_type):
+        return "number"
+    if _is_date_type(data_type):
+        return "date"
+    if is_string_type(data_type):
+        return "string"
+    return "string"
+class ColumnRole(Enum):
+    """Column role for profiling and question simulation.
+    Member values are lowercase strings. ColumnMetadata stores its role as a plain string and compares it against these ``.value`` strings.
+    """
+    # Counted as groupable by ColumnMetadata.is_groupable.
+    IDENTIFIER = "identifier"
+    # Counted as filterable and groupable by ColumnMetadata.
+    CATEGORICAL = "categorical"
+    # Counted as filterable and groupable by ColumnMetadata.
+    NUMERIC_CATEGORICAL = "numeric_categorical"
+    # Counted as filterable; the only role ColumnMetadata.is_aggregatable accepts without an override.
+    NUMERIC_MEASURE = "numeric_measure"
+    # Counted as filterable and groupable by ColumnMetadata.
+    TEMPORAL = "temporal"
+    # Counted as filterable and groupable; also makes ColumnMetadata.is_boolean_like True.
+    BOOLEAN = "boolean"
+    FREE_TEXT = "free_text"
+    # ColumnMetadata.is_usable is False for this role.
+    AUDIT = "audit"
+class TableRole(Enum):
+    """Table role for join constraint validation.
+    Member values are lowercase strings.
+    """
+    # NOTE(review): TableMetadata.role is a plain ``str | None``; presumably it holds one of these values — confirm with the code that assigns it.
+    DIMENSION = "dimension"
+    FACT = "fact"
+    BRIDGE = "bridge"
+    UNKNOWN = "unknown"
+@dataclass
+class FKEdge:
+    """Foreign key relationship between tables.
+    TableMetadata.from_dict builds instances from dicts via ``FKEdge(**fk)``, so serialized keys must match these field names.
+    """
+    src_table: str
+    # NOTE(review): src_cols and dst_cols presumably pair up positionally for composite keys — confirm with the code that builds edges.
+    src_cols: list[str]
+    dst_table: str
+    dst_cols: list[str]
+@dataclass
+class ValueDomain:
+    """Value domain for sampling concrete values during question
+    generation."""
+    # Each instance gets its own fresh list (default_factory), so instances never share it.
+    values: list[str] = field(default_factory=list)
+    # Bounds are kept as strings, not typed values; None when not set.
+    min_val: str | None = None
+    max_val: str | None = None
+    # NOTE(review): presumably the raw SQL type or a value-type label for the stored values — confirm with producers.
+    data_type: str | None = None
+@dataclass
+class ColumnMetadata:
+    """Consolidated column metadata with profile, role, and value
+    domain."""
+    name: str
+    data_type: str
+    is_primary_key: bool = False
+    is_foreign_key: bool = False
+    fk_target: tuple[str, str] | None = None
+    role: str | None = None
+    value_type: str = ""
+    row_count: int = 0
+    distinct_count: int = 0
+    distinct_ratio: float = 0.0
+    null_ratio: float = 0.0
+    min_val: str | None = None
+    max_val: str | None = None
+    top_k_values: list[str] = field(default_factory=list)
+    is_aggregatable_override: bool | None = None
+    is_groupable_override: bool | None = None
+    is_filterable_override: bool | None = None
+    valid_filter_ops: list[str] = field(default_factory=list)
+    valid_aggregations: list[str] = field(default_factory=list)
+    valid_having_ops: list[str] = field(default_factory=list)
+    boolean_true_value: str | None = None
+    boolean_false_value: str | None = None
+    description: str = ""
+    def __post_init__(self) -> None:
+        """Auto-compute value_type from data_type when value_type is not explicitly set."""
+        if not self.value_type and self.data_type:
+            self.value_type = _data_type_to_value_type(self.data_type)
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> ColumnMetadata:
+        """Create ColumnMetadata from dictionary.
+        Args:
+            d: Dictionary with keys matching ColumnMetadata fields.
+        Returns:
+            Populated ColumnMetadata instance.
+        """
+        fk_target = None
+        if d.get("fk_target"):
+            fk_target = tuple(d["fk_target"]) if isinstance(d["fk_target"], list) else d["fk_target"]
+        return ColumnMetadata(
+            name=d.get("name", ""),
+            data_type=d.get("data_type", ""),
+            is_primary_key=d.get("is_primary_key", False),
+            is_foreign_key=d.get("is_foreign_key", False),
+            fk_target=fk_target,
+            role=d.get("role"),
+            value_type=d.get("value_type", ""),
+            row_count=d.get("row_count", 0),
+            distinct_count=d.get("distinct_count", 0),
+            distinct_ratio=d.get("distinct_ratio", 0.0),
+            null_ratio=d.get("null_ratio", 0.0),
+            min_val=d.get("min_val"),
+            max_val=d.get("max_val"),
+            top_k_values=d.get("top_k_values", []),
+            is_aggregatable_override=d.get("is_aggregatable_override"),
+            is_groupable_override=d.get("is_groupable_override"),
+            is_filterable_override=d.get("is_filterable_override"),
+            valid_filter_ops=d.get("valid_filter_ops", []),
+            valid_aggregations=d.get("valid_aggregations", []),
+            valid_having_ops=d.get("valid_having_ops", []),
+            boolean_true_value=d.get("boolean_true_value"),
+            boolean_false_value=d.get("boolean_false_value"),
+            description=d.get("description", ""),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dictionary for JSON storage.
+        Returns:
+            Dictionary with all ColumnMetadata fields as primitives.
+        """
+        return {
+            "name": self.name,
+            "data_type": self.data_type,
+            "is_primary_key": self.is_primary_key,
+            "is_foreign_key": self.is_foreign_key,
+            "fk_target": list(self.fk_target) if self.fk_target else None,
+            "role": self.role,
+            "value_type": self.value_type,
+            "row_count": self.row_count,
+            "distinct_count": self.distinct_count,
+            "distinct_ratio": self.distinct_ratio,
+            "null_ratio": self.null_ratio,
+            "min_val": self.min_val,
+            "max_val": self.max_val,
+            "top_k_values": self.top_k_values,
+            "is_aggregatable_override": self.is_aggregatable_override,
+            "is_groupable_override": self.is_groupable_override,
+            "is_filterable_override": self.is_filterable_override,
+            "valid_filter_ops": self.valid_filter_ops,
+            "valid_aggregations": self.valid_aggregations,
+            "valid_having_ops": self.valid_having_ops,
+            "boolean_true_value": self.boolean_true_value,
+            "boolean_false_value": self.boolean_false_value,
+            "description": self.description,
+        }
+    @property
+    def is_usable(self) -> bool:
+        """Column has sufficient variance for meaningful queries."""
+        if self.role == ColumnRole.AUDIT.value:
+            return False
+        if self.distinct_count is not None and self.distinct_count <= 1:
+            return False
+        return True
+    @property
+    def is_boolean_like(self) -> bool:
+        """Column has exactly 2 distinct values and behaves like a boolean flag."""
+        if self.role == ColumnRole.BOOLEAN.value:
+            return True
+        dtype_lower = (self.data_type or "").lower()
+        if "bool" in dtype_lower:
+            return True
+        if self.distinct_count != 2:
+            return False
+        if self.is_primary_key or self.is_foreign_key:
+            return False
+        if not self.top_k_values or len(self.top_k_values) != 2:
+            return False
+        values_lower = frozenset(str(v).lower().strip() for v in self.top_k_values)
+        return values_lower in BOOLEAN_VALUE_PATTERNS
+    @property
+    def is_filterable(self) -> bool:
+        """Column can be used in WHERE clause."""
+        for pattern in EXCLUDED_FILTER_PATTERNS:
+            if re.search(pattern, self.name, re.IGNORECASE):
+                return False
+        if self.is_filterable_override is not None:
+            return self.is_filterable_override
+        if self.is_primary_key:
+            return True
+        if self.is_foreign_key:
+            return True
+        if self.role in (
+            ColumnRole.CATEGORICAL.value,
+            ColumnRole.NUMERIC_CATEGORICAL.value,
+            ColumnRole.NUMERIC_MEASURE.value,
+            ColumnRole.TEMPORAL.value,
+            ColumnRole.BOOLEAN.value,
+        ):
+            return True
+        return False
+    def get_valid_filter_ops(self) -> list[str]:
+        """Return valid filter operators for this column.
+        Always includes null check operators regardless of column type.
+        Returns:
+            List of operator strings such as '=', '!=', 'like', 'between'.
+        """
+        null_ops = ["is null", "is not null"]
+        if self.valid_filter_ops:
+            return list(set(self.valid_filter_ops + null_ops))
+        return null_ops
+    def get_valid_aggregations(self) -> set[str]:
+        """Return valid aggregation functions for this column.
+        Returns the stored valid_aggregations set.
+        Returns:
+            Set of lowercase aggregation function names such as 'count'.
+        """
+        if self.valid_aggregations:
+            return set(agg.lower() for agg in self.valid_aggregations)
+        return set()
+    def get_valid_having_ops(self) -> list[str]:
+        """Return valid HAVING clause operators for this column.
+        Returns the stored valid_having_ops list.
+        Returns:
+            List of operator strings valid in a HAVING clause.
+        """
+        if self.valid_having_ops:
+            return list(self.valid_having_ops)
+        return []
+    @property
+    def is_groupable(self) -> bool:
+        """Column can be used in GROUP BY clause."""
+        if self.is_groupable_override is not None:
+            return self.is_groupable_override
+        if self.is_foreign_key:
+            return True
+        return self.role in (
+            ColumnRole.CATEGORICAL.value,
+            ColumnRole.NUMERIC_CATEGORICAL.value,
+            ColumnRole.BOOLEAN.value,
+            ColumnRole.TEMPORAL.value,
+            ColumnRole.IDENTIFIER.value,
+        )
+    @property
+    def is_aggregatable(self) -> bool:
+        """Column can be used with SUM/AVG aggregations."""
+        if self.is_aggregatable_override is not None:
+            return self.is_aggregatable_override
+        return self.role == ColumnRole.NUMERIC_MEASURE.value
+@dataclass
+class TableMetadata:
+    """Table metadata with nested columns, foreign keys, partition columns, and role."""
+    name: str
+    columns: dict[str, ColumnMetadata]
+    primary_key: list[str]
+    foreign_keys: list[FKEdge]
+    partition_columns: list[str] = field(default_factory=list)
+    role: str | None = None
+    row_count: int = 0
+    description: str = ""
+    composite_descriptive_ratios: dict[tuple[str, str], float] = field(
+        default_factory=dict,
+    )
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> TableMetadata:
+        """Create TableMetadata from dictionary.
+        Args:
+            d: Dictionary with keys matching TableMetadata fields.
+        Returns:
+            Populated TableMetadata instance with nested ColumnMetadata and FKEdge objects.
+        """
+        cols_raw = d.get("columns", {})
+        columns = {k: ColumnMetadata.from_dict(v) for k, v in cols_raw.items()} if isinstance(cols_raw, dict) else {}
+        fk_raw = d.get("foreign_keys", [])
+        foreign_keys = [FKEdge(**fk) if isinstance(fk, dict) else fk for fk in fk_raw]
+        return TableMetadata(
+            name=d.get("name", ""),
+            columns=columns,
+            primary_key=d.get("primary_key", []),
+            foreign_keys=foreign_keys,
+            partition_columns=d.get("partition_columns", []),
+            role=d.get("role"),
+            row_count=d.get("row_count", 0),
+            description=d.get("description", ""),
+            composite_descriptive_ratios={
+                tuple(k.split("|", 1)): v
+                for k, v in d.get("composite_descriptive_ratios", {}).items()
+                if "|" in k
+            },
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dictionary for JSON storage.
+        Returns:
+            Dictionary with all TableMetadata fields, with nested columns and foreign keys serialized recursively.
+        """
+        return {
+            "name": self.name,
+            "columns": {k: v.to_dict() for k, v in self.columns.items()},
+            "primary_key": self.primary_key,
+            "foreign_keys": [asdict(fk) for fk in self.foreign_keys],
+            "partition_columns": self.partition_columns,
+            "role": self.role,
+            "row_count": self.row_count,
+            "description": self.description,
+            "composite_descriptive_ratios": {
+                f"{c1}|{c2}": ratio
+                for (c1, c2), ratio in self.composite_descriptive_ratios.items()
+            },
+        }
+    @property
+    def column_names(self) -> list[str]:
+        """Get list of column names."""
+        return list(self.columns.keys())
+@dataclass
+class SchemaGraph:
+    """Schema graph with nested tables, join paths, and metadata."""
+    tables: dict[str, TableMetadata]
+    join_paths_multi: dict[str, dict[str, list[list[dict[str, Any]]]]]
+    schema_hash: str
+    created_at: str = ""
+    enum_values: dict[str, list[str]] | None = None
+    schema_stats: dict[str, Any] | None = None
+    @property
+    def fk_edges(self) -> list[FKEdge]:
+        """Get all FK edges from tables."""
+        return [fk for table in self.tables.values() for fk in table.foreign_keys]
+    @property
+    def table_names(self) -> list[str]:
+        """Get list of table names."""
+        return list(self.tables.keys())
+    def get_column(self, table: str, column: str) -> ColumnMetadata | None:
+        """Get column metadata by table and column name.
+        Args:
+            table: Table name to look up.
+            column: Column name within that table.
+        Returns:
+            ColumnMetadata if found, otherwise None.
+        """
+        if table in self.tables and column in self.tables[table].columns:
+            return self.tables[table].columns[column]
+        return None
+    @property
+    def schema_literal_text(self) -> str:
+        _SKIP_ROLE_TAGS = {ColumnRole.IDENTIFIER.value, ColumnRole.AUDIT.value, ""}
+        out = []
+        for t in sorted(self.tables):
+            tm = self.tables[t]
+            table_header = f"TABLE {tm.name}"
+            if tm.role:
+                table_header += f" ({tm.role})"
+            if tm.description:
+                table_header += f" — {tm.description}"
+            out.append(table_header)
+            for c in tm.columns.values():
+                pk_marker = " [PK]" if c.is_primary_key else ""
+                fk_marker = f" [FK->{c.fk_target[0]}.{c.fk_target[1]}]" if c.fk_target else ""
+                role_tag = ""
+                if c.role and c.role not in _SKIP_ROLE_TAGS:
+                    role_tag = f" [{c.role}]"
+                hint_tag = ""
+                if c.description:
+                    hint_tag = f" — {c.description}"
+                out.append(f"  {c.name}: {c.data_type}{pk_marker}{fk_marker}{role_tag}{hint_tag}")
+        if self.enum_values:
+            out.append("")
+            out.append("ENUM TYPES:")
+            for enum_name, values in sorted(self.enum_values.items()):
+                out.append(f"  {enum_name}: {values[:10]}{'...' if len(values) > 10 else ''}")
+        return "\n".join(out).strip()
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> SchemaGraph:
+        """Create SchemaGraph from dictionary.
+        Args:
+            d: Dictionary with keys matching SchemaGraph fields, typically loaded from JSON.
+        Returns:
+            Populated SchemaGraph with nested TableMetadata instances.
+        """
+        tables_raw = d.get("tables", {})
+        tables = {k: TableMetadata.from_dict(v) for k, v in tables_raw.items()}
+        return SchemaGraph(
+            tables=tables,
+            join_paths_multi=d.get("join_paths_multi", {}),
+            schema_hash=d.get("schema_hash", ""),
+            created_at=d.get("created_at", ""),
+            enum_values=d.get("enum_values"),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dictionary for JSON storage.
+        Returns:
+            Dictionary with all SchemaGraph fields, with nested tables serialized recursively.
+        """
+        return {
+            "tables": {k: v.to_dict() for k, v in self.tables.items()},
+            "join_paths_multi": self.join_paths_multi,
+            "schema_hash": self.schema_hash,
+            "created_at": self.created_at,
+            "enum_values": self.enum_values,
+        }
+@dataclass
+class ExpansionMetadata:
+    """Metadata for intent expansion operations."""
+    operator: str
+    parent_intent_id: str | None = None
+    depth: int = 0
+    expansion_path: list[str] = field(default_factory=list)
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> ExpansionMetadata:
+        """Create ExpansionMetadata from dictionary.
+        Args:
+            d: Dictionary with keys matching ExpansionMetadata fields.
+        Returns:
+            Populated ExpansionMetadata instance.
+        """
+        return ExpansionMetadata(
+            operator=d.get("operator", ""),
+            parent_intent_id=d.get("parent_intent_id"),
+            depth=d.get("depth", 0),
+            expansion_path=d.get("expansion_path", []),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class CteOutputColumnMeta:
+    """Metadata for CTE output column including source, role, and aggregation info."""
+    source: str
+    base_column: str = ""
+    agg_func: str = ""
+    role: str | None = None
+    filterable: bool = True
+    aggregatable: bool = True
+    data_type: str = "unknown"
+    value_type: str = ""
+    groupable: bool = True
+    valid_filter_ops: list[str] = field(default_factory=list)
+    valid_aggregations: list[str] = field(default_factory=list)
+    valid_having_ops: list[str] = field(default_factory=list)
+    def __post_init__(self):
+        """Auto-compute value_type from data_type when not provided."""
+        if not self.value_type and self.data_type:
+            self.value_type = _data_type_to_value_type(self.data_type)
+    def get_valid_filter_ops(self) -> list[str]:
+        """Return valid filter operators for this CTE output column."""
+        null_ops = ["is null", "is not null"]
+        if self.valid_filter_ops:
+            return list(set(self.valid_filter_ops + null_ops))
+        if self.filterable:
+            return [
+                "=",
+                "!=",
+                "<",
+                "<=",
+                ">",
+                ">=",
+                "in",
+                "not in",
+                "is null",
+                "is not null",
+            ]
+        return null_ops
+    def get_valid_aggregations(self) -> set[str]:
+        """Return valid aggregation functions for this CTE output column."""
+        if self.valid_aggregations:
+            return set(agg.lower() for agg in self.valid_aggregations)
+        if self.aggregatable:
+            return {"count", "sum", "avg", "min", "max"}
+        return {"count"}
+    def get_valid_having_ops(self) -> list[str]:
+        """Return valid HAVING clause operators for this CTE output column."""
+        if self.valid_having_ops:
+            return list(self.valid_having_ops)
+        if self.aggregatable:
+            return ["=", "!=", "<", "<=", ">", ">="]
+        return []
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> CteOutputColumnMeta:
+        """Create CteOutputColumnMeta from dictionary.
+        Args:
+            d: Dictionary with keys matching CteOutputColumnMeta fields.
+        Returns:
+            Populated CteOutputColumnMeta instance.
+        """
+        return CteOutputColumnMeta(
+            source=d.get("source", "passthrough"),
+            base_column=d.get("base_column", ""),
+            agg_func=d.get("agg_func", ""),
+            role=d.get("role"),
+            filterable=d.get("filterable", True),
+            aggregatable=d.get("aggregatable", True),
+            data_type=d.get("data_type", "unknown"),
+            value_type=d.get("value_type", ""),
+            groupable=d.get("groupable", True),
+            valid_filter_ops=d.get("valid_filter_ops", []),
+            valid_aggregations=d.get("valid_aggregations", []),
+            valid_having_ops=d.get("valid_having_ops", []),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class RetryFailureContext:
+    """Structured failure context for LLM retry guidance.
+    All fields are required; the class defines no defaults and no serialization helpers.
+    """
+    failure_type: str
+    required_tables: list[str]
+    used_tables: set[str]
+    # NOTE(review): presumably the required tables that are absent from used_tables — confirm with the code that builds this context.
+    missing_tables: set[str]
+    attempt_number: int
+@dataclass
+class SQLShape:
+    """Structural features of a SQL query for comparison."""
+    num_joins: int
+    has_group_by: bool
+    has_agg: bool
+    num_cte: int = 0
+    num_filters: int = 0
+    num_having: int = 0
+    has_distinct: bool = False
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> SQLShape:
+        """Create SQLShape from dictionary.
+        Args:     d: Dictionary with keys matching SQLShape fields.
+        Returns:
+            Populated SQLShape instance.
+        """
+        return SQLShape(
+            num_joins=d.get("num_joins", 0),
+            has_group_by=d.get("has_group_by", False),
+            has_agg=d.get("has_agg", False),
+            num_cte=d.get("num_cte", 0),
+            num_filters=d.get("num_filters", 0),
+            num_having=d.get("num_having", 0),
+            has_distinct=d.get("has_distinct", False),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class IntentIssue:
+    """Issue detected during intent validation or resolution."""
+    issue_id: str
+    category: str
+    severity: str
+    message: str
+    context: dict[str, Any] = field(default_factory=dict)
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> IntentIssue:
+        """Create IntentIssue from dictionary.
+        Args:
+            d: Dictionary with keys matching IntentIssue fields.
+        Returns:
+            Populated IntentIssue instance.
+        """
+        return IntentIssue(
+            issue_id=d.get("issue_id", ""),
+            category=d.get("category", ""),
+            severity=d.get("severity", "error"),
+            message=d.get("message", ""),
+            context=d.get("context", {}),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class IntentValidationResult:
+    """Result container for intent validation with issue tracking."""
+    issues: list[IntentIssue] = field(default_factory=list)
+    @property
+    def is_valid(self) -> bool:
+        """Return True if no errors exist."""
+        return not any(i.severity == "error" for i in self.issues)
+    @property
+    def issue_signature(self) -> str:
+        """Return signature of all issues for ABAB detection."""
+        return issue_sig([i.issue_id for i in self.issues])
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> IntentValidationResult:
+        """Create IntentValidationResult from dictionary.
+        Args:
+            d: Dictionary with an 'issues' list of serialized IntentIssue dicts.
+        Returns:
+            Populated IntentValidationResult with deserialized IntentIssue objects.
+        """
+        issues_raw = d.get("issues", [])
+        return IntentValidationResult(
+            issues=[IntentIssue.from_dict(i) for i in issues_raw],
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {"issues": [i.to_dict() for i in self.issues]}
+@dataclass
+class TemplateStats:
+    """Template acceptance/rejection statistics."""
+    accept: int = 0
+    reject: int = 0
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> TemplateStats:
+        """Create TemplateStats from dictionary.
+        Args:
+            d: Dictionary with 'accept' and 'reject' integer keys.
+        Returns:
+            Populated TemplateStats instance.
+        """
+        return TemplateStats(
+            accept=int(d.get("accept", 0)),
+            reject=int(d.get("reject", 0)),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class QSimSkeleton:
+    """Structural skeleton for QSim intent before LLM fills semantics.
+    Holds only structure: the tables involved plus counts and flags for the clauses. The first six fields are required; the last two default to False.
+    """
+    tables: list[str]
+    has_aggregation: bool
+    num_filters: int
+    num_groupby: int
+    has_orderby: bool
+    has_having: bool
+    has_distinct: bool = False
+    # NOTE(review): presumably marks a comparison between expressions rather than against a literal — confirm with the skeleton generator.
+    has_expr_comparison: bool = False
+@dataclass
+class SkeletonPool:
+    """Tiered skeleton pool with round-robin table set selection.
+    This class only stores state; the selection logic is not part of it.
+    """
+    # One mapping per tier (a, b, c), each from a table-set key string to that table set's skeletons. The key format is not defined here.
+    tier_a_by_table_set: dict[str, list[QSimSkeleton]]
+    tier_b_by_table_set: dict[str, list[QSimSkeleton]]
+    tier_c_by_table_set: dict[str, list[QSimSkeleton]]
+    table_set_keys: list[str]
+    # NOTE(review): presumably per-table-set cursors into the tier lists above — confirm with the code that consumes the pool.
+    tier_a_indices: dict[str, int]
+    tier_b_indices: dict[str, int]
+    tier_c_indices: dict[str, int]
+    # NOTE(review): presumably the round-robin position within table_set_keys — confirm with the consumer.
+    current_table_idx: int = 0
+@dataclass
+class QueryPlan:
+    """Output of SQL generation stage.
+    All three fields are required.
+    """
+    sql: str
+    chosen_join_candidate_id: str
+    # NOTE(review): the format of the signature entries is not defined in this module — see the SQL generation code.
+    chosen_join_path_signature: list[str]
+@dataclass
+class ValidationResult:
+    """Output of validation stage.
+    Only ``valid`` is required; the collections default to fresh empty containers per instance.
+    """
+    valid: bool
+    errors: list[str] = field(default_factory=list)
+    warnings: list[str] = field(default_factory=list)
+    # Element type is not declared. NOTE(review): presumably table names — confirm with the validators that fill it.
+    extra_tables: set = field(default_factory=set)
+@dataclass
+class TemplateInfo:
+    """User-facing template information with obfuscated internals.
+    All fields are required strings.
+    """
+    id: str
+    natural_language: str
+    example_question: str
+    # NOTE(review): the allowed trust_level and source values are not defined in this module — see the template management code.
+    trust_level: str
+    source: str
+@dataclass
+class RejectedTemplateInfo:
+    """User-facing rejected template with generic categories.
+    All fields are required.
+    """
+    id: str
+    natural_language: str
+    example_question: str
+    # NOTE(review): the set of category labels is not defined in this module — see the template management code.
+    rejection_category: str
+    rejection_count: int
+@dataclass
+class SimulatorSummary:
+    """High-level simulator execution statistics.
+    All fields are required; nothing is computed or validated here.
+    """
+    version: int
+    total: int
+    success: int
+    failed: int
+    # Stored as given. NOTE(review): whether this is a 0-1 fraction or a percentage is not visible here — confirm with the producer.
+    success_rate: float
+@dataclass
+class QSimSummary:
+    """Question simulator run metadata with timestamp, counts, and seed."""
+    timestamp: str
+    num_intents: int
+    num_questions: int
+    seed: int
+    @staticmethod
+    def from_dict(d: dict[str, Any]) -> QSimSummary:
+        """Create QSimSummary from dictionary.
+        Args:
+            d: Dictionary with keys matching QSimSummary fields.
+        Returns:
+            Populated QSimSummary instance.
+        """
+        return QSimSummary(
+            timestamp=d.get("timestamp", ""),
+            num_intents=d.get("num_intents", 0),
+            num_questions=d.get("num_questions", 0),
+            seed=d.get("seed", 42),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return asdict(self)
+@dataclass
+class QSimRange:
+    """User-facing range limits for QSim parameters."""
+    # NOTE(review): each pair is presumably (minimum, maximum); ordering and inclusiveness are not enforced here — confirm with the code that validates QSim parameters.
+    num_intents_range: tuple[int, int]
+    num_questions_range: tuple[int, int]
+@dataclass
+class SchemaLimits:
+    """Internal schema-based limits for adaptive parameter validation.
+    All three limits are required integers; how they are derived is not part of this class.
+    """
+    max_filters: int
+    max_groupby: int
+    max_tables: int
+@dataclass
+class SkeletonLimits:
+    """Schema-derived limits for QSim skeleton enumeration.
+    Computed from column capabilities (filterable, groupable, aggregatable) for a given table set.
+    Used when generating valid skeleton combinations.
+    """
+    max_filters: int
+    max_groupby: int
+    max_having: int