aetherdialect 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1218 @@
1
+ """Fully deterministic expansion operators for synthetic intent generation.
2
+
3
+ Implements A-series (attribute: filters, aggregations, GROUP BY, ORDER BY,
4
+ HAVING), B-series (join: dimension, fact, swap, remove, bridge), C-series
5
+ (gold inclusion), T-series (temporal: EXTRACT, DATE_TRUNC, date_window,
6
+ date_diff), N-series (numeric: ROUND, ABS), and structural operators
7
+ (DISTINCT, LIMIT, OR-groups, expression composition).
8
+
9
+ All operators are purely deterministic — no LLM calls. The top-level
10
+ ``expand_gold_intents`` function orchestrates multi-depth expansion with
11
+ SHA-256 dedup across all gold intents.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import copy
17
+ from dataclasses import replace
18
+ from typing import Any
19
+
20
+ from .config import (
21
+ VALID_AGGREGATION_FUNCTIONS,
22
+ VALID_FILTER_OPS,
23
+ VALID_HAVING_OPS,
24
+ SimulatorConfig,
25
+ )
26
+ from .contracts_base import (
27
+ ColumnRole,
28
+ ExpansionMetadata,
29
+ SchemaGraph,
30
+ SchemaLimits,
31
+ TableRole,
32
+ )
33
+ from .contracts_core import (
34
+ FilterParam,
35
+ HavingParam,
36
+ MulGroup,
37
+ NormalizedExpr,
38
+ OrderByCol,
39
+ SelectCol,
40
+ SimulatorIntent,
41
+ )
42
+ from .core_utils import debug, log
43
+ from .intent_resolve import enforce_schema
44
+ from .utils import intent_key
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Schema helpers
49
+ # ---------------------------------------------------------------------------
50
+
51
def _get_table_role(schema: SchemaGraph, table: str) -> str | None:
    """Look up the role recorded for *table* in *schema*.

    Returns the ``role`` attribute of the matching ``schema.tables`` entry,
    or ``None`` when the table is missing (or its entry is falsy).
    """
    table_meta = schema.tables.get(table)
    if not table_meta:
        return None
    return table_meta.role
55
+
56
+
57
def _table_from_column_ref(col_ref: str) -> str:
    """Return the table part of a ``table.column`` reference.

    Everything before the first dot is treated as the table name. An empty
    string is returned for empty input or a reference without a dot.
    """
    if col_ref and "." in col_ref:
        table_name, _, _ = col_ref.partition(".")
        return table_name
    return ""
62
+
63
+
64
def _build_column_metadata(
    schema: SchemaGraph,
) -> dict[str, dict[str, dict[str, Any]]]:
    """Flatten *schema* into a ``table -> column -> metadata`` mapping.

    Each column entry carries ``data_type``, ``role``, ``nullable`` (true
    when the column's ``null_ratio`` is above zero) and ``cardinality``
    (``None`` when the column object has no such attribute).
    """
    metadata: dict[str, dict[str, dict[str, Any]]] = {}
    for table_name, table_obj in schema.tables.items():
        metadata[table_name] = {
            col_name: {
                "data_type": col.data_type,
                "role": col.role,
                "nullable": col.null_ratio > 0.0,
                "cardinality": getattr(col, "cardinality", None),
            }
            for col_name, col in table_obj.columns.items()
        }
    return metadata
79
+
80
+
81
def _build_fk_map(schema: SchemaGraph) -> dict[str, list[dict[str, str]]]:
    """Index the schema's FK edges by their source table.

    The result maps ``source_table`` to a list of
    ``{source_column, target_table, target_column}`` dicts. Only the first
    column of each side of an edge is recorded; an empty column list
    yields ``""``.
    """
    fk_map: dict[str, list[dict[str, str]]] = {}
    for edge in schema.fk_edges:
        link = {
            "source_column": edge.src_cols[0] if edge.src_cols else "",
            "target_table": edge.dst_table,
            "target_column": edge.dst_cols[0] if edge.dst_cols else "",
        }
        fk_map.setdefault(edge.src_table, []).append(link)
    return fk_map
94
+
95
+
96
def _tables_are_connected(
    tables: list[str],
    fk_map: dict[str, list[dict[str, str]]],
) -> bool:
    """Return True when all *tables* form one connected component via FKs.

    FK edges are treated as undirected, and only edges whose both endpoints
    are in *tables* count (a path through an outside table does not
    connect anything).

    Args:
        tables: Table names to test. Repeated names are counted once.
        fk_map: ``source_table -> [{"target_table": ...}, ...]`` adjacency.

    Returns:
        True for zero or one distinct table, or when every distinct table
        is reachable from the first one.
    """
    # Work on distinct names: comparing against len(tables) would report a
    # list with a repeated table as disconnected.
    nodes = set(tables)
    if len(nodes) <= 1:
        return True

    adjacency: dict[str, set[str]] = {name: set() for name in nodes}
    # Only FK lists of the tables of interest matter, so look them up
    # directly instead of scanning the whole map.
    for source in nodes:
        for fk in fk_map.get(source, ()):
            target = fk.get("target_table", "")
            if target in adjacency:
                adjacency[source].add(target)
                adjacency[target].add(source)

    # Iterative depth-first walk from an arbitrary start table.
    visited: set[str] = set()
    stack = [tables[0]]
    while stack:
        current = stack.pop()
        if current in visited:
            continue
        visited.add(current)
        stack.extend(adjacency[current] - visited)
    return len(visited) == len(nodes)
123
+
124
+
125
def _get_filterable_columns(
    schema: SchemaGraph, table_name: str,
) -> list[str]:
    """List ``table.column`` refs of *table_name* that may be filtered on.

    A column qualifies when its role is CATEGORICAL, TEMPORAL or
    IDENTIFIER. Unknown tables produce an empty list.
    """
    refs: list[str] = []
    if table_name in schema.tables:
        allowed_roles = (
            ColumnRole.CATEGORICAL.value,
            ColumnRole.TEMPORAL.value,
            ColumnRole.IDENTIFIER.value,
        )
        for col_name, col in schema.tables[table_name].columns.items():
            if col.role in allowed_roles:
                refs.append(f"{table_name}.{col_name}")
    return refs
141
+
142
+
143
def _get_groupable_columns(
    schema: SchemaGraph, table_name: str,
) -> list[str]:
    """List ``table.column`` refs of *table_name* usable in GROUP BY.

    A column qualifies when its role is CATEGORICAL or TEMPORAL. Unknown
    tables produce an empty list.
    """
    refs: list[str] = []
    if table_name in schema.tables:
        allowed_roles = (
            ColumnRole.CATEGORICAL.value,
            ColumnRole.TEMPORAL.value,
        )
        for col_name, col in schema.tables[table_name].columns.items():
            if col.role in allowed_roles:
                refs.append(f"{table_name}.{col_name}")
    return refs
158
+
159
+
160
def _get_temporal_columns(
    schema: SchemaGraph, table_name: str,
) -> list[str]:
    """List ``table.column`` refs of *table_name* whose role is TEMPORAL.

    Unknown tables produce an empty list.
    """
    refs: list[str] = []
    if table_name in schema.tables:
        temporal_role = ColumnRole.TEMPORAL.value
        for col_name, col in schema.tables[table_name].columns.items():
            if col.role == temporal_role:
                refs.append(f"{table_name}.{col_name}")
    return refs
172
+
173
+
174
def _get_numeric_measure_columns(
    schema: SchemaGraph, table_name: str,
) -> list[str]:
    """List ``table.column`` refs of *table_name* whose role is NUMERIC_MEASURE.

    Unknown tables produce an empty list.
    """
    refs: list[str] = []
    if table_name in schema.tables:
        measure_role = ColumnRole.NUMERIC_MEASURE.value
        for col_name, col in schema.tables[table_name].columns.items():
            if col.role == measure_role:
                refs.append(f"{table_name}.{col_name}")
    return refs
186
+
187
+
188
def _get_dimension_tables(schema: SchemaGraph) -> list[str]:
    """Collect the names of every table whose role is DIMENSION.

    Names are returned in the iteration order of ``schema.tables``.
    """
    dimension_names: list[str] = []
    for table_name, table_meta in schema.tables.items():
        if table_meta.role == TableRole.DIMENSION.value:
            dimension_names.append(table_name)
    return dimension_names
194
+
195
+
196
def _add_expansion_metadata(
    intent: SimulatorIntent, operator: str,
) -> None:
    """Record on *intent* (in place) that *operator* produced it.

    An intent without metadata gets a fresh record at depth 1 with an
    empty parent id. Otherwise the record is replaced by one that keeps
    the existing parent id (falling back to ``intent.intent_id`` when it
    is empty), increments the depth, and appends *operator* to the path.
    """
    previous = intent.expansion_metadata
    if previous is None:
        intent.expansion_metadata = ExpansionMetadata(
            parent_intent_id="",
            operator=operator,
            depth=1,
            expansion_path=[operator],
        )
        return

    parent_id = previous.parent_intent_id or intent.intent_id
    next_depth = (previous.depth or 0) + 1
    extended_path = (previous.expansion_path or []) + [operator]
    intent.expansion_metadata = ExpansionMetadata(
        parent_intent_id=parent_id,
        operator=operator,
        depth=next_depth,
        expansion_path=extended_path,
    )
220
+
221
+
222
def _compatible_data_types(type_a: str, type_b: str) -> bool:
    """Tell whether two SQL type names fall into the same broad family.

    The comparison is case-insensitive. Identical names always match;
    otherwise both names must belong to the same family (numeric, text or
    temporal) listed below.
    """
    families = (
        # numeric
        {
            "integer", "decimal", "float", "numeric",
            "double", "bigint", "smallint", "real",
        },
        # text
        {"character varying", "varchar", "text", "char", "character"},
        # temporal
        {
            "date", "timestamp", "timestamp without time zone",
            "timestamp with time zone", "time",
        },
    )
    left = type_a.lower()
    right = type_b.lower()
    if left == right:
        return True
    return any(left in family and right in family for family in families)
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # A-series operators (attribute modifications)
244
+ # ---------------------------------------------------------------------------
245
+
246
def _a1_add_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A1: Produce one variant per filterable column that has no filter yet.

    Each variant is a deep copy of *intent* with an extra ``=`` filter of
    value type ``"string"`` on the column. Nothing is produced once the
    number of distinct filtered columns reaches
    ``SimulatorConfig.MAX_FILTERS``. *column_metadata* is not used here.
    """
    already_filtered = {
        flt.left_expr.primary_column for flt in (intent.filters_param or [])
    }
    if len(already_filtered) >= SimulatorConfig.MAX_FILTERS:
        return []

    variants: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for column_ref in _get_filterable_columns(schema, table):
            if column_ref in already_filtered:
                continue
            variant = copy.deepcopy(intent)
            extra_filter = FilterParam(
                left_expr=NormalizedExpr.from_column(column_ref),
                op="=",
                value_type="string",
                param_key=f"f_{column_ref.replace('.', '_')}",
            )
            variant.filters_param = [
                *(variant.filters_param or []),
                extra_filter,
            ]
            _add_expansion_metadata(variant, "A1_add_filter")
            variants.append(variant)
    return variants
275
+
276
+
277
def _a2_add_expr_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A2: Add a column-vs-column filter for each pair of same-typed columns.

    Columns of the intent's tables are bucketed by their exact
    ``data_type`` string from *column_metadata*. For every unordered pair
    in a bucket and each of ``=``, ``>``, ``<`` not already present, one
    variant is emitted. Nothing is produced once the number of existing
    column-vs-column comparisons reaches
    ``SimulatorConfig.MAX_EXPR_COMPARISONS``.
    """
    existing = {
        (
            flt.left_expr.primary_column,
            flt.op,
            flt.right_expr.primary_column,
        )
        for flt in (intent.filters_param or [])
        if flt.right_expr
    }
    if len(existing) >= SimulatorConfig.MAX_EXPR_COMPARISONS:
        return []

    # Bucket fully qualified column names by declared data type.
    columns_by_type: dict[str, list[str]] = {}
    for table in intent.tables or []:
        for col_name, col_info in column_metadata.get(table, {}).items():
            dtype = col_info.get("data_type", "unknown")
            columns_by_type.setdefault(dtype, []).append(f"{table}.{col_name}")

    variants: list[SimulatorIntent] = []
    for bucket in columns_by_type.values():
        if len(bucket) < 2:
            continue
        for pos, left in enumerate(bucket):
            for right in bucket[pos + 1:]:
                for op in ("=", ">", "<"):
                    if (left, op, right) in existing:
                        continue
                    variant = copy.deepcopy(intent)
                    comparison = FilterParam(
                        left_expr=NormalizedExpr.from_column(left),
                        op=op,
                        right_expr=NormalizedExpr.from_column(right),
                        value_type="column",
                        param_key="",
                    )
                    variant.filters_param = [
                        *(variant.filters_param or []),
                        comparison,
                    ]
                    _add_expansion_metadata(variant, "A2_add_expr_filter")
                    variants.append(variant)
    return variants
325
+
326
+
327
def _swap_agg_func(expr: NormalizedExpr, new_agg: str) -> NormalizedExpr:
    """Build a copy of *expr* that aggregates with *new_agg*.

    The aggregation is replaced at the expression level when set there,
    otherwise on the first additive group when that group carries one.
    When neither holds, a fresh ``new_agg(primary_column)`` expression is
    returned.
    """
    if expr.agg_func:
        return replace(expr, agg_func=new_agg)

    groups = expr.add_groups
    if groups and groups[0].agg_func:
        swapped_head = replace(groups[0], agg_func=new_agg)
        return replace(expr, add_groups=[swapped_head, *groups[1:]])

    return NormalizedExpr.from_agg(new_agg, expr.primary_column)
337
+
338
+
339
def _a3_change_aggregation(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A3: Emit variants with a different aggregate on each aggregated column.

    For every aggregated select column, each of count/sum/avg/min/max is
    tried, skipping the one whose ``agg(column)`` text matches the current
    term (case-insensitively). In each variant only the first select
    column matching the original column and term is rewritten. *schema*
    and *column_metadata* are not used here.
    """
    variants: list[SimulatorIntent] = []
    for select_col in intent.select_cols or []:
        if not select_col.is_aggregated:
            continue
        column = select_col.expr.primary_column
        term = select_col.expr.primary_term
        for candidate_agg in ("count", "sum", "avg", "min", "max"):
            if f"{candidate_agg}({column})".lower() == term.lower():
                continue
            variant = copy.deepcopy(intent)
            # Locate the copy's counterpart of the column being rewritten.
            match_idx = next(
                (
                    idx
                    for idx, candidate in enumerate(variant.select_cols or [])
                    if candidate.expr.primary_column == column
                    and candidate.expr.primary_term == term
                ),
                None,
            )
            if match_idx is not None:
                swapped = _swap_agg_func(
                    variant.select_cols[match_idx].expr, candidate_agg,
                )
                variant.select_cols[match_idx] = SelectCol(expr=swapped)
            _add_expansion_metadata(variant, "A3_change_aggregation")
            variants.append(variant)
    return variants
368
+
369
+
370
def _a4_add_groupby(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A4: Add one GROUP BY column per groupable column not yet grouped.

    Args:
        intent: Source intent; it is never mutated.
        schema: Schema used to find groupable columns of the intent's tables.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per new column, with GROUP BY kept sorted
        by ``signature_key`` and a ``row_level`` grain promoted to
        ``grouped``. Empty once the number of distinct grouped columns
        reaches ``SimulatorConfig.MAX_GROUPBY``.
    """
    current_gb = {g.primary_column for g in (intent.group_by_cols or [])}
    if len(current_gb) >= SimulatorConfig.MAX_GROUPBY:
        return []

    results: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for col in _get_groupable_columns(schema, table):
            if col in current_gb:
                continue
            new_intent = copy.deepcopy(intent)
            # Extend the copy's own list (as T1/T2 do) so the variant does
            # not share expression objects with the source intent.
            new_intent.group_by_cols = sorted(
                [
                    *(new_intent.group_by_cols or []),
                    NormalizedExpr.from_column(col),
                ],
                key=lambda g: g.signature_key,
            )
            if new_intent.grain == "row_level":
                new_intent.grain = "grouped"
            _add_expansion_metadata(new_intent, "A4_add_groupby")
            results.append(new_intent)
    return results
395
+
396
+
397
def _a5_add_orderby(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A5: Emit ASC and DESC ORDER BY variants for each candidate column.

    Candidates are the GROUP BY columns followed by select columns not
    already among them; columns that already appear in ORDER BY are
    skipped. *schema* and *column_metadata* are not used here.
    """
    ordered_already = {
        ob.expr.primary_column for ob in (intent.order_by_cols or [])
    }
    candidates = [gb.primary_column for gb in (intent.group_by_cols or [])]
    for select_col in intent.select_cols or []:
        column = select_col.expr.primary_column
        if column not in candidates:
            candidates.append(column)

    variants: list[SimulatorIntent] = []
    for column in candidates:
        if column in ordered_already:
            continue
        for direction in ("ASC", "DESC"):
            variant = copy.deepcopy(intent)
            ordering = OrderByCol(
                expr=NormalizedExpr.from_column(column),
                direction=direction,
            )
            variant.order_by_cols = [
                *(variant.order_by_cols or []),
                ordering,
            ]
            _add_expansion_metadata(variant, "A5_add_orderby")
            variants.append(variant)
    return variants
424
+
425
+
426
def _a6_add_having_value(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A6: Add a numeric-threshold HAVING clause to grouped intents.

    Only intents with grain ``"grouped"`` and at least one GROUP BY column
    are expanded. One variant is produced per (count/sum/avg, comparison
    operator) pair whose ``agg(*)`` term and operator are not already in
    HAVING. *schema* and *column_metadata* are not used here.
    """
    if intent.grain != "grouped" or not intent.group_by_cols:
        return []

    taken = {
        (hv.left_expr.primary_term, hv.op)
        for hv in (intent.having_param or [])
    }
    variants: list[SimulatorIntent] = []
    # NOTE(review): the left side is always ``<agg>(*)``, including for sum
    # and avg; confirm downstream rendering/validation accepts that.
    for agg_func in ("count", "sum", "avg"):
        for op in (">", "<", ">=", "<="):
            if (f"{agg_func}(*)", op) in taken:
                continue
            # Spell the operator with letters so it is usable in a key.
            op_tag = op.replace("<", "lt").replace(">", "gt").replace("=", "e")
            variant = copy.deepcopy(intent)
            threshold = HavingParam(
                left_expr=NormalizedExpr.from_agg(agg_func, "*"),
                op=op,
                value_type="number",
                param_key=f"h_{agg_func}_{op_tag}",
            )
            variant.having_param = [
                *(variant.having_param or []),
                threshold,
            ]
            _add_expansion_metadata(variant, "A6_add_having_value")
            variants.append(variant)
    return variants
456
+
457
+
458
def _a7_add_having_expr(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A7: Add an aggregate-vs-aggregate HAVING clause to grouped intents.

    Only intents with grain ``"grouped"`` and at least one GROUP BY column
    are expanded. Both sides aggregate the first aggregated select column
    (or ``*`` when there is none), compared with ``>`` for the fixed pairs
    count/avg, sum/count and avg/min. *schema* and *column_metadata* are
    not used here.
    """
    if intent.grain != "grouped" or not intent.group_by_cols:
        return []

    taken = {
        (hv.left_expr.primary_term, hv.op)
        for hv in (intent.having_param or [])
    }
    aggregated_columns = [
        sc.expr.primary_column
        for sc in (intent.select_cols or [])
        if sc.is_aggregated
    ]
    # NOTE(review): falls back to ``*`` for every aggregate, not only count;
    # confirm downstream handling of e.g. ``avg(*)``.
    target_col = aggregated_columns[0] if aggregated_columns else "*"

    variants: list[SimulatorIntent] = []
    for left_agg, right_agg in (("count", "avg"), ("sum", "count"), ("avg", "min")):
        # The duplicate check looks only at the left term and operator.
        if (f"{left_agg}({target_col})", ">") in taken:
            continue
        variant = copy.deepcopy(intent)
        comparison = HavingParam(
            left_expr=NormalizedExpr.from_agg(left_agg, target_col),
            op=">",
            right_expr=NormalizedExpr.from_agg(right_agg, target_col),
            value_type="expression",
            param_key="",
        )
        variant.having_param = [
            *(variant.having_param or []),
            comparison,
        ]
        _add_expansion_metadata(variant, "A7_add_having_expr")
        variants.append(variant)
    return variants
495
+
496
+
497
def _a8_remove_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A8: Remove each filter one at a time.

    Args:
        intent: Source intent; it is never mutated.
        schema: Unused; kept for the uniform operator signature.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per existing filter, each missing exactly
        that filter. Empty when the intent has no filters.
    """
    filter_count = len(intent.filters_param or [])
    results: list[SimulatorIntent] = []
    for drop_idx in range(filter_count):
        new_intent = copy.deepcopy(intent)
        # Drop from the copy's own list so the variant keeps its copied
        # filter objects rather than aliasing the source intent's.
        remaining = list(new_intent.filters_param)
        del remaining[drop_idx]
        new_intent.filters_param = remaining
        _add_expansion_metadata(new_intent, "A8_remove_filter")
        results.append(new_intent)
    return results
512
+
513
+
514
def _a9_remove_groupby(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A9: Remove each GROUP BY entry one at a time (skip if single).

    Args:
        intent: Source intent; it is never mutated.
        schema: Unused; kept for the uniform operator signature.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per GROUP BY entry, each missing exactly
        that entry. Empty when the intent has at most one GROUP BY entry,
        so every variant keeps at least one.
    """
    entry_count = len(intent.group_by_cols or [])
    if entry_count <= 1:
        return []
    results: list[SimulatorIntent] = []
    for drop_idx in range(entry_count):
        new_intent = copy.deepcopy(intent)
        # Remove by position on the copy: matching on primary_column would
        # also drop other entries built on the same column (for example a
        # raw column next to a scalar-wrapped one) and could empty GROUP BY.
        remaining = list(new_intent.group_by_cols)
        del remaining[drop_idx]
        new_intent.group_by_cols = remaining
        _add_expansion_metadata(new_intent, "A9_remove_groupby")
        results.append(new_intent)
    return results
531
+
532
+
533
def _a10_remove_having(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """A10: Remove each HAVING condition one at a time.

    Args:
        intent: Source intent; it is never mutated.
        schema: Unused; kept for the uniform operator signature.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per existing HAVING condition, each
        missing exactly that condition. Empty when there are none.
    """
    having_count = len(intent.having_param or [])
    results: list[SimulatorIntent] = []
    for drop_idx in range(having_count):
        new_intent = copy.deepcopy(intent)
        # Drop from the copy's own list so the variant keeps its copied
        # condition objects rather than aliasing the source intent's.
        remaining = list(new_intent.having_param)
        del remaining[drop_idx]
        new_intent.having_param = remaining
        _add_expansion_metadata(new_intent, "A10_remove_having")
        results.append(new_intent)
    return results
548
+
549
+
550
+ # ---------------------------------------------------------------------------
551
+ # B-series operators (join modifications)
552
+ # ---------------------------------------------------------------------------
553
+
554
def _b1_add_dimension_join(
    intent: SimulatorIntent, schema: SchemaGraph,
    fk_map: dict[str, list[dict[str, str]]],
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """B1: Add each FK-connected dimension table not already present.

    Args:
        intent: Source intent; it is never mutated.
        schema: Schema used to look up table roles.
        fk_map: ``source_table -> [{"target_table": ...}, ...]`` adjacency.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per distinct dimension table that a
        current table points at via FK and that keeps the table set
        connected. Empty once the intent holds
        ``SimulatorConfig.MAX_TABLES`` tables.
    """
    current = set(intent.tables or [])
    if len(current) >= SimulatorConfig.MAX_TABLES:
        return []
    results: list[SimulatorIntent] = []
    seen_targets: set[str] = set()
    # Walk tables in sorted order: iterating the set directly would make
    # the result order depend on string hashing.
    for table in sorted(current):
        for fk in fk_map.get(table, []):
            target = fk.get("target_table")
            # Skip targets already handled so several FKs to the same
            # dimension do not yield identical variants.
            if not target or target in current or target in seen_targets:
                continue
            if (
                _get_table_role(schema, target) or TableRole.FACT.value
            ) != TableRole.DIMENSION.value:
                continue
            new_tables = list(current | {target})
            if not _tables_are_connected(new_tables, fk_map):
                continue
            seen_targets.add(target)
            new_intent = copy.deepcopy(intent)
            new_intent.tables = sorted(new_tables)
            _add_expansion_metadata(new_intent, "B1_add_dimension_join")
            results.append(new_intent)
    return results
581
+
582
+
583
def _b2_add_fact_join(
    intent: SimulatorIntent, schema: SchemaGraph,
    fk_map: dict[str, list[dict[str, str]]],
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """B2: Add each FK-connected fact table not already present.

    For every current table, fact tables are found in both directions:
    tables it references, and tables that reference it. A table whose role
    is unknown to the schema is treated as a fact table.

    Args:
        intent: Source intent; it is never mutated.
        schema: Schema used to look up table roles.
        fk_map: ``source_table -> [{"target_table": ...}, ...]`` adjacency.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per distinct fact table that keeps the
        table set connected. Empty once the intent holds
        ``SimulatorConfig.MAX_TABLES`` tables.
    """
    current = set(intent.tables or [])
    if len(current) >= SimulatorConfig.MAX_TABLES:
        return []

    results: list[SimulatorIntent] = []
    seen_targets: set[str] = set()

    def _is_fact(name: str) -> bool:
        """True when *name* has the FACT role or no recorded role."""
        return (
            _get_table_role(schema, name) or TableRole.FACT.value
        ) == TableRole.FACT.value

    def _try_add(extra: str) -> None:
        """Emit the variant that joins *extra*, if the set stays connected."""
        new_tables = list(current | {extra})
        if not _tables_are_connected(new_tables, fk_map):
            return
        seen_targets.add(extra)
        new_intent = copy.deepcopy(intent)
        new_intent.tables = sorted(new_tables)
        _add_expansion_metadata(new_intent, "B2_add_fact_join")
        results.append(new_intent)

    # Walk tables in sorted order: iterating the set directly would make
    # the result order depend on string hashing.
    for table in sorted(current):
        # Forward direction: fact tables that `table` references.
        for fk in fk_map.get(table, []):
            target = fk.get("target_table")
            if not target or target in current or target in seen_targets:
                continue
            if _is_fact(target):
                _try_add(target)

        # Reverse direction: fact tables holding an FK that points at `table`.
        for other_table, other_fks in fk_map.items():
            if other_table in current or other_table in seen_targets:
                continue
            if not _is_fact(other_table):
                continue
            # Connectivity does not depend on which FK matched, so a single
            # attempt per referencing table is enough.
            if any(ofk.get("target_table") == table for ofk in other_fks):
                _try_add(other_table)
    return results
633
+
634
+
635
def _b3_swap_dimension(
    intent: SimulatorIntent, schema: SchemaGraph,
    fk_map: dict[str, list[dict[str, str]]],
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """B3: Replace each dimension table with another joinable dimension.

    A replacement qualifies when it is a schema dimension not already in
    the intent and some fact table of the intent has an FK pointing at it.
    Tables without a recorded role count as fact tables. *column_metadata*
    is not used here.
    """
    # NOTE(review): only ``tables`` is rewritten; clauses that reference the
    # swapped-out dimension are left as they are. Confirm this is handled
    # downstream.
    current = list(intent.tables or [])

    def _role_of(name: str) -> str:
        return _get_table_role(schema, name) or TableRole.FACT.value

    fact_tables = [t for t in current if _role_of(t) == TableRole.FACT.value]

    variants: list[SimulatorIntent] = []
    for position, table in enumerate(current):
        if _role_of(table) != TableRole.DIMENSION.value:
            continue
        if not fact_tables:
            continue
        for replacement in _get_dimension_tables(schema):
            if replacement == table or replacement in current:
                continue
            joinable = False
            for fact in fact_tables:
                for fk in fk_map.get(fact, []):
                    if fk.get("target_table") == replacement:
                        joinable = True
                        break
                if joinable:
                    break
            if not joinable:
                continue
            swapped = list(current)
            swapped[position] = replacement
            variant = copy.deepcopy(intent)
            variant.tables = sorted(swapped)
            _add_expansion_metadata(variant, "B3_swap_dimension")
            variants.append(variant)
    return variants
671
+
672
+
673
def _b4_remove_table(
    intent: SimulatorIntent, schema: SchemaGraph,
    fk_map: dict[str, list[dict[str, str]]],
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """B4: Remove each removable dimension table, pruning dependent clauses.

    A dimension table is removable when the remaining tables stay
    FK-connected and at least one select column survives.

    Args:
        intent: Source intent; it is never mutated.
        schema: Schema used to look up table roles.
        fk_map: ``source_table -> [{"target_table": ...}, ...]`` adjacency.
        column_metadata: Unused; kept for the uniform operator signature.

    Returns:
        One deep-copied variant per removable dimension, with select,
        filter, GROUP BY, ORDER BY and HAVING entries that reference the
        removed table dropped. Empty when the intent has at most one table.
    """
    current = list(intent.tables or [])
    if len(current) <= 1:
        return []

    def _table_of(expr: Any) -> str:
        """Table name of the expression's primary column ('' if unqualified)."""
        return _table_from_column_ref(expr.primary_column)

    results: list[SimulatorIntent] = []
    for i, table in enumerate(current):
        if (
            _get_table_role(schema, table) or TableRole.FACT.value
        ) != TableRole.DIMENSION.value:
            continue
        new_tables = current[:i] + current[i + 1:]
        if not _tables_are_connected(new_tables, fk_map):
            continue
        kept = set(new_tables)

        new_intent = copy.deepcopy(intent)
        surviving_select = [
            sc for sc in (new_intent.select_cols or [])
            if _table_of(sc.expr) in kept
        ]
        if not surviving_select:
            # Nothing left to select: this removal is not a usable variant.
            continue

        new_intent.tables = sorted(new_tables)
        new_intent.select_cols = surviving_select
        # A column-vs-column filter must have BOTH sides on kept tables;
        # checking only the left side would leave a dangling reference.
        new_intent.filters_param = [
            f for f in (new_intent.filters_param or [])
            if _table_of(f.left_expr) in kept
            and (not f.right_expr or _table_of(f.right_expr) in kept)
        ]
        new_intent.group_by_cols = [
            c for c in (new_intent.group_by_cols or [])
            if _table_of(c) in kept
        ]
        new_intent.order_by_cols = [
            o for o in (new_intent.order_by_cols or [])
            if _table_of(o.expr) in kept
        ]
        if new_intent.having_param:
            # HAVING terms may be table-less (e.g. an aggregate over ``*``);
            # keep those and drop only terms tied to the removed table.
            def _having_side_ok(expr: Any) -> bool:
                side_table = _table_of(expr)
                return not side_table or side_table in kept

            new_intent.having_param = [
                h for h in new_intent.having_param
                if _having_side_ok(h.left_expr)
                and (not h.right_expr or _having_side_ok(h.right_expr))
            ]
        _add_expansion_metadata(new_intent, "B4_remove_table")
        results.append(new_intent)
    return results
717
+
718
+
719
def _b5_bridge_via_intermediate(
    intent: SimulatorIntent, schema: SchemaGraph,
    fk_map: dict[str, list[dict[str, str]]],
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """B5: Join in bridge tables that link two or more current tables.

    A schema table qualifies when it is not yet in the intent, has the
    BRIDGE role, holds FKs to at least two distinct current tables, and
    the enlarged table set is FK-connected. Nothing is produced once the
    intent holds ``SimulatorConfig.MAX_TABLES`` tables. *column_metadata*
    is not used here.
    """
    current = set(intent.tables or [])
    if len(current) >= SimulatorConfig.MAX_TABLES:
        return []

    variants: list[SimulatorIntent] = []
    for candidate in schema.tables:
        if candidate in current:
            continue
        role = _get_table_role(schema, candidate) or TableRole.FACT.value
        if role != TableRole.BRIDGE.value:
            continue
        # Distinct current tables this candidate points at.
        linked: set[Any] = set()
        for fk in fk_map.get(candidate, []):
            if fk.get("target_table") in current:
                linked.add(fk.get("target_table"))
        if len(linked) < 2:
            continue
        enlarged = list(current | {candidate})
        if not _tables_are_connected(enlarged, fk_map):
            continue
        variant = copy.deepcopy(intent)
        variant.tables = sorted(enlarged)
        _add_expansion_metadata(variant, "B5_bridge_via_intermediate")
        variants.append(variant)
    return variants
753
+
754
+
755
+ # ---------------------------------------------------------------------------
756
+ # C-series (gold inclusion)
757
+ # ---------------------------------------------------------------------------
758
+
759
def _c1_include_gold(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """C1: Pass the gold intent through unchanged apart from metadata.

    Returns a single-element list holding a deep copy of *intent* stamped
    with the ``C1_include_gold`` operator. *schema* and *column_metadata*
    are not used here.
    """
    stamped = copy.deepcopy(intent)
    _add_expansion_metadata(stamped, "C1_include_gold")
    return [stamped]
767
+
768
+
769
+ # ---------------------------------------------------------------------------
770
+ # T-series (temporal scalar expansions)
771
+ # ---------------------------------------------------------------------------
772
+
773
def _t1_extract_select_groupby(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """T1: Select and group by ``extract(unit)`` of each temporal column.

    One variant is produced per temporal column of the intent's tables and
    per unit in ``SimulatorConfig.EXTRACT_EXPANSION_UNITS``. The wrapped
    expression is appended to SELECT and added to GROUP BY (kept sorted by
    ``signature_key``); a ``row_level`` grain becomes ``grouped``.
    *column_metadata* is not used here.
    """
    variants: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for column_ref in _get_temporal_columns(schema, table):
            for unit in SimulatorConfig.EXTRACT_EXPANSION_UNITS:
                variant = copy.deepcopy(intent)
                wrapped = replace(
                    NormalizedExpr.from_column(column_ref),
                    scalar_func="extract",
                    scalar_func_args=[unit],
                )
                variant.select_cols = [
                    *(variant.select_cols or []),
                    SelectCol(expr=wrapped),
                ]
                grouped = [*(variant.group_by_cols or []), wrapped]
                grouped.sort(key=lambda g: g.signature_key)
                variant.group_by_cols = grouped
                if variant.grain == "row_level":
                    variant.grain = "grouped"
                _add_expansion_metadata(variant, "T1_extract_select_groupby")
                variants.append(variant)
    return variants
804
+
805
+
806
def _t2_date_trunc_groupby(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """T2: Select and group by ``date_trunc(unit)`` of each temporal column.

    One variant is produced per temporal column of the intent's tables and
    per unit in ``SimulatorConfig.DATE_TRUNC_EXPANSION_UNITS``. The wrapped
    expression is appended to SELECT and added to GROUP BY (kept sorted by
    ``signature_key``); a ``row_level`` grain becomes ``grouped``.
    *column_metadata* is not used here.
    """
    variants: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for column_ref in _get_temporal_columns(schema, table):
            for unit in SimulatorConfig.DATE_TRUNC_EXPANSION_UNITS:
                variant = copy.deepcopy(intent)
                wrapped = replace(
                    NormalizedExpr.from_column(column_ref),
                    scalar_func="date_trunc",
                    scalar_func_args=[unit],
                )
                variant.select_cols = [
                    *(variant.select_cols or []),
                    SelectCol(expr=wrapped),
                ]
                grouped = [*(variant.group_by_cols or []), wrapped]
                grouped.sort(key=lambda g: g.signature_key)
                variant.group_by_cols = grouped
                if variant.grain == "row_level":
                    variant.grain = "grouped"
                _add_expansion_metadata(variant, "T2_date_trunc_groupby")
                variants.append(variant)
    return variants
837
+
838
+
839
def _t3_date_window_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """T3: Add a ``date_window`` filter on each unfiltered temporal column.

    One variant is produced per temporal column without a filter and per
    preset in ``SimulatorConfig.DATE_WINDOW_EXPANSION_PRESETS``; the preset
    is copied into the filter's ``raw_value`` and the operator is ``>=``.
    Nothing is produced once the number of distinct filtered columns
    reaches ``SimulatorConfig.MAX_FILTERS``. *column_metadata* is not used
    here.
    """
    already_filtered = {
        flt.left_expr.primary_column for flt in (intent.filters_param or [])
    }
    if len(already_filtered) >= SimulatorConfig.MAX_FILTERS:
        return []

    variants: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for column_ref in _get_temporal_columns(schema, table):
            if column_ref in already_filtered:
                continue
            for preset in SimulatorConfig.DATE_WINDOW_EXPANSION_PRESETS:
                variant = copy.deepcopy(intent)
                window_filter = FilterParam(
                    left_expr=NormalizedExpr.from_column(column_ref),
                    op=">=",
                    value_type="date_window",
                    param_key="",
                    raw_value=dict(preset),
                )
                variant.filters_param = [
                    *(variant.filters_param or []),
                    window_filter,
                ]
                _add_expansion_metadata(variant, "T3_date_window_filter")
                variants.append(variant)
    return variants
871
+
872
+
873
def _t4_date_diff_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """T4: Add a ``date_diff`` filter on each unfiltered temporal column.

    One variant is produced per temporal column without a filter and per
    preset in ``SimulatorConfig.DATE_DIFF_EXPANSION_PRESETS``; the preset
    is copied into the filter's ``raw_value`` and the operator is ``<=``.
    Nothing is produced once the number of distinct filtered columns
    reaches ``SimulatorConfig.MAX_FILTERS``. *column_metadata* is not used
    here.
    """
    already_filtered = {
        flt.left_expr.primary_column for flt in (intent.filters_param or [])
    }
    if len(already_filtered) >= SimulatorConfig.MAX_FILTERS:
        return []

    variants: list[SimulatorIntent] = []
    for table in intent.tables or []:
        for column_ref in _get_temporal_columns(schema, table):
            if column_ref in already_filtered:
                continue
            for preset in SimulatorConfig.DATE_DIFF_EXPANSION_PRESETS:
                variant = copy.deepcopy(intent)
                diff_filter = FilterParam(
                    left_expr=NormalizedExpr.from_column(column_ref),
                    op="<=",
                    value_type="date_diff",
                    param_key="",
                    raw_value=dict(preset),
                )
                variant.filters_param = [
                    *(variant.filters_param or []),
                    diff_filter,
                ]
                _add_expansion_metadata(variant, "T4_date_diff_filter")
                variants.append(variant)
    return variants
905
+
906
+
907
+ # ---------------------------------------------------------------------------
908
+ # N-series (numeric scalar expansions)
909
+ # ---------------------------------------------------------------------------
910
+
911
def _n1_round_numeric(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """N1: Produce one variant per numeric SELECT column, wrapped in ROUND.

    A select column qualifies when its primary column is among the numeric
    measure columns of the intent's tables and it is not already wrapped in
    ``round``.  Each variant is a deep copy of *intent* in which exactly one
    select column is rounded with ``scalar_func_args=[0]``.

    ``column_metadata`` is not read here.
    """
    measure_columns: set[str] = set()
    for table_name in intent.tables or []:
        measure_columns.update(
            _get_numeric_measure_columns(schema, table_name)
        )

    variants: list[SimulatorIntent] = []
    for position, select_col in enumerate(intent.select_cols or []):
        if (
            select_col.expr.primary_column not in measure_columns
            or select_col.expr.scalar_func == "round"
        ):
            continue
        variant = copy.deepcopy(intent)
        rounded_expr = replace(
            variant.select_cols[position].expr,
            scalar_func="round",
            scalar_func_args=[0],
        )
        # NOTE(review): the replacement SelectCol is built from the
        # expression alone, so any other SelectCol fields fall back to
        # their defaults -- confirm this is intended.
        variant.select_cols[position] = SelectCol(expr=rounded_expr)
        _add_expansion_metadata(variant, "N1_round_numeric")
        variants.append(variant)
    return variants
936
+
937
+
938
def _n2_abs_filter(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """N2: Produce one variant per range filter, with its left side in ABS.

    A filter qualifies when its operator is one of ``>``, ``<``, ``>=``,
    ``<=``, it is not already wrapped in ``abs``, and *column_metadata*
    records the filtered column's role as ``NUMERIC_MEASURE``.  Each variant
    is a deep copy of *intent* with exactly one filter rewritten.

    ``schema`` is not read here; roles come from *column_metadata* only.
    """
    range_ops = (">", "<", ">=", "<=")
    variants: list[SimulatorIntent] = []
    for position, flt in enumerate(intent.filters_param or []):
        if flt.op not in range_ops or flt.left_expr.scalar_func == "abs":
            continue

        column_ref = flt.left_expr.primary_column
        table_name = _table_from_column_ref(column_ref)
        if not table_name or table_name not in column_metadata:
            continue

        # Metadata is keyed by the bare column name: drop everything up to
        # and including the first dot, if there is one.
        _, dot, remainder = column_ref.partition(".")
        bare_name = remainder if dot else column_ref
        role = (
            column_metadata.get(table_name, {})
            .get(bare_name, {})
            .get("role")
        )
        if role != ColumnRole.NUMERIC_MEASURE.value:
            continue

        variant = copy.deepcopy(intent)
        target = variant.filters_param[position]
        variant.filters_param[position] = replace(
            target,
            left_expr=replace(target.left_expr, scalar_func="abs"),
        )
        _add_expansion_metadata(variant, "N2_abs_filter")
        variants.append(variant)
    return variants
968
+
969
+
970
+ # ---------------------------------------------------------------------------
971
+ # Structural operators (DISTINCT, LIMIT, OR-groups, expression composition)
972
+ # ---------------------------------------------------------------------------
973
+
974
def _d1_add_distinct(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """D1: Return a single variant of *intent* with ``distinct=True``.

    Returns an empty list when the intent is already distinct, or when the
    intent object has no ``distinct`` attribute at all.  In the latter case
    there is nothing to toggle, so emitting a copy would only produce an
    unchanged intent mislabelled as a D1 expansion.

    ``schema`` and ``column_metadata`` are not read here; they are accepted
    for signature parity with the other operators.
    """
    if not hasattr(intent, "distinct") or intent.distinct:
        return []
    variant = copy.deepcopy(intent)
    variant.distinct = True
    _add_expansion_metadata(variant, "D1_add_distinct")
    return [variant]
986
+
987
+
988
def _l1_add_limit(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """L1: Produce one LIMIT variant per configured expansion value.

    Intents that already carry a limit (anything other than ``None``) yield
    no variants.  Otherwise every entry of
    ``SimulatorConfig.LIMIT_EXPANSION_VALUES`` gives one deep copy of
    *intent* with ``limit`` set to that value.

    ``schema`` and ``column_metadata`` are not read here.
    """
    if intent.limit is not None:
        return []

    def _with_limit(limit_value: Any) -> SimulatorIntent:
        variant = copy.deepcopy(intent)
        variant.limit = limit_value
        _add_expansion_metadata(variant, "L1_add_limit")
        return variant

    return [
        _with_limit(limit_value)
        for limit_value in SimulatorConfig.LIMIT_EXPANSION_VALUES
    ]
1002
+
1003
+
1004
def _f1_or_filter_group(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """F1: Produce one variant per eligible filter pair, joined by OR.

    Every unordered pair of existing filters is considered.  A pair is
    eligible only when neither filter has a ``right_expr`` and neither has
    value type ``date_window`` or ``date_diff``.  In the variant both
    filters of the pair get ``bool_op="OR"`` and ``filter_group=1``; all
    other filters are left untouched.

    ``schema`` and ``column_metadata`` are not read here.
    """
    filters = intent.filters_param or []
    if len(filters) < 2:
        return []

    temporal_value_types = ("date_window", "date_diff")

    def _groupable(flt: Any) -> bool:
        return (
            not flt.right_expr
            and flt.value_type not in temporal_value_types
        )

    # NOTE(review): the group id is always 1, also when the intent already
    # contains grouped filters -- confirm that merging is acceptable.
    or_group_id = 1

    variants: list[SimulatorIntent] = []
    for first_pos, first in enumerate(filters):
        if not _groupable(first):
            continue
        for second_pos in range(first_pos + 1, len(filters)):
            if not _groupable(filters[second_pos]):
                continue
            variant = copy.deepcopy(intent)
            for position in (first_pos, second_pos):
                variant.filters_param[position] = replace(
                    variant.filters_param[position],
                    bool_op="OR",
                    filter_group=or_group_id,
                )
            _add_expansion_metadata(variant, "F1_or_filter_group")
            variants.append(variant)
    return variants
1039
+
1040
+
1041
def _e1_expression_select(
    intent: SimulatorIntent, schema: SchemaGraph,
    column_metadata: dict[str, dict[str, dict[str, Any]]],
) -> list[SimulatorIntent]:
    """E1: Append a product expression to SELECT for each numeric column pair.

    The candidate columns are the numeric measure columns of every table in
    the intent (not the columns currently selected).  For each unordered
    pair a deep copy of *intent* is produced whose select list gains one
    extra column: a single multiplicative group over the two columns.
    Fewer than two candidate columns yields no variants.

    ``column_metadata`` is not read here.
    """
    measure_columns: list[str] = [
        column
        for table_name in intent.tables or []
        for column in _get_numeric_measure_columns(schema, table_name)
    ]
    if len(measure_columns) < 2:
        return []

    variants: list[SimulatorIntent] = []
    for left_pos, left_col in enumerate(measure_columns):
        for right_col in measure_columns[left_pos + 1:]:
            variant = copy.deepcopy(intent)
            product_expr = NormalizedExpr(
                add_groups=[MulGroup(multiply=[left_col, right_col])],
            )
            variant.select_cols = [
                *(variant.select_cols or []),
                SelectCol(expr=product_expr),
            ]
            _add_expansion_metadata(variant, "E1_expression_select")
            variants.append(variant)
    return variants
1068
+
1069
+
1070
+ # ---------------------------------------------------------------------------
1071
+ # Orchestration: single-depth expansion
1072
+ # ---------------------------------------------------------------------------
1073
+
1074
# Loose alias for a registry entry: a callable invoked as
# ``fn(intent, schema)`` whose result is iterated as a collection of variants.
_OperatorFn = Any


def _build_operator_registry(
    column_metadata: dict[str, dict[str, dict[str, Any]]],
    fk_map: dict[str, list[dict[str, str]]],
) -> dict[str, _OperatorFn]:
    """Build the operator table, keyed by short code (``"A1"`` ... ``"E1"``).

    Every entry is a two-argument callable ``(intent, schema)`` that
    forwards to the matching operator function with *column_metadata* bound;
    the B-series entries additionally bind *fk_map*.  Insertion order is
    A-series, B-series, C1, T-series, N-series, then D1, L1, F1, E1.

    The operator functions are looked up when an entry is called, not when
    the registry is built.
    """
    meta = column_metadata

    attribute_ops = {
        "A1": lambda intent, schema: _a1_add_filter(intent, schema, meta),
        "A2": lambda intent, schema: _a2_add_expr_filter(intent, schema, meta),
        "A3": lambda intent, schema: _a3_change_aggregation(intent, schema, meta),
        "A4": lambda intent, schema: _a4_add_groupby(intent, schema, meta),
        "A5": lambda intent, schema: _a5_add_orderby(intent, schema, meta),
        "A6": lambda intent, schema: _a6_add_having_value(intent, schema, meta),
        "A7": lambda intent, schema: _a7_add_having_expr(intent, schema, meta),
        "A8": lambda intent, schema: _a8_remove_filter(intent, schema, meta),
        "A9": lambda intent, schema: _a9_remove_groupby(intent, schema, meta),
        "A10": lambda intent, schema: _a10_remove_having(intent, schema, meta),
    }
    join_ops = {
        "B1": lambda intent, schema: _b1_add_dimension_join(
            intent, schema, fk_map, meta,
        ),
        "B2": lambda intent, schema: _b2_add_fact_join(
            intent, schema, fk_map, meta,
        ),
        "B3": lambda intent, schema: _b3_swap_dimension(
            intent, schema, fk_map, meta,
        ),
        "B4": lambda intent, schema: _b4_remove_table(
            intent, schema, fk_map, meta,
        ),
        "B5": lambda intent, schema: _b5_bridge_via_intermediate(
            intent, schema, fk_map, meta,
        ),
    }
    gold_ops = {
        "C1": lambda intent, schema: _c1_include_gold(intent, schema, meta),
    }
    temporal_ops = {
        "T1": lambda intent, schema: _t1_extract_select_groupby(
            intent, schema, meta,
        ),
        "T2": lambda intent, schema: _t2_date_trunc_groupby(intent, schema, meta),
        "T3": lambda intent, schema: _t3_date_window_filter(intent, schema, meta),
        "T4": lambda intent, schema: _t4_date_diff_filter(intent, schema, meta),
    }
    numeric_ops = {
        "N1": lambda intent, schema: _n1_round_numeric(intent, schema, meta),
        "N2": lambda intent, schema: _n2_abs_filter(intent, schema, meta),
    }
    structural_ops = {
        "D1": lambda intent, schema: _d1_add_distinct(intent, schema, meta),
        "L1": lambda intent, schema: _l1_add_limit(intent, schema, meta),
        "F1": lambda intent, schema: _f1_or_filter_group(intent, schema, meta),
        "E1": lambda intent, schema: _e1_expression_select(intent, schema, meta),
    }

    # Unpacking in this order keeps the registry's iteration order stable.
    return {
        **attribute_ops,
        **join_ops,
        **gold_ops,
        **temporal_ops,
        **numeric_ops,
        **structural_ops,
    }
1110
+
1111
+
1112
def _expand_single_depth(
    intents: list[SimulatorIntent],
    schema: SchemaGraph,
    operators: dict[str, _OperatorFn],
    seen_keys: set[str],
) -> list[SimulatorIntent]:
    """Apply every operator to every intent and collect the unseen variants.

    For each candidate the ``intent_key`` of its runtime form is checked
    against *seen_keys*, which is mutated in place.  A key is recorded as
    soon as it is first encountered, even if the candidate is then dropped
    for being ``grouped`` without any group-by columns.  Surviving
    candidates are passed through ``enforce_schema`` and the adjusted intent
    is what ends up in the result.
    """
    accepted: list[SimulatorIntent] = []
    for source_intent in intents:
        for operator in operators.values():
            for candidate in operator(source_intent, schema):
                candidate_key = intent_key(candidate.to_runtime_intent())
                if candidate_key in seen_keys:
                    continue
                seen_keys.add(candidate_key)
                if candidate.grain == "grouped" and not candidate.group_by_cols:
                    continue
                # enforce_schema returns a pair; only the intent is kept.
                enforced, _ = enforce_schema(candidate, schema)
                accepted.append(enforced)
    return accepted
1137
+
1138
+
1139
+ # ---------------------------------------------------------------------------
1140
+ # Public API
1141
+ # ---------------------------------------------------------------------------
1142
+
1143
def expand_gold_intents(
    gold_intents: list[SimulatorIntent],
    schema: SchemaGraph,
    limits: SchemaLimits | None = None,
    max_depth: int | None = None,
) -> list[SimulatorIntent]:
    """Expand gold intents into synthetic intents, layer by layer.

    Depth 1 applies every registered operator to every gold intent; each
    further depth re-expands only the variants produced by the previous
    one.  Expansion stops after *max_depth* layers or as soon as a layer
    yields nothing new.  Variants are deduplicated across all golds and
    depths by ``intent_key``; the gold intents' own keys are registered up
    front, so a variant identical to a gold is never returned.

    Side effect: when *limits* is given, its values are written onto the
    ``SimulatorConfig`` class attributes ``MAX_FILTERS``, ``MAX_GROUPBY``
    and ``MAX_TABLES`` and are not restored afterwards.

    Args:
        gold_intents: Seed SimulatorIntents to expand.
        schema: Schema graph for column/table introspection.
        limits: Optional SchemaLimits overriding MAX_FILTERS,
            MAX_GROUPBY, MAX_TABLES.
        max_depth: Expansion depth; defaults to
            ``SimulatorConfig.MAX_EXPANSION_DEPTH``.

    Returns:
        List of unique synthetic SimulatorIntents (gold intents excluded),
        ordered by depth.
    """
    if limits is not None:
        SimulatorConfig.MAX_FILTERS = limits.max_filters
        SimulatorConfig.MAX_GROUPBY = limits.max_groupby
        SimulatorConfig.MAX_TABLES = limits.max_tables
        log(
            f"expand_gold_intents: using SchemaLimits "
            f"max_filters={limits.max_filters}, "
            f"max_groupby={limits.max_groupby}, "
            f"max_tables={limits.max_tables}"
        )

    max_depth = (
        SimulatorConfig.MAX_EXPANSION_DEPTH if max_depth is None else max_depth
    )
    log(
        f"expand_gold_intents: expanding {len(gold_intents)} gold intents "
        f"with max_depth={max_depth}"
    )

    column_metadata = _build_column_metadata(schema)
    fk_map = _build_fk_map(schema)
    operators = _build_operator_registry(column_metadata, fk_map)

    # Seed the dedup set with the golds so they are never re-emitted.
    seen_keys: set[str] = {
        intent_key(gold.to_runtime_intent()) for gold in gold_intents
    }

    frontier: list[SimulatorIntent] = list(gold_intents)
    synthetic: list[SimulatorIntent] = []
    for depth in range(1, max_depth + 1):
        frontier = _expand_single_depth(
            frontier, schema, operators, seen_keys,
        )
        log(
            f"expand_gold_intents: depth={depth} produced "
            f"{len(frontier)} new variants"
        )
        if not frontier:
            # Nothing new at this depth, so deeper layers would be empty too.
            break
        synthetic.extend(frontier)

    log(
        f"expand_gold_intents: generated {len(synthetic)} "
        f"unique synthetic intents across {max_depth} depth(s)"
    )
    return synthetic