relationalai 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. relationalai/clients/snowflake.py +44 -15
  2. relationalai/clients/types.py +1 -0
  3. relationalai/clients/use_index_poller.py +446 -178
  4. relationalai/early_access/builder/std/__init__.py +1 -1
  5. relationalai/early_access/dsl/bindings/csv.py +4 -4
  6. relationalai/semantics/internal/internal.py +22 -4
  7. relationalai/semantics/lqp/executor.py +69 -18
  8. relationalai/semantics/lqp/intrinsics.py +23 -0
  9. relationalai/semantics/lqp/model2lqp.py +16 -6
  10. relationalai/semantics/lqp/passes.py +3 -4
  11. relationalai/semantics/lqp/primitives.py +38 -14
  12. relationalai/semantics/metamodel/builtins.py +152 -11
  13. relationalai/semantics/metamodel/factory.py +3 -2
  14. relationalai/semantics/metamodel/helpers.py +78 -2
  15. relationalai/semantics/reasoners/graph/core.py +343 -40
  16. relationalai/semantics/reasoners/optimization/solvers_dev.py +20 -1
  17. relationalai/semantics/reasoners/optimization/solvers_pb.py +24 -3
  18. relationalai/semantics/rel/compiler.py +5 -17
  19. relationalai/semantics/rel/executor.py +2 -2
  20. relationalai/semantics/rel/rel.py +6 -0
  21. relationalai/semantics/rel/rel_utils.py +37 -1
  22. relationalai/semantics/rel/rewrite/extract_common.py +153 -242
  23. relationalai/semantics/sql/compiler.py +540 -202
  24. relationalai/semantics/sql/executor/duck_db.py +21 -0
  25. relationalai/semantics/sql/executor/result_helpers.py +7 -0
  26. relationalai/semantics/sql/executor/snowflake.py +9 -2
  27. relationalai/semantics/sql/rewrite/denormalize.py +4 -6
  28. relationalai/semantics/sql/rewrite/recursive_union.py +23 -3
  29. relationalai/semantics/sql/sql.py +120 -46
  30. relationalai/semantics/std/__init__.py +9 -4
  31. relationalai/semantics/std/datetime.py +363 -0
  32. relationalai/semantics/std/math.py +77 -0
  33. relationalai/semantics/std/re.py +83 -0
  34. relationalai/semantics/std/strings.py +1 -1
  35. relationalai/tools/cli_controls.py +445 -60
  36. relationalai/util/format.py +78 -1
  37. {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/METADATA +3 -2
  38. {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/RECORD +41 -39
  39. relationalai/semantics/std/dates.py +0 -213
  40. {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/WHEEL +0 -0
  41. {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/entry_points.txt +0 -0
  42. {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/licenses/LICENSE +0 -0
@@ -52,39 +52,118 @@ class Compiler(c.Compiler):
52
52
  """
53
53
  return str(self.model_to_sql.to_sql(model, options)), model
54
54
 
55
+ @dataclass(frozen=True)
56
+ class OutputVar:
57
+ value: ir.Value
58
+ alias: Optional[str] = None
59
+ value_type: Optional[str] = None
60
+ task: Optional[ir.Task] = None
61
+
62
+ @dataclass
63
+ class RelationInfo:
64
+ used: bool = False
65
+ view_selects: list[sql.Select] = field(default_factory=list)
66
+ table_selects: list[sql.Select] = field(default_factory=list)
67
+ dynamic_table_selects: list[sql.Select] = field(default_factory=list)
68
+
69
+ @dataclass
70
+ class ImportSpec:
71
+ value: str
72
+ module: Optional[str] = None # e.g., "scipy.special"
73
+
74
+ def render(self) -> str:
75
+ return f"from {self.module} import {self.value}" if self.module else f"import {self.value}"
76
+
77
+
78
+ @dataclass
79
+ class UDFConfig:
80
+ handler: str
81
+ code: str
82
+ imports: list[ImportSpec] = field(default_factory=list)
83
+ packages: list[str] = field(default_factory=list)
55
84
 
56
85
  @dataclass
57
86
  class ModelToSQL:
58
87
  """ Generates SQL from an IR Model, assuming the compiler rewrites were done. """
59
88
 
60
89
  _is_duck_db: bool = False
90
+ _warehouse: str = 'MAIN_WH'
61
91
  _query_compilation: bool = False
92
+ _default_dynamic_table_target_lag: str = '5 minutes'
62
93
  relation_name_cache: NameCache = field(default_factory=NameCache)
63
94
  relation_arg_name_cache: NameCache = field(default_factory=NameCache)
64
- _error_relation_names: set[str] = field(
65
- default_factory=lambda: {'Error', 'pyrel_error_attrs'}
66
- )
95
+ relation_infos: dict[ir.Relation, RelationInfo] = field(default_factory=dict)
96
+ _error_relation_names: set[str] = field(default_factory=lambda: {'Error', 'pyrel_error_attrs'})
67
97
 
68
98
  def to_sql(self, model: ir.Model, options:dict) -> sql.Program:
99
+ self.relation_infos.clear()
69
100
  self._is_duck_db = options.get("is_duck_db", False)
101
+ self._warehouse = options.get("warehouse") or self._warehouse
70
102
  self._query_compilation = options.get("query_compilation", False)
103
+ self._default_dynamic_table_target_lag = (options.get("default_dynamic_table_target_lag") or
104
+ self._default_dynamic_table_target_lag)
71
105
  return sql.Program(self._sort_dependencies(self._union_output_selects(self._generate_statements(model))))
72
106
 
73
107
  def _generate_statements(self, model: ir.Model) -> list[sql.Node]:
74
- relations = self._get_relations(model)
108
+ table_relations, used_builtins = self._get_relations(model)
75
109
 
76
- self._register_relation_args(relations)
110
+ self._register_relation_args(table_relations)
77
111
  self._register_external_relations(model)
78
112
 
79
113
  statements: list[sql.Node] = []
80
- for relation in relations:
81
- statements.append(self._create_table(cast(ir.Relation, relation)))
114
+ # 1. Process root logical body
82
115
  root = cast(ir.Logical, model.root)
83
116
  for child in root.body:
84
117
  if isinstance(child, ir.Logical):
85
118
  statements.extend(self._create_statement(cast(ir.Logical, child)))
86
119
  elif isinstance(child, ir.Union):
87
120
  statements.append(self._create_recursive_view(cast(ir.Union, child)))
121
+
122
+ relation_selects = {
123
+ relation: info.dynamic_table_selects + info.view_selects + info.table_selects
124
+ for relation, info in self.relation_infos.items()
125
+ if info.dynamic_table_selects or info.view_selects or info.table_selects
126
+ }
127
+
128
+ # 3. Handle each relation with proper priority
129
+ for relation, selects in relation_selects.items():
130
+ table_name = self._relation_name(relation)
131
+
132
+ info = self._get_relation_info(relation)
133
+ if info.table_selects:
134
+ # Relation is a table → insert into it
135
+ columns = [self._var_name(relation.id, f) for f in relation.fields]
136
+ if len(selects) == 1:
137
+ statements.append(sql.Insert(table_name, columns, [], selects[0]))
138
+ else:
139
+ statements.append(sql.Insert(table_name, columns, [],
140
+ sql.CTE(False, f"{table_name}_cte", columns, selects, True)))
141
+ elif info.view_selects:
142
+ statements.append(sql.CreateView(table_name, selects))
143
+ else:
144
+ # Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
145
+ # As a workaround, we generate a CTE without DISTINCT, using UNION ALL.
146
+ # Then, we create a dynamic table with `SELECT DISTINCT * FROM CTE` to remove duplicates.
147
+ columns = [self._var_name(relation.id, f) for f in relation.fields]
148
+ statements.append(
149
+ sql.CreateDynamicTable(
150
+ table_name,
151
+ sql.CTE(False, f"{table_name}_cte", columns, selects, True),
152
+ self._default_dynamic_table_target_lag,
153
+ self._warehouse
154
+ )
155
+ )
156
+
157
+ # 4. Create physical tables for explicitly declared table relations
158
+ for relation in table_relations:
159
+ info = self.relation_infos.get(relation)
160
+ if info is None or info.table_selects:
161
+ statements.append(self._create_table(relation))
162
+
163
+ #5. Create Snowflake user-defined functions
164
+ if not self._is_duck_db:
165
+ statements.extend(self._create_user_defined_functions(used_builtins))
166
+
88
167
  return statements
89
168
 
90
169
  #--------------------------------------------------
@@ -121,7 +200,9 @@ class ModelToSQL:
121
200
 
122
201
  aliases = []
123
202
  for i, arg in enumerate(update.args):
124
- aliases.append((self._var_name(relation.id, relation.fields[i]), arg))
203
+ relation_field = old_relation.fields[i]
204
+ field_type = self._convert_type(relation_field.type)
205
+ aliases.append(OutputVar(arg, self._var_name(old_relation.id, relation_field), value_type=field_type))
125
206
 
126
207
  return self._make_select(lookups, aliases, nots, unions, constructs)
127
208
 
@@ -129,6 +210,7 @@ class ModelToSQL:
129
210
  update = v.collect_by_type(ir.Update, union).some()
130
211
 
131
212
  relation = update.relation
213
+ self.mark_used(relation)
132
214
  return sql.CreateView(
133
215
  self._relation_name(relation),
134
216
  sql.CTE(
@@ -142,6 +224,62 @@ class ModelToSQL:
142
224
  )
143
225
  )
144
226
 
227
+ def _create_user_defined_functions(self, relations: list[ir.Relation]) -> list[sql.CreateFunction]:
228
+ # Central UDF metadata configuration
229
+ udf_relations: dict[str, UDFConfig] = {
230
+ builtins.acot.name: UDFConfig(
231
+ handler="compute",
232
+ imports=[ImportSpec("math")],
233
+ code="""def compute(x): return math.atan(1 / x) if x != 0 else math.copysign(math.pi / 2, x)"""
234
+ ),
235
+ builtins.erf.name: UDFConfig(
236
+ handler="compute",
237
+ imports=[ImportSpec("math")],
238
+ code="""def compute(x): return math.erf(x)"""
239
+ ),
240
+ builtins.erfinv.name: UDFConfig(
241
+ handler="compute",
242
+ imports=[ImportSpec("erfinv", module="scipy.special")],
243
+ packages=["'scipy'"],
244
+ code="""def compute(x): return erfinv(x)"""
245
+ )
246
+ }
247
+
248
+ statements: list[sql.CreateFunction] = []
249
+
250
+ for r in relations:
251
+ meta = udf_relations.get(r.name)
252
+ if not meta:
253
+ continue
254
+
255
+ # Split relation fields into inputs and return type
256
+ # We expect a single return argument per builtin relation
257
+ return_type = None
258
+ input_columns: list[sql.Column] = []
259
+ for f in r.fields:
260
+ if f.input:
261
+ input_columns.append(sql.Column(self._var_name(r.id, f), self._convert_type(f.type)))
262
+ else:
263
+ return_type = self._convert_type(f.type)
264
+
265
+ # Build a full code block (imports + code)
266
+ imports_code = "\n".join(imp.render() for imp in meta.imports)
267
+ python_block = "\n".join(part for part in (imports_code, meta.code) if part)
268
+
269
+ assert return_type, f"No return type found for relation '{r.name}'"
270
+ statements.append(
271
+ sql.CreateFunction(
272
+ name=r.name,
273
+ inputs=input_columns,
274
+ return_type=return_type,
275
+ handler=meta.handler,
276
+ body=python_block,
277
+ packages=meta.packages
278
+ )
279
+ )
280
+
281
+ return statements
282
+
145
283
  def _create_statement(self, task: ir.Logical):
146
284
 
147
285
  # TODO - improve the typing info to avoid these casts
@@ -158,7 +296,6 @@ class ModelToSQL:
158
296
  var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
159
297
 
160
298
  statements = []
161
- # TODO - this is simplifying soooo much :crying_blood:
162
299
  if updates and not lookups and not nots and not aggs and not logicals and not unions:
163
300
  for u in updates:
164
301
  r = u.relation
@@ -171,13 +308,15 @@ class ModelToSQL:
171
308
  raise Exception(f"Expected SQL source to be a string, got: {type(src).__name__}")
172
309
  statements.append(sql.RawSource(src))
173
310
  else:
174
- # TODO: this is assuming that the updates are all static values
175
- # Insert static values: INSERT INTO ... SELECT hash(V1, ...), V2, V3 UNION ALL SELECT hash(V4, ...), V5, V6
311
+ # Generate select with static values: SELECT hash(V1, ...), V2, V3
176
312
  # We need to use `SELECT` instead of `VALUES` because Snowflake parses and restricts certain expressions in VALUES(...).
177
313
  # Built-in functions like HASH() or MD5() are often rejected unless used in SELECT.
178
- statements.append(
179
- sql.Insert(self._relation_name(r), [self._var_name(r.id, f) for f in r.fields], self._get_tuples(task, u), None)
180
- )
314
+ for values in self._get_tuples(task, u):
315
+ output_vars = [
316
+ sql.VarRef(str(value), alias=self._var_name(r.id, f))
317
+ for f, value in zip(r.fields, values)
318
+ ]
319
+ self.add_table_select(r, sql.Select(False, output_vars))
181
320
  elif lookups or outputs or nots or aggs or updates:
182
321
  # Some of the lookup relations we wrap into logical and we need to get them out for the SQL compilation.
183
322
  # For example QB `decimal(0)` in IR will look like this:
@@ -207,28 +346,39 @@ class ModelToSQL:
207
346
  continue
208
347
  # We shouldn’t create or populate tables for value types that can be directly sourced from existing Snowflake tables.
209
348
  if not self._is_value_type_population_relation(r):
210
- select = None
211
- if aggs:
212
- # After flatten it can be only one aggregation per rule.
213
- select = self._make_agg_select(u, all_lookups, aggs[0], nots, unions, constructs)
214
- elif ranks:
215
- # After flatten it can be only one rank per rule.
216
- select = self._make_rank_select(u, all_lookups, ranks[0], nots, unions, constructs)
349
+ if all_lookups and all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
350
+ # Assuming static values insert when you have only builtin lookups (like `cast`, etc.) and you do not have table lookups.
351
+ aliases = self._get_update_aliases(u, var_to_construct, var_to_union, True)
352
+ select = self._make_select(all_lookups, aliases, nots, unions, constructs)
353
+ self.add_table_select(r, select)
217
354
  else:
218
- aliases = []
219
- for i, arg in enumerate(u.args):
220
- field_name = self._var_name(r.id, r.fields[i])
221
- aliases.append(self._get_alias(field_name, arg, var_to_construct, var_to_union))
222
-
223
- if not unions:
224
- select = self._make_select(all_lookups, aliases, nots, unions, constructs, True)
225
- elif lookups:
226
- select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, True)
355
+ select = None
356
+ drv = DerivedRelationsVisitor()
357
+ task.accept(drv)
358
+ if aggs:
359
+ # After flatten it can be only one aggregation per rule.
360
+ select = self._make_agg_select(u, all_lookups, aggs[0], nots, unions, constructs)
361
+ elif ranks:
362
+ # After flatten it can be only one rank per rule.
363
+ select = self._make_rank_select(u, all_lookups, ranks[0], nots, unions, constructs)
227
364
  else:
228
- select = self._make_full_outer_join_select(aliases, unions, constructs, True)
229
-
230
- statements.append(sql.Insert(self._relation_name(r), [self._var_name(r.id, f) for f in r.fields],
231
- [], select))
365
+ # Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
366
+ # That is why we generate statements without DISTINCT, and we remove duplicates later
367
+ # by using CTE + DISTINCT to declare the Dynamic Tables
368
+ distinct = True if self._is_duck_db or not drv.is_derived() else False
369
+ aliases = self._get_update_aliases(u, var_to_construct, var_to_union)
370
+
371
+ if not unions:
372
+ select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct)
373
+ elif lookups:
374
+ select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct)
375
+ else:
376
+ select = self._make_full_outer_join_select(aliases, unions, constructs, distinct)
377
+
378
+ if drv.is_derived() and not self._is_duck_db:
379
+ self.add_dynamic_table_select(r, select)
380
+ else:
381
+ self.add_view_select(r, select)
232
382
  elif outputs:
233
383
  # output a query: SELECT ... FROM ... WHERE ...
234
384
  aliases = []
@@ -236,7 +386,7 @@ class ModelToSQL:
236
386
  for output in outputs:
237
387
  distinct = distinct or output.keys is None
238
388
  for key, arg in output.aliases:
239
- aliases.append(self._get_alias(key, arg, var_to_construct, var_to_union))
389
+ aliases.append(self._get_alias(key, arg, None, var_to_construct, var_to_union))
240
390
 
241
391
  if not unions:
242
392
  if all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
@@ -331,7 +481,7 @@ class ModelToSQL:
331
481
 
332
482
  seen_args = set()
333
483
  outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
334
- sub_query_outputs: list[Tuple[Optional[str], ir.Value]|Tuple[Optional[str], ir.Value, ir.Task]] = []
484
+ sub_query_outputs: list[OutputVar] = []
335
485
 
336
486
  relation = update.relation
337
487
  agg_var = agg.args[0] if agg.aggregation == builtins.count else agg.args[1]
@@ -340,22 +490,23 @@ class ModelToSQL:
340
490
 
341
491
  for i, arg in enumerate(update.args):
342
492
  if arg not in seen_args:
343
- field_name = self._var_name(relation.id, relation.fields[i])
493
+ relation_field = relation.fields[i]
494
+ field_type = self._convert_type(relation_field.type)
495
+ field_name = self._var_name(relation.id, relation_field)
344
496
  if isinstance(arg, ir.Var) and arg == agg_var:
345
- outputs.append(sql.VarRef(f"{agg.aggregation.name}({field_name})", alias=field_name))
346
- sub_query_outputs.append((field_name, arg, agg))
497
+ outputs.append(sql.VarRef(f"{agg.aggregation.name}({field_name})", alias=field_name, type=field_type))
498
+ sub_query_outputs.append(OutputVar(arg, field_name, task=agg))
347
499
  else:
348
- var_ref = sql.VarRef(field_name)
349
- group_by.append(var_ref)
350
- outputs.append(var_ref)
351
- sub_query_outputs.append((field_name, arg))
500
+ group_by.append(sql.VarRef(field_name))
501
+ outputs.append(sql.VarRef(field_name, alias=field_name, type=field_type))
502
+ sub_query_outputs.append(OutputVar(arg, field_name))
352
503
  seen_args.add(arg)
353
504
 
354
505
  for arg in agg.projection:
355
506
  if arg not in seen_args:
356
507
  if agg.aggregation == builtins.count and arg == agg.projection[-1]:
357
508
  continue
358
- sub_query_outputs.append((None, arg))
509
+ sub_query_outputs.append(OutputVar(value=arg))
359
510
  seen_args.add(arg)
360
511
 
361
512
  sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)
@@ -379,7 +530,7 @@ class ModelToSQL:
379
530
  Cat AS v0, cat_name AS v1
380
531
  WHERE
381
532
  v0.cat = v1.cat
382
- ) LIMIT 10;
533
+ ) ORDER BY v LIMIT 10;
383
534
 
384
535
  Rationale:
385
536
  In the IR, it’s not always explicit whether rank should be applied over distinct rows.
@@ -411,7 +562,7 @@ class ModelToSQL:
411
562
 
412
563
  seen_args = set()
413
564
  outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
414
- sub_query_outputs: list[Tuple[Optional[str], ir.Value] | Tuple[Optional[str], ir.Value, ir.Task]] = []
565
+ sub_query_outputs: list[OutputVar] = []
415
566
 
416
567
  order_by_vars = []
417
568
  for arg, is_ascending in zip(rank.args, rank.arg_is_ascending):
@@ -420,30 +571,33 @@ class ModelToSQL:
420
571
 
421
572
  relation = update.relation
422
573
 
574
+ rank_result_field_name = None
423
575
  for i, arg in enumerate(update.args):
424
576
  if arg not in seen_args:
425
- field_name = self._var_name(relation.id, relation.fields[i])
577
+ relation_field = relation.fields[i]
578
+ field_type = self._convert_type(relation_field.type)
579
+ field_name = self._var_name(relation.id, relation_field)
426
580
  if isinstance(arg, ir.Var) and arg == rank.result:
427
- outputs.append(sql.RowNumberVar(order_by_vars, partition_by_vars, field_name))
581
+ rank_result_field_name = field_name
582
+ outputs.append(sql.RowNumberVar(order_by_vars, partition_by_vars, field_name, field_type))
428
583
  else:
429
- outputs.append(sql.VarRef(field_name))
430
- sub_query_outputs.append((field_name, arg))
584
+ outputs.append(sql.VarRef(field_name, alias=field_name, type=field_type))
585
+ sub_query_outputs.append(OutputVar(arg, field_name))
431
586
  seen_args.add(arg)
432
587
 
433
588
  for arg in rank.projection:
434
589
  if arg not in seen_args:
435
- sub_query_outputs.append((None, arg))
590
+ sub_query_outputs.append(OutputVar(value=arg))
436
591
  seen_args.add(arg)
437
592
 
438
593
  sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)
439
594
 
440
- return sql.Select(False, outputs, sub_select, limit=rank.limit)
595
+ assert rank_result_field_name is not None, "Rank result variable not found in update.args."
596
+ return sql.Select(False, outputs, sub_select, order_by=[sql.VarRef(rank_result_field_name)], limit=rank.limit)
441
597
 
442
- def _make_match_select(self, lookups: list[ir.Lookup],
443
- outputs: list[Tuple[Optional[str], ir.Value] | Tuple[Optional[str], ir.Value, ir.Task]],
444
- unions: list[ir.Union], nots: Optional[list[ir.Not]] = None,
445
- constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
446
- is_output: bool = False):
598
+ def _make_match_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], unions: list[ir.Union],
599
+ nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
600
+ distinct: bool = False, is_output: bool = False):
447
601
 
448
602
  """
449
603
  Generate a SQL SELECT statement representing a match operation.
@@ -480,8 +634,10 @@ class ModelToSQL:
480
634
  froms, joins, wheres, sql_vars, var_column, var_lookups = self._extract_match_lookups_metadata(table_lookups, union_lookups)
481
635
 
482
636
  builtin_lookups = OrderedSet.from_iterable(t for t in lookups if builtins.is_builtin(t.relation))
483
- builtin_vars, builtin_wheres = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars,
484
- var_to_construct, outputs)
637
+ builtin_vars, builtin_wheres, builtin_table_expressions = (
638
+ self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
639
+
640
+ froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
485
641
 
486
642
  wheres.extend(builtin_wheres)
487
643
 
@@ -502,10 +658,9 @@ class ModelToSQL:
502
658
 
503
659
  return sql.Select(distinct, vars, froms, where, joins, is_output=is_output)
504
660
 
505
- def _make_full_outer_join_select(self,
506
- outputs: list[Tuple[Optional[str], ir.Value] | Tuple[Optional[str], ir.Value, ir.Task]],
507
- unions: list[ir.Union], constructs: Optional[list[ir.Construct]] = None,
508
- distinct: bool = False, is_output: bool = False):
661
+ def _make_full_outer_join_select(self, outputs: list[OutputVar], unions: list[ir.Union],
662
+ constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
663
+ is_output: bool = False):
509
664
 
510
665
  """
511
666
  Generate a SQL SELECT statement representing a match operation that combines multiple sets of data
@@ -563,8 +718,7 @@ class ModelToSQL:
563
718
 
564
719
  return sql.Select(distinct, vars, froms, where, joins, is_output=is_output)
565
720
 
566
- def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup],
567
- outputs: list[Tuple[Optional[str], ir.Value] | Tuple[Optional[str], ir.Value, ir.Task]],
721
+ def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup], outputs: list[OutputVar],
568
722
  nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
569
723
  distinct: bool = False) -> sql.Select:
570
724
 
@@ -661,8 +815,14 @@ class ModelToSQL:
661
815
  self._extract_left_outer_joins_lookups_metadata(task, table_lookups, nots))
662
816
 
663
817
  builtin_lookups = OrderedSet.from_iterable(t for t in lookups if builtins.is_builtin(t.relation))
664
- builtin_vars, builtin_wheres = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars,
665
- var_to_construct, outputs)
818
+ builtin_vars, builtin_wheres, builtin_table_expressions = (
819
+ self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
820
+
821
+ # SF in case of `LEFT OUTER JOIN` and `ARRAY_GENERATE_RANGE` doesn't allow usage of `ON TRUE` but
822
+ # for DuckDB this is mandatory that is why we have 2 different join classes.
823
+ make_join = (lambda e, a: sql.Join(e, a)) if self._is_duck_db else (lambda e, a: sql.JoinWithoutCondition(e, a))
824
+ joins.extend(make_join(expr, alias) for alias, expr in builtin_table_expressions.items())
825
+
666
826
  wheres.extend(builtin_wheres)
667
827
 
668
828
  construct_wheres = self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
@@ -678,10 +838,9 @@ class ModelToSQL:
678
838
 
679
839
  return sql.Select(distinct, vars, froms, where, joins, is_output=True)
680
840
 
681
- def _make_select(self, lookups: list[ir.Lookup],
682
- outputs: list[Tuple[Optional[str], ir.Value]|Tuple[Optional[str], ir.Value, ir.Task]],
683
- nots: Optional[list[ir.Not]] = None, unions: Optional[list[ir.Union]] = None,
684
- constructs: Optional[list[ir.Construct]] = None, distinct: bool = False, is_output: bool = False) -> sql.Select:
841
+ def _make_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], nots: Optional[list[ir.Not]] = None,
842
+ unions: Optional[list[ir.Union]] = None, constructs: Optional[list[ir.Construct]] = None,
843
+ distinct: bool = False, is_output: bool = False) -> sql.Select:
685
844
 
686
845
  var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
687
846
 
@@ -692,8 +851,11 @@ class ModelToSQL:
692
851
  froms, wheres, sql_vars, var_column, var_lookups = self._extract_lookups_metadata(table_lookups)
693
852
 
694
853
  builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
695
- builtin_vars, builtin_wheres = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars,
696
- var_to_construct, outputs)
854
+ builtin_vars, builtin_wheres, builtin_table_expressions = (
855
+ self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
856
+
857
+ froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
858
+
697
859
  wheres.extend(builtin_wheres)
698
860
 
699
861
  construct_wheres = self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
@@ -991,14 +1153,19 @@ class ModelToSQL:
991
1153
  """Generate hash expression like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)."""
992
1154
  elements = []
993
1155
  for val in values:
994
- val = resolve_builtin_var(val)
995
- if isinstance(val, ir.Var):
996
- if val in var_to_construct:
997
- elements.append(self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[val]))
1156
+ resolved_val = resolve_builtin_var(val)
1157
+ if val != resolved_val and isinstance(resolved_val, str):
1158
+ # In case we parsed builtin into some expression, we may add it as an element.
1159
+ # For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
1160
+ elements.append(f"{resolved_val}")
1161
+ continue
1162
+ if isinstance(resolved_val, ir.Var):
1163
+ if resolved_val in var_to_construct:
1164
+ elements.append(self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[resolved_val]))
998
1165
  else:
999
- elements.append(reference(val))
1166
+ elements.append(reference(resolved_val))
1000
1167
  else:
1001
- elements.append(self._convert_value(val, True))
1168
+ elements.append(str(self._convert_value(resolved_val)))
1002
1169
  return f"hash({', '.join(elements)})"
1003
1170
 
1004
1171
  def _resolve_construct_var(self, reference, resolve_builtin_var, var_to_construct, construct: ir.Construct):
@@ -1012,17 +1179,20 @@ class ModelToSQL:
1012
1179
  def _resolve_builtins(self, builtin_lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
1013
1180
  var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
1014
1181
  var_to_construct: dict[ir.Var, ir.Construct],
1015
- outputs: Optional[list[Tuple[Optional[str], ir.Value]|Tuple[Optional[str], ir.Value, ir.Task]]] = None):
1182
+ outputs: Optional[list[OutputVar]] = None):
1016
1183
 
1017
1184
  wheres: list[sql.Expr] = []
1185
+ # We need to maintain a mapping of these builtin expressions because they generate a new table, which must be
1186
+ # referenced in the FROM clause as part of a JOIN. Structure is `SQL table variable` -> `generated expression`
1187
+ table_expressions: dict[str, str] = {}
1018
1188
  builtin_vars: dict[ir.Var, ir.Value|str|int] = {}
1019
1189
  # TODO: remove this when we introduce date periods in builtins
1020
1190
  date_period_var_type: dict[ir.Var, str] = {}
1021
1191
 
1022
1192
  output_vars = {
1023
- output[1]
1193
+ output.value
1024
1194
  for output in outputs or []
1025
- if isinstance(output[1], ir.Var)
1195
+ if isinstance(output.value, ir.Var)
1026
1196
  }
1027
1197
 
1028
1198
  intermediate_builtin_vars: set[ir.Var] = {
@@ -1073,6 +1243,46 @@ class ModelToSQL:
1073
1243
  idx = self._var_to_expr(idx_raw, reference, resolve_builtin_var, var_to_construct)
1074
1244
  assert isinstance(output, ir.Var)
1075
1245
  builtin_vars[output] = f"split_part({s}, {separator}, {idx})"
1246
+ elif relation == builtins.split:
1247
+ assert len(args) == 4, f"Expected 4 args for `split`, got {len(args)}: {args}"
1248
+ separator_raw, value_raw, index, part = args
1249
+ value = self._var_to_expr(value_raw, reference, resolve_builtin_var, var_to_construct)
1250
+ separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
1251
+ table_sql_var = f"v{len(sql_vars)}"
1252
+ sql_vars[lookup] = table_sql_var
1253
+ if self._is_duck_db:
1254
+ table_alias = f"{table_sql_var}(data)"
1255
+ table_expressions[table_alias] = f"VALUES(string_split({value}, {separator}))"
1256
+
1257
+ part_expr = f"unnest({table_sql_var}.data)"
1258
+ index_expr = f"generate_subscripts({table_sql_var}.data, 1)"
1259
+ else:
1260
+ table_expressions[table_sql_var] = f"LATERAL FLATTEN(input => SPLIT({value}, {separator}))"
1261
+
1262
+ # SF returns values in `""` and to avoid this, we need to cast it to `TEXT` type
1263
+ part_expr = f"cast({table_sql_var}.value as TEXT)"
1264
+ index_expr = f"({table_sql_var}.index + 1)" # SF is 0-based internally, adjust to it back
1265
+ assert isinstance(index, ir.Var) and isinstance(part, ir.Var), "Third and fourth arguments (index, part) must be variables"
1266
+ builtin_vars[part] = part_expr
1267
+ builtin_vars[index] = index_expr
1268
+ elif relation == builtins.range:
1269
+ assert len(args) == 4, f"Expected 4 args for `range`, got {len(args)}: {args}"
1270
+ start_raw, stop_raw, step_raw, result = args
1271
+ start = self._var_to_expr(start_raw, reference, resolve_builtin_var, var_to_construct)
1272
+ stop = self._var_to_expr(stop_raw, reference, resolve_builtin_var, var_to_construct)
1273
+ step = self._var_to_expr(step_raw, reference, resolve_builtin_var, var_to_construct)
1274
+ table_sql_var = f"v{len(sql_vars)}"
1275
+ sql_vars[lookup] = table_sql_var
1276
+ # In SQL range is 1...stop exclusive, and because we did `-1` in PyRel v1 we need to return it here
1277
+ if self._is_duck_db:
1278
+ table_expr = f"LATERAL range(cast({start} as bigint), cast(({stop} + 1) as bigint), cast({step} as bigint))"
1279
+ expr = f"{table_sql_var}.range"
1280
+ else:
1281
+ table_expr = f"LATERAL FLATTEN(input => ARRAY_GENERATE_RANGE({start}, ({stop} + 1), {step}))"
1282
+ expr = f"{table_sql_var}.value"
1283
+ table_expressions[table_sql_var] = table_expr
1284
+ assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable"
1285
+ builtin_vars[result] = f"{expr}"
1076
1286
  elif relation == builtins.cast:
1077
1287
  assert len(args) == 3, f"Expected 3 args for `cast`, got {len(args)}: {args}"
1078
1288
 
@@ -1080,38 +1290,50 @@ class ModelToSQL:
1080
1290
  assert isinstance(result, ir.Var), "Third argument (result) must be a variable"
1081
1291
 
1082
1292
  builtin_vars[result] = original_raw
1083
- elif relation in (builtins.isnan, builtins.isinf):
1293
+ elif relation in {builtins.isnan, builtins.isinf}:
1084
1294
  arg_expr = self._var_to_expr(args[0], reference, resolve_builtin_var, var_to_construct)
1085
1295
  expr = "cast('NaN' AS DOUBLE)" if relation == builtins.isnan else "cast('Infinity' AS DOUBLE)"
1086
1296
  wheres.append(sql.Terminal(f"{arg_expr} = {expr}"))
1297
+ elif relation == builtins.construct_date:
1298
+ assert len(args) == 4, f"Expected 4 args for `construct_date`, got {len(args)}: {args}"
1299
+ year_raw, month_raw, day_raw, result = args
1300
+ year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
1301
+ month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
1302
+ day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
1303
+
1304
+ assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable."
1305
+ if self._is_duck_db:
1306
+ expr = f"make_date(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint))"
1307
+ else:
1308
+ expr = f"date_from_parts({year}, {month}, {day})"
1309
+ builtin_vars[result] = expr
1087
1310
  elif relation == builtins.construct_datetime_ms_tz:
1088
1311
  assert len(args) == 9, f"Expected 9 args for `construct_datetime_ms_tz`, got {len(args)}: {args}"
1089
1312
 
1090
- year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw, tz, result = args
1091
- tz = self._convert_value(tz, quote_strings=False)
1092
- assert isinstance(tz, str), "Timezone argument (tz) must be a string."
1313
+ year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw, tz_raw, result = args
1093
1314
  assert isinstance(result, ir.Var), "Ninth argument (result) must be a variable."
1094
1315
 
1095
1316
  year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
1096
1317
  month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
1097
1318
  day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
1098
- hour = self._convert_value(hour_raw)
1099
- minute = self._convert_value(minute_raw)
1100
- second = self._convert_value(second_raw)
1101
- millisecond = self._convert_value(millisecond_raw)
1319
+ hour = self._var_to_expr(hour_raw, reference, resolve_builtin_var, var_to_construct)
1320
+ minute = self._var_to_expr(minute_raw, reference, resolve_builtin_var, var_to_construct)
1321
+ second = self._var_to_expr(second_raw, reference, resolve_builtin_var, var_to_construct)
1322
+ millisecond = self._var_to_expr(millisecond_raw, reference, resolve_builtin_var, var_to_construct)
1323
+ tz = self._var_to_expr(tz_raw, reference, resolve_builtin_var, var_to_construct)
1102
1324
 
1103
1325
  if self._is_duck_db:
1104
- sub_expr = (f"make_timestamp({year}, {month}, {day}, {hour}, {minute}, "
1105
- f"{second} + {millisecond}/1000.0)")
1106
- if tz.lower() != "utc":
1107
- sub_expr = f"(({sub_expr} at time zone '{tz}') at time zone 'UTC')"
1326
+ sub_expr = (f"make_timestamp(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint), "
1327
+ f"cast({hour} as bigint), cast({minute} as bigint), cast({second} as bigint) + {millisecond}/1000.0)")
1328
+ if tz.lower() != "'utc'":
1329
+ sub_expr = f"(({sub_expr} at time zone {tz}) at time zone 'UTC')"
1108
1330
  else:
1109
1331
  sub_expr = (f"to_timestamp_ntz(lpad({year}, 4, '0') || '-' || lpad({month}, 2, '0') || '-' || "
1110
1332
  f"lpad({day}, 2, '0') || ' ' || lpad({hour}, 2, '0') || ':' || "
1111
1333
  f"lpad({minute}, 2, '0') || ':' || lpad({second}, 2, '0') || '.' || "
1112
1334
  f"lpad({millisecond}, 3, '0'), 'YYYY-MM-DD HH24:MI:SS.FF3')")
1113
- if tz.lower() != "utc":
1114
- sub_expr = f"convert_timezone('{tz}', 'UTC', {sub_expr})"
1335
+ if tz.lower() != "'utc'":
1336
+ sub_expr = f"convert_timezone({tz}, 'UTC', {sub_expr})"
1115
1337
  builtin_vars[result] = f"cast({sub_expr} as DATETIME)"
1116
1338
  else:
1117
1339
  # Assuming infix binary or ternary operators here
@@ -1150,6 +1372,12 @@ class ModelToSQL:
1150
1372
  function = "levenshtein" if self._is_duck_db else "editdistance"
1151
1373
  assert isinstance(args[2], ir.Var)
1152
1374
  builtin_vars[args[2]] = f"{function}({left}, {right})"
1375
+ elif relation == builtins.concat:
1376
+ assert len(args) == 3, f"Expected 3 args for `concat`, got {len(args)}: {args}"
1377
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1378
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1379
+ assert isinstance(args[2], ir.Var)
1380
+ builtin_vars[args[2]] = f"concat({left}, {right})"
1153
1381
  elif relation == builtins.join:
1154
1382
  assert len(args) == 3, f"Expected 3 args for `join`, got {len(args)}: {args}"
1155
1383
  assert isinstance(lhs, tuple)
@@ -1175,12 +1403,32 @@ class ModelToSQL:
1175
1403
  elif relation == builtins.parse_float and isinstance(rhs, ir.Var):
1176
1404
  left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1177
1405
  builtin_vars[rhs] = f"cast({left} AS DOUBLE)"
1406
+ elif relation == builtins.parse_date:
1407
+ if self._is_duck_db:
1408
+ raise Exception("DuckDB: unsupported builtin relation 'parse_date'.")
1409
+ assert len(args) == 3, f"Expected 3 args for `parse_date`, got {len(args)}: {args}"
1410
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1411
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1412
+ assert isinstance(args[2], ir.Var)
1413
+ builtin_vars[args[2]] = f"to_date({left}, {right})"
1414
+ elif relation == builtins.parse_datetime:
1415
+ assert len(args) == 3, f"Expected 3 args for `parse_datetime`, got {len(args)}: {args}"
1416
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1417
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1418
+ sub_expr = left
1419
+ if 'z' in right: # this means that out datetime formatter includes timezone, and we need to convert first.
1420
+ if self._is_duck_db:
1421
+ sub_expr = f"({left} AT TIME ZONE 'UTC')"
1422
+ else:
1423
+ sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
1424
+ assert isinstance(args[2], ir.Var)
1425
+ builtin_vars[args[2]] = f"cast({sub_expr} as DATETIME)"
1178
1426
  elif relation in builtins.date_periods and isinstance(rhs, ir.Var):
1179
1427
  builtin_vars[rhs] = lhs
1180
1428
  date_period_var_type[rhs] = relation.name
1181
1429
  elif relation in builtins.date_builtins:
1182
- if relation in (builtins.date_add, builtins.date_subtract, builtins.datetime_add,
1183
- builtins.datetime_subtract):
1430
+ if relation in {builtins.date_add, builtins.date_subtract, builtins.datetime_add,
1431
+ builtins.datetime_subtract}:
1184
1432
  assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
1185
1433
  assert isinstance(rhs, ir.Var), f"Period variable must be `ir.Var`, got: {rhs}"
1186
1434
  period = date_period_var_type[rhs]
@@ -1189,10 +1437,10 @@ class ModelToSQL:
1189
1437
  left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1190
1438
 
1191
1439
  if self._is_duck_db:
1192
- op = "+" if relation in (builtins.date_add, builtins.datetime_add) else "-"
1193
- expr = f"({left} {op} interval {period_val} {period})"
1440
+ op = "+" if relation in {builtins.date_add, builtins.datetime_add} else "-"
1441
+ expr = f"({left} {op} {period_val} * interval 1 {period})"
1194
1442
  else:
1195
- sign = 1 if relation in (builtins.date_add, builtins.datetime_add) else -1
1443
+ sign = 1 if relation in {builtins.date_add, builtins.datetime_add} else -1
1196
1444
  expr = f"dateadd({period}, ({sign} * {period_val}), {left})"
1197
1445
 
1198
1446
  result_var = args[2]
@@ -1206,9 +1454,13 @@ class ModelToSQL:
1206
1454
  assert isinstance(rhs, ir.Var), f"Resulting variable must be `ir.Var`, got: {rhs}"
1207
1455
  expr_map = {
1208
1456
  builtins.date_year: "year",
1457
+ builtins.date_quarter: "quarter",
1209
1458
  builtins.date_month: "month",
1210
1459
  builtins.date_week: "week",
1211
- builtins.date_day: "day"
1460
+ builtins.date_day: "day",
1461
+ builtins.date_dayofyear: "dayofyear",
1462
+ builtins.date_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
1463
+ builtins.datetime_second: "second",
1212
1464
  }
1213
1465
  expr = expr_map.get(relation)
1214
1466
  lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
@@ -1217,19 +1469,34 @@ class ModelToSQL:
1217
1469
  result_var = args[2]
1218
1470
  assert isinstance(result_var, ir.Var), f"Resulting variable must be `ir.Var`, got: {result_var}"
1219
1471
  expr_map = {
1472
+ builtins.datetime_year: "year",
1473
+ builtins.datetime_quarter: "quarter",
1474
+ builtins.datetime_month: "month",
1220
1475
  builtins.datetime_week: "week",
1476
+ builtins.datetime_day: "day",
1477
+ builtins.datetime_dayofyear: "dayofyear",
1478
+ builtins.datetime_hour: "hour",
1479
+ builtins.datetime_minute: "minute",
1480
+ builtins.datetime_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
1481
+ builtins.dates_period_days: "date_diff" if self._is_duck_db else "datediff",
1482
+ builtins.datetimes_period_milliseconds: "date_diff" if self._is_duck_db else "datediff"
1221
1483
  }
1222
1484
  expr = expr_map.get(relation)
1223
1485
  lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1224
- rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct, quote_strings=False)
1225
- sub_expr = self._convert_timezone(lhs, rhs)
1486
+ rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1487
+ if relation == builtins.dates_period_days:
1488
+ sub_expr = f"'day', {lhs}, {rhs}" if self._is_duck_db else f"day, {lhs}, {rhs}"
1489
+ elif relation == builtins.datetimes_period_milliseconds:
1490
+ sub_expr = f"'millisecond', {lhs}, {rhs}" if self._is_duck_db else f"millisecond, {lhs}, {rhs}"
1491
+ else:
1492
+ sub_expr = self._convert_timezone(lhs, rhs)
1226
1493
  builtin_vars[result_var] = f"{expr}({sub_expr})"
1227
1494
  else:
1228
1495
  raise NotImplementedError("Unsupported number of arguments for date builtin (3+).")
1229
1496
  elif relation == builtins.construct_date_from_datetime:
1230
- assert len(args) == 3, f"Expected 3 args for `construct_date`, got {len(args)}: {args}"
1497
+ assert len(args) == 3, f"Expected 3 args for `construct_date_from_datetime`, got {len(args)}: {args}"
1231
1498
  dt_raw, tz, result = args
1232
- tz = self._convert_value(tz, quote_strings=False)
1499
+ tz = self._convert_value(tz)
1233
1500
 
1234
1501
  assert isinstance(tz, str), "Timezone argument (tz) must be a string."
1235
1502
  assert isinstance(result, ir.Var), "Third argument (result) must be a variable."
@@ -1253,9 +1520,14 @@ class ModelToSQL:
1253
1520
  if rel_name == builtins.factorial.name and self._is_duck_db:
1254
1521
  # Factorial requires an integer operand in DuckDB
1255
1522
  sub_expr = f"{left}::INTEGER"
1523
+ elif rel_name == builtins.log10.name:
1524
+ # log10 is not supported, so we use log with base 10
1525
+ sub_expr = f"10, {left}"
1526
+ method = "log"
1256
1527
  expr = f"{method}({sub_expr})"
1257
- elif rel_name in (builtins.minimum.name, builtins.maximum.name, builtins.trunc_div.name,
1258
- builtins.power.name, builtins.mod.name, builtins.pow.name):
1528
+ elif rel_name in {builtins.minimum.name, builtins.maximum.name, builtins.trunc_div.name,
1529
+ builtins.power.name, builtins.mod.name, builtins.pow.name,
1530
+ builtins.log.name}:
1259
1531
  assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
1260
1532
 
1261
1533
  result_var = args[2]
@@ -1269,6 +1541,8 @@ class ModelToSQL:
1269
1541
  expr = f"trunc({left} / {right})"
1270
1542
  elif rel_name == builtins.power.name or rel_name == builtins.pow.name:
1271
1543
  expr = f"power({left}, {right})"
1544
+ elif rel_name == builtins.log.name:
1545
+ expr = f"log({left}, {right})"
1272
1546
  else:
1273
1547
  expr = f"mod({left}, {right})"
1274
1548
  else:
@@ -1278,15 +1552,11 @@ class ModelToSQL:
1278
1552
  f"but got `{type(result_var).__name__}`: {result_var}"
1279
1553
  )
1280
1554
  builtin_vars[result_var] = expr
1281
- elif relation in (builtins.parse_int64, builtins.parse_int128) and isinstance(rhs, ir.Var):
1555
+ elif relation in {builtins.parse_int64, builtins.parse_int128} and isinstance(rhs, ir.Var):
1282
1556
  builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct, False)
1283
1557
  elif helpers.is_from_cast(lookup) and isinstance(rhs, ir.Var):
1284
1558
  # For the `from cast` relations we keep the raw var, and we will ground it later.
1285
1559
  builtin_vars[rhs] = lhs
1286
- elif isinstance(lhs, ir.Var) and lhs in output_vars & intermediate_builtin_vars:
1287
- builtin_vars[lhs] = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1288
- elif isinstance(rhs, ir.Var) and rhs in output_vars & intermediate_builtin_vars:
1289
- builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1290
1560
  elif isinstance(lhs, ir.Var) and lhs in intermediate_builtin_vars and lhs not in (builtin_vars | var_to_construct):
1291
1561
  # Example IR:
1292
1562
  # Logical
@@ -1314,26 +1584,13 @@ class ModelToSQL:
1314
1584
  out_var = args[2]
1315
1585
  if isinstance(out_var, ir.Var):
1316
1586
  out_var = resolve_builtin_var(out_var)
1317
- if relation == builtins.concat:
1318
- expr = f"{relation_name}({left}, {right})"
1319
- elif relation == builtins.parse_date:
1320
- expr = f"cast({left} as DATE)"
1321
- elif relation == builtins.parse_datetime:
1322
- sub_expr = left
1323
- if 'z' in right: # this means that out datetime formatter includes timezone and we need to convert first.
1324
- if self._is_duck_db:
1325
- sub_expr = f"({left} AT TIME ZONE 'UTC')"
1326
- else:
1327
- sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
1328
- expr = f"cast({sub_expr} as DATETIME)"
1329
- else:
1330
- expr = f"({left} {relation_name} {right})"
1587
+ expr = f"({left} {relation_name} {right})"
1331
1588
  if isinstance(out_var, ir.Var):
1332
1589
  # For example, when this is an intermediate result
1333
1590
  # example: c = a - b in the IR is (a - b = d) and (d = c)
1334
1591
  builtin_vars[out_var] = expr
1335
1592
  else:
1336
- # This means that var was already grounded and we can add WHERE clause.
1593
+ # This means that var was already grounded, and we can add a WHERE clause.
1337
1594
  wheres.append(sql.Terminal(f"{expr} = {out_var}"))
1338
1595
  else:
1339
1596
  raise Exception(
@@ -1363,16 +1620,23 @@ class ModelToSQL:
1363
1620
  rhs = str(rhs_ref)
1364
1621
  wheres.append(sql.Terminal(f"{ref} = {rhs}"))
1365
1622
 
1366
- return builtin_vars, wheres
1623
+ return builtin_vars, wheres, table_expressions
1367
1624
 
1368
1625
  def _convert_timezone(self, dt: str, tz: str) -> str:
1369
- if tz.lower() != "utc":
1626
+ if tz.lower() != "'utc'":
1370
1627
  if self._is_duck_db:
1371
- return f"({dt} at time zone 'UTC') at time zone '{tz}'"
1628
+ return f"({dt} at time zone 'UTC') at time zone {tz}"
1372
1629
  else:
1373
- return f"convert_timezone('UTC', '{tz}', {dt})"
1630
+ return f"convert_timezone('UTC', {tz}, {dt})"
1374
1631
  return dt
1375
1632
 
1633
+ def _process_builtin_table_expressions(self, builtin_table_expressions: dict[str, str]):
1634
+ """Convert builtin table expressions into SQL FROM clauses."""
1635
+ return [
1636
+ sql.From(expr, alias)
1637
+ for alias, expr in builtin_table_expressions.items()
1638
+ ]
1639
+
1376
1640
  def _process_constructs(self, lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
1377
1641
  var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
1378
1642
  builtin_vars: dict[ir.Var, ir.Value|str|int], var_to_construct: dict[ir.Var, ir.Construct]) -> list[sql.Expr]:
@@ -1485,21 +1749,20 @@ class ModelToSQL:
1485
1749
  where = sql.Where(sql.And(wheres))
1486
1750
  return where
1487
1751
 
1488
- def _generate_select_output(self, outputs: list[Tuple[Optional[str], ir.Value]|Tuple[Optional[str], ir.Value, ir.Task]],
1489
- builtin_vars: dict[ir.Var, ir.Value|str|int], sql_vars: dict[ir.Lookup, str],
1490
- var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
1752
+ def _generate_select_output(self, outputs: list[OutputVar], builtin_vars: dict[ir.Var, ir.Value|str|int],
1753
+ sql_vars: dict[ir.Lookup, str], var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
1491
1754
  var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
1492
1755
  var_to_construct: dict[ir.Var, ir.Construct]):
1493
1756
 
1494
1757
  reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
1495
1758
  resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)
1496
1759
 
1497
- def handle_lookup_var(var):
1760
+ def handle_lookup_var(var, var_type, alias):
1498
1761
  lookup = var_lookups[var].some()
1499
1762
  relation = lookup.relation
1500
1763
  var_name = sql_vars[lookup]
1501
1764
  column_name = self._var_name(relation.id, var_column[var, lookup])
1502
- vars.append(sql.VarRef(var_name, column_name, alias))
1765
+ vars.append(sql.VarRef(var_name, column_name, alias, var_type))
1503
1766
  if from_cdc_annotation in relation.annotations:
1504
1767
  not_null_vars.add(f"{var_name}.{column_name}")
1505
1768
 
@@ -1509,6 +1772,11 @@ class ModelToSQL:
1509
1772
  for val in construct.values:
1510
1773
  if val in builtin_vars:
1511
1774
  val = resolve_builtin_var(val)
1775
+ if isinstance(val, str):
1776
+ # In case we parsed builtin into some expression, we may add it as an element.
1777
+ # For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
1778
+ elements.append(f"{val}")
1779
+ continue
1512
1780
  if isinstance(val, ir.Var):
1513
1781
  if val in var_to_construct:
1514
1782
  elements.append(handle_construct(var_to_construct[val]))
@@ -1520,41 +1788,40 @@ class ModelToSQL:
1520
1788
  if from_cdc_annotation in lookup.relation.annotations:
1521
1789
  not_null_vars.add(lookup_var)
1522
1790
  else:
1523
- elements.append(self._convert_value(val, True))
1791
+ elements.append(str(self._convert_value(val)))
1524
1792
  return f"hash({', '.join(elements)})"
1525
1793
 
1526
1794
  # finally, compute what the select will return
1527
1795
  vars = []
1528
1796
  not_null_vars = ordered_set()
1529
1797
  for output in outputs:
1530
- alias, var = output[0], output[1]
1531
- task = output[2] if len(output) > 2 else None
1798
+ alias, var, var_type, task = output.alias, output.value, output.value_type, output.task
1532
1799
  if isinstance(var, ir.Var):
1533
1800
  if var in var_lookups and not task:
1534
- handle_lookup_var(var)
1801
+ handle_lookup_var(var, var_type, alias)
1535
1802
  elif var in builtin_vars:
1536
1803
  var_ref = resolve_builtin_var(var)
1537
1804
  if var_ref in var_lookups:
1538
1805
  # Case: result of `cast` variable
1539
- handle_lookup_var(var_ref)
1806
+ handle_lookup_var(var_ref, var_type, alias)
1540
1807
  elif isinstance(var_ref, ir.Literal):
1541
1808
  # Case: literal value from `cast` relation, e.g. `decimal(0)`
1542
- vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias))
1809
+ vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias, type=var_type))
1543
1810
  else:
1544
1811
  # Example: We may have `decimal(0)` in QB which turns in IR into:
1545
1812
  # (cast(Decimal128, 0, vDecimal128) and decimal128(vDecimal128, res_3))
1546
1813
  # and we need to make it `0` in SQL.
1547
1814
  var_ref = var_ref.name if isinstance(var_ref, ir.Var) else str(var_ref)
1548
- vars.append(sql.VarRef(var_ref, alias=alias))
1815
+ vars.append(sql.VarRef(var_ref, alias=alias, type=var_type))
1549
1816
  elif task:
1550
1817
  if isinstance(task, ir.Construct):
1551
1818
  # Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME) as `alias`
1552
- vars.append(sql.VarRef(handle_construct(task), alias=alias))
1819
+ vars.append(sql.VarRef(handle_construct(task), alias=alias, type=var_type))
1553
1820
  elif isinstance(task, ir.Aggregate):
1554
1821
  result_arg = task.projection[-1] if task.aggregation == builtins.count else task.args[0]
1555
1822
  result_arg = resolve_builtin_var(result_arg)
1556
1823
  ref = reference(result_arg) if isinstance(result_arg, ir.Var) else str(result_arg)
1557
- vars.append(sql.VarRef(str(ref), alias=alias))
1824
+ vars.append(sql.VarRef(str(ref), alias=alias, type=var_type))
1558
1825
  elif isinstance(task, ir.Union):
1559
1826
  # Handle `COALESCE` of all lookups of this var from the union
1560
1827
  lookups = self._extract_all_lookups_from_union(task)
@@ -1566,10 +1833,10 @@ class ModelToSQL:
1566
1833
  elements.append(f"{sql_vars[lu]}.{column_name}")
1567
1834
 
1568
1835
  expr = "COALESCE(" + ", ".join(elements) + ")"
1569
- vars.append(sql.VarRef(expr, alias=alias))
1836
+ vars.append(sql.VarRef(expr, alias=alias, type=var_type))
1570
1837
  else:
1571
1838
  # TODO - abusing even more here, because var is a value!
1572
- vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias))
1839
+ vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias, type=var_type))
1573
1840
  return not_null_vars, vars
1574
1841
 
1575
1842
  def _generate_select_nots(self, nots: Optional[list[ir.Not]], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
@@ -1615,8 +1882,11 @@ class ModelToSQL:
1615
1882
 
1616
1883
  var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
1617
1884
  builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
1618
- builtin_vars, builtin_wheres = self._resolve_builtins(builtin_lookups, all_var_lookups, all_var_column,
1619
- all_sql_vars, var_to_construct)
1885
+ builtin_vars, builtin_wheres, builtin_table_expressions = (
1886
+ self._resolve_builtins(builtin_lookups, all_var_lookups, all_var_column, all_sql_vars, var_to_construct))
1887
+
1888
+ froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
1889
+
1620
1890
  wheres.extend(builtin_wheres)
1621
1891
 
1622
1892
  construct_wheres = self._process_constructs(lookups, var_lookups, var_column, sql_vars, builtin_vars,
@@ -1728,12 +1998,24 @@ class ModelToSQL:
1728
1998
  else:
1729
1999
  return str(resolved) if isinstance(var, ir.Var) or not quote_strings else f"'{resolved}'"
1730
2000
 
1731
- def _get_alias(self, key, arg, var_to_construct, var_to_union):
2001
+ def _get_update_aliases(self, update: ir.Update, var_to_construct, var_to_union, skip_type:bool=False):
2002
+ relation = update.relation
2003
+ return [
2004
+ self._get_alias(
2005
+ self._var_name(relation.id, f),
2006
+ arg,
2007
+ self._convert_type(f.type) if not skip_type else None,
2008
+ var_to_construct,
2009
+ var_to_union,
2010
+ )
2011
+ for f, arg in zip(relation.fields, update.args)
2012
+ ]
2013
+
2014
+ def _get_alias(self, key, arg, arg_type, var_to_construct, var_to_union):
1732
2015
  if not isinstance(arg, ir.Var):
1733
- return key, arg
2016
+ return OutputVar(arg, key, arg_type)
1734
2017
 
1735
- var_task = var_to_construct.get(arg) or var_to_union.get(arg)
1736
- return (key, arg, var_task) if var_task else (key, arg)
2018
+ return OutputVar(arg, key, arg_type, var_to_construct.get(arg) or var_to_union.get(arg))
1737
2019
 
1738
2020
  def _get_tuples(self, logical: ir.Logical, u: ir.Update):
1739
2021
  """
@@ -1763,12 +2045,12 @@ class ModelToSQL:
1763
2045
  if isinstance(val, ir.Var):
1764
2046
  inner_construct = find_construct(val)
1765
2047
  if inner_construct:
1766
- nested = [str(self._convert_value(x, True)) for x in inner_construct.values]
2048
+ nested = [str(self._convert_value(x)) for x in inner_construct.values]
1767
2049
  resolved.append(f"hash({', '.join(nested)})")
1768
2050
  else:
1769
- resolved.append(self._convert_value(val, True))
2051
+ resolved.append(str(self._convert_value(val)))
1770
2052
  else:
1771
- resolved.append(self._convert_value(val, True))
2053
+ resolved.append(str(self._convert_value(val)))
1772
2054
 
1773
2055
  return f"hash({', '.join(resolved)})"
1774
2056
  elif isinstance(arg, FrozenOrderedSet):
@@ -1791,7 +2073,7 @@ class ModelToSQL:
1791
2073
  tuples = [prev + [value] for prev in tuples]
1792
2074
  return [tuple(t) for t in tuples]
1793
2075
 
1794
- def _convert_value(self, v, quote_numbers:bool=False, quote_strings:bool=True) -> str|int:
2076
+ def _convert_value(self, v, quote_strings:bool=True) -> str|int:
1795
2077
  """ Convert the literal value in v to a SQL value."""
1796
2078
  if isinstance(v, str):
1797
2079
  return f"'{v}'" if quote_strings else v
@@ -1804,7 +2086,7 @@ class ModelToSQL:
1804
2086
  return f"cast('{v.value}' as date)"
1805
2087
  if v.type == types.DateTime:
1806
2088
  return f"cast('{v.value}' as datetime)"
1807
- return self._convert_value(v.value, quote_numbers, quote_strings)
2089
+ return self._convert_value(v.value, quote_strings)
1808
2090
  if isinstance(v, float):
1809
2091
  if math.isnan(v):
1810
2092
  return "cast('NaN' as DOUBLE)"
@@ -1812,7 +2094,7 @@ class ModelToSQL:
1812
2094
  return "cast('Infinity' as DOUBLE)"
1813
2095
  elif v == float("-inf"):
1814
2096
  return "cast('-Infinity' as DOUBLE)"
1815
- return str(v) if not quote_numbers else f"'{v}'"
2097
+ return str(v)
1816
2098
  if isinstance(v, datetime.datetime):
1817
2099
  return f"cast('{v}' as datetime)"
1818
2100
  if isinstance(v, datetime.date):
@@ -1820,7 +2102,7 @@ class ModelToSQL:
1820
2102
  if isinstance(v, bool):
1821
2103
  return str(v).lower()
1822
2104
  if isinstance(v, int):
1823
- return v if not quote_numbers else f"'{v}'"
2105
+ return v
1824
2106
  return str(v)
1825
2107
 
1826
2108
  COMMON_CONVERSION = {
@@ -1861,21 +2143,27 @@ class ModelToSQL:
1861
2143
  return f"DECIMAL({base_type.precision},{base_type.scale})"
1862
2144
  raise Exception(f"Unknown built-in type: {t}")
1863
2145
 
1864
- def _get_relations(self, model: ir.Model) -> list[ir.Relation]:
2146
+ def _get_relations(self, model: ir.Model) -> Tuple[list[ir.Relation], list[ir.Relation]]:
2147
+ rw = ReadWriteVisitor()
2148
+ model.accept(rw)
2149
+
2150
+ root = cast(ir.Logical, model.root)
2151
+
2152
+ # For query compilation exclude read-only tables because we do not need to declare `CREATE TABLE` statements
2153
+ used_relations = rw.writes(root) if self._query_compilation else rw.writes(root) | rw.reads(root)
2154
+
1865
2155
  # Filter only relations that require table creation
1866
- relations = [
1867
- r for r in model.relations
2156
+ table_relations = [
2157
+ r for r in used_relations
1868
2158
  if self._is_table_creation_required(r)
1869
2159
  ]
1870
2160
 
1871
- # Optionally exclude read-only tables
1872
- if self._query_compilation:
1873
- rw = ReadWriteVisitor()
1874
- model.accept(rw)
1875
- writable = rw.writes(cast(ir.Logical, model.root))
1876
- relations = [r for r in relations if r in writable]
2161
+ used_builtins = [
2162
+ r for r in rw.reads(root)
2163
+ if builtins.is_builtin(r)
2164
+ ]
1877
2165
 
1878
- return relations
2166
+ return table_relations, used_builtins
1879
2167
 
1880
2168
  def _is_table_creation_required(self, r: ir.Relation) -> bool:
1881
2169
  """
@@ -1932,9 +2220,26 @@ class ModelToSQL:
1932
2220
  if helpers.is_external(r):
1933
2221
  self.relation_name_cache.get_name(r.id, r.name)
1934
2222
 
2223
+ def _get_relation_info(self, relation: ir.Relation) -> RelationInfo:
2224
+ if relation not in self.relation_infos:
2225
+ self.relation_infos[relation] = RelationInfo()
2226
+ return self.relation_infos[relation]
2227
+
2228
+ def mark_used(self, relation: ir.Relation):
2229
+ self._get_relation_info(relation).used = True
2230
+
2231
+ def add_table_select(self, relation: ir.Relation, select: sql.Select):
2232
+ self._get_relation_info(relation).table_selects.append(select)
2233
+
2234
+ def add_view_select(self, relation: ir.Relation, select: sql.Select):
2235
+ self._get_relation_info(relation).view_selects.append(select)
2236
+
2237
+ def add_dynamic_table_select(self, relation: ir.Relation, select: sql.Select):
2238
+ self._get_relation_info(relation).dynamic_table_selects.append(select)
2239
+
1935
2240
  def _var_name(self, relation_id: int, arg: Union[ir.Var, ir.Field]):
1936
2241
  name = helpers.sanitize(self.relation_arg_name_cache.get_name((relation_id, arg.id), arg.name))
1937
- return f'"{name}"' if name.lower() in ("any", "order") else name
2242
+ return f'"{name}"' if name.lower() in {"any", "order"} else name
1938
2243
 
1939
2244
  def _register_relation_args(self, relations: list[ir.Relation]):
1940
2245
  """
@@ -1960,11 +2265,13 @@ class ModelToSQL:
1960
2265
  def _build_builtin_lookups_dependencies(lookups: list[ir.Lookup]) -> list[Tuple[ir.Lookup, ir.Lookup]]:
1961
2266
  """
1962
2267
  Builds dependency edges for topological_sort:
1963
- 1. A lookup whose last argument is used non-terminally in another must come first.
1964
- 2. Terminal comparisons (neq, gt, lt, gte, lte) come last.
1965
- 3. Conditionals (starts_with, contains, etc.) come after basic lookups but before terminals.
1966
- 4. eq with only constants comes first.
1967
- 5. eq with two vars must wait until one of them is grounded.
2268
+ 1. Terminal comparisons (neq, gt, lt, gte, lte) come last.
2269
+ 2. Conditionals (starts_with, contains, etc.) come after basic lookups but before terminals.
2270
+ 3. eq with only constants comes first.
2271
+ 4. eq with two vars must wait until one of them is grounded.
2272
+ 5. A lookup whose last argument is used non-terminally in another must come first.
2273
+ 6. For builtins that take multiple input arguments (like range, concat, substring, etc.),
2274
+ ensure that all non-terminal arguments are processed before the builtin that consumes them.
1968
2275
  """
1969
2276
 
1970
2277
  edges = []
@@ -1988,7 +2295,7 @@ class ModelToSQL:
1988
2295
  relation_name = lookup.relation.name
1989
2296
  args = lookup.args
1990
2297
 
1991
- # Rule 2: Terminal relations depend on everything else
2298
+ # Rule 1: Terminal relations depend on everything else
1992
2299
  if relation_name in terminal_relations:
1993
2300
  for other in lookups:
1994
2301
  other_name = other.relation.name
@@ -1996,7 +2303,7 @@ class ModelToSQL:
1996
2303
  edges.append((other, lookup))
1997
2304
  continue # skip rest of rules for terminal lookups
1998
2305
 
1999
- # Rule 3: Conditional relations go before terminals, but after others
2306
+ # Rule 2: Conditional relations go before terminals, but after others
2000
2307
  if relation_name in conditional_relations:
2001
2308
  for other in lookups:
2002
2309
  if other is not lookup:
@@ -2008,31 +2315,46 @@ class ModelToSQL:
2008
2315
  if relation_name == builtins.eq.name:
2009
2316
  var_args = [arg for arg in args if isinstance(arg, ir.Var)]
2010
2317
 
2011
- # Rule 4: eq with only constants comes first
2318
+ # Rule 3: eq with only constants comes first
2012
2319
  if len(var_args) == 1:
2013
2320
  # This lookup defines a var — should come before any that use this var non-terminally
2014
2321
  grounded_var = var_args[0]
2015
- for other_lookup, pos in arg_usages[grounded_var]:
2016
- if other_lookup is not lookup:
2017
- if pos != len(other_lookup.args) - 1:
2018
- edges.append((lookup, other_lookup))
2322
+ for other, pos in arg_usages[grounded_var]:
2323
+ if other is not lookup:
2324
+ if pos != len(other.args) - 1:
2325
+ edges.append((lookup, other))
2019
2326
  continue # skip adding other edges among terminal assignments like a=2, b=2
2020
2327
 
2021
- # Rule 5: eq with two vars must wait until one of them is grounded
2328
+ # Rule 4: eq with two vars must wait until one of them is grounded
2022
2329
  elif len(var_args) == 2:
2023
2330
  # eq(x, y): both are vars — lookup must come after those grounding either var
2024
2331
  for var in var_args:
2025
- for other_lookup, pos in arg_usages[var]:
2026
- if other_lookup is not lookup:
2027
- if other_lookup.args[-1] == var:
2028
- edges.append((other_lookup, lookup))
2332
+ for other, pos in arg_usages[var]:
2333
+ if other is not lookup:
2334
+ if other.args[-1] == var:
2335
+ edges.append((other, lookup))
2029
2336
  continue
2030
2337
 
2031
- # Rule 1: last argument used elsewhere non-terminally must go first
2032
- last_arg = args[-1]
2033
- for other_lookup, pos in arg_usages.get(last_arg, []):
2034
- if other_lookup is not lookup and pos != len(other_lookup.args) - 1:
2035
- edges.append((lookup, other_lookup))
2338
+ # In general, builtins have a single output var, but `split` returns both `index` and `part`
2339
+ num_outputs = 2 if lookup.relation == builtins.split else 1
2340
+
2341
+ # Rule 5: last output args must go first if used elsewhere non-terminally
2342
+ for out_arg in args[-num_outputs:]:
2343
+ for other, pos in arg_usages.get(out_arg, []):
2344
+ if other is not lookup and pos != len(other.args) - 1:
2345
+ edges.append((lookup, other))
2346
+
2347
+ # Rule 6: builtins with multiple input args must wait until all input args are grounded,
2348
+ # for example, range(start, end, step, result)
2349
+ if len(args) > num_outputs:
2350
+ for input_arg in args[:-num_outputs]:
2351
+ for other, pos in arg_usages.get(input_arg, []):
2352
+ if other is not lookup:
2353
+ other_name = other.relation.name
2354
+ if other_name not in terminal_relations and other_name not in conditional_relations:
2355
+ # Ensure any lookup that defines this arg (as last) comes before
2356
+ if other.args[-1] == input_arg:
2357
+ edges.append((other, lookup))
2036
2358
 
2037
2359
  return edges
2038
2360
 
@@ -2064,8 +2386,9 @@ class ModelToSQL:
2064
2386
  3. Other statements except SELECT queries
2065
2387
  4. SELECT queries
2066
2388
  """
2389
+ udfs = []
2067
2390
  create_tables = []
2068
- need_sort: dict[str, list[Union[sql.Insert, sql.CreateView]]] = defaultdict(list)
2391
+ need_sort: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]] = defaultdict(list)
2069
2392
  updates = []
2070
2393
  miscellaneous_statements = []
2071
2394
  selects = []
@@ -2077,19 +2400,23 @@ class ModelToSQL:
2077
2400
  need_sort[statement.table].append(statement)
2078
2401
  elif isinstance(statement, sql.CreateView):
2079
2402
  need_sort[statement.name].append(statement)
2403
+ elif isinstance(statement, sql.CreateDynamicTable):
2404
+ need_sort[statement.name].append(statement)
2080
2405
  elif isinstance(statement, sql.Update):
2081
2406
  updates.append(statement)
2082
2407
  elif isinstance(statement, sql.Select):
2083
2408
  selects.append(statement)
2409
+ elif isinstance(statement, sql.CreateFunction):
2410
+ udfs.append(statement)
2084
2411
  else:
2085
2412
  miscellaneous_statements.append(statement)
2086
2413
 
2087
2414
  sorted_statements = self._sort_statements_dependency_graph(need_sort)
2088
2415
 
2089
- return create_tables + sorted_statements + updates + miscellaneous_statements + selects
2416
+ return udfs + create_tables + sorted_statements + updates + miscellaneous_statements + selects
2090
2417
 
2091
2418
  @staticmethod
2092
- def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView]]]) -> list[sql.Insert]:
2419
+ def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]]) -> list[sql.Insert]:
2093
2420
  """ Topologically sort INSERT and CREATE VIEW statements based on dependencies in their SELECT FROM clauses. """
2094
2421
  edges = ordered_set()
2095
2422
  nodes = OrderedSet.from_iterable(statements.keys())
@@ -2124,16 +2451,17 @@ class ModelToSQL:
2124
2451
  for sub_expr in expr.expr:
2125
2452
  _extract_from_expr(sub_expr)
2126
2453
 
2127
- if select.where:
2454
+ if select.where and select.where.expression:
2128
2455
  _extract_from_expr(select.where.expression)
2129
2456
 
2130
2457
  for target_table, table_statements in statements.items():
2131
2458
  for statement in table_statements:
2132
- if isinstance(statement, sql.Insert):
2133
- extract_dependencies(statement.select, target_table)
2134
- elif isinstance(statement, sql.CreateView):
2459
+ if statement.query:
2135
2460
  query = statement.query
2136
- if isinstance(query, sql.Select):
2461
+ if isinstance(query, list):
2462
+ for sub_query in query:
2463
+ extract_dependencies(sub_query, target_table)
2464
+ elif isinstance(query, sql.Select):
2137
2465
  extract_dependencies(query, target_table)
2138
2466
  elif isinstance(query, sql.CTE):
2139
2467
  for select in query.selects:
@@ -2158,3 +2486,13 @@ class RecursiveLookupsRewriter(v.Rewriter):
2158
2486
  if node.relation == self._recursive_relation:
2159
2487
  return node.reconstruct(node.engine, self._new_recursive_relation, node.args, node.annotations)
2160
2488
  return node
2489
+
2490
class DerivedRelationsVisitor(v.Visitor):
    """ Visitor that tracks whether every relation seen so far is free of `from_cdc_annotation`.

    The flag starts out as True and is switched to False the first time
    `visit_relation` receives a relation whose annotations contain
    `from_cdc_annotation`. Once False it is never reset.
    NOTE(review): how relations are dispatched to `visit_relation` is decided by
    the `v.Visitor` base class, which is not visible here.
    """

    # Class-level default; shadowed by an instance attribute once a CDC relation is seen.
    _is_derived: bool = True

    def is_derived(self) -> bool:
        """ Return True while no visited relation has carried `from_cdc_annotation`. """
        return self._is_derived

    def visit_relation(self, node: ir.Relation, parent: Optional[ir.Node]):
        """ Clear the derived flag if `node` is annotated with `from_cdc_annotation`. """
        # The answer is already settled, so the annotations need not be inspected again.
        if not self._is_derived:
            return
        if from_cdc_annotation in node.annotations:
            self._is_derived = False