relationalai 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/snowflake.py +44 -15
- relationalai/clients/types.py +1 -0
- relationalai/clients/use_index_poller.py +446 -178
- relationalai/early_access/builder/std/__init__.py +1 -1
- relationalai/early_access/dsl/bindings/csv.py +4 -4
- relationalai/semantics/internal/internal.py +22 -4
- relationalai/semantics/lqp/executor.py +69 -18
- relationalai/semantics/lqp/intrinsics.py +23 -0
- relationalai/semantics/lqp/model2lqp.py +16 -6
- relationalai/semantics/lqp/passes.py +3 -4
- relationalai/semantics/lqp/primitives.py +38 -14
- relationalai/semantics/metamodel/builtins.py +152 -11
- relationalai/semantics/metamodel/factory.py +3 -2
- relationalai/semantics/metamodel/helpers.py +78 -2
- relationalai/semantics/reasoners/graph/core.py +343 -40
- relationalai/semantics/reasoners/optimization/solvers_dev.py +20 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +24 -3
- relationalai/semantics/rel/compiler.py +5 -17
- relationalai/semantics/rel/executor.py +2 -2
- relationalai/semantics/rel/rel.py +6 -0
- relationalai/semantics/rel/rel_utils.py +37 -1
- relationalai/semantics/rel/rewrite/extract_common.py +153 -242
- relationalai/semantics/sql/compiler.py +540 -202
- relationalai/semantics/sql/executor/duck_db.py +21 -0
- relationalai/semantics/sql/executor/result_helpers.py +7 -0
- relationalai/semantics/sql/executor/snowflake.py +9 -2
- relationalai/semantics/sql/rewrite/denormalize.py +4 -6
- relationalai/semantics/sql/rewrite/recursive_union.py +23 -3
- relationalai/semantics/sql/sql.py +120 -46
- relationalai/semantics/std/__init__.py +9 -4
- relationalai/semantics/std/datetime.py +363 -0
- relationalai/semantics/std/math.py +77 -0
- relationalai/semantics/std/re.py +83 -0
- relationalai/semantics/std/strings.py +1 -1
- relationalai/tools/cli_controls.py +445 -60
- relationalai/util/format.py +78 -1
- {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/METADATA +3 -2
- {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/RECORD +41 -39
- relationalai/semantics/std/dates.py +0 -213
- {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/WHEEL +0 -0
- {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/entry_points.txt +0 -0
- {relationalai-0.11.2.dist-info → relationalai-0.11.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -52,39 +52,118 @@ class Compiler(c.Compiler):
|
|
|
52
52
|
"""
|
|
53
53
|
return str(self.model_to_sql.to_sql(model, options)), model
|
|
54
54
|
|
|
55
|
+
@dataclass(frozen=True)
|
|
56
|
+
class OutputVar:
|
|
57
|
+
value: ir.Value
|
|
58
|
+
alias: Optional[str] = None
|
|
59
|
+
value_type: Optional[str] = None
|
|
60
|
+
task: Optional[ir.Task] = None
|
|
61
|
+
|
|
62
|
+
@dataclass
|
|
63
|
+
class RelationInfo:
|
|
64
|
+
used: bool = False
|
|
65
|
+
view_selects: list[sql.Select] = field(default_factory=list)
|
|
66
|
+
table_selects: list[sql.Select] = field(default_factory=list)
|
|
67
|
+
dynamic_table_selects: list[sql.Select] = field(default_factory=list)
|
|
68
|
+
|
|
69
|
+
@dataclass
|
|
70
|
+
class ImportSpec:
|
|
71
|
+
value: str
|
|
72
|
+
module: Optional[str] = None # e.g., "scipy.special"
|
|
73
|
+
|
|
74
|
+
def render(self) -> str:
|
|
75
|
+
return f"from {self.module} import {self.value}" if self.module else f"import {self.value}"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class UDFConfig:
|
|
80
|
+
handler: str
|
|
81
|
+
code: str
|
|
82
|
+
imports: list[ImportSpec] = field(default_factory=list)
|
|
83
|
+
packages: list[str] = field(default_factory=list)
|
|
55
84
|
|
|
56
85
|
@dataclass
|
|
57
86
|
class ModelToSQL:
|
|
58
87
|
""" Generates SQL from an IR Model, assuming the compiler rewrites were done. """
|
|
59
88
|
|
|
60
89
|
_is_duck_db: bool = False
|
|
90
|
+
_warehouse: str = 'MAIN_WH'
|
|
61
91
|
_query_compilation: bool = False
|
|
92
|
+
_default_dynamic_table_target_lag: str = '5 minutes'
|
|
62
93
|
relation_name_cache: NameCache = field(default_factory=NameCache)
|
|
63
94
|
relation_arg_name_cache: NameCache = field(default_factory=NameCache)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
)
|
|
95
|
+
relation_infos: dict[ir.Relation, RelationInfo] = field(default_factory=dict)
|
|
96
|
+
_error_relation_names: set[str] = field(default_factory=lambda: {'Error', 'pyrel_error_attrs'})
|
|
67
97
|
|
|
68
98
|
def to_sql(self, model: ir.Model, options:dict) -> sql.Program:
|
|
99
|
+
self.relation_infos.clear()
|
|
69
100
|
self._is_duck_db = options.get("is_duck_db", False)
|
|
101
|
+
self._warehouse = options.get("warehouse") or self._warehouse
|
|
70
102
|
self._query_compilation = options.get("query_compilation", False)
|
|
103
|
+
self._default_dynamic_table_target_lag = (options.get("default_dynamic_table_target_lag") or
|
|
104
|
+
self._default_dynamic_table_target_lag)
|
|
71
105
|
return sql.Program(self._sort_dependencies(self._union_output_selects(self._generate_statements(model))))
|
|
72
106
|
|
|
73
107
|
def _generate_statements(self, model: ir.Model) -> list[sql.Node]:
|
|
74
|
-
|
|
108
|
+
table_relations, used_builtins = self._get_relations(model)
|
|
75
109
|
|
|
76
|
-
self._register_relation_args(
|
|
110
|
+
self._register_relation_args(table_relations)
|
|
77
111
|
self._register_external_relations(model)
|
|
78
112
|
|
|
79
113
|
statements: list[sql.Node] = []
|
|
80
|
-
|
|
81
|
-
statements.append(self._create_table(cast(ir.Relation, relation)))
|
|
114
|
+
# 1. Process root logical body
|
|
82
115
|
root = cast(ir.Logical, model.root)
|
|
83
116
|
for child in root.body:
|
|
84
117
|
if isinstance(child, ir.Logical):
|
|
85
118
|
statements.extend(self._create_statement(cast(ir.Logical, child)))
|
|
86
119
|
elif isinstance(child, ir.Union):
|
|
87
120
|
statements.append(self._create_recursive_view(cast(ir.Union, child)))
|
|
121
|
+
|
|
122
|
+
relation_selects = {
|
|
123
|
+
relation: info.dynamic_table_selects + info.view_selects + info.table_selects
|
|
124
|
+
for relation, info in self.relation_infos.items()
|
|
125
|
+
if info.dynamic_table_selects or info.view_selects or info.table_selects
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
# 3. Handle each relation with proper priority
|
|
129
|
+
for relation, selects in relation_selects.items():
|
|
130
|
+
table_name = self._relation_name(relation)
|
|
131
|
+
|
|
132
|
+
info = self._get_relation_info(relation)
|
|
133
|
+
if info.table_selects:
|
|
134
|
+
# Relation is a table → insert into it
|
|
135
|
+
columns = [self._var_name(relation.id, f) for f in relation.fields]
|
|
136
|
+
if len(selects) == 1:
|
|
137
|
+
statements.append(sql.Insert(table_name, columns, [], selects[0]))
|
|
138
|
+
else:
|
|
139
|
+
statements.append(sql.Insert(table_name, columns, [],
|
|
140
|
+
sql.CTE(False, f"{table_name}_cte", columns, selects, True)))
|
|
141
|
+
elif info.view_selects:
|
|
142
|
+
statements.append(sql.CreateView(table_name, selects))
|
|
143
|
+
else:
|
|
144
|
+
# Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
|
|
145
|
+
# As a workaround, we generate a CTE without DISTINCT, using UNION ALL.
|
|
146
|
+
# Then, we create a dynamic table with `SELECT DISTINCT * FROM CTE` to remove duplicates.
|
|
147
|
+
columns = [self._var_name(relation.id, f) for f in relation.fields]
|
|
148
|
+
statements.append(
|
|
149
|
+
sql.CreateDynamicTable(
|
|
150
|
+
table_name,
|
|
151
|
+
sql.CTE(False, f"{table_name}_cte", columns, selects, True),
|
|
152
|
+
self._default_dynamic_table_target_lag,
|
|
153
|
+
self._warehouse
|
|
154
|
+
)
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# 4. Create physical tables for explicitly declared table relations
|
|
158
|
+
for relation in table_relations:
|
|
159
|
+
info = self.relation_infos.get(relation)
|
|
160
|
+
if info is None or info.table_selects:
|
|
161
|
+
statements.append(self._create_table(relation))
|
|
162
|
+
|
|
163
|
+
#5. Create Snowflake user-defined functions
|
|
164
|
+
if not self._is_duck_db:
|
|
165
|
+
statements.extend(self._create_user_defined_functions(used_builtins))
|
|
166
|
+
|
|
88
167
|
return statements
|
|
89
168
|
|
|
90
169
|
#--------------------------------------------------
|
|
@@ -121,7 +200,9 @@ class ModelToSQL:
|
|
|
121
200
|
|
|
122
201
|
aliases = []
|
|
123
202
|
for i, arg in enumerate(update.args):
|
|
124
|
-
|
|
203
|
+
relation_field = old_relation.fields[i]
|
|
204
|
+
field_type = self._convert_type(relation_field.type)
|
|
205
|
+
aliases.append(OutputVar(arg, self._var_name(old_relation.id, relation_field), value_type=field_type))
|
|
125
206
|
|
|
126
207
|
return self._make_select(lookups, aliases, nots, unions, constructs)
|
|
127
208
|
|
|
@@ -129,6 +210,7 @@ class ModelToSQL:
|
|
|
129
210
|
update = v.collect_by_type(ir.Update, union).some()
|
|
130
211
|
|
|
131
212
|
relation = update.relation
|
|
213
|
+
self.mark_used(relation)
|
|
132
214
|
return sql.CreateView(
|
|
133
215
|
self._relation_name(relation),
|
|
134
216
|
sql.CTE(
|
|
@@ -142,6 +224,62 @@ class ModelToSQL:
|
|
|
142
224
|
)
|
|
143
225
|
)
|
|
144
226
|
|
|
227
|
+
def _create_user_defined_functions(self, relations: list[ir.Relation]) -> list[sql.CreateFunction]:
|
|
228
|
+
# Central UDF metadata configuration
|
|
229
|
+
udf_relations: dict[str, UDFConfig] = {
|
|
230
|
+
builtins.acot.name: UDFConfig(
|
|
231
|
+
handler="compute",
|
|
232
|
+
imports=[ImportSpec("math")],
|
|
233
|
+
code="""def compute(x): return math.atan(1 / x) if x != 0 else math.copysign(math.pi / 2, x)"""
|
|
234
|
+
),
|
|
235
|
+
builtins.erf.name: UDFConfig(
|
|
236
|
+
handler="compute",
|
|
237
|
+
imports=[ImportSpec("math")],
|
|
238
|
+
code="""def compute(x): return math.erf(x)"""
|
|
239
|
+
),
|
|
240
|
+
builtins.erfinv.name: UDFConfig(
|
|
241
|
+
handler="compute",
|
|
242
|
+
imports=[ImportSpec("erfinv", module="scipy.special")],
|
|
243
|
+
packages=["'scipy'"],
|
|
244
|
+
code="""def compute(x): return erfinv(x)"""
|
|
245
|
+
)
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
statements: list[sql.CreateFunction] = []
|
|
249
|
+
|
|
250
|
+
for r in relations:
|
|
251
|
+
meta = udf_relations.get(r.name)
|
|
252
|
+
if not meta:
|
|
253
|
+
continue
|
|
254
|
+
|
|
255
|
+
# Split relation fields into inputs and return type
|
|
256
|
+
# We expect a single return argument per builtin relation
|
|
257
|
+
return_type = None
|
|
258
|
+
input_columns: list[sql.Column] = []
|
|
259
|
+
for f in r.fields:
|
|
260
|
+
if f.input:
|
|
261
|
+
input_columns.append(sql.Column(self._var_name(r.id, f), self._convert_type(f.type)))
|
|
262
|
+
else:
|
|
263
|
+
return_type = self._convert_type(f.type)
|
|
264
|
+
|
|
265
|
+
# Build a full code block (imports + code)
|
|
266
|
+
imports_code = "\n".join(imp.render() for imp in meta.imports)
|
|
267
|
+
python_block = "\n".join(part for part in (imports_code, meta.code) if part)
|
|
268
|
+
|
|
269
|
+
assert return_type, f"No return type found for relation '{r.name}'"
|
|
270
|
+
statements.append(
|
|
271
|
+
sql.CreateFunction(
|
|
272
|
+
name=r.name,
|
|
273
|
+
inputs=input_columns,
|
|
274
|
+
return_type=return_type,
|
|
275
|
+
handler=meta.handler,
|
|
276
|
+
body=python_block,
|
|
277
|
+
packages=meta.packages
|
|
278
|
+
)
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
return statements
|
|
282
|
+
|
|
145
283
|
def _create_statement(self, task: ir.Logical):
|
|
146
284
|
|
|
147
285
|
# TODO - improve the typing info to avoid these casts
|
|
@@ -158,7 +296,6 @@ class ModelToSQL:
|
|
|
158
296
|
var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
|
|
159
297
|
|
|
160
298
|
statements = []
|
|
161
|
-
# TODO - this is simplifying soooo much :crying_blood:
|
|
162
299
|
if updates and not lookups and not nots and not aggs and not logicals and not unions:
|
|
163
300
|
for u in updates:
|
|
164
301
|
r = u.relation
|
|
@@ -171,13 +308,15 @@ class ModelToSQL:
|
|
|
171
308
|
raise Exception(f"Expected SQL source to be a string, got: {type(src).__name__}")
|
|
172
309
|
statements.append(sql.RawSource(src))
|
|
173
310
|
else:
|
|
174
|
-
#
|
|
175
|
-
# Insert static values: INSERT INTO ... SELECT hash(V1, ...), V2, V3 UNION ALL SELECT hash(V4, ...), V5, V6
|
|
311
|
+
# Generate select with static values: SELECT hash(V1, ...), V2, V3
|
|
176
312
|
# We need to use `SELECT` instead of `VALUES` because Snowflake parses and restricts certain expressions in VALUES(...).
|
|
177
313
|
# Built-in functions like HASH() or MD5() are often rejected unless used in SELECT.
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
314
|
+
for values in self._get_tuples(task, u):
|
|
315
|
+
output_vars = [
|
|
316
|
+
sql.VarRef(str(value), alias=self._var_name(r.id, f))
|
|
317
|
+
for f, value in zip(r.fields, values)
|
|
318
|
+
]
|
|
319
|
+
self.add_table_select(r, sql.Select(False, output_vars))
|
|
181
320
|
elif lookups or outputs or nots or aggs or updates:
|
|
182
321
|
# Some of the lookup relations we wrap into logical and we need to get them out for the SQL compilation.
|
|
183
322
|
# For example QB `decimal(0)` in IR will look like this:
|
|
@@ -207,28 +346,39 @@ class ModelToSQL:
|
|
|
207
346
|
continue
|
|
208
347
|
# We shouldn’t create or populate tables for value types that can be directly sourced from existing Snowflake tables.
|
|
209
348
|
if not self._is_value_type_population_relation(r):
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
select = self.
|
|
214
|
-
|
|
215
|
-
# After flatten it can be only one rank per rule.
|
|
216
|
-
select = self._make_rank_select(u, all_lookups, ranks[0], nots, unions, constructs)
|
|
349
|
+
if all_lookups and all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
|
|
350
|
+
# Assuming static values insert when you have only builtin lookups (like `cast`, etc.) and you do not have table lookups.
|
|
351
|
+
aliases = self._get_update_aliases(u, var_to_construct, var_to_union, True)
|
|
352
|
+
select = self._make_select(all_lookups, aliases, nots, unions, constructs)
|
|
353
|
+
self.add_table_select(r, select)
|
|
217
354
|
else:
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
select = self.
|
|
355
|
+
select = None
|
|
356
|
+
drv = DerivedRelationsVisitor()
|
|
357
|
+
task.accept(drv)
|
|
358
|
+
if aggs:
|
|
359
|
+
# After flatten it can be only one aggregation per rule.
|
|
360
|
+
select = self._make_agg_select(u, all_lookups, aggs[0], nots, unions, constructs)
|
|
361
|
+
elif ranks:
|
|
362
|
+
# After flatten it can be only one rank per rule.
|
|
363
|
+
select = self._make_rank_select(u, all_lookups, ranks[0], nots, unions, constructs)
|
|
227
364
|
else:
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
365
|
+
# Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
|
|
366
|
+
# That is why we generate statements without DISTINCT, and we remove duplicates later
|
|
367
|
+
# by using CTE + DISTINCT to declare the Dynamic Tables
|
|
368
|
+
distinct = True if self._is_duck_db or not drv.is_derived() else False
|
|
369
|
+
aliases = self._get_update_aliases(u, var_to_construct, var_to_union)
|
|
370
|
+
|
|
371
|
+
if not unions:
|
|
372
|
+
select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct)
|
|
373
|
+
elif lookups:
|
|
374
|
+
select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct)
|
|
375
|
+
else:
|
|
376
|
+
select = self._make_full_outer_join_select(aliases, unions, constructs, distinct)
|
|
377
|
+
|
|
378
|
+
if drv.is_derived() and not self._is_duck_db:
|
|
379
|
+
self.add_dynamic_table_select(r, select)
|
|
380
|
+
else:
|
|
381
|
+
self.add_view_select(r, select)
|
|
232
382
|
elif outputs:
|
|
233
383
|
# output a query: SELECT ... FROM ... WHERE ...
|
|
234
384
|
aliases = []
|
|
@@ -236,7 +386,7 @@ class ModelToSQL:
|
|
|
236
386
|
for output in outputs:
|
|
237
387
|
distinct = distinct or output.keys is None
|
|
238
388
|
for key, arg in output.aliases:
|
|
239
|
-
aliases.append(self._get_alias(key, arg, var_to_construct, var_to_union))
|
|
389
|
+
aliases.append(self._get_alias(key, arg, None, var_to_construct, var_to_union))
|
|
240
390
|
|
|
241
391
|
if not unions:
|
|
242
392
|
if all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
|
|
@@ -331,7 +481,7 @@ class ModelToSQL:
|
|
|
331
481
|
|
|
332
482
|
seen_args = set()
|
|
333
483
|
outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
|
|
334
|
-
sub_query_outputs: list[
|
|
484
|
+
sub_query_outputs: list[OutputVar] = []
|
|
335
485
|
|
|
336
486
|
relation = update.relation
|
|
337
487
|
agg_var = agg.args[0] if agg.aggregation == builtins.count else agg.args[1]
|
|
@@ -340,22 +490,23 @@ class ModelToSQL:
|
|
|
340
490
|
|
|
341
491
|
for i, arg in enumerate(update.args):
|
|
342
492
|
if arg not in seen_args:
|
|
343
|
-
|
|
493
|
+
relation_field = relation.fields[i]
|
|
494
|
+
field_type = self._convert_type(relation_field.type)
|
|
495
|
+
field_name = self._var_name(relation.id, relation_field)
|
|
344
496
|
if isinstance(arg, ir.Var) and arg == agg_var:
|
|
345
|
-
outputs.append(sql.VarRef(f"{agg.aggregation.name}({field_name})", alias=field_name))
|
|
346
|
-
sub_query_outputs.append((
|
|
497
|
+
outputs.append(sql.VarRef(f"{agg.aggregation.name}({field_name})", alias=field_name, type=field_type))
|
|
498
|
+
sub_query_outputs.append(OutputVar(arg, field_name, task=agg))
|
|
347
499
|
else:
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
sub_query_outputs.append((field_name, arg))
|
|
500
|
+
group_by.append(sql.VarRef(field_name))
|
|
501
|
+
outputs.append(sql.VarRef(field_name, alias=field_name, type=field_type))
|
|
502
|
+
sub_query_outputs.append(OutputVar(arg, field_name))
|
|
352
503
|
seen_args.add(arg)
|
|
353
504
|
|
|
354
505
|
for arg in agg.projection:
|
|
355
506
|
if arg not in seen_args:
|
|
356
507
|
if agg.aggregation == builtins.count and arg == agg.projection[-1]:
|
|
357
508
|
continue
|
|
358
|
-
sub_query_outputs.append((
|
|
509
|
+
sub_query_outputs.append(OutputVar(value=arg))
|
|
359
510
|
seen_args.add(arg)
|
|
360
511
|
|
|
361
512
|
sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)
|
|
@@ -379,7 +530,7 @@ class ModelToSQL:
|
|
|
379
530
|
Cat AS v0, cat_name AS v1
|
|
380
531
|
WHERE
|
|
381
532
|
v0.cat = v1.cat
|
|
382
|
-
) LIMIT 10;
|
|
533
|
+
) ORDER BY v LIMIT 10;
|
|
383
534
|
|
|
384
535
|
Rationale:
|
|
385
536
|
In the IR, it’s not always explicit whether rank should be applied over distinct rows.
|
|
@@ -411,7 +562,7 @@ class ModelToSQL:
|
|
|
411
562
|
|
|
412
563
|
seen_args = set()
|
|
413
564
|
outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
|
|
414
|
-
sub_query_outputs: list[
|
|
565
|
+
sub_query_outputs: list[OutputVar] = []
|
|
415
566
|
|
|
416
567
|
order_by_vars = []
|
|
417
568
|
for arg, is_ascending in zip(rank.args, rank.arg_is_ascending):
|
|
@@ -420,30 +571,33 @@ class ModelToSQL:
|
|
|
420
571
|
|
|
421
572
|
relation = update.relation
|
|
422
573
|
|
|
574
|
+
rank_result_field_name = None
|
|
423
575
|
for i, arg in enumerate(update.args):
|
|
424
576
|
if arg not in seen_args:
|
|
425
|
-
|
|
577
|
+
relation_field = relation.fields[i]
|
|
578
|
+
field_type = self._convert_type(relation_field.type)
|
|
579
|
+
field_name = self._var_name(relation.id, relation_field)
|
|
426
580
|
if isinstance(arg, ir.Var) and arg == rank.result:
|
|
427
|
-
|
|
581
|
+
rank_result_field_name = field_name
|
|
582
|
+
outputs.append(sql.RowNumberVar(order_by_vars, partition_by_vars, field_name, field_type))
|
|
428
583
|
else:
|
|
429
|
-
outputs.append(sql.VarRef(field_name))
|
|
430
|
-
sub_query_outputs.append((
|
|
584
|
+
outputs.append(sql.VarRef(field_name, alias=field_name, type=field_type))
|
|
585
|
+
sub_query_outputs.append(OutputVar(arg, field_name))
|
|
431
586
|
seen_args.add(arg)
|
|
432
587
|
|
|
433
588
|
for arg in rank.projection:
|
|
434
589
|
if arg not in seen_args:
|
|
435
|
-
sub_query_outputs.append((
|
|
590
|
+
sub_query_outputs.append(OutputVar(value=arg))
|
|
436
591
|
seen_args.add(arg)
|
|
437
592
|
|
|
438
593
|
sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)
|
|
439
594
|
|
|
440
|
-
|
|
595
|
+
assert rank_result_field_name is not None, "Rank result variable not found in update.args."
|
|
596
|
+
return sql.Select(False, outputs, sub_select, order_by=[sql.VarRef(rank_result_field_name)], limit=rank.limit)
|
|
441
597
|
|
|
442
|
-
def _make_match_select(self, lookups: list[ir.Lookup],
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
|
|
446
|
-
is_output: bool = False):
|
|
598
|
+
def _make_match_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], unions: list[ir.Union],
|
|
599
|
+
nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
|
|
600
|
+
distinct: bool = False, is_output: bool = False):
|
|
447
601
|
|
|
448
602
|
"""
|
|
449
603
|
Generate a SQL SELECT statement representing a match operation.
|
|
@@ -480,8 +634,10 @@ class ModelToSQL:
|
|
|
480
634
|
froms, joins, wheres, sql_vars, var_column, var_lookups = self._extract_match_lookups_metadata(table_lookups, union_lookups)
|
|
481
635
|
|
|
482
636
|
builtin_lookups = OrderedSet.from_iterable(t for t in lookups if builtins.is_builtin(t.relation))
|
|
483
|
-
builtin_vars, builtin_wheres =
|
|
484
|
-
|
|
637
|
+
builtin_vars, builtin_wheres, builtin_table_expressions = (
|
|
638
|
+
self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
|
|
639
|
+
|
|
640
|
+
froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
|
|
485
641
|
|
|
486
642
|
wheres.extend(builtin_wheres)
|
|
487
643
|
|
|
@@ -502,10 +658,9 @@ class ModelToSQL:
|
|
|
502
658
|
|
|
503
659
|
return sql.Select(distinct, vars, froms, where, joins, is_output=is_output)
|
|
504
660
|
|
|
505
|
-
def _make_full_outer_join_select(self,
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
distinct: bool = False, is_output: bool = False):
|
|
661
|
+
def _make_full_outer_join_select(self, outputs: list[OutputVar], unions: list[ir.Union],
|
|
662
|
+
constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
|
|
663
|
+
is_output: bool = False):
|
|
509
664
|
|
|
510
665
|
"""
|
|
511
666
|
Generate a SQL SELECT statement representing a match operation that combines multiple sets of data
|
|
@@ -563,8 +718,7 @@ class ModelToSQL:
|
|
|
563
718
|
|
|
564
719
|
return sql.Select(distinct, vars, froms, where, joins, is_output=is_output)
|
|
565
720
|
|
|
566
|
-
def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup],
|
|
567
|
-
outputs: list[Tuple[Optional[str], ir.Value] | Tuple[Optional[str], ir.Value, ir.Task]],
|
|
721
|
+
def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup], outputs: list[OutputVar],
|
|
568
722
|
nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
|
|
569
723
|
distinct: bool = False) -> sql.Select:
|
|
570
724
|
|
|
@@ -661,8 +815,14 @@ class ModelToSQL:
|
|
|
661
815
|
self._extract_left_outer_joins_lookups_metadata(task, table_lookups, nots))
|
|
662
816
|
|
|
663
817
|
builtin_lookups = OrderedSet.from_iterable(t for t in lookups if builtins.is_builtin(t.relation))
|
|
664
|
-
builtin_vars, builtin_wheres =
|
|
665
|
-
|
|
818
|
+
builtin_vars, builtin_wheres, builtin_table_expressions = (
|
|
819
|
+
self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
|
|
820
|
+
|
|
821
|
+
# SF in case of `LEFT OUTER JOIN` and `ARRAY_GENERATE_RANGE` doesn't allow usage of `ON TRUE` but
|
|
822
|
+
# for DuckDB this is mandatory that is why we have 2 different join classes.
|
|
823
|
+
make_join = (lambda e, a: sql.Join(e, a)) if self._is_duck_db else (lambda e, a: sql.JoinWithoutCondition(e, a))
|
|
824
|
+
joins.extend(make_join(expr, alias) for alias, expr in builtin_table_expressions.items())
|
|
825
|
+
|
|
666
826
|
wheres.extend(builtin_wheres)
|
|
667
827
|
|
|
668
828
|
construct_wheres = self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
|
|
@@ -678,10 +838,9 @@ class ModelToSQL:
|
|
|
678
838
|
|
|
679
839
|
return sql.Select(distinct, vars, froms, where, joins, is_output=True)
|
|
680
840
|
|
|
681
|
-
def _make_select(self, lookups: list[ir.Lookup],
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
constructs: Optional[list[ir.Construct]] = None, distinct: bool = False, is_output: bool = False) -> sql.Select:
|
|
841
|
+
def _make_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], nots: Optional[list[ir.Not]] = None,
|
|
842
|
+
unions: Optional[list[ir.Union]] = None, constructs: Optional[list[ir.Construct]] = None,
|
|
843
|
+
distinct: bool = False, is_output: bool = False) -> sql.Select:
|
|
685
844
|
|
|
686
845
|
var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
|
|
687
846
|
|
|
@@ -692,8 +851,11 @@ class ModelToSQL:
|
|
|
692
851
|
froms, wheres, sql_vars, var_column, var_lookups = self._extract_lookups_metadata(table_lookups)
|
|
693
852
|
|
|
694
853
|
builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
|
|
695
|
-
builtin_vars, builtin_wheres =
|
|
696
|
-
|
|
854
|
+
builtin_vars, builtin_wheres, builtin_table_expressions = (
|
|
855
|
+
self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs))
|
|
856
|
+
|
|
857
|
+
froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
|
|
858
|
+
|
|
697
859
|
wheres.extend(builtin_wheres)
|
|
698
860
|
|
|
699
861
|
construct_wheres = self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
|
|
@@ -991,14 +1153,19 @@ class ModelToSQL:
|
|
|
991
1153
|
"""Generate hash expression like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)."""
|
|
992
1154
|
elements = []
|
|
993
1155
|
for val in values:
|
|
994
|
-
|
|
995
|
-
if isinstance(
|
|
996
|
-
|
|
997
|
-
|
|
1156
|
+
resolved_val = resolve_builtin_var(val)
|
|
1157
|
+
if val != resolved_val and isinstance(resolved_val, str):
|
|
1158
|
+
# In case we parsed builtin into some expression, we may add it as an element.
|
|
1159
|
+
# For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
|
|
1160
|
+
elements.append(f"{resolved_val}")
|
|
1161
|
+
continue
|
|
1162
|
+
if isinstance(resolved_val, ir.Var):
|
|
1163
|
+
if resolved_val in var_to_construct:
|
|
1164
|
+
elements.append(self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[resolved_val]))
|
|
998
1165
|
else:
|
|
999
|
-
elements.append(reference(
|
|
1166
|
+
elements.append(reference(resolved_val))
|
|
1000
1167
|
else:
|
|
1001
|
-
elements.append(self._convert_value(
|
|
1168
|
+
elements.append(str(self._convert_value(resolved_val)))
|
|
1002
1169
|
return f"hash({', '.join(elements)})"
|
|
1003
1170
|
|
|
1004
1171
|
def _resolve_construct_var(self, reference, resolve_builtin_var, var_to_construct, construct: ir.Construct):
|
|
@@ -1012,17 +1179,20 @@ class ModelToSQL:
|
|
|
1012
1179
|
def _resolve_builtins(self, builtin_lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
1013
1180
|
var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
|
|
1014
1181
|
var_to_construct: dict[ir.Var, ir.Construct],
|
|
1015
|
-
outputs: Optional[list[
|
|
1182
|
+
outputs: Optional[list[OutputVar]] = None):
|
|
1016
1183
|
|
|
1017
1184
|
wheres: list[sql.Expr] = []
|
|
1185
|
+
# We need to maintain a mapping of these builtin expressions because they generate a new table, which must be
|
|
1186
|
+
# referenced in the FROM clause as part of a JOIN. Structure is `SQL table variable` -> `generated expression`
|
|
1187
|
+
table_expressions: dict[str, str] = {}
|
|
1018
1188
|
builtin_vars: dict[ir.Var, ir.Value|str|int] = {}
|
|
1019
1189
|
# TODO: remove this when we introduce date periods in builtins
|
|
1020
1190
|
date_period_var_type: dict[ir.Var, str] = {}
|
|
1021
1191
|
|
|
1022
1192
|
output_vars = {
|
|
1023
|
-
output
|
|
1193
|
+
output.value
|
|
1024
1194
|
for output in outputs or []
|
|
1025
|
-
if isinstance(output
|
|
1195
|
+
if isinstance(output.value, ir.Var)
|
|
1026
1196
|
}
|
|
1027
1197
|
|
|
1028
1198
|
intermediate_builtin_vars: set[ir.Var] = {
|
|
@@ -1073,6 +1243,46 @@ class ModelToSQL:
|
|
|
1073
1243
|
idx = self._var_to_expr(idx_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1074
1244
|
assert isinstance(output, ir.Var)
|
|
1075
1245
|
builtin_vars[output] = f"split_part({s}, {separator}, {idx})"
|
|
1246
|
+
elif relation == builtins.split:
|
|
1247
|
+
assert len(args) == 4, f"Expected 4 args for `split`, got {len(args)}: {args}"
|
|
1248
|
+
separator_raw, value_raw, index, part = args
|
|
1249
|
+
value = self._var_to_expr(value_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1250
|
+
separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1251
|
+
table_sql_var = f"v{len(sql_vars)}"
|
|
1252
|
+
sql_vars[lookup] = table_sql_var
|
|
1253
|
+
if self._is_duck_db:
|
|
1254
|
+
table_alias = f"{table_sql_var}(data)"
|
|
1255
|
+
table_expressions[table_alias] = f"VALUES(string_split({value}, {separator}))"
|
|
1256
|
+
|
|
1257
|
+
part_expr = f"unnest({table_sql_var}.data)"
|
|
1258
|
+
index_expr = f"generate_subscripts({table_sql_var}.data, 1)"
|
|
1259
|
+
else:
|
|
1260
|
+
table_expressions[table_sql_var] = f"LATERAL FLATTEN(input => SPLIT({value}, {separator}))"
|
|
1261
|
+
|
|
1262
|
+
# SF returns values in `""` and to avoid this, we need to cast it to `TEXT` type
|
|
1263
|
+
part_expr = f"cast({table_sql_var}.value as TEXT)"
|
|
1264
|
+
index_expr = f"({table_sql_var}.index + 1)" # SF is 0-based internally, adjust to it back
|
|
1265
|
+
assert isinstance(index, ir.Var) and isinstance(part, ir.Var), "Third and fourth arguments (index, part) must be variables"
|
|
1266
|
+
builtin_vars[part] = part_expr
|
|
1267
|
+
builtin_vars[index] = index_expr
|
|
1268
|
+
elif relation == builtins.range:
|
|
1269
|
+
assert len(args) == 4, f"Expected 4 args for `range`, got {len(args)}: {args}"
|
|
1270
|
+
start_raw, stop_raw, step_raw, result = args
|
|
1271
|
+
start = self._var_to_expr(start_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1272
|
+
stop = self._var_to_expr(stop_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1273
|
+
step = self._var_to_expr(step_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1274
|
+
table_sql_var = f"v{len(sql_vars)}"
|
|
1275
|
+
sql_vars[lookup] = table_sql_var
|
|
1276
|
+
# In SQL range is 1...stop exclusive, and because we did `-1` in PyRel v1 we need to return it here
|
|
1277
|
+
if self._is_duck_db:
|
|
1278
|
+
table_expr = f"LATERAL range(cast({start} as bigint), cast(({stop} + 1) as bigint), cast({step} as bigint))"
|
|
1279
|
+
expr = f"{table_sql_var}.range"
|
|
1280
|
+
else:
|
|
1281
|
+
table_expr = f"LATERAL FLATTEN(input => ARRAY_GENERATE_RANGE({start}, ({stop} + 1), {step}))"
|
|
1282
|
+
expr = f"{table_sql_var}.value"
|
|
1283
|
+
table_expressions[table_sql_var] = table_expr
|
|
1284
|
+
assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable"
|
|
1285
|
+
builtin_vars[result] = f"{expr}"
|
|
1076
1286
|
elif relation == builtins.cast:
|
|
1077
1287
|
assert len(args) == 3, f"Expected 3 args for `cast`, got {len(args)}: {args}"
|
|
1078
1288
|
|
|
@@ -1080,38 +1290,50 @@ class ModelToSQL:
|
|
|
1080
1290
|
assert isinstance(result, ir.Var), "Third argument (result) must be a variable"
|
|
1081
1291
|
|
|
1082
1292
|
builtin_vars[result] = original_raw
|
|
1083
|
-
elif relation in
|
|
1293
|
+
elif relation in {builtins.isnan, builtins.isinf}:
|
|
1084
1294
|
arg_expr = self._var_to_expr(args[0], reference, resolve_builtin_var, var_to_construct)
|
|
1085
1295
|
expr = "cast('NaN' AS DOUBLE)" if relation == builtins.isnan else "cast('Infinity' AS DOUBLE)"
|
|
1086
1296
|
wheres.append(sql.Terminal(f"{arg_expr} = {expr}"))
|
|
1297
|
+
elif relation == builtins.construct_date:
|
|
1298
|
+
assert len(args) == 4, f"Expected 4 args for `construct_date`, got {len(args)}: {args}"
|
|
1299
|
+
year_raw, month_raw, day_raw, result = args
|
|
1300
|
+
year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1301
|
+
month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1302
|
+
day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1303
|
+
|
|
1304
|
+
assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable."
|
|
1305
|
+
if self._is_duck_db:
|
|
1306
|
+
expr = f"make_date(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint))"
|
|
1307
|
+
else:
|
|
1308
|
+
expr = f"date_from_parts({year}, {month}, {day})"
|
|
1309
|
+
builtin_vars[result] = expr
|
|
1087
1310
|
elif relation == builtins.construct_datetime_ms_tz:
|
|
1088
1311
|
assert len(args) == 9, f"Expected 9 args for `construct_datetime_ms_tz`, got {len(args)}: {args}"
|
|
1089
1312
|
|
|
1090
|
-
year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw,
|
|
1091
|
-
tz = self._convert_value(tz, quote_strings=False)
|
|
1092
|
-
assert isinstance(tz, str), "Timezone argument (tz) must be a string."
|
|
1313
|
+
year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw, tz_raw, result = args
|
|
1093
1314
|
assert isinstance(result, ir.Var), "Ninth argument (result) must be a variable."
|
|
1094
1315
|
|
|
1095
1316
|
year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1096
1317
|
month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1097
1318
|
day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1098
|
-
hour = self.
|
|
1099
|
-
minute = self.
|
|
1100
|
-
second = self.
|
|
1101
|
-
millisecond = self.
|
|
1319
|
+
hour = self._var_to_expr(hour_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1320
|
+
minute = self._var_to_expr(minute_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1321
|
+
second = self._var_to_expr(second_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1322
|
+
millisecond = self._var_to_expr(millisecond_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1323
|
+
tz = self._var_to_expr(tz_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1102
1324
|
|
|
1103
1325
|
if self._is_duck_db:
|
|
1104
|
-
sub_expr = (f"make_timestamp({year}, {month}, {day}
|
|
1105
|
-
f"{second} + {millisecond}/1000.0)")
|
|
1106
|
-
if tz.lower() != "utc":
|
|
1107
|
-
sub_expr = f"(({sub_expr} at time zone
|
|
1326
|
+
sub_expr = (f"make_timestamp(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint), "
|
|
1327
|
+
f"cast({hour} as bigint), cast({minute} as bigint), cast({second} as bigint) + {millisecond}/1000.0)")
|
|
1328
|
+
if tz.lower() != "'utc'":
|
|
1329
|
+
sub_expr = f"(({sub_expr} at time zone {tz}) at time zone 'UTC')"
|
|
1108
1330
|
else:
|
|
1109
1331
|
sub_expr = (f"to_timestamp_ntz(lpad({year}, 4, '0') || '-' || lpad({month}, 2, '0') || '-' || "
|
|
1110
1332
|
f"lpad({day}, 2, '0') || ' ' || lpad({hour}, 2, '0') || ':' || "
|
|
1111
1333
|
f"lpad({minute}, 2, '0') || ':' || lpad({second}, 2, '0') || '.' || "
|
|
1112
1334
|
f"lpad({millisecond}, 3, '0'), 'YYYY-MM-DD HH24:MI:SS.FF3')")
|
|
1113
|
-
if tz.lower() != "utc":
|
|
1114
|
-
sub_expr = f"convert_timezone(
|
|
1335
|
+
if tz.lower() != "'utc'":
|
|
1336
|
+
sub_expr = f"convert_timezone({tz}, 'UTC', {sub_expr})"
|
|
1115
1337
|
builtin_vars[result] = f"cast({sub_expr} as DATETIME)"
|
|
1116
1338
|
else:
|
|
1117
1339
|
# Assuming infix binary or ternary operators here
|
|
@@ -1150,6 +1372,12 @@ class ModelToSQL:
|
|
|
1150
1372
|
function = "levenshtein" if self._is_duck_db else "editdistance"
|
|
1151
1373
|
assert isinstance(args[2], ir.Var)
|
|
1152
1374
|
builtin_vars[args[2]] = f"{function}({left}, {right})"
|
|
1375
|
+
elif relation == builtins.concat:
|
|
1376
|
+
assert len(args) == 3, f"Expected 3 args for `concat`, got {len(args)}: {args}"
|
|
1377
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1378
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1379
|
+
assert isinstance(args[2], ir.Var)
|
|
1380
|
+
builtin_vars[args[2]] = f"concat({left}, {right})"
|
|
1153
1381
|
elif relation == builtins.join:
|
|
1154
1382
|
assert len(args) == 3, f"Expected 3 args for `join`, got {len(args)}: {args}"
|
|
1155
1383
|
assert isinstance(lhs, tuple)
|
|
@@ -1175,12 +1403,32 @@ class ModelToSQL:
|
|
|
1175
1403
|
elif relation == builtins.parse_float and isinstance(rhs, ir.Var):
|
|
1176
1404
|
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1177
1405
|
builtin_vars[rhs] = f"cast({left} AS DOUBLE)"
|
|
1406
|
+
elif relation == builtins.parse_date:
|
|
1407
|
+
if self._is_duck_db:
|
|
1408
|
+
raise Exception("DuckDB: unsupported builtin relation 'parse_date'.")
|
|
1409
|
+
assert len(args) == 3, f"Expected 3 args for `parse_date`, got {len(args)}: {args}"
|
|
1410
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1411
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1412
|
+
assert isinstance(args[2], ir.Var)
|
|
1413
|
+
builtin_vars[args[2]] = f"to_date({left}, {right})"
|
|
1414
|
+
elif relation == builtins.parse_datetime:
|
|
1415
|
+
assert len(args) == 3, f"Expected 3 args for `parse_datetime`, got {len(args)}: {args}"
|
|
1416
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1417
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1418
|
+
sub_expr = left
|
|
1419
|
+
if 'z' in right: # this means that our datetime formatter includes a timezone, and we need to convert first.
|
|
1420
|
+
if self._is_duck_db:
|
|
1421
|
+
sub_expr = f"({left} AT TIME ZONE 'UTC')"
|
|
1422
|
+
else:
|
|
1423
|
+
sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
|
|
1424
|
+
assert isinstance(args[2], ir.Var)
|
|
1425
|
+
builtin_vars[args[2]] = f"cast({sub_expr} as DATETIME)"
|
|
1178
1426
|
elif relation in builtins.date_periods and isinstance(rhs, ir.Var):
|
|
1179
1427
|
builtin_vars[rhs] = lhs
|
|
1180
1428
|
date_period_var_type[rhs] = relation.name
|
|
1181
1429
|
elif relation in builtins.date_builtins:
|
|
1182
|
-
if relation in
|
|
1183
|
-
builtins.datetime_subtract
|
|
1430
|
+
if relation in {builtins.date_add, builtins.date_subtract, builtins.datetime_add,
|
|
1431
|
+
builtins.datetime_subtract}:
|
|
1184
1432
|
assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
|
|
1185
1433
|
assert isinstance(rhs, ir.Var), f"Period variable must be `ir.Var`, got: {rhs}"
|
|
1186
1434
|
period = date_period_var_type[rhs]
|
|
@@ -1189,10 +1437,10 @@ class ModelToSQL:
|
|
|
1189
1437
|
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1190
1438
|
|
|
1191
1439
|
if self._is_duck_db:
|
|
1192
|
-
op = "+" if relation in
|
|
1193
|
-
expr = f"({left} {op}
|
|
1440
|
+
op = "+" if relation in {builtins.date_add, builtins.datetime_add} else "-"
|
|
1441
|
+
expr = f"({left} {op} {period_val} * interval 1 {period})"
|
|
1194
1442
|
else:
|
|
1195
|
-
sign = 1 if relation in
|
|
1443
|
+
sign = 1 if relation in {builtins.date_add, builtins.datetime_add} else -1
|
|
1196
1444
|
expr = f"dateadd({period}, ({sign} * {period_val}), {left})"
|
|
1197
1445
|
|
|
1198
1446
|
result_var = args[2]
|
|
@@ -1206,9 +1454,13 @@ class ModelToSQL:
|
|
|
1206
1454
|
assert isinstance(rhs, ir.Var), f"Resulting variable must be `ir.Var`, got: {rhs}"
|
|
1207
1455
|
expr_map = {
|
|
1208
1456
|
builtins.date_year: "year",
|
|
1457
|
+
builtins.date_quarter: "quarter",
|
|
1209
1458
|
builtins.date_month: "month",
|
|
1210
1459
|
builtins.date_week: "week",
|
|
1211
|
-
builtins.date_day: "day"
|
|
1460
|
+
builtins.date_day: "day",
|
|
1461
|
+
builtins.date_dayofyear: "dayofyear",
|
|
1462
|
+
builtins.date_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
|
|
1463
|
+
builtins.datetime_second: "second",
|
|
1212
1464
|
}
|
|
1213
1465
|
expr = expr_map.get(relation)
|
|
1214
1466
|
lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
@@ -1217,19 +1469,34 @@ class ModelToSQL:
|
|
|
1217
1469
|
result_var = args[2]
|
|
1218
1470
|
assert isinstance(result_var, ir.Var), f"Resulting variable must be `ir.Var`, got: {result_var}"
|
|
1219
1471
|
expr_map = {
|
|
1472
|
+
builtins.datetime_year: "year",
|
|
1473
|
+
builtins.datetime_quarter: "quarter",
|
|
1474
|
+
builtins.datetime_month: "month",
|
|
1220
1475
|
builtins.datetime_week: "week",
|
|
1476
|
+
builtins.datetime_day: "day",
|
|
1477
|
+
builtins.datetime_dayofyear: "dayofyear",
|
|
1478
|
+
builtins.datetime_hour: "hour",
|
|
1479
|
+
builtins.datetime_minute: "minute",
|
|
1480
|
+
builtins.datetime_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
|
|
1481
|
+
builtins.dates_period_days: "date_diff" if self._is_duck_db else "datediff",
|
|
1482
|
+
builtins.datetimes_period_milliseconds: "date_diff" if self._is_duck_db else "datediff"
|
|
1221
1483
|
}
|
|
1222
1484
|
expr = expr_map.get(relation)
|
|
1223
1485
|
lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1224
|
-
rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct
|
|
1225
|
-
|
|
1486
|
+
rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1487
|
+
if relation == builtins.dates_period_days:
|
|
1488
|
+
sub_expr = f"'day', {lhs}, {rhs}" if self._is_duck_db else f"day, {lhs}, {rhs}"
|
|
1489
|
+
elif relation == builtins.datetimes_period_milliseconds:
|
|
1490
|
+
sub_expr = f"'millisecond', {lhs}, {rhs}" if self._is_duck_db else f"millisecond, {lhs}, {rhs}"
|
|
1491
|
+
else:
|
|
1492
|
+
sub_expr = self._convert_timezone(lhs, rhs)
|
|
1226
1493
|
builtin_vars[result_var] = f"{expr}({sub_expr})"
|
|
1227
1494
|
else:
|
|
1228
1495
|
raise NotImplementedError("Unsupported number of arguments for date builtin (3+).")
|
|
1229
1496
|
elif relation == builtins.construct_date_from_datetime:
|
|
1230
|
-
assert len(args) == 3, f"Expected 3 args for `
|
|
1497
|
+
assert len(args) == 3, f"Expected 3 args for `construct_date_from_datetime`, got {len(args)}: {args}"
|
|
1231
1498
|
dt_raw, tz, result = args
|
|
1232
|
-
tz = self._convert_value(tz
|
|
1499
|
+
tz = self._convert_value(tz)
|
|
1233
1500
|
|
|
1234
1501
|
assert isinstance(tz, str), "Timezone argument (tz) must be a string."
|
|
1235
1502
|
assert isinstance(result, ir.Var), "Third argument (result) must be a variable."
|
|
@@ -1253,9 +1520,14 @@ class ModelToSQL:
|
|
|
1253
1520
|
if rel_name == builtins.factorial.name and self._is_duck_db:
|
|
1254
1521
|
# Factorial requires an integer operand in DuckDB
|
|
1255
1522
|
sub_expr = f"{left}::INTEGER"
|
|
1523
|
+
elif rel_name == builtins.log10.name:
|
|
1524
|
+
# log10 is not supported, so we use log with base 10
|
|
1525
|
+
sub_expr = f"10, {left}"
|
|
1526
|
+
method = "log"
|
|
1256
1527
|
expr = f"{method}({sub_expr})"
|
|
1257
|
-
elif rel_name in
|
|
1258
|
-
builtins.power.name, builtins.mod.name, builtins.pow.name
|
|
1528
|
+
elif rel_name in {builtins.minimum.name, builtins.maximum.name, builtins.trunc_div.name,
|
|
1529
|
+
builtins.power.name, builtins.mod.name, builtins.pow.name,
|
|
1530
|
+
builtins.log.name}:
|
|
1259
1531
|
assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
|
|
1260
1532
|
|
|
1261
1533
|
result_var = args[2]
|
|
@@ -1269,6 +1541,8 @@ class ModelToSQL:
|
|
|
1269
1541
|
expr = f"trunc({left} / {right})"
|
|
1270
1542
|
elif rel_name == builtins.power.name or rel_name == builtins.pow.name:
|
|
1271
1543
|
expr = f"power({left}, {right})"
|
|
1544
|
+
elif rel_name == builtins.log.name:
|
|
1545
|
+
expr = f"log({left}, {right})"
|
|
1272
1546
|
else:
|
|
1273
1547
|
expr = f"mod({left}, {right})"
|
|
1274
1548
|
else:
|
|
@@ -1278,15 +1552,11 @@ class ModelToSQL:
|
|
|
1278
1552
|
f"but got `{type(result_var).__name__}`: {result_var}"
|
|
1279
1553
|
)
|
|
1280
1554
|
builtin_vars[result_var] = expr
|
|
1281
|
-
elif relation in
|
|
1555
|
+
elif relation in {builtins.parse_int64, builtins.parse_int128} and isinstance(rhs, ir.Var):
|
|
1282
1556
|
builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct, False)
|
|
1283
1557
|
elif helpers.is_from_cast(lookup) and isinstance(rhs, ir.Var):
|
|
1284
1558
|
# For the `from cast` relations we keep the raw var, and we will ground it later.
|
|
1285
1559
|
builtin_vars[rhs] = lhs
|
|
1286
|
-
elif isinstance(lhs, ir.Var) and lhs in output_vars & intermediate_builtin_vars:
|
|
1287
|
-
builtin_vars[lhs] = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1288
|
-
elif isinstance(rhs, ir.Var) and rhs in output_vars & intermediate_builtin_vars:
|
|
1289
|
-
builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1290
1560
|
elif isinstance(lhs, ir.Var) and lhs in intermediate_builtin_vars and lhs not in (builtin_vars | var_to_construct):
|
|
1291
1561
|
# Example IR:
|
|
1292
1562
|
# Logical
|
|
@@ -1314,26 +1584,13 @@ class ModelToSQL:
|
|
|
1314
1584
|
out_var = args[2]
|
|
1315
1585
|
if isinstance(out_var, ir.Var):
|
|
1316
1586
|
out_var = resolve_builtin_var(out_var)
|
|
1317
|
-
|
|
1318
|
-
expr = f"{relation_name}({left}, {right})"
|
|
1319
|
-
elif relation == builtins.parse_date:
|
|
1320
|
-
expr = f"cast({left} as DATE)"
|
|
1321
|
-
elif relation == builtins.parse_datetime:
|
|
1322
|
-
sub_expr = left
|
|
1323
|
-
if 'z' in right: # this means that out datetime formatter includes timezone and we need to convert first.
|
|
1324
|
-
if self._is_duck_db:
|
|
1325
|
-
sub_expr = f"({left} AT TIME ZONE 'UTC')"
|
|
1326
|
-
else:
|
|
1327
|
-
sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
|
|
1328
|
-
expr = f"cast({sub_expr} as DATETIME)"
|
|
1329
|
-
else:
|
|
1330
|
-
expr = f"({left} {relation_name} {right})"
|
|
1587
|
+
expr = f"({left} {relation_name} {right})"
|
|
1331
1588
|
if isinstance(out_var, ir.Var):
|
|
1332
1589
|
# For example, when this is an intermediate result
|
|
1333
1590
|
# example: c = a - b in the IR is (a - b = d) and (d = c)
|
|
1334
1591
|
builtin_vars[out_var] = expr
|
|
1335
1592
|
else:
|
|
1336
|
-
# This means that var was already grounded and we can add WHERE clause.
|
|
1593
|
+
# This means that var was already grounded, and we can add a WHERE clause.
|
|
1337
1594
|
wheres.append(sql.Terminal(f"{expr} = {out_var}"))
|
|
1338
1595
|
else:
|
|
1339
1596
|
raise Exception(
|
|
@@ -1363,16 +1620,23 @@ class ModelToSQL:
|
|
|
1363
1620
|
rhs = str(rhs_ref)
|
|
1364
1621
|
wheres.append(sql.Terminal(f"{ref} = {rhs}"))
|
|
1365
1622
|
|
|
1366
|
-
return builtin_vars, wheres
|
|
1623
|
+
return builtin_vars, wheres, table_expressions
|
|
1367
1624
|
|
|
1368
1625
|
def _convert_timezone(self, dt: str, tz: str) -> str:
|
|
1369
|
-
if tz.lower() != "utc":
|
|
1626
|
+
if tz.lower() != "'utc'":
|
|
1370
1627
|
if self._is_duck_db:
|
|
1371
|
-
return f"({dt} at time zone 'UTC') at time zone
|
|
1628
|
+
return f"({dt} at time zone 'UTC') at time zone {tz}"
|
|
1372
1629
|
else:
|
|
1373
|
-
return f"convert_timezone('UTC',
|
|
1630
|
+
return f"convert_timezone('UTC', {tz}, {dt})"
|
|
1374
1631
|
return dt
|
|
1375
1632
|
|
|
1633
|
+
def _process_builtin_table_expressions(self, builtin_table_expressions: dict[str, str]):
|
|
1634
|
+
"""Convert builtin table expressions into SQL FROM clauses."""
|
|
1635
|
+
return [
|
|
1636
|
+
sql.From(expr, alias)
|
|
1637
|
+
for alias, expr in builtin_table_expressions.items()
|
|
1638
|
+
]
|
|
1639
|
+
|
|
1376
1640
|
def _process_constructs(self, lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
1377
1641
|
var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
|
|
1378
1642
|
builtin_vars: dict[ir.Var, ir.Value|str|int], var_to_construct: dict[ir.Var, ir.Construct]) -> list[sql.Expr]:
|
|
@@ -1485,21 +1749,20 @@ class ModelToSQL:
|
|
|
1485
1749
|
where = sql.Where(sql.And(wheres))
|
|
1486
1750
|
return where
|
|
1487
1751
|
|
|
1488
|
-
def _generate_select_output(self, outputs: list[
|
|
1489
|
-
|
|
1490
|
-
var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
|
|
1752
|
+
def _generate_select_output(self, outputs: list[OutputVar], builtin_vars: dict[ir.Var, ir.Value|str|int],
|
|
1753
|
+
sql_vars: dict[ir.Lookup, str], var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
|
|
1491
1754
|
var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
1492
1755
|
var_to_construct: dict[ir.Var, ir.Construct]):
|
|
1493
1756
|
|
|
1494
1757
|
reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
|
|
1495
1758
|
resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)
|
|
1496
1759
|
|
|
1497
|
-
def handle_lookup_var(var):
|
|
1760
|
+
def handle_lookup_var(var, var_type, alias):
|
|
1498
1761
|
lookup = var_lookups[var].some()
|
|
1499
1762
|
relation = lookup.relation
|
|
1500
1763
|
var_name = sql_vars[lookup]
|
|
1501
1764
|
column_name = self._var_name(relation.id, var_column[var, lookup])
|
|
1502
|
-
vars.append(sql.VarRef(var_name, column_name, alias))
|
|
1765
|
+
vars.append(sql.VarRef(var_name, column_name, alias, var_type))
|
|
1503
1766
|
if from_cdc_annotation in relation.annotations:
|
|
1504
1767
|
not_null_vars.add(f"{var_name}.{column_name}")
|
|
1505
1768
|
|
|
@@ -1509,6 +1772,11 @@ class ModelToSQL:
|
|
|
1509
1772
|
for val in construct.values:
|
|
1510
1773
|
if val in builtin_vars:
|
|
1511
1774
|
val = resolve_builtin_var(val)
|
|
1775
|
+
if isinstance(val, str):
|
|
1776
|
+
# In case we parsed builtin into some expression, we may add it as an element.
|
|
1777
|
+
# For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
|
|
1778
|
+
elements.append(f"{val}")
|
|
1779
|
+
continue
|
|
1512
1780
|
if isinstance(val, ir.Var):
|
|
1513
1781
|
if val in var_to_construct:
|
|
1514
1782
|
elements.append(handle_construct(var_to_construct[val]))
|
|
@@ -1520,41 +1788,40 @@ class ModelToSQL:
|
|
|
1520
1788
|
if from_cdc_annotation in lookup.relation.annotations:
|
|
1521
1789
|
not_null_vars.add(lookup_var)
|
|
1522
1790
|
else:
|
|
1523
|
-
elements.append(self._convert_value(val
|
|
1791
|
+
elements.append(str(self._convert_value(val)))
|
|
1524
1792
|
return f"hash({', '.join(elements)})"
|
|
1525
1793
|
|
|
1526
1794
|
# finally, compute what the select will return
|
|
1527
1795
|
vars = []
|
|
1528
1796
|
not_null_vars = ordered_set()
|
|
1529
1797
|
for output in outputs:
|
|
1530
|
-
alias, var = output
|
|
1531
|
-
task = output[2] if len(output) > 2 else None
|
|
1798
|
+
alias, var, var_type, task = output.alias, output.value, output.value_type, output.task
|
|
1532
1799
|
if isinstance(var, ir.Var):
|
|
1533
1800
|
if var in var_lookups and not task:
|
|
1534
|
-
handle_lookup_var(var)
|
|
1801
|
+
handle_lookup_var(var, var_type, alias)
|
|
1535
1802
|
elif var in builtin_vars:
|
|
1536
1803
|
var_ref = resolve_builtin_var(var)
|
|
1537
1804
|
if var_ref in var_lookups:
|
|
1538
1805
|
# Case: result of `cast` variable
|
|
1539
|
-
handle_lookup_var(var_ref)
|
|
1806
|
+
handle_lookup_var(var_ref, var_type, alias)
|
|
1540
1807
|
elif isinstance(var_ref, ir.Literal):
|
|
1541
1808
|
# Case: literal value from `cast` relation, e.g. `decimal(0)`
|
|
1542
|
-
vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias))
|
|
1809
|
+
vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias, type=var_type))
|
|
1543
1810
|
else:
|
|
1544
1811
|
# Example: We may have `decimal(0)` in QB which turns in IR into:
|
|
1545
1812
|
# (cast(Decimal128, 0, vDecimal128) and decimal128(vDecimal128, res_3))
|
|
1546
1813
|
# and we need to make it `0` in SQL.
|
|
1547
1814
|
var_ref = var_ref.name if isinstance(var_ref, ir.Var) else str(var_ref)
|
|
1548
|
-
vars.append(sql.VarRef(var_ref, alias=alias))
|
|
1815
|
+
vars.append(sql.VarRef(var_ref, alias=alias, type=var_type))
|
|
1549
1816
|
elif task:
|
|
1550
1817
|
if isinstance(task, ir.Construct):
|
|
1551
1818
|
# Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME) as `alias`
|
|
1552
|
-
vars.append(sql.VarRef(handle_construct(task), alias=alias))
|
|
1819
|
+
vars.append(sql.VarRef(handle_construct(task), alias=alias, type=var_type))
|
|
1553
1820
|
elif isinstance(task, ir.Aggregate):
|
|
1554
1821
|
result_arg = task.projection[-1] if task.aggregation == builtins.count else task.args[0]
|
|
1555
1822
|
result_arg = resolve_builtin_var(result_arg)
|
|
1556
1823
|
ref = reference(result_arg) if isinstance(result_arg, ir.Var) else str(result_arg)
|
|
1557
|
-
vars.append(sql.VarRef(str(ref), alias=alias))
|
|
1824
|
+
vars.append(sql.VarRef(str(ref), alias=alias, type=var_type))
|
|
1558
1825
|
elif isinstance(task, ir.Union):
|
|
1559
1826
|
# Handle `COALESCE` of all lookups of this var from the union
|
|
1560
1827
|
lookups = self._extract_all_lookups_from_union(task)
|
|
@@ -1566,10 +1833,10 @@ class ModelToSQL:
|
|
|
1566
1833
|
elements.append(f"{sql_vars[lu]}.{column_name}")
|
|
1567
1834
|
|
|
1568
1835
|
expr = "COALESCE(" + ", ".join(elements) + ")"
|
|
1569
|
-
vars.append(sql.VarRef(expr, alias=alias))
|
|
1836
|
+
vars.append(sql.VarRef(expr, alias=alias, type=var_type))
|
|
1570
1837
|
else:
|
|
1571
1838
|
# TODO - abusing even more here, because var is a value!
|
|
1572
|
-
vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias))
|
|
1839
|
+
vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias, type=var_type))
|
|
1573
1840
|
return not_null_vars, vars
|
|
1574
1841
|
|
|
1575
1842
|
def _generate_select_nots(self, nots: Optional[list[ir.Not]], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
@@ -1615,8 +1882,11 @@ class ModelToSQL:
|
|
|
1615
1882
|
|
|
1616
1883
|
var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
|
|
1617
1884
|
builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
|
|
1618
|
-
builtin_vars, builtin_wheres =
|
|
1619
|
-
|
|
1885
|
+
builtin_vars, builtin_wheres, builtin_table_expressions = (
|
|
1886
|
+
self._resolve_builtins(builtin_lookups, all_var_lookups, all_var_column, all_sql_vars, var_to_construct))
|
|
1887
|
+
|
|
1888
|
+
froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
|
|
1889
|
+
|
|
1620
1890
|
wheres.extend(builtin_wheres)
|
|
1621
1891
|
|
|
1622
1892
|
construct_wheres = self._process_constructs(lookups, var_lookups, var_column, sql_vars, builtin_vars,
|
|
@@ -1728,12 +1998,24 @@ class ModelToSQL:
|
|
|
1728
1998
|
else:
|
|
1729
1999
|
return str(resolved) if isinstance(var, ir.Var) or not quote_strings else f"'{resolved}'"
|
|
1730
2000
|
|
|
1731
|
-
def
|
|
2001
|
+
def _get_update_aliases(self, update: ir.Update, var_to_construct, var_to_union, skip_type:bool=False):
|
|
2002
|
+
relation = update.relation
|
|
2003
|
+
return [
|
|
2004
|
+
self._get_alias(
|
|
2005
|
+
self._var_name(relation.id, f),
|
|
2006
|
+
arg,
|
|
2007
|
+
self._convert_type(f.type) if not skip_type else None,
|
|
2008
|
+
var_to_construct,
|
|
2009
|
+
var_to_union,
|
|
2010
|
+
)
|
|
2011
|
+
for f, arg in zip(relation.fields, update.args)
|
|
2012
|
+
]
|
|
2013
|
+
|
|
2014
|
+
def _get_alias(self, key, arg, arg_type, var_to_construct, var_to_union):
|
|
1732
2015
|
if not isinstance(arg, ir.Var):
|
|
1733
|
-
return key,
|
|
2016
|
+
return OutputVar(arg, key, arg_type)
|
|
1734
2017
|
|
|
1735
|
-
|
|
1736
|
-
return (key, arg, var_task) if var_task else (key, arg)
|
|
2018
|
+
return OutputVar(arg, key, arg_type, var_to_construct.get(arg) or var_to_union.get(arg))
|
|
1737
2019
|
|
|
1738
2020
|
def _get_tuples(self, logical: ir.Logical, u: ir.Update):
|
|
1739
2021
|
"""
|
|
@@ -1763,12 +2045,12 @@ class ModelToSQL:
|
|
|
1763
2045
|
if isinstance(val, ir.Var):
|
|
1764
2046
|
inner_construct = find_construct(val)
|
|
1765
2047
|
if inner_construct:
|
|
1766
|
-
nested = [str(self._convert_value(x
|
|
2048
|
+
nested = [str(self._convert_value(x)) for x in inner_construct.values]
|
|
1767
2049
|
resolved.append(f"hash({', '.join(nested)})")
|
|
1768
2050
|
else:
|
|
1769
|
-
resolved.append(self._convert_value(val
|
|
2051
|
+
resolved.append(str(self._convert_value(val)))
|
|
1770
2052
|
else:
|
|
1771
|
-
resolved.append(self._convert_value(val
|
|
2053
|
+
resolved.append(str(self._convert_value(val)))
|
|
1772
2054
|
|
|
1773
2055
|
return f"hash({', '.join(resolved)})"
|
|
1774
2056
|
elif isinstance(arg, FrozenOrderedSet):
|
|
@@ -1791,7 +2073,7 @@ class ModelToSQL:
|
|
|
1791
2073
|
tuples = [prev + [value] for prev in tuples]
|
|
1792
2074
|
return [tuple(t) for t in tuples]
|
|
1793
2075
|
|
|
1794
|
-
def _convert_value(self, v,
|
|
2076
|
+
def _convert_value(self, v, quote_strings:bool=True) -> str|int:
|
|
1795
2077
|
""" Convert the literal value in v to a SQL value."""
|
|
1796
2078
|
if isinstance(v, str):
|
|
1797
2079
|
return f"'{v}'" if quote_strings else v
|
|
@@ -1804,7 +2086,7 @@ class ModelToSQL:
|
|
|
1804
2086
|
return f"cast('{v.value}' as date)"
|
|
1805
2087
|
if v.type == types.DateTime:
|
|
1806
2088
|
return f"cast('{v.value}' as datetime)"
|
|
1807
|
-
return self._convert_value(v.value,
|
|
2089
|
+
return self._convert_value(v.value, quote_strings)
|
|
1808
2090
|
if isinstance(v, float):
|
|
1809
2091
|
if math.isnan(v):
|
|
1810
2092
|
return "cast('NaN' as DOUBLE)"
|
|
@@ -1812,7 +2094,7 @@ class ModelToSQL:
|
|
|
1812
2094
|
return "cast('Infinity' as DOUBLE)"
|
|
1813
2095
|
elif v == float("-inf"):
|
|
1814
2096
|
return "cast('-Infinity' as DOUBLE)"
|
|
1815
|
-
return str(v)
|
|
2097
|
+
return str(v)
|
|
1816
2098
|
if isinstance(v, datetime.datetime):
|
|
1817
2099
|
return f"cast('{v}' as datetime)"
|
|
1818
2100
|
if isinstance(v, datetime.date):
|
|
@@ -1820,7 +2102,7 @@ class ModelToSQL:
|
|
|
1820
2102
|
if isinstance(v, bool):
|
|
1821
2103
|
return str(v).lower()
|
|
1822
2104
|
if isinstance(v, int):
|
|
1823
|
-
return v
|
|
2105
|
+
return v
|
|
1824
2106
|
return str(v)
|
|
1825
2107
|
|
|
1826
2108
|
COMMON_CONVERSION = {
|
|
@@ -1861,21 +2143,27 @@ class ModelToSQL:
|
|
|
1861
2143
|
return f"DECIMAL({base_type.precision},{base_type.scale})"
|
|
1862
2144
|
raise Exception(f"Unknown built-in type: {t}")
|
|
1863
2145
|
|
|
1864
|
-
def _get_relations(self, model: ir.Model) -> list[ir.Relation]:
|
|
2146
|
+
def _get_relations(self, model: ir.Model) -> Tuple[list[ir.Relation], list[ir.Relation]]:
|
|
2147
|
+
rw = ReadWriteVisitor()
|
|
2148
|
+
model.accept(rw)
|
|
2149
|
+
|
|
2150
|
+
root = cast(ir.Logical, model.root)
|
|
2151
|
+
|
|
2152
|
+
# For query compilation exclude read-only tables because we do not need to declare `CREATE TABLE` statements
|
|
2153
|
+
used_relations = rw.writes(root) if self._query_compilation else rw.writes(root) | rw.reads(root)
|
|
2154
|
+
|
|
1865
2155
|
# Filter only relations that require table creation
|
|
1866
|
-
|
|
1867
|
-
r for r in
|
|
2156
|
+
table_relations = [
|
|
2157
|
+
r for r in used_relations
|
|
1868
2158
|
if self._is_table_creation_required(r)
|
|
1869
2159
|
]
|
|
1870
2160
|
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
writable = rw.writes(cast(ir.Logical, model.root))
|
|
1876
|
-
relations = [r for r in relations if r in writable]
|
|
2161
|
+
used_builtins = [
|
|
2162
|
+
r for r in rw.reads(root)
|
|
2163
|
+
if builtins.is_builtin(r)
|
|
2164
|
+
]
|
|
1877
2165
|
|
|
1878
|
-
return
|
|
2166
|
+
return table_relations, used_builtins
|
|
1879
2167
|
|
|
1880
2168
|
def _is_table_creation_required(self, r: ir.Relation) -> bool:
|
|
1881
2169
|
"""
|
|
@@ -1932,9 +2220,26 @@ class ModelToSQL:
|
|
|
1932
2220
|
if helpers.is_external(r):
|
|
1933
2221
|
self.relation_name_cache.get_name(r.id, r.name)
|
|
1934
2222
|
|
|
2223
|
+
def _get_relation_info(self, relation: ir.Relation) -> RelationInfo:
|
|
2224
|
+
if relation not in self.relation_infos:
|
|
2225
|
+
self.relation_infos[relation] = RelationInfo()
|
|
2226
|
+
return self.relation_infos[relation]
|
|
2227
|
+
|
|
2228
|
+
def mark_used(self, relation: ir.Relation):
|
|
2229
|
+
self._get_relation_info(relation).used = True
|
|
2230
|
+
|
|
2231
|
+
def add_table_select(self, relation: ir.Relation, select: sql.Select):
|
|
2232
|
+
self._get_relation_info(relation).table_selects.append(select)
|
|
2233
|
+
|
|
2234
|
+
def add_view_select(self, relation: ir.Relation, select: sql.Select):
|
|
2235
|
+
self._get_relation_info(relation).view_selects.append(select)
|
|
2236
|
+
|
|
2237
|
+
def add_dynamic_table_select(self, relation: ir.Relation, select: sql.Select):
|
|
2238
|
+
self._get_relation_info(relation).dynamic_table_selects.append(select)
|
|
2239
|
+
|
|
1935
2240
|
def _var_name(self, relation_id: int, arg: Union[ir.Var, ir.Field]):
|
|
1936
2241
|
name = helpers.sanitize(self.relation_arg_name_cache.get_name((relation_id, arg.id), arg.name))
|
|
1937
|
-
return f'"{name}"' if name.lower() in
|
|
2242
|
+
return f'"{name}"' if name.lower() in {"any", "order"} else name
|
|
1938
2243
|
|
|
1939
2244
|
def _register_relation_args(self, relations: list[ir.Relation]):
|
|
1940
2245
|
"""
|
|
@@ -1960,11 +2265,13 @@ class ModelToSQL:
|
|
|
1960
2265
|
def _build_builtin_lookups_dependencies(lookups: list[ir.Lookup]) -> list[Tuple[ir.Lookup, ir.Lookup]]:
|
|
1961
2266
|
"""
|
|
1962
2267
|
Builds dependency edges for topological_sort:
|
|
1963
|
-
1.
|
|
1964
|
-
2.
|
|
1965
|
-
3.
|
|
1966
|
-
4. eq with
|
|
1967
|
-
5.
|
|
2268
|
+
1. Terminal comparisons (neq, gt, lt, gte, lte) come last.
|
|
2269
|
+
2. Conditionals (starts_with, contains, etc.) come after basic lookups but before terminals.
|
|
2270
|
+
3. eq with only constants comes first.
|
|
2271
|
+
4. eq with two vars must wait until one of them is grounded.
|
|
2272
|
+
5. A lookup whose last argument is used non-terminally in another must come first.
|
|
2273
|
+
6. For builtins that take multiple input arguments (like range, concat, substring, etc.),
|
|
2274
|
+
ensure that all non-terminal arguments are processed before the builtin that consumes them.
|
|
1968
2275
|
"""
|
|
1969
2276
|
|
|
1970
2277
|
edges = []
|
|
@@ -1988,7 +2295,7 @@ class ModelToSQL:
|
|
|
1988
2295
|
relation_name = lookup.relation.name
|
|
1989
2296
|
args = lookup.args
|
|
1990
2297
|
|
|
1991
|
-
# Rule
|
|
2298
|
+
# Rule 1: Terminal relations depend on everything else
|
|
1992
2299
|
if relation_name in terminal_relations:
|
|
1993
2300
|
for other in lookups:
|
|
1994
2301
|
other_name = other.relation.name
|
|
@@ -1996,7 +2303,7 @@ class ModelToSQL:
|
|
|
1996
2303
|
edges.append((other, lookup))
|
|
1997
2304
|
continue # skip rest of rules for terminal lookups
|
|
1998
2305
|
|
|
1999
|
-
# Rule
|
|
2306
|
+
# Rule 2: Conditional relations go before terminals, but after others
|
|
2000
2307
|
if relation_name in conditional_relations:
|
|
2001
2308
|
for other in lookups:
|
|
2002
2309
|
if other is not lookup:
|
|
@@ -2008,31 +2315,46 @@ class ModelToSQL:
|
|
|
2008
2315
|
if relation_name == builtins.eq.name:
|
|
2009
2316
|
var_args = [arg for arg in args if isinstance(arg, ir.Var)]
|
|
2010
2317
|
|
|
2011
|
-
# Rule
|
|
2318
|
+
# Rule 3: eq with exactly one var argument (e.g. a = 2) comes before non-terminal uses of that var
|
|
2012
2319
|
if len(var_args) == 1:
|
|
2013
2320
|
# This lookup defines a var — should come before any that use this var non-terminally
|
|
2014
2321
|
grounded_var = var_args[0]
|
|
2015
|
-
for
|
|
2016
|
-
if
|
|
2017
|
-
if pos != len(
|
|
2018
|
-
edges.append((lookup,
|
|
2322
|
+
for other, pos in arg_usages[grounded_var]:
|
|
2323
|
+
if other is not lookup:
|
|
2324
|
+
if pos != len(other.args) - 1:
|
|
2325
|
+
edges.append((lookup, other))
|
|
2019
2326
|
continue # skip adding other edges among terminal assignments like a=2, b=2
|
|
2020
2327
|
|
|
2021
|
-
# Rule
|
|
2328
|
+
# Rule 4: eq with two vars must wait until one of them is grounded
|
|
2022
2329
|
elif len(var_args) == 2:
|
|
2023
2330
|
# eq(x, y): both are vars — lookup must come after those grounding either var
|
|
2024
2331
|
for var in var_args:
|
|
2025
|
-
for
|
|
2026
|
-
if
|
|
2027
|
-
if
|
|
2028
|
-
edges.append((
|
|
2332
|
+
for other, pos in arg_usages[var]:
|
|
2333
|
+
if other is not lookup:
|
|
2334
|
+
if other.args[-1] == var:
|
|
2335
|
+
edges.append((other, lookup))
|
|
2029
2336
|
continue
|
|
2030
2337
|
|
|
2031
|
-
#
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2338
|
+
# In general, builtins have a single output var, but `split` returns both `index` and `part`
|
|
2339
|
+
num_outputs = 2 if lookup.relation == builtins.split else 1
|
|
2340
|
+
|
|
2341
|
+
# Rule 5: last output args must go first if used elsewhere non-terminally
|
|
2342
|
+
for out_arg in args[-num_outputs:]:
|
|
2343
|
+
for other, pos in arg_usages.get(out_arg, []):
|
|
2344
|
+
if other is not lookup and pos != len(other.args) - 1:
|
|
2345
|
+
edges.append((lookup, other))
|
|
2346
|
+
|
|
2347
|
+
# Rule 6: builtins with multiple input args must wait until all input args are grounded,
|
|
2348
|
+
# for example, range(start, end, step, result)
|
|
2349
|
+
if len(args) > num_outputs:
|
|
2350
|
+
for input_arg in args[:-num_outputs]:
|
|
2351
|
+
for other, pos in arg_usages.get(input_arg, []):
|
|
2352
|
+
if other is not lookup:
|
|
2353
|
+
other_name = other.relation.name
|
|
2354
|
+
if other_name not in terminal_relations and other_name not in conditional_relations:
|
|
2355
|
+
# Ensure any lookup that defines this arg (as last) comes before
|
|
2356
|
+
if other.args[-1] == input_arg:
|
|
2357
|
+
edges.append((other, lookup))
|
|
2036
2358
|
|
|
2037
2359
|
return edges
|
|
2038
2360
|
|
|
@@ -2064,8 +2386,9 @@ class ModelToSQL:
|
|
|
2064
2386
|
3. Other statements except SELECT queries
|
|
2065
2387
|
4. SELECT queries
|
|
2066
2388
|
"""
|
|
2389
|
+
udfs = []
|
|
2067
2390
|
create_tables = []
|
|
2068
|
-
need_sort: dict[str, list[Union[sql.Insert, sql.CreateView]]] = defaultdict(list)
|
|
2391
|
+
need_sort: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]] = defaultdict(list)
|
|
2069
2392
|
updates = []
|
|
2070
2393
|
miscellaneous_statements = []
|
|
2071
2394
|
selects = []
|
|
@@ -2077,19 +2400,23 @@ class ModelToSQL:
|
|
|
2077
2400
|
need_sort[statement.table].append(statement)
|
|
2078
2401
|
elif isinstance(statement, sql.CreateView):
|
|
2079
2402
|
need_sort[statement.name].append(statement)
|
|
2403
|
+
elif isinstance(statement, sql.CreateDynamicTable):
|
|
2404
|
+
need_sort[statement.name].append(statement)
|
|
2080
2405
|
elif isinstance(statement, sql.Update):
|
|
2081
2406
|
updates.append(statement)
|
|
2082
2407
|
elif isinstance(statement, sql.Select):
|
|
2083
2408
|
selects.append(statement)
|
|
2409
|
+
elif isinstance(statement, sql.CreateFunction):
|
|
2410
|
+
udfs.append(statement)
|
|
2084
2411
|
else:
|
|
2085
2412
|
miscellaneous_statements.append(statement)
|
|
2086
2413
|
|
|
2087
2414
|
sorted_statements = self._sort_statements_dependency_graph(need_sort)
|
|
2088
2415
|
|
|
2089
|
-
return create_tables + sorted_statements + updates + miscellaneous_statements + selects
|
|
2416
|
+
return udfs + create_tables + sorted_statements + updates + miscellaneous_statements + selects
|
|
2090
2417
|
|
|
2091
2418
|
@staticmethod
|
|
2092
|
-
def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView]]]) -> list[sql.Insert]:
|
|
2419
|
+
def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]]) -> list[sql.Insert]:
|
|
2093
2420
|
""" Topologically sort INSERT, CREATE VIEW and CREATE DYNAMIC TABLE statements based on dependencies in their SELECT FROM clauses. """
|
|
2094
2421
|
edges = ordered_set()
|
|
2095
2422
|
nodes = OrderedSet.from_iterable(statements.keys())
|
|
@@ -2124,16 +2451,17 @@ class ModelToSQL:
|
|
|
2124
2451
|
for sub_expr in expr.expr:
|
|
2125
2452
|
_extract_from_expr(sub_expr)
|
|
2126
2453
|
|
|
2127
|
-
if select.where:
|
|
2454
|
+
if select.where and select.where.expression:
|
|
2128
2455
|
_extract_from_expr(select.where.expression)
|
|
2129
2456
|
|
|
2130
2457
|
for target_table, table_statements in statements.items():
|
|
2131
2458
|
for statement in table_statements:
|
|
2132
|
-
if
|
|
2133
|
-
extract_dependencies(statement.select, target_table)
|
|
2134
|
-
elif isinstance(statement, sql.CreateView):
|
|
2459
|
+
if statement.query:
|
|
2135
2460
|
query = statement.query
|
|
2136
|
-
if isinstance(query,
|
|
2461
|
+
if isinstance(query, list):
|
|
2462
|
+
for sub_query in query:
|
|
2463
|
+
extract_dependencies(sub_query, target_table)
|
|
2464
|
+
elif isinstance(query, sql.Select):
|
|
2137
2465
|
extract_dependencies(query, target_table)
|
|
2138
2466
|
elif isinstance(query, sql.CTE):
|
|
2139
2467
|
for select in query.selects:
|
|
@@ -2158,3 +2486,13 @@ class RecursiveLookupsRewriter(v.Rewriter):
|
|
|
2158
2486
|
if node.relation == self._recursive_relation:
|
|
2159
2487
|
return node.reconstruct(node.engine, self._new_recursive_relation, node.args, node.annotations)
|
|
2160
2488
|
return node
|
|
2489
|
+
|
|
2490
|
+
class DerivedRelationsVisitor(v.Visitor):
    """Visitor that records whether any visited relation carries ``from_cdc_annotation``.

    The flag starts as ``True`` and is switched to ``False`` the first time a
    visited relation has ``from_cdc_annotation`` among its annotations. It is
    never switched back.
    """

    # Class-level default; the first CDC-annotated relation shadows it on the instance.
    _is_derived: bool = True

    def is_derived(self) -> bool:
        """Return ``True`` unless a visited relation carried ``from_cdc_annotation``."""
        return self._is_derived

    def visit_relation(self, node: ir.Relation, parent: Optional[ir.Node]):
        """Clear the flag when ``node`` is annotated with ``from_cdc_annotation``."""
        # Once the flag has been cleared there is nothing left to check.
        if not self._is_derived:
            return
        if from_cdc_annotation in node.annotations:
            self._is_derived = False
|