relationalai 0.13.2__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- relationalai/clients/client.py +3 -4
- relationalai/clients/exec_txn_poller.py +62 -31
- relationalai/clients/resources/snowflake/direct_access_resources.py +6 -5
- relationalai/clients/resources/snowflake/snowflake.py +47 -51
- relationalai/semantics/lqp/algorithms.py +173 -0
- relationalai/semantics/lqp/builtins.py +199 -2
- relationalai/semantics/lqp/executor.py +65 -36
- relationalai/semantics/lqp/ir.py +28 -2
- relationalai/semantics/lqp/model2lqp.py +215 -45
- relationalai/semantics/lqp/passes.py +13 -658
- relationalai/semantics/lqp/rewrite/__init__.py +12 -0
- relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
- relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
- relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
- relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
- relationalai/semantics/lqp/rewrite/period_math.py +77 -0
- relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
- relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
- relationalai/semantics/lqp/utils.py +11 -1
- relationalai/semantics/lqp/validators.py +14 -1
- relationalai/semantics/metamodel/builtins.py +2 -1
- relationalai/semantics/metamodel/compiler.py +2 -1
- relationalai/semantics/metamodel/dependency.py +12 -3
- relationalai/semantics/metamodel/executor.py +11 -1
- relationalai/semantics/metamodel/factory.py +2 -2
- relationalai/semantics/metamodel/helpers.py +7 -0
- relationalai/semantics/metamodel/ir.py +3 -2
- relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
- relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
- relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
- relationalai/semantics/metamodel/typer/checker.py +6 -4
- relationalai/semantics/metamodel/typer/typer.py +2 -5
- relationalai/semantics/metamodel/visitor.py +4 -3
- relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +3 -4
- relationalai/semantics/rel/compiler.py +2 -1
- relationalai/semantics/rel/executor.py +3 -2
- relationalai/semantics/tests/lqp/__init__.py +0 -0
- relationalai/semantics/tests/lqp/algorithms.py +345 -0
- relationalai/tools/cli_controls.py +216 -67
- relationalai/util/format.py +5 -2
- {relationalai-0.13.2.dist-info → relationalai-0.13.3.dist-info}/METADATA +1 -1
- {relationalai-0.13.2.dist-info → relationalai-0.13.3.dist-info}/RECORD +46 -37
- {relationalai-0.13.2.dist-info → relationalai-0.13.3.dist-info}/WHEEL +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.3.dist-info}/entry_points.txt +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.3.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/relationalai/semantics/lqp/rewrite/eliminate_data.py
@@ -0,0 +1,108 @@
+from relationalai.semantics.metamodel.compiler import Pass
+from relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
+
+from typing import cast
+import pandas as pd
+import hashlib
+
+# Creates intermediary relations for all Data nodes and replaces said Data nodes
+# with a Lookup into these created relations. Reuses duplicate created relations.
+class EliminateData(Pass):
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        r = self.DataRewriter()
+        return r.walk(model)
+
+    # Does the actual work.
+    class DataRewriter(visitor.Rewriter):
+        new_relations: list[ir.Relation]
+        new_updates: list[ir.Logical]
+        # Counter for naming new relations.
+        # It must be that new_count == len(new_updates) == len(new_relations).
+        new_count: int
+        # Cache for Data nodes to avoid creating duplicate intermediary relations.
+        data_cache: dict[str, ir.Relation]
+
+        def __init__(self):
+            self.new_relations = []
+            self.new_updates = []
+            self.new_count = 0
+            self.data_cache = {}
+            super().__init__()
+
+        # Create a cache key for a Data node based on its structure and content.
+        def _data_cache_key(self, node: ir.Data) -> str:
+            values = pd.util.hash_pandas_object(node.data).values
+            return hashlib.sha256(bytes(values)).hexdigest()
+
+        def _intermediary_relation(self, node: ir.Data) -> ir.Relation:
+            cache_key = self._data_cache_key(node)
+            if cache_key in self.data_cache:
+                return self.data_cache[cache_key]
+            self.new_count += 1
+            intermediary_name = f"formerly_Data_{self.new_count}"
+
+            intermediary_relation = f.relation(
+                intermediary_name,
+                [f.field(v.name, v.type) for v in node.vars]
+            )
+            self.new_relations.append(intermediary_relation)
+
+            intermediary_update = f.logical([
+                # For each row (union), equate values and their variable (logical).
+                f.union(
+                    [
+                        f.logical(
+                            [
+                                f.lookup(rel_builtins.eq, [f.literal(val, var.type), var])
+                                for (val, var) in zip(row, node.vars)
+                            ],
+                        )
+                        for row in node
+                    ],
+                    hoisted = node.vars,
+                ),
+                # And pop it back into the relation.
+                f.update(intermediary_relation, node.vars, ir.Effect.derive),
+            ])
+            self.new_updates.append(intermediary_update)
+
+            # Cache the result for reuse.
+            self.data_cache[cache_key] = intermediary_relation
+
+            return intermediary_relation
+
+        # Create a new intermediary relation representing the Data (and pop it in
+        # new_updates/new_relations) and replace this Data with a Lookup of said
+        # intermediary.
+        def handle_data(self, node: ir.Data, parent: ir.Node) -> ir.Lookup:
+            intermediary_relation = self._intermediary_relation(node)
+            replacement_lookup = f.lookup(intermediary_relation, node.vars)
+
+            return replacement_lookup
+
+        # Walks the model for the handle_data work, then updates the model with
+        # the new state.
+        def handle_model(self, model: ir.Model, parent: None):
+            walked_model = super().handle_model(model, parent)
+            assert len(self.new_relations) == len(self.new_updates) and self.new_count == len(self.new_relations)
+
+            # This is okay because it's LQP.
+            assert isinstance(walked_model.root, ir.Logical)
+            root_logical = cast(ir.Logical, walked_model.root)
+
+            # We may need to add the new intermediaries from handle_data to the model.
+            if self.new_count == 0:
+                return model
+            else:
+                return ir.Model(
+                    walked_model.engines,
+                    walked_model.relations | self.new_relations,
+                    walked_model.types,
+                    ir.Logical(
+                        root_logical.engine,
+                        root_logical.hoisted,
+                        root_logical.body + tuple(self.new_updates),
+                        root_logical.annotations,
+                    ),
+                    walked_model.annotations,
+                )
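Note: the deduplication above keys the cache on a content hash of the Data node's table, so two identical literal tables share a single intermediary relation. A minimal standalone sketch of that idea, using plain pandas outside the package:

    import hashlib
    import pandas as pd

    def data_cache_key(df: pd.DataFrame) -> str:
        # hash_pandas_object yields one uint64 per row; digesting the raw
        # bytes gives a stable key for the table's contents.
        values = pd.util.hash_pandas_object(df).values
        return hashlib.sha256(bytes(values)).hexdigest()

    a = pd.DataFrame({"x": [1, 2], "y": ["a", "b"]})
    b = pd.DataFrame({"x": [1, 2], "y": ["a", "b"]})
    assert data_cache_key(a) == data_cache_key(b)  # identical tables, one relation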
--- /dev/null
+++ b/relationalai/semantics/lqp/rewrite/period_math.py
@@ -0,0 +1,77 @@
+from relationalai.semantics.metamodel.compiler import Pass
+from relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
+from relationalai.semantics.metamodel import types
+
+# Generate date arithmetic expressions, such as
+# `rel_primitive_date_add(:day, [date] delta, res_2)`, by finding the period
+# expression for the delta and adding the period type to the date arithmetic expression.
+#
+# date_add and its kin are generated by a period expression, e.g.,
+# `day(delta, res_1)`
+# followed by the date arithmetic expression using the period:
+# `date_add([date] res_1 res_2)`
+class PeriodMath(Pass):
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        period_rewriter = self.PeriodRewriter()
+        model = period_rewriter.walk(model)
+        period_math_rewriter = self.PeriodMathRewriter(period_rewriter.period_vars)
+        model = period_math_rewriter.walk(model)
+        return model
+
+    # Find all period builtins. We need to make them safe for the emitter (either by
+    # translating to a cast, or removing) and store the variable and period type for use
+    # in the date/datetime add/subtract expressions.
+    class PeriodRewriter(visitor.Rewriter):
+        def __init__(self):
+            super().__init__()
+            self.period_vars: dict[ir.Var, str] = {}
+
+        def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
+            if not rel_builtins.is_builtin(node.relation):
+                return node
+
+            if node.relation.name not in {
+                "year", "month", "week", "day", "hour", "minute", "second", "millisecond", "microsecond", "nanosecond"
+            }:
+                return node
+
+            assert len(node.args) == 2, "Expect 2 arguments for period builtins"
+            assert isinstance(node.args[1], ir.Var), "Expect result to be a variable"
+            period = node.relation.name
+            result_var = node.args[1]
+            self.period_vars[result_var] = period
+
+            # Ideally we could now remove the unused and unhandled period type construction,
+            # but we may also need to cast the original variable to an Int64 for use by the
+            # date/datetime add/subtract expressions.
+            # TODO: Remove the node entirely where possible and update uses of the result.
+            return f.lookup(rel_builtins.cast, [types.Int64, node.args[0], result_var])
+
+    # Update date/datetime add/subtract expressions with period information.
+    class PeriodMathRewriter(visitor.Rewriter):
+        def __init__(self, period_vars: dict[ir.Var, str]):
+            super().__init__()
+            self.period_vars: dict[ir.Var, str] = period_vars
+
+        def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
+            if not rel_builtins.is_builtin(node.relation):
+                return node
+
+            if node.relation.name not in {
+                "date_add", "date_subtract", "datetime_add", "datetime_subtract"
+            }:
+                return node
+
+            if len(node.args) == 4:
+                # We've already visited this lookup.
+                return node
+
+            assert isinstance(node.args[1], ir.Var), "Expect period to be a variable"
+            period_var = node.args[1]
+            assert period_var in self.period_vars, "datemath found, but no vars to insert"
+
+            period = self.period_vars[period_var]
+
+            new_args = [f.literal(period, types.Symbol)] + [arg for arg in node.args]
+
+            return f.lookup(node.relation, new_args)
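Note: taken together, the two rewriters turn an unsupported period construction into a cast plus a symbol-tagged date expression. An illustrative before/after, using the relation names from the comments above (exact IR shapes elided):

    # Before:
    #   day(delta, p)                # period construction, unsupported by the emitter
    #   date_add(d, p, res)          # date arithmetic using the period
    # After:
    #   cast(Int64, delta, p)        # PeriodRewriter: period becomes an Int64 cast
    #   date_add(:day, d, p, res)    # PeriodMathRewriter: period type prepended as a symbol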
--- a/relationalai/semantics/lqp/rewrite/quantify_vars.py
+++ b/relationalai/semantics/lqp/rewrite/quantify_vars.py
@@ -69,7 +69,7 @@ class VarScopeInfo(Visitor):
         ir.Var, ir.Literal, ir.Relation, ir.Field,
         ir.Default, ir.Output, ir.Update, ir.Aggregate,
         ir.Check, ir.Require,
-        ir.Annotation, ir.Rank)
+        ir.Annotation, ir.Rank, ir.Break)
 
     def __init__(self):
         super().__init__()
@@ -103,16 +103,29 @@ class VarScopeInfo(Visitor):
             self._record(node, scope_vars)
 
         elif isinstance(node, (ir.Match, ir.Union)):
-            # Match/Union inherits
+            # Match/Union only inherits vars if they are in scope for all sub-tasks.
             scope_vars = ordered_set()
+            # Prime the search with the first sub-task's vars.
+            if node.tasks:
+                scope_vars.update(self._vars_in_scope.get(node.tasks[0].id, None))
+
             for task in node.tasks:
                 sub_scope_vars = self._vars_in_scope.get(task.id, None)
-                if sub_scope_vars:
-                    scope_vars
+                if not scope_vars or not sub_scope_vars:
+                    scope_vars = ordered_set()
+                    break
+                scope_vars = (scope_vars & sub_scope_vars)
+
             # Hoisted vars are not considered for quantification at this level.
             scope_vars.difference_update(helpers.hoisted_vars(node.hoisted))
             self._record(node, scope_vars)
 
+        elif isinstance(node, (ir.Loop, ir.Sequence)):
+            # Variables in Loops and Sequences are scoped exclusively within the body and
+            # not propagated outside. No need to record any variables, as they shouldn't be
+            # in scope for the node itself.
+            pass
+
         elif isinstance(node, ir.Logical):
             self._do_logical(node)
 
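Note: the Match/Union rule above intersects the branches' scope sets and bails out to the empty set as soon as any branch contributes nothing. The same logic with plain Python sets (the real code uses ordered sets of ir.Var):

    branches = [{"x", "y"}, {"x", "z"}]

    scope = set(branches[0])  # prime with the first branch
    for b in branches:
        if not scope or not b:
            scope = set()  # one empty branch empties the whole intersection
            break
        scope &= b

    assert scope == {"x"}  # only vars common to every branch stay in scope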
@@ -128,6 +141,9 @@ class VarScopeInfo(Visitor):
         all_nested_vars = ordered_set()
         output_vars = ordered_set()
 
+        # Collect variables nested in child Logical and Not nodes.
+        nested_vars_in_task: dict[ir.Var, int] = dict()
+
         # Collect all variables from logical sub-tasks
         for task in node.body:
             if isinstance(task, ir.Output):
@@ -140,19 +156,29 @@ class VarScopeInfo(Visitor):
                     scope_vars.add(var)
                 continue
 
-            sub_scope_vars = self._vars_in_scope.get(task.id, None)
-
             # Hoisted variables from sub-tasks are brought again into scope.
             if isinstance(task, (ir.Logical, ir.Union, ir.Match)):
                 scope_vars.update(helpers.hoisted_vars(task.hoisted))
 
-
-
-
-
-
-
-
+            # Get variables in sub-task scope.
+            sub_scope_vars = self._vars_in_scope.get(task.id, ordered_set())
+
+            if isinstance(task, ir.Logical):
+                # Logical child nodes should have their nested variables quantified
+                # only if they are needed in more than one child task.
+                for var in sub_scope_vars:
+                    if var not in nested_vars_in_task:
+                        nested_vars_in_task[var] = 0
+                    nested_vars_in_task[var] += 1
+            elif not isinstance(task, ir.Not):
+                # Other nodes with nested variables need to be quantified at this level.
+                scope_vars.update(sub_scope_vars)
+
+        for v, c in nested_vars_in_task.items():
+            # If the variable appears in more than one nested child, then it needs to be
+            # quantified here. Otherwise, it will be handled in the child node.
+            if c > 1:
+                all_nested_vars.add(v)
 
         # Nested variables also need to be introduced, provided they are not output variables.
         for var in all_nested_vars:
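Note: the counting logic above quantifies a nested variable at this level only when more than one child Logical mentions it; a variable confined to a single child is left for that child to quantify. The same rule in plain Python (collections.Counter stands in for the hand-rolled dict):

    from collections import Counter

    child_scopes = [{"x", "y"}, {"y"}, {"z"}]
    counts = Counter(v for scope in child_scopes for v in scope)

    quantify_here = {v for v, c in counts.items() if c > 1}
    assert quantify_here == {"y"}  # "x" and "z" stay with their own child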
@@ -190,37 +216,30 @@ class FindQuantificationNodes(Visitor):
     def __init__(self, var_info: VarScopeInfo):
         super().__init__()
         self._vars_in_scope = var_info._vars_in_scope
-        self.
+        self.handled_vars: dict[int, OrderedSet[ir.Var]] = {}
         self.node_quantifies_vars = {}
 
     def enter(self, node: ir.Node, parent: Optional[ir.Node]=None) -> "Visitor":
         if contains_only_declarable_constraints(node):
             return self
 
+        handled_vars = self.handled_vars.get(parent.id, ordered_set()) if parent else ordered_set()
+        # Clone the set to avoid modifying parent's handled vars.
+        handled_vars = OrderedSet.from_iterable(handled_vars)
+
         if isinstance(node, (ir.Logical, ir.Not)):
             ignored_vars = _ignored_vars(node)
-
+            handled_vars.update(ignored_vars)
 
             scope_vars = self._vars_in_scope.get(node.id, None)
             if scope_vars:
-                scope_vars.difference_update(
+                scope_vars.difference_update(handled_vars)
             if scope_vars:
-
+                handled_vars.update(scope_vars)
             self.node_quantifies_vars[node.id] = scope_vars
-            return self
-
-    def leave(self, node: ir.Node, parent: Optional[ir.Node]=None) -> ir.Node:
-        if contains_only_declarable_constraints(node):
-            return node
 
-
-
-        self._handled_vars.difference_update(ignored_vars)
-
-        scope_vars = self._vars_in_scope.get(node.id, None)
-        if scope_vars:
-            self._handled_vars.difference_update(scope_vars)
-        return node
+        self.handled_vars[node.id] = handled_vars
+        return self
 
 class QuantifyVarsRewriter(Rewriter):
     """
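Note: the enter-only traversal above replaces the old enter/leave bookkeeping. Each node starts from a copy of its parent's handled set, so siblings can no longer observe each other's additions and no leave-time rollback is required. A toy illustration of the copy-on-enter idea (plain dict and set in place of the id-keyed OrderedSet):

    handled = {1: {"x"}}  # node id -> vars already handled

    # Entering child node 2: clone the parent's set before extending it.
    child_handled = set(handled[1])
    child_handled.add("y")
    handled[2] = child_handled

    assert handled[1] == {"x"}  # parent, and thus siblings, unaffected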
@@ -254,7 +273,12 @@ class QuantifyVarsRewriter:
             # in IR directly may do so and the flatten pass doesn't split them yet.
             if len(agg_or_rank_tasks) > 0:
                 print(f"Multiple aggregate/rank tasks found: {agg_or_rank_tasks} and {task}")
-
+                # If the agg/rank depends on any of the vars being quantified here,
+                # then it needs to be inside the quantification.
+                if any(var in helpers.vars(task.projection) for var in vars):
+                    inner_tasks.append(task)
+                else:
+                    agg_or_rank_tasks.append(task)
 
             else:
                 inner_tasks.append(task)
@@ -283,6 +307,16 @@ class QuantifyVarsRewriter:
 
         return node if node.task is new_task else f.not_(new_task)
 
+    def handle_union(self, node: ir.Union, parent: ir.Node, ctx:Optional[Any]=None) -> ir.Union:
+        if not node.tasks:
+            return node
+
+        new_tasks = self.walk_list(node.tasks, node)
+        return node if node.tasks is new_tasks else f.union(
+            tasks = new_tasks,
+            hoisted = node.hoisted,
+        )
+
     # To avoid unnecessary cloning of vars in the visitor.
     def handle_var(self, node: ir.Var, parent: ir.Node, ctx:Optional[Any]=None) -> ir.Var:
         return node
--- /dev/null
+++ b/relationalai/semantics/lqp/rewrite/unify_definitions.py
@@ -0,0 +1,317 @@
+from relationalai.semantics.metamodel.compiler import Pass
+from relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
+from relationalai.semantics.metamodel.typer import typer
+from relationalai.semantics.metamodel import helpers
+from relationalai.semantics.metamodel.util import FrozenOrderedSet, OrderedSet
+
+
+from typing import cast, Union, Optional, Iterable
+from collections import defaultdict
+
+# LQP does not support multiple definitions for the same relation. This pass unifies all
+# definitions for each relation into a single definition using a union.
+class UnifyDefinitions(Pass):
+    def __init__(self):
+        super().__init__()
+
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        # Maintain a cache of renamings for each relation. These need to be consistent
+        # across all definitions of the same relation.
+        self.renamed_relation_args: dict[Union[ir.Value, ir.Relation], list[ir.Var]] = {}
+
+        root = cast(ir.Logical, model.root)
+        new_tasks = self.get_combined_multidefs(root)
+        return ir.Model(
+            model.engines,
+            model.relations,
+            model.types,
+            f.logical(
+                tuple(new_tasks),
+                root.hoisted,
+                root.engine,
+            ),
+            model.annotations,
+        )
+
+    def _get_heads(self, logical: ir.Logical) -> list[Union[ir.Update, ir.Output]]:
+        derives = []
+        for task in logical.body:
+            if isinstance(task, ir.Update) and task.effect == ir.Effect.derive:
+                derives.append(task)
+            elif isinstance(task, ir.Output):
+                derives.append(task)
+        return derives
+
+    def _get_non_heads(self, logical: ir.Logical) -> list[ir.Task]:
+        non_derives = []
+        for task in logical.body:
+            if not(isinstance(task, ir.Update) and task.effect == ir.Effect.derive) and not isinstance(task, ir.Output):
+                non_derives.append(task)
+        return non_derives
+
+    def _get_head_identifier(self, head: Union[ir.Update, ir.Output]) -> Optional[ir.Value]:
+        if isinstance(head, ir.Update):
+            return head.relation
+        else:
+            assert isinstance(head, ir.Output)
+            if len(head.aliases) <= 2:
+                # For processing here, we need output to have at least the column markers
+                # `cols` and `col`, and also a key.
+                return None
+
+            output_alias_names = helpers.output_alias_names(head.aliases)
+            output_vals = helpers.output_values(head.aliases)
+
+            # For normal outputs, the pattern is output[keys](cols, "col000" as 'col', ...).
+            if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
+                return output_vals[1]
+
+            # For exports, the pattern is output[keys]("col000" as 'col', ...).
+            if helpers.is_export(head):
+                if output_alias_names[0] == "col":
+                    return output_vals[0]
+
+            return None
+
+    def get_combined_multidefs(self, root: ir.Logical) -> list[ir.Logical]:
+        # Step 1: Group tasks by the relation they define.
+        relation_to_tasks: dict[Union[None, ir.Value, ir.Relation], list[ir.Logical]] = defaultdict(list)
+
+        for task in root.body:
+            task = cast(ir.Logical, task)
+            task_heads = self._get_heads(task)
+
+            # Some relations do not need to be grouped, e.g., if they don't contain a
+            # derive. Use `None` as a placeholder key for these cases.
+            if len(task_heads) != 1:
+                relation_to_tasks[None].append(task)
+                continue
+
+            head_id = self._get_head_identifier(task_heads[0])
+            relation_to_tasks[head_id].append(task)
+
+        # Step 2: For each relation, combine all of the body definitions into a union.
+        result_tasks = []
+        for relation, tasks in relation_to_tasks.items():
+            # If there's only one task for the relation, or if grouping is not needed, then
+            # just keep the original tasks.
+            if len(tasks) == 1 or relation is None:
+                result_tasks.extend(tasks)
+                continue
+
+            result_tasks.append(self._combine_tasks_into_union(tasks))
+        return result_tasks
+
+    def _get_variable_mapping(self, logical: ir.Logical) -> dict[ir.Value, ir.Var]:
+        heads = self._get_heads(logical)
+        assert len(heads) == 1, "should only have one head in a logical at this stage"
+        head = heads[0]
+
+        var_mapping = {}
+        head_id = self._get_head_identifier(head)
+
+        if isinstance(head, ir.Update):
+            args_for_renaming = head.args
+        else:
+            assert isinstance(head, ir.Output)
+            output_alias_names = helpers.output_alias_names(head.aliases)
+            if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
+                assert len(head.aliases) > 2
+
+                # For outputs, we do not need to rename the `cols` and `col` markers or the
+                # keys.
+                output_values = helpers.output_values(head.aliases)[2:]
+
+            else:
+                assert helpers.is_export(head) and output_alias_names[0] == "col"
+                assert len(head.aliases) > 1
+
+                # For exports, we do not need to rename the `col` marker or the keys.
+                output_values = helpers.output_values(head.aliases)[1:]
+
+            args_for_renaming = []
+            for v in output_values:
+                if head.keys and isinstance(v, ir.Var) and v in head.keys:
+                    continue
+                args_for_renaming.append(v)
+
+        if head_id not in self.renamed_relation_args:
+            renamed_vars = []
+            for (i, arg) in enumerate(args_for_renaming):
+                typ = typer.to_type(arg)
+                assert arg not in var_mapping, "args of update should be unique"
+                if isinstance(arg, ir.Var):
+                    var_mapping[arg] = ir.Var(typ, arg.name)
+                else:
+                    var_mapping[arg] = ir.Var(typ, f"arg_{i}")
+
+                renamed_vars.append(var_mapping[arg])
+            self.renamed_relation_args[head_id] = renamed_vars
+        else:
+            for (arg, var) in zip(args_for_renaming, self.renamed_relation_args[head_id]):
+                var_mapping[arg] = var
+
+        return var_mapping
+
+    def _rename_variables(self, logical: ir.Logical) -> ir.Logical:
+        class RenameVisitor(visitor.Rewriter):
+            def __init__(self, var_mapping: dict[ir.Value, ir.Var]):
+                super().__init__()
+                self.var_mapping = var_mapping
+
+            def _get_mapped_value(self, val: ir.Value) -> ir.Value:
+                if isinstance(val, tuple):
+                    return tuple(self._get_mapped_value(t) for t in val)
+                return self.var_mapping.get(val, val)
+
+            def _get_mapped_values(self, vals: Iterable[ir.Value]) -> list[ir.Value]:
+                return [self._get_mapped_value(v) for v in vals]
+
+            def handle_var(self, node: ir.Var, parent: ir.Node) -> ir.Var:
+                return self.var_mapping.get(node, node)
+
+            # TODO: ideally, extend the rewriter class to allow rewriting PyValue to Var so
+            # we don't need to separately handle all cases containing them.
+            def handle_update(self, node: ir.Update, parent: ir.Node) -> ir.Update:
+                return ir.Update(
+                    node.engine,
+                    node.relation,
+                    tuple(self._get_mapped_values(node.args)),
+                    node.effect,
+                    node.annotations,
+                )
+
+            def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
+                return ir.Lookup(
+                    node.engine,
+                    node.relation,
+                    tuple(self._get_mapped_values(node.args)),
+                    node.annotations,
+                )
+
+            def handle_output(self, node: ir.Output, parent: ir.Node) -> ir.Output:
+                new_aliases = FrozenOrderedSet(
+                    [(name, self._get_mapped_value(value)) for name, value in node.aliases]
+                )
+                if node.keys:
+                    new_keys = FrozenOrderedSet(
+                        [self.var_mapping.get(key, key) for key in node.keys]
+                    )
+                else:
+                    new_keys = node.keys
+
+                return ir.Output(
+                    node.engine,
+                    new_aliases,
+                    new_keys,
+                    node.annotations,
+                )
+
+            def handle_construct(self, node: ir.Construct, parent: ir.Node) -> ir.Construct:
+                new_values = tuple(self._get_mapped_values(node.values))
+                new_id_var = self.var_mapping.get(node.id_var, node.id_var)
+                return ir.Construct(
+                    node.engine,
+                    new_values,
+                    new_id_var,
+                    node.annotations,
+                )
+
+            def handle_aggregate(self, node: ir.Aggregate, parent: ir.Node) -> ir.Aggregate:
+                new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
+                new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
+                new_args = tuple(self._get_mapped_values(node.args))
+                return ir.Aggregate(
+                    node.engine,
+                    node.aggregation,
+                    new_projection,
+                    new_group,
+                    new_args,
+                    node.annotations,
+                )
+
+            def handle_rank(self, node: ir.Rank, parent: ir.Node) -> ir.Rank:
+                new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
+                new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
+                new_args = tuple(self.var_mapping.get(arg, arg) for arg in node.args)
+                new_result = self.var_mapping.get(node.result, node.result)
+
+                return ir.Rank(
+                    node.engine,
+                    new_projection,
+                    new_group,
+                    new_args,
+                    node.arg_is_ascending,
+                    new_result,
+                    node.limit,
+                    node.annotations,
+                )
+
+        var_mapping = self._get_variable_mapping(logical)
+
+        renamer = RenameVisitor(var_mapping)
+        result = renamer.walk(logical)
+
+        # Also need to append the equality for each renamed constant. E.g., if the mapping
+        # contains (50.0::FLOAT -> arg_2::FLOAT), we need to add
+        # `eq(arg_2::FLOAT, 50.0::FLOAT)` to the result.
+        value_eqs = []
+        for (old_var, new_var) in var_mapping.items():
+            if not isinstance(old_var, ir.Var):
+                value_eqs.append(f.lookup(rel_builtins.eq, [new_var, old_var]))
+
+        return ir.Logical(
+            result.engine,
+            result.hoisted,
+            tuple(value_eqs) + tuple(result.body),
+            result.annotations,
+        )
+
+    # This function is the main workhorse for this rewrite pass. It takes a list of tasks
+    # that define the same relation, and combines them into a single task that defines
+    # the relation using a union of all of the bodies.
+    def _combine_tasks_into_union(self, tasks: list[ir.Logical]) -> ir.Logical:
+        # Step 1: Rename the variables in all tasks so that they will match the final derive
+        # after reconstructing into a union.
+        renamed_tasks = [self._rename_variables(task) for task in tasks]
+
+        # Step 2: Get the final derive.
+        derives = self._get_heads(renamed_tasks[0])
+        assert len(derives) == 1, "should only have one derive in a logical at this stage"
+        # Also make sure that all the derives are the same. This should be the case because
+        # we renamed all the variables to be the same in step 1.
+        for task in renamed_tasks[1:]:
+            assert self._get_heads(task) == derives, "all derives should be the same"
+
+        derive = derives[0]
+
+        # Step 3: Remove the final `derive` from each task.
+        renamed_task_bodies = [
+            f.logical(
+                tuple(self._get_non_heads(t)),  # Only keep non-head tasks.
+                t.hoisted,
+                t.engine,
+            )
+            for t in renamed_tasks
+        ]
+
+        # Deduplicate bodies.
+        renamed_task_bodies = OrderedSet.from_iterable(renamed_task_bodies).get_list()
+
+        # Step 4: Construct a union of all the task bodies.
+        if len(renamed_task_bodies) == 1:
+            # If there's only one body after deduplication, no need to create a union.
+            new_body = renamed_task_bodies[0]
+        else:
+            new_body = f.union(
+                tuple(renamed_task_bodies),
+                [],
+                renamed_tasks[0].engine,
+            )
+
+        # Step 5: Add the final derive back.
+        return f.logical(
+            (new_body, derive),
+            [],
+            renamed_tasks[0].engine,
+        )
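Note: for intuition, the net effect of UnifyDefinitions on two definitions of the same relation looks roughly like this (hypothetical rules in Datalog-style pseudocode; the actual rewrite operates on metamodel IR nodes):

    # Before: two derives into the same relation -- illegal in LQP.
    #   adult(x) <- person(x), age(x, a), a >= 18
    #   adult(x) <- grandparent(x)
    #
    # After: one derive over a union of the (consistently renamed) bodies.
    #   adult(x) <- union( (person(x), age(x, a), a >= 18),
    #                      (grandparent(x)) )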