relationalai 1.0.0a3__py3-none-any.whl → 1.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. relationalai/config/config.py +47 -21
  2. relationalai/config/connections/__init__.py +5 -2
  3. relationalai/config/connections/duckdb.py +2 -2
  4. relationalai/config/connections/local.py +31 -0
  5. relationalai/config/connections/snowflake.py +0 -1
  6. relationalai/config/external/raiconfig_converter.py +235 -0
  7. relationalai/config/external/raiconfig_models.py +202 -0
  8. relationalai/config/external/utils.py +31 -0
  9. relationalai/config/shims.py +1 -0
  10. relationalai/semantics/__init__.py +10 -8
  11. relationalai/semantics/backends/sql/sql_compiler.py +1 -4
  12. relationalai/semantics/experimental/__init__.py +0 -0
  13. relationalai/semantics/experimental/builder.py +295 -0
  14. relationalai/semantics/experimental/builtins.py +154 -0
  15. relationalai/semantics/frontend/base.py +67 -42
  16. relationalai/semantics/frontend/core.py +34 -6
  17. relationalai/semantics/frontend/front_compiler.py +209 -37
  18. relationalai/semantics/frontend/pprint.py +6 -2
  19. relationalai/semantics/metamodel/__init__.py +7 -0
  20. relationalai/semantics/metamodel/metamodel.py +2 -0
  21. relationalai/semantics/metamodel/metamodel_analyzer.py +58 -16
  22. relationalai/semantics/metamodel/pprint.py +6 -1
  23. relationalai/semantics/metamodel/rewriter.py +11 -7
  24. relationalai/semantics/metamodel/typer.py +116 -41
  25. relationalai/semantics/reasoners/__init__.py +11 -0
  26. relationalai/semantics/reasoners/graph/__init__.py +35 -0
  27. relationalai/semantics/reasoners/graph/core.py +9028 -0
  28. relationalai/semantics/std/__init__.py +30 -10
  29. relationalai/semantics/std/aggregates.py +641 -12
  30. relationalai/semantics/std/common.py +146 -13
  31. relationalai/semantics/std/constraints.py +71 -1
  32. relationalai/semantics/std/datetime.py +904 -21
  33. relationalai/semantics/std/decimals.py +143 -2
  34. relationalai/semantics/std/floats.py +57 -4
  35. relationalai/semantics/std/integers.py +98 -4
  36. relationalai/semantics/std/math.py +857 -35
  37. relationalai/semantics/std/numbers.py +216 -20
  38. relationalai/semantics/std/re.py +213 -5
  39. relationalai/semantics/std/strings.py +437 -44
  40. relationalai/shims/executor.py +60 -52
  41. relationalai/shims/fixtures.py +85 -0
  42. relationalai/shims/helpers.py +26 -2
  43. relationalai/shims/hoister.py +28 -9
  44. relationalai/shims/mm2v0.py +204 -173
  45. relationalai/tools/cli/cli.py +192 -10
  46. relationalai/tools/cli/components/progress_reader.py +1 -1
  47. relationalai/tools/cli/docs.py +394 -0
  48. relationalai/tools/debugger.py +11 -4
  49. relationalai/tools/qb_debugger.py +435 -0
  50. relationalai/tools/typer_debugger.py +1 -2
  51. relationalai/util/dataclasses.py +3 -5
  52. relationalai/util/docutils.py +1 -2
  53. relationalai/util/error.py +2 -5
  54. relationalai/util/python.py +23 -0
  55. relationalai/util/runtime.py +1 -2
  56. relationalai/util/schema.py +2 -4
  57. relationalai/util/structures.py +4 -2
  58. relationalai/util/tracing.py +8 -2
  59. {relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/METADATA +8 -5
  60. {relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/RECORD +118 -95
  61. {relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/WHEEL +1 -1
  62. v0/relationalai/__init__.py +1 -1
  63. v0/relationalai/clients/client.py +52 -18
  64. v0/relationalai/clients/exec_txn_poller.py +122 -0
  65. v0/relationalai/clients/local.py +23 -8
  66. v0/relationalai/clients/resources/azure/azure.py +36 -11
  67. v0/relationalai/clients/resources/snowflake/__init__.py +4 -4
  68. v0/relationalai/clients/resources/snowflake/cli_resources.py +12 -1
  69. v0/relationalai/clients/resources/snowflake/direct_access_resources.py +124 -100
  70. v0/relationalai/clients/resources/snowflake/engine_service.py +381 -0
  71. v0/relationalai/clients/resources/snowflake/engine_state_handlers.py +35 -29
  72. v0/relationalai/clients/resources/snowflake/error_handlers.py +43 -2
  73. v0/relationalai/clients/resources/snowflake/snowflake.py +277 -179
  74. v0/relationalai/clients/resources/snowflake/use_index_poller.py +8 -0
  75. v0/relationalai/clients/types.py +5 -0
  76. v0/relationalai/errors.py +19 -1
  77. v0/relationalai/semantics/lqp/algorithms.py +173 -0
  78. v0/relationalai/semantics/lqp/builtins.py +199 -2
  79. v0/relationalai/semantics/lqp/executor.py +68 -37
  80. v0/relationalai/semantics/lqp/ir.py +28 -2
  81. v0/relationalai/semantics/lqp/model2lqp.py +215 -45
  82. v0/relationalai/semantics/lqp/passes.py +13 -658
  83. v0/relationalai/semantics/lqp/rewrite/__init__.py +12 -0
  84. v0/relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
  85. v0/relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
  86. v0/relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
  87. v0/relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
  88. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +25 -3
  89. v0/relationalai/semantics/lqp/rewrite/period_math.py +77 -0
  90. v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
  91. v0/relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
  92. v0/relationalai/semantics/lqp/utils.py +11 -1
  93. v0/relationalai/semantics/lqp/validators.py +14 -1
  94. v0/relationalai/semantics/metamodel/builtins.py +2 -1
  95. v0/relationalai/semantics/metamodel/compiler.py +2 -1
  96. v0/relationalai/semantics/metamodel/dependency.py +12 -3
  97. v0/relationalai/semantics/metamodel/executor.py +11 -1
  98. v0/relationalai/semantics/metamodel/factory.py +2 -2
  99. v0/relationalai/semantics/metamodel/helpers.py +7 -0
  100. v0/relationalai/semantics/metamodel/ir.py +3 -2
  101. v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
  102. v0/relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
  103. v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
  104. v0/relationalai/semantics/metamodel/typer/checker.py +6 -4
  105. v0/relationalai/semantics/metamodel/typer/typer.py +4 -3
  106. v0/relationalai/semantics/metamodel/visitor.py +4 -3
  107. v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
  108. v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +336 -86
  109. v0/relationalai/semantics/rel/compiler.py +2 -1
  110. v0/relationalai/semantics/rel/executor.py +3 -2
  111. v0/relationalai/semantics/tests/lqp/__init__.py +0 -0
  112. v0/relationalai/semantics/tests/lqp/algorithms.py +345 -0
  113. v0/relationalai/tools/cli.py +339 -186
  114. v0/relationalai/tools/cli_controls.py +216 -67
  115. v0/relationalai/tools/cli_helpers.py +410 -6
  116. v0/relationalai/util/format.py +5 -2
  117. {relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/entry_points.txt +0 -0
  118. {relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,14 @@
1
1
  from v0.relationalai.semantics.metamodel.compiler import Pass
2
- from v0.relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
3
- from v0.relationalai.semantics.metamodel.typer import Checker, InferTypes, typer
4
- from v0.relationalai.semantics.metamodel import helpers, types
5
- from v0.relationalai.semantics.metamodel.util import FrozenOrderedSet
2
+ from v0.relationalai.semantics.metamodel.typer import Checker, InferTypes
6
3
 
7
- from v0.relationalai.semantics.metamodel.rewrite import Flatten
8
-
9
- from ..metamodel.rewrite import DNFUnionSplitter, ExtractNestedLogicals, FormatOutputs
4
+ from ..metamodel.rewrite import (
5
+ DNFUnionSplitter, ExtractNestedLogicals, Flatten, FormatOutputs
6
+ )
10
7
  from .rewrite import (
11
- AnnotateConstraints, CDC, ExtractCommon, ExtractKeys, FunctionAnnotations, QuantifyVars,
12
- Splinter, SplitMultiCheckRequires
8
+ AlgorithmPass, AnnotateConstraints, CDC, ConstantsToVars, DeduplicateVars,
9
+ ExtractCommon, EliminateData, ExtractKeys, FunctionAnnotations, PeriodMath,
10
+ QuantifyVars, Splinter, SplitMultiCheckRequires, UnifyDefinitions,
13
11
  )
14
- from v0.relationalai.semantics.lqp.utils import output_names
15
-
16
- from typing import cast, List, Sequence, Tuple, Union, Optional, Iterable
17
- from collections import defaultdict
18
- import pandas as pd
19
- import hashlib
20
12
 
21
13
  def lqp_passes() -> list[Pass]:
22
14
  return [
@@ -27,654 +19,17 @@ def lqp_passes() -> list[Pass]:
27
19
  CDC(), # specialize to physical relations before extracting nested and typing
28
20
  ExtractNestedLogicals(), # before InferTypes to avoid extracting casts
29
21
  InferTypes(),
30
- DNFUnionSplitter(),
31
- ExtractKeys(),
22
+ DNFUnionSplitter(), # Handle unions that require DNF decomposition
23
+ ExtractKeys(), # Create a logical for each valid combinations of keys
32
24
  FormatOutputs(),
33
25
  ExtractCommon(), # Extracts tasks that will become common after Flatten into their own definition
34
- Flatten(),
26
+ Flatten(), # Move nested tasks to the top level, and various related things touched along the way
35
27
  Splinter(), # Splits multi-headed rules into multiple rules
36
28
  QuantifyVars(), # Adds missing existentials
37
29
  EliminateData(), # Turns Data nodes into ordinary relations.
38
30
  DeduplicateVars(), # Deduplicates vars in Updates and Outputs.
39
- PeriodMath(), # Rewrite date period uses.
40
31
  ConstantsToVars(), # Turns constants in Updates and Outputs into vars.
41
- UnifyDefinitions(),
32
+ AlgorithmPass(),
33
+ PeriodMath(), # Rewrite date period uses.
34
+ UnifyDefinitions(), # Unify relations with multiple definitions.
42
35
  ]
43
-
44
- # LQP does not support multiple definitions for the same relation. This pass unifies all
45
- # definitions for each relation into a single definition using a union.
46
- class UnifyDefinitions(Pass):
47
- def __init__(self):
48
- super().__init__()
49
-
50
- def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
51
- # Maintain a cache of renamings for each relation. These need to be consistent
52
- # across all definitions of the same relation.
53
- self.renamed_relation_args: dict[Union[ir.Value, ir.Relation], list[ir.Var]] = {}
54
-
55
- root = cast(ir.Logical, model.root)
56
- new_tasks = self.get_combined_multidefs(root)
57
- return ir.Model(
58
- model.engines,
59
- model.relations,
60
- model.types,
61
- f.logical(
62
- tuple(new_tasks),
63
- root.hoisted,
64
- root.engine,
65
- ),
66
- model.annotations,
67
- )
68
-
69
- def _get_heads(self, logical: ir.Logical) -> list[Union[ir.Update, ir.Output]]:
70
- derives = []
71
- for task in logical.body:
72
- if isinstance(task, ir.Update) and task.effect == ir.Effect.derive:
73
- derives.append(task)
74
- elif isinstance(task, ir.Output):
75
- derives.append(task)
76
- return derives
77
-
78
- def _get_non_heads(self, logical: ir.Logical) -> list[ir.Task]:
79
- non_derives = []
80
- for task in logical.body:
81
- if not(isinstance(task, ir.Update) and task.effect == ir.Effect.derive) and not isinstance(task, ir.Output):
82
- non_derives.append(task)
83
- return non_derives
84
-
85
- def _get_head_identifier(self, head: Union[ir.Update, ir.Output]) -> Optional[ir.Value]:
86
- if isinstance(head, ir.Update):
87
- return head.relation
88
- else:
89
- assert isinstance(head, ir.Output)
90
- if len(head.aliases) <= 2:
91
- # For processing here, we need output to have at least the column markers
92
- # `cols` and `col`, and also a key
93
- return None
94
-
95
- output_alias_names = helpers.output_alias_names(head.aliases)
96
- output_vals = helpers.output_values(head.aliases)
97
-
98
- # For normal outputs, the pattern is output[keys](cols, "col000" as 'col', ...)
99
- if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
100
- return output_vals[1]
101
-
102
- # For exports, the pattern is output[keys]("col000" as 'col', ...)
103
- if rel_builtins.export_annotation in head.annotations:
104
- if output_alias_names[0] == "col":
105
- return output_vals[0]
106
-
107
- return None
108
-
109
- def get_combined_multidefs(self, root: ir.Logical) -> list[ir.Logical]:
110
- # Step 1: Group tasks by the relation they define.
111
- relation_to_tasks: dict[Union[None, ir.Value, ir.Relation], list[ir.Logical]] = defaultdict(list)
112
-
113
- for task in root.body:
114
- task = cast(ir.Logical, task)
115
- task_heads = self._get_heads(task)
116
-
117
- # Some relations do not need to be grouped, e.g., if they don't contain a
118
- # derive. Use `None` as a placeholder key for these cases.
119
- if len(task_heads) != 1:
120
- relation_to_tasks[None].append(task)
121
- continue
122
-
123
- head_id = self._get_head_identifier(task_heads[0])
124
- relation_to_tasks[head_id].append(task)
125
-
126
- # Step 2: For each relation, combine all of the body definitions into a union.
127
- result_tasks = []
128
- for relation, tasks in relation_to_tasks.items():
129
- # If there's only one task for the relation, or if grouping is not needed, then
130
- # just keep the original tasks.
131
- if len(tasks) == 1 or relation is None:
132
- result_tasks.extend(tasks)
133
- continue
134
-
135
- result_tasks.append(self._combine_tasks_into_union(tasks))
136
- return result_tasks
137
-
138
- def _get_variable_mapping(self, logical: ir.Logical) -> dict[ir.Value, ir.Var]:
139
- heads = self._get_heads(logical)
140
- assert len(heads) == 1, "should only have one head in a logical at this stage"
141
- head = heads[0]
142
-
143
- var_mapping = {}
144
- head_id = self._get_head_identifier(head)
145
-
146
- if isinstance(head, ir.Update):
147
- args_for_renaming = head.args
148
- else:
149
- assert isinstance(head, ir.Output)
150
- output_alias_names = helpers.output_alias_names(head.aliases)
151
- if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
152
- assert len(head.aliases) > 2
153
-
154
- # For outputs, we do not need to rename the `cols` and `col` markers or the
155
- # keys.
156
- output_values = helpers.output_values(head.aliases)[2:]
157
-
158
- else:
159
- assert rel_builtins.export_annotation in head.annotations and output_alias_names[0] == "col"
160
- assert len(head.aliases) > 1
161
-
162
- # For exports, we do not need to rename the `col` marker or the keys.
163
- output_values = helpers.output_values(head.aliases)[1:]
164
-
165
- args_for_renaming = []
166
- for v in output_values:
167
- if head.keys and isinstance(v, ir.Var) and v in head.keys:
168
- continue
169
- args_for_renaming.append(v)
170
-
171
- if head_id not in self.renamed_relation_args:
172
- renamed_vars = []
173
- for (i, arg) in enumerate(args_for_renaming):
174
- typ = typer.to_type(arg)
175
- assert arg not in var_mapping, "args of update should be unique"
176
- if isinstance(arg, ir.Var):
177
- var_mapping[arg] = ir.Var(typ, arg.name)
178
- else:
179
- var_mapping[arg] = ir.Var(typ, f"arg_{i}")
180
-
181
- renamed_vars.append(var_mapping[arg])
182
- self.renamed_relation_args[head_id] = renamed_vars
183
- else:
184
- for (arg, var) in zip(args_for_renaming, self.renamed_relation_args[head_id]):
185
- var_mapping[arg] = var
186
-
187
- return var_mapping
188
-
189
- def _rename_variables(self, logical: ir.Logical) -> ir.Logical:
190
- class RenameVisitor(visitor.Rewriter):
191
- def __init__(self, var_mapping: dict[ir.Value, ir.Var]):
192
- super().__init__()
193
- self.var_mapping = var_mapping
194
-
195
- def _get_mapped_value(self, val: ir.Value) -> ir.Value:
196
- if isinstance(val, tuple):
197
- return tuple(self._get_mapped_value(t) for t in val)
198
- return self.var_mapping.get(val, val)
199
-
200
- def _get_mapped_values(self, vals: Iterable[ir.Value]) -> list[ir.Value]:
201
- return [self._get_mapped_value(v) for v in vals]
202
-
203
- def handle_var(self, node: ir.Var, parent: ir.Node) -> ir.Var:
204
- return self.var_mapping.get(node, node)
205
-
206
- # TODO: ideally, extend the rewriter class to allow rewriting PyValue to Var so
207
- # we don't need to separately handle all cases containing them.
208
- def handle_update(self, node: ir.Update, parent: ir.Node) -> ir.Update:
209
- return ir.Update(
210
- node.engine,
211
- node.relation,
212
- tuple(self._get_mapped_values(node.args)),
213
- node.effect,
214
- node.annotations,
215
- )
216
-
217
- def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
218
- return ir.Lookup(
219
- node.engine,
220
- node.relation,
221
- tuple(self._get_mapped_values(node.args)),
222
- node.annotations,
223
- )
224
-
225
- def handle_output(self, node: ir.Output, parent: ir.Node) -> ir.Output:
226
- new_aliases = FrozenOrderedSet(
227
- [(name, self._get_mapped_value(value)) for name, value in node.aliases]
228
- )
229
- if node.keys:
230
- new_keys = FrozenOrderedSet(
231
- [self.var_mapping.get(key, key) for key in node.keys]
232
- )
233
- else:
234
- new_keys = node.keys
235
-
236
- return ir.Output(
237
- node.engine,
238
- new_aliases,
239
- new_keys,
240
- node.annotations,
241
- )
242
-
243
- def handle_construct(self, node: ir.Construct, parent: ir.Node) -> ir.Construct:
244
- new_values = tuple(self._get_mapped_values(node.values))
245
- new_id_var = self.var_mapping.get(node.id_var, node.id_var)
246
- return ir.Construct(
247
- node.engine,
248
- new_values,
249
- new_id_var,
250
- node.annotations,
251
- )
252
-
253
- def handle_aggregate(self, node: ir.Aggregate, parent: ir.Node) -> ir.Aggregate:
254
- new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
255
- new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
256
- new_args = tuple(self._get_mapped_values(node.args))
257
- return ir.Aggregate(
258
- node.engine,
259
- node.aggregation,
260
- new_projection,
261
- new_group,
262
- new_args,
263
- node.annotations,
264
- )
265
-
266
- def handle_rank(self, node: ir.Rank, parent: ir.Node) -> ir.Rank:
267
- new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
268
- new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
269
- new_args = tuple(self.var_mapping.get(arg, arg) for arg in node.args)
270
- new_result = self.var_mapping.get(node.result, node.result)
271
-
272
- return ir.Rank(
273
- node.engine,
274
- new_projection,
275
- new_group,
276
- new_args,
277
- node.arg_is_ascending,
278
- new_result,
279
- node.limit,
280
- node.annotations,
281
- )
282
-
283
- var_mapping = self._get_variable_mapping(logical)
284
-
285
- renamer = RenameVisitor(var_mapping)
286
- result = renamer.walk(logical)
287
-
288
- # Also need to append the equality for each renamed constant. E.g., if the mapping
289
- # contains (50.0::FLOAT -> arg_2::FLOAT), we need to add
290
- # `eq(arg_2::FLOAT, 50.0::FLOAT)` to the result.
291
- value_eqs = []
292
- for (old_var, new_var) in var_mapping.items():
293
- if not isinstance(old_var, ir.Var):
294
- value_eqs.append(f.lookup(rel_builtins.eq, [new_var, old_var]))
295
-
296
- return ir.Logical(
297
- result.engine,
298
- result.hoisted,
299
- tuple(value_eqs) + tuple(result.body),
300
- result.annotations,
301
- )
302
-
303
- # This function is the main workhorse for this rewrite pass. It takes a list of tasks
304
- # that define the same relation, and combines them into a single task that defines
305
- # the relation using a union of all of the bodies.
306
- def _combine_tasks_into_union(self, tasks: list[ir.Logical]) -> ir.Logical:
307
- # Step 1: Rename the variables in all tasks so that they will match the final derive
308
- # after reconstructing into a union
309
- renamed_tasks = [self._rename_variables(task) for task in tasks]
310
-
311
- # Step 2: Get the final derive
312
- derives = self._get_heads(renamed_tasks[0])
313
- assert len(derives) == 1, "should only have one derive in a logical at this stage"
314
- # Also make sure that all the derives are the same. This should be the case because
315
- # we renamed all the variables to be the same in step 1.
316
- for task in renamed_tasks[1:]:
317
- assert self._get_heads(task) == derives, "all derives should be the same"
318
-
319
- derive = derives[0]
320
-
321
- # Step 3: Remove the final `derive` from each task
322
- renamed_task_bodies = [
323
- f.logical(
324
- tuple(self._get_non_heads(t)), # Only keep non-head tasks
325
- t.hoisted,
326
- t.engine,
327
- )
328
- for t in renamed_tasks
329
- ]
330
-
331
- # Step 4: Construct a union of all the task bodies
332
- union = f.union(
333
- tuple(renamed_task_bodies),
334
- [],
335
- renamed_tasks[0].engine,
336
- )
337
-
338
- # Step 5: Add the final derive back
339
- return f.logical(
340
- (union, derive),
341
- [],
342
- renamed_tasks[0].engine,
343
- )
344
-
345
- # Creates intermediary relations for all Data nodes and replaces said Data nodes
346
- # with a Lookup into these created relations. Reuse duplicate created relations.
347
- class EliminateData(Pass):
348
- def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
349
- r = self.DataRewriter()
350
- return r.walk(model)
351
-
352
- # Does the actual work.
353
- class DataRewriter(visitor.Rewriter):
354
- new_relations: list[ir.Relation]
355
- new_updates: list[ir.Logical]
356
- # Counter for naming new relations.
357
- # It must be that new_count == len new_updates == len new_relations.
358
- new_count: int
359
- # Cache for Data nodes to avoid creating duplicate intermediary relations
360
- data_cache: dict[str, ir.Relation]
361
-
362
- def __init__(self):
363
- self.new_relations = []
364
- self.new_updates = []
365
- self.new_count = 0
366
- self.data_cache = {}
367
- super().__init__()
368
-
369
- # Create a cache key for a Data node based on its structure and content
370
- def _data_cache_key(self, node: ir.Data) -> str:
371
- values = pd.util.hash_pandas_object(node.data).values
372
- return hashlib.sha256(bytes(values)).hexdigest()
373
-
374
- def _intermediary_relation(self, node: ir.Data) -> ir.Relation:
375
- cache_key = self._data_cache_key(node)
376
- if cache_key in self.data_cache:
377
- return self.data_cache[cache_key]
378
- self.new_count += 1
379
- intermediary_name = f"formerly_Data_{self.new_count}"
380
-
381
- intermediary_relation = f.relation(
382
- intermediary_name,
383
- [f.field(v.name, v.type) for v in node.vars]
384
- )
385
- self.new_relations.append(intermediary_relation)
386
-
387
- intermediary_update = f.logical([
388
- # For each row (union), equate values and their variable (logical).
389
- f.union(
390
- [
391
- f.logical(
392
- [
393
- f.lookup(rel_builtins.eq, [f.literal(val, var.type), var])
394
- for (val, var) in zip(row, node.vars)
395
- ],
396
- )
397
- for row in node
398
- ],
399
- hoisted = node.vars,
400
- ),
401
- # And pop it back into the relation.
402
- f.update(intermediary_relation, node.vars, ir.Effect.derive),
403
- ])
404
- self.new_updates.append(intermediary_update)
405
-
406
- # Cache the result for reuse
407
- self.data_cache[cache_key] = intermediary_relation
408
-
409
- return intermediary_relation
410
-
411
- # Create a new intermediary relation representing the Data (and pop it in
412
- # new_updates/new_relations) and replace this Data with a Lookup of said
413
- # intermediary.
414
- def handle_data(self, node: ir.Data, parent: ir.Node) -> ir.Lookup:
415
- intermediary_relation = self._intermediary_relation(node)
416
- replacement_lookup = f.lookup(intermediary_relation, node.vars)
417
-
418
- return replacement_lookup
419
-
420
- # Walks the model for the handle_data work then updates the model with
421
- # the new state.
422
- def handle_model(self, model: ir.Model, parent: None):
423
- walked_model = super().handle_model(model, parent)
424
- assert len(self.new_relations) == len(self.new_updates) and self.new_count == len(self.new_relations)
425
-
426
- # This is okay because its LQP.
427
- assert isinstance(walked_model.root, ir.Logical)
428
- root_logical = cast(ir.Logical, walked_model.root)
429
-
430
- # We may need to add the new intermediaries from handle_data to the model.
431
- if self.new_count == 0:
432
- return model
433
- else:
434
- return ir.Model(
435
- walked_model.engines,
436
- walked_model.relations | self.new_relations,
437
- walked_model.types,
438
- ir.Logical(
439
- root_logical.engine,
440
- root_logical.hoisted,
441
- root_logical.body + tuple(self.new_updates),
442
- root_logical.annotations,
443
- ),
444
- walked_model.annotations,
445
- )
446
-
447
- # Deduplicate Vars in Updates and Outputs.
448
- class DeduplicateVars(Pass):
449
- def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
450
- r = self.VarDeduplicator()
451
- return r.walk(model)
452
-
453
- # Return 1) a new list of Values with no duplicates (at the object level) and
454
- # 2) equalities between any original Value and a deduplicated Value.
455
- @staticmethod
456
- def dedup_values(vals: Sequence[ir.Value]) -> Tuple[List[ir.Value], List[ir.Lookup]]:
457
- # If a var is seen more than once, it is a duplicate and we will create
458
- # a new Var and equate it with the seen one.
459
- seen_vars = set()
460
-
461
- new_vals = []
462
- eqs = []
463
-
464
- for i, val in enumerate(vals):
465
- # Duplicates can only occur within Vars.
466
- # TODO: we don't know for sure if these are the only relevant cases.
467
- if isinstance(val, ir.Default) or isinstance(val, ir.Var):
468
- var = val if isinstance(val, ir.Var) else val.var
469
- if var in seen_vars:
470
- new_var = ir.Var(var.type, var.name + "_dup_" + str(i))
471
- new_val = new_var if isinstance(val, ir.Var) else ir.Default(new_var, val.value)
472
- new_vals.append(new_val)
473
- eqs.append(f.lookup(rel_builtins.eq, [new_var, var]))
474
- else:
475
- seen_vars.add(var)
476
- new_vals.append(val)
477
- else:
478
- # No possibility of problematic duplication.
479
- new_vals.append(val)
480
-
481
- return new_vals, eqs
482
-
483
- # Returns a reconstructed output with no duplicate variable objects
484
- # (dedup_values) and now necessary equalities between any two previously
485
- # duplicate variables.
486
- @staticmethod
487
- def dedup_output(output: ir.Output) -> List[Union[ir.Output, ir.Lookup]]:
488
- vals = helpers.output_values(output.aliases)
489
- deduped_vals, req_lookups = DeduplicateVars.dedup_values(vals)
490
- # Need the names so we can recombine.
491
- alias_names = output_names(output.aliases)
492
- new_output = ir.Output(
493
- output.engine,
494
- FrozenOrderedSet(list(zip(alias_names, deduped_vals))),
495
- output.keys,
496
- output.annotations,
497
- )
498
- return req_lookups + [new_output]
499
-
500
- # Returns a replacement update with no duplicate variable objects
501
- # (dedup_values) and now necessary equalities between any two previously
502
- # duplicate variables.
503
- @staticmethod
504
- def dedup_update(update: ir.Update) -> List[Union[ir.Update, ir.Lookup]]:
505
- deduped_vals, req_lookups = DeduplicateVars.dedup_values(update.args)
506
- new_update = ir.Update(
507
- update.engine,
508
- update.relation,
509
- tuple(deduped_vals),
510
- update.effect,
511
- update.annotations,
512
- )
513
- return req_lookups + [new_update]
514
-
515
- # Does the actual work.
516
- class VarDeduplicator(visitor.Rewriter):
517
- def __init__(self):
518
- super().__init__()
519
-
520
- # We implement handle_logical instead of handle_update/handle_output
521
- # because in addition to modifying said update/output we require new
522
- # lookups (equality between original and deduplicated variables).
523
- def handle_logical(self, node: ir.Logical, parent: ir.Node):
524
- # In order to recurse over subtasks.
525
- node = super().handle_logical(node, parent)
526
-
527
- new_body = []
528
- for subtask in node.body:
529
- if isinstance(subtask, ir.Output):
530
- new_body.extend(DeduplicateVars.dedup_output(subtask))
531
- elif isinstance(subtask, ir.Update):
532
- new_body.extend(DeduplicateVars.dedup_update(subtask))
533
- else:
534
- new_body.append(subtask)
535
-
536
- return ir.Logical(
537
- node.engine,
538
- node.hoisted,
539
- tuple(new_body),
540
- node.annotations
541
- )
542
-
543
- # Generate date arithmetic expressions, such as
544
- # `rel_primitive_date_add(:day, [date] delta, res_2)` by finding the period
545
- # expression for the delta and adding the period type to the date arithmetic expression.
546
- #
547
- # date_add and it's kin are generated by a period expression, e.g.,
548
- # `day(delta, res_1)`
549
- # followed by the date arithmetic expression using the period
550
- # `date_add([date] res_1 res_2)`
551
- class PeriodMath(Pass):
552
- def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
553
- period_rewriter = self.PeriodRewriter()
554
- model = period_rewriter.walk(model)
555
- period_math_rewriter = self.PeriodMathRewriter(period_rewriter.period_vars)
556
- model = period_math_rewriter.walk(model)
557
- return model
558
-
559
- # Find all period builtins. We need to make them safe for the emitter (either by
560
- # translating to a cast, or removing) and store the variable and period type for use
561
- # in the date/datetime add/subtract expressions.
562
- class PeriodRewriter(visitor.Rewriter):
563
- def __init__(self):
564
- super().__init__()
565
- self.period_vars: dict[ir.Var, str] = {}
566
-
567
- def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
568
- if not rel_builtins.is_builtin(node.relation):
569
- return node
570
-
571
- if node.relation.name not in {
572
- "year", "month", "week", "day", "hour", "minute", "second", "millisecond", "microsecond", "nanosecond"
573
- }:
574
- return node
575
-
576
- assert len(node.args) == 2, "Expect 2 arguments for period builtins"
577
- assert isinstance(node.args[1], ir.Var), "Expect result to be a variable"
578
- period = node.relation.name
579
- result_var = node.args[1]
580
- self.period_vars[result_var] = period
581
-
582
- # Ideally we could now remove the unused and unhandled period type construction
583
- # but we may also need to cast the original variable to an Int64 for use by the
584
- # date/datetime add/subtract expressions.
585
- # TODO: Remove the node entirely where possible and update uses of the result
586
- return f.lookup(rel_builtins.cast, [types.Int64, node.args[0], result_var])
587
-
588
- # Update date/datetime add/subtract expressions with period information.
589
- class PeriodMathRewriter(visitor.Rewriter):
590
- def __init__(self, period_vars: dict[ir.Var, str]):
591
- super().__init__()
592
- self.period_vars: dict[ir.Var, str] = period_vars
593
-
594
- def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
595
- if not rel_builtins.is_builtin(node.relation):
596
- return node
597
-
598
- if node.relation.name not in {
599
- "date_add", "date_subtract", "datetime_add", "datetime_subtract"
600
- }:
601
- return node
602
-
603
- if len(node.args) == 4:
604
- # We've already visited this lookup
605
- return node
606
-
607
- assert isinstance(node.args[1], ir.Var), "Expect period to be a variable"
608
- period_var = node.args[1]
609
- assert period_var in self.period_vars, "datemath found, but no vars to insert"
610
-
611
- period = self.period_vars[period_var]
612
-
613
- new_args = [f.literal(period, types.Symbol)] + [arg for arg in node.args]
614
-
615
- return f.lookup(node.relation, new_args)
616
-
617
- # Rewrite constants to vars in Updates. This results in a more normalized format where
618
- # updates contain only variables. This allows for easier rewrites in later passes.
619
- class ConstantsToVars(Pass):
620
- def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
621
- r = self.ConstantToVarRewriter()
622
- return r.walk(model)
623
-
624
- # Return 1) a new list of Values with no duplicates (at the object level) and
625
- # 2) equalities between any original Value and a deduplicated Value.
626
- @staticmethod
627
- def replace_constants_with_vars(vals: Sequence[ir.Value]) -> Tuple[List[ir.Value], List[ir.Lookup]]:
628
- new_vals = []
629
- eqs = []
630
-
631
- for i, val in enumerate(vals):
632
- if isinstance(val, ir.PyValue) or isinstance(val, ir.Literal):
633
- # Replace constant with a new Var.
634
- typ = typer.to_type(val)
635
- assert isinstance(typ, ir.ScalarType), "can only replace scalar constants with vars"
636
- new_var = ir.Var(typ, f"{typ.name.lower()}_{i}")
637
- new_vals.append(new_var)
638
- eqs.append(f.lookup(rel_builtins.eq, [new_var, val]))
639
- else:
640
- new_vals.append(val)
641
-
642
- return new_vals, eqs
643
-
644
- @staticmethod
645
- def dedup_update(update: ir.Update) -> List[Union[ir.Update, ir.Lookup]]:
646
- deduped_vals, req_lookups = ConstantsToVars.replace_constants_with_vars(update.args)
647
- new_update = ir.Update(
648
- update.engine,
649
- update.relation,
650
- tuple(deduped_vals),
651
- update.effect,
652
- update.annotations,
653
- )
654
- return req_lookups + [new_update]
655
-
656
- # Does the actual work.
657
- class ConstantToVarRewriter(visitor.Rewriter):
658
- def __init__(self):
659
- super().__init__()
660
-
661
- # We implement handle_logical instead of handle_update because in
662
- # addition to modifying said update we require new lookups (equality
663
- # between original and deduplicated variables).
664
- def handle_logical(self, node: ir.Logical, parent: ir.Node):
665
- # In order to recurse over subtasks.
666
- node = super().handle_logical(node, parent)
667
-
668
- new_body = []
669
- for subtask in node.body:
670
- if isinstance(subtask, ir.Update):
671
- new_body.extend(ConstantsToVars.dedup_update(subtask))
672
- else:
673
- new_body.append(subtask)
674
-
675
- return ir.Logical(
676
- node.engine,
677
- node.hoisted,
678
- tuple(new_body),
679
- node.annotations
680
- )