relationalai 0.12.6__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. relationalai/clients/snowflake.py +48 -7
  2. relationalai/clients/use_index_poller.py +11 -1
  3. relationalai/early_access/lqp/constructors/__init__.py +2 -2
  4. relationalai/early_access/metamodel/rewrite/__init__.py +2 -2
  5. relationalai/semantics/internal/internal.py +1 -4
  6. relationalai/semantics/internal/snowflake.py +14 -1
  7. relationalai/semantics/lqp/constructors.py +0 -5
  8. relationalai/semantics/lqp/executor.py +34 -10
  9. relationalai/semantics/lqp/intrinsics.py +2 -2
  10. relationalai/semantics/lqp/model2lqp.py +10 -7
  11. relationalai/semantics/lqp/passes.py +29 -9
  12. relationalai/semantics/lqp/primitives.py +15 -15
  13. relationalai/semantics/lqp/rewrite/__init__.py +2 -2
  14. relationalai/semantics/lqp/rewrite/{fd_constraints.py → function_annotations.py} +4 -4
  15. relationalai/semantics/lqp/utils.py +17 -13
  16. relationalai/semantics/metamodel/builtins.py +1 -0
  17. relationalai/semantics/metamodel/rewrite/__init__.py +2 -1
  18. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +1 -1
  19. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +5 -6
  20. relationalai/semantics/metamodel/rewrite/flatten.py +18 -149
  21. relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  22. relationalai/semantics/reasoners/graph/core.py +98 -70
  23. relationalai/semantics/reasoners/optimization/__init__.py +55 -10
  24. relationalai/semantics/reasoners/optimization/common.py +63 -8
  25. relationalai/semantics/reasoners/optimization/solvers_dev.py +39 -33
  26. relationalai/semantics/reasoners/optimization/solvers_pb.py +1033 -385
  27. relationalai/semantics/rel/compiler.py +4 -3
  28. relationalai/semantics/rel/executor.py +30 -8
  29. relationalai/semantics/snowflake/__init__.py +2 -2
  30. relationalai/semantics/sql/executor/snowflake.py +6 -2
  31. relationalai/semantics/tests/test_snapshot_abstract.py +5 -4
  32. relationalai/tools/cli.py +10 -0
  33. relationalai/tools/cli_controls.py +15 -0
  34. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/METADATA +2 -2
  35. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/RECORD +38 -37
  36. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/WHEEL +0 -0
  37. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/entry_points.txt +0 -0
  38. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,6 @@
1
1
  from relationalai.semantics.lqp import ir as lqp
2
2
  from relationalai.semantics.metamodel import ir
3
+ from relationalai.semantics.metamodel.helpers import sanitize
3
4
  from relationalai.semantics.metamodel.util import FrozenOrderedSet
4
5
 
5
6
  from hashlib import sha256
@@ -13,21 +14,24 @@ class UniqueNames:
13
14
  self.id_to_name = dict[int,str]()
14
15
 
15
16
  def get_name(self, name: str) -> str:
16
- if name in self.seen:
17
- self.seen[name] += 1
18
- id = self.seen[name]
19
- # If the original name has a suffix we can get collisions with generated names,
20
- # so test the new name.
21
- while f"{name}_{id}" in self.seen:
22
- id += 1
23
- self.seen[name] = id
24
- new_name = f"{name}_{id}"
25
- self.seen[new_name] = 1
26
- return new_name
27
- else:
17
+ # Names will eventually be sanitized, which could cause collisions, so we
18
+ # do the sanitization here.
19
+ name = '_' if name == '_' else sanitize(name)
20
+ if name not in self.seen:
28
21
  self.seen[name] = 1
29
22
  return f"{name}"
30
23
 
24
+ self.seen[name] += 1
25
+ id = self.seen[name]
26
+ # If the original name has a suffix we can get collisions with generated names,
27
+ # so test the new name.
28
+ while f"{name}_{id}" in self.seen:
29
+ id += 1
30
+ self.seen[name] = id
31
+ new_name = f"{name}_{id}"
32
+ self.seen[new_name] = 1
33
+ return new_name
34
+
31
35
  # Get a unique name for the given id. If the id is already in the map, return the
32
36
  # existing name. Otherwise, generate a new name using the suggested_name and
33
37
  # store it in the map.
@@ -55,7 +59,7 @@ def gen_rel_id(ctx: TranslationCtx, orig_name: str, suffix: str = "") -> lqp.Rel
55
59
  ctx.rel_id_to_orig_name[relation_id] = orig_name
56
60
  return relation_id
57
61
 
58
- def gen_unique_var(ctx: TranslationCtx, name_hint: str):
62
+ def gen_unique_var(ctx: TranslationCtx, name_hint: str) -> lqp.Var:
59
63
  """
60
64
  Generate a new variable with a unique name based on the provided hint.
61
65
  """
@@ -654,6 +654,7 @@ rel_primitive_solverlib_ho_appl = aggregation("rel_primitive_solverlib_ho_appl",
654
654
  ])
655
655
  implies = f.relation("implies", [f.input_field("a", types.Bool), f.input_field("b", types.Bool)])
656
656
  all_different = aggregation("all_different", [f.input_field("over", types.Any)])
657
+ special_ordered_set_type_2 = aggregation("special_ordered_set_type_2", [f.input_field("rank", types.Any)])
657
658
 
658
659
  # graph primitive algorithm helpers
659
660
  infomap = aggregation("infomap", [
@@ -2,5 +2,6 @@ from .discharge_constraints import DischargeConstraints
2
2
  from .dnf_union_splitter import DNFUnionSplitter
3
3
  from .extract_nested_logicals import ExtractNestedLogicals
4
4
  from .flatten import Flatten
5
+ from .format_outputs import FormatOutputs
5
6
 
6
- __all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten"]
7
+ __all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten", "FormatOutputs"]
@@ -150,7 +150,7 @@ class DNFExtractor(Visitor):
150
150
 
151
151
  replacement_tasks: list[ir.Task] = []
152
152
  for body in replacement_bodies:
153
- new_task = f.logical(body)
153
+ new_task = f.logical(body, node.hoisted)
154
154
  replacement_tasks.append(new_task)
155
155
  self.replaced_by[node] = replacement_tasks
156
156
 
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from relationalai.semantics.metamodel import ir, factory as f, helpers
4
- from relationalai.semantics.metamodel.visitor import Rewriter
4
+ from relationalai.semantics.metamodel.visitor import Rewriter, collect_by_type
5
5
  from relationalai.semantics.metamodel.compiler import Pass
6
6
  from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
7
7
  from relationalai.semantics.metamodel import dependency
@@ -61,11 +61,10 @@ class LogicalExtractor(Rewriter):
61
61
 
62
62
  # if there are aggregations, make sure we don't expose the projected and input vars,
63
63
  # but expose groupbys
64
- for child in node.body:
65
- if isinstance(child, ir.Aggregate):
66
- exposed_vars.difference_update(child.projection)
67
- exposed_vars.difference_update(helpers.aggregate_inputs(child))
68
- exposed_vars.update(child.group)
64
+ for agg in collect_by_type(ir.Aggregate, node):
65
+ exposed_vars.difference_update(agg.projection)
66
+ exposed_vars.difference_update(helpers.aggregate_inputs(agg))
67
+ exposed_vars.update(agg.group)
69
68
  # add the values (hoisted)
70
69
  exposed_vars.update(helpers.hoisted_vars(logical.hoisted))
71
70
 
@@ -3,12 +3,11 @@ from dataclasses import dataclass
3
3
  from typing import cast, Optional, TypeVar
4
4
  from typing import Tuple
5
5
 
6
- from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers, types
6
+ from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers
7
7
  from relationalai.semantics.metamodel.compiler import Pass, group_tasks
8
8
  from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
9
9
  from relationalai.semantics.metamodel import dependency
10
- from relationalai.semantics.metamodel.util import FrozenOrderedSet, filter_by_type
11
- from relationalai.semantics.metamodel.typer.typer import to_type, is_primitive
10
+ from relationalai.semantics.metamodel.typer.typer import to_type
12
11
 
13
12
  class Flatten(Pass):
14
13
  """
@@ -225,15 +224,26 @@ class Flatten(Pass):
225
224
  "ranks": ir.Rank,
226
225
  })
227
226
 
228
- # if there are outputs, adjust them (depending on the config for wide vs gnf)
227
+ # If there are outputs, flatten each into its own top-level rule, along with its
228
+ # dependencies.
229
229
  if groups["outputs"]:
230
- if self._handle_outputs:
231
- return self.adjust_outputs(task, body, groups, ctx)
232
- else:
233
- # When we do not handle outputs. For example, in SQL compiler. We need to leave output as a top-level element.
230
+ if not self._handle_outputs:
234
231
  ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
235
232
  return Flatten.HandleResult(None)
236
233
 
234
+ # Analyze the dependencies in the newly rewritten body
235
+ new_logical = ir.Logical(task.engine, task.hoisted, tuple(body))
236
+ info = dependency.analyze(new_logical)
237
+
238
+ for output in groups["outputs"]:
239
+ assert(isinstance(output, ir.Output))
240
+ new_body = info.task_dependencies(output)
241
+ new_body.update(ctx.extra_tasks)
242
+ new_body.add(output)
243
+ ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(new_body), task.annotations))
244
+
245
+ return Flatten.HandleResult(None)
246
+
237
247
  # if there are updates, extract as a new top level rule
238
248
  if groups["updates"]:
239
249
  # add task dependencies to the body
@@ -455,147 +465,6 @@ class Flatten(Pass):
455
465
  task.annotations
456
466
  ))
457
467
 
458
- #--------------------------------------------------
459
- # GNF vs wide output support
460
- #--------------------------------------------------
461
- def adjust_outputs(self, task: ir.Logical, body: OrderedSet[ir.Task], groups: dict[str, OrderedSet[ir.Task]], ctx: Context):
462
-
463
- # for wide outputs, only adjust the output task to include the keys.
464
- if ctx.options.get("wide_outputs", False):
465
- for output in groups["outputs"]:
466
- assert(isinstance(output, ir.Output))
467
- if output.keys:
468
- body.remove(output)
469
- body.add(self.rewrite_wide_output(output))
470
- # self.remove_subsumptions(body, ctx)
471
- return Flatten.HandleResult(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
472
-
473
- # for GNF outputs we need to generate a rule for each "column" in the output
474
- else:
475
- # first split outputs in potentially multiple outputs, one for each "column"
476
- for output in groups["outputs"]:
477
- assert(isinstance(output, ir.Output))
478
- if output.keys:
479
- # we will replace the output bellow,
480
- body.remove(output)
481
-
482
- is_export = builtins.export_annotation in output.annotations
483
-
484
- # generate an output for each "column"
485
- # output looks like def output(:cols, :col000, key0, key1, value):
486
- original_cols = OrderedSet()
487
- for idx, alias in enumerate(output.aliases):
488
- # skip None values which are used as a placeholder for missing values
489
- if alias[1] is None:
490
- continue
491
- original_cols.add(alias[1])
492
- self._generate_output_column(body, output, idx, alias, is_export)
493
-
494
- idx = len(output.aliases)
495
- for key in output.keys:
496
- if key not in original_cols:
497
- self._generate_output_column(body, output, idx, (key.name, key), is_export)
498
- idx += 1
499
-
500
- # analyse the resulting logical to be able to pull dependencies
501
- logical = ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
502
- info = dependency.analyze(logical)
503
-
504
- # now extract a logical for each output, bringing together its dependencies
505
- for output in filter_by_type(body, ir.Output):
506
- deps = info.task_dependencies(output)
507
- # TODO: verify safety of doing this
508
- # self.remove_subsumptions(deps, ctx)
509
-
510
- deps.add(output)
511
- ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, tuple(), tuple(deps)))
512
-
513
- return Flatten.HandleResult(None)
514
-
515
- def _generate_output_column(self, body: OrderedSet[ir.Task], output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
516
- if not output.keys:
517
- return output
518
-
519
- aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
520
- aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
521
-
522
- for k in output.keys:
523
- aliases.append((f"key_{k.name}_{idx}", k))
524
-
525
- if (is_export and
526
- isinstance(alias[1], ir.Var) and
527
- (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
528
-
529
- uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
530
- body.add(f.lookup(builtins.uuid_to_string, [alias[1], uuid]))
531
- aliases.append((uuid.name, uuid))
532
- else:
533
- aliases.append(alias)
534
-
535
- body.add(ir.Output(
536
- output.engine,
537
- FrozenOrderedSet.from_iterable(aliases),
538
- output.keys,
539
- output.annotations
540
- ))
541
-
542
-
543
- def remove_subsumptions(self, body:OrderedSet[ir.Task], ctx: Context):
544
- # remove from the body all the tasks that are subsumed by some other task in the set;
545
- # this can be done because some tasks are references to extracted nested logical that
546
- # contain filters they dependend on, so we don't need those filters here if the
547
- # reference is present.
548
- for logical in filter_by_type(body, ir.Logical):
549
- if logical.id in ctx.included:
550
- # if the logical id is included, it means it's a reference to an extracted
551
- # rule, so remove all other items in the body that are already included in
552
- # the body referenced by it
553
- for item in body:
554
- if item in ctx.included[logical.id]:
555
- body.remove(item)
556
-
557
-
558
- def rewrite_wide_output(self, output: ir.Output):
559
- assert(output.keys)
560
-
561
- # only append keys that are not already in the output
562
- suffix_keys = []
563
- for key in output.keys:
564
- if all([val is not key for _, val in output.aliases]):
565
- suffix_keys.append(key)
566
-
567
- aliases: OrderedSet[Tuple[str, ir.Value]] = ordered_set()
568
-
569
- # add the remaining args, unless it is already a key
570
- for name, val in output.aliases:
571
- if not isinstance(val, ir.Var) or val not in suffix_keys:
572
- aliases.add((name, val))
573
-
574
- # add the keys to the output
575
- for key in suffix_keys:
576
- aliases.add((key.name, key))
577
-
578
- # TODO - we are assuming that the Rel compiler will translate nullable lookups
579
- # properly, returning a `Missing` if necessary, like this:
580
- # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
581
- return ir.Output(
582
- output.engine,
583
- aliases.frozen(),
584
- output.keys,
585
- output.annotations
586
- )
587
-
588
- # TODO: in the rel compiler, see if we can do this outer join
589
- # 1. number of keys
590
- # 2. each relation
591
- # 3. each variable, starting with the keys
592
- # 4. tag output with @arrow
593
-
594
- # @arrow def output(_book, _book_title, _author_name):
595
- # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
596
- # def output(p, n, c):
597
- # rel_primitive_outer_join(#1, name, coolness, p, n, c)
598
-
599
468
  #--------------------------------------------------
600
469
  # Helpers
601
470
  #--------------------------------------------------
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+ from typing import Tuple
3
+
4
+ from relationalai.semantics.metamodel import builtins, ir, factory as f, types, visitor
5
+ from relationalai.semantics.metamodel.compiler import Pass, group_tasks
6
+ from relationalai.semantics.metamodel.util import OrderedSet
7
+ from relationalai.semantics.metamodel.util import FrozenOrderedSet
8
+ from relationalai.semantics.metamodel.typer.typer import is_primitive
9
+
10
+ class FormatOutputs(Pass):
11
+ def __init__(self, handle_outputs: bool=True):
12
+ super().__init__()
13
+ self._handle_outputs = handle_outputs
14
+
15
+ #--------------------------------------------------
16
+ # Public API
17
+ #--------------------------------------------------
18
+ def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
19
+ wide_outputs = options.get("wide_outputs", False)
20
+ return self.OutputRewriter(wide_outputs).walk(model)
21
+
22
+ class OutputRewriter(visitor.Rewriter):
23
+ def __init__(self, wide_outputs: bool = False):
24
+ super().__init__()
25
+ self.wide_outputs = wide_outputs
26
+
27
+ def handle_logical(self, node: ir.Logical, parent: ir.Node):
28
+ # Rewrite children first
29
+ node = super().handle_logical(node, parent)
30
+
31
+ groups = group_tasks(node.body, {
32
+ "outputs": ir.Output,
33
+ })
34
+
35
+ # If no outputs, return as is
36
+ if not groups["outputs"]:
37
+ return node
38
+
39
+ return adjust_outputs(node, groups["outputs"], self.wide_outputs)
40
+
41
+ #--------------------------------------------------
42
+ # GNF vs wide output support
43
+ #--------------------------------------------------
44
+ def adjust_outputs(task: ir.Logical, outputs: OrderedSet[ir.Task], wide_outputs: bool = False):
45
+
46
+ body = list(task.body)
47
+
48
+ # For wide outputs, only adjust the output task to include the keys.
49
+ if wide_outputs:
50
+ for output in outputs:
51
+ assert(isinstance(output, ir.Output))
52
+ if output.keys:
53
+ body.remove(output)
54
+ body.append(rewrite_wide_output(output))
55
+ return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
56
+
57
+ # For GNF outputs we need to generate a rule for each "column" in the output
58
+ else:
59
+ # First split outputs into potentially multiple outputs, one for each "column"
60
+ for output in outputs:
61
+ assert(isinstance(output, ir.Output))
62
+ if output.keys:
63
+ # Remove the original output. This is replaced by per-column outputs below
64
+ body.remove(output)
65
+
66
+ is_export = builtins.export_annotation in output.annotations
67
+
68
+ # Generate an output for each "column"
69
+ # output looks like def output(:cols, :col000, key0, key1, value):
70
+ original_cols = OrderedSet()
71
+ for idx, alias in enumerate(output.aliases):
72
+ # Skip None values which are used as a placeholder for missing values
73
+ if alias[1] is None:
74
+ continue
75
+ original_cols.add(alias[1])
76
+ body.extend(_generate_output_column(output, idx, alias, is_export))
77
+
78
+ idx = len(output.aliases)
79
+ for key in output.keys:
80
+ if key not in original_cols:
81
+ body.extend(_generate_output_column(output, idx, (key.name, key), is_export))
82
+ idx += 1
83
+
84
+ return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
85
+
86
+ # TODO: return non list?
87
+ def _generate_output_column(output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
88
+ if not output.keys:
89
+ return [output]
90
+
91
+ aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
92
+ aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
93
+
94
+ # Append all keys at the start
95
+ for k in output.keys:
96
+ aliases.append((f"key_{k.name}_{idx}", k))
97
+
98
+ if (is_export and
99
+ isinstance(alias[1], ir.Var) and
100
+ (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
101
+
102
+ uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
103
+ aliases.append((uuid.name, uuid))
104
+
105
+ return [
106
+ ir.Lookup(None, builtins.uuid_to_string, (alias[1], uuid)),
107
+ ir.Output(
108
+ output.engine,
109
+ FrozenOrderedSet.from_iterable(aliases),
110
+ output.keys,
111
+ output.annotations
112
+ )
113
+ ]
114
+ else:
115
+ aliases.append(alias)
116
+
117
+ return [
118
+ ir.Output(
119
+ output.engine,
120
+ FrozenOrderedSet.from_iterable(aliases),
121
+ output.keys,
122
+ output.annotations
123
+ )
124
+ ]
125
+
126
+ def rewrite_wide_output(output: ir.Output):
127
+ assert(output.keys)
128
+
129
+ # Only append keys that are not already in the output
130
+ suffix_keys = []
131
+ for key in output.keys:
132
+ if all([val is not key for _, val in output.aliases]):
133
+ suffix_keys.append(key)
134
+
135
+ aliases: OrderedSet[Tuple[str, ir.Value]] = OrderedSet()
136
+
137
+ # Add the remaining args, unless it is already a key
138
+ for name, val in output.aliases:
139
+ if not isinstance(val, ir.Var) or val not in suffix_keys:
140
+ aliases.add((name, val))
141
+
142
+ # Add the keys to the output
143
+ for key in suffix_keys:
144
+ aliases.add((key.name, key))
145
+
146
+ # TODO - we are assuming that the Rel compiler will translate nullable lookups
147
+ # properly, returning a `Missing` if necessary, like this:
148
+ # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
149
+ return ir.Output(
150
+ output.engine,
151
+ aliases.frozen(),
152
+ output.keys,
153
+ output.annotations
154
+ )
155
+
156
+ # TODO: in the rel compiler, see if we can do this outer join
157
+ # 1. number of keys
158
+ # 2. each relation
159
+ # 3. each variable, starting with the keys
160
+ # 4. tag output with @arrow
161
+
162
+ # @arrow def output(_book, _book_title, _author_name):
163
+ # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
164
+ # def output(p, n, c):
165
+ # rel_primitive_outer_join(#1, name, coolness, p, n, c)