relationalai 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. relationalai/clients/client.py +3 -4
  2. relationalai/clients/exec_txn_poller.py +62 -31
  3. relationalai/clients/resources/snowflake/direct_access_resources.py +6 -5
  4. relationalai/clients/resources/snowflake/snowflake.py +54 -51
  5. relationalai/clients/resources/snowflake/use_index_poller.py +1 -1
  6. relationalai/semantics/internal/snowflake.py +5 -1
  7. relationalai/semantics/lqp/algorithms.py +173 -0
  8. relationalai/semantics/lqp/builtins.py +199 -2
  9. relationalai/semantics/lqp/executor.py +90 -41
  10. relationalai/semantics/lqp/export_rewriter.py +40 -0
  11. relationalai/semantics/lqp/ir.py +28 -2
  12. relationalai/semantics/lqp/model2lqp.py +218 -45
  13. relationalai/semantics/lqp/passes.py +13 -658
  14. relationalai/semantics/lqp/rewrite/__init__.py +12 -0
  15. relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
  16. relationalai/semantics/lqp/rewrite/annotate_constraints.py +22 -10
  17. relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
  18. relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
  19. relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
  20. relationalai/semantics/lqp/rewrite/functional_dependencies.py +31 -2
  21. relationalai/semantics/lqp/rewrite/period_math.py +77 -0
  22. relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
  23. relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
  24. relationalai/semantics/lqp/utils.py +11 -1
  25. relationalai/semantics/lqp/validators.py +14 -1
  26. relationalai/semantics/metamodel/builtins.py +2 -1
  27. relationalai/semantics/metamodel/compiler.py +2 -1
  28. relationalai/semantics/metamodel/dependency.py +12 -3
  29. relationalai/semantics/metamodel/executor.py +11 -1
  30. relationalai/semantics/metamodel/factory.py +2 -2
  31. relationalai/semantics/metamodel/helpers.py +7 -0
  32. relationalai/semantics/metamodel/ir.py +3 -2
  33. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
  34. relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
  35. relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
  36. relationalai/semantics/metamodel/typer/checker.py +6 -4
  37. relationalai/semantics/metamodel/typer/typer.py +2 -5
  38. relationalai/semantics/metamodel/visitor.py +4 -3
  39. relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
  40. relationalai/semantics/reasoners/optimization/solvers_pb.py +3 -4
  41. relationalai/semantics/rel/compiler.py +2 -1
  42. relationalai/semantics/rel/executor.py +3 -2
  43. relationalai/semantics/tests/lqp/__init__.py +0 -0
  44. relationalai/semantics/tests/lqp/algorithms.py +345 -0
  45. relationalai/semantics/tests/test_snapshot_abstract.py +2 -1
  46. relationalai/tools/cli_controls.py +216 -67
  47. relationalai/util/format.py +5 -2
  48. {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/METADATA +2 -2
  49. {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/RECORD +52 -42
  50. {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/WHEEL +0 -0
  51. {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/entry_points.txt +0 -0
  52. {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,108 @@
1
+ from relationalai.semantics.metamodel.compiler import Pass
2
+ from relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
3
+
4
+ from typing import cast
5
+ import pandas as pd
6
+ import hashlib
7
+
8
+ # Creates intermediary relations for all Data nodes and replaces said Data nodes
9
+ # with a Lookup into these created relations. Duplicate relations are reused.
10
+ class EliminateData(Pass):
11
+ def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
12
+ r = self.DataRewriter()
13
+ return r.walk(model)
14
+
15
+ # Does the actual work.
16
+ class DataRewriter(visitor.Rewriter):
17
+ new_relations: list[ir.Relation]
18
+ new_updates: list[ir.Logical]
19
+ # Counter for naming new relations.
20
+ # It must be that new_count == len(new_updates) == len(new_relations).
21
+ new_count: int
22
+ # Cache for Data nodes to avoid creating duplicate intermediary relations
23
+ data_cache: dict[str, ir.Relation]
24
+
25
+ def __init__(self):
26
+ self.new_relations = []
27
+ self.new_updates = []
28
+ self.new_count = 0
29
+ self.data_cache = {}
30
+ super().__init__()
31
+
32
+ # Create a cache key for a Data node based on its structure and content
33
+ def _data_cache_key(self, node: ir.Data) -> str:
34
+ values = pd.util.hash_pandas_object(node.data).values
35
+ return hashlib.sha256(bytes(values)).hexdigest()
36
+
37
+ def _intermediary_relation(self, node: ir.Data) -> ir.Relation:
38
+ cache_key = self._data_cache_key(node)
39
+ if cache_key in self.data_cache:
40
+ return self.data_cache[cache_key]
41
+ self.new_count += 1
42
+ intermediary_name = f"formerly_Data_{self.new_count}"
43
+
44
+ intermediary_relation = f.relation(
45
+ intermediary_name,
46
+ [f.field(v.name, v.type) for v in node.vars]
47
+ )
48
+ self.new_relations.append(intermediary_relation)
49
+
50
+ intermediary_update = f.logical([
51
+ # For each row (union), equate values and their variable (logical).
52
+ f.union(
53
+ [
54
+ f.logical(
55
+ [
56
+ f.lookup(rel_builtins.eq, [f.literal(val, var.type), var])
57
+ for (val, var) in zip(row, node.vars)
58
+ ],
59
+ )
60
+ for row in node
61
+ ],
62
+ hoisted = node.vars,
63
+ ),
64
+ # And pop it back into the relation.
65
+ f.update(intermediary_relation, node.vars, ir.Effect.derive),
66
+ ])
67
+ self.new_updates.append(intermediary_update)
68
+
69
+ # Cache the result for reuse
70
+ self.data_cache[cache_key] = intermediary_relation
71
+
72
+ return intermediary_relation
73
+
74
+ # Create a new intermediary relation representing the Data (and pop it in
75
+ # new_updates/new_relations) and replace this Data with a Lookup of said
76
+ # intermediary.
77
+ def handle_data(self, node: ir.Data, parent: ir.Node) -> ir.Lookup:
78
+ intermediary_relation = self._intermediary_relation(node)
79
+ replacement_lookup = f.lookup(intermediary_relation, node.vars)
80
+
81
+ return replacement_lookup
82
+
83
+ # Walks the model for the handle_data work then updates the model with
84
+ # the new state.
85
+ def handle_model(self, model: ir.Model, parent: None):
86
+ walked_model = super().handle_model(model, parent)
87
+ assert len(self.new_relations) == len(self.new_updates) and self.new_count == len(self.new_relations)
88
+
89
+ # This is okay because it's LQP.
90
+ assert isinstance(walked_model.root, ir.Logical)
91
+ root_logical = cast(ir.Logical, walked_model.root)
92
+
93
+ # We may need to add the new intermediaries from handle_data to the model.
94
+ if self.new_count == 0:
95
+ return model
96
+ else:
97
+ return ir.Model(
98
+ walked_model.engines,
99
+ walked_model.relations | self.new_relations,
100
+ walked_model.types,
101
+ ir.Logical(
102
+ root_logical.engine,
103
+ root_logical.hoisted,
104
+ root_logical.body + tuple(self.new_updates),
105
+ root_logical.annotations,
106
+ ),
107
+ walked_model.annotations,
108
+ )
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from typing import Optional, Sequence
3
3
  from relationalai.semantics.internal import internal
4
4
  from relationalai.semantics.metamodel.ir import (
5
- Node, Require, Logical, Var, Relation, Lookup, ScalarType
5
+ Node, Require, Logical, Var, Relation, Lookup, ScalarType, Literal
6
6
  )
7
7
  from relationalai.semantics.metamodel import builtins
8
8
 
@@ -13,7 +13,7 @@ dependencies. The main functionalities provided are:
13
13
  1. Check whether a `Require` node is a valid unique constraint representation
14
14
  2. Represent the uniqueness constraint as a functional dependency
15
15
  3. Check if the functional dependency is structural i.e., can be represented with
16
- `@function(k)` annotation on a single relation.
16
+ `@function(k)` annotation on a single relation.
17
17
 
18
18
  =========================== Structure of unique constraints ================================
19
19
  A `Require` node represents a _unique constraint_ if it meets the following criteria:
@@ -234,6 +234,9 @@ class FunctionalDependency:
234
234
 
235
235
  self._determine_is_structural()
236
236
 
237
+ # compute canonical string representation of the fd
238
+ self._canonical_str = self._compute_canonical_str()
239
+
237
240
  # A functional dependency `φ: X → Y` is _k-functional_ if `φ` consists of a single atom
238
241
  # `R(x1,...,xm,y1,...,yk)` and `X = {x1,...,xm}`. Not all functional dependencies are
239
242
  # k-functional. For instance, `R(x, y, z): {y, z} → {x}` cannot be expressed with
@@ -291,6 +294,32 @@ class FunctionalDependency:
291
294
  values_str = ", ".join([str(var) for var in self.values]).strip()
292
295
  return f"{guard_str}: {{{keys_str}}} -> {{{values_str}}}"
293
296
 
297
+ # computes a canonical string representation of the functional dependency
298
+ def _compute_canonical_str(self) -> str:
299
+ # we construct a stable tuple-term representation of the fd
300
+ fd_term = ("fd",)
301
+ for atom in sorted(self.guard, key=lambda x: x.relation.name):
302
+ atom_term = (atom.relation.name,)
303
+ for arg in atom.args:
304
+ if isinstance(arg, Var):
305
+ atom_term += (("var", arg.name, str(arg.type)),)
306
+ elif isinstance(arg, Literal):
307
+ atom_term += (("lit", arg.value, str(arg.type)),)
308
+ else:
309
+ atom_term += (("arg", str(arg)),)
310
+ fd_term += (atom_term,)
311
+ keys_term = tuple(sorted((("var", v.name, str(v.type)) for v in self.keys)))
312
+ values_term = tuple(sorted((("var", v.name, str(v.type)) for v in self.values)))
313
+ fd_term += (("keys", keys_term), ("values", values_term))
314
+ return str(fd_term)
315
+
316
+ @property
317
+ def canonical_str(self) -> str:
318
+ """
319
+ A canonical string representation (depends on guard atoms, keys, and values).
320
+ """
321
+ return self._canonical_str
322
+
294
323
  def contains_only_declarable_constraints(node: Node) -> bool:
295
324
  """
296
325
  Checks whether the input `Logical` node contains only `Require` nodes annotated with
@@ -0,0 +1,77 @@
1
+ from relationalai.semantics.metamodel.compiler import Pass
2
+ from relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
3
+ from relationalai.semantics.metamodel import types
4
+
5
+ # Generate date arithmetic expressions, such as
6
+ # `rel_primitive_date_add(:day, [date] delta, res_2)` by finding the period
7
+ # expression for the delta and adding the period type to the date arithmetic expression.
8
+ #
9
+ # date_add and its kin are generated by a period expression, e.g.,
10
+ # `day(delta, res_1)`
11
+ # followed by the date arithmetic expression using the period
12
+ # `date_add([date] res_1 res_2)`
13
+ class PeriodMath(Pass):
14
+ def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
15
+ period_rewriter = self.PeriodRewriter()
16
+ model = period_rewriter.walk(model)
17
+ period_math_rewriter = self.PeriodMathRewriter(period_rewriter.period_vars)
18
+ model = period_math_rewriter.walk(model)
19
+ return model
20
+
21
+ # Find all period builtins. We need to make them safe for the emitter (either by
22
+ # translating to a cast, or removing) and store the variable and period type for use
23
+ # in the date/datetime add/subtract expressions.
24
+ class PeriodRewriter(visitor.Rewriter):
25
+ def __init__(self):
26
+ super().__init__()
27
+ self.period_vars: dict[ir.Var, str] = {}
28
+
29
+ def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
30
+ if not rel_builtins.is_builtin(node.relation):
31
+ return node
32
+
33
+ if node.relation.name not in {
34
+ "year", "month", "week", "day", "hour", "minute", "second", "millisecond", "microsecond", "nanosecond"
35
+ }:
36
+ return node
37
+
38
+ assert len(node.args) == 2, "Expect 2 arguments for period builtins"
39
+ assert isinstance(node.args[1], ir.Var), "Expect result to be a variable"
40
+ period = node.relation.name
41
+ result_var = node.args[1]
42
+ self.period_vars[result_var] = period
43
+
44
+ # Ideally we could now remove the unused and unhandled period type construction
45
+ # but we may also need to cast the original variable to an Int64 for use by the
46
+ # date/datetime add/subtract expressions.
47
+ # TODO: Remove the node entirely where possible and update uses of the result
48
+ return f.lookup(rel_builtins.cast, [types.Int64, node.args[0], result_var])
49
+
50
+ # Update date/datetime add/subtract expressions with period information.
51
+ class PeriodMathRewriter(visitor.Rewriter):
52
+ def __init__(self, period_vars: dict[ir.Var, str]):
53
+ super().__init__()
54
+ self.period_vars: dict[ir.Var, str] = period_vars
55
+
56
+ def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
57
+ if not rel_builtins.is_builtin(node.relation):
58
+ return node
59
+
60
+ if node.relation.name not in {
61
+ "date_add", "date_subtract", "datetime_add", "datetime_subtract"
62
+ }:
63
+ return node
64
+
65
+ if len(node.args) == 4:
66
+ # We've already visited this lookup
67
+ return node
68
+
69
+ assert isinstance(node.args[1], ir.Var), "Expect period to be a variable"
70
+ period_var = node.args[1]
71
+ assert period_var in self.period_vars, "datemath found, but no vars to insert"
72
+
73
+ period = self.period_vars[period_var]
74
+
75
+ new_args = [f.literal(period, types.Symbol)] + [arg for arg in node.args]
76
+
77
+ return f.lookup(node.relation, new_args)
@@ -69,7 +69,7 @@ class VarScopeInfo(Visitor):
69
69
  ir.Var, ir.Literal, ir.Relation, ir.Field,
70
70
  ir.Default, ir.Output, ir.Update, ir.Aggregate,
71
71
  ir.Check, ir.Require,
72
- ir.Annotation, ir.Rank)
72
+ ir.Annotation, ir.Rank, ir.Break)
73
73
 
74
74
  def __init__(self):
75
75
  super().__init__()
@@ -103,16 +103,29 @@ class VarScopeInfo(Visitor):
103
103
  self._record(node, scope_vars)
104
104
 
105
105
  elif isinstance(node, (ir.Match, ir.Union)):
106
- # Match/Union inherits the vars in scope from its sub-tasks.
106
+ # Match/Union only inherits vars if they are in scope for all sub-tasks.
107
107
  scope_vars = ordered_set()
108
+ # Prime the search with the first sub-task's vars.
109
+ if node.tasks:
110
+ scope_vars.update(self._vars_in_scope.get(node.tasks[0].id, None))
111
+
108
112
  for task in node.tasks:
109
113
  sub_scope_vars = self._vars_in_scope.get(task.id, None)
110
- if sub_scope_vars:
111
- scope_vars.update(sub_scope_vars)
114
+ if not scope_vars or not sub_scope_vars:
115
+ scope_vars = ordered_set()
116
+ break
117
+ scope_vars = (scope_vars & sub_scope_vars)
118
+
112
119
  # Hoisted vars are not considered for quantification at this level.
113
120
  scope_vars.difference_update(helpers.hoisted_vars(node.hoisted))
114
121
  self._record(node, scope_vars)
115
122
 
123
+ elif isinstance(node, (ir.Loop, ir.Sequence)):
124
+ # Variables in Loops and Sequences are scoped exclusively within the body and
125
+ # not propagated outside. No need to record any variables, as they shouldn't be
126
+ # in scope for the node itself
127
+ pass
128
+
116
129
  elif isinstance(node, ir.Logical):
117
130
  self._do_logical(node)
118
131
 
@@ -128,6 +141,9 @@ class VarScopeInfo(Visitor):
128
141
  all_nested_vars = ordered_set()
129
142
  output_vars = ordered_set()
130
143
 
144
+ # Collect variables nested in child Logical and Not nodes
145
+ nested_vars_in_task: dict[ir.Var, int] = dict()
146
+
131
147
  # Collect all variables from logical sub-tasks
132
148
  for task in node.body:
133
149
  if isinstance(task, ir.Output):
@@ -140,19 +156,29 @@ class VarScopeInfo(Visitor):
140
156
  scope_vars.add(var)
141
157
  continue
142
158
 
143
- sub_scope_vars = self._vars_in_scope.get(task.id, None)
144
-
145
159
  # Hoisted variables from sub-tasks are brought again into scope.
146
160
  if isinstance(task, (ir.Logical, ir.Union, ir.Match)):
147
161
  scope_vars.update(helpers.hoisted_vars(task.hoisted))
148
162
 
149
- if sub_scope_vars:
150
- if isinstance(task, ir.Logical):
151
- all_nested_vars.update(sub_scope_vars)
152
- elif not isinstance(task, ir.Not):
153
- # For all other node kinds (except Not), just propagate the variables in scope.
154
- # Not nodes stop the propagation of variables coming from their sub-tasks.
155
- scope_vars.update(sub_scope_vars)
163
+ # Get variables in sub-task scope
164
+ sub_scope_vars = self._vars_in_scope.get(task.id, ordered_set())
165
+
166
+ if isinstance(task, ir.Logical):
167
+ # Logical child nodes should have their nested variables quantified
168
+ # only if they are needed in more than one child task.
169
+ for var in sub_scope_vars:
170
+ if var not in nested_vars_in_task:
171
+ nested_vars_in_task[var] = 0
172
+ nested_vars_in_task[var] += 1
173
+ elif not isinstance(task, ir.Not):
174
+ # Other nodes with nested variables need to be quantified at this level
175
+ scope_vars.update(sub_scope_vars)
176
+
177
+ for v, c in nested_vars_in_task.items():
178
+ # If the variable appears in more than one nested child, then it needs to be
179
+ # quantified here. Otherwise, it will be handled in the child node
180
+ if c > 1:
181
+ all_nested_vars.add(v)
156
182
 
157
183
  # Nested variables also need to be introduced, provided they are not output variables.
158
184
  for var in all_nested_vars:
@@ -190,37 +216,30 @@ class FindQuantificationNodes(Visitor):
190
216
  def __init__(self, var_info: VarScopeInfo):
191
217
  super().__init__()
192
218
  self._vars_in_scope = var_info._vars_in_scope
193
- self._handled_vars = ordered_set()
219
+ self.handled_vars: dict[int, OrderedSet[ir.Var]] = {}
194
220
  self.node_quantifies_vars = {}
195
221
 
196
222
  def enter(self, node: ir.Node, parent: Optional[ir.Node]=None) -> "Visitor":
197
223
  if contains_only_declarable_constraints(node):
198
224
  return self
199
225
 
226
+ handled_vars = self.handled_vars.get(parent.id, ordered_set()) if parent else ordered_set()
227
+ # Clone the set to avoid modifying parent's handled vars
228
+ handled_vars = OrderedSet.from_iterable(handled_vars)
229
+
200
230
  if isinstance(node, (ir.Logical, ir.Not)):
201
231
  ignored_vars = _ignored_vars(node)
202
- self._handled_vars.update(ignored_vars)
232
+ handled_vars.update(ignored_vars)
203
233
 
204
234
  scope_vars = self._vars_in_scope.get(node.id, None)
205
235
  if scope_vars:
206
- scope_vars.difference_update(self._handled_vars)
236
+ scope_vars.difference_update(handled_vars)
207
237
  if scope_vars:
208
- self._handled_vars.update(scope_vars)
238
+ handled_vars.update(scope_vars)
209
239
  self.node_quantifies_vars[node.id] = scope_vars
210
- return self
211
-
212
- def leave(self, node: ir.Node, parent: Optional[ir.Node]=None) -> ir.Node:
213
- if contains_only_declarable_constraints(node):
214
- return node
215
240
 
216
- if isinstance(node, (ir.Logical, ir.Not)):
217
- ignored_vars = _ignored_vars(node)
218
- self._handled_vars.difference_update(ignored_vars)
219
-
220
- scope_vars = self._vars_in_scope.get(node.id, None)
221
- if scope_vars:
222
- self._handled_vars.difference_update(scope_vars)
223
- return node
241
+ self.handled_vars[node.id] = handled_vars
242
+ return self
224
243
 
225
244
  class QuantifyVarsRewriter(Rewriter):
226
245
  """
@@ -254,7 +273,12 @@ class QuantifyVarsRewriter(Rewriter):
254
273
  # in IR directly may do so and the flatten pass doesn't split them yet.
255
274
  if len(agg_or_rank_tasks) > 0:
256
275
  print(f"Multiple aggregate/rank tasks found: {agg_or_rank_tasks} and {task}")
257
- agg_or_rank_tasks.append(task)
276
+ # If the agg/rank depends on any of the vars being quantified here,
277
+ # then it needs to be inside the quantification
278
+ if any(var in helpers.vars(task.projection) for var in vars):
279
+ inner_tasks.append(task)
280
+ else:
281
+ agg_or_rank_tasks.append(task)
258
282
 
259
283
  else:
260
284
  inner_tasks.append(task)
@@ -283,6 +307,16 @@ class QuantifyVarsRewriter(Rewriter):
283
307
 
284
308
  return node if node.task is new_task else f.not_(new_task)
285
309
 
310
+ def handle_union(self, node: ir.Union, parent: ir.Node, ctx:Optional[Any]=None) -> ir.Union:
311
+ if not node.tasks:
312
+ return node
313
+
314
+ new_tasks = self.walk_list(node.tasks, node)
315
+ return node if node.tasks is new_tasks else f.union(
316
+ tasks = new_tasks,
317
+ hoisted = node.hoisted,
318
+ )
319
+
286
320
  # To avoid unnecessary cloning of vars in the visitor.
287
321
  def handle_var(self, node: ir.Var, parent: ir.Node, ctx:Optional[Any]=None) -> ir.Var:
288
322
  return node