relationalai 0.13.4__py3-none-any.whl → 0.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/exec_txn_poller.py +51 -20
- relationalai/clients/local.py +15 -7
- relationalai/clients/resources/snowflake/__init__.py +2 -2
- relationalai/clients/resources/snowflake/direct_access_resources.py +8 -4
- relationalai/clients/resources/snowflake/snowflake.py +16 -11
- relationalai/experimental/solvers.py +8 -0
- relationalai/semantics/lqp/executor.py +3 -3
- relationalai/semantics/lqp/model2lqp.py +34 -28
- relationalai/semantics/lqp/passes.py +6 -3
- relationalai/semantics/lqp/result_helpers.py +76 -12
- relationalai/semantics/lqp/rewrite/__init__.py +2 -0
- relationalai/semantics/lqp/rewrite/extract_common.py +3 -1
- relationalai/semantics/lqp/rewrite/extract_keys.py +85 -20
- relationalai/semantics/lqp/rewrite/flatten_script.py +301 -0
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +12 -7
- relationalai/semantics/lqp/rewrite/quantify_vars.py +12 -3
- relationalai/semantics/lqp/rewrite/unify_definitions.py +9 -3
- relationalai/semantics/metamodel/dependency.py +9 -0
- relationalai/semantics/metamodel/executor.py +17 -10
- relationalai/semantics/metamodel/rewrite/__init__.py +2 -1
- relationalai/semantics/metamodel/rewrite/flatten.py +1 -2
- relationalai/semantics/metamodel/rewrite/format_outputs.py +131 -46
- relationalai/semantics/metamodel/rewrite/handle_aggregations_and_ranks.py +237 -0
- relationalai/semantics/metamodel/typer/typer.py +1 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +101 -107
- relationalai/semantics/rel/compiler.py +7 -3
- relationalai/semantics/rel/executor.py +1 -1
- relationalai/tools/txn_progress.py +188 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/METADATA +1 -1
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/RECORD +33 -30
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/WHEEL +0 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/entry_points.txt +0 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/licenses/LICENSE +0 -0
relationalai/semantics/lqp/rewrite/extract_keys.py

@@ -15,7 +15,7 @@ Given an Output with a group of keys (some of them potentially null),
 * generate all the valid combinations of keys being present or not
 * first all keys are present,
 * then we remove one key at a time,
-* then we remove two keys at a time,and so on.
+* then we remove two keys at a time, and so on.
 * the last combination is when all the *nullable* keys are missing.
 * for each combination:
 * create a compound (hash) key
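
The enumeration the docstring describes is a powerset walk over the nullable keys, ordered from all keys present down to all nullable keys missing. A minimal standalone sketch of that ordering (hypothetical helper, not the package's code):

    from itertools import combinations

    def key_presence_combinations(non_nullable, nullable):
        # Non-nullable keys appear in every combination; nullable keys are
        # dropped zero at a time, then one at a time, then two, and so on.
        for r in range(len(nullable), -1, -1):
            for present in combinations(nullable, r):
                yield tuple(non_nullable) + present

    # key_presence_combinations(["id"], ["a", "b"]) yields
    # ("id", "a", "b"), ("id", "a"), ("id", "b"), ("id",)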
@@ -103,10 +103,13 @@ Logical
     construct(Hash, "Foo", foo, compound_key)
     output[compound_key](v1, None, None)
 """
+
+
 class ExtractKeys(Pass):
-    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+    def rewrite(self, model: ir.Model, options: dict = {}) -> ir.Model:
         return ExtractKeysRewriter().walk(model)
 
+
 """
 * First, figure out all tasks that are common for all alternative logicals that will be
   generated
@@ -117,6 +120,8 @@ class ExtractKeys(Pass):
   missing (None will be filtered out in a later step -- we just need the column number to be
   the same here).
 """
+
+
 class ExtractKeysRewriter(Rewriter):
     def __init__(self):
         super().__init__()
@@ -129,7 +134,9 @@ class ExtractKeysRewriter(Rewriter):
             self.compound_keys[orig_keys] = compound_key
         return compound_key
 
-    def handle_logical(self, node: ir.Logical, parent: ir.Node, ctx: Optional[Any] = None) -> ir.Logical:
+    def handle_logical(
+        self, node: ir.Logical, parent: ir.Node, ctx: Optional[Any] = None
+    ) -> ir.Logical:
         outputs = [x for x in node.body if isinstance(x, ir.Output) and x.keys]
         # We are not in a logical with an output at this level.
         if not outputs:
@@ -170,7 +177,9 @@ class ExtractKeysRewriter(Rewriter):
         partitions, deps = self.partition_tasks(flat_body, all_vars)
 
         # Compute all valid key combinations (keys that are not null)
-        combinations = self.key_combinations(nullable_keys, deps, 0, non_nullable_keys.get_list())
+        combinations = self.key_combinations(
+            nullable_keys, deps, 0, non_nullable_keys.get_list()
+        )
         # there is no need to transform if there is only a single combination
         if len(combinations) == 1:
             return node
@@ -212,7 +221,9 @@ class ExtractKeysRewriter(Rewriter):
                 values.append(ir.Literal(types.String, key.type.name))
                 if key in key_combination:
                     values.append(key)
-            body.add(ir.Construct(None, tuple(values), compound_key, OrderedSet().frozen()))
+            body.add(
+                ir.Construct(None, tuple(values), compound_key, OrderedSet().frozen())
+            )
 
             # find variables used only inside the negated context
             negative_vars = OrderedSet[ir.Var]()
@@ -233,7 +244,9 @@ class ExtractKeysRewriter(Rewriter):
             problematic_out_vars = OrderedSet[ir.Var]()
             for out_var in out_vars:
                 out_deps = deps[out_var]
-                if out_var in missing_keys:
+                if out_var in negative_vars:
+                    missing_out_vars.add(out_var)
+                elif out_var in missing_keys:
                     missing_out_vars.add(out_var)
                 elif any(x in missing_keys for x in out_deps):
                     missing_out_vars.add(out_var)
@@ -248,8 +261,17 @@ class ExtractKeysRewriter(Rewriter):
             exclude_vars = out_vars
             has_problematic_var = False
 
-            self.negate_missing_keys(body, missing_keys, var_to_default, partitions, deps,
-                out_vars, exclude_vars, negative_vars, has_problematic_var)
+            self.negate_missing_keys(
+                body,
+                missing_keys,
+                var_to_default,
+                partitions,
+                deps,
+                out_vars,
+                exclude_vars,
+                negative_vars,
+                has_problematic_var,
+            )
 
             new_output_aliases = []
             for alias, out_value in output.aliases:
@@ -269,6 +291,23 @@ class ExtractKeysRewriter(Rewriter):
         # logicals that don't hoist variables are essentially filters like lookups
         if not node.hoisted:
             return True
+
+        # If the body contains an aggregate, and the Logical hoists only the aggregate
+        # output, then this node behaves as a lookup
+        if any(isinstance(t, ir.Aggregate) for t in node.body):
+            hoisted_vars = helpers.hoisted_vars(node.hoisted)
+            aggregate_outputs = []
+            for t in node.body:
+                if isinstance(t, ir.Aggregate):
+                    aggregate_outputs.extend(
+                        v
+                        for v in helpers.vars(t.args)
+                        if not helpers.is_aggregate_input(v, t)
+                    )
+
+            if hoisted_vars == aggregate_outputs:
+                return True
+
         if len(node.body) != 1:
             return False
         inner = node.body[0]
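
The added check treats a Logical as a lookup when its hoisted variables are exactly the outputs of the aggregates in its body. A toy restatement of the rule (stand-in class, not the package's IR):

    from dataclasses import dataclass, field

    @dataclass
    class Aggregate:  # stand-in for ir.Aggregate
        args: list
        inputs: set = field(default_factory=set)

    def hoists_only_aggregate_outputs(hoisted_vars, body):
        # Collect every aggregate output (args that are not inputs); if the
        # hoisted variables are exactly those, the node only binds them.
        outs = []
        for t in body:
            if isinstance(t, Aggregate):
                outs.extend(v for v in t.args if v not in t.inputs)
        return hoisted_vars == outs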
@@ -347,9 +386,9 @@ class ExtractKeysRewriter(Rewriter):
 
     # given a set of variables, compute the tasks that each variable is using and also
     # other variables needed for this variable to bind correctly
-    def partition_tasks(self, tasks:Iterable[ir.Task], vars:Iterable[ir.Var]):
-        partitions:dict[ir.Var, OrderedSet[ir.Task]] = defaultdict(OrderedSet)
-        dependencies:dict[ir.Var, OrderedSet[ir.Var]] = defaultdict(OrderedSet)
+    def partition_tasks(self, tasks: Iterable[ir.Task], vars: Iterable[ir.Var]):
+        partitions: dict[ir.Var, OrderedSet[ir.Task]] = defaultdict(OrderedSet)
+        dependencies: dict[ir.Var, OrderedSet[ir.Var]] = defaultdict(OrderedSet)
 
         def dfs_collect_deps(task, deps):
             if isinstance(task, ir.Lookup):
@@ -360,7 +399,7 @@ class ExtractKeysRewriter(Rewriter):
                     deps[v].add(args[j])
                     # for ternary+ lookups, a var also depends on the next vars
                     if i > 0 and len(args) >= 3:
-                        for j in range(i+1, len(args)):
+                        for j in range(i + 1, len(args)):
                             deps[v].add(args[j])
             elif isinstance(task, ir.Construct):
                 vars = helpers.vars(task.values)
@@ -436,11 +475,21 @@ class ExtractKeysRewriter(Rewriter):
         return partitions, dependencies
 
     # Generate all the valid combinations of non-nullable keys and nullable keys.
-    def key_combinations(self, nullable_keys:OrderedSet[ir.Var], key_deps, idx:int, non_null_keys:list[ir.Var]) -> OrderedSet[Tuple[ir.Var]]:
+    def key_combinations(
+        self,
+        nullable_keys: OrderedSet[ir.Var],
+        key_deps,
+        idx: int,
+        non_null_keys: list[ir.Var],
+    ) -> OrderedSet[Tuple[ir.Var]]:
         if idx < len(nullable_keys):
             key = nullable_keys[idx]
-            set1 = self.key_combinations(nullable_keys, key_deps, idx+1, non_null_keys + [key])
-            set2 = self.key_combinations(nullable_keys, key_deps, idx+1, non_null_keys)
+            set1 = self.key_combinations(
+                nullable_keys, key_deps, idx + 1, non_null_keys + [key]
+            )
+            set2 = self.key_combinations(
+                nullable_keys, key_deps, idx + 1, non_null_keys
+            )
             set1.update(set2)
             return set1
         else:
@@ -449,13 +498,25 @@ class ExtractKeysRewriter(Rewriter):
             # If a key depends on other keys, all of them should be present in this combination.
             # If some dependency is not present, ignore the current key.
             deps = key_deps.get(k)
-            if deps and any(dk in nullable_keys and dk not in non_null_keys for dk in deps):
+            if deps and any(
+                dk in nullable_keys and dk not in non_null_keys for dk in deps
+            ):
                 continue
             final_keys.append(k)
         return OrderedSet.from_iterable([tuple(final_keys)])
 
-    def negate_missing_keys(self, body, missing_keys, var_to_default, partitions, deps,
-            out_vars, exclude_vars, negative_vars, has_problematic_var:bool):
+    def negate_missing_keys(
+        self,
+        body,
+        missing_keys,
+        var_to_default,
+        partitions,
+        deps,
+        out_vars,
+        exclude_vars,
+        negative_vars,
+        has_problematic_var: bool,
+    ):
         # for keys that are not present in the current combination
         # we have to include their tasks negated
         negated_tasks = OrderedSet[ir.Task]()
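
The deps check inside key_combinations drops a key from a combination when any nullable key it depends on is absent. In isolation the rule looks like this (hypothetical standalone function):

    def filter_by_deps(combination, key_deps, nullable_keys):
        # Keep a key only if every nullable key it depends on is also present.
        kept = []
        for k in combination:
            deps = key_deps.get(k)
            if deps and any(d in nullable_keys and d not in combination for d in deps):
                continue
            kept.append(k)
        return tuple(kept)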
@@ -492,7 +553,9 @@ class ExtractKeysRewriter(Rewriter):
             out_deps = deps[out_var]
             if has_problematic_var and any(x in missing_keys for x in out_deps):
                 continue
-            elif not has_problematic_var and any(x in missing_keys for x in out_deps):
+            elif not has_problematic_var and any(
+                x in missing_keys or x in negative_vars for x in out_deps
+            ):
                 continue
 
             default = var_to_default.get(out_var)
@@ -509,4 +572,6 @@ class ExtractKeysRewriter(Rewriter):
             else:
                 property_body.update(partition)
             if property_body:
-                body.add(f.logical(tuple(property_body), [default] if default else [out_var]))
+                body.add(
+                    f.logical(tuple(property_body), [default] if default else [out_var])
+                )
relationalai/semantics/lqp/rewrite/flatten_script.py (new file)

@@ -0,0 +1,301 @@
+from __future__ import annotations
+
+from relationalai.semantics.metamodel import ir, factory as f, helpers
+from relationalai.semantics.metamodel.util import OrderedSet, ordered_set
+from relationalai.semantics.metamodel.rewrite.flatten import Flatten, negate, extend_body
+from relationalai.semantics.lqp.algorithms import is_script, mk_assign
+
+class FlattenScript(Flatten):
+    """
+    Flattens Match nodes inside @script Sequence blocks. This pass extends Flatten
+    of standard Logicals, to reuse a number of utilities, but DOES NOT flatten Match
+    nodes outside of scripts (which are handled by the Flatten pass).
+
+    Unlike the regular Flatten pass which extracts to top-level, FlattenScript
+    maintains order by inserting intermediate relations right before they're used
+    within the Sequence. This is necessary because order matters in a script Sequence.
+
+    Additionally, if the original Logical is a Loopy instruction, the flattened Logicals are
+    made Loopy too (`@assign`).
+
+    Example:
+
+    === BEFORE ===
+    Logical
+        Sequence @script @algorithm
+            Logical
+                dom(n)
+                Match ⇑[k]
+                    Logical ⇑[k]
+                        value(n, k)
+                    Logical ⇑[k]
+                        k = 0
+                → derive result(n, k) @assign @global
+                filter(n)
+
+    === AFTER ===
+    Logical
+        Sequence @script @algorithm
+            Logical
+                dom(n)
+                filter(n)
+                Logical ⇑[v]
+                    value(n, v)
+                → derive _match_1(n, v) @assign
+            Logical
+                dom(n)
+                filter(n)
+                Logical ⇑[v]
+                    v = 0
+                Not
+                    _match_1(n, _)
+                → derive _match_2(n, v) @assign
+            Logical
+                dom(n)
+                filter(n)
+                Union ⇑[v]
+                    _match_1(n, v)
+                    _match_2(n, v)
+                → derive result(n, v) @assign @global
+    """
+
+    class Context(Flatten.Context):
+        """Extended context with script tracking."""
+        def __init__(self, model: ir.Model, options: dict):
+            super().__init__(model, options)
+            self.in_script: bool = False
+
+    def rewrite(self, model: ir.Model, options: dict = {}) -> ir.Model:
+        """Traverse the model and flatten Match nodes inside script Sequences."""
+        ctx = FlattenScript.Context(model, options)
+        result = self.handle(model.root, ctx)
+
+        if result.replacement is None:
+            return model
+
+        # Convert relations list to FrozenOrderedSet (adding any new intermediate relations)
+        new_relations = OrderedSet.from_iterable(model.relations).update(ctx.rewrite_ctx.relations).frozen()
+
+        return ir.Model(
+            model.engines,
+            new_relations,
+            model.types,
+            result.replacement
+        )
+
+    def handle(self, task: ir.Task, ctx: Flatten.Context) -> Flatten.HandleResult:
+        """Override handle to add Loop support."""
+        if isinstance(task, ir.Loop):
+            return self.handle_loop(task, ctx)
+        return super().handle(task, ctx)
+
+    def handle_loop(self, task: ir.Loop, ctx: Flatten.Context) -> Flatten.HandleResult:
+        """Recursively handle the body of the loop."""
+        result = self.handle(task.body, ctx)
+
+        assert(result.replacement)
+
+        # If body unchanged, return original loop
+        if result.replacement is task.body:
+            return Flatten.HandleResult(task)
+
+        # Return new loop with handled body
+        return Flatten.HandleResult(ir.Loop(
+            task.engine,
+            task.hoisted,
+            task.iter,
+            result.replacement,
+            task.concurrency,
+            task.annotations
+        ))
+
+    def handle_logical(self, task: ir.Logical, ctx: Context):  # type: ignore[override]
+        """
+        Handle Logical nodes.
+
+        Outside scripts: simple traversal to find nested scripts.
+        Inside scripts: prevent extraction to top-level and keep everything in sequence.
+
+        Note: Type checker complains about parameter type narrowing, but this is safe
+        because we only create FlattenScript.Context in our own rewrite() method.
+        """
+        # Recursively process children
+        body: OrderedSet[ir.Task] = ordered_set()
+        for child in task.body:
+            result = self.handle(child, ctx)
+            if result.replacement is not None:
+                if ctx.in_script and isinstance(result.replacement, ir.Logical) and not result.replacement.hoisted:
+                    # Inside script: inline simple logicals without hoisting
+                    body.update(result.replacement.body)
+                else:
+                    body.add(result.replacement)
+
+        if not body:
+            return Flatten.HandleResult(None)
+
+        return Flatten.HandleResult(
+            ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
+        )
+
+    def flatten_match_in_logical(self, logical: ir.Logical, match: ir.Match, match_idx: int, ctx: Context) -> list[ir.Logical]:
+        """
+        Flatten a Match inside a Logical within a script Sequence.
+        Returns a list of Logicals to be inserted in sequence.
+        """
+        if not match.tasks:
+            return [logical]
+
+        # Split the logical into: tasks_before, match, tasks_after
+        tasks_before = list(logical.body[:match_idx])
+        tasks_after = list(logical.body[match_idx + 1:])
+
+        # Separate tasks_after into filters (non-Update tasks) and updates (Update tasks)
+        # Filters are constraints that should be included in all branches
+        filters = [task for task in tasks_after if not isinstance(task, ir.Update)]
+        updates = [task for task in tasks_after if isinstance(task, ir.Update)]
+
+        # Compute exposed variables
+        exposed_vars = self.compute_exposed_vars(match, match.tasks, ctx)
+
+        # Use dependency analysis for branch bodies (like flatten.py does)
+        branch_dependencies = ctx.info.task_dependencies(match)
+
+        # Collect all dependencies for the final Logical (tasks before + filters after)
+        final_dependencies = tasks_before + filters
+
+        # Negation length for wildcards
+        outputs = ctx.info.task_outputs(match)
+        negation_len = len(outputs) if outputs else 0
+
+        # Result: list of Logicals to insert
+        result_logicals = []
+        references = []
+        negated_reference = None
+
+        # Process each branch
+        for branch in match.tasks:
+            # Create connection relation for this branch
+            name = helpers.create_task_name(self.name_cache, branch, "_match")
+            relation = helpers.create_connection_relation(branch, exposed_vars, ctx.rewrite_ctx, name)
+
+            # Handle the branch (recursively process nested structures)
+            result = self.handle(branch, ctx)
+            branch_content = result.replacement if result.replacement else branch
+
+            # Update dependency tracking if branch was transformed
+            if result.replacement:
+                ctx.info.replaced(branch, result.replacement)
+
+            # Build logical for this branch
+            branch_body: OrderedSet[ir.Task] = ordered_set()
+
+            # Add dependencies (using dependency analysis, not all tasks)
+            branch_body.update(branch_dependencies)
+
+            # Add branch content using extend_body helper
+            extend_body(branch_body, branch_content)
+
+            # Add negation of previous branches (after branch content)
+            if negated_reference:
+                branch_body.add(negated_reference)
+
+            # Add derive to connection relation
+            branch_update = f.derive(relation, exposed_vars)
+            branch_body.add(branch_update)
+
+            # Create the Logical for this branch
+            branch_logical = mk_assign(ir.Logical(match.engine, tuple(), tuple(branch_body)))
+
+            result_logicals.append(branch_logical)
+
+            # Update references for final union
+            reference = f.lookup(relation, exposed_vars)
+            negated_reference = negate(reference, negation_len)
+            references.append(reference)
+
+        # Create final Logical with Union and remaining tasks
+        final_body: OrderedSet[ir.Task] = ordered_set()
+        final_body.update(final_dependencies)
+
+        # Add union of all branches
+        union = f.union(references, match.hoisted)
+        final_body.add(union)
+
+        # Add updates that came after the match (filters are already in dependencies)
+        final_body.update(updates)
+
+        # Create final logical preserving the original annotations
+        final_logical = ir.Logical(logical.engine, logical.hoisted, tuple(final_body), logical.annotations)
+
+        result_logicals.append(final_logical)
+
+        return result_logicals
+
+    def handle_sequence(self, task: ir.Sequence, ctx: Context):  # type: ignore[override]
+        """
+        Handle a Sequence.
+
+        If it's a script: set context flag and flatten Match nodes within.
+        If not a script: simple traversal (Flatten pass already processed it).
+
+        Note: Type checker complains about parameter type narrowing, but this is safe
+        because we only create FlattenScript.Context in our own rewrite() method.
+        """
+        if not is_script(task):
+            # Not a script sequence - already processed by Flatten, just return as-is
+            return Flatten.HandleResult(task)
+
+        # This is a script - mark context and process with flattening
+        old_in_script = ctx.in_script
+        ctx.in_script = True
+
+        # Process the sequence tasks
+        new_tasks: list[ir.Task] = []
+        for child in task.tasks:
+            # Check if this child is a Logical with Match that needs flattening
+            if isinstance(child, ir.Logical):
+                new_tasks.extend(self.try_flatten_logical(child, ctx))
+                continue
+
+            # No flattening needed, process normally
+            result = self.handle(child, ctx)
+            if result.replacement is not None:
+                new_tasks.append(result.replacement)
+
+        # Restore context
+        ctx.in_script = old_in_script
+
+        return Flatten.HandleResult(
+            ir.Sequence(task.engine, task.hoisted, tuple(new_tasks), task.annotations)
+        )
+
+    def try_flatten_logical(self, logical: ir.Logical, ctx: Context) -> list[ir.Logical]:
+        """
+        Flatten all Matches in a Logical.
+        Iteratively flattens until no more Matches remain in any of the resulting Logicals.
+        """
+        worklist = [logical]
+        result = []
+
+        while worklist:
+            current = worklist.pop()
+
+            # Find first Match in current logical
+            match = None
+            match_idx = -1
+            for i, child in enumerate(current.body):
+                if isinstance(child, ir.Match):
+                    match = child
+                    match_idx = i
+                    break
+
+            if match is None:
+                # No Match found - this logical is done
+                result.append(current)
+            else:
+                # Flatten and add results back to worklist for further processing.
+                # Reverse so that pop() returns them in the original order.
+                flattened = self.flatten_match_in_logical(current, match, match_idx, ctx)
+                worklist.extend(reversed(flattened))
+
+        return result
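
The `_match_1` / `Not _match_1` / Union chaining in the AFTER example is the usual first-match-wins encoding of Match: each branch derives into its own intermediate relation and is guarded by the negation of the previous branch's relation. A plain-Python illustration of that semantics (hypothetical, only to show the effect):

    def first_match_wins(domain, branches):
        # branches: (condition, value) pairs; each element takes its value
        # from the first branch whose condition holds, mirroring the
        # negation chain over _match_1, _match_2, ... above.
        matched = {}
        for cond, value in branches:
            for n in domain:
                if n not in matched and cond(n):  # "Not _match_i(n, _)"
                    matched[n] = value(n)
        return matched

    # first_match_wins([1, 2, 3], [(lambda n: n > 2, lambda n: n),
    #                              (lambda n: True, lambda n: 0)])
    # -> {3: 3, 1: 0, 2: 0}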
relationalai/semantics/lqp/rewrite/functional_dependencies.py

@@ -322,22 +322,27 @@ class FunctionalDependency:
 
 def contains_only_declarable_constraints(node: Node) -> bool:
     """
-    Checks whether the input Logical node contains only `Require` nodes annotated with
-    `declare_constraint`.
+    Checks whether the input node contains only `Require` nodes annotated with
+    `declare_constraint` (or such a node itself).
     """
+    # Check if the node itself is a Require node with declarable constraint
+    if isinstance(node, Require):
+        return is_declarable_constraint(node)
+
+    # Otherwise, check if it is a Logical node containing only declarable constraints
     if not isinstance(node, Logical):
         return False
     if len(node.body) == 0:
         return False
     for task in node.body:
-        if not isinstance(task, Require):
-            return False
-        if not is_declarable_constraint(task):
+        if not contains_only_declarable_constraints(task):
             return False
     return True
 
-def is_declarable_constraint(node: Require) -> bool:
+def is_declarable_constraint(node: Node) -> bool:
     """
-    Checks whether the input `Require` node is annotated with `declare_constraint`.
+    Checks whether the input node is a `Require` node annotated with `declare_constraint`.
     """
+    if not isinstance(node, Require):
+        return False
     return builtins.declare_constraint_annotation in node.annotations
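
After this change the check is recursive: a `Require` node passes on its own, and a `Logical` passes only if every child passes. A toy model of the same shape (hypothetical stand-in classes):

    class Require:
        def __init__(self, declared=True):
            self.declared = declared

    class Logical:
        def __init__(self, *body):
            self.body = body

    def only_declarable(node):
        # Mirrors the recursion above: Require nodes are checked directly,
        # Logical nodes must contain only declarable constraints.
        if isinstance(node, Require):
            return node.declared
        if isinstance(node, Logical) and node.body:
            return all(only_declarable(t) for t in node.body)
        return False

    assert only_declarable(Logical(Require(), Logical(Require())))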
relationalai/semantics/lqp/rewrite/quantify_vars.py

@@ -45,7 +45,7 @@ def _ignored_vars(node: ir.Logical|ir.Not):
 
         elif isinstance(task, ir.Rank):
             # Variables that are keys, and not in the group-by, don't need to be quantified.
-            for var in task.args
+            for var in task.args:
                 if var not in task.group:
                     vars_to_ignore.add(var)
 
@@ -149,13 +149,22 @@ class VarScopeInfo(Visitor):
         if isinstance(task, ir.Output):
             output_vars.update(helpers.output_vars(task.aliases))
 
-        if isinstance(task, (ir.Aggregate, ir.Rank)):
-            # Variables that are in the group-by, and not in the args, can come into scope.
+        if isinstance(task, ir.Aggregate):
+            # Variables that are in the group-by, and not in the args,
+            # can come into scope.
             for var in task.group:
                 if var not in task.args:
                     scope_vars.add(var)
             continue
 
+        if isinstance(task, ir.Rank):
+            # Variables that are in the group-by or projection, and not in the args,
+            # can come into scope.
+            for var in task.group + task.projection:
+                if var not in task.args:
+                    scope_vars.add(var)
+            continue
+
         # Hoisted variables from sub-tasks are brought again into scope.
         if isinstance(task, (ir.Logical, ir.Union, ir.Match)):
             scope_vars.update(helpers.hoisted_vars(task.hoisted))
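
The new `ir.Rank` branch mirrors the `ir.Aggregate` one but also considers the projection. Condensed, the rule is (hypothetical task object):

    def rank_scope_vars(task):
        # Group-by and projection variables that are not arguments of the
        # Rank come into scope.
        return [v for v in task.group + task.projection if v not in task.args]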
relationalai/semantics/lqp/rewrite/unify_definitions.py

@@ -54,9 +54,9 @@ class UnifyDefinitions(Pass):
             return head.relation
         else:
             assert isinstance(head, ir.Output)
-            if len(head.aliases)
-                # For processing here, we need output to have at least the column
-                # `cols` and `col
+            if len(head.aliases) < 2:
+                # For processing here, we need output to have at least the column marker
+                # `keys` or both `cols` and `col` markers, and also a key
                 return None
 
             output_alias_names = helpers.output_alias_names(head.aliases)
@@ -65,6 +65,8 @@ class UnifyDefinitions(Pass):
             # For normal outputs, the pattern is output[keys](cols, "col000" as 'col', ...)
             if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
                 return output_vals[1]
+            if output_alias_names[0] == "keys":  # handle wide keys relation
+                return output_vals[0]
 
             # For exports, the pattern is output[keys]("col000" as 'col', ...)
             if helpers.is_export(head):
@@ -122,6 +124,10 @@ class UnifyDefinitions(Pass):
             # keys.
             output_values = helpers.output_values(head.aliases)[2:]
 
+        elif output_alias_names[0] == "keys":  # handle wide keys output
+            assert len(head.aliases) > 1
+            output_values = helpers.output_values(head.aliases)[1:]
+
         else:
             assert helpers.is_export(head) and output_alias_names[0] == "col"
             assert len(head.aliases) > 1
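
Together these two hunks add a third output shape, keyed on the first alias name. A toy dispatch showing the shapes side by side (hypothetical helper, slices taken from the hunks above):

    def output_values_for(alias_names, values):
        # Wide-keys outputs start with a "keys" marker; normal outputs start
        # with "cols" followed by "col".
        if alias_names[0] == "keys":
            return values[1:]
        if alias_names[:2] == ["cols", "col"]:
            return values[2:]
        raise ValueError("unrecognized output shape")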
relationalai/semantics/metamodel/dependency.py

@@ -500,6 +500,15 @@ class DependencyAnalysis(visitor.Visitor):
             assert(isinstance(c2task, helpers.COMPOSITES))
             if not c2task.hoisted:
                 return True
+
+            # c1 is a composite with hoisted variables; it depends on c2 if c2 is mergeable
+            # and they share variables, hence behaving like a filter
+            if c1.composite and c2.mergeable and c1.shares_variable(c2):
+                c1task = c1.content.some()
+                assert(isinstance(c1task, helpers.COMPOSITES))
+                if c1task.hoisted:
+                    return True
+
             return False
 
         cs = list(clusters)
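
The added branch makes the filter rule symmetric: a composite cluster that hoists variables now also depends on a mergeable cluster it shares variables with, since the mergeable side behaves like a filter for it. A runnable toy version of just the new case (hypothetical Cluster stand-in):

    from dataclasses import dataclass

    @dataclass
    class Cluster:  # toy stand-in for the analysis clusters
        composite: bool
        mergeable: bool
        hoisted: bool
        vars: frozenset

        def shares_variable(self, other):
            return bool(self.vars & other.vars)

    def filter_dependency_new_case(c1, c2):
        # c1 is a composite that hoists variables; c2 is mergeable and
        # shares a variable with c1, so c2 must be computed first.
        return (c1.composite and c2.mergeable
                and c1.shares_variable(c2) and c1.hoisted)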