relationalai 0.12.6__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. relationalai/clients/snowflake.py +48 -7
  2. relationalai/clients/use_index_poller.py +11 -1
  3. relationalai/early_access/lqp/constructors/__init__.py +2 -2
  4. relationalai/early_access/metamodel/rewrite/__init__.py +2 -2
  5. relationalai/semantics/internal/internal.py +1 -4
  6. relationalai/semantics/internal/snowflake.py +14 -1
  7. relationalai/semantics/lqp/constructors.py +0 -5
  8. relationalai/semantics/lqp/executor.py +34 -10
  9. relationalai/semantics/lqp/intrinsics.py +2 -2
  10. relationalai/semantics/lqp/model2lqp.py +10 -7
  11. relationalai/semantics/lqp/passes.py +29 -9
  12. relationalai/semantics/lqp/primitives.py +15 -15
  13. relationalai/semantics/lqp/rewrite/__init__.py +2 -2
  14. relationalai/semantics/lqp/rewrite/{fd_constraints.py → function_annotations.py} +4 -4
  15. relationalai/semantics/lqp/utils.py +17 -13
  16. relationalai/semantics/metamodel/builtins.py +1 -0
  17. relationalai/semantics/metamodel/rewrite/__init__.py +2 -1
  18. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +1 -1
  19. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +5 -6
  20. relationalai/semantics/metamodel/rewrite/flatten.py +18 -149
  21. relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  22. relationalai/semantics/reasoners/graph/core.py +98 -70
  23. relationalai/semantics/reasoners/optimization/__init__.py +55 -10
  24. relationalai/semantics/reasoners/optimization/common.py +63 -8
  25. relationalai/semantics/reasoners/optimization/solvers_dev.py +39 -33
  26. relationalai/semantics/reasoners/optimization/solvers_pb.py +1033 -385
  27. relationalai/semantics/rel/compiler.py +4 -3
  28. relationalai/semantics/rel/executor.py +30 -8
  29. relationalai/semantics/snowflake/__init__.py +2 -2
  30. relationalai/semantics/sql/executor/snowflake.py +6 -2
  31. relationalai/semantics/tests/test_snapshot_abstract.py +5 -4
  32. relationalai/tools/cli.py +10 -0
  33. relationalai/tools/cli_controls.py +15 -0
  34. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/METADATA +2 -2
  35. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/RECORD +38 -37
  36. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/WHEEL +0 -0
  37. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/entry_points.txt +0 -0
  38. {relationalai-0.12.6.dist-info → relationalai-0.12.8.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,6 @@
1
1
  from relationalai.semantics.lqp import ir as lqp
2
2
  from relationalai.semantics.metamodel import ir
3
+ from relationalai.semantics.metamodel.helpers import sanitize
3
4
  from relationalai.semantics.metamodel.util import FrozenOrderedSet
4
5
 
5
6
  from hashlib import sha256
@@ -13,21 +14,24 @@ class UniqueNames:
13
14
  self.id_to_name = dict[int,str]()
14
15
 
15
16
  def get_name(self, name: str) -> str:
16
- if name in self.seen:
17
- self.seen[name] += 1
18
- id = self.seen[name]
19
- # If the original name has a suffix we can get collisions with generated names,
20
- # so test the new name.
21
- while f"{name}_{id}" in self.seen:
22
- id += 1
23
- self.seen[name] = id
24
- new_name = f"{name}_{id}"
25
- self.seen[new_name] = 1
26
- return new_name
27
- else:
17
+ # Names will eventually be sanitized, which could cause collisions, so we
18
+ # do the sanitization here.
19
+ name = '_' if name == '_' else sanitize(name)
20
+ if name not in self.seen:
28
21
  self.seen[name] = 1
29
22
  return f"{name}"
30
23
 
24
+ self.seen[name] += 1
25
+ id = self.seen[name]
26
+ # If the original name has a suffix we can get collisions with generated names,
27
+ # so test the new name.
28
+ while f"{name}_{id}" in self.seen:
29
+ id += 1
30
+ self.seen[name] = id
31
+ new_name = f"{name}_{id}"
32
+ self.seen[new_name] = 1
33
+ return new_name
34
+
31
35
  # Get a unique name for the given id. If the id is already in the map, return the
32
36
  # existing name. Otherwise, generate a new name using the suggested_name and
33
37
  # store it in the map.
@@ -55,7 +59,7 @@ def gen_rel_id(ctx: TranslationCtx, orig_name: str, suffix: str = "") -> lqp.Rel
55
59
  ctx.rel_id_to_orig_name[relation_id] = orig_name
56
60
  return relation_id
57
61
 
58
- def gen_unique_var(ctx: TranslationCtx, name_hint: str):
62
+ def gen_unique_var(ctx: TranslationCtx, name_hint: str) -> lqp.Var:
59
63
  """
60
64
  Generate a new variable with a unique name based on the provided hint.
61
65
  """
@@ -654,6 +654,7 @@ rel_primitive_solverlib_ho_appl = aggregation("rel_primitive_solverlib_ho_appl",
654
654
  ])
655
655
  implies = f.relation("implies", [f.input_field("a", types.Bool), f.input_field("b", types.Bool)])
656
656
  all_different = aggregation("all_different", [f.input_field("over", types.Any)])
657
+ special_ordered_set_type_2 = aggregation("special_ordered_set_type_2", [f.input_field("rank", types.Any)])
657
658
 
658
659
  # graph primitive algorithm helpers
659
660
  infomap = aggregation("infomap", [
@@ -2,5 +2,6 @@ from .discharge_constraints import DischargeConstraints
2
2
  from .dnf_union_splitter import DNFUnionSplitter
3
3
  from .extract_nested_logicals import ExtractNestedLogicals
4
4
  from .flatten import Flatten
5
+ from .format_outputs import FormatOutputs
5
6
 
6
- __all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten"]
7
+ __all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten", "FormatOutputs"]
@@ -150,7 +150,7 @@ class DNFExtractor(Visitor):
150
150
 
151
151
  replacement_tasks: list[ir.Task] = []
152
152
  for body in replacement_bodies:
153
- new_task = f.logical(body)
153
+ new_task = f.logical(body, node.hoisted)
154
154
  replacement_tasks.append(new_task)
155
155
  self.replaced_by[node] = replacement_tasks
156
156
 
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from relationalai.semantics.metamodel import ir, factory as f, helpers
4
- from relationalai.semantics.metamodel.visitor import Rewriter
4
+ from relationalai.semantics.metamodel.visitor import Rewriter, collect_by_type
5
5
  from relationalai.semantics.metamodel.compiler import Pass
6
6
  from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
7
7
  from relationalai.semantics.metamodel import dependency
@@ -61,11 +61,10 @@ class LogicalExtractor(Rewriter):
61
61
 
62
62
  # if there are aggregations, make sure we don't expose the projected and input vars,
63
63
  # but expose groupbys
64
- for child in node.body:
65
- if isinstance(child, ir.Aggregate):
66
- exposed_vars.difference_update(child.projection)
67
- exposed_vars.difference_update(helpers.aggregate_inputs(child))
68
- exposed_vars.update(child.group)
64
+ for agg in collect_by_type(ir.Aggregate, node):
65
+ exposed_vars.difference_update(agg.projection)
66
+ exposed_vars.difference_update(helpers.aggregate_inputs(agg))
67
+ exposed_vars.update(agg.group)
69
68
  # add the values (hoisted)
70
69
  exposed_vars.update(helpers.hoisted_vars(logical.hoisted))
71
70
 
@@ -3,12 +3,11 @@ from dataclasses import dataclass
3
3
  from typing import cast, Optional, TypeVar
4
4
  from typing import Tuple
5
5
 
6
- from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers, types
6
+ from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers
7
7
  from relationalai.semantics.metamodel.compiler import Pass, group_tasks
8
8
  from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
9
9
  from relationalai.semantics.metamodel import dependency
10
- from relationalai.semantics.metamodel.util import FrozenOrderedSet, filter_by_type
11
- from relationalai.semantics.metamodel.typer.typer import to_type, is_primitive
10
+ from relationalai.semantics.metamodel.typer.typer import to_type
12
11
 
13
12
  class Flatten(Pass):
14
13
  """
@@ -225,15 +224,26 @@ class Flatten(Pass):
225
224
  "ranks": ir.Rank,
226
225
  })
227
226
 
228
- # if there are outputs, adjust them (depending on the config for wide vs gnf)
227
+ # If there are outputs, flatten each into its own top-level rule, along with its
228
+ # dependencies.
229
229
  if groups["outputs"]:
230
- if self._handle_outputs:
231
- return self.adjust_outputs(task, body, groups, ctx)
232
- else:
233
- # When we do not handle outputs. For example, in SQL compiler. We need to leave output as a top-level element.
230
+ if not self._handle_outputs:
234
231
  ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
235
232
  return Flatten.HandleResult(None)
236
233
 
234
+ # Analyze the dependencies in the newly rewritten body
235
+ new_logical = ir.Logical(task.engine, task.hoisted, tuple(body))
236
+ info = dependency.analyze(new_logical)
237
+
238
+ for output in groups["outputs"]:
239
+ assert(isinstance(output, ir.Output))
240
+ new_body = info.task_dependencies(output)
241
+ new_body.update(ctx.extra_tasks)
242
+ new_body.add(output)
243
+ ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(new_body), task.annotations))
244
+
245
+ return Flatten.HandleResult(None)
246
+
237
247
  # if there are updates, extract as a new top level rule
238
248
  if groups["updates"]:
239
249
  # add task dependencies to the body
@@ -455,147 +465,6 @@ class Flatten(Pass):
455
465
  task.annotations
456
466
  ))
457
467
 
458
- #--------------------------------------------------
459
- # GNF vs wide output support
460
- #--------------------------------------------------
461
- def adjust_outputs(self, task: ir.Logical, body: OrderedSet[ir.Task], groups: dict[str, OrderedSet[ir.Task]], ctx: Context):
462
-
463
- # for wide outputs, only adjust the output task to include the keys.
464
- if ctx.options.get("wide_outputs", False):
465
- for output in groups["outputs"]:
466
- assert(isinstance(output, ir.Output))
467
- if output.keys:
468
- body.remove(output)
469
- body.add(self.rewrite_wide_output(output))
470
- # self.remove_subsumptions(body, ctx)
471
- return Flatten.HandleResult(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
472
-
473
- # for GNF outputs we need to generate a rule for each "column" in the output
474
- else:
475
- # first split outputs in potentially multiple outputs, one for each "column"
476
- for output in groups["outputs"]:
477
- assert(isinstance(output, ir.Output))
478
- if output.keys:
479
- # we will replace the output bellow,
480
- body.remove(output)
481
-
482
- is_export = builtins.export_annotation in output.annotations
483
-
484
- # generate an output for each "column"
485
- # output looks like def output(:cols, :col000, key0, key1, value):
486
- original_cols = OrderedSet()
487
- for idx, alias in enumerate(output.aliases):
488
- # skip None values which are used as a placeholder for missing values
489
- if alias[1] is None:
490
- continue
491
- original_cols.add(alias[1])
492
- self._generate_output_column(body, output, idx, alias, is_export)
493
-
494
- idx = len(output.aliases)
495
- for key in output.keys:
496
- if key not in original_cols:
497
- self._generate_output_column(body, output, idx, (key.name, key), is_export)
498
- idx += 1
499
-
500
- # analyse the resulting logical to be able to pull dependencies
501
- logical = ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
502
- info = dependency.analyze(logical)
503
-
504
- # now extract a logical for each output, bringing together its dependencies
505
- for output in filter_by_type(body, ir.Output):
506
- deps = info.task_dependencies(output)
507
- # TODO: verify safety of doing this
508
- # self.remove_subsumptions(deps, ctx)
509
-
510
- deps.add(output)
511
- ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, tuple(), tuple(deps)))
512
-
513
- return Flatten.HandleResult(None)
514
-
515
- def _generate_output_column(self, body: OrderedSet[ir.Task], output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
516
- if not output.keys:
517
- return output
518
-
519
- aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
520
- aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
521
-
522
- for k in output.keys:
523
- aliases.append((f"key_{k.name}_{idx}", k))
524
-
525
- if (is_export and
526
- isinstance(alias[1], ir.Var) and
527
- (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
528
-
529
- uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
530
- body.add(f.lookup(builtins.uuid_to_string, [alias[1], uuid]))
531
- aliases.append((uuid.name, uuid))
532
- else:
533
- aliases.append(alias)
534
-
535
- body.add(ir.Output(
536
- output.engine,
537
- FrozenOrderedSet.from_iterable(aliases),
538
- output.keys,
539
- output.annotations
540
- ))
541
-
542
-
543
- def remove_subsumptions(self, body:OrderedSet[ir.Task], ctx: Context):
544
- # remove from the body all the tasks that are subsumed by some other task in the set;
545
- # this can be done because some tasks are references to extracted nested logical that
546
- # contain filters they dependend on, so we don't need those filters here if the
547
- # reference is present.
548
- for logical in filter_by_type(body, ir.Logical):
549
- if logical.id in ctx.included:
550
- # if the logical id is included, it means it's a reference to an extracted
551
- # rule, so remove all other items in the body that are already included in
552
- # the body referenced by it
553
- for item in body:
554
- if item in ctx.included[logical.id]:
555
- body.remove(item)
556
-
557
-
558
- def rewrite_wide_output(self, output: ir.Output):
559
- assert(output.keys)
560
-
561
- # only append keys that are not already in the output
562
- suffix_keys = []
563
- for key in output.keys:
564
- if all([val is not key for _, val in output.aliases]):
565
- suffix_keys.append(key)
566
-
567
- aliases: OrderedSet[Tuple[str, ir.Value]] = ordered_set()
568
-
569
- # add the remaining args, unless it is already a key
570
- for name, val in output.aliases:
571
- if not isinstance(val, ir.Var) or val not in suffix_keys:
572
- aliases.add((name, val))
573
-
574
- # add the keys to the output
575
- for key in suffix_keys:
576
- aliases.add((key.name, key))
577
-
578
- # TODO - we are assuming that the Rel compiler will translate nullable lookups
579
- # properly, returning a `Missing` if necessary, like this:
580
- # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
581
- return ir.Output(
582
- output.engine,
583
- aliases.frozen(),
584
- output.keys,
585
- output.annotations
586
- )
587
-
588
- # TODO: in the rel compiler, see if we can do this outer join
589
- # 1. number of keys
590
- # 2. each relation
591
- # 3. each variable, starting with the keys
592
- # 4. tag output with @arrow
593
-
594
- # @arrow def output(_book, _book_title, _author_name):
595
- # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
596
- # def output(p, n, c):
597
- # rel_primitive_outer_join(#1, name, coolness, p, n, c)
598
-
599
468
  #--------------------------------------------------
600
469
  # Helpers
601
470
  #--------------------------------------------------
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+ from typing import Tuple
3
+
4
+ from relationalai.semantics.metamodel import builtins, ir, factory as f, types, visitor
5
+ from relationalai.semantics.metamodel.compiler import Pass, group_tasks
6
+ from relationalai.semantics.metamodel.util import OrderedSet
7
+ from relationalai.semantics.metamodel.util import FrozenOrderedSet
8
+ from relationalai.semantics.metamodel.typer.typer import is_primitive
9
+
10
+ class FormatOutputs(Pass):
11
+ def __init__(self, handle_outputs: bool=True):
12
+ super().__init__()
13
+ self._handle_outputs = handle_outputs
14
+
15
+ #--------------------------------------------------
16
+ # Public API
17
+ #--------------------------------------------------
18
+ def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
19
+ wide_outputs = options.get("wide_outputs", False)
20
+ return self.OutputRewriter(wide_outputs).walk(model)
21
+
22
+ class OutputRewriter(visitor.Rewriter):
23
+ def __init__(self, wide_outputs: bool = False):
24
+ super().__init__()
25
+ self.wide_outputs = wide_outputs
26
+
27
+ def handle_logical(self, node: ir.Logical, parent: ir.Node):
28
+ # Rewrite children first
29
+ node = super().handle_logical(node, parent)
30
+
31
+ groups = group_tasks(node.body, {
32
+ "outputs": ir.Output,
33
+ })
34
+
35
+ # If no outputs, return as is
36
+ if not groups["outputs"]:
37
+ return node
38
+
39
+ return adjust_outputs(node, groups["outputs"], self.wide_outputs)
40
+
41
+ #--------------------------------------------------
42
+ # GNF vs wide output support
43
+ #--------------------------------------------------
44
+ def adjust_outputs(task: ir.Logical, outputs: OrderedSet[ir.Task], wide_outputs: bool = False):
45
+
46
+ body = list(task.body)
47
+
48
+ # For wide outputs, only adjust the output task to include the keys.
49
+ if wide_outputs:
50
+ for output in outputs:
51
+ assert(isinstance(output, ir.Output))
52
+ if output.keys:
53
+ body.remove(output)
54
+ body.append(rewrite_wide_output(output))
55
+ return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
56
+
57
+ # For GNF outputs we need to generate a rule for each "column" in the output
58
+ else:
59
+ # First split outputs into potentially multiple outputs, one for each "column"
60
+ for output in outputs:
61
+ assert(isinstance(output, ir.Output))
62
+ if output.keys:
63
+ # Remove the original output. This is replaced by per-column outputs below
64
+ body.remove(output)
65
+
66
+ is_export = builtins.export_annotation in output.annotations
67
+
68
+ # Generate an output for each "column"
69
+ # output looks like def output(:cols, :col000, key0, key1, value):
70
+ original_cols = OrderedSet()
71
+ for idx, alias in enumerate(output.aliases):
72
+ # Skip None values which are used as a placeholder for missing values
73
+ if alias[1] is None:
74
+ continue
75
+ original_cols.add(alias[1])
76
+ body.extend(_generate_output_column(output, idx, alias, is_export))
77
+
78
+ idx = len(output.aliases)
79
+ for key in output.keys:
80
+ if key not in original_cols:
81
+ body.extend(_generate_output_column(output, idx, (key.name, key), is_export))
82
+ idx += 1
83
+
84
+ return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
85
+
86
+ # TODO: return non list?
87
+ def _generate_output_column(output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
88
+ if not output.keys:
89
+ return [output]
90
+
91
+ aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
92
+ aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
93
+
94
+ # Append all keys at the start
95
+ for k in output.keys:
96
+ aliases.append((f"key_{k.name}_{idx}", k))
97
+
98
+ if (is_export and
99
+ isinstance(alias[1], ir.Var) and
100
+ (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
101
+
102
+ uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
103
+ aliases.append((uuid.name, uuid))
104
+
105
+ return [
106
+ ir.Lookup(None, builtins.uuid_to_string, (alias[1], uuid)),
107
+ ir.Output(
108
+ output.engine,
109
+ FrozenOrderedSet.from_iterable(aliases),
110
+ output.keys,
111
+ output.annotations
112
+ )
113
+ ]
114
+ else:
115
+ aliases.append(alias)
116
+
117
+ return [
118
+ ir.Output(
119
+ output.engine,
120
+ FrozenOrderedSet.from_iterable(aliases),
121
+ output.keys,
122
+ output.annotations
123
+ )
124
+ ]
125
+
126
+ def rewrite_wide_output(output: ir.Output):
127
+ assert(output.keys)
128
+
129
+ # Only append keys that are not already in the output
130
+ suffix_keys = []
131
+ for key in output.keys:
132
+ if all([val is not key for _, val in output.aliases]):
133
+ suffix_keys.append(key)
134
+
135
+ aliases: OrderedSet[Tuple[str, ir.Value]] = OrderedSet()
136
+
137
+ # Add the remaining args, unless it is already a key
138
+ for name, val in output.aliases:
139
+ if not isinstance(val, ir.Var) or val not in suffix_keys:
140
+ aliases.add((name, val))
141
+
142
+ # Add the keys to the output
143
+ for key in suffix_keys:
144
+ aliases.add((key.name, key))
145
+
146
+ # TODO - we are assuming that the Rel compiler will translate nullable lookups
147
+ # properly, returning a `Missing` if necessary, like this:
148
+ # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
149
+ return ir.Output(
150
+ output.engine,
151
+ aliases.frozen(),
152
+ output.keys,
153
+ output.annotations
154
+ )
155
+
156
+ # TODO: in the rel compiler, see if we can do this outer join
157
+ # 1. number of keys
158
+ # 2. each relation
159
+ # 3. each variable, starting with the keys
160
+ # 4. tag output with @arrow
161
+
162
+ # @arrow def output(_book, _book_title, _author_name):
163
+ # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
164
+ # def output(p, n, c):
165
+ # rel_primitive_outer_join(#1, name, coolness, p, n, c)