PyPI - relationalai - Versions diffs - 1.0.0a3__py3-none-any.whl → 1.0.0a5__py3-none-any.whl - Mend

relationalai 1.0.0a3__py3-none-any.whl → 1.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118)

relationalai/config/config.py +47 -21
relationalai/config/connections/__init__.py +5 -2
relationalai/config/connections/duckdb.py +2 -2
relationalai/config/connections/local.py +31 -0
relationalai/config/connections/snowflake.py +0 -1
relationalai/config/external/raiconfig_converter.py +235 -0
relationalai/config/external/raiconfig_models.py +202 -0
relationalai/config/external/utils.py +31 -0
relationalai/config/shims.py +1 -0
relationalai/semantics/__init__.py +10 -8
relationalai/semantics/backends/sql/sql_compiler.py +1 -4
relationalai/semantics/experimental/__init__.py +0 -0
relationalai/semantics/experimental/builder.py +295 -0
relationalai/semantics/experimental/builtins.py +154 -0
relationalai/semantics/frontend/base.py +67 -42
relationalai/semantics/frontend/core.py +34 -6
relationalai/semantics/frontend/front_compiler.py +209 -37
relationalai/semantics/frontend/pprint.py +6 -2
relationalai/semantics/metamodel/__init__.py +7 -0
relationalai/semantics/metamodel/metamodel.py +2 -0
relationalai/semantics/metamodel/metamodel_analyzer.py +58 -16
relationalai/semantics/metamodel/pprint.py +6 -1
relationalai/semantics/metamodel/rewriter.py +11 -7
relationalai/semantics/metamodel/typer.py +116 -41
relationalai/semantics/reasoners/__init__.py +11 -0
relationalai/semantics/reasoners/graph/__init__.py +35 -0
relationalai/semantics/reasoners/graph/core.py +9028 -0
relationalai/semantics/std/__init__.py +30 -10
relationalai/semantics/std/aggregates.py +641 -12
relationalai/semantics/std/common.py +146 -13
relationalai/semantics/std/constraints.py +71 -1
relationalai/semantics/std/datetime.py +904 -21
relationalai/semantics/std/decimals.py +143 -2
relationalai/semantics/std/floats.py +57 -4
relationalai/semantics/std/integers.py +98 -4
relationalai/semantics/std/math.py +857 -35
relationalai/semantics/std/numbers.py +216 -20
relationalai/semantics/std/re.py +213 -5
relationalai/semantics/std/strings.py +437 -44
relationalai/shims/executor.py +60 -52
relationalai/shims/fixtures.py +85 -0
relationalai/shims/helpers.py +26 -2
relationalai/shims/hoister.py +28 -9
relationalai/shims/mm2v0.py +204 -173
relationalai/tools/cli/cli.py +192 -10
relationalai/tools/cli/components/progress_reader.py +1 -1
relationalai/tools/cli/docs.py +394 -0
relationalai/tools/debugger.py +11 -4
relationalai/tools/qb_debugger.py +435 -0
relationalai/tools/typer_debugger.py +1 -2
relationalai/util/dataclasses.py +3 -5
relationalai/util/docutils.py +1 -2
relationalai/util/error.py +2 -5
relationalai/util/python.py +23 -0
relationalai/util/runtime.py +1 -2
relationalai/util/schema.py +2 -4
relationalai/util/structures.py +4 -2
relationalai/util/tracing.py +8 -2
{relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/METADATA +8 -5
{relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/RECORD +118 -95
{relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/WHEEL +1 -1
v0/relationalai/__init__.py +1 -1
v0/relationalai/clients/client.py +52 -18
v0/relationalai/clients/exec_txn_poller.py +122 -0
v0/relationalai/clients/local.py +23 -8
v0/relationalai/clients/resources/azure/azure.py +36 -11
v0/relationalai/clients/resources/snowflake/__init__.py +4 -4
v0/relationalai/clients/resources/snowflake/cli_resources.py +12 -1
v0/relationalai/clients/resources/snowflake/direct_access_resources.py +124 -100
v0/relationalai/clients/resources/snowflake/engine_service.py +381 -0
v0/relationalai/clients/resources/snowflake/engine_state_handlers.py +35 -29
v0/relationalai/clients/resources/snowflake/error_handlers.py +43 -2
v0/relationalai/clients/resources/snowflake/snowflake.py +277 -179
v0/relationalai/clients/resources/snowflake/use_index_poller.py +8 -0
v0/relationalai/clients/types.py +5 -0
v0/relationalai/errors.py +19 -1
v0/relationalai/semantics/lqp/algorithms.py +173 -0
v0/relationalai/semantics/lqp/builtins.py +199 -2
v0/relationalai/semantics/lqp/executor.py +68 -37
v0/relationalai/semantics/lqp/ir.py +28 -2
v0/relationalai/semantics/lqp/model2lqp.py +215 -45
v0/relationalai/semantics/lqp/passes.py +13 -658
v0/relationalai/semantics/lqp/rewrite/__init__.py +12 -0
v0/relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
v0/relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
v0/relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
v0/relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
v0/relationalai/semantics/lqp/rewrite/extract_keys.py +25 -3
v0/relationalai/semantics/lqp/rewrite/period_math.py +77 -0
v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
v0/relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
v0/relationalai/semantics/lqp/utils.py +11 -1
v0/relationalai/semantics/lqp/validators.py +14 -1
v0/relationalai/semantics/metamodel/builtins.py +2 -1
v0/relationalai/semantics/metamodel/compiler.py +2 -1
v0/relationalai/semantics/metamodel/dependency.py +12 -3
v0/relationalai/semantics/metamodel/executor.py +11 -1
v0/relationalai/semantics/metamodel/factory.py +2 -2
v0/relationalai/semantics/metamodel/helpers.py +7 -0
v0/relationalai/semantics/metamodel/ir.py +3 -2
v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
v0/relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
v0/relationalai/semantics/metamodel/typer/checker.py +6 -4
v0/relationalai/semantics/metamodel/typer/typer.py +4 -3
v0/relationalai/semantics/metamodel/visitor.py +4 -3
v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +336 -86
v0/relationalai/semantics/rel/compiler.py +2 -1
v0/relationalai/semantics/rel/executor.py +3 -2
v0/relationalai/semantics/tests/lqp/__init__.py +0 -0
v0/relationalai/semantics/tests/lqp/algorithms.py +345 -0
v0/relationalai/tools/cli.py +339 -186
v0/relationalai/tools/cli_controls.py +216 -67
v0/relationalai/tools/cli_helpers.py +410 -6
v0/relationalai/util/format.py +5 -2
{relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/entry_points.txt +0 -0
{relationalai-1.0.0a3.dist-info → relationalai-1.0.0a5.dist-info}/top_level.txt +0 -0

v0/relationalai/semantics/lqp/rewrite/unify_definitions.py ADDED Viewed

@@ -0,0 +1,317 @@
+from v0.relationalai.semantics.metamodel.compiler import Pass
+from v0.relationalai.semantics.metamodel import ir, builtins as rel_builtins, factory as f, visitor
+from v0.relationalai.semantics.metamodel.typer import typer
+from v0.relationalai.semantics.metamodel import helpers
+from v0.relationalai.semantics.metamodel.util import FrozenOrderedSet, OrderedSet
+from typing import cast, Union, Optional, Iterable
+from collections import defaultdict
+# LQP does not support multiple definitions for the same relation. This pass unifies all
+# definitions for each relation into a single definition using a union.
+class UnifyDefinitions(Pass):
+    def __init__(self):
+        super().__init__()
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        # Maintain a cache of renamings for each relation. These need to be consistent
+        # across all definitions of the same relation.
+        self.renamed_relation_args: dict[Union[ir.Value, ir.Relation], list[ir.Var]] = {}
+        root = cast(ir.Logical, model.root)
+        new_tasks = self.get_combined_multidefs(root)
+        return ir.Model(
+            model.engines,
+            model.relations,
+            model.types,
+            f.logical(
+                tuple(new_tasks),
+                root.hoisted,
+                root.engine,
+            ),
+            model.annotations,
+        )
+    def _get_heads(self, logical: ir.Logical) -> list[Union[ir.Update, ir.Output]]:
+        derives = []
+        for task in logical.body:
+            if isinstance(task, ir.Update) and task.effect == ir.Effect.derive:
+                derives.append(task)
+            elif isinstance(task, ir.Output):
+                derives.append(task)
+        return derives
+    def _get_non_heads(self, logical: ir.Logical) -> list[ir.Task]:
+        non_derives = []
+        for task in logical.body:
+            if not(isinstance(task, ir.Update) and task.effect == ir.Effect.derive) and not isinstance(task, ir.Output):
+                non_derives.append(task)
+        return non_derives
+    def _get_head_identifier(self, head: Union[ir.Update, ir.Output]) -> Optional[ir.Value]:
+        if isinstance(head, ir.Update):
+            return head.relation
+        else:
+            assert isinstance(head, ir.Output)
+            if len(head.aliases) <= 2:
+                # For processing here, we need output to have at least the column markers
+                # `cols` and `col`, and also a key
+                return None
+            output_alias_names = helpers.output_alias_names(head.aliases)
+            output_vals = helpers.output_values(head.aliases)
+            # For normal outputs, the pattern is output[keys](cols, "col000" as 'col', ...)
+            if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
+                return output_vals[1]
+            # For exports, the pattern is output[keys]("col000" as 'col', ...)
+            if helpers.is_export(head):
+                if output_alias_names[0] == "col":
+                    return output_vals[0]
+        return None
+    def get_combined_multidefs(self, root: ir.Logical) -> list[ir.Logical]:
+        # Step 1: Group tasks by the relation they define.
+        relation_to_tasks: dict[Union[None, ir.Value, ir.Relation], list[ir.Logical]] = defaultdict(list)
+        for task in root.body:
+            task = cast(ir.Logical, task)
+            task_heads = self._get_heads(task)
+            # Some relations do not need to be grouped, e.g., if they don't contain a
+            # derive. Use `None` as a placeholder key for these cases.
+            if len(task_heads) != 1:
+                relation_to_tasks[None].append(task)
+                continue
+            head_id = self._get_head_identifier(task_heads[0])
+            relation_to_tasks[head_id].append(task)
+        # Step 2: For each relation, combine all of the body definitions into a union.
+        result_tasks = []
+        for relation, tasks in relation_to_tasks.items():
+            # If there's only one task for the relation, or if grouping is not needed, then
+            # just keep the original tasks.
+            if len(tasks) == 1 or relation is None:
+                result_tasks.extend(tasks)
+                continue
+            result_tasks.append(self._combine_tasks_into_union(tasks))
+        return result_tasks
+    def _get_variable_mapping(self, logical: ir.Logical) -> dict[ir.Value, ir.Var]:
+        heads = self._get_heads(logical)
+        assert len(heads) == 1, "should only have one head in a logical at this stage"
+        head = heads[0]
+        var_mapping = {}
+        head_id = self._get_head_identifier(head)
+        if isinstance(head, ir.Update):
+            args_for_renaming = head.args
+        else:
+            assert isinstance(head, ir.Output)
+            output_alias_names = helpers.output_alias_names(head.aliases)
+            if output_alias_names[0] == "cols" and output_alias_names[1] == "col":
+                assert len(head.aliases) > 2
+                # For outputs, we do not need to rename the `cols` and `col` markers or the
+                # keys.
+                output_values = helpers.output_values(head.aliases)[2:]
+            else:
+                assert helpers.is_export(head) and output_alias_names[0] == "col"
+                assert len(head.aliases) > 1
+                # For exports, we do not need to rename the `col` marker or the keys.
+                output_values = helpers.output_values(head.aliases)[1:]
+            args_for_renaming = []
+            for v in output_values:
+                if head.keys and isinstance(v, ir.Var) and v in head.keys:
+                    continue
+                args_for_renaming.append(v)
+        if head_id not in self.renamed_relation_args:
+            renamed_vars = []
+            for (i, arg) in enumerate(args_for_renaming):
+                typ = typer.to_type(arg)
+                assert arg not in var_mapping, "args of update should be unique"
+                if isinstance(arg, ir.Var):
+                    var_mapping[arg] = ir.Var(typ, arg.name)
+                else:
+                    var_mapping[arg] = ir.Var(typ, f"arg_{i}")
+                renamed_vars.append(var_mapping[arg])
+            self.renamed_relation_args[head_id] = renamed_vars
+        else:
+            for (arg, var) in zip(args_for_renaming, self.renamed_relation_args[head_id]):
+                var_mapping[arg] = var
+        return var_mapping
+    def _rename_variables(self, logical: ir.Logical) -> ir.Logical:
+        class RenameVisitor(visitor.Rewriter):
+            def __init__(self, var_mapping: dict[ir.Value, ir.Var]):
+                super().__init__()
+                self.var_mapping = var_mapping
+            def _get_mapped_value(self, val: ir.Value) -> ir.Value:
+                if isinstance(val, tuple):
+                    return tuple(self._get_mapped_value(t) for t in val)
+                return self.var_mapping.get(val, val)
+            def _get_mapped_values(self, vals: Iterable[ir.Value]) -> list[ir.Value]:
+                return [self._get_mapped_value(v) for v in vals]
+            def handle_var(self, node: ir.Var, parent: ir.Node) -> ir.Var:
+                return self.var_mapping.get(node, node)
+            # TODO: ideally, extend the rewriter class to allow rewriting PyValue to Var so
+            # we don't need to separately handle all cases containing them.
+            def handle_update(self, node: ir.Update, parent: ir.Node) -> ir.Update:
+                return ir.Update(
+                    node.engine,
+                    node.relation,
+                    tuple(self._get_mapped_values(node.args)),
+                    node.effect,
+                    node.annotations,
+                )
+            def handle_lookup(self, node: ir.Lookup, parent: ir.Node) -> ir.Lookup:
+                return ir.Lookup(
+                    node.engine,
+                    node.relation,
+                    tuple(self._get_mapped_values(node.args)),
+                    node.annotations,
+                )
+            def handle_output(self, node: ir.Output, parent: ir.Node) -> ir.Output:
+                new_aliases = FrozenOrderedSet(
+                    [(name, self._get_mapped_value(value)) for name, value in node.aliases]
+                )
+                if node.keys:
+                    new_keys = FrozenOrderedSet(
+                        [self.var_mapping.get(key, key) for key in node.keys]
+                    )
+                else:
+                    new_keys = node.keys
+                return ir.Output(
+                    node.engine,
+                    new_aliases,
+                    new_keys,
+                    node.annotations,
+                )
+            def handle_construct(self, node: ir.Construct, parent: ir.Node) -> ir.Construct:
+                new_values = tuple(self._get_mapped_values(node.values))
+                new_id_var = self.var_mapping.get(node.id_var, node.id_var)
+                return ir.Construct(
+                    node.engine,
+                    new_values,
+                    new_id_var,
+                    node.annotations,
+                )
+            def handle_aggregate(self, node: ir.Aggregate, parent: ir.Node) -> ir.Aggregate:
+                new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
+                new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
+                new_args = tuple(self._get_mapped_values(node.args))
+                return ir.Aggregate(
+                    node.engine,
+                    node.aggregation,
+                    new_projection,
+                    new_group,
+                    new_args,
+                    node.annotations,
+                )
+            def handle_rank(self, node: ir.Rank, parent: ir.Node) -> ir.Rank:
+                new_projection = tuple(self.var_mapping.get(arg, arg) for arg in node.projection)
+                new_group = tuple(self.var_mapping.get(arg, arg) for arg in node.group)
+                new_args = tuple(self.var_mapping.get(arg, arg) for arg in node.args)
+                new_result = self.var_mapping.get(node.result, node.result)
+                return ir.Rank(
+                    node.engine,
+                    new_projection,
+                    new_group,
+                    new_args,
+                    node.arg_is_ascending,
+                    new_result,
+                    node.limit,
+                    node.annotations,
+                )
+        var_mapping = self._get_variable_mapping(logical)
+        renamer = RenameVisitor(var_mapping)
+        result = renamer.walk(logical)
+        # Also need to append the equality for each renamed constant. E.g., if the mapping
+        # contains (50.0::FLOAT -> arg_2::FLOAT), we need to add
+        # `eq(arg_2::FLOAT, 50.0::FLOAT)` to the result.
+        value_eqs = []
+        for (old_var, new_var) in var_mapping.items():
+            if not isinstance(old_var, ir.Var):
+                value_eqs.append(f.lookup(rel_builtins.eq, [new_var, old_var]))
+        return ir.Logical(
+            result.engine,
+            result.hoisted,
+            tuple(value_eqs) + tuple(result.body),
+            result.annotations,
+        )
+    # This function is the main workhorse for this rewrite pass. It takes a list of tasks
+    # that define the same relation, and combines them into a single task that defines
+    # the relation using a union of all of the bodies.
+    def _combine_tasks_into_union(self, tasks: list[ir.Logical]) -> ir.Logical:
+        # Step 1: Rename the variables in all tasks so that they will match the final derive
+        # after reconstructing into a union
+        renamed_tasks = [self._rename_variables(task) for task in tasks]
+        # Step 2: Get the final derive
+        derives = self._get_heads(renamed_tasks[0])
+        assert len(derives) == 1, "should only have one derive in a logical at this stage"
+        # Also make sure that all the derives are the same. This should be the case because
+        # we renamed all the variables to be the same in step 1.
+        for task in renamed_tasks[1:]:
+            assert self._get_heads(task) == derives, "all derives should be the same"
+        derive = derives[0]
+        # Step 3: Remove the final `derive` from each task
+        renamed_task_bodies = [
+            f.logical(
+                tuple(self._get_non_heads(t)),  # Only keep non-head tasks
+                t.hoisted,
+                t.engine,
+            )
+            for t in renamed_tasks
+        ]
+        # Deduplicate bodies
+        renamed_task_bodies = OrderedSet.from_iterable(renamed_task_bodies).get_list()
+        # Step 4: Construct a union of all the task bodies
+        if len(renamed_task_bodies) == 1:
+            # If there's only one body after deduplication, no need to create a union
+            new_body = renamed_task_bodies[0]
+        else:
+            new_body = f.union(
+                tuple(renamed_task_bodies),
+                [],
+                renamed_tasks[0].engine,
+            )
+        # Step 5: Add the final derive back
+        return f.logical(
+            (new_body, derive),
+            [],
+            renamed_tasks[0].engine,
+        )

v0/relationalai/semantics/lqp/utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ from v0.relationalai.semantics.metamodel import ir
 from v0.relationalai.semantics.metamodel.helpers import sanitize
 from v0.relationalai.semantics.metamodel.util import FrozenOrderedSet
+from dataclasses import dataclass
 from hashlib import sha256
 from typing import Tuple
@@ -43,16 +44,25 @@ class UniqueNames:
         self.id_to_name[id] = name
         return name
+@dataclass(frozen=True)
+class ExportDescriptor:
+    relation_id: lqp.RelationId
+    column_name: str
+    column_number: int
+    column_type: lqp.Type
 class TranslationCtx:
     def __init__(self, def_names: UniqueNames = UniqueNames()):
         # TODO: comment these fields
         self.def_names = def_names
         self.var_names = UniqueNames()
         self.output_names = UniqueNames()
+        # A counter for break rules generated during translation of while loops
+        self.break_rule_counter = 0
         # Map relation IDs to their original names for debugging and pretty printing.
         self.rel_id_to_orig_name = {}
         self.output_ids: list[tuple[lqp.RelationId, str]] = []
-        self.export_ids: list[tuple[lqp.RelationId, int, lqp.Type]] = []
+        self.export_descriptors: list[ExportDescriptor] = []
 def gen_rel_id(ctx: TranslationCtx, orig_name: str, suffix: str = "") -> lqp.RelationId:
     relation_id = lqp.RelationId(id=lqp_hash(orig_name + suffix), meta=None)

v0/relationalai/semantics/lqp/validators.py CHANGED Viewed

@@ -6,6 +6,11 @@ CompilableType = Union[
     ir.Logical,
     ir.Union,
+    # Loops
+    ir.Loop,
+    ir.Sequence,
+    ir.Break,
     # Formulas
     ir.Lookup,
     ir.Exists,
@@ -36,7 +41,7 @@ def assert_valid_input(model: ir.Model) -> None:
 def _assert_valid_subtask(task: ir.Task) -> None:
     # TODO: assert what subtasks should look like
-    assert isinstance(task, ir.Logical), f"expected logical task, got {type(task)}"
+    assert isinstance(task, (ir.Logical, ir.Sequence)), f"expected logical task, got {type(task)}"
     _assert_task_compilable(task)
 def _assert_task_compilable(task: ir.Task) -> None:
@@ -51,6 +56,14 @@ def _assert_task_compilable(task: ir.Task) -> None:
         assert_valid_update(task)
         effect = task.effect
         assert effect == ir.Effect.derive, "only derive supported at the moment"
+    elif isinstance(task, ir.Sequence):
+        assert any(anno.relation.name == "script" for anno in task.annotations), "only @script sequences supported at the moment"
+        for subtask in task.tasks:
+            _assert_task_compilable(subtask)
+    elif isinstance(task, ir.Loop):
+        assert isinstance(task.body, ir.Sequence), f"expected loop body to be a sequence, got {type(task.body)}"
+        for subtask in task.body.tasks:
+            _assert_task_compilable(subtask)
 def assert_valid_update(update: ir.Update) -> None:
     effect = update.effect

v0/relationalai/semantics/metamodel/builtins.py CHANGED Viewed

@@ -474,7 +474,8 @@ external = f.relation("external", [])
 external_annotation = f.annotation(external, [])
 # indicates an output is meant to be exported
-export = f.relation("export", [])
+export = f.relation("export", [f.input_field("fqn", types.String)])
+# convenience for when there are no arguments (this is deprecated as fqn should always be used)
 export_annotation = f.annotation(export, [])
 # indicates this relation is a concept population

v0/relationalai/semantics/metamodel/compiler.py CHANGED Viewed

@@ -21,7 +21,8 @@ class Compiler():
             for p in self.passes:
                 with debugging.span(p.name) as span:
                     model = p.rewrite(model, options)
-                    span["metamodel"] = str(model.root)
+                    if debugging.DEBUG:
+                        span["metamodel"] = str(model.root)
                 p.reset()
         return model

v0/relationalai/semantics/metamodel/dependency.py CHANGED Viewed

@@ -31,6 +31,8 @@ class DependencyInfo():
     parent: dict[int, ir.Task] = field(default_factory=dict)
     # keep track of replacements that were made during a rewrite
     replacements: dict[int, ir.Task] = field(default_factory=dict)
+    # keep track of which logicals are effectful
+    effectful: set[int] = field(default_factory=set)
     def task_inputs(self, node: ir.Task) -> Optional[OrderedSet[ir.Var]]:
         """ The input variables for this task, if any. """
@@ -165,7 +167,7 @@ class Cluster():
         # this is a binders cluster, which is a candidate to being merged
         self.mergeable = not self.required and isinstance(task, helpers.BINDERS)
         # this is a cluster that will only hold an effect
-        self.effectful = isinstance(task, helpers.EFFECTS)
+        self.effectful = isinstance(task, helpers.EFFECTS) or task.id in info.effectful
         # this is a cluster that will only hold a composite
         self.composite = isinstance(task, helpers.COMPOSITES)
         # content is either a single task or a set of tasks
@@ -374,7 +376,6 @@ class DependencyAnalysis(visitor.Visitor):
     def __init__(self, info: DependencyInfo):
         self.info = info
     def enter(self, node: ir.Node, parent: Optional[ir.Node]=None):
         # keep track of parents of all nodes
         if parent and isinstance(parent, ir.Task):
@@ -456,7 +457,7 @@ class DependencyAnalysis(visitor.Visitor):
             # if c1 has an effect and c2 is a composite without hoisted variables or with a
             # hoisted variable that does not have a default (it is a plain var), then c2
             # behaves like a filter and c1 depends on it.
-            if c1.effectful and c2.composite:
+            if c1.effectful and c2.composite and not c2.effectful:
                 task = c2.content.some()
                 assert(isinstance(task, helpers.COMPOSITES))
                 if not task.hoisted:
@@ -608,6 +609,10 @@ class BindingAnalysis(visitor.Visitor):
         else:
             map[key.id].add(val)
+    def leave(self, node: ir.Node, parent: Optional[ir.Node]=None):
+        if parent and node.id in self.info.effectful:
+            self.info.effectful.add(parent.id)
+        return super().leave(node, parent)
     #
     # Composite tasks
@@ -768,6 +773,8 @@ class BindingAnalysis(visitor.Visitor):
     def visit_update(self, node: ir.Update, parent: Optional[ir.Node]):
+        assert parent is not None
+        self.info.effectful.add(parent.id)
         # register variables being used as arguments to the update, it's always considered an input
         for v in helpers.vars(node.args):
             self.input(node, v)
@@ -816,6 +823,8 @@ class BindingAnalysis(visitor.Visitor):
     def visit_output(self, node: ir.Output, parent: Optional[ir.Node]):
+        assert parent is not None
+        self.info.effectful.add(parent.id)
         # register variables being output, they always considered an input to the task
         for v in helpers.output_vars(node.aliases):
             self.input(node, v)

v0/relationalai/semantics/metamodel/executor.py CHANGED Viewed

@@ -1,15 +1,21 @@
 from __future__ import annotations
 from pandas import DataFrame
-from typing import Any, Union, Tuple, Literal
+from typing import Any, Union, Tuple, Literal, TYPE_CHECKING
 from v0.relationalai.clients.config import Config
 from v0.relationalai.semantics.metamodel import Model, Task, ir
 from v0.relationalai.semantics.metamodel.visitor import collect_by_type
+if TYPE_CHECKING:
+    from v0.relationalai.semantics.internal.internal import Model as InternalModel
 from .util import NameCache
 import rich
+# global flag to suppress type errors from being printed
+SUPPRESS_TYPE_ERRORS = False
 class Executor():
     """ Interface for an object that can execute the program specified by a model. """
     def execute(self, model: Model, task:Task, format:Literal["pandas", "snowpark"]="pandas") -> Union[DataFrame, Any]:
@@ -59,3 +65,7 @@ class Executor():
                 if col in df.columns:
                     df = df.drop(col, axis=1)
         return df
+    def export_to_csv(self, model: "InternalModel", query) -> str:
+        ### Only implemented in the LQP executor for now.
+        raise NotImplementedError(f"export_to_csv is not supported by {type(self).__name__}")

v0/relationalai/semantics/metamodel/factory.py CHANGED Viewed

@@ -277,8 +277,8 @@ def for_all(vars: PySequence[ir.Var], task: ir.Task, engine: Optional[ir.Engine]
 #
 # loops body until a break condition is met
-def loop(iter: ir.Var, body: ir.Task, hoisted: PySequence[ir.VarOrDefault]=[], engine: Optional[ir.Engine]=None, annos: PySequence[ir.Annotation]=[]):
-    return ir.Loop(engine, tuple(hoisted), iter, body, FrozenOrderedSet(annos))
+def loop(body: ir.Task, iter: PySequence[ir.Var]=[],  hoisted: PySequence[ir.VarOrDefault]=[], concurrency:int=1, engine: Optional[ir.Engine]=None, annos: PySequence[ir.Annotation]=[]):
+    return ir.Loop(engine, tuple(hoisted), tuple(iter), body, concurrency, FrozenOrderedSet(annos))
 def break_(check: ir.Task, engine: Optional[ir.Engine]=None, annos: PySequence[ir.Annotation]=[]):
     return Break(check, engine, annos)

v0/relationalai/semantics/metamodel/helpers.py CHANGED Viewed

@@ -24,6 +24,13 @@ def sanitize(name:str) -> str:
 # Checks
 #--------------------------------------------------
+def is_export(node: ir.Node):
+    """ Whether this node is an export output. """
+    return isinstance(node, ir.Output) and (
+        builtins.export_annotation in node.annotations or
+        any(annotation.relation == builtins.export for annotation in node.annotations)
+    )
 def is_concept_lookup(node: ir.Lookup|ir.Relation):
     """ Whether this task is a concept lookup. """
     if isinstance(node, ir.Lookup) and is_concept_lookup(node.relation):

v0/relationalai/semantics/metamodel/ir.py CHANGED Viewed

@@ -427,8 +427,9 @@ class Loop(Task):
     """Execute the body in a loop, incrementing the iter variable, until a break sub-task in
     the body succeeds."""
     hoisted: Tuple[VarOrDefault, ...]
-    iter: Var
+    iter: Tuple[Var, ...]
     body: Task
+    concurrency: int = 1
     annotations:FrozenOrderedSet[Annotation] = annotations_field()
 @acceptor
@@ -856,7 +857,7 @@ class Printer(BasePrinter):
         # Iteration (Loops)
         elif isinstance(node, Loop):
-            self.print_hoisted(depth, f"Loop ⇓[{self.value_to_string(node.iter)}]{annos_str}", node.hoisted)
+            self.print_hoisted(depth, f"Loop ⇓[{', '.join([self.value_to_string(v) for v in node.iter])}] concurrency={node.concurrency} {annos_str}", node.hoisted)
             self.pprint(node.body, depth + 1, print_ids=print_ids)
         elif isinstance(node, Break):

v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 from v0.relationalai.semantics.metamodel import ir
 from v0.relationalai.semantics.metamodel.compiler import Pass
 from v0.relationalai.semantics.metamodel.visitor import Visitor, Rewriter
-from v0.relationalai.semantics.metamodel.util import OrderedSet
+from v0.relationalai.semantics.metamodel.util import OrderedSet, ordered_set
 from v0.relationalai.semantics.metamodel import helpers, factory as f
 from typing import Optional, Any
@@ -106,6 +106,7 @@ class DNFExtractor(Visitor):
         # The logical that contains the output.
         # The assumption for the IR at this point is that there is only one output.
         self.output_logical: Optional[ir.Logical] = None
+        self.output_keys: OrderedSet[ir.Var] = ordered_set()
         self.active_negations: list[ir.Not] = []
         # Nodes that have to split into multiple similar nodes, depending on the changes
         # of sub-nodes.
@@ -120,6 +121,8 @@ class DNFExtractor(Visitor):
             if any(isinstance(x, ir.Output) for x in node.body):
                 assert not self.output_logical, "multiple outputs"
                 self.output_logical = node
+                output_node = next(x for x in node.body if isinstance(x, ir.Output))
+                self.output_keys = helpers.collect_vars(output_node)
         elif isinstance(node, ir.Not):
             self.active_negations.append(node)
@@ -168,29 +171,36 @@ class DNFExtractor(Visitor):
               self.output_logical and
               len(self.active_negations) % 2 == 0 and
               len(node.tasks) > 1):
-            # We split the union when there is a branch with vars "X,Y" and another with "X,Z"
-            # If some branches have vars "X, Y, Z" and others have "X, Y" or "Y, Z" we don't split
+            # We split the union when there are vars not present in all branches that are
+            # present in the output keys. If vars are not in output keys then they act as
+            # filters only and do not require splitting.
             should_split = False
-            all_vars = helpers.collect_vars(node.tasks[0])
-            for t in node.tasks[1:]:
-                vars = helpers.collect_vars(t)
-                curr_intersection = vars.get_set().intersection(all_vars.get_set())
-                should_split |= not (curr_intersection == vars.get_set() or curr_intersection == all_vars.get_set())
-                if should_split:
-                    replacements:list[ir.Task] = []
-                    for t in node.tasks:
-                        # If some branch should already be replaced, we flatten all the replacements here.
-                        if t in self.replaced_by:
-                            replacements.extend(self.replaced_by[t])
-                        else:
-                            replacements.append(t)
-                    self.replaced_by[node] = replacements
-                    self.should_split.add(parent)
-                    break
-                all_vars.update(vars)
+            all_vars = helpers.collect_vars(node).get_set()
+            all_vars &= self.output_keys.get_set()
+            if all_vars:
+                for t in node.tasks:
+                    vars = helpers.collect_vars(t).get_set()
+                    curr_intersection = vars.intersection(all_vars)
+                    if curr_intersection != all_vars:
+                        should_split = True
+                        break
+            if should_split:
+                replacements:list[ir.Task] = []
+                for t in node.tasks:
+                    # If some branch should already be replaced, we flatten all
+                    # the replacements here.
+                    if t in self.replaced_by:
+                        replacements.extend(self.replaced_by[t])
+                    else:
+                        replacements.append(t)
+                self.replaced_by[node] = replacements
+                self.should_split.add(parent)
         if isinstance(node, ir.Logical) and node == self.output_logical:
             self.output_logical = None
+            self.output_keys = ordered_set()
         return node

relationalai 1.0.0a3__py3-none-any.whl → 1.0.0a5__py3-none-any.whl

relationalai 1.0.0a3__py3-none-any.whl → 1.0.0a5__py3-none-any.whl