relationalai 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- relationalai/clients/client.py +3 -4
- relationalai/clients/exec_txn_poller.py +62 -31
- relationalai/clients/resources/snowflake/direct_access_resources.py +6 -5
- relationalai/clients/resources/snowflake/snowflake.py +54 -51
- relationalai/clients/resources/snowflake/use_index_poller.py +1 -1
- relationalai/semantics/internal/snowflake.py +5 -1
- relationalai/semantics/lqp/algorithms.py +173 -0
- relationalai/semantics/lqp/builtins.py +199 -2
- relationalai/semantics/lqp/executor.py +90 -41
- relationalai/semantics/lqp/export_rewriter.py +40 -0
- relationalai/semantics/lqp/ir.py +28 -2
- relationalai/semantics/lqp/model2lqp.py +218 -45
- relationalai/semantics/lqp/passes.py +13 -658
- relationalai/semantics/lqp/rewrite/__init__.py +12 -0
- relationalai/semantics/lqp/rewrite/algorithm.py +385 -0
- relationalai/semantics/lqp/rewrite/annotate_constraints.py +22 -10
- relationalai/semantics/lqp/rewrite/constants_to_vars.py +70 -0
- relationalai/semantics/lqp/rewrite/deduplicate_vars.py +104 -0
- relationalai/semantics/lqp/rewrite/eliminate_data.py +108 -0
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +31 -2
- relationalai/semantics/lqp/rewrite/period_math.py +77 -0
- relationalai/semantics/lqp/rewrite/quantify_vars.py +65 -31
- relationalai/semantics/lqp/rewrite/unify_definitions.py +317 -0
- relationalai/semantics/lqp/utils.py +11 -1
- relationalai/semantics/lqp/validators.py +14 -1
- relationalai/semantics/metamodel/builtins.py +2 -1
- relationalai/semantics/metamodel/compiler.py +2 -1
- relationalai/semantics/metamodel/dependency.py +12 -3
- relationalai/semantics/metamodel/executor.py +11 -1
- relationalai/semantics/metamodel/factory.py +2 -2
- relationalai/semantics/metamodel/helpers.py +7 -0
- relationalai/semantics/metamodel/ir.py +3 -2
- relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +30 -20
- relationalai/semantics/metamodel/rewrite/flatten.py +50 -13
- relationalai/semantics/metamodel/rewrite/format_outputs.py +9 -3
- relationalai/semantics/metamodel/typer/checker.py +6 -4
- relationalai/semantics/metamodel/typer/typer.py +2 -5
- relationalai/semantics/metamodel/visitor.py +4 -3
- relationalai/semantics/reasoners/optimization/solvers_dev.py +1 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +3 -4
- relationalai/semantics/rel/compiler.py +2 -1
- relationalai/semantics/rel/executor.py +3 -2
- relationalai/semantics/tests/lqp/__init__.py +0 -0
- relationalai/semantics/tests/lqp/algorithms.py +345 -0
- relationalai/semantics/tests/test_snapshot_abstract.py +2 -1
- relationalai/tools/cli_controls.py +216 -67
- relationalai/util/format.py +5 -2
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/METADATA +2 -2
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/RECORD +52 -42
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/WHEEL +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/entry_points.txt +0 -0
- {relationalai-0.13.2.dist-info → relationalai-0.13.4.dist-info}/licenses/LICENSE +0 -0
relationalai/semantics/lqp/builtins.py
CHANGED

```diff
@@ -1,4 +1,5 @@
-from relationalai.semantics.metamodel import factory as f, ir
+from typing import TypeGuard
+from relationalai.semantics.metamodel import factory as f, ir, types
 from relationalai.semantics.metamodel.util import FrozenOrderedSet
 from relationalai.semantics.metamodel import builtins
 
```
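The new `TypeGuard` import backs the `has_*_annotation` helpers added in the next hunk: when one of them returns `True`, a type checker can narrow the inspected node to `ir.Update`. A minimal, self-contained sketch of that narrowing, using stand-in classes rather than the real IR types:

```python
from typing import TypeGuard

class Node: ...

class Update(Node):
    annotations: tuple = ()

def has_upsert_annotation(node: Node) -> TypeGuard[Update]:
    # Simplified stand-in for the annotation scan in builtins.py.
    return isinstance(node, Update)

def handle(node: Node) -> None:
    if has_upsert_annotation(node):
        # The checker now treats `node` as Update in this branch.
        print(node.annotations)
```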
```diff
@@ -8,9 +9,205 @@ adhoc = f.relation("adhoc", [])
 adhoc_annotation = f.annotation(adhoc, [])
 
 # We only want to emit attributes for a known set of annotations.
-
+supported_lqp_annotations = FrozenOrderedSet([
     adhoc.name,
     builtins.function.name,
     builtins.track.name,
     builtins.recursion_config.name,
 ])
+
+# [LoopyIR] Annotations used to mark metamodel IR elements as Loopy constructs.
+# 1. Programming structures:
+# * @script marks Sequence blocks `begin ... end`
+# * @algorithm additionally marks the top-level script
+# * @while marks a Loop as a `while(true) {...}`; its sole Task is a @script @while Sequence
+# 2. Base instructions (Updates with derive Effects)
+# * @global marks instructions that write to a global relation (only used in top-level script)
+# * @empty marks instructions that initialize relations to an empty relation
+# * @assign marks instructions that are standard assignments
+# * @upsert marks instructions that perform in-place upserts
+# * @monoid marks instructions that perform in-place monoid updates
+# * @monus marks instructions that perform in-place monus updates
+
+# These tasks require dedicated handling and are currently only supported in LQP.
+
+# Here we only provide basic inspection functions. Functions for creating these annotations
+# and more complex analysis are in the module relationalai.semantics.lqp.algorithms
+
+# Algorithm: for the top-level script of an algorithm
+_algorithm_anno_name = "algorithm"
+algorithm = f.relation(_algorithm_anno_name, [])
+
+def algorithm_annotation():
+    return f.annotation(algorithm, [])
+
+def has_algorithm_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _algorithm_anno_name:
+            return True
+    return False
+
+# Script: for Sequence blocks (algorithm or while loop)
+_script_anno_name = "script"
+script = f.relation(_script_anno_name, [])
+
+def script_annotation():
+    return f.annotation(script, [])
+
+def has_script_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _script_anno_name:
+            return True
+    return False
+
+# While: for a while Loop or its script body (Sequence)
+_while_anno_name = "while"
+while_ = f.relation(_while_anno_name, [])
+
+def while_annotation():
+    return f.annotation(while_, [])
+
+def has_while_annotation(node: ir.Node) -> bool:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _while_anno_name:
+            return True
+    return False
+
+# Global: marks instructions that write to a relation that is the result of an algorithm
+_global_anno_name = "global"
+global_ = f.relation(_global_anno_name, [])
+
+def global_annotation():
+    return f.annotation(global_, [])
+
+def has_global_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _global_anno_name:
+            return True
+    return False
+
+# Empty: initializes a relation to an empty relation
+_empty_anno_name = "empty"
+empty = f.relation(_empty_anno_name, [])
+
+def empty_annotation():
+    return f.annotation(empty, [])
+
+def has_empty_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _empty_anno_name:
+            return True
+    return False
+
+# Assign: overwrites the target relation
+_assign_anno_name = "assign"
+assign = f.relation(_assign_anno_name, [])
+
+def assign_annotation():
+    return f.annotation(assign, [])
+
+def has_assign_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _assign_anno_name:
+            return True
+    return False
+
+# Upsert: in-place update of a relation
+_upsert_anno_name = "upsert"
+upsert = f.relation(_upsert_anno_name, [])
+
+def upsert_annotation(arity: int):
+    return f.annotation(upsert, [f.literal(arity, type=types.Int64)])
+
+def has_upsert_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _upsert_anno_name:
+            return True
+    return False
+
+def get_upsert_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _upsert_anno_name:
+            return anno
+    return None
+
+# Monoid: in-place update of a relation by another via a monoid operation (e.g. integer addition)
+_monoid_anno_name = "monoid"
+monoid = f.relation(_monoid_anno_name, [])
+
+def monoid_annotation(monoid_type: ir.ScalarType, monoid_op: str, arity: int):
+    return f.annotation(monoid, [f.literal(arity, type=types.Int64), monoid_type, f.literal(monoid_op, type=types.String)])
+
+def has_monoid_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _monoid_anno_name:
+            return True
+    return False
+
+def get_monoid_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _monoid_anno_name:
+            return anno
+    return None
+
+# Monus: in-place update of a relation by another via a "subtraction" operation, if it exists (e.g. integer subtraction)
+_monus_anno_name = "monus"
+monus = f.relation(_monus_anno_name, [])
+
+def monus_annotation(monoid_type: ir.ScalarType, monoid_op: str, arity: int):
+    return f.annotation(monus, [f.literal(arity, type=types.Int64), monoid_type, f.literal(monoid_op, type=types.String)])
+
+def has_monus_annotation(node: ir.Node) -> TypeGuard[ir.Update]:
+    if not hasattr(node, "annotations"):
+        return False
+    annotations = getattr(node, "annotations", [])
+    for anno in annotations:
+        if anno.relation.name == _monus_anno_name:
+            return True
+    return False
+
+def get_monus_annotation(i: ir.Update):
+    for anno in i.annotations:
+        if anno.relation.name == _monus_anno_name:
+            return anno
+    return None
+
+# Get the arity from an annotation (for @upsert, @monoid, and @monus)
+def get_arity(i: ir.Annotation):
+    for arg in i.args:
+        if isinstance(arg, ir.Literal) and (arg.type == types.Int64 or arg.type == types.Int128 or arg.type == types.Number):
+            return arg.value
+    assert False, "Failed to get arity"
+
+# All Loopy instructions
+loopy_instructions = [
+    empty,
+    assign,
+    upsert,
+    monoid,
+    monus
+]
```
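A short sketch of how these helpers compose. The module paths follow this diff; the `"+"` operator string is an illustrative assumption (the real operator encoding is defined where the annotations are created, in relationalai.semantics.lqp.algorithms):

```python
from relationalai.semantics.lqp import builtins as lqp_builtins
from relationalai.semantics.metamodel import types

# Build a @monoid annotation for an in-place integer-sum update of arity 2.
anno = lqp_builtins.monoid_annotation(types.Int64, "+", arity=2)

# get_arity scans the annotation's args for an integer literal and returns it.
assert lqp_builtins.get_arity(anno) == 2
```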
relationalai/semantics/lqp/executor.py
CHANGED

```diff
@@ -10,8 +10,12 @@ from snowflake.snowpark import Session
 
 from relationalai import debugging
 from relationalai.errors import NonDefaultLQPSemanticsVersionWarning
-from relationalai.semantics.lqp import result_helpers
+from relationalai.semantics.lqp import result_helpers, export_rewriter
 from relationalai.semantics.metamodel import ir, factory as f, executor as e
+from relationalai.semantics.metamodel.visitor import collect_by_type
+
+if TYPE_CHECKING:
+    from relationalai.semantics.internal.internal import Model as InternalModel
 from relationalai.semantics.lqp.compiler import Compiler
 from relationalai.semantics.lqp.intrinsics import mk_intrinsic_datetime_now
 from relationalai.semantics.lqp.constructors import mk_transaction
```
```diff
@@ -280,10 +284,11 @@ class LQPExecutor(e.Executor):
                 fields.append(f"NULL as \"{name}\"")
                 continue
 
-
+            # Get the actual physical column name from column_fields
+            colname = column_fields[ix][0]
             ix += 1
 
-            if colname in sample_keys:
+            if colname.lower() in sample_keys:
                 # Actual column exists in sample
                 fields.append(f"{colname} as \"{name}\"")
             else:
```
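The `.lower()` call makes the sample-key lookup case-insensitive. A tiny sketch of why that matters when the physical names come back from Snowflake, which uppercases unquoted identifiers (the assumption here is that `sample_keys` holds lowercase names):

```python
sample_keys = {"my_col", "other_col"}   # assumed lowercase
colname = "MY_COL"                      # physical name as Snowflake reports it

assert colname not in sample_keys       # the old exact match misses
assert colname.lower() in sample_keys   # the new normalized match hits
```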
```diff
@@ -364,7 +369,7 @@ class LQPExecutor(e.Executor):
             meta=None,
         )
 
-    def compile_lqp(self, model: ir.Model, task: ir.Task):
+    def compile_lqp(self, model: ir.Model, task: ir.Task, format: Optional[Literal["pandas", "snowpark", "csv"]] = "pandas"):
         configure = self._construct_configure()
         # Merge the epochs into a single transaction. Long term the query bits should all
         # go into a WhatIf action and the intrinsics could be fused with either of them. But
```
```diff
@@ -390,6 +395,21 @@ class LQPExecutor(e.Executor):
         result, final_model = self.compiler.compile_inner(query, options)
         export_info, query_epoch = result
 
+        if format == "csv":
+            # Extract original column names from Output
+            outputs = collect_by_type(ir.Output, task)
+            assert outputs, "No Output found in the task"
+            assert len(outputs) == 1, "Multiple Outputs found in the task"
+            output = outputs[0]
+            original_cols = []
+            for alias, _ in output.aliases:
+                if not alias:
+                    continue
+                original_cols.append(alias)
+            # Use rewriter to filter data_columns
+            column_filter = export_rewriter.ExtraColumnsFilter(original_cols)
+            query_epoch = column_filter.filter_epoch(query_epoch)
+
 epochs.append(query_epoch)
 epochs.append(self._compile_undefine_query(query_epoch))
 
```
```diff
@@ -402,47 +422,23 @@ class LQPExecutor(e.Executor):
         txn_proto = convert_transaction(txn)
         return final_model, export_info, txn_proto
 
-
-
-
-
-
-
-
-
-
-            df.columns = cols[: len(df.columns)]
-
-        # Process exports
-        if export_to and not self.dry_run:
-            assert cols, "No columns found in the output"
-            assert isinstance(raw_results, TransactionAsyncResponse) and raw_results.transaction, "Invalid transaction result"
-
-            result_cols = export_to._col_names
-
-            if result_cols is not None:
-                assert all(col in result_cols or col in extra_cols for col in cols)
-            else:
-                result_cols = [col for col in cols if col not in extra_cols]
-                assert result_cols
-
-            assert export_info, "Export info should be populated if we are exporting results"
-            self._export(raw_results.transaction['id'], export_info, export_to, cols, result_cols, update)
-
-        return self._postprocess_df(self.config, df, extra_cols)
-
-    def execute(self, model: ir.Model, task: ir.Task, format: Literal["pandas", "snowpark"] = "pandas",
-                export_to: Optional[Table] = None,
-                update: bool = False, meta: dict[str, Any] | None = None) -> DataFrame:
+    def execute(
+        self,
+        model: ir.Model,
+        task: ir.Task,
+        format: Literal["pandas", "snowpark", "csv"] = "pandas",
+        export_to: Optional[Table] = None,
+        update: bool = False,
+        meta: dict[str, Any] | None = None,
+    ) -> DataFrame:
         self.prepare_data()
         previous_model = self._last_model
-
-        final_model, export_info, txn_proto = self.compile_lqp(model, task)
+        final_model, export_info, txn_proto = self.compile_lqp(model, task, format=format)
 
         if self.dry_run:
             return DataFrame()
 
-        if format
+        if format == "snowpark":
             raise ValueError(f"Unsupported format: {format}")
 
         # Format meta as headers
```
```diff
@@ -459,11 +455,64 @@ class LQPExecutor(e.Executor):
             nowait_durable=True,
             headers=headers,
         )
-        assert isinstance(raw_results, TransactionAsyncResponse)
+        assert isinstance(raw_results, TransactionAsyncResponse), "Expected TransactionAsyncResponse from LQP execution"
+        assert raw_results.transaction is not None, "Transaction result is missing"
+        txid = raw_results.transaction['id']
 
         try:
-
+            cols, extra_cols = self._compute_cols(task, final_model)
+            df, errs = result_helpers.format_results(raw_results, cols)
+            self.report_errors(errs)
+
+            # Rename columns if wide outputs is enabled
+            if self.wide_outputs and len(cols) - len(extra_cols) == len(df.columns):
+                df.columns = cols[: len(df.columns)]
+
+            if export_to:
+                assert cols, "No columns found in the output"
+                assert export_info, "Export info should be populated if we are exporting results"
+                result_cols = export_to._col_names
+                if result_cols is not None:
+                    assert all(col in result_cols or col in extra_cols for col in cols)
+                else:
+                    result_cols = [col for col in cols if col not in extra_cols]
+                    assert result_cols
+                self._export(txid, export_info, export_to, cols, result_cols, update)
+
+            if format == "csv":
+                if export_info is not None and isinstance(export_info, tuple) and isinstance(export_info[0], str):
+                    # The full CSV path has two parts. The first part is chosen by the frontend, while
+                    # the second part is chosen by the backend to avoid collisions. We need to ensure
+                    # the second part is synchronized with the future changes in the backend.
+                    full_path = export_info[0] + f"/data_{txid}.gz"
+                    return DataFrame([full_path], columns=["path"])
+                else:
+                    raise ValueError("The CSV export was not successful!")
+
+            return self._postprocess_df(self.config, df, extra_cols)
+
         except Exception as e:
             # If processing the results failed, revert to the previous model.
             self._last_model = previous_model
             raise e
+
+    def export_to_csv(self, model: "InternalModel", query) -> str:
+        ### Exports the result of the given query fragment to a CSV file in
+        ### the Snowflake stage area and returns the path to the CSV file.
+
+        from relationalai.semantics.internal.internal import Fragment, with_source
+        from relationalai.environments import runtime_env
+
+        if not query._select:
+            raise ValueError("Cannot export empty selection to CSV")
+
+        clone = Fragment(parent=query)
+        clone._is_export = True
+        clone._source = runtime_env.get_source_pos()
+        ir_model = model._to_ir()
+        with debugging.span("query", dsl=str(clone), **with_source(clone), meta=clone._meta):
+            query_task = model._compiler.fragment(clone)
+            csv_info = self.execute(ir_model, query_task, format="csv", meta=clone._meta)
+        path = csv_info.at[0, "path"]
+        assert isinstance(path, str)
+        return path
```
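A hypothetical usage sketch of the new entry point; `executor`, `model`, and `query` are illustrative names, and `query` is an internal Fragment with a non-empty selection:

```python
# export_to_csv compiles the fragment with format="csv" and unwraps the
# one-row ["path"] DataFrame returned by execute() into a bare stage path.
path = executor.export_to_csv(model, query)
print(path)  # e.g. "<stage-prefix>/data_<transaction-id>.gz"
```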
relationalai/semantics/lqp/export_rewriter.py
ADDED

```diff
@@ -0,0 +1,40 @@
+#----------------------------------------------------------------------------------------------
+# This is a custom LQP rewriter that filters extra columns from CSV export. It is used in the
+# LQP executor, when the format="csv", to ensure only intended columns are being exported.
+#----------------------------------------------------------------------------------------------
+
+from dataclasses import replace
+from lqp import ir as lqp_ir
+
+class ExtraColumnsFilter:
+
+    def __init__(self, original_cols: list[str]):
+        self.original_cols = set(original_cols)
+
+    def filter_epoch(self, query_epoch: lqp_ir.Epoch) -> lqp_ir.Epoch:
+
+        # Only process epochs with a single read which is dedicated to Export
+        if not (query_epoch.reads and len(query_epoch.reads) == 1):
+            return query_epoch
+
+        old_read = query_epoch.reads[0]
+        if not isinstance(old_read.read_type, lqp_ir.Export):
+            return query_epoch
+
+        config = old_read.read_type.config
+        assert isinstance(config, lqp_ir.ExportCSVConfig) and config.data_columns is not None, \
+            "Expected ExportCSVConfig with data_columns in the read type"
+
+        data_columns = config.data_columns
+
+        # Filter data_columns to only include columns in original_cols
+        new_data_columns = [col for col in data_columns if col.column_name in self.original_cols]
+
+        # Reconstruct the nested structure with filtered data_columns
+        new_config = replace(old_read.read_type.config, data_columns=new_data_columns)
+        new_read_type = replace(old_read.read_type, config=new_config)
+        new_read = replace(old_read, read_type=new_read_type)
+
+        # Return new epoch with updated read
+        remaining_reads = list(query_epoch.reads[1:])
+        return replace(query_epoch, reads=[new_read] + remaining_reads)
```
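The filter never mutates the LQP IR in place; it rebuilds each nested layer with `dataclasses.replace`, from the innermost config outward. A self-contained sketch of that pattern, with hypothetical stand-in dataclasses instead of the real `lqp.ir` types:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class CSVConfig:          # stand-in for lqp_ir.ExportCSVConfig
    data_columns: tuple[str, ...]

@dataclass(frozen=True)
class ExportRead:         # stand-in for a read whose read_type is an Export
    config: CSVConfig

old = ExportRead(CSVConfig(("a", "b", "_extra_col")))
wanted = {"a", "b"}

# Rebuild inner-to-outer rather than mutating frozen instances.
new_config = replace(old.config,
                     data_columns=tuple(c for c in old.config.data_columns if c in wanted))
new = replace(old, config=new_config)

assert new.config.data_columns == ("a", "b")
assert old.config.data_columns == ("a", "b", "_extra_col")  # original untouched
```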
relationalai/semantics/lqp/ir.py
CHANGED

```diff
@@ -6,7 +6,6 @@ __all__ = [
     "Declaration",
     "FunctionalDependency",
     "Def",
-    "Loop",
     "Abstraction",
     "Formula",
     "Exists",
@@ -55,6 +54,20 @@ __all__ = [
     "convert_transaction",
     "validate_lqp",
     "construct_configure",
+    "Algorithm",
+    "Script",
+    "Construct",
+    "Loop",
+    "Instruction",
+    "Assign",
+    "Break",
+    "Upsert",
+    "MonoidDef",
+    "MonusDef",
+    "OrMonoid",
+    "MinMonoid",
+    "MaxMonoid",
+    "SumMonoid",
 ]
 
 from lqp.ir import (
@@ -63,7 +76,6 @@ from lqp.ir import (
     Declaration,
     FunctionalDependency,
     Def,
-    Loop,
     Abstraction,
     Formula,
     Exists,
@@ -108,6 +120,20 @@ from lqp.ir import (
     Configure,
     IVMConfig,
     MaintenanceLevel,
+    Algorithm,
+    Script,
+    Construct,
+    Loop,
+    Instruction,
+    Assign,
+    Break,
+    Upsert,
+    MonoidDef,
+    MonusDef,
+    OrMonoid,
+    MinMonoid,
+    MaxMonoid,
+    SumMonoid,
 )
 
 from lqp.emit import (
```