relationalai 0.12.7__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -851,7 +851,17 @@ Otherwise, remove it from your '{profile}' configuration profile.
             self.generation
         )
         # If cache is valid (data freshness has not expired), skip polling
-        if not poller.cache.is_valid():
+        if poller.cache.is_valid():
+            cached_sources = len(poller.cache.sources)
+            total_sources = len(sources_list)
+            cached_timestamp = poller.cache._metadata.get("cachedIndices", {}).get(poller.cache.key, {}).get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            if cached_timestamp:
+                print(f"\n{message} (cached at {cached_timestamp})\n")
+            else:
+                print(f"\n{message}\n")
+        else:
             return poller.poll()
 
     #--------------------------------------------------
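
Note: the guard above is inverted, so a still-valid cache now short-circuits polling and only reports what was reused. A minimal standalone sketch of the new control flow, using a hypothetical stand-in for the poller's cache rather than the package's real classes:

    from dataclasses import dataclass, field

    @dataclass
    class StubCache:  # hypothetical stand-in, not the package's cache type
        sources: list = field(default_factory=list)
        fresh: bool = True
        cached_at: str = ""

        def is_valid(self) -> bool:
            return self.fresh

    def poll_or_reuse(cache: StubCache, sources_list: list) -> str:
        if cache.is_valid():
            # Fresh cache: skip polling and just report what was reused.
            message = f"Using cached data for {len(cache.sources)}/{len(sources_list)} data streams"
            return f"{message} (cached at {cache.cached_at})" if cache.cached_at else message
        # Stale cache: the real code falls through to poller.poll() here.
        return "polling..."

    print(poll_or_reuse(StubCache(sources=["a", "b"], cached_at="2024-01-01T00:00:00Z"), ["a", "b", "c"]))
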
@@ -3284,12 +3294,24 @@ class DirectAccessResources(Resources):
         try:
             response = _send_request()
             if response.status_code != 200:
+                # For 404 responses with skip_auto_create=True, return immediately to let caller handle it
+                # (e.g., get_engine needs to check 404 and return None for auto_create_engine)
+                # For skip_auto_create=False, continue to auto-creation logic below
+                if response.status_code == 404 and skip_auto_create:
+                    return response
+
                 try:
                     message = response.json().get("message", "")
                 except requests.exceptions.JSONDecodeError:
-                    raise ResponseStatusException(
-                        f"Failed to parse error response from endpoint {endpoint}.", response
-                    )
+                    # Can't parse JSON response. For skip_auto_create=True (e.g., get_engine),
+                    # this should have been caught by the 404 check above, so this is an error.
+                    # For skip_auto_create=False, we explicitly check status_code below,
+                    # so we don't need to parse the message.
+                    if skip_auto_create:
+                        raise ResponseStatusException(
+                            f"Failed to parse error response from endpoint {endpoint}.", response
+                        )
+                    message = "" # Not used when we check status_code directly
 
                 # fix engine on engine error and retry
                 # Skip auto-retry if skip_auto_create is True to avoid recursion
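
Note: the early 404 return means a caller that passes skip_auto_create=True sees the raw response instead of triggering engine auto-creation, as the comments describe for get_engine. A hypothetical caller sketch with stand-in types (FakeResponse and request_engine are illustrative, not the package's API):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class FakeResponse:  # stand-in for requests.Response
        status_code: int
        payload: Optional[dict] = None

        def json(self) -> dict:
            return self.payload or {}

    def request_engine(name: str, skip_auto_create: bool) -> FakeResponse:
        # Pretend the engine does not exist yet.
        return FakeResponse(status_code=404)

    def get_engine_or_none(name: str) -> Optional[dict]:
        # With skip_auto_create=True the request helper returns the raw 404
        # instead of raising or auto-creating, so "not found" can map to None.
        response = request_engine(name, skip_auto_create=True)
        if response.status_code == 404:
            return None
        return response.json()

    assert get_engine_or_none("my_engine") is None
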
@@ -3482,7 +3504,17 @@ class DirectAccessResources(Resources):
             generation=self.generation,
         )
         # If cache is valid (data freshness has not expired), skip polling
-        if not poller.cache.is_valid():
+        if poller.cache.is_valid():
+            cached_sources = len(poller.cache.sources)
+            total_sources = len(sources_list)
+            cached_timestamp = poller.cache._metadata.get("cachedIndices", {}).get(poller.cache.key, {}).get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            if cached_timestamp:
+                print(f"\n{message} (cached at {cached_timestamp})\n")
+            else:
+                print(f"\n{message}\n")
+        else:
             return poller.poll()
 
     def _check_exec_async_status(self, txn_id: str, headers: Dict[str, str] | None = None) -> bool:
@@ -250,7 +250,17 @@ class UseIndexPoller:
             # Cache was used - show how many sources were cached
             total_sources = len(self.cache.sources)
             cached_sources = total_sources - len(self.sources)
-            progress.add_sub_task(f"Using cached data for {cached_sources}/{total_sources} data streams", task_id="cache_usage", category=TASK_CATEGORY_CACHE)
+
+            # Get the timestamp when sources were cached
+            entry = self.cache._metadata.get("cachedIndices", {}).get(self.cache.key, {})
+            cached_timestamp = entry.get("last_use_index_update_on", "")
+
+            message = f"Using cached data for {cached_sources}/{total_sources} data streams"
+            # Format the message with timestamp
+            if cached_timestamp:
+                message += f" (cached at {cached_timestamp})"
+
+            progress.add_sub_task(message, task_id="cache_usage", category=TASK_CATEGORY_CACHE)
             # Complete the subtask immediately since it's just informational
             progress.complete_sub_task("cache_usage")
 
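
Note: the timestamp is read from the poller cache's metadata dictionary, keyed under "cachedIndices" by the cache key. A standalone sketch of the lookup and message formatting (the metadata values here are made up for illustration):

    _metadata = {
        "cachedIndices": {
            "model_a": {"last_use_index_update_on": "2024-05-01T12:34:56Z"},
        },
    }

    def cache_message(key: str, cached_sources: int, total_sources: int) -> str:
        entry = _metadata.get("cachedIndices", {}).get(key, {})
        cached_timestamp = entry.get("last_use_index_update_on", "")
        message = f"Using cached data for {cached_sources}/{total_sources} data streams"
        if cached_timestamp:
            message += f" (cached at {cached_timestamp})"
        return message

    print(cache_message("model_a", 3, 5))
    # Using cached data for 3/5 data streams (cached at 2024-05-01T12:34:56Z)
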
@@ -6,7 +6,7 @@ from relationalai.semantics.metamodel.util import FrozenOrderedSet
 
 from relationalai.semantics.metamodel.rewrite import Flatten
 
-from ..metamodel.rewrite import DischargeConstraints, DNFUnionSplitter, ExtractNestedLogicals
+from ..metamodel.rewrite import DischargeConstraints, DNFUnionSplitter, ExtractNestedLogicals, FormatOutputs
 from .rewrite import CDC, ExtractCommon, ExtractKeys, FunctionAnnotations, QuantifyVars, Splinter
 
 from relationalai.semantics.lqp.utils import output_names
@@ -27,6 +27,7 @@ def lqp_passes() -> list[Pass]:
         DNFUnionSplitter(),
         ExtractKeys(),
         ExtractCommon(),
+        FormatOutputs(),
         Flatten(),
         Splinter(), # Splits multi-headed rules into multiple rules
         QuantifyVars(), # Adds missing existentials
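
Note: FormatOutputs() is inserted immediately before Flatten(), and passes run in list order, so outputs are already split per column (or widened) by the time rules are flattened. A minimal sketch of how such a list is typically driven, assuming each pass exposes the rewrite(model, options) entry point seen in format_outputs.py below (apply_passes itself is an illustrative helper, not the package's API):

    def apply_passes(model, passes, options=None):
        # Each pass consumes the model produced by the previous one.
        options = options or {}
        for p in passes:
            model = p.rewrite(model, options)
        return model
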
@@ -654,6 +654,7 @@ rel_primitive_solverlib_ho_appl = aggregation("rel_primitive_solverlib_ho_appl",
 ])
 implies = f.relation("implies", [f.input_field("a", types.Bool), f.input_field("b", types.Bool)])
 all_different = aggregation("all_different", [f.input_field("over", types.Any)])
+special_ordered_set_type_2 = aggregation("special_ordered_set_type_2", [f.input_field("rank", types.Any)])
 
 # graph primitive algorithm helpers
 infomap = aggregation("infomap", [
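
Note: special_ordered_set_type_2 is registered the same way as all_different above; the name matches the solver notion of an SOS2 constraint (an ordered set in which at most two adjacent members may be nonzero, commonly used for piecewise-linear models), with the "rank" field presumably carrying the ordering.
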
@@ -2,5 +2,6 @@ from .discharge_constraints import DischargeConstraints
 from .dnf_union_splitter import DNFUnionSplitter
 from .extract_nested_logicals import ExtractNestedLogicals
 from .flatten import Flatten
+from .format_outputs import FormatOutputs
 
-__all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten"]
+__all__ = ["DischargeConstraints", "DNFUnionSplitter", "ExtractNestedLogicals", "Flatten", "FormatOutputs"]
@@ -150,7 +150,7 @@ class DNFExtractor(Visitor):
 
         replacement_tasks: list[ir.Task] = []
         for body in replacement_bodies:
-            new_task = f.logical(body)
+            new_task = f.logical(body, node.hoisted)
            replacement_tasks.append(new_task)
         self.replaced_by[node] = replacement_tasks
 
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from relationalai.semantics.metamodel import ir, factory as f, helpers
-from relationalai.semantics.metamodel.visitor import Rewriter
+from relationalai.semantics.metamodel.visitor import Rewriter, collect_by_type
 from relationalai.semantics.metamodel.compiler import Pass
 from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
 from relationalai.semantics.metamodel import dependency
@@ -61,11 +61,10 @@ class LogicalExtractor(Rewriter):
 
         # if there are aggregations, make sure we don't expose the projected and input vars,
         # but expose groupbys
-        for child in node.body:
-            if isinstance(child, ir.Aggregate):
-                exposed_vars.difference_update(child.projection)
-                exposed_vars.difference_update(helpers.aggregate_inputs(child))
-                exposed_vars.update(child.group)
+        for agg in collect_by_type(ir.Aggregate, node):
+            exposed_vars.difference_update(agg.projection)
+            exposed_vars.difference_update(helpers.aggregate_inputs(agg))
+            exposed_vars.update(agg.group)
         # add the values (hoisted)
         exposed_vars.update(helpers.hoisted_vars(logical.hoisted))
 
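
Note: the old loop only looked at the logical's direct children, so an Aggregate nested inside an inner task was missed; collect_by_type walks the whole subtree. A toy sketch of the difference (a simplified node type, not the package's visitor machinery):

    from dataclasses import dataclass, field

    @dataclass
    class Node:  # toy stand-in for an IR task
        kind: str
        body: list = field(default_factory=list)

    def collect_by_kind(kind: str, node: Node) -> list:
        # Recursive collection: finds matches at any depth, not just in node.body.
        found = [node] if node.kind == kind else []
        for child in node.body:
            found.extend(collect_by_kind(kind, child))
        return found

    tree = Node("logical", [Node("logical", [Node("aggregate")])])
    print(len([c for c in tree.body if c.kind == "aggregate"]))  # 0 - direct children only
    print(len(collect_by_kind("aggregate", tree)))               # 1 - whole subtree
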
@@ -3,12 +3,11 @@ from dataclasses import dataclass
 from typing import cast, Optional, TypeVar
 from typing import Tuple
 
-from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers, types
+from relationalai.semantics.metamodel import builtins, ir, factory as f, helpers
 from relationalai.semantics.metamodel.compiler import Pass, group_tasks
 from relationalai.semantics.metamodel.util import OrderedSet, ordered_set, NameCache
 from relationalai.semantics.metamodel import dependency
-from relationalai.semantics.metamodel.util import FrozenOrderedSet, filter_by_type
-from relationalai.semantics.metamodel.typer.typer import to_type, is_primitive
+from relationalai.semantics.metamodel.typer.typer import to_type
 
 class Flatten(Pass):
     """
@@ -225,15 +224,26 @@ class Flatten(Pass):
             "ranks": ir.Rank,
         })
 
-        # if there are outputs, adjust them (depending on the config for wide vs gnf)
+        # If there are outputs, flatten each into its own top-level rule, along with its
+        # dependencies.
         if groups["outputs"]:
-            if self._handle_outputs:
-                return self.adjust_outputs(task, body, groups, ctx)
-            else:
-                # When we do not handle outputs. For example, in SQL compiler. We need to leave output as a top-level element.
+            if not self._handle_outputs:
                 ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
                 return Flatten.HandleResult(None)
 
+            # Analyze the dependencies in the newly rewritten body
+            new_logical = ir.Logical(task.engine, task.hoisted, tuple(body))
+            info = dependency.analyze(new_logical)
+
+            for output in groups["outputs"]:
+                assert(isinstance(output, ir.Output))
+                new_body = info.task_dependencies(output)
+                new_body.update(ctx.extra_tasks)
+                new_body.add(output)
+                ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, task.hoisted, tuple(new_body), task.annotations))
+
+            return Flatten.HandleResult(None)
+
         # if there are updates, extract as a new top level rule
         if groups["updates"]:
             # add task dependencies to the body
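
Note: instead of delegating to adjust_outputs, Flatten now emits one top-level rule per output, containing just that output's dependencies plus ctx.extra_tasks. A toy illustration of the regrouping, with plain strings standing in for IR tasks and a lookup table standing in for dependency.analyze:

    body = ["filter_a", "filter_b", "output_1", "output_2"]
    deps = {"output_1": {"filter_a"}, "output_2": {"filter_a", "filter_b"}}
    extra_tasks = []

    top_level = []
    for task in body:
        if task.startswith("output"):
            # Dependencies of this output, plus any extra tasks, plus the output itself.
            top_level.append(sorted(deps[task]) + list(extra_tasks) + [task])

    print(top_level)
    # [['filter_a', 'output_1'], ['filter_a', 'filter_b', 'output_2']]
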
@@ -455,147 +465,6 @@ class Flatten(Pass):
             task.annotations
         ))
 
-    #--------------------------------------------------
-    # GNF vs wide output support
-    #--------------------------------------------------
-    def adjust_outputs(self, task: ir.Logical, body: OrderedSet[ir.Task], groups: dict[str, OrderedSet[ir.Task]], ctx: Context):
-
-        # for wide outputs, only adjust the output task to include the keys.
-        if ctx.options.get("wide_outputs", False):
-            for output in groups["outputs"]:
-                assert(isinstance(output, ir.Output))
-                if output.keys:
-                    body.remove(output)
-                    body.add(self.rewrite_wide_output(output))
-            # self.remove_subsumptions(body, ctx)
-            return Flatten.HandleResult(ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations))
-
-        # for GNF outputs we need to generate a rule for each "column" in the output
-        else:
-            # first split outputs in potentially multiple outputs, one for each "column"
-            for output in groups["outputs"]:
-                assert(isinstance(output, ir.Output))
-                if output.keys:
-                    # we will replace the output bellow,
-                    body.remove(output)
-
-                    is_export = builtins.export_annotation in output.annotations
-
-                    # generate an output for each "column"
-                    # output looks like def output(:cols, :col000, key0, key1, value):
-                    original_cols = OrderedSet()
-                    for idx, alias in enumerate(output.aliases):
-                        # skip None values which are used as a placeholder for missing values
-                        if alias[1] is None:
-                            continue
-                        original_cols.add(alias[1])
-                        self._generate_output_column(body, output, idx, alias, is_export)
-
-                    idx = len(output.aliases)
-                    for key in output.keys:
-                        if key not in original_cols:
-                            self._generate_output_column(body, output, idx, (key.name, key), is_export)
-                            idx += 1
-
-            # analyse the resulting logical to be able to pull dependencies
-            logical = ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
-            info = dependency.analyze(logical)
-
-            # now extract a logical for each output, bringing together its dependencies
-            for output in filter_by_type(body, ir.Output):
-                deps = info.task_dependencies(output)
-                # TODO: verify safety of doing this
-                # self.remove_subsumptions(deps, ctx)
-
-                deps.add(output)
-                ctx.rewrite_ctx.top_level.append(ir.Logical(task.engine, tuple(), tuple(deps)))
-
-            return Flatten.HandleResult(None)
-
-    def _generate_output_column(self, body: OrderedSet[ir.Task], output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
-        if not output.keys:
-            return output
-
-        aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
-        aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
-
-        for k in output.keys:
-            aliases.append((f"key_{k.name}_{idx}", k))
-
-        if (is_export and
-            isinstance(alias[1], ir.Var) and
-            (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
-
-            uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
-            body.add(f.lookup(builtins.uuid_to_string, [alias[1], uuid]))
-            aliases.append((uuid.name, uuid))
-        else:
-            aliases.append(alias)
-
-        body.add(ir.Output(
-            output.engine,
-            FrozenOrderedSet.from_iterable(aliases),
-            output.keys,
-            output.annotations
-        ))
-
-
-    def remove_subsumptions(self, body:OrderedSet[ir.Task], ctx: Context):
-        # remove from the body all the tasks that are subsumed by some other task in the set;
-        # this can be done because some tasks are references to extracted nested logical that
-        # contain filters they dependend on, so we don't need those filters here if the
-        # reference is present.
-        for logical in filter_by_type(body, ir.Logical):
-            if logical.id in ctx.included:
-                # if the logical id is included, it means it's a reference to an extracted
-                # rule, so remove all other items in the body that are already included in
-                # the body referenced by it
-                for item in body:
-                    if item in ctx.included[logical.id]:
-                        body.remove(item)
-
-
-    def rewrite_wide_output(self, output: ir.Output):
-        assert(output.keys)
-
-        # only append keys that are not already in the output
-        suffix_keys = []
-        for key in output.keys:
-            if all([val is not key for _, val in output.aliases]):
-                suffix_keys.append(key)
-
-        aliases: OrderedSet[Tuple[str, ir.Value]] = ordered_set()
-
-        # add the remaining args, unless it is already a key
-        for name, val in output.aliases:
-            if not isinstance(val, ir.Var) or val not in suffix_keys:
-                aliases.add((name, val))
-
-        # add the keys to the output
-        for key in suffix_keys:
-            aliases.add((key.name, key))
-
-        # TODO - we are assuming that the Rel compiler will translate nullable lookups
-        # properly, returning a `Missing` if necessary, like this:
-        # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
-        return ir.Output(
-            output.engine,
-            aliases.frozen(),
-            output.keys,
-            output.annotations
-        )
-
-    # TODO: in the rel compiler, see if we can do this outer join
-    # 1. number of keys
-    # 2. each relation
-    # 3. each variable, starting with the keys
-    # 4. tag output with @arrow
-
-    # @arrow def output(_book, _book_title, _author_name):
-    # rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
-    # def output(p, n, c):
-    # rel_primitive_outer_join(#1, name, coolness, p, n, c)
-
 #--------------------------------------------------
 # Helpers
 #--------------------------------------------------
@@ -0,0 +1,165 @@
+from __future__ import annotations
+from typing import Tuple
+
+from relationalai.semantics.metamodel import builtins, ir, factory as f, types, visitor
+from relationalai.semantics.metamodel.compiler import Pass, group_tasks
+from relationalai.semantics.metamodel.util import OrderedSet
+from relationalai.semantics.metamodel.util import FrozenOrderedSet
+from relationalai.semantics.metamodel.typer.typer import is_primitive
+
+class FormatOutputs(Pass):
+    def __init__(self, handle_outputs: bool=True):
+        super().__init__()
+        self._handle_outputs = handle_outputs
+
+    #--------------------------------------------------
+    # Public API
+    #--------------------------------------------------
+    def rewrite(self, model: ir.Model, options:dict={}) -> ir.Model:
+        wide_outputs = options.get("wide_outputs", False)
+        return self.OutputRewriter(wide_outputs).walk(model)
+
+    class OutputRewriter(visitor.Rewriter):
+        def __init__(self, wide_outputs: bool = False):
+            super().__init__()
+            self.wide_outputs = wide_outputs
+
+        def handle_logical(self, node: ir.Logical, parent: ir.Node):
+            # Rewrite children first
+            node = super().handle_logical(node, parent)
+
+            groups = group_tasks(node.body, {
+                "outputs": ir.Output,
+            })
+
+            # If no outputs, return as is
+            if not groups["outputs"]:
+                return node
+
+            return adjust_outputs(node, groups["outputs"], self.wide_outputs)
+
+#--------------------------------------------------
+# GNF vs wide output support
+#--------------------------------------------------
+def adjust_outputs(task: ir.Logical, outputs: OrderedSet[ir.Task], wide_outputs: bool = False):
+
+    body = list(task.body)
+
+    # For wide outputs, only adjust the output task to include the keys.
+    if wide_outputs:
+        for output in outputs:
+            assert(isinstance(output, ir.Output))
+            if output.keys:
+                body.remove(output)
+                body.append(rewrite_wide_output(output))
+        return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
+
+    # For GNF outputs we need to generate a rule for each "column" in the output
+    else:
+        # First split outputs in potentially multiple outputs, one for each "column"
+        for output in outputs:
+            assert(isinstance(output, ir.Output))
+            if output.keys:
+                # Remove the original output. This is replaced by per-column outputs below
+                body.remove(output)
+
+                is_export = builtins.export_annotation in output.annotations
+
+                # Generate an output for each "column"
+                # output looks like def output(:cols, :col000, key0, key1, value):
+                original_cols = OrderedSet()
+                for idx, alias in enumerate(output.aliases):
+                    # Skip None values which are used as a placeholder for missing values
+                    if alias[1] is None:
+                        continue
+                    original_cols.add(alias[1])
+                    body.extend(_generate_output_column(output, idx, alias, is_export))
+
+                idx = len(output.aliases)
+                for key in output.keys:
+                    if key not in original_cols:
+                        body.extend(_generate_output_column(output, idx, (key.name, key), is_export))
+                        idx += 1
+
+        return ir.Logical(task.engine, task.hoisted, tuple(body), task.annotations)
+
+# TODO: return non list?
+def _generate_output_column(output: ir.Output, idx: int, alias: tuple[str, ir.Value], is_export: bool):
+    if not output.keys:
+        return [output]
+
+    aliases = [("cols", f.literal("cols", types.Symbol))] if not is_export else []
+    aliases.append(("col", f.literal(f"col{idx:03}", types.Symbol)))
+
+    # Append all keys at the start
+    for k in output.keys:
+        aliases.append((f"key_{k.name}_{idx}", k))
+
+    if (is_export and
+        isinstance(alias[1], ir.Var) and
+        (not is_primitive(alias[1].type) or alias[1].type == types.Hash)):
+
+        uuid = f.var(f"{alias[0]}_{idx}_uuid", types.String)
+        aliases.append((uuid.name, uuid))
+
+        return [
+            ir.Lookup(None, builtins.uuid_to_string, (alias[1], uuid)),
+            ir.Output(
+                output.engine,
+                FrozenOrderedSet.from_iterable(aliases),
+                output.keys,
+                output.annotations
+            )
+        ]
+    else:
+        aliases.append(alias)
+
+        return [
+            ir.Output(
+                output.engine,
+                FrozenOrderedSet.from_iterable(aliases),
+                output.keys,
+                output.annotations
+            )
+        ]
+
+def rewrite_wide_output(output: ir.Output):
+    assert(output.keys)
+
+    # Only append keys that are not already in the output
+    suffix_keys = []
+    for key in output.keys:
+        if all([val is not key for _, val in output.aliases]):
+            suffix_keys.append(key)
+
+    aliases: OrderedSet[Tuple[str, ir.Value]] = OrderedSet()
+
+    # Add the remaining args, unless it is already a key
+    for name, val in output.aliases:
+        if not isinstance(val, ir.Var) or val not in suffix_keys:
+            aliases.add((name, val))
+
+    # Add the keys to the output
+    for key in suffix_keys:
+        aliases.add((key.name, key))
+
+    # TODO - we are assuming that the Rel compiler will translate nullable lookups
+    # properly, returning a `Missing` if necessary, like this:
+    # (nested_192(_adult, _adult_name) or (not nested_192(_adult, _) and _adult_name = Missing)) and
+    return ir.Output(
+        output.engine,
+        aliases.frozen(),
+        output.keys,
+        output.annotations
+    )
+
+# TODO: in the rel compiler, see if we can do this outer join
+# 1. number of keys
+# 2. each relation
+# 3. each variable, starting with the keys
+# 4. tag output with @arrow
+
+# @arrow def output(_book, _book_title, _author_name):
+# rel_primitive_outer_join(#1, book_title, author_name, _book, _book_title, _author_name)
+# def output(p, n, c):
+# rel_primitive_outer_join(#1, name, coolness, p, n, c)
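
Note: a minimal usage sketch for the new pass, assuming FormatOutputs is exported from the same rewrite package as Flatten (per the __init__ change above) and that an ir.Model has already been built elsewhere; model construction is elided:

    from relationalai.semantics.metamodel.rewrite import FormatOutputs

    def format_for_backend(model, wide: bool = False):
        # GNF by default (one rule per output column); wide outputs on request.
        return FormatOutputs().rewrite(model, {"wide_outputs": wide})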