relationalai 0.13.4__py3-none-any.whl → 0.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/exec_txn_poller.py +51 -20
- relationalai/clients/local.py +15 -7
- relationalai/clients/resources/snowflake/__init__.py +2 -2
- relationalai/clients/resources/snowflake/direct_access_resources.py +8 -4
- relationalai/clients/resources/snowflake/snowflake.py +16 -11
- relationalai/experimental/solvers.py +8 -0
- relationalai/semantics/lqp/executor.py +3 -3
- relationalai/semantics/lqp/model2lqp.py +34 -28
- relationalai/semantics/lqp/passes.py +6 -3
- relationalai/semantics/lqp/result_helpers.py +76 -12
- relationalai/semantics/lqp/rewrite/__init__.py +2 -0
- relationalai/semantics/lqp/rewrite/extract_common.py +3 -1
- relationalai/semantics/lqp/rewrite/extract_keys.py +85 -20
- relationalai/semantics/lqp/rewrite/flatten_script.py +301 -0
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +12 -7
- relationalai/semantics/lqp/rewrite/quantify_vars.py +12 -3
- relationalai/semantics/lqp/rewrite/unify_definitions.py +9 -3
- relationalai/semantics/metamodel/dependency.py +9 -0
- relationalai/semantics/metamodel/executor.py +17 -10
- relationalai/semantics/metamodel/rewrite/__init__.py +2 -1
- relationalai/semantics/metamodel/rewrite/flatten.py +1 -2
- relationalai/semantics/metamodel/rewrite/format_outputs.py +131 -46
- relationalai/semantics/metamodel/rewrite/handle_aggregations_and_ranks.py +237 -0
- relationalai/semantics/metamodel/typer/typer.py +1 -1
- relationalai/semantics/reasoners/optimization/solvers_pb.py +101 -107
- relationalai/semantics/rel/compiler.py +7 -3
- relationalai/semantics/rel/executor.py +1 -1
- relationalai/tools/txn_progress.py +188 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/METADATA +1 -1
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/RECORD +33 -30
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/WHEEL +0 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/entry_points.txt +0 -0
- {relationalai-0.13.4.dist-info → relationalai-0.13.5.dist-info}/licenses/LICENSE +0 -0
relationalai/clients/exec_txn_poller.py
CHANGED
@@ -5,11 +5,14 @@ from typing import Dict, Optional, TYPE_CHECKING
 
 from relationalai import debugging
 from relationalai.clients.util import poll_with_specified_overhead
+from relationalai.clients.config import Config
 from relationalai.tools.cli_controls import create_progress
 from relationalai.util.format import format_duration
+from relationalai.tools.txn_progress import format_execution_tree
 
 if TYPE_CHECKING:
     from relationalai.clients.resources.snowflake import Resources
+    from relationalai.clients.resources.snowflake.snowflake import TxnStatusResponse
 
 # Polling behavior constants
 POLL_OVERHEAD_RATE = 0.1 # Overhead rate for exponential backoff
@@ -19,6 +22,14 @@ GREEN_COLOR = '\033[92m'
 GRAY_COLOR = '\033[90m'
 ENDC = '\033[0m'
 
+PRINT_TXN_PROGRESS_FLAG = "print_txn_progress"
+PRINT_INTERNAL_TXN_PROGRESS_FLAG = "print_txn_progress_internal"
+
+def should_print_txn_progress(config: Config) -> bool:
+    return bool(config.get(PRINT_TXN_PROGRESS_FLAG, False))
+
+def should_print_internal_txn_progress(config) -> bool:
+    return bool(config.get(PRINT_INTERNAL_TXN_PROGRESS_FLAG, False))
 
 class ExecTxnPoller:
     """
@@ -27,17 +38,19 @@ class ExecTxnPoller:
 
     def __init__(
         self,
-
-        resource: "Resources",
+        config: Config,
+        resource: Optional["Resources"] = None,
         txn_id: Optional[str] = None,
         headers: Optional[Dict] = None,
-        txn_start_time: Optional[float] = None
+        txn_start_time: Optional[float] = None
     ):
-        self.print_txn_progress =
+        self.print_txn_progress = should_print_txn_progress(config)
         self.res = resource
         self.txn_id = txn_id
         self.headers = headers or {}
         self.txn_start_time = txn_start_time or time.time()
+        self.print_internal_txn_progress = should_print_internal_txn_progress(config)
+        self.last_status: Optional[TxnStatusResponse] = None
 
     def __enter__(self) -> ExecTxnPoller:
         if not self.print_txn_progress:
@@ -53,17 +66,23 @@
         return self
 
     def __exit__(self, exc_type, exc_value, traceback) -> None:
-        if not self.print_txn_progress
+        if not self.print_txn_progress:
            return
        # Update to success message with duration
        total_duration = time.time() - self.txn_start_time
        txn_id = self.txn_id
        self.progress.update_main_status(
-            query_complete_message(txn_id, total_duration)
+            query_complete_message(txn_id, total_duration, internal_txn_progress=self._get_internal_progress())
        )
        self.progress.__exit__(exc_type, exc_value, traceback)
        return
 
+    def _get_internal_progress(self) -> Optional[Dict]:
+        """Get internal transaction progress if enabled and available."""
+        if self.print_internal_txn_progress and self.last_status:
+            return self.last_status.progress
+        return None
+
     def poll(self) -> bool:
         """
         Poll for transaction completion with interactive progress display.
@@ -79,44 +98,56 @@
         if self.print_txn_progress:
             # Update the main status to include the new txn_id
             self.progress.update_main_status_fn(
-                lambda: self.description_with_timing(txn_id),
+                lambda: self.description_with_timing(txn_id, self._get_internal_progress()),
             )
 
         # Don't show duration summary - we handle our own completion message
         def check_status() -> bool:
             """Check if transaction is complete."""
-
-
+            if self.res is None:
+                raise ValueError("Resource must be provided for polling.")
+            self.last_status = self.res._check_exec_async_status(txn_id, headers=self.headers)
+            return self.last_status.finished
 
-        with debugging.span("wait", txn_id=
+        with debugging.span("wait", txn_id=txn_id):
             poll_with_specified_overhead(check_status, overhead_rate=POLL_OVERHEAD_RATE)
 
-
         return True
 
-    def description_with_timing(self, txn_id: str | None = None) -> str:
+    def description_with_timing(self, txn_id: str | None = None, internal_txn_progress: Dict | None = None) -> str:
        elapsed = time.time() - self.txn_start_time
        if txn_id is None:
            return query_progress_header(elapsed)
        else:
-            return query_progress_message(txn_id, elapsed)
+            return query_progress_message(txn_id, elapsed, internal_txn_progress)
 
 def query_progress_header(duration: float) -> str:
     # Don't print sub-second decimals, because it updates too fast and is distracting.
     duration_str = format_duration(duration, seconds_decimals=False)
     return f"Evaluating Query... {duration_str:>15}\n"
 
-def query_progress_message(id: str, duration: float) -> str:
-
+def query_progress_message(id: str, duration: float, internal_txn_progress: Dict | None = None) -> str:
+    result = (
         query_progress_header(duration) +
         # Print with whitespace to align with the end of the transaction ID
         f"{GRAY_COLOR}ID: {id}{ENDC}"
     )
+    if internal_txn_progress is not None:
+        result += format_execution_tree(internal_txn_progress)
+    return result
 
-def query_complete_message(id: str, duration: float, status_header: bool = False) -> str:
-
+def query_complete_message(id: str | None, duration: float, status_header: bool = False, internal_txn_progress: Dict | None = None) -> str:
+    out = (
         (f"{GREEN_COLOR}✅ " if status_header else "") +
         # Print with whitespace to align with the end of the transaction ID
-        f"Query Complete: {format_duration(duration):>21}
-
-
+        f"Query Complete: {format_duration(duration):>21}"
+    )
+    if id is None:
+        out += ENDC
+    else:
+        out += f"\n{GRAY_COLOR}ID: {id}{ENDC}"
+
+    if internal_txn_progress is not None:
+        out += format_execution_tree(internal_txn_progress)
+
+    return out
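Note on the change above: the progress-display switches now live next to the poller and are read from the client configuration. A minimal sketch of the flag semantics, using a dict-backed stand-in for relationalai's Config (only the two flag names and the config.get(key, default) access pattern come from the diff; the FakeConfig class is illustrative, not part of the released code):

    class FakeConfig:
        # Hypothetical stand-in for relationalai's Config; assumed to expose .get(key, default).
        def __init__(self, values: dict):
            self._values = values

        def get(self, key, default=None):
            return self._values.get(key, default)

    config = FakeConfig({
        "print_txn_progress": True,             # PRINT_TXN_PROGRESS_FLAG
        "print_txn_progress_internal": False,   # PRINT_INTERNAL_TXN_PROGRESS_FLAG
    })

    assert bool(config.get("print_txn_progress", False)) is True
    assert bool(config.get("print_txn_progress_internal", False)) is False

With both flags unset the poller stays quiet as before; when the internal flag is set, the last transaction status's progress payload is additionally rendered through format_execution_tree.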
relationalai/clients/local.py
CHANGED
@@ -4,6 +4,7 @@ import base64
 import json
 from urllib.parse import quote, urlencode
 import pyarrow as pa
+import time
 import requests
 from email import message_from_bytes, policy
 from email.message import EmailMessage
@@ -18,6 +19,7 @@ from .config import Config
 from .types import TransactionAsyncResponse
 from .util import get_pyrel_version
 from ..errors import ResponseStatusException
+from ..clients.exec_txn_poller import ExecTxnPoller
 from .. import debugging
 
 @dataclass
@@ -112,7 +114,7 @@ class LocalResources(ResourcesBase):
 
     def reset(self):
         raise NotImplementedError("reset not supported in local mode")
-
+
     #--------------------------------------------------
     # Check direct access is enabled (0 implemented)
     #--------------------------------------------------
@@ -332,7 +334,7 @@
     #--------------------------------------------------
     # Exec Async
     #--------------------------------------------------
-
+
     def _parse_multipart_response(self, response: requests.Response) -> Dict[str, Any]:
         response_map = {}
         response_map['results'] = {}
@@ -464,11 +466,17 @@
             "readonly": readonly,
         }
 
-
-
-
-
-
+        txn_start_time = time.time()
+        with ExecTxnPoller(
+            self.config,
+            txn_id=None,
+            txn_start_time=txn_start_time
+        ) as _poller: # unused, except for __enter__ and __exit__ display
+            parsed_response = self._create_transaction(
+                target_endpoint="create_txn",
+                payload=payload,
+                headers=headers
+            )
 
         state = parsed_response["state"]
         if state not in ["COMPLETED", "ABORTED"]:
relationalai/clients/resources/snowflake/__init__.py
CHANGED
@@ -2,7 +2,7 @@
 Snowflake resources module.
 """
 # Import order matters - Resources must be imported first since other classes depend on it
-from .snowflake import Resources, Provider, Graph, SnowflakeClient, APP_NAME, PYREL_ROOT_DB, ExecContext, PrimaryKey
+from .snowflake import Resources, Provider, Graph, SnowflakeClient, APP_NAME, PYREL_ROOT_DB, ExecContext, PrimaryKey
 from .engine_service import EngineType, INTERNAL_ENGINE_SIZES, ENGINE_SIZES_AWS, ENGINE_SIZES_AZURE
 # These imports depend on Resources, so they come after
 from .cli_resources import CLIResources
@@ -14,7 +14,7 @@ __all__ = [
     'Resources', 'DirectAccessResources', 'Provider', 'Graph', 'SnowflakeClient',
     'APP_NAME', 'PYREL_ROOT_DB', 'CLIResources', 'UseIndexResources', 'ExecContext', 'EngineType',
     'INTERNAL_ENGINE_SIZES', 'ENGINE_SIZES_AWS', 'ENGINE_SIZES_AZURE', 'PrimaryKey',
-    '
+    'create_resources_instance',
 ]
 
 
relationalai/clients/resources/snowflake/direct_access_resources.py
CHANGED
@@ -18,7 +18,7 @@ from snowflake.snowpark import Session
 
 # Import UseIndexResources to enable use_index functionality with direct access
 from .use_index_resources import UseIndexResources
-from .snowflake import TxnCreationResult
+from .snowflake import TxnCreationResult, TxnStatusResponse
 
 # Import helper functions from util
 from .util import is_engine_issue as _is_engine_issue, is_database_issue as _is_database_issue, collect_error_messages
@@ -314,7 +314,7 @@ class DirectAccessResources(UseIndexResources):
 
             return response.json()
 
-    def _check_exec_async_status(self, txn_id: str, headers: Dict[str, str] | None = None) ->
+    def _check_exec_async_status(self, txn_id: str, headers: Dict[str, str] | None = None) -> TxnStatusResponse:
         """Check whether the given transaction has completed."""
 
         with debugging.span("check_status"):
@@ -349,8 +349,12 @@
             elif reason == TXN_ABORT_REASON_GUARD_RAILS:
                 raise GuardRailsException(response_content.get("progress", {}))
 
-
-
+        return TxnStatusResponse(
+            txn_id=txn_id,
+            finished=status in ["COMPLETED", "ABORTED"],
+            abort_reason=response_content.get("abort_reason", None),
+            progress=response_content.get("progress", None),
+        )
 
     def _list_exec_async_artifacts(self, txn_id: str, headers: Dict[str, str] | None = None) -> Dict[str, Dict]:
         """Grab the list of artifacts produced in the transaction and the URLs to retrieve their contents."""
relationalai/clients/resources/snowflake/snowflake.py
CHANGED
@@ -15,7 +15,7 @@ import hashlib
 from dataclasses import dataclass
 
 from ....auth.token_handler import TokenHandler
-from
+from ....clients.exec_txn_poller import ExecTxnPoller
 import snowflake.snowpark
 
 from ....rel_utils import sanitize_identifier, to_fqn_relation_name
@@ -104,7 +104,6 @@ TERMINAL_TXN_STATES = ["COMPLETED", "ABORTED"]
 TXN_ABORT_REASON_TIMEOUT = "transaction timeout"
 GUARDRAILS_ABORT_REASON = "guard rail violation"
 
-PRINT_TXN_PROGRESS_FLAG = "print_txn_progress"
 ENABLE_GUARD_RAILS_FLAG = "enable_guard_rails"
 
 ENABLE_GUARD_RAILS_HEADER = "X-RAI-Enable-Guard-Rails"
@@ -113,9 +112,6 @@ ENABLE_GUARD_RAILS_HEADER = "X-RAI-Enable-Guard-Rails"
 # Helpers
 #--------------------------------------------------
 
-def should_print_txn_progress(config) -> bool:
-    return bool(config.get(PRINT_TXN_PROGRESS_FLAG, False))
-
 def should_enable_guard_rails(config) -> bool:
     return bool(config.get(ENABLE_GUARD_RAILS_FLAG, False))
 
@@ -157,6 +153,14 @@ class TxnCreationResult:
     artifact_info: Dict[str, Dict] # Populated if fast-path (state is COMPLETED/ABORTED)
 
 
+@dataclass
+class TxnStatusResponse:
+    """Transaction progress response for transaction status checks."""
+    txn_id: str
+    finished: bool
+    abort_reason: str | None = None
+    progress: Dict | None = None
+
 class Resources(ResourcesBase):
     def __init__(
         self,
@@ -1409,7 +1413,7 @@ Otherwise, remove it from your '{profile}' configuration profile.
     # Exec Async
     #--------------------------------------------------
 
-    def _check_exec_async_status(self, txn_id: str, headers: Dict | None = None):
+    def _check_exec_async_status(self, txn_id: str, headers: Dict | None = None) -> TxnStatusResponse:
         """Check whether the given transaction has completed."""
         if headers is None:
             headers = {}
@@ -1439,8 +1443,11 @@ Otherwise, remove it from your '{profile}' configuration profile.
             elif response_row.get("ABORT_REASON", "") == GUARDRAILS_ABORT_REASON:
                 raise GuardRailsException()
 
-
-
+        return TxnStatusResponse(
+            txn_id=txn_id,
+            finished=status in ["COMPLETED", "ABORTED"],
+            abort_reason=response_row.get("ABORT_REASON", None),
+        )
 
 
     def _list_exec_async_artifacts(self, txn_id: str, headers: Dict | None = None) -> Dict[str, Dict]:
@@ -1794,10 +1801,8 @@ Otherwise, remove it from your '{profile}' configuration profile.
 
         with debugging.span("transaction", **query_attrs_dict) as txn_span:
             txn_start_time = time.time()
-            print_txn_progress = should_print_txn_progress(self.config)
-
             with ExecTxnPoller(
-
+                config=self.config,
                 resource=self, txn_id=None, headers=request_headers,
                 txn_start_time=txn_start_time
             ) as poller:
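The new TxnStatusResponse dataclass above is what both _check_exec_async_status implementations now return, and what the poller keeps in last_status. A small, self-contained sketch of consuming such a value (the fetch_status helper and its contents are hypothetical; only the dataclass fields come from the diff):

    from dataclasses import dataclass
    from typing import Dict, Optional

    @dataclass
    class TxnStatusResponse:
        # Fields mirror the dataclass added in this release.
        txn_id: str
        finished: bool
        abort_reason: Optional[str] = None
        progress: Optional[Dict] = None

    def fetch_status(txn_id: str) -> TxnStatusResponse:
        # Hypothetical stand-in for Resources._check_exec_async_status().
        return TxnStatusResponse(txn_id=txn_id, finished=True, progress={"stages": []})

    status = fetch_status("txn-123")
    if status.finished:
        # The poller stores the last status so a progress tree can be rendered on exit.
        print(status.txn_id, status.abort_reason, status.progress)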
relationalai/experimental/solvers.py
CHANGED
@@ -533,6 +533,14 @@ class Solver:
         self.engine_size = engine_size or settings.pop("engine_size", None)
         self.engine_auto_suspend_mins = auto_suspend_mins or settings.pop("auto_suspend_mins", None)
 
+        # Set default CSV store setting if not already configured
+        if "store" not in settings:
+            settings["store"] = {}
+        if "csv" not in settings["store"]:
+            settings["store"]["csv"] = {}
+        if "enabled" not in settings["store"]["csv"]:
+            settings["store"]["csv"]["enabled"] = True
+
         # The settings are used when creating a solver engine, they
         # may configure each individual solver.
         self.engine_settings = settings
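The added block defaults the solver engine's CSV store to enabled while leaving any explicit user setting untouched. An equivalent way to express the same guarded defaults, shown only as an illustration (the shipped code uses the explicit membership checks above, not setdefault):

    def apply_csv_store_default(settings: dict) -> dict:
        # Create missing levels, but never overwrite a value the caller already set.
        settings.setdefault("store", {}).setdefault("csv", {}).setdefault("enabled", True)
        return settings

    assert apply_csv_store_default({}) == {"store": {"csv": {"enabled": True}}}
    # An explicit opt-out is preserved:
    assert apply_csv_store_default({"store": {"csv": {"enabled": False}}}) == {"store": {"csv": {"enabled": False}}}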
relationalai/semantics/lqp/executor.py
CHANGED
@@ -460,8 +460,8 @@ class LQPExecutor(e.Executor):
         txid = raw_results.transaction['id']
 
         try:
-            cols, extra_cols = self._compute_cols(task, final_model)
-            df, errs = result_helpers.format_results(raw_results, cols)
+            cols, extra_cols, key_locs = self._compute_cols(task, final_model)
+            df, errs = result_helpers.format_results(raw_results, cols, key_locs)
             self.report_errors(errs)
 
             # Rename columns if wide outputs is enabled
@@ -488,7 +488,7 @@
                 return DataFrame([full_path], columns=["path"])
             else:
                 raise ValueError("The CSV export was not successful!")
-
+
             return self._postprocess_df(self.config, df, extra_cols)
 
         except Exception as e:
relationalai/semantics/lqp/model2lqp.py
CHANGED
@@ -126,37 +126,43 @@ def _translate_to_decls(ctx: TranslationCtx, rule: ir.Logical) -> list[lqp.Decla
 def _translate_to_constraint_decls(ctx: TranslationCtx, rule: ir.Logical) -> list[lqp.Declaration]:
     constraint_decls: list[lqp.Declaration] = []
     for task in rule.body:
-
-
-        assert fd is not None
-
-        # check for unresolved types
-        if any(types.is_any(var.type) for var in fd.keys + fd.values):
-            warn(f"Ignoring FD with unresolved type: {fd}")
+        if isinstance(task, ir.Logical):
+            constraint_decls.extend(_translate_to_constraint_decls(ctx, task))
             continue
-
-
-
-
-
-        lqp_guard = mk_abstraction(lqp_typed_vars, mk_and(lqp_guard_atoms))
-        lqp_keys:list[lqp.Var] = [var for (var, _) in lqp_typed_keys] # type: ignore
-        lqp_values:list[lqp.Var] = [var for (var, _) in lqp_typed_values] # type: ignore
-        lqp_id = utils.lqp_hash(fd.canonical_str)
-        lqp_name:lqp.RelationId = lqp.RelationId(id=lqp_id, meta=None)
-
-        fd_decl = lqp.FunctionalDependency(
-            name=lqp_name,
-            guard=lqp_guard,
-            keys=lqp_keys,
-            values=lqp_values,
-            meta=None
-        )
-
-        constraint_decls.append(fd_decl)
-
+        else:
+            assert isinstance(task, ir.Require)
+            decl = _translate_to_constraint_decl(ctx, task)
+            if decl is not None:
+                constraint_decls.append(decl)
     return constraint_decls
 
+def _translate_to_constraint_decl(ctx: TranslationCtx, rule: ir.Require) -> Optional[lqp.Declaration]:
+    fd = normalized_fd(rule)
+    assert fd is not None
+
+    # check for unresolved types
+    if any(types.is_any(var.type) for var in fd.keys + fd.values):
+        warn(f"Ignoring FD with unresolved type: {fd}")
+        return None
+
+    lqp_typed_keys = [_translate_term(ctx, key) for key in fd.keys]
+    lqp_typed_values = [_translate_term(ctx, value) for value in fd.values]
+    lqp_typed_vars:list[Tuple[lqp.Var, lqp.Type]] = lqp_typed_keys + lqp_typed_values # type: ignore
+    lqp_guard_atoms = [_translate_to_atom(ctx, atom) for atom in fd.guard]
+    lqp_guard = mk_abstraction(lqp_typed_vars, mk_and(lqp_guard_atoms))
+    lqp_keys:list[lqp.Var] = [var for (var, _) in lqp_typed_keys] # type: ignore
+    lqp_values:list[lqp.Var] = [var for (var, _) in lqp_typed_values] # type: ignore
+    lqp_id = utils.lqp_hash(fd.canonical_str)
+    lqp_name:lqp.RelationId = lqp.RelationId(id=lqp_id, meta=None)
+
+    return lqp.FunctionalDependency(
+        name=lqp_name,
+        guard=lqp_guard,
+        keys=lqp_keys,
+        values=lqp_values,
+        meta=None
+    )
+
 def _translate_algorithms(ctx: TranslationCtx, task: ir.Logical) -> list[lqp.Declaration]:
     assert is_algorithm_logical(task)
     decls: list[lqp.Declaration] = []
relationalai/semantics/lqp/passes.py
CHANGED
@@ -2,11 +2,12 @@ from relationalai.semantics.metamodel.compiler import Pass
 from relationalai.semantics.metamodel.typer import Checker, InferTypes
 
 from ..metamodel.rewrite import (
-    DNFUnionSplitter,
+    DNFUnionSplitter, Flatten, FormatOutputs, ExtractNestedLogicals,
+    # HandleAggregationsAndRanks
 )
 from .rewrite import (
     AlgorithmPass, AnnotateConstraints, CDC, ConstantsToVars, DeduplicateVars,
-    ExtractCommon, EliminateData, ExtractKeys, FunctionAnnotations, PeriodMath,
+    ExtractCommon, EliminateData, ExtractKeys, FlattenScript, FunctionAnnotations, PeriodMath,
     QuantifyVars, Splinter, SplitMultiCheckRequires, UnifyDefinitions,
 )
 
@@ -17,13 +18,15 @@ def lqp_passes() -> list[Pass]:
         AnnotateConstraints(),
         Checker(),
         CDC(), # specialize to physical relations before extracting nested and typing
-        ExtractNestedLogicals(),
+        ExtractNestedLogicals(),
         InferTypes(),
         DNFUnionSplitter(), # Handle unions that require DNF decomposition
         ExtractKeys(), # Create a logical for each valid combinations of keys
         FormatOutputs(),
         ExtractCommon(), # Extracts tasks that will become common after Flatten into their own definition
         Flatten(), # Move nested tasks to the top level, and various related things touched along the way
+        FlattenScript(), # Additional flattening specific to scripts
+        # HandleAggregationsAndRanks(), # Handle aggregation and rank dependencies
         Splinter(), # Splits multi-headed rules into multiple rules
         QuantifyVars(), # Adds missing existentials
         EliminateData(), # Turns Data nodes into ordinary relations.
relationalai/semantics/lqp/result_helpers.py
CHANGED
@@ -13,8 +13,12 @@ from relationalai.clients.result_helpers import format_columns, format_value, me
     sort_data_frame_result
 from relationalai.tools.constants import Generation
 
-
-
+# Convert LQP results into the expected single wide table dataframe for the end user
+# - Requires identifying and unrolling all GNF relations and populating any resulting nulls
+# - Relies on expected ordering of results, as we do not have IDs or names associated with columns here.
+# - At the end, col names are rewritten with the expected output names blindly; we trust the stitching
+#   has been done correctly for the names to line up with the correct columns
+def format_results(results, result_cols:List[str]|None = None, key_locations:List[int]|None = None) -> Tuple[DataFrame, List[Any]]:
     with debugging.span("format_results"):
         data_frame = DataFrame()
         problems = defaultdict(
@@ -37,8 +41,15 @@ def format_results(results, result_cols:List[str]|None = None) -> Tuple[DataFra
 
         # Check if there are any results to process
        if len(results.results):
-            ret_cols = result_cols or []
-
+            ret_cols = result_cols or [] # output column names
+            key_locations = key_locations or [] # where output keys are located in outputs
+            out_keys_n = len(key_locations) # number of keys in output
+            assert out_keys_n <= len(ret_cols)
+            out_vals_n = len(ret_cols) - out_keys_n # number of values in output
+
+            # only create cols for values, we handle keys separately as they are not GNF
+            has_cols:List[DataFrame] = [DataFrame() for _ in range(0, out_vals_n)]
+            keys_data_frame = DataFrame()
             key_len = 0
 
             for result in results.results:
@@ -46,7 +57,7 @@ def format_results(results, result_cols:List[str]|None = None) -> Tuple[DataFra
                 result_frame = result["table"].to_pandas()
                 types = [
                     t
-                    for t in
+                    for t in relation_id.split("/")
                     if t != "" and not t.startswith(":")
                 ]
 
@@ -168,13 +179,15 @@ def format_results(results, result_cols:List[str]|None = None) -> Tuple[DataFra
                 else:
                     result_frame = format_columns(result_frame, types, Generation.QB)
                 result["table"] = result_frame
-                if "/:output" in
-                    and "_cols_col" in
+                if "/:output" in relation_id \
+                    and "_cols_col" in relation_id:
                     # Match rows with an id like "/:output.*_cols_col[0-9]+"
-
-
+                    # These should be all of the GNF value outputs
+                    matched = re.search(r"_cols_col([0-9]+)", relation_id)
+                    assert matched, f"Column id not found for: {relation_id}"
                     col_ix = int(matched.group(1))
 
+                    # Generate col names and write them into the df (idn for keys, vn for cols)
                     key_cols = [f"id{i}" for i in range(0, len(result_frame.columns) - 1)]
                     key_len = len(key_cols)
                     result_frame.columns = [*key_cols, f"v{col_ix}"]
@@ -183,20 +196,71 @@ def format_results(results, result_cols:List[str]|None = None) -> Tuple[DataFra
                         has_cols[col_ix] = result_frame
                     else:
                         has_cols[col_ix] = pd.concat([has_cols[col_ix], result_frame], ignore_index=True)
-                elif ":output" in
+                elif ":output" in relation_id \
+                    and "_keys" in relation_id:
+                    # data for all keys (wide), to merge in later
+                    keys_data_frame = result_frame
+
+                    # Rename wide key col names to match key cols in df_wide_reset
+                    keys_data_frame.columns = pd.RangeIndex(len(keys_data_frame.columns))
+                    keys_data_frame = keys_data_frame.rename(columns=lambda c: f"id{c}")
+
+                elif ":output" in relation_id: # wide outputs case
                     data_frame = pd.concat(
                         [data_frame, result_frame], ignore_index=True
                     )
 
+            # GNF values case: stitch together output vals and keys into one wide dataframe
             if any(not col.empty for col in has_cols):
+                # Merge value cols together by their key cols
                 key_cols = [f"id{i}" for i in range(0, key_len)]
                 df_wide_reset = reduce(lambda left, right: merge_columns(left, right, key_cols), has_cols)
-
+
+                # Join wide keys with wide vals (keys all at the front; still needs reordering)
+                data_frame = pd.merge(keys_data_frame, df_wide_reset, on=key_cols, how='outer')
+
+                # Reorder outputs
+                if key_locations:
+                    data_frame = _shift_keys(data_frame, keys_data_frame, out_keys_n, key_locations)
+
+                else: # if no keys in output, just drop all of the key cols
+                    data_frame = data_frame.drop(columns=key_cols)
+
+            # Empty values case: reorder/drop keys as needed
+            elif not keys_data_frame.empty:
+                if key_locations: # Reorder outputs
+                    data_frame = _shift_keys(keys_data_frame, keys_data_frame, out_keys_n, key_locations)
+                else: # if there are no keys to output, we may still need to populate nulls for output values
+                    # Take into account the cols that could contain values (even though they're empty)
+                    key_cols = [f"id{i}" for i in range(0, len(keys_data_frame.columns))]
+                    has_cols.append(keys_data_frame) # include the keys so we know how many nulls to generate
+                    df_wide_reset = reduce(lambda left, right: merge_columns(left, right, key_cols), has_cols)
+                    data_frame = df_wide_reset.drop(columns=key_cols)
 
             data_frame = sort_data_frame_result(data_frame)
 
-
+            # Overwrite column names with user-defined names
+            # The assumption is that the extra keys have been chopped off the front, and the
+            #   remaining columns are in the correct order and require renaming
+            if len(ret_cols) and len(data_frame.columns) <= len(ret_cols):
                 if result_cols is not None:
                     data_frame.columns = result_cols[: len(data_frame.columns)]
 
     return (data_frame, list(problems.values()))
+
+# Reorder `res` df to match user-specified output order and drop non-output key cols
+# E.g., Current df looks like:
+#   [out_key_pos1, out_key_pos4, hidden_key_1, value_pos2, value_pos3, value_pos5]
+# Target df looks like:
+#   [out_key_pos1, value_pos2, value_pos3, out_key_pos4, value_pos5]
+def _shift_keys(res:DataFrame, keys_data_frame:DataFrame, out_keys_n:int, key_locations:List[int]):
+    offset = len(keys_data_frame.columns) # index of first value in data frame
+    assert out_keys_n <= offset
+    extra_keys_n = offset - out_keys_n # number of keys to drop (those not in output)
+
+    # Shift output keys into the correct spots and drop the rest
+    for i in key_locations:
+        res = res[res.columns[1:].insert(i + offset - 1, res.columns[0])]
+        offset -= 1
+    res = res.drop(columns=res.columns[:extra_keys_n])
+    return res
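The comments on _shift_keys above spell out the target column layout. A toy pandas sketch of that reordering contract on made-up data (column names and values are invented for illustration; this does not reuse the library's merge_columns/_shift_keys code):

    import pandas as pd

    # "Keys first" layout produced by the merge step:
    # two output keys, one hidden key, then the value columns.
    df = pd.DataFrame({
        "out_key_pos1": [1, 2],
        "out_key_pos4": ["a", "b"],
        "hidden_key_1": [10, 20],
        "value_pos2":   [0.1, 0.2],
        "value_pos3":   [0.3, 0.4],
        "value_pos5":   [0.5, 0.6],
    })

    # Target layout: output keys moved to their user-requested positions,
    # non-output (hidden) keys dropped.
    target_order = ["out_key_pos1", "value_pos2", "value_pos3", "out_key_pos4", "value_pos5"]
    reordered = df[target_order]
    print(list(reordered.columns))
    # ['out_key_pos1', 'value_pos2', 'value_pos3', 'out_key_pos4', 'value_pos5']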
relationalai/semantics/lqp/rewrite/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from .eliminate_data import EliminateData
 from .extract_common import ExtractCommon
 from .extract_keys import ExtractKeys
 from .function_annotations import FunctionAnnotations, SplitMultiCheckRequires
+from .flatten_script import FlattenScript
 from .period_math import PeriodMath
 from .quantify_vars import QuantifyVars
 from .splinter import Splinter
@@ -22,6 +23,7 @@ __all__ = [
     "ExtractCommon",
     "ExtractKeys",
     "FunctionAnnotations",
+    "FlattenScript",
     "PeriodMath",
     "QuantifyVars",
     "Splinter",
relationalai/semantics/lqp/rewrite/extract_common.py
CHANGED
@@ -315,8 +315,10 @@ def _compute_local_dependencies(ctx: ExtractCommon.Context, binders: OrderedSet[
     return local_body
 
 def _is_binder(task: ir.Task):
+    binder_types = (ir.Lookup, ir.Construct, ir.Exists, ir.Data, ir.Not)
+
     # If the task itself is a binder
-    if
+    if isinstance(task, binder_types):
         return True
 
     # If the task is a Logical containing only binders
|