relationalai 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
@@ -41,7 +41,7 @@ from ..clients.types import AvailableModel, EngineState, Import, ImportSource, I
  from ..clients.config import Config, ConfigStore, ENDPOINT_FILE
  from ..clients.client import Client, ExportParams, ProviderBase, ResourcesBase
  from ..clients.direct_access_client import DirectAccessClient
- from ..clients.util import IdentityParser, escape_for_f_string, get_pyrel_version, get_with_retries, poll_with_specified_overhead, safe_json_loads, sanitize_module_name, scrub_exception, wrap_with_request_id, ms_to_timestamp
+ from ..clients.util import IdentityParser, escape_for_f_string, get_pyrel_version, get_with_retries, poll_with_specified_overhead, safe_json_loads, sanitize_module_name, scrub_exception, wrap_with_request_id, ms_to_timestamp, normalize_datetime
  from ..environments import runtime_env, HexEnvironment, SnowbookEnvironment
  from .. import dsl, rel, metamodel as m
  from ..errors import DuoSecurityFailed, EngineProvisioningFailed, EngineNameValidationException, EngineNotFoundException, EnginePending, EngineSizeMismatchWarning, EngineResumeFailed, Errors, InvalidAliasError, InvalidEngineSizeError, InvalidSourceTypeWarning, RAIAbortedTransactionError, RAIException, HexSessionException, SnowflakeAppMissingException, SnowflakeChangeTrackingNotEnabledException, SnowflakeDatabaseException, SnowflakeImportMissingException, SnowflakeInvalidSource, SnowflakeMissingConfigValuesException, SnowflakeProxyAPIDeprecationWarning, SnowflakeProxySourceError, SnowflakeRaiAppNotStarted, ModelNotFoundException, UnknownSourceWarning, ResponseStatusException, RowsDroppedFromTargetTableWarning, QueryTimeoutExceededException
@@ -1867,7 +1867,7 @@ Otherwise, remove it from your '{profile}' configuration profile.
  except Exception as e:
  err_message = str(e).lower()
  if _is_engine_issue(err_message):
- self.auto_create_engine(engine)
+ self.auto_create_engine(engine, headers=headers)
  self._exec_async_v2(
  database, engine, raw_code_b64, inputs, readonly, nowait_durable,
  headers=headers, bypass_index=bypass_index, language='lqp',
@@ -1907,7 +1907,7 @@ Otherwise, remove it from your '{profile}' configuration profile.
  except Exception as e:
  err_message = str(e).lower()
  if _is_engine_issue(err_message):
- self.auto_create_engine(engine)
+ self.auto_create_engine(engine, headers=headers)
  return self._exec_async_v2(
  database,
  engine,
@@ -1970,9 +1970,9 @@ Otherwise, remove it from your '{profile}' configuration profile.
  if use_graph_index:
  # we do not provide a default value for query_timeout_mins so that we can control the default on app level
  if query_timeout_mins is not None:
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, ?, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
+ res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
  else:
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
+ res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
  txn_id = json.loads(res[0]["EXEC_INTO_TABLE"])["rai_transaction_id"]
  rejected_rows = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows", [])
  rejected_rows_count = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows_count", 0)
@@ -2047,9 +2047,10 @@ Otherwise, remove it from your '{profile}' configuration profile.
  app_name = self.get_app_name()

  source_types = dict[str, SourceInfo]()
- partitioned_sources: dict[str, dict[str, list[str]]] = defaultdict(
+ partitioned_sources: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
  lambda: defaultdict(list)
  )
+ fqn_to_parts: dict[str, tuple[str, str, str]] = {}

  for source in sources:
  parser = IdentityParser(source, True)
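For orientation, the loop over `sources` (continued in the next hunk) fills these new structures roughly as follows; a sketch with made-up table names, not output from the package:

    # Hypothetical contents after parsing two sources; all names are illustrative.
    partitioned_sources = {
        "SALES_DB": {
            "PUBLIC": [
                {"entity": "ORDERS", "identity": "SALES_DB.PUBLIC.ORDERS"},
                {"entity": "CUSTOMERS", "identity": "SALES_DB.PUBLIC.CUSTOMERS"},
            ],
        },
    }
    fqn_to_parts = {
        "SALES_DB.PUBLIC.ORDERS": ("SALES_DB", "PUBLIC", "ORDERS"),
        "SALES_DB.PUBLIC.CUSTOMERS": ("SALES_DB", "PUBLIC", "CUSTOMERS"),
    }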
@@ -2057,82 +2058,219 @@ Otherwise, remove it from your '{profile}' configuration profile.
  assert len(parsed) == 4, f"Invalid source: {source}"
  db, schema, entity, identity = parsed
  assert db and schema and entity and identity, f"Invalid source: {source}"
- source_types[identity] = cast(SourceInfo, {"type": None, "state": "", "columns_hash": None})
- partitioned_sources[db][schema].append(entity)
-
- # TODO: Move to NA layer
- query = (
- " UNION ALL ".join(
- f"""SELECT
- inf.FQN,
- inf.KIND,
- inf.COLUMNS_HASH,
- IFF(DATEDIFF(second, ds.created_at::TIMESTAMP, inf.LAST_DDL::TIMESTAMP) > 0, 'STALE', 'CURRENT') AS STATE
- FROM (
- SELECT (SELECT {app_name}.api.normalize_fq_ids(ARRAY_CONSTRUCT(FQ_OBJECT_NAME))[0]:identifier::string) as FQ_OBJECT_NAME,
- CREATED_AT FROM {app_name}.api.data_streams
- WHERE RAI_DATABASE = '{PYREL_ROOT_DB}'
- ) ds
- RIGHT JOIN (
+ source_types[identity] = cast(
+ SourceInfo,
+ {
+ "type": None,
+ "state": "",
+ "columns_hash": None,
+ "table_created_at": None,
+ "stream_created_at": None,
+ "last_ddl": None,
+ },
+ )
+ partitioned_sources[db][schema].append({"entity": entity, "identity": identity})
+ fqn_to_parts[identity] = (db, schema, entity)
+
+ if not partitioned_sources:
+ return source_types
+
+ state_queries: list[str] = []
+ for db, schemas in partitioned_sources.items():
+ select_rows: list[str] = []
+ for schema, tables in schemas.items():
+ for table_info in tables:
+ select_rows.append(
+ "SELECT "
+ f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
+ f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
+ f"{IdentityParser.to_sql_value(table_info['entity'])} AS table_name"
+ )
+
+ if not select_rows:
+ continue
+
+ target_entities_clause = "\n UNION ALL\n ".join(select_rows)
+ # Main query:
+ # 1. Enumerate the target tables via target_entities.
+ # 2. Pull their metadata (last_altered, type) from INFORMATION_SCHEMA.TABLES.
+ # 3. Look up the most recent stream activity for those FQNs only.
+ # 4. Capture creation timestamps and use last_ddl vs created_at to classify each target,
+ # so we mark tables as stale when they were recreated even if column hashes still match.
+ state_queries.append(
+ f"""WITH target_entities AS (
+ {target_entities_clause}
+ ),
+ table_info AS (
+ SELECT
+ {app_name}.api.normalize_fq_ids(
+ ARRAY_CONSTRUCT(
+ CASE
+ WHEN t.table_catalog = UPPER(t.table_catalog) THEN t.table_catalog
+ ELSE '"' || t.table_catalog || '"'
+ END || '.' ||
+ CASE
+ WHEN t.table_schema = UPPER(t.table_schema) THEN t.table_schema
+ ELSE '"' || t.table_schema || '"'
+ END || '.' ||
+ CASE
+ WHEN t.table_name = UPPER(t.table_name) THEN t.table_name
+ ELSE '"' || t.table_name || '"'
+ END
+ )
+ )[0]:identifier::string AS fqn,
+ CONVERT_TIMEZONE('UTC', t.last_altered) AS last_ddl,
+ CONVERT_TIMEZONE('UTC', t.created) AS table_created_at,
+ t.table_type AS kind
+ FROM {db}.INFORMATION_SCHEMA.tables t
+ JOIN target_entities te
+ ON t.table_catalog = te.catalog_name
+ AND t.table_schema = te.schema_name
+ AND t.table_name = te.table_name
+ ),
+ stream_activity AS (
+ SELECT
+ sa.fqn,
+ MAX(sa.created_at) AS created_at
+ FROM (
+ SELECT
+ {app_name}.api.normalize_fq_ids(ARRAY_CONSTRUCT(fq_object_name))[0]:identifier::string AS fqn,
+ created_at
+ FROM {app_name}.api.data_streams
+ WHERE rai_database = '{PYREL_ROOT_DB}'
+ ) sa
+ JOIN table_info ti
+ ON sa.fqn = ti.fqn
+ GROUP BY sa.fqn
+ )
  SELECT
- (SELECT {app_name}.api.normalize_fq_ids(
- ARRAY_CONSTRUCT(
- CASE
- WHEN t.TABLE_CATALOG = UPPER(t.TABLE_CATALOG) THEN t.TABLE_CATALOG
- ELSE '"' || t.TABLE_CATALOG || '"'
- END || '.' ||
- CASE
- WHEN t.TABLE_SCHEMA = UPPER(t.TABLE_SCHEMA) THEN t.TABLE_SCHEMA
- ELSE '"' || t.TABLE_SCHEMA || '"'
- END || '.' ||
- CASE
- WHEN t.TABLE_NAME = UPPER(t.TABLE_NAME) THEN t.TABLE_NAME
- ELSE '"' || t.TABLE_NAME || '"'
- END
- )
- )[0]:identifier::string) as FQN,
- CONVERT_TIMEZONE('UTC', LAST_DDL) AS LAST_DDL,
- TABLE_TYPE as KIND,
- SHA2(LISTAGG(
- COLUMN_NAME ||
- CASE
- WHEN c.NUMERIC_PRECISION IS NOT NULL AND c.NUMERIC_SCALE IS NOT NULL
- THEN c.DATA_TYPE || '(' || c.NUMERIC_PRECISION || ',' || c.NUMERIC_SCALE || ')'
- WHEN c.DATETIME_PRECISION IS NOT NULL
- THEN c.DATA_TYPE || '(0,' || c.DATETIME_PRECISION || ')'
- WHEN c.CHARACTER_MAXIMUM_LENGTH IS NOT NULL
- THEN c.DATA_TYPE || '(' || c.CHARACTER_MAXIMUM_LENGTH || ')'
- ELSE c.DATA_TYPE
- END ||
- IS_NULLABLE,
- ','
- ) WITHIN GROUP (ORDER BY COLUMN_NAME), 256) as COLUMNS_HASH
- FROM {db}.INFORMATION_SCHEMA.TABLES t
- JOIN {db}.INFORMATION_SCHEMA.COLUMNS c
- ON t.TABLE_CATALOG = c.TABLE_CATALOG
- AND t.TABLE_SCHEMA = c.TABLE_SCHEMA
- AND t.TABLE_NAME = c.TABLE_NAME
- WHERE t.TABLE_CATALOG = {IdentityParser.to_sql_value(db)} AND ({" OR ".join(
- f"(t.TABLE_SCHEMA = {IdentityParser.to_sql_value(schema)} AND t.TABLE_NAME IN ({','.join(f'{IdentityParser.to_sql_value(table)}' for table in tables)}))"
- for schema, tables in schemas.items()
- )})
- GROUP BY t.TABLE_CATALOG, t.TABLE_SCHEMA, t.TABLE_NAME, t.LAST_DDL, t.TABLE_TYPE
- ) inf on inf.FQN = ds.FQ_OBJECT_NAME
- """
- for db, schemas in partitioned_sources.items()
+ ti.fqn,
+ ti.kind,
+ ti.last_ddl,
+ ti.table_created_at,
+ sa.created_at AS stream_created_at,
+ IFF(
+ DATEDIFF(second, sa.created_at::timestamp, ti.last_ddl::timestamp) > 0,
+ 'STALE',
+ 'CURRENT'
+ ) AS state
+ FROM table_info ti
+ LEFT JOIN stream_activity sa
+ ON sa.fqn = ti.fqn
+ """
  )
- + ";"
+
+ stale_fqns: list[str] = []
+ for state_query in state_queries:
+ for row in self._exec(state_query):
+ row_dict = row.as_dict() if hasattr(row, "as_dict") else dict(row)
+ row_fqn = row_dict["FQN"]
+ parser = IdentityParser(row_fqn, True)
+ fqn = parser.identity
+ assert fqn, f"Error parsing returned FQN: {row_fqn}"
+
+ source_types[fqn]["type"] = (
+ "TABLE" if row_dict["KIND"] == "BASE TABLE" else row_dict["KIND"]
+ )
+ source_types[fqn]["state"] = row_dict["STATE"]
+ source_types[fqn]["last_ddl"] = normalize_datetime(row_dict.get("LAST_DDL"))
+ source_types[fqn]["table_created_at"] = normalize_datetime(row_dict.get("TABLE_CREATED_AT"))
+ source_types[fqn]["stream_created_at"] = normalize_datetime(row_dict.get("STREAM_CREATED_AT"))
+ if row_dict["STATE"] == "STALE":
+ stale_fqns.append(fqn)
+
+ if not stale_fqns:
+ return source_types
+
+ # We batch stale tables by database/schema so each Snowflake query can hash
+ # multiple objects at once instead of issuing one statement per table.
+ stale_partitioned: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
+ lambda: defaultdict(list)
  )
+ for fqn in stale_fqns:
+ db, schema, table = fqn_to_parts[fqn]
+ stale_partitioned[db][schema].append({"table": table, "identity": fqn})
+
+ # Build one hash query per database, grouping schemas/tables inside so we submit
+ # at most a handful of set-based statements to Snowflake.
+ for db, schemas in stale_partitioned.items():
+ column_select_rows: list[str] = []
+ for schema, tables in schemas.items():
+ for table_info in tables:
+ # Build the literal rows for this db/schema so we can join back
+ # against INFORMATION_SCHEMA.COLUMNS in a single statement.
+ column_select_rows.append(
+ "SELECT "
+ f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
+ f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
+ f"{IdentityParser.to_sql_value(table_info['table'])} AS table_name"
+ )

- for row in self._exec(query):
- row_fqn = row["FQN"]
- parser = IdentityParser(row_fqn, True)
- fqn = parser.identity
- assert fqn, f"Error parsing returned FQN: {row_fqn}"
+ if not column_select_rows:
+ continue

- source_types[fqn]["type"] = "TABLE" if row["KIND"] == "BASE TABLE" else row["KIND"]
- source_types[fqn]["columns_hash"] = row["COLUMNS_HASH"]
- source_types[fqn]["state"] = row["STATE"]
+ target_entities_clause = "\n UNION ALL\n ".join(column_select_rows)
+ # Main query: compute deterministic column hashes for every stale table
+ # in this database/schema batch so we can compare schemas without a round trip per table.
+ column_query = f"""WITH target_entities AS (
+ {target_entities_clause}
+ ),
+ column_info AS (
+ SELECT
+ {app_name}.api.normalize_fq_ids(
+ ARRAY_CONSTRUCT(
+ CASE
+ WHEN c.table_catalog = UPPER(c.table_catalog) THEN c.table_catalog
+ ELSE '"' || c.table_catalog || '"'
+ END || '.' ||
+ CASE
+ WHEN c.table_schema = UPPER(c.table_schema) THEN c.table_schema
+ ELSE '"' || c.table_schema || '"'
+ END || '.' ||
+ CASE
+ WHEN c.table_name = UPPER(c.table_name) THEN c.table_name
+ ELSE '"' || c.table_name || '"'
+ END
+ )
+ )[0]:identifier::string AS fqn,
+ c.column_name,
+ CASE
+ WHEN c.numeric_precision IS NOT NULL AND c.numeric_scale IS NOT NULL
+ THEN c.data_type || '(' || c.numeric_precision || ',' || c.numeric_scale || ')'
+ WHEN c.datetime_precision IS NOT NULL
+ THEN c.data_type || '(0,' || c.datetime_precision || ')'
+ WHEN c.character_maximum_length IS NOT NULL
+ THEN c.data_type || '(' || c.character_maximum_length || ')'
+ ELSE c.data_type
+ END AS type_signature,
+ IFF(c.is_nullable = 'YES', 'YES', 'NO') AS nullable_flag
+ FROM {db}.INFORMATION_SCHEMA.COLUMNS c
+ JOIN target_entities te
+ ON c.table_catalog = te.catalog_name
+ AND c.table_schema = te.schema_name
+ AND c.table_name = te.table_name
+ )
+ SELECT
+ fqn,
+ HEX_ENCODE(
+ HASH_AGG(
+ HASH(
+ column_name,
+ type_signature,
+ nullable_flag
+ )
+ )
+ ) AS columns_hash
+ FROM column_info
+ GROUP BY fqn
+ """
+
+ for row in self._exec(column_query):
+ row_fqn = row["FQN"]
+ parser = IdentityParser(row_fqn, True)
+ fqn = parser.identity
+ assert fqn, f"Error parsing returned FQN: {row_fqn}"
+ source_types[fqn]["columns_hash"] = row["COLUMNS_HASH"]

  return source_types

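The rewritten logic now works in two passes: a metadata query classifies each source as STALE or CURRENT by comparing the stream's creation time with the table's last DDL, and only the STALE sources get a second, batched column-hash query. When a matching stream exists, the IFF/DATEDIFF classification boils down to a plain timestamp comparison; roughly (an illustrative sketch, not code from the package):

    from datetime import datetime, timezone

    # Approximate Python equivalent of
    # IFF(DATEDIFF(second, stream_created_at, last_ddl) > 0, 'STALE', 'CURRENT')
    def state_for(stream_created_at: datetime, last_ddl: datetime) -> str:
        return "STALE" if (last_ddl - stream_created_at).total_seconds() > 0 else "CURRENT"

    assert state_for(
        datetime(2024, 5, 1, 9, 0, tzinfo=timezone.utc),   # stream created
        datetime(2024, 5, 1, 9, 5, tzinfo=timezone.utc),   # table altered afterwards
    ) == "STALE"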
@@ -2142,12 +2280,13 @@ Otherwise, remove it from your '{profile}' configuration profile.
  invalid_sources = {}
  source_references = []
  for source, info in source_info.items():
- if info['type'] is None:
+ source_type = info.get("type")
+ if source_type is None:
  missing_sources.append(source)
- elif info['type'] not in ("TABLE", "VIEW"):
- invalid_sources[source] = info['type']
+ elif source_type not in ("TABLE", "VIEW"):
+ invalid_sources[source] = source_type
  else:
- source_references.append(f"{app_name}.api.object_reference('{info['type']}', '{source}')")
+ source_references.append(f"{app_name}.api.object_reference('{source_type}', '{source}')")

  if missing_sources:
  current_role = self.get_sf_session().get_current_role()
@@ -3045,6 +3184,7 @@ class DirectAccessResources(Resources):
  headers: Dict[str, str] | None = None,
  path_params: Dict[str, str] | None = None,
  query_params: Dict[str, str] | None = None,
+ skip_auto_create: bool = False,
  ) -> requests.Response:
  with debugging.span("direct_access_request"):
  def _send_request():
@@ -3066,7 +3206,8 @@ class DirectAccessResources(Resources):
  )

  # fix engine on engine error and retry
- if _is_engine_issue(message):
+ # Skip auto-retry if skip_auto_create is True to avoid recursion
+ if _is_engine_issue(message) and not skip_auto_create:
  engine = payload.get("engine_name", "") if payload else ""
  self.auto_create_engine(engine)
  response = _send_request()
@@ -3431,7 +3572,7 @@ class DirectAccessResources(Resources):
  return sorted(engines, key=lambda x: x["name"])

  def get_engine(self, name: str):
- response = self.request("get_engine", path_params={"engine_name": name, "engine_type": "logic"})
+ response = self.request("get_engine", path_params={"engine_name": name, "engine_type": "logic"}, skip_auto_create=True)
  if response.status_code == 404: # engine not found return 404
  return None
  elif response.status_code != 200:
@@ -3478,6 +3619,7 @@ class DirectAccessResources(Resources):
  payload=payload,
  path_params={"engine_type": "logic"},
  headers=headers,
+ skip_auto_create=True,
  )
  if response.status_code != 200:
  raise ResponseStatusException(
@@ -3489,6 +3631,7 @@ class DirectAccessResources(Resources):
  "delete_engine",
  path_params={"engine_name": name, "engine_type": "logic"},
  headers=headers,
+ skip_auto_create=True,
  )
  if response.status_code != 200:
  raise ResponseStatusException(
@@ -3499,6 +3642,7 @@ class DirectAccessResources(Resources):
  response = self.request(
  "suspend_engine",
  path_params={"engine_name": name, "engine_type": "logic"},
+ skip_auto_create=True,
  )
  if response.status_code != 200:
  raise ResponseStatusException(
@@ -3510,6 +3654,7 @@ class DirectAccessResources(Resources):
  "resume_engine",
  path_params={"engine_name": name, "engine_type": "logic"},
  headers=headers,
+ skip_auto_create=True,
  )
  if response.status_code != 200:
  raise ResponseStatusException(
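All of the engine-management calls above opt out of the automatic engine-recovery retry. The guard works roughly as sketched below; this is illustrative only, and `_send` and `_is_engine_issue` stand in for internals the diff shows only partially:

    # Simplified shape of DirectAccessResources.request with the new flag.
    def request(self, name, payload=None, skip_auto_create=False, **kwargs):
        response = self._send(name, payload=payload, **kwargs)          # hypothetical helper
        if self._is_engine_issue(response) and not skip_auto_create:    # hypothetical helper
            # Ordinary requests may transparently (re)create the engine and retry,
            # but get/create/delete/suspend/resume pass skip_auto_create=True so an
            # engine error raised while managing engines cannot recurse back here.
            self.auto_create_engine(payload.get("engine_name", "") if payload else "")
            response = self._send(name, payload=payload, **kwargs)
        return response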
@@ -38,10 +38,13 @@ class EngineState(TypedDict):
  auto_suspend: int|None
  suspends_at: datetime|None

- class SourceInfo(TypedDict):
+ class SourceInfo(TypedDict, total=False):
  type: str|None
  state: str
  columns_hash: str|None
+ table_created_at: datetime|None
+ stream_created_at: datetime|None
+ last_ddl: datetime|None
  source: str


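With total=False every key becomes optional, so dictionaries that predate the new timestamp fields still type-check, and consumers read the optional keys with .get() (as the UseIndexPoller hunks below now do). A minimal sketch of the relaxed shape:

    from __future__ import annotations
    from datetime import datetime
    from typing import TypedDict

    # Field set mirrors the diff; total=False makes every key optional.
    class SourceInfo(TypedDict, total=False):
        type: str | None
        state: str
        columns_hash: str | None
        table_created_at: datetime | None
        stream_created_at: datetime | None
        last_ddl: datetime | None
        source: str

    info: SourceInfo = {"type": "TABLE", "state": "CURRENT"}  # missing keys are allowed
    assert info.get("stream_created_at") is None              # optional keys read via .get()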
@@ -1,3 +1,5 @@
+ from __future__ import annotations
+
  from typing import Iterable, Dict, Optional, List, cast, TYPE_CHECKING
  import json
  import logging
@@ -5,7 +7,11 @@ import uuid

  from relationalai import debugging
  from relationalai.clients.cache_store import GraphIndexCache
- from relationalai.clients.util import get_pyrel_version, poll_with_specified_overhead
+ from relationalai.clients.util import (
+ get_pyrel_version,
+ normalize_datetime,
+ poll_with_specified_overhead,
+ )
  from relationalai.errors import (
  ERPNotRunningError,
  EngineProvisioningFailed,
@@ -29,6 +35,7 @@ from relationalai.tools.constants import WAIT_FOR_STREAM_SYNC, Generation
  # Set up logger for this module
  logger = logging.getLogger(__name__)

+
  try:
  from rich.console import Console
  from rich.table import Table
@@ -63,49 +70,49 @@ POLL_MAX_DELAY = 2.5 # Maximum delay between polls in seconds
  # This query calculates a hash of column metadata (name, type, precision, scale, nullable)
  # to detect if source table schema has changed since stream was created
  STREAM_COLUMN_HASH_QUERY = """
- SELECT
- FQ_OBJECT_NAME,
- SHA2(
- LISTAGG(
- value:name::VARCHAR ||
+ WITH stream_columns AS (
+ SELECT
+ fq_object_name,
+ HASH(
+ value:name::VARCHAR,
  CASE
- WHEN value:precision IS NOT NULL AND value:scale IS NOT NULL
- THEN CASE value:type::VARCHAR
- WHEN 'FIXED' THEN 'NUMBER'
- WHEN 'REAL' THEN 'FLOAT'
- WHEN 'TEXT' THEN 'TEXT'
- ELSE value:type::VARCHAR
- END || '(' || value:precision || ',' || value:scale || ')'
- WHEN value:precision IS NOT NULL AND value:scale IS NULL
- THEN CASE value:type::VARCHAR
- WHEN 'FIXED' THEN 'NUMBER'
- WHEN 'REAL' THEN 'FLOAT'
- WHEN 'TEXT' THEN 'TEXT'
- ELSE value:type::VARCHAR
- END || '(0,' || value:precision || ')'
- WHEN value:length IS NOT NULL
- THEN CASE value:type::VARCHAR
- WHEN 'FIXED' THEN 'NUMBER'
- WHEN 'REAL' THEN 'FLOAT'
- WHEN 'TEXT' THEN 'TEXT'
- ELSE value:type::VARCHAR
- END || '(' || value:length || ')'
+ WHEN value:precision IS NOT NULL AND value:scale IS NOT NULL THEN CASE value:type::VARCHAR
+ WHEN 'FIXED' THEN 'NUMBER'
+ WHEN 'REAL' THEN 'FLOAT'
+ WHEN 'TEXT' THEN 'TEXT'
+ ELSE value:type::VARCHAR
+ END || '(' || value:precision || ',' || value:scale || ')'
+ WHEN value:precision IS NOT NULL AND value:scale IS NULL THEN CASE value:type::VARCHAR
+ WHEN 'FIXED' THEN 'NUMBER'
+ WHEN 'REAL' THEN 'FLOAT'
+ WHEN 'TEXT' THEN 'TEXT'
+ ELSE value:type::VARCHAR
+ END || '(0,' || value:precision || ')'
+ WHEN value:length IS NOT NULL THEN CASE value:type::VARCHAR
+ WHEN 'FIXED' THEN 'NUMBER'
+ WHEN 'REAL' THEN 'FLOAT'
+ WHEN 'TEXT' THEN 'TEXT'
+ ELSE value:type::VARCHAR
+ END || '(' || value:length || ')'
  ELSE CASE value:type::VARCHAR
- WHEN 'FIXED' THEN 'NUMBER'
- WHEN 'REAL' THEN 'FLOAT'
- WHEN 'TEXT' THEN 'TEXT'
- ELSE value:type::VARCHAR
- END
- END ||
- CASE WHEN value:nullable::BOOLEAN THEN 'YES' ELSE 'NO' END,
- ','
- ) WITHIN GROUP (ORDER BY value:name::VARCHAR),
- 256
- ) AS STREAM_HASH
- FROM {app_name}.api.data_streams,
- LATERAL FLATTEN(input => COLUMNS) f
- WHERE RAI_DATABASE = '{rai_database}' AND FQ_OBJECT_NAME IN ({fqn_list})
- GROUP BY FQ_OBJECT_NAME;
+ WHEN 'FIXED' THEN 'NUMBER'
+ WHEN 'REAL' THEN 'FLOAT'
+ WHEN 'TEXT' THEN 'TEXT'
+ ELSE value:type::VARCHAR
+ END
+ END,
+ IFF(value:nullable::BOOLEAN, 'YES', 'NO')
+ ) AS column_signature
+ FROM {app_name}.api.data_streams,
+ LATERAL FLATTEN(input => columns)
+ WHERE rai_database = '{rai_database}'
+ AND fq_object_name IN ({fqn_list})
+ )
+ SELECT
+ fq_object_name AS FQ_OBJECT_NAME,
+ HEX_ENCODE(HASH_AGG(column_signature)) AS STREAM_HASH
+ FROM stream_columns
+ GROUP BY fq_object_name;
  """


@@ -296,9 +303,10 @@ class UseIndexPoller:
  Returns:
  List of truly stale sources that need to be deleted/recreated

- A source is truly stale if:
- - The stream doesn't exist (needs to be created), OR
- - The column hashes don't match (needs to be recreated)
+ A source is truly stale if any of the following apply:
+ - The stream doesn't exist (needs to be created)
+ - The source table was recreated after the stream (table creation timestamp is newer)
+ - The column hashes don't match (schema drift needs cleanup)
  """
  stream_hashes = self._get_stream_column_hashes(stale_sources, progress)

@@ -306,14 +314,30 @@ class UseIndexPoller:
  for source in stale_sources:
  source_hash = self.source_info[source].get("columns_hash")
  stream_hash = stream_hashes.get(source)
+ table_created_at_raw = self.source_info[source].get("table_created_at")
+ stream_created_at_raw = self.source_info[source].get("stream_created_at")
+
+ table_created_at = normalize_datetime(table_created_at_raw)
+ stream_created_at = normalize_datetime(stream_created_at_raw)
+
+ recreated_table = False
+ if table_created_at is not None and stream_created_at is not None:
+ # If the source table was recreated (new creation timestamp) but kept
+ # the same column definitions, we still need to recycle the stream so
+ # that Snowflake picks up the new table instance.
+ recreated_table = table_created_at > stream_created_at

  # Log hash comparison for debugging
  logger.debug(f"Source: {source}")
  logger.debug(f" Source table hash: {source_hash}")
  logger.debug(f" Stream hash: {stream_hash}")
  logger.debug(f" Match: {source_hash == stream_hash}")
+ if recreated_table:
+ logger.debug(" Table appears to have been recreated (table_created_at > stream_created_at)")
+ logger.debug(f" table_created_at: {table_created_at}")
+ logger.debug(f" stream_created_at: {stream_created_at}")

- if stream_hash is None or source_hash != stream_hash:
+ if stream_hash is None or source_hash != stream_hash or recreated_table:
  logger.debug(" Action: DELETE (stale)")
  truly_stale.append(source)
  else:
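A quick worked example of the new third condition, assuming both timestamps have already been normalized to UTC (values are illustrative only):

    from datetime import datetime, timezone

    # Column hashes still match, but the table was dropped and recreated after the
    # stream was set up, so the source is treated as stale and the stream recycled.
    stream_created_at = datetime(2024, 5, 1, 9, 0, tzinfo=timezone.utc)
    table_created_at = datetime(2024, 5, 2, 10, 0, tzinfo=timezone.utc)
    source_hash = stream_hash = "ab12cd34"

    recreated_table = table_created_at > stream_created_at
    assert stream_hash is None or source_hash != stream_hash or recreated_table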
@@ -376,7 +400,7 @@ class UseIndexPoller:
  stale_sources = [
  source
  for source, info in self.source_info.items()
- if info["state"] == "STALE"
+ if info.get("state") == "STALE"
  ]

  if not stale_sources:
@@ -763,7 +787,7 @@ class UseIndexPoller:
  # Log the error for debugging
  logger.warning(f"Failed to enable change tracking on {fqn}: {e}")
  failed_tables.append((fqn, str(e)))
-
+
  # Handle errors based on subtask type
  if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
  # Mark the individual subtask as failed and complete it
@@ -80,6 +80,15 @@ def escape_for_f_string(code: str) -> str:
  def escape_for_sproc(code: str) -> str:
  return code.replace("$$", "\\$\\$")

+
+ def normalize_datetime(value: object) -> datetime | None:
+ """Return a timezone-aware UTC datetime or None."""
+ if not isinstance(value, datetime):
+ return None
+ if value.tzinfo is None:
+ return value.replace(tzinfo=timezone.utc)
+ return value.astimezone(timezone.utc)
+
  # @NOTE: `overhead_rate` should fall between 0.05 and 0.5 depending on how time sensitive / expensive the operation in question is.
  def poll_with_specified_overhead(
  f,
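The helper's behaviour follows directly from the body shown above: naive datetimes are assumed to already be UTC, aware ones are converted to UTC, and anything that is not a datetime collapses to None. A small usage sketch:

    from datetime import datetime, timedelta, timezone

    from relationalai.clients.util import normalize_datetime

    assert normalize_datetime("2024-05-01") is None                # not a datetime
    naive = datetime(2024, 5, 1, 12, 0)                            # no tzinfo: treated as UTC
    assert normalize_datetime(naive) == datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
    aware = datetime(2024, 5, 1, 14, 0, tzinfo=timezone(timedelta(hours=2)))
    assert normalize_datetime(aware) == datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)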
relationalai/dsl.py CHANGED
@@ -22,6 +22,7 @@ import sys
  from pandas import DataFrame

  from relationalai.environments import runtime_env, SnowbookEnvironment
+ from relationalai.tools.constants import QUERY_ATTRIBUTES_HEADER

  from .clients.client import Client

@@ -34,9 +35,7 @@ from .errors import FilterAsValue, Errors, InvalidPropertySetException, Multiple
  #--------------------------------------------------

  RESERVED_PROPS = ["add", "set", "persist", "unpersist"]
-
  MAX_QUERY_ATTRIBUTE_LENGTH = 255
- QUERY_ATTRIBUTES_HEADER = "X-Query-Attributes"

  Value = Union[
  "Expression",