relationalai 0.11.3__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. relationalai/clients/config.py +7 -0
  2. relationalai/clients/direct_access_client.py +113 -0
  3. relationalai/clients/snowflake.py +41 -107
  4. relationalai/clients/use_index_poller.py +349 -188
  5. relationalai/early_access/dsl/bindings/csv.py +2 -2
  6. relationalai/early_access/metamodel/rewrite/__init__.py +5 -3
  7. relationalai/early_access/rel/rewrite/__init__.py +1 -1
  8. relationalai/errors.py +24 -3
  9. relationalai/semantics/internal/annotations.py +1 -0
  10. relationalai/semantics/internal/internal.py +22 -4
  11. relationalai/semantics/lqp/builtins.py +1 -0
  12. relationalai/semantics/lqp/executor.py +61 -12
  13. relationalai/semantics/lqp/intrinsics.py +23 -0
  14. relationalai/semantics/lqp/model2lqp.py +13 -4
  15. relationalai/semantics/lqp/passes.py +4 -6
  16. relationalai/semantics/lqp/primitives.py +12 -1
  17. relationalai/semantics/{rel → lqp}/rewrite/__init__.py +6 -0
  18. relationalai/semantics/lqp/rewrite/extract_common.py +362 -0
  19. relationalai/semantics/metamodel/builtins.py +20 -2
  20. relationalai/semantics/metamodel/factory.py +3 -2
  21. relationalai/semantics/metamodel/rewrite/__init__.py +3 -9
  22. relationalai/semantics/reasoners/graph/core.py +273 -71
  23. relationalai/semantics/reasoners/optimization/solvers_dev.py +20 -1
  24. relationalai/semantics/reasoners/optimization/solvers_pb.py +24 -3
  25. relationalai/semantics/rel/builtins.py +5 -1
  26. relationalai/semantics/rel/compiler.py +7 -19
  27. relationalai/semantics/rel/executor.py +2 -2
  28. relationalai/semantics/rel/rel.py +6 -0
  29. relationalai/semantics/rel/rel_utils.py +8 -1
  30. relationalai/semantics/sql/compiler.py +122 -42
  31. relationalai/semantics/sql/executor/duck_db.py +28 -3
  32. relationalai/semantics/sql/rewrite/denormalize.py +4 -6
  33. relationalai/semantics/sql/rewrite/recursive_union.py +23 -3
  34. relationalai/semantics/sql/sql.py +27 -0
  35. relationalai/semantics/std/__init__.py +2 -1
  36. relationalai/semantics/std/datetime.py +4 -0
  37. relationalai/semantics/std/re.py +83 -0
  38. relationalai/semantics/std/strings.py +1 -1
  39. relationalai/tools/cli.py +11 -4
  40. relationalai/tools/cli_controls.py +445 -60
  41. relationalai/util/format.py +78 -1
  42. {relationalai-0.11.3.dist-info → relationalai-0.12.0.dist-info}/METADATA +7 -5
  43. {relationalai-0.11.3.dist-info → relationalai-0.12.0.dist-info}/RECORD +51 -50
  44. relationalai/semantics/metamodel/rewrite/gc_nodes.py +0 -58
  45. relationalai/semantics/metamodel/rewrite/list_types.py +0 -109
  46. relationalai/semantics/rel/rewrite/extract_common.py +0 -451
  47. /relationalai/semantics/{rel → lqp}/rewrite/cdc.py +0 -0
  48. /relationalai/semantics/{metamodel → lqp}/rewrite/extract_keys.py +0 -0
  49. /relationalai/semantics/{metamodel → lqp}/rewrite/fd_constraints.py +0 -0
  50. /relationalai/semantics/{rel → lqp}/rewrite/quantify_vars.py +0 -0
  51. /relationalai/semantics/{metamodel → lqp}/rewrite/splinter.py +0 -0
  52. {relationalai-0.11.3.dist-info → relationalai-0.12.0.dist-info}/WHEEL +0 -0
  53. {relationalai-0.11.3.dist-info → relationalai-0.12.0.dist-info}/entry_points.txt +0 -0
  54. {relationalai-0.11.3.dist-info → relationalai-0.12.0.dist-info}/licenses/LICENSE +0 -0
relationalai/clients/use_index_poller.py
@@ -1,5 +1,6 @@
  from typing import Iterable, Dict, Optional, List, cast, TYPE_CHECKING
  import json
+ import logging
  import uuid

  from relationalai import debugging
@@ -12,9 +13,29 @@ from relationalai.errors import (
      SnowflakeTableObjectsException,
      SnowflakeTableObject,
  )
- from relationalai.tools.cli_controls import DebuggingSpan, create_progress
+ from relationalai.tools.cli_controls import (
+     DebuggingSpan,
+     create_progress,
+     TASK_CATEGORY_INDEXING,
+     TASK_CATEGORY_PROVISIONING,
+     TASK_CATEGORY_CHANGE_TRACKING,
+     TASK_CATEGORY_CACHE,
+     TASK_CATEGORY_RELATIONS,
+     TASK_CATEGORY_STATUS,
+     TASK_CATEGORY_VALIDATION,
+ )
  from relationalai.tools.constants import WAIT_FOR_STREAM_SYNC, Generation

+ # Set up logger for this module
+ logger = logging.getLogger(__name__)
+
+ try:
+     from rich.console import Console
+     from rich.table import Table
+ except ImportError:
+     Console = None
+     Table = None
+
  if TYPE_CHECKING:
      from relationalai.clients.snowflake import Resources
      from relationalai.clients.snowflake import DirectAccessResources
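
Note: the guarded `rich` import added above leaves `Console` and `Table` set to `None` when the optional dependency is missing, so downstream code is expected to branch on that sentinel. A minimal sketch of the pattern (the `print_rows` helper is hypothetical, not part of the package):

    def print_rows(headers, rows):
        # Fall back to plain text when the optional 'rich' dependency is unavailable.
        if Console is None or Table is None:
            print(" | ".join(headers))
            for row in rows:
                print(" | ".join(str(v) for v in row))
            return
        table = Table(*headers)  # column headers passed positionally
        for row in rows:
            table.add_row(*[str(v) for v in row])
        Console().print(table)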
@@ -32,15 +53,73 @@ MAX_DATA_SOURCE_SUBTASKS = 10

  # How often to check ERP status (every N iterations)
  # To limit performance overhead, we only check ERP status periodically
- ERP_CHECK_FREQUENCY = 5
+ ERP_CHECK_FREQUENCY = 15
+
+ # Polling behavior constants
+ POLL_OVERHEAD_RATE = 0.1 # Overhead rate for exponential backoff
+ POLL_MAX_DELAY = 2.5 # Maximum delay between polls in seconds
+
+ # SQL query template for getting stream column hashes
+ # This query calculates a hash of column metadata (name, type, precision, scale, nullable)
+ # to detect if source table schema has changed since stream was created
+ STREAM_COLUMN_HASH_QUERY = """
+ SELECT
+     FQ_OBJECT_NAME,
+     SHA2(
+         LISTAGG(
+             value:name::VARCHAR ||
+             CASE
+                 WHEN value:precision IS NOT NULL AND value:scale IS NOT NULL
+                 THEN CASE value:type::VARCHAR
+                         WHEN 'FIXED' THEN 'NUMBER'
+                         WHEN 'REAL' THEN 'FLOAT'
+                         WHEN 'TEXT' THEN 'TEXT'
+                         ELSE value:type::VARCHAR
+                      END || '(' || value:precision || ',' || value:scale || ')'
+                 WHEN value:precision IS NOT NULL AND value:scale IS NULL
+                 THEN CASE value:type::VARCHAR
+                         WHEN 'FIXED' THEN 'NUMBER'
+                         WHEN 'REAL' THEN 'FLOAT'
+                         WHEN 'TEXT' THEN 'TEXT'
+                         ELSE value:type::VARCHAR
+                      END || '(0,' || value:precision || ')'
+                 WHEN value:length IS NOT NULL
+                 THEN CASE value:type::VARCHAR
+                         WHEN 'FIXED' THEN 'NUMBER'
+                         WHEN 'REAL' THEN 'FLOAT'
+                         WHEN 'TEXT' THEN 'TEXT'
+                         ELSE value:type::VARCHAR
+                      END || '(' || value:length || ')'
+                 ELSE CASE value:type::VARCHAR
+                         WHEN 'FIXED' THEN 'NUMBER'
+                         WHEN 'REAL' THEN 'FLOAT'
+                         WHEN 'TEXT' THEN 'TEXT'
+                         ELSE value:type::VARCHAR
+                      END
+             END ||
+             CASE WHEN value:nullable::BOOLEAN THEN 'YES' ELSE 'NO' END,
+             ','
+         ) WITHIN GROUP (ORDER BY value:name::VARCHAR),
+         256
+     ) AS STREAM_HASH
+ FROM {app_name}.api.data_streams,
+     LATERAL FLATTEN(input => COLUMNS) f
+ WHERE RAI_DATABASE = '{rai_database}' AND FQ_OBJECT_NAME IN ({fqn_list})
+ GROUP BY FQ_OBJECT_NAME;
+ """
+

  class UseIndexPoller:
      """
      Encapsulates the polling logic for `use_index` streams.
      """

-     def _add_stream_subtask(self, progress, fq_name, status, batches_count):
-         """Add a stream subtask if we haven't reached the limit."""
+     def _add_stream_subtask(self, progress, fq_name: str, status: str, batches_count: int) -> bool:
+         """Add a stream subtask if we haven't reached the limit.
+
+         Returns:
+             True if subtask was added, False if limit reached
+         """
          if fq_name not in self.stream_task_ids and len(self.stream_task_ids) < MAX_DATA_SOURCE_SUBTASKS:
              # Get the position in the stream order (should already be there)
              if fq_name in self.stream_order:
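
Note: the hash computed by `STREAM_COLUMN_HASH_QUERY` above is a SHA-256 over a comma-separated, name-ordered list of per-column descriptors (name, normalized type with precision/scale or length, and a YES/NO nullability flag). A rough Python equivalent of the encoding, for reference only (the real computation runs server-side in Snowflake, and the column-metadata shape here is assumed):

    import hashlib

    def encode_column(col: dict) -> str:
        # Mirror the CASE logic: normalize Snowflake's internal type names, then append
        # (precision,scale), (0,precision), or (length), followed by the nullability flag.
        type_map = {"FIXED": "NUMBER", "REAL": "FLOAT", "TEXT": "TEXT"}
        t = type_map.get(col["type"], col["type"])
        if col.get("precision") is not None and col.get("scale") is not None:
            t += f"({col['precision']},{col['scale']})"
        elif col.get("precision") is not None:
            t += f"(0,{col['precision']})"
        elif col.get("length") is not None:
            t += f"({col['length']})"
        nullable = "YES" if col.get("nullable") else "NO"
        return f"{col['name']}{t}{nullable}"

    def columns_hash(columns: list) -> str:
        payload = ",".join(encode_column(c) for c in sorted(columns, key=lambda c: c["name"]))
        return hashlib.sha256(payload.encode()).hexdigest()

    # e.g. a NOT NULL column ID NUMBER(38,0) contributes "IDNUMBER(38,0)NO" to the payload

This is what lets `_filter_truly_stale_sources` (later in this diff) compare a stream's recorded hash against the current source table and recreate only the streams whose schemas actually changed.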
@@ -58,11 +137,11 @@ class UseIndexPoller:
              else:
                  initial_message = f"Syncing {fq_name} ({stream_position}/{self.total_streams})"

-             self.stream_task_ids[fq_name] = progress.add_sub_task(initial_message, task_id=fq_name)
+             self.stream_task_ids[fq_name] = progress.add_sub_task(initial_message, task_id=fq_name, category=TASK_CATEGORY_INDEXING)

-             # Complete immediately if already synced
+             # Complete immediately if already synced (without recording completion time)
              if status == "synced":
-                 progress.complete_sub_task(fq_name)
+                 progress.complete_sub_task(fq_name, record_time=False)

              return True
          return False
@@ -125,16 +204,24 @@ class UseIndexPoller:
          self.stream_position = 0
          self.stream_order = [] # Track the order of streams as they appear in data

+         # Timing will be tracked by TaskProgress
+
      def poll(self) -> None:
          """
          Standard stream-based polling for use_index.
          """
+         # Read show_duration_summary config flag (defaults to True for backward compatibility)
+         show_duration_summary = bool(self.res.config.get("show_duration_summary", True))
+
          with create_progress(
              description="Initializing data index",
-             success_message="Initialization complete",
+             success_message="", # We'll handle this in the context manager
              leading_newline=True,
              trailing_newline=True,
+             show_duration_summary=show_duration_summary,
          ) as progress:
+             # Set process start time
+             progress.set_process_start_time()
              progress.update_main_status("Validating data sources")
              self._maybe_delete_stale(progress)

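The new `show_duration_summary` flag and the `set_process_start_time` / `set_process_end_time` calls indicate that the progress wrapper now prints a timing summary when its context exits. The real `TaskProgress` lives in relationalai/tools/cli_controls.py and is not shown in this diff; a minimal sketch of a context manager with that shape, purely for orientation:

    import time
    from contextlib import AbstractContextManager

    class DurationSummaryProgress(AbstractContextManager):
        # Illustrative stand-in, not the package's TaskProgress.
        def __init__(self, show_duration_summary: bool = True):
            self.show_duration_summary = show_duration_summary
            self._start = None
            self._end = None

        def set_process_start_time(self):
            self._start = time.time()

        def set_process_end_time(self):
            self._end = time.time()

        def __exit__(self, exc_type, exc, tb):
            # Print the summary automatically on exit, as the comments above describe.
            if self.show_duration_summary and self._start is not None:
                end = self._end if self._end is not None else time.time()
                print(f"Total time: {end - self._start:.1f}s")
            return False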
@@ -145,123 +232,126 @@ class UseIndexPoller:
              self._poll_loop(progress)
              self._post_check(progress)

+             # Set process end time (summary will be automatically printed by __exit__)
+             progress.set_process_end_time()
+
      def _add_cache_subtask(self, progress) -> None:
          """Add a subtask showing cache usage information only when cache is used."""
          if self.cache.using_cache:
              # Cache was used - show how many sources were cached
              total_sources = len(self.cache.sources)
              cached_sources = total_sources - len(self.sources)
-             progress.add_sub_task(f"Using cached data for {cached_sources}/{total_sources} data streams", task_id="cache_usage")
+             progress.add_sub_task(f"Using cached data for {cached_sources}/{total_sources} data streams", task_id="cache_usage", category=TASK_CATEGORY_CACHE)
              # Complete the subtask immediately since it's just informational
              progress.complete_sub_task("cache_usage")

-     def _get_stream_column_hashes(self, sources: List[str]) -> Dict[str, str]:
+     def _get_stream_column_hashes(self, sources: List[str], progress) -> Dict[str, str]:
          """
          Query data_streams to get current column hashes for the given sources.

-         Returns a dict mapping FQN -> column hash.
+         Args:
+             sources: List of source FQNs to query
+             progress: TaskProgress instance for updating status on error
+
+         Returns:
+             Dict mapping FQN -> column hash
+
+         Raises:
+             ValueError: If the query fails (permissions, table doesn't exist, etc.)
          """
          from relationalai.clients.snowflake import PYREL_ROOT_DB

+         # Build FQN list for SQL IN clause
          fqn_list = ", ".join([f"'{source}'" for source in sources])

-         hash_query = f"""
-         SELECT
-             FQ_OBJECT_NAME,
-             SHA2(
-                 LISTAGG(
-                     value:name::VARCHAR ||
-                     CASE
-                         WHEN value:precision IS NOT NULL AND value:scale IS NOT NULL
-                         THEN CASE value:type::VARCHAR
-                                 WHEN 'FIXED' THEN 'NUMBER'
-                                 WHEN 'REAL' THEN 'FLOAT'
-                                 WHEN 'TEXT' THEN 'TEXT'
-                                 ELSE value:type::VARCHAR
-                              END || '(' || value:precision || ',' || value:scale || ')'
-                         WHEN value:precision IS NOT NULL AND value:scale IS NULL
-                         THEN CASE value:type::VARCHAR
-                                 WHEN 'FIXED' THEN 'NUMBER'
-                                 WHEN 'REAL' THEN 'FLOAT'
-                                 WHEN 'TEXT' THEN 'TEXT'
-                                 ELSE value:type::VARCHAR
-                              END || '(0,' || value:precision || ')'
-                         WHEN value:length IS NOT NULL
-                         THEN CASE value:type::VARCHAR
-                                 WHEN 'FIXED' THEN 'NUMBER'
-                                 WHEN 'REAL' THEN 'FLOAT'
-                                 WHEN 'TEXT' THEN 'TEXT'
-                                 ELSE value:type::VARCHAR
-                              END || '(' || value:length || ')'
-                         ELSE CASE value:type::VARCHAR
-                                 WHEN 'FIXED' THEN 'NUMBER'
-                                 WHEN 'REAL' THEN 'FLOAT'
-                                 WHEN 'TEXT' THEN 'TEXT'
-                                 ELSE value:type::VARCHAR
-                              END
-                     END ||
-                     CASE WHEN value:nullable::BOOLEAN THEN 'YES' ELSE 'NO' END,
-                     ','
-                 ) WITHIN GROUP (ORDER BY value:name::VARCHAR),
-                 256
-             ) AS STREAM_HASH
-         FROM {self.app_name}.api.data_streams,
-             LATERAL FLATTEN(input => COLUMNS) f
-         WHERE RAI_DATABASE = '{PYREL_ROOT_DB}' AND FQ_OBJECT_NAME IN ({fqn_list})
-         GROUP BY FQ_OBJECT_NAME;
-         """
-
-         hash_results = self.res._exec(hash_query)
-         return {row["FQ_OBJECT_NAME"]: row["STREAM_HASH"] for row in hash_results}
+         # Format query template with actual values
+         hash_query = STREAM_COLUMN_HASH_QUERY.format(
+             app_name=self.app_name,
+             rai_database=PYREL_ROOT_DB,
+             fqn_list=fqn_list
+         )

-     def _filter_truly_stale_sources(self, stale_sources: List[str]) -> List[str]:
+         try:
+             hash_results = self.res._exec(hash_query)
+             return {row["FQ_OBJECT_NAME"]: row["STREAM_HASH"] for row in hash_results}
+
+         except Exception as e:
+             logger.error(f"Failed to query stream column hashes: {e}")
+             logger.error(f" Query: {hash_query[:200]}...")
+             logger.error(f" Sources: {sources}")
+             progress.update_main_status("❌ Failed to validate data stream metadata")
+             raise ValueError(
+                 f"Failed to validate stream column hashes. This may indicate a permissions "
+                 f"issue or missing data_streams table. Error: {e}"
+             ) from e
+
+     def _filter_truly_stale_sources(self, stale_sources: List[str], progress) -> List[str]:
          """
          Filter stale sources to only include those with mismatched column hashes.

+         Args:
+             stale_sources: List of source FQNs marked as stale
+             progress: TaskProgress instance for updating status on error
+
+         Returns:
+             List of truly stale sources that need to be deleted/recreated
+
          A source is truly stale if:
          - The stream doesn't exist (needs to be created), OR
          - The column hashes don't match (needs to be recreated)
          """
-         stream_hashes = self._get_stream_column_hashes(stale_sources)
+         stream_hashes = self._get_stream_column_hashes(stale_sources, progress)

          truly_stale = []
          for source in stale_sources:
              source_hash = self.source_info[source].get("columns_hash")
              stream_hash = stream_hashes.get(source)

-             # Debug prints to see hash comparison
-             # print(f"\n[DEBUG] Source: {source}")
-             # print(f" Source table hash: {source_hash}")
-             # print(f" Stream hash: {stream_hash}")
-             # print(f" Match: {source_hash == stream_hash}")
-             # print(f" Action: {'KEEP (valid)' if stream_hash is not None and source_hash == stream_hash else 'DELETE (stale)'}")
+             # Log hash comparison for debugging
+             logger.debug(f"Source: {source}")
+             logger.debug(f" Source table hash: {source_hash}")
+             logger.debug(f" Stream hash: {stream_hash}")
+             logger.debug(f" Match: {source_hash == stream_hash}")

              if stream_hash is None or source_hash != stream_hash:
+                 logger.debug(" Action: DELETE (stale)")
                  truly_stale.append(source)
+             else:
+                 logger.debug(" Action: KEEP (valid)")

-         # print(f"\n[DEBUG] Stale sources summary:")
-         # print(f" Total candidates: {len(stale_sources)}")
-         # print(f" Truly stale: {len(truly_stale)}")
-         # print(f" Skipped (valid): {len(stale_sources) - len(truly_stale)}\n")
+         logger.debug(f"Stale sources summary: {len(truly_stale)}/{len(stale_sources)} truly stale")

          return truly_stale

      def _add_deletion_subtasks(self, progress, sources: List[str]) -> None:
-         """Add progress subtasks for source deletion."""
+         """Add progress subtasks for source deletion.
+
+         Args:
+             progress: TaskProgress instance
+             sources: List of source FQNs to be deleted
+         """
          if len(sources) <= MAX_INDIVIDUAL_SUBTASKS:
              for i, source in enumerate(sources):
                  progress.add_sub_task(
                      f"Removing stale stream {source} ({i+1}/{len(sources)})",
-                     task_id=f"stale_source_{i}"
+                     task_id=f"stale_source_{i}",
+                     category=TASK_CATEGORY_VALIDATION
                  )
          else:
              progress.add_sub_task(
                  f"Removing {len(sources)} stale data sources",
-                 task_id="stale_sources_summary"
+                 task_id="stale_sources_summary",
+                 category=TASK_CATEGORY_VALIDATION
              )

      def _complete_deletion_subtasks(self, progress, sources: List[str], deleted_count: int) -> None:
-         """Complete progress subtasks for source deletion."""
+         """Complete progress subtasks for source deletion.
+
+         Args:
+             progress: TaskProgress instance
+             sources: List of source FQNs that were processed
+             deleted_count: Number of sources successfully deleted
+         """
          if len(sources) <= MAX_INDIVIDUAL_SUBTASKS:
              for i in range(len(sources)):
                  if f"stale_source_{i}" in progress._tasks:
@@ -277,7 +367,11 @@ class UseIndexPoller:
              progress.complete_sub_task("stale_sources_summary")

      def _maybe_delete_stale(self, progress) -> None:
-         """Check for and delete stale data streams that need recreation."""
+         """Check for and delete stale data streams that need recreation.
+
+         Args:
+             progress: TaskProgress instance for tracking deletion progress
+         """
          with debugging.span("check_sources"):
              stale_sources = [
                  source
@@ -291,7 +385,7 @@ class UseIndexPoller:
          with DebuggingSpan("validate_sources"):
              try:
                  # Validate which sources truly need deletion by comparing column hashes
-                 truly_stale = self._filter_truly_stale_sources(stale_sources)
+                 truly_stale = self._filter_truly_stale_sources(stale_sources, progress)

                  if not truly_stale:
                      return
@@ -330,6 +424,11 @@ class UseIndexPoller:
                  raise e from None

      def _poll_loop(self, progress) -> None:
+         """Main polling loop for use_index streams.
+
+         Args:
+             progress: TaskProgress instance for tracking polling progress
+         """
          source_references = self.res._get_source_references(self.source_info)
          sources_object_references_str = ", ".join(source_references)

@@ -341,7 +440,7 @@ class UseIndexPoller:
              with debugging.span("check_erp_status"):
                  # Add subtask for ERP status check
                  if self._erp_check_task_id is None:
-                     self._erp_check_task_id = progress.add_sub_task("Checking system status", task_id="erp_check")
+                     self._erp_check_task_id = progress.add_sub_task("Checking system status", task_id="erp_check", category=TASK_CATEGORY_STATUS)

                  if not self.res.is_erp_running(self.app_name):
                      progress.update_sub_task("erp_check", "❌ System status check failed")
@@ -376,11 +475,18 @@ class UseIndexPoller:
              use_index_json_str = results[0]["USE_INDEX"]

              # Parse the JSON string into a Python dictionary
-             use_index_data = json.loads(use_index_json_str)
+             try:
+                 use_index_data = json.loads(use_index_json_str)
+             except json.JSONDecodeError as e:
+                 logger.error(f"Invalid JSON from use_index API: {e}")
+                 logger.error(f"Raw response (first 500 chars): {use_index_json_str[:500]}")
+                 progress.update_main_status("❌ Received invalid response from server")
+                 raise ValueError(f"Invalid JSON response from use_index: {e}") from e
+
              span.update(use_index_data)

-             # Useful to see the full use_index_data on each poll loop
-             # print(f"\n\nuse_index_data: {json.dumps(use_index_data, indent=4)}\n\n")
+             # Log the full use_index_data for debugging if needed
+             logger.debug(f"use_index_data: {json.dumps(use_index_data, indent=4)}")

              all_data = use_index_data.get("data", [])
              ready = use_index_data.get("ready", False)
@@ -405,16 +511,17 @@ class UseIndexPoller:
              if not ready and all_data:
                  progress.update_main_status("Processing background tasks. This may take a while...")

-                 # Build complete stream order first
-                 for data in all_data:
-                     if data is None:
-                         continue
-                     fq_name = data.get("fq_object_name", "Unknown")
-                     if fq_name not in self.stream_order:
-                         self.stream_order.append(fq_name)
+                 # Build complete stream order first (only on first iteration with data)
+                 if self.total_streams == 0:
+                     for data in all_data:
+                         if data is None:
+                             continue
+                         fq_name = data.get("fq_object_name", "Unknown")
+                         if fq_name not in self.stream_order:
+                             self.stream_order.append(fq_name)

-                 # Set total streams count based on complete order
-                 self.total_streams = len(self.stream_order)
+                     # Set total streams count based on complete order (only once)
+                     self.total_streams = len(self.stream_order)

                  # Add new streams as subtasks if we haven't reached the limit
                  for data in all_data:
@@ -474,62 +581,69 @@ class UseIndexPoller:
              for engine in engines:
                  if not engine or not isinstance(engine, dict):
                      continue
-
-                 name = engine.get("name", "Unknown")
                  size = self.engine_size
-                 if name not in self.engine_task_ids:
-                     self.engine_task_ids[name] = progress.add_sub_task(f"Provisioning engine {name} ({size})", task_id=name)
-
-                 state = (engine.get("state") or "").lower()
+                 name = engine.get("name", "Unknown")
                  status = (engine.get("status") or "").lower()
+                 sub_task_id = self.engine_task_ids.get(name, None)
+                 sub_task_status_message = ""

-                 # Determine engine status message
-                 if state == "ready" or status == "ready":
-                     status_message = f"Engine {name} ({size}) ready"
-                     should_complete = True
-                 else:
+                 # Complete the sub task if it exists and the engine status is ready
+                 if sub_task_id and name in progress._tasks and not progress._tasks[name].completed and (status == "ready"):
+                     sub_task_status_message = f"Engine {name} ({size}) ready"
+                     progress.update_sub_task(name, sub_task_status_message)
+                     progress.complete_sub_task(name)
+
+                 # Add the sub task if it doesn't exist and the engine status is pending
+                 if not sub_task_id and status == "pending":
                      writer = engine.get("writer", False)
                      engine_type = "writer engine" if writer else "engine"
-                     status_message = f"Provisioning {engine_type} {name} ({size})"
-                     should_complete = False
-
-                 # Only update if the task isn't already completed
-                 if name in progress._tasks and not progress._tasks[name].completed:
-                     progress.update_sub_task(name, status_message)
-
-                 if should_complete:
-                     progress.complete_sub_task(name)
+                     sub_task_status_message = f"Provisioning {engine_type} {name} ({size})"
+                     self.engine_task_ids[name] = progress.add_sub_task(sub_task_status_message, task_id=name, category=TASK_CATEGORY_PROVISIONING)

              # Special handling for CDC_MANAGED_ENGINE - mark ready when any stream starts processing
-             if CDC_MANAGED_ENGINE in self.engine_task_ids:
+             cdc_task = progress._tasks.get(CDC_MANAGED_ENGINE) if CDC_MANAGED_ENGINE in progress._tasks else None
+             if CDC_MANAGED_ENGINE in self.engine_task_ids and cdc_task and not cdc_task.completed:
+
                  has_processing_streams = any(
                      stream.get("next_batch_status", "") == "processing"
                      for stream in all_data
                  )
-                 if has_processing_streams and CDC_MANAGED_ENGINE in progress._tasks and not progress._tasks[CDC_MANAGED_ENGINE].completed:
+                 if has_processing_streams and cdc_task and not cdc_task.completed:
                      progress.update_sub_task(CDC_MANAGED_ENGINE, f"Engine {CDC_MANAGED_ENGINE} ({self.engine_size}) ready")
                      progress.complete_sub_task(CDC_MANAGED_ENGINE)

              self.counter += 1

              # Handle relations data
-             if not ready and relations and isinstance(relations, dict):
+             if relations and isinstance(relations, dict):
                  txn = relations.get("txn", {}) or {}
                  txn_id = txn.get("id", None)

                  # Only show relations subtask if there is a valid txn object
                  if txn_id:
                      status = relations.get("status", "").upper()
-                     state = txn.get("state", "").upper()

                      # Create relations subtask if it doesn't exist
                      if self.relations_task_id is None:
-                         self.relations_task_id = progress.add_sub_task("Populating relations", task_id="relations")
+                         self.relations_task_id = progress.add_sub_task("Populating relations", task_id="relations", category=TASK_CATEGORY_RELATIONS)
+
+                     # Set the start time from the JSON if available (always update)
+                     start_time_ms = relations.get("start_time")
+                     if start_time_ms:
+                         start_time_seconds = start_time_ms / 1000.0
+                         progress._tasks["relations"].added_time = start_time_seconds

                      # Update relations status
-                     if state == "COMPLETED":
+                     if status == "COMPLETED":
                          progress.update_sub_task("relations", f"Relations populated (txn: {txn_id})")
-                         progress.complete_sub_task("relations")
+
+                         # Set the completion time from the JSON if available
+                         end_time_ms = relations.get("end_time")
+                         if end_time_ms:
+                             end_time_seconds = end_time_ms / 1000.0
+                             progress._tasks["relations"].completed_time = end_time_seconds
+
+                         progress.complete_sub_task("relations", record_time=False) # Don't record local time
                      else:
                          progress.update_sub_task("relations", f"Relations populating (txn: {txn_id})")

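The relations subtask above now takes its timing from the server payload instead of the local clock: `start_time` and `end_time` arrive as epoch milliseconds and are divided by 1000.0 so they line up with the seconds-based timestamps the progress tasks keep (which is also why `complete_sub_task` is called with `record_time=False`). For example (payload values are made up):

    from datetime import datetime, timezone

    relations = {"start_time": 1717430400123, "end_time": 1717430412456}  # epoch milliseconds

    start_s = relations["start_time"] / 1000.0  # comparable to time.time()
    end_s = relations["end_time"] / 1000.0
    print(f"relations populated in {end_s - start_s:.1f}s")
    print(datetime.fromtimestamp(start_s, tz=timezone.utc).isoformat())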
@@ -580,85 +694,128 @@ class UseIndexPoller:

              return break_loop

-         poll_with_specified_overhead(lambda: check_ready(progress), overhead_rate=0.1, max_delay=1)
+         poll_with_specified_overhead(lambda: check_ready(progress), overhead_rate=POLL_OVERHEAD_RATE, max_delay=POLL_MAX_DELAY)

      def _post_check(self, progress) -> None:
-         num_tables_altered = 0
+         """Run post-processing checks including change tracking enablement.

-         enabled_tables = []
-         if (
-             self.tables_with_not_enabled_change_tracking
-             and self.res.config.get("ensure_change_tracking", False)
-         ):
-             tables_to_process = self.tables_with_not_enabled_change_tracking
-
-             # Add subtasks based on count
-             if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
-                 # Add individual subtasks for each table
-                 for i, table in enumerate(tables_to_process):
-                     fqn, kind = table
-                     progress.add_sub_task(f"Enabling change tracking on {fqn} ({i+1}/{len(tables_to_process)})", task_id=f"change_tracking_{i}")
-             else:
-                 # Add single summary subtask for many tables
-                 progress.add_sub_task(f"Enabling change tracking on {len(tables_to_process)} tables", task_id="change_tracking_summary")
+         Args:
+             progress: TaskProgress instance for tracking progress

-             # Process tables
+         Raises:
+             SnowflakeChangeTrackingNotEnabledException: If change tracking cannot be enabled
+             SnowflakeTableObjectsException: If there are table-related errors
+             EngineProvisioningFailed: If engine provisioning fails
+         """
+         num_tables_altered = 0
+         failed_tables = [] # Track tables that failed to enable change tracking
+
+         enabled_tables = []
+         if (
+             self.tables_with_not_enabled_change_tracking
+             and self.res.config.get("ensure_change_tracking", False)
+         ):
+             tables_to_process = self.tables_with_not_enabled_change_tracking
+             # Track timing for change tracking
+
+             # Add subtasks based on count
+             if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
+                 # Add individual subtasks for each table
                  for i, table in enumerate(tables_to_process):
-                     try:
-                         fqn, kind = table
-                         self.res._exec(f"ALTER {kind} {fqn} SET CHANGE_TRACKING = TRUE;")
-                         enabled_tables.append(table)
-                         num_tables_altered += 1
-
-                         # Update progress based on subtask type
-                         if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
-                             # Complete individual table subtask
-                             progress.complete_sub_task(f"change_tracking_{i}")
-                         else:
-                             # Update summary subtask with progress
-                             progress.update_sub_task("change_tracking_summary",
-                                 f"Enabling change tracking on {len(tables_to_process)} tables... ({i+1}/{len(tables_to_process)})")
-                     except Exception:
-                         # Handle errors based on subtask type
+                     fqn, kind = table
+                     progress.add_sub_task(f"Enabling change tracking on {fqn} ({i+1}/{len(tables_to_process)})", task_id=f"change_tracking_{i}", category=TASK_CATEGORY_CHANGE_TRACKING)
+             else:
+                 # Add single summary subtask for many tables
+                 progress.add_sub_task(f"Enabling change tracking on {len(tables_to_process)} tables", task_id="change_tracking_summary", category=TASK_CATEGORY_CHANGE_TRACKING)
+
+             # Process tables
+             for i, table in enumerate(tables_to_process):
+                 fqn, kind = table # Unpack outside try block to ensure fqn is defined
+
+                 try:
+                     # Validate table_type to prevent SQL injection
+                     # Should only be TABLE or VIEW
+                     if kind not in ("TABLE", "VIEW"):
+                         logger.error(f"Invalid table kind '{kind}' for {fqn}, skipping")
+                         failed_tables.append((fqn, f"Invalid table kind: {kind}"))
+                         # Mark as failed in progress
                          if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
-                             # Complete the individual subtask even if it failed
                              if f"change_tracking_{i}" in progress._tasks:
+                                 progress.update_sub_task(f"change_tracking_{i}", f"❌ Invalid type: {fqn}")
                                  progress.complete_sub_task(f"change_tracking_{i}")
-                             pass
-
-         # Complete summary subtask if used
-         if len(tables_to_process) > MAX_INDIVIDUAL_SUBTASKS and "change_tracking_summary" in progress._tasks:
-             if num_tables_altered > 0:
-                 s = "s" if num_tables_altered > 1 else ""
-                 progress.update_sub_task("change_tracking_summary", f"Enabled change tracking on {num_tables_altered} table{s}")
-             progress.complete_sub_task("change_tracking_summary")
-
-         # Remove the tables that were successfully enabled from the list of not enabled tables
-         # so that we don't raise an exception for them later
-         self.tables_with_not_enabled_change_tracking = [
-             t for t in self.tables_with_not_enabled_change_tracking if t not in enabled_tables
-         ]
-
-         if self.tables_with_not_enabled_change_tracking:
-             progress.update_main_status("Errors found. See below for details.")
-             raise SnowflakeChangeTrackingNotEnabledException(
-                 self.tables_with_not_enabled_change_tracking
-             )
+                         continue
+
+                     # Execute ALTER statement
+                     # Note: fqn should already be properly quoted from source_info
+                     self.res._exec(f"ALTER {kind} {fqn} SET CHANGE_TRACKING = TRUE;")
+                     enabled_tables.append(table)
+                     num_tables_altered += 1
+
+                     # Update progress based on subtask type
+                     if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
+                         # Complete individual table subtask
+                         progress.complete_sub_task(f"change_tracking_{i}")
+                     else:
+                         # Update summary subtask with progress
+                         progress.update_sub_task("change_tracking_summary",
+                             f"Enabling change tracking on {len(tables_to_process)} tables... ({i+1}/{len(tables_to_process)})")
+                 except Exception as e:
+                     # Log the error for debugging
+                     logger.warning(f"Failed to enable change tracking on {fqn}: {e}")
+                     failed_tables.append((fqn, str(e)))
+
+                     # Handle errors based on subtask type
+                     if len(tables_to_process) <= MAX_INDIVIDUAL_SUBTASKS:
+                         # Mark the individual subtask as failed and complete it
+                         if f"change_tracking_{i}" in progress._tasks:
+                             progress.update_sub_task(f"change_tracking_{i}", f"❌ Failed: {fqn}")
+                             progress.complete_sub_task(f"change_tracking_{i}")
+                     # Continue processing other tables despite this failure
+
+             # Complete summary subtask if used
+             if len(tables_to_process) > MAX_INDIVIDUAL_SUBTASKS and "change_tracking_summary" in progress._tasks:
+                 if num_tables_altered > 0:
+                     s = "s" if num_tables_altered > 1 else ""
+                     success_msg = f"Enabled change tracking on {num_tables_altered} table{s}"
+                     if failed_tables:
+                         success_msg += f" ({len(failed_tables)} failed)"
+                     progress.update_sub_task("change_tracking_summary", success_msg)
+                 elif failed_tables:
+                     progress.update_sub_task("change_tracking_summary", f"❌ Failed on {len(failed_tables)} table(s)")
+                 progress.complete_sub_task("change_tracking_summary")
+
+             # Log summary of failed tables
+             if failed_tables:
+                 logger.warning(f"Failed to enable change tracking on {len(failed_tables)} table(s)")
+                 for fqn, error in failed_tables:
+                     logger.warning(f" {fqn}: {error}")
+
+             # Remove the tables that were successfully enabled from the list of not enabled tables
+             # so that we don't raise an exception for them later
+             self.tables_with_not_enabled_change_tracking = [
+                 t for t in self.tables_with_not_enabled_change_tracking if t not in enabled_tables
+             ]

-         if self.table_objects_with_other_errors:
-             progress.update_main_status("Errors found. See below for details.")
-             raise SnowflakeTableObjectsException(self.table_objects_with_other_errors)
-         if self.engine_errors:
-             progress.update_main_status("Errors found. See below for details.")
-             # if there is an engine error, probably auto create engine failed
-             # Create a synthetic exception from the first engine error
-             first_error = self.engine_errors[0]
-             error_message = first_error.get("message", "Unknown engine error")
-             synthetic_exception = Exception(f"Engine error: {error_message}")
-             raise EngineProvisioningFailed(self.engine_name, synthetic_exception)
-
-         if num_tables_altered > 0:
-             self._poll_loop(progress)
+         if self.tables_with_not_enabled_change_tracking:
+             progress.update_main_status("Errors found. See below for details.")
+             raise SnowflakeChangeTrackingNotEnabledException(
+                 self.tables_with_not_enabled_change_tracking
+             )
+
+         if self.table_objects_with_other_errors:
+             progress.update_main_status("Errors found. See below for details.")
+             raise SnowflakeTableObjectsException(self.table_objects_with_other_errors)
+         if self.engine_errors:
+             progress.update_main_status("Errors found. See below for details.")
+             # if there is an engine error, probably auto create engine failed
+             # Create a synthetic exception from the first engine error
+             first_error = self.engine_errors[0]
+             error_message = first_error.get("message", "Unknown engine error")
+             synthetic_exception = Exception(f"Engine error: {error_message}")
+             raise EngineProvisioningFailed(self.engine_name, synthetic_exception)
+
+         if num_tables_altered > 0:
+             self._poll_loop(progress)

  class DirectUseIndexPoller(UseIndexPoller):
      """
@@ -743,17 +900,21 @@ class DirectUseIndexPoller(UseIndexPoller):
                  attempt += 1
              return False

+         # Read show_duration_summary config flag (defaults to True for backward compatibility)
+         show_duration_summary = bool(self.res.config.get("show_duration_summary", True))
+
          with create_progress(
              description="Preparing your data...",
              success_message="Done",
              leading_newline=True,
              trailing_newline=True,
+             show_duration_summary=show_duration_summary,
          ) as progress:
              # Add cache usage subtask
              self._add_cache_subtask(progress)

              with debugging.span("poll_direct"):
-                 poll_with_specified_overhead(lambda: check_direct(progress), overhead_rate=0.1, max_delay=1)
+                 poll_with_specified_overhead(lambda: check_direct(progress), overhead_rate=POLL_OVERHEAD_RATE, max_delay=POLL_MAX_DELAY)

              # Run the same post-check logic as UseIndexPoller
              self._post_check(progress)
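
Both pollers now tune `poll_with_specified_overhead` with the shared module-level constants (overhead_rate=0.1, max_delay=2.5s) instead of hard-coded values. The helper itself is defined elsewhere in the package and does not appear in this diff; a rough sketch of an overhead-rate-bounded poll loop, under assumed semantics (sleep roughly `overhead_rate` times the elapsed time, capped at `max_delay`, until the check succeeds):

    import time

    def poll_with_overhead(check, overhead_rate: float = 0.1, max_delay: float = 2.5) -> None:
        # Illustrative stand-in for the package's poll_with_specified_overhead helper.
        start = time.monotonic()
        while not check():
            elapsed = time.monotonic() - start
            # Keep polling overhead near `overhead_rate` of the elapsed time, bounded by max_delay.
            delay = max(0.1, min(max_delay, overhead_rate * elapsed))
            time.sleep(delay)

Raising `max_delay` from 1s to 2.5s simply lets the wait between polls grow longer on long-running syncs, trading a little latency for fewer status queries.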