mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mlrun has been flagged as potentially problematic.

Files changed (150)
  1. mlrun/__init__.py +3 -2
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/plots.py +1 -1
  5. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  6. mlrun/auth/nuclio.py +89 -0
  7. mlrun/auth/providers.py +429 -0
  8. mlrun/auth/utils.py +415 -0
  9. mlrun/common/constants.py +7 -0
  10. mlrun/common/model_monitoring/helpers.py +41 -4
  11. mlrun/common/runtimes/constants.py +28 -0
  12. mlrun/common/schemas/__init__.py +13 -3
  13. mlrun/common/schemas/alert.py +2 -2
  14. mlrun/common/schemas/api_gateway.py +3 -0
  15. mlrun/common/schemas/auth.py +10 -10
  16. mlrun/common/schemas/client_spec.py +4 -0
  17. mlrun/common/schemas/constants.py +25 -0
  18. mlrun/common/schemas/frontend_spec.py +1 -8
  19. mlrun/common/schemas/function.py +24 -0
  20. mlrun/common/schemas/hub.py +3 -2
  21. mlrun/common/schemas/model_monitoring/__init__.py +1 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +2 -2
  23. mlrun/common/schemas/secret.py +17 -2
  24. mlrun/common/secrets.py +95 -1
  25. mlrun/common/types.py +10 -10
  26. mlrun/config.py +53 -15
  27. mlrun/data_types/infer.py +2 -2
  28. mlrun/datastore/__init__.py +2 -3
  29. mlrun/datastore/base.py +274 -10
  30. mlrun/datastore/datastore.py +1 -1
  31. mlrun/datastore/datastore_profile.py +49 -17
  32. mlrun/datastore/model_provider/huggingface_provider.py +6 -2
  33. mlrun/datastore/model_provider/model_provider.py +2 -2
  34. mlrun/datastore/model_provider/openai_provider.py +2 -2
  35. mlrun/datastore/s3.py +15 -16
  36. mlrun/datastore/sources.py +1 -1
  37. mlrun/datastore/store_resources.py +4 -4
  38. mlrun/datastore/storeytargets.py +16 -10
  39. mlrun/datastore/targets.py +1 -1
  40. mlrun/datastore/utils.py +16 -3
  41. mlrun/datastore/v3io.py +1 -1
  42. mlrun/db/base.py +36 -12
  43. mlrun/db/httpdb.py +316 -101
  44. mlrun/db/nopdb.py +29 -11
  45. mlrun/errors.py +4 -2
  46. mlrun/execution.py +11 -12
  47. mlrun/feature_store/api.py +1 -1
  48. mlrun/feature_store/common.py +1 -1
  49. mlrun/feature_store/feature_vector_utils.py +1 -1
  50. mlrun/feature_store/steps.py +8 -6
  51. mlrun/frameworks/_common/utils.py +3 -3
  52. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  53. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  54. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  55. mlrun/frameworks/_ml_common/utils.py +2 -1
  56. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  57. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  58. mlrun/frameworks/onnx/dataset.py +2 -1
  59. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  60. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  61. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  62. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  63. mlrun/frameworks/pytorch/utils.py +2 -1
  64. mlrun/frameworks/sklearn/metric.py +2 -1
  65. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  66. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  67. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  68. mlrun/hub/__init__.py +37 -0
  69. mlrun/hub/base.py +142 -0
  70. mlrun/hub/module.py +67 -76
  71. mlrun/hub/step.py +113 -0
  72. mlrun/launcher/base.py +2 -1
  73. mlrun/launcher/local.py +2 -1
  74. mlrun/model.py +12 -2
  75. mlrun/model_monitoring/__init__.py +0 -1
  76. mlrun/model_monitoring/api.py +2 -2
  77. mlrun/model_monitoring/applications/base.py +20 -6
  78. mlrun/model_monitoring/applications/context.py +1 -0
  79. mlrun/model_monitoring/controller.py +7 -17
  80. mlrun/model_monitoring/db/_schedules.py +2 -16
  81. mlrun/model_monitoring/db/_stats.py +2 -13
  82. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  83. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  84. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  85. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  86. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  87. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  88. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  89. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  90. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  91. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  92. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  93. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  94. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  95. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  98. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
  99. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
  100. mlrun/model_monitoring/features_drift_table.py +2 -1
  101. mlrun/model_monitoring/helpers.py +2 -1
  102. mlrun/model_monitoring/stream_processing.py +18 -16
  103. mlrun/model_monitoring/writer.py +4 -3
  104. mlrun/package/__init__.py +2 -1
  105. mlrun/platforms/__init__.py +0 -44
  106. mlrun/platforms/iguazio.py +1 -1
  107. mlrun/projects/operations.py +11 -10
  108. mlrun/projects/project.py +81 -82
  109. mlrun/run.py +4 -7
  110. mlrun/runtimes/__init__.py +2 -204
  111. mlrun/runtimes/base.py +89 -21
  112. mlrun/runtimes/constants.py +225 -0
  113. mlrun/runtimes/daskjob.py +4 -2
  114. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  115. mlrun/runtimes/mounts.py +5 -0
  116. mlrun/runtimes/nuclio/__init__.py +12 -8
  117. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  118. mlrun/runtimes/nuclio/application/application.py +200 -32
  119. mlrun/runtimes/nuclio/function.py +154 -49
  120. mlrun/runtimes/nuclio/serving.py +55 -42
  121. mlrun/runtimes/pod.py +59 -10
  122. mlrun/secrets.py +46 -2
  123. mlrun/serving/__init__.py +2 -0
  124. mlrun/serving/remote.py +5 -5
  125. mlrun/serving/routers.py +3 -3
  126. mlrun/serving/server.py +46 -43
  127. mlrun/serving/serving_wrapper.py +6 -2
  128. mlrun/serving/states.py +554 -207
  129. mlrun/serving/steps.py +1 -1
  130. mlrun/serving/system_steps.py +42 -33
  131. mlrun/track/trackers/mlflow_tracker.py +29 -31
  132. mlrun/utils/helpers.py +89 -16
  133. mlrun/utils/http.py +9 -2
  134. mlrun/utils/notifications/notification/git.py +1 -1
  135. mlrun/utils/notifications/notification/mail.py +39 -16
  136. mlrun/utils/notifications/notification_pusher.py +2 -2
  137. mlrun/utils/version/version.json +2 -2
  138. mlrun/utils/version/version.py +3 -4
  139. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
  140. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
  141. mlrun/db/auth_utils.py +0 -152
  142. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
  143. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  144. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
  146. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
  147. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  148. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  149. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  150. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py (new file)
@@ -0,0 +1,808 @@
+ # Copyright 2025 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Optional
+
+ import psycopg
+
+ import mlrun.common.schemas.model_monitoring as mm_schemas
+ import mlrun.errors
+ import mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema as timescaledb_schema
+ from mlrun.datastore.datastore_profile import DatastoreProfilePostgreSQL
+ from mlrun.model_monitoring.db.tsdb.preaggregate import PreAggregateConfig
+ from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
+     Statement,
+     TimescaleDBConnection,
+ )
+ from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_query_builder import (
+     TimescaleDBNaming,
+ )
+ from mlrun.utils import datetime_from_iso, logger
+
+
+ class TimescaleDBOperationsManager:
+     """
+     Handles all CRUD operations for TimescaleDB TSDB connector.
+
+     This class implements all create/update/delete operations for model monitoring data:
+     - Table and schema creation with optional pre-aggregates and continuous aggregates
+     - Event writing with parameterized queries
+     - Record deletion with support for both raw and aggregate data cleanup
+     - Resource deletion with automatic discovery of project-related tables and views
+     - Schema management with automatic cleanup of empty schemas
+
+     Key Features:
+     - Parameterized queries for all write/delete operations
+     - Automatic discovery of aggregate tables for comprehensive cleanup
+     - Transaction-based operations for data consistency
+     - Configurable pre-aggregation with retention policies
+     - Thread-safe operations through shared connection pooling
+
+     :param project: Project name used for table naming and schema organization
+     :param connection: Shared TimescaleDBConnection instance
+     :param pre_aggregate_config: Optional configuration for pre-aggregated tables
+     """
+
+     def __init__(
+         self,
+         project: str,
+         connection: TimescaleDBConnection,
+         pre_aggregate_config: Optional[PreAggregateConfig] = None,
+         profile: Optional[DatastoreProfilePostgreSQL] = None,
+     ):
+         """
+         Initialize operations handler with a shared connection.
+
+         :param project: The project name
+         :param connection: Shared TimescaleDBConnection instance
+         :param pre_aggregate_config: Optional pre-aggregation configuration
+         :param profile: Optional datastore profile for admin operations (database creation)
+         """
+         self.project = project
+         self._pre_aggregate_config = pre_aggregate_config
+         self._profile = profile
+
+         # Use the injected shared connection
+         self._connection = connection
+
+         # Initialize table schemas
+         self._init_tables()
+
+     def _init_tables(self) -> None:
+         self.tables = timescaledb_schema.create_table_schemas(self.project)
+
+     def _create_db_if_not_exists(self) -> None:
+         """
+         Create the database if it does not exist.
+
+         This method connects to the default 'postgres' database to create
+         the monitoring database if it doesn't already exist. It also ensures the
+         TimescaleDB extension is enabled in the monitoring database.
+
+         Note: Requires a profile to be set during initialization.
+         """
+         if not self._profile:
+             logger.debug(
+                 "No profile provided, skipping database creation",
+                 project=self.project,
+             )
+             return
+
+         database_name = self._profile.database
+
+         logger.debug(
+             "Checking/creating TimescaleDB database",
+             project=self.project,
+             database=database_name,
+         )
+
+         # Connect to default postgres database to create the monitoring database
+         admin_connection = TimescaleDBConnection(
+             dsn=self._profile.admin_dsn(),
+             min_connections=1,
+             max_connections=1,
+             autocommit=True,  # DDL requires autocommit
+         )
+
+         try:
+             # Check if database exists using parameterized Statement
+             check_stmt = Statement(
+                 sql="SELECT 1 FROM pg_database WHERE datname = %s",
+                 parameters=(database_name,),
+             )
+             result = admin_connection.run(query=check_stmt)
+
+             if not result or not result.data:
+                 # Database doesn't exist, create it
+                 # Note: CREATE DATABASE cannot be parameterized, but database_name
+                 # comes from our own profile, not user input
+                 admin_connection.run(statements=[f'CREATE DATABASE "{database_name}"'])
+                 logger.info(
+                     "Created TimescaleDB database",
+                     project=self.project,
+                     database=database_name,
+                 )
+             else:
+                 logger.debug(
+                     "TimescaleDB database already exists",
+                     project=self.project,
+                     database=database_name,
+                 )
+         finally:
+             # Close the admin connection pool to avoid resource leak
+             admin_connection.close()
+
+     def create_tables(
+         self, pre_aggregate_config: Optional[PreAggregateConfig] = None
+     ) -> None:
+         config = pre_aggregate_config or self._pre_aggregate_config
+
+         logger.debug(
+             "Creating TimescaleDB tables for model monitoring",
+             project=self.project,
+             with_pre_aggregates=config is not None,
+         )
+
+         # Create database if it doesn't exist
+         self._create_db_if_not_exists()
+
+         # Try to create extension, ignore if already exists
+         try:
+             self._connection.run(
+                 statements=["CREATE EXTENSION IF NOT EXISTS timescaledb"]
+             )
+         except psycopg.errors.DuplicateObject:
+             # Extension already loaded - this is fine
+             pass
+
+         # Create schema if it doesn't exist
+         schema_name = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS].schema
+         self._connection.run(statements=[f"CREATE SCHEMA IF NOT EXISTS {schema_name}"])
+
+         # Create main tables and convert to hypertables
+         for table_type, table in self.tables.items():
+             statements = [table._create_table_query()]
+
+             # Convert to hypertable
+             statements.append(table._create_hypertable_query())
+
+             # Create indexes
+             statements.extend(table._create_indexes_query())
+
+             # Create pre-aggregate tables if config provided
+             if config:
+                 statements.extend(table._create_continuous_aggregates_query(config))
+                 statements.extend(table._create_retention_policies_query(config))
+
+             # Execute all statements for this table
+             self._connection.run(statements=statements)
+
+         logger.debug(
+             "Successfully created TimescaleDB tables",
+             project=self.project,
+             table_count=len(self.tables),
+         )
+
+     def write_application_event(
+         self,
+         event: dict,
+         kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+     ) -> None:
+         """
+         Write a single result or metric to TimescaleDB using parameterized queries.
+
+         Uses PostgreSQL's parameterized queries for safety and performance.
+
+         :param event: Event data to write
+         :param kind: Type of event (RESULT or METRIC)
+         """
+         if kind == mm_schemas.WriterEventKind.RESULT:
+             table = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
+         else:
+             table = self.tables[mm_schemas.TimescaleDBTables.METRICS]
+
+         # Convert datetime strings to datetime objects if needed
+         for time_field in [
+             mm_schemas.WriterEvent.END_INFER_TIME,
+             mm_schemas.WriterEvent.START_INFER_TIME,
+         ]:
+             if time_field in event:
+                 if isinstance(event[time_field], str):
+                     event[time_field] = datetime_from_iso(event[time_field])
+                 # datetime objects can stay as-is
+
+         # Prepare the INSERT statement with parameterized query
+         columns = list(table.columns.keys())
+         placeholders = ", ".join(["%s"] * len(columns))
+
+         insert_sql = f"""
+             INSERT INTO {table.full_name()} ({', '.join(columns)})
+             VALUES ({placeholders})
+         """
+
+         # Prepare values in the correct order
+         values = tuple(event.get(col) for col in columns)
+
+         # Create parameterized statement
+         stmt = Statement(insert_sql, values)
+
+         try:
+             # Execute parameterized query
+             self._connection.run(statements=[stmt])
+         except Exception as e:
+             logger.error(
+                 "Failed to write application event to TimescaleDB",
+                 project=self.project,
+                 table=table.table_name,
+                 error=mlrun.errors.err_to_str(e),
+             )
+             raise mlrun.errors.MLRunRuntimeError(
+                 f"Failed to write event to TimescaleDB: {e}"
+             ) from e
+
+     def delete_tsdb_records(
+         self,
+         endpoint_ids: list[str],
+         include_aggregates: bool = True,
+     ) -> None:
+         """
+         Delete model endpoint records from TimescaleDB using parameterized queries.
+
+         :param endpoint_ids: List of endpoint IDs to delete
+         :param include_aggregates: Whether to delete from pre-aggregate tables as well
+         """
+         if not endpoint_ids:
+             logger.debug("No endpoint IDs provided for deletion", project=self.project)
+             return
+
+         logger.debug(
+             "Deleting model endpoint records from TimescaleDB",
+             project=self.project,
+             number_of_endpoints_to_delete=len(endpoint_ids),
+             include_aggregates=include_aggregates,
+         )
+
+         try:
+             # Execute all deletions in a single transaction to prevent race conditions
+             # Raw data must be deleted first to prevent continuous aggregates from repopulating
+             all_deletion_statements = []
+
+             # 1. Delete raw data first (removes source for continuous aggregates)
+             all_deletion_statements.extend(
+                 self._get_raw_delete_statements(endpoint_ids)
+             )
+
+             # 2. Delete aggregate data second (cleanup existing aggregated data)
+             if include_aggregates:
+                 # Always try to discover and delete aggregates, regardless of config
+                 all_deletion_statements.extend(
+                     self._get_aggregate_delete_statements_by_endpoints(endpoint_ids)
+                 )
+
+             # Execute all deletions in a single transaction
+             self._connection.run(statements=all_deletion_statements)
+
+             logger.debug(
+                 "Successfully deleted model endpoint records from TimescaleDB",
+                 project=self.project,
+                 number_of_endpoints_deleted=len(endpoint_ids),
+             )
+
+         except Exception as e:
+             logger.error(
+                 "Failed to delete model endpoint records from TimescaleDB",
+                 project=self.project,
+                 endpoint_count=len(endpoint_ids),
+                 error=mlrun.errors.err_to_str(e),
+             )
+             raise
+
+     def _get_raw_delete_statements(self, endpoint_ids: list[str]) -> list[Statement]:
+         """
+         Get parameterized DELETE statements for raw data tables.
+
+         :param endpoint_ids: List of endpoint IDs to delete
+         :return: List of Statement objects for raw data deletion
+         """
+         statements = []
+
+         for table_schema in self.tables.values():
+             if len(endpoint_ids) == 1:
+                 delete_sql = (
+                     f"DELETE FROM {table_schema.full_name()} "
+                     f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID} = %s"
+                 )
+                 stmt = Statement(delete_sql, (endpoint_ids[0],))
+             else:
+                 delete_sql = (
+                     f"DELETE FROM {table_schema.full_name()} "
+                     f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID} = ANY(%s)"
+                 )
+                 stmt = Statement(delete_sql, (endpoint_ids,))
+
+             statements.append(stmt)
+
+         return statements
+
+     def _get_aggregate_delete_statements_by_endpoints(
+         self, endpoint_ids: list[str]
+     ) -> list[Statement]:
+         """
+         Get parameterized DELETE statements for aggregate data tables by discovering existing tables.
+
+         This approach discovers all existing aggregate tables rather than relying on configuration,
+         ensuring we don't miss any aggregate data.
+
+         :param endpoint_ids: List of endpoint IDs to delete (must be non-empty)
+         :return: List of Statement objects for aggregate data deletion
+         """
+         statements = []
+
+         # Early return for empty endpoint list - nothing to delete
+         if not endpoint_ids:
+             return statements
+
+         try:
+             schema_name = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS].schema
+
+             # Get base table patterns for tables that have endpoint_id
+             base_patterns = []
+             base_patterns.extend(
+                 self.tables[table_type].table_name
+                 for table_type in [
+                     mm_schemas.TimescaleDBTables.PREDICTIONS,
+                     mm_schemas.TimescaleDBTables.METRICS,
+                     mm_schemas.TimescaleDBTables.APP_RESULTS,
+                 ]
+                 if table_type in self.tables
+             )
+             if not base_patterns:
+                 return statements
+
+             # Build query to find all aggregate tables and continuous aggregate views
+             # TimescaleDB continuous aggregates appear as VIEWs in information_schema
+             pattern_conditions = []
+             parameters = [schema_name]
+
+             for pattern in base_patterns:
+                 pattern_conditions.extend(
+                     [
+                         "table_name LIKE %s",  # _agg_ tables
+                         "table_name LIKE %s",  # _cagg_ views
+                     ]
+                 )
+                 parameters.extend(TimescaleDBNaming.get_all_aggregate_patterns(pattern))
+
+             discovery_stmt = Statement(
+                 f"""
+                 SELECT table_name
+                 FROM information_schema.tables
+                 WHERE table_schema = %s
+                 AND table_type IN ('BASE TABLE', 'VIEW')
+                 AND ({' OR '.join(pattern_conditions)})
+                 ORDER BY table_name
+                 """,
+                 tuple(parameters),
+             )
+
+             result = self._connection.run(query=discovery_stmt)
+             discovered_objects = (
+                 [row[0] for row in result.data] if result and result.data else []
+             )
+
+             if not discovered_objects:
+                 logger.debug(
+                     "No aggregate objects found for deletion",
+                     project=self.project,
+                     schema=schema_name,
+                 )
+                 return statements
+
+             logger.debug(
+                 "Discovered aggregate objects for endpoint deletion",
+                 project=self.project,
+                 aggregate_objects=len(discovered_objects),
+                 endpoint_count=len(endpoint_ids),
+             )
+
+             # Create delete statements for all discovered aggregate objects
+             for object_name in discovered_objects:
+                 delete_sql = f"DELETE FROM {schema_name}.{object_name} WHERE"
+                 if len(endpoint_ids) == 1:
+                     delete_sql += f" {mm_schemas.WriterEvent.ENDPOINT_ID} = %s"
+                     stmt = Statement(delete_sql, (endpoint_ids[0],))
+                 else:
+                     delete_sql += f" {mm_schemas.WriterEvent.ENDPOINT_ID} = ANY(%s)"
+                     stmt = Statement(delete_sql, (endpoint_ids,))
+
+                 statements.append(stmt)
+
+         except Exception as e:
+             logger.debug(
+                 "Failed to discover aggregate objects for deletion",
+                 project=self.project,
+                 error=mlrun.errors.err_to_str(e),
+             )
+             # Continue with empty statements list rather than failing completely
+
+         return statements
+
+     def delete_tsdb_resources(self) -> None:
+         """
+         Delete all project resources in TimescaleDB by discovering existing tables that match our patterns.
+
+         This approach ensures we don't miss any tables, even if configurations are out of sync.
+         """
+         logger.debug(
+             "Deleting all project resources from TimescaleDB",
+             project=self.project,
+         )
+
+         try:
+             schema_name = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS].schema
+
+             # Get the base table patterns for this project
+             base_patterns = []
+             base_patterns.extend(
+                 table_schema.table_name for table_schema in self.tables.values()
+             )
+             # Build discovery query for all project objects
+             pattern_conditions = []
+             parameters = [schema_name]
+
+             for pattern in base_patterns:
+                 # Match exact table name OR table name with _agg_/_cagg_ suffix
+                 pattern_conditions.extend(
+                     [
+                         "table_name = %s",
+                         "table_name LIKE %s",  # _agg_ tables
+                         "table_name LIKE %s",  # _cagg_ views
+                     ]
+                 )
+                 parameters.extend(TimescaleDBNaming.get_deletion_patterns(pattern))
+
+             # Discover tables
+             tables_stmt = Statement(
+                 f"""
+                 SELECT table_name
+                 FROM information_schema.tables
+                 WHERE table_schema = %s
+                 AND table_type = 'BASE TABLE'
+                 AND ({' OR '.join(pattern_conditions)})
+                 ORDER BY table_name
+                 """,
+                 tuple([schema_name] + parameters[1:]),
+             )
+
+             # Build separate pattern conditions for TimescaleDB continuous aggregates
+             view_pattern_conditions = []
+             view_parameters = [schema_name]
+
+             for pattern in base_patterns:
+                 # For continuous aggregates, look for _cagg_ pattern
+                 view_pattern_conditions.append("view_name LIKE %s")
+                 view_parameters.append(TimescaleDBNaming.get_cagg_pattern(pattern))
+
+             # Discover TimescaleDB continuous aggregates (use TimescaleDB catalog, not pg_matviews)
+             views_stmt = Statement(
+                 f"""
+                 SELECT view_name as table_name
+                 FROM timescaledb_information.continuous_aggregates
+                 WHERE view_schema = %s
+                 AND ({' OR '.join(view_pattern_conditions)})
+                 ORDER BY view_name
+                 """,
+                 tuple(view_parameters),
+             )
+
+             tables_result = self._connection.run(query=tables_stmt)
+             views_result = self._connection.run(query=views_stmt)
+
+             discovered_tables = (
+                 [row[0] for row in tables_result.data]
+                 if tables_result and tables_result.data
+                 else []
+             )
+             discovered_views = (
+                 [row[0] for row in views_result.data]
+                 if views_result and views_result.data
+                 else []
+             )
+
+             if not discovered_tables and not discovered_views:
+                 logger.debug(
+                     "No project resources found to delete",
+                     project=self.project,
+                     schema=schema_name,
+                 )
+                 return
+
+             logger.debug(
+                 "Discovered project resources for deletion",
+                 project=self.project,
+                 tables=len(discovered_tables),
+                 views=len(discovered_views),
+                 schema=schema_name,
+             )
+
+             drop_statements = []
+
+             # Drop materialized views first (they depend on tables)
+             if discovered_views:
+                 view_list = ", ".join(
+                     f"{schema_name}.{view_name}" for view_name in discovered_views
+                 )
+                 drop_statements.append(
+                     f"DROP MATERIALIZED VIEW IF EXISTS {view_list} CASCADE"
+                 )
+
+             # Drop tables second (one by one due to TimescaleDB hypertable limitations)
+             drop_statements.extend(
+                 f"DROP TABLE IF EXISTS {schema_name}.{table_name} CASCADE"
+                 for table_name in discovered_tables
+             )
+             # Execute all drops
+             if drop_statements:
+                 self._connection.run(statements=drop_statements)
+
+             logger.debug(
+                 "Successfully dropped project resources from TimescaleDB",
+                 project=self.project,
+             )
+
+             # Optional cleanup: drop schema if empty (errors are logged but don't fail the operation)
+             self._drop_schema_if_empty()
+
+         except Exception as e:
+             logger.error(
+                 "Failed to delete all project resources from TimescaleDB",
+                 project=self.project,
+                 error=mlrun.errors.err_to_str(e),
+             )
+             raise
+
+         logger.debug(
+             "Successfully deleted all project resources from TimescaleDB",
+             project=self.project,
+         )
+
+     def _drop_schema_if_empty(self) -> None:
+         """
+         Drop the schema if it contains no more tables using parameterized query.
+
+         This is a best-effort cleanup operation that should not fail the main resource deletion.
+         Schema dropping may fail due to permissions, remaining objects, or concurrent operations,
+         but the primary table deletion operation has already succeeded.
+         """
+         try:
+             schema_name = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS].schema
+
+             # Check if schema has any tables using parameterized query
+             check_stmt = Statement(
+                 """
+                 SELECT COUNT(*) AS table_count
+                 FROM information_schema.tables
+                 WHERE table_schema = %s
+                 """,
+                 (schema_name,),
+             )
+
+             result = self._connection.run(query=check_stmt)
+
+             if result and result.data and result.data[0][0] == 0:
+                 # Schema is empty, drop it
+                 drop_schema_query = f"DROP SCHEMA IF EXISTS {schema_name} CASCADE"
+                 self._connection.run(statements=[drop_schema_query])
+
+                 logger.debug(
+                     "Dropped empty schema",
+                     project=self.project,
+                     schema=schema_name,
+                 )
+         except Exception as e:
+             # Schema dropping is optional cleanup - don't fail the main operation
+             # This may happen due to permissions, remaining objects, or concurrent operations
+             logger.warning(
+                 "Failed to check/drop empty schema (non-critical cleanup operation)",
+                 project=self.project,
+                 error=mlrun.errors.err_to_str(e),
+             )
+
+     def delete_application_records(
+         self, application_name: str, endpoint_ids: Optional[list[str]] = None
+     ) -> None:
+         """
+         Delete application records from TimescaleDB for the given model endpoints or all if endpoint_ids is None.
+
+         This method deletes records from both app_results and metrics tables that match the specified
+         application name and optionally filter by endpoint IDs.
+
+         :param application_name: Name of the application whose records should be deleted
+         :param endpoint_ids: Optional list of endpoint IDs to filter deletion. If None, deletes all records
+                              for the application across all endpoints.
+         """
+         logger.debug(
+             "Deleting application records from TimescaleDB",
+             project=self.project,
+             application_name=application_name,
+             endpoint_ids=endpoint_ids,
+         )
+
+         if not application_name:
+             logger.warning(
+                 "No application name provided for deletion", project=self.project
+             )
+             return
+
+         try:
+             self._delete_application_records(application_name, endpoint_ids)
+         except Exception as e:
+             logger.error(
+                 "Failed to delete application records from TimescaleDB",
+                 project=self.project,
+                 application_name=application_name,
+                 endpoint_ids=endpoint_ids,
+                 error=mlrun.errors.err_to_str(e),
+             )
+             raise mlrun.errors.MLRunRuntimeError(
+                 f"Failed to delete application records for {application_name}: {e}"
+             ) from e
+
+     def _delete_application_records(self, application_name, endpoint_ids):
+         base_parameters = [application_name]
+
+         # Add endpoint filter if provided
+         if endpoint_ids:
+             if len(endpoint_ids) == 1:
+                 endpoint_filter = f" AND {mm_schemas.WriterEvent.ENDPOINT_ID} = %s"
+                 parameters = base_parameters + [endpoint_ids[0]]
+             else:
+                 endpoint_filter = f" AND {mm_schemas.WriterEvent.ENDPOINT_ID} = ANY(%s)"
+                 parameters = base_parameters + [endpoint_ids]
+         else:
+             endpoint_filter = ""
+             parameters = base_parameters
+
+         # Delete from app_results table
+         app_results_table = self.tables[mm_schemas.TimescaleDBTables.APP_RESULTS]
+         app_filter = f"{mm_schemas.WriterEvent.APPLICATION_NAME} = %s"
+         app_results_sql = (
+             f"DELETE FROM {app_results_table.full_name()} "
+             f"WHERE {app_filter}{endpoint_filter}"
+         )
+         deletion_statements = [Statement(app_results_sql, tuple(parameters))]
+         # Delete from metrics table
+         metrics_table = self.tables[mm_schemas.TimescaleDBTables.METRICS]
+         metrics_sql = (
+             f"DELETE FROM {metrics_table.full_name()} "
+             f"WHERE {app_filter}{endpoint_filter}"
+         )
+         deletion_statements.append(Statement(metrics_sql, tuple(parameters)))
+
+         # Also delete from aggregate tables if they exist
+         aggregate_statements = self._get_aggregate_delete_statements_by_application(
+             application_name, endpoint_ids
+         )
+         deletion_statements.extend(aggregate_statements)
+
+         # Execute all deletions in a single transaction
+         self._connection.run(statements=deletion_statements)
+
+         logger.debug(
+             "Successfully deleted application records from TimescaleDB",
+             project=self.project,
+             application_name=application_name,
+             endpoint_count=len(endpoint_ids) if endpoint_ids else "all",
+         )
+
+     def _get_aggregate_delete_statements_by_application(
+         self, application_name: str, endpoint_ids: Optional[list[str]] = None
+     ) -> list[Statement]:
+         """
+         Get parameterized DELETE statements for aggregate tables filtered by application name.
+
+         This discovers existing aggregate tables and creates deletion statements that filter
+         by both application name and optionally endpoint IDs.
+
+         :param application_name: Application name to filter by
+         :param endpoint_ids: Optional endpoint IDs to filter by
+         :return: List of Statement objects for aggregate data deletion
+         """
+         statements = []
+
+         try:
+             schema_name = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS].schema
+
+             # Discover all continuous aggregates and materialized views for this project
+             discovery_stmt = Statement(
+                 """
+                 SELECT table_name
+                 FROM (
+                     SELECT matviewname as table_name
+                     FROM pg_matviews
+                     WHERE schemaname = %s
+                     AND matviewname LIKE %s
+
+                     UNION ALL
+
+                     SELECT view_name as table_name
+                     FROM timescaledb_information.continuous_aggregates
+                     WHERE view_schema = %s
+                     AND view_name LIKE %s
+                 ) AS combined_objects
+                 ORDER BY table_name
+                 """,
+                 (schema_name, f"%{self.project}%", schema_name, f"%{self.project}%"),
+             )
+
+             result = self._connection.run(query=discovery_stmt)
+             discovered_objects = (
+                 [row[0] for row in result.data] if result and result.data else []
+             )
+
+             if not discovered_objects:
+                 logger.debug(
+                     "No aggregate objects found for application deletion",
+                     project=self.project,
+                     application_name=application_name,
+                     schema=schema_name,
+                 )
+                 return statements
+
+             logger.debug(
+                 "Discovered aggregate objects for application deletion",
+                 project=self.project,
+                 application_name=application_name,
+                 aggregate_objects=len(discovered_objects),
+             )
+
+             # Build filter conditions
+             app_filter = f"{mm_schemas.WriterEvent.APPLICATION_NAME} = %s"
+             base_parameters = [application_name]
+
+             # Note: None means "delete all for this application" (no endpoint filter)
+             # Empty list [] would delete nothing, so we treat it same as None
+             if endpoint_ids:  # Non-empty list
+                 if len(endpoint_ids) == 1:
+                     endpoint_filter = f" AND {mm_schemas.WriterEvent.ENDPOINT_ID} = %s"
+                     parameters = base_parameters + [endpoint_ids[0]]
+                 else:
+                     endpoint_filter = (
+                         f" AND {mm_schemas.WriterEvent.ENDPOINT_ID} = ANY(%s)"
+                     )
+                     parameters = base_parameters + [endpoint_ids]
+             else:
+                 endpoint_filter = ""
+                 parameters = base_parameters
+
+             # Create delete statements for all discovered aggregate objects
+             for object_name in discovered_objects:
+                 delete_sql = (
+                     f"DELETE FROM {schema_name}.{object_name} "
+                     f"WHERE {app_filter}{endpoint_filter}"
+                 )
+                 stmt = Statement(delete_sql, tuple(parameters))
+                 statements.append(stmt)
+
+         except Exception as e:
+             logger.warning(
+                 "Failed to discover aggregate objects for application deletion",
+                 project=self.project,
+                 application_name=application_name,
+                 error=mlrun.errors.err_to_str(e),
+             )
+             # Continue with empty statements list rather than failing completely
+
+         return statements