mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (167) hide show
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,502 @@
1
+ # Copyright 2025 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import datetime
16
+ from dataclasses import dataclass
17
+ from io import StringIO
18
+ from typing import Optional
19
+
20
+ import mlrun.common.schemas.model_monitoring as mm_schemas
21
+ import mlrun.errors
22
+ from mlrun.model_monitoring.db.tsdb.preaggregate import PreAggregateConfig
23
+ from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_query_builder import (
24
+ TimescaleDBNaming,
25
+ )
26
+
27
# Base name of the PostgreSQL schema that holds all model-monitoring tables.
# The deployment's system id is appended wherever the schema is actually used
# (see create_table_schemas below).
_MODEL_MONITORING_SCHEMA = "mlrun_model_monitoring"

# TimescaleDB-specific constants
# Name of the bucketed-time column in pre-aggregate tables and continuous
# aggregates (produced by TimescaleDB's time_bucket()).
TIME_BUCKET_COLUMN = "time_bucket"

# Database schema constants
# VARCHAR length caps for the free-text columns: model error messages,
# serialized custom metrics, and result extra data respectively.
MODEL_ERROR_MAX_LENGTH = 1000
CUSTOM_METRICS_MAX_LENGTH = 1000
RESULT_EXTRA_DATA_MAX_LENGTH = 1000
36
+
37
+
38
def create_table_schemas(project: str) -> dict:
    """
    Build every TimescaleDB table schema object for a project.

    Centralizing the construction here avoids duplicating the schema-name and
    table wiring across the connector, the operations module, and test
    fixtures.

    :param project: The project name the tables are namespaced by.
    :returns: Mapping of ``TimescaleDBTables`` enum value to its schema object.
    """
    import mlrun  # local import — presumably to avoid an import-time cycle

    # The per-deployment schema name: base schema + system id.
    schema_name = f"{_MODEL_MONITORING_SCHEMA}_{mlrun.mlconf.system_id}"
    table_classes = {
        mm_schemas.TimescaleDBTables.APP_RESULTS: AppResultTable,
        mm_schemas.TimescaleDBTables.METRICS: Metrics,
        mm_schemas.TimescaleDBTables.PREDICTIONS: Predictions,
        mm_schemas.TimescaleDBTables.ERRORS: Errors,
    }
    return {
        table: table_cls(project=project, schema=schema_name)
        for table, table_cls in table_classes.items()
    }
62
+
63
+
64
+ class _TimescaleDBColumnType:
65
+ """Represents a TimescaleDB column type with optional constraints."""
66
+
67
+ def __init__(
68
+ self, data_type: str, length: Optional[int] = None, nullable: bool = True
69
+ ):
70
+ self.data_type = data_type
71
+ self.length = length
72
+ self.nullable = nullable
73
+
74
+ def __str__(self):
75
+ if self.length is not None:
76
+ return f"{self.data_type}({self.length})"
77
+ else:
78
+ return self.data_type
79
+
80
+
81
class TimescaleDBSchema:
    """
    A hypertable schema descriptor for TimescaleDB.

    An instance knows its fully qualified table name and column layout, and can
    render the SQL needed to create the table (plus its hypertable conversion,
    indexes, pre-aggregate tables, continuous aggregates and retention
    policies), drop it, and read records back. At the moment there are 4
    concrete schemas: AppResultTable, Metrics, Predictions, and Errors.

    NOTE(review): removed a spurious ``@dataclass`` decorator. The class
    defines ``__init__`` by hand and declares no dataclass fields, so the
    decorator's only effect was to inject an ``__eq__`` under which *all*
    instances compare equal (plus a field-less ``__repr__``) — almost
    certainly unintended.

    NOTE(review): the query builders interpolate values (table names, time
    bounds, filter fragments) directly into SQL text. Inputs appear to come
    from internal configuration only, but parameterized queries would be
    safer — confirm upstream sanitization before exposing any of these to
    user-controlled input.
    """

    def __init__(
        self,
        table_name: str,
        columns: dict[str, "_TimescaleDBColumnType"],
        time_column: str,
        project: str,
        schema: Optional[str] = None,
        chunk_time_interval: str = "1 day",
        indexes: Optional[list[str]] = None,
    ):
        """
        :param table_name:          Base table name; the project name (dashes
                                    replaced with underscores) is appended.
        :param columns:             Mapping of column name -> column type.
        :param time_column:         Hypertable time-dimension column.
        :param project:             Project name used to namespace the table.
        :param schema:              Target database schema; defaults to the
                                    module-level model-monitoring schema name.
        :param chunk_time_interval: Hypertable chunk interval (SQL INTERVAL text).
        :param indexes:             Index definitions, each a comma-separated
                                    column list.
        """
        # Dashes are not valid in unquoted PostgreSQL identifiers.
        self.table_name = f"{table_name}_{project.replace('-', '_')}"
        self.columns = columns
        self.time_column = time_column
        self.schema = schema or _MODEL_MONITORING_SCHEMA
        self.chunk_time_interval = chunk_time_interval
        self.indexes = indexes or []
        self.project = project

    def full_name(self) -> str:
        """Return the fully qualified table name (schema.table_name)."""
        return f"{self.schema}.{self.table_name}"

    def _create_table_query(self) -> str:
        """Render the CREATE TABLE statement for the base table."""
        columns_def = ", ".join(
            f"{col} {col_type}" + ("" if col_type.nullable else " NOT NULL")
            for col, col_type in self.columns.items()
        )
        return f"CREATE TABLE IF NOT EXISTS {self.full_name()} ({columns_def});"

    def _create_hypertable_query(self) -> str:
        """Render the SELECT that converts the base table into a hypertable."""
        return (
            f"SELECT create_hypertable('{self.full_name()}', '{self.time_column}', "
            f"chunk_time_interval => INTERVAL '{self.chunk_time_interval}', if_not_exists => TRUE);"
        )

    def _create_indexes_query(self) -> list[str]:
        """Render CREATE INDEX statements for each configured index."""
        queries = []
        for index_columns in self.indexes:
            # Derive a deterministic index name from the column list.
            index_name = f"idx_{self.table_name}_{index_columns.replace(',', '_').replace(' ', '_')}"
            queries.append(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON {self.full_name()} ({index_columns});"
            )
        return queries

    def _create_pre_aggregate_tables_query(
        self, config: "PreAggregateConfig"
    ) -> list[str]:
        """Render CREATE statements for per-interval pre-aggregate hypertables."""
        queries = []

        for interval in config.aggregate_intervals:
            agg_table_name = TimescaleDBNaming.get_agg_table_name(
                self.table_name, interval
            )

            # Aggregate tables always lead with the bucketed-time column.
            agg_columns = [f"{TIME_BUCKET_COLUMN} TIMESTAMPTZ NOT NULL"]

            # One aggregated column per (function, numeric source column).
            for col, col_type in self.columns.items():
                if col == self.time_column:
                    continue
                if col_type.data_type in ["DOUBLE PRECISION", "INTEGER", "BIGINT"]:
                    agg_columns.extend(
                        f"{func}_{col} {col_type}" for func in config.agg_functions
                    )
                else:
                    # Non-numeric columns keep the original type for grouping.
                    agg_columns.append(f"{col} {col_type}")

            create_agg_table = f"CREATE TABLE IF NOT EXISTS {self.schema}.{agg_table_name} ({', '.join(agg_columns)});"

            # Each aggregate table is itself a hypertable, chunked more
            # coarsely the wider the aggregation interval is.
            create_agg_hypertable = (
                f"SELECT create_hypertable('{self.schema}.{agg_table_name}', "
                f"'{TIME_BUCKET_COLUMN}', chunk_time_interval => INTERVAL "
                f"'{self._get_chunk_interval_for_agg(interval)}', if_not_exists => TRUE);"
            )

            queries.extend([create_agg_table, create_agg_hypertable])

        return queries

    def _get_chunk_interval_for_agg(self, interval: str) -> str:
        """Map an aggregation interval to a suitable hypertable chunk interval."""
        interval_to_chunk = {
            "10m": "1 hour",
            "1h": "1 day",
            "6h": "1 day",
            "1d": "7 days",
            "1w": "1 month",
            "1M": "3 months",
        }
        # Unknown intervals fall back to the default 1-day chunking.
        return interval_to_chunk.get(interval, "1 day")

    def _create_continuous_aggregates_query(
        self, config: "PreAggregateConfig"
    ) -> list[str]:
        """Render TimescaleDB continuous-aggregate views for pre-computation."""
        queries = []

        for interval in config.aggregate_intervals:
            cagg_name = TimescaleDBNaming.get_cagg_view_name(self.table_name, interval)

            # SELECT clause: bucketed time first, then aggregates/group keys.
            select_parts = [
                f"time_bucket(INTERVAL '{interval}', {self.time_column}) AS {TIME_BUCKET_COLUMN}"
            ]

            for col, col_type in self.columns.items():
                if col == self.time_column:
                    continue
                if col_type.data_type in ["DOUBLE PRECISION", "INTEGER", "BIGINT"]:
                    # Numeric columns get one aggregate per configured function.
                    for func in config.agg_functions:
                        if func == "count":
                            select_parts.append(f"COUNT({col}) AS {func}_{col}")
                        else:
                            select_parts.append(
                                f"{func.upper()}({col}) AS {func}_{col}"
                            )
                elif col in [
                    mm_schemas.WriterEvent.ENDPOINT_ID,
                    mm_schemas.WriterEvent.APPLICATION_NAME,
                    mm_schemas.MetricData.METRIC_NAME,
                    mm_schemas.ResultData.RESULT_NAME,
                ]:
                    # Identity columns pass through and become group keys below.
                    select_parts.append(col)

            # GROUP BY: bucketed time plus the same identity columns.
            group_by_cols = [TIME_BUCKET_COLUMN]
            for col in self.columns:
                if col == self.time_column:
                    continue
                if col in [
                    mm_schemas.WriterEvent.ENDPOINT_ID,
                    mm_schemas.WriterEvent.APPLICATION_NAME,
                    mm_schemas.MetricData.METRIC_NAME,
                    mm_schemas.ResultData.RESULT_NAME,
                ]:
                    group_by_cols.append(col)

            # WITH NO DATA: the view is created empty and materialized later.
            create_cagg = (
                f"CREATE MATERIALIZED VIEW IF NOT EXISTS {self.schema}.{cagg_name} "
                f"WITH (timescaledb.continuous) "
                f"AS SELECT {', '.join(select_parts)} FROM {self.full_name()} "
                f"GROUP BY {', '.join(group_by_cols)} WITH NO DATA;"
            )

            queries.append(create_cagg)

        return queries

    def _create_retention_policies_query(self, config: "PreAggregateConfig") -> list[str]:
        """Render add_retention_policy calls for the raw table and its caggs."""
        queries = []

        # Retention for the raw (main) hypertable.
        if "raw" in config.retention_policy:
            queries.append(
                f"SELECT add_retention_policy('{self.full_name()}', INTERVAL "
                f"'{config.retention_policy['raw']}', if_not_exists => TRUE);"
            )

        # Retention for each continuous aggregate that has a policy entry.
        for interval in config.aggregate_intervals:
            if interval in config.retention_policy:
                cagg_name = TimescaleDBNaming.get_cagg_view_name(
                    self.table_name, interval
                )
                queries.append(
                    f"SELECT add_retention_policy('{self.schema}.{cagg_name}', INTERVAL "
                    f"'{config.retention_policy[interval]}', if_not_exists => TRUE);"
                )

        return queries

    def drop_table_query(self) -> str:
        """Render the DROP TABLE statement (CASCADE drops dependent objects)."""
        return f"DROP TABLE IF EXISTS {self.full_name()} CASCADE;"

    def _get_records_query(
        self,
        start: datetime.datetime,
        end: datetime.datetime,
        columns_to_filter: Optional[list[str]] = None,
        filter_query: Optional[str] = None,
        interval: Optional[str] = None,
        limit: Optional[int] = None,
        agg_funcs: Optional[list] = None,
        order_by: Optional[str] = None,
        desc: Optional[bool] = None,
        use_pre_aggregates: bool = True,
        group_by: Optional[list[str]] = None,
        timestamp_column: Optional[str] = None,
    ) -> str:
        """
        Build a SELECT over the table or, when an interval and aggregate
        functions are requested, over its continuous-aggregate view.

        :param start:              Inclusive lower time bound (interpolated as text).
        :param end:                Inclusive upper time bound (interpolated as text).
        :param columns_to_filter:  Columns to project; ``*`` when omitted.
        :param filter_query:       Raw SQL fragment ANDed into the WHERE clause.
        :param interval:           Aggregation interval selecting a cagg view.
        :param limit:              Optional LIMIT.
        :param agg_funcs:          Aggregate functions — required (with interval)
                                   to route the query to a pre-aggregate.
        :param order_by:           Optional ORDER BY expression.
        :param desc:               Sort direction for ``order_by``.
        :param use_pre_aggregates: Allow routing to continuous aggregates.
        :param group_by:           Optional GROUP BY columns.
        :param timestamp_column:   Override of the time column (raw table only).
        :raises mlrun.errors.MLRunInvalidArgumentError: if a custom timestamp
                column is combined with pre-aggregate routing.
        :returns: The SQL query text.
        """
        # Default to the raw table and its configured time column.
        table_name = self.table_name
        time_col = timestamp_column or self.time_column

        if interval and agg_funcs and use_pre_aggregates:
            if timestamp_column and timestamp_column != self.time_column:
                raise mlrun.errors.MLRunInvalidArgumentError(
                    f"Cannot use custom timestamp_column='{timestamp_column}' with pre-aggregates. "
                    "Pre-aggregates are built on the table's default time column."
                )
            # Route to the continuous aggregate; its time column is the bucket.
            table_name = TimescaleDBNaming.get_cagg_view_name(self.table_name, interval)
            time_col = TIME_BUCKET_COLUMN

        with StringIO() as query:
            query.write("SELECT ")

            if columns_to_filter:
                if interval and agg_funcs and use_pre_aggregates:
                    modified_columns = []
                    for col in columns_to_filter:
                        if col == time_col:
                            modified_columns.append(TIME_BUCKET_COLUMN)
                        else:
                            # Use column name as-is — callers must pass the
                            # correct pre-aggregate column names.
                            modified_columns.append(col)
                    query.write(", ".join(modified_columns))
                else:
                    query.write(", ".join(columns_to_filter))
            else:
                query.write("*")

            query.write(f" FROM {self.schema}.{table_name}")

            # WHERE clause — every present condition is ANDed together.
            conditions = []
            if filter_query:
                conditions.append(filter_query)
            if start:
                conditions.append(f"{time_col} >= '{start}'")
            if end:
                conditions.append(f"{time_col} <= '{end}'")

            if conditions:
                query.write(" WHERE " + " AND ".join(conditions))

            # GROUP BY clause (must come before ORDER BY).
            if group_by:
                query.write(f" GROUP BY {', '.join(group_by)}")

            # ORDER BY clause (must come after GROUP BY).
            if order_by:
                direction = " DESC" if desc else " ASC"
                query.write(f" ORDER BY {order_by}{direction}")

            if limit:
                query.write(f" LIMIT {limit}")

            query.write(";")

            return query.getvalue()
352
+
353
+
354
class AppResultTable(TimescaleDBSchema):
    """
    Schema for the application-results hypertable; the time dimension is the
    monitoring window's end-infer time.

    NOTE(review): removed a spurious ``@dataclass`` decorator — the class
    defines ``__init__`` itself and declares no dataclass fields, so the
    decorator only injected an ``__eq__`` under which all instances compare
    equal, which is almost certainly unintended.
    """

    def __init__(self, project: str, schema: Optional[str] = None):
        """
        :param project: Project name used to namespace the table.
        :param schema:  Target database schema (base-class default when omitted).
        """
        table_name = mm_schemas.TimescaleDBTables.APP_RESULTS
        columns = {
            mm_schemas.WriterEvent.END_INFER_TIME: _TimescaleDBColumnType(
                "TIMESTAMPTZ"
            ),
            mm_schemas.WriterEvent.START_INFER_TIME: _TimescaleDBColumnType(
                "TIMESTAMPTZ"
            ),
            mm_schemas.ResultData.RESULT_VALUE: _TimescaleDBColumnType(
                "DOUBLE PRECISION"
            ),
            mm_schemas.ResultData.RESULT_STATUS: _TimescaleDBColumnType("INTEGER"),
            mm_schemas.ResultData.RESULT_EXTRA_DATA: _TimescaleDBColumnType(
                "VARCHAR", RESULT_EXTRA_DATA_MAX_LENGTH
            ),
            mm_schemas.WriterEvent.ENDPOINT_ID: _TimescaleDBColumnType("VARCHAR", 64),
            mm_schemas.WriterEvent.APPLICATION_NAME: _TimescaleDBColumnType(
                "VARCHAR", 64
            ),
            mm_schemas.ResultData.RESULT_NAME: _TimescaleDBColumnType("VARCHAR", 64),
            mm_schemas.ResultData.RESULT_KIND: _TimescaleDBColumnType("INTEGER"),
        }
        # Common access paths: by endpoint, by (application, result), by time.
        indexes = [
            mm_schemas.WriterEvent.ENDPOINT_ID,
            f"{mm_schemas.WriterEvent.APPLICATION_NAME}, {mm_schemas.ResultData.RESULT_NAME}",
            mm_schemas.WriterEvent.END_INFER_TIME,
        ]
        super().__init__(
            table_name=table_name,
            columns=columns,
            time_column=mm_schemas.WriterEvent.END_INFER_TIME,
            schema=schema,
            project=project,
            indexes=indexes,
        )
394
+
395
+
396
class Metrics(TimescaleDBSchema):
    """
    Schema for the metrics hypertable (one row per reported metric value).

    NOTE(review): removed a spurious ``@dataclass`` decorator (hand-written
    ``__init__``, no dataclass fields — with no fields ``@dataclass`` injects
    an ``__eq__`` under which all instances compare equal). Also replaced a
    backslash line-continuation *inside an f-string* — which embedded the next
    source line's indentation into that index's column list and name — with
    implicit string concatenation, normalizing the whitespace to a single
    space.
    """

    def __init__(self, project: str, schema: Optional[str] = None):
        """
        :param project: Project name used to namespace the table.
        :param schema:  Target database schema (base-class default when omitted).
        """
        table_name = mm_schemas.TimescaleDBTables.METRICS
        columns = {
            mm_schemas.WriterEvent.END_INFER_TIME: _TimescaleDBColumnType(
                "TIMESTAMPTZ"
            ),
            mm_schemas.WriterEvent.START_INFER_TIME: _TimescaleDBColumnType(
                "TIMESTAMPTZ"
            ),
            mm_schemas.MetricData.METRIC_VALUE: _TimescaleDBColumnType(
                "DOUBLE PRECISION"
            ),
            mm_schemas.WriterEvent.ENDPOINT_ID: _TimescaleDBColumnType("VARCHAR", 64),
            mm_schemas.WriterEvent.APPLICATION_NAME: _TimescaleDBColumnType(
                "VARCHAR", 64
            ),
            mm_schemas.MetricData.METRIC_NAME: _TimescaleDBColumnType("VARCHAR", 64),
        }
        # Access paths: endpoint, (application, metric), time, plus the
        # composite time-based lookups used by range queries.
        indexes = [
            mm_schemas.WriterEvent.ENDPOINT_ID,
            f"{mm_schemas.WriterEvent.APPLICATION_NAME}, {mm_schemas.MetricData.METRIC_NAME}",
            mm_schemas.WriterEvent.END_INFER_TIME,
            f"{mm_schemas.WriterEvent.END_INFER_TIME}, {mm_schemas.WriterEvent.ENDPOINT_ID}, "
            f"{mm_schemas.WriterEvent.APPLICATION_NAME}",
            f"{mm_schemas.WriterEvent.APPLICATION_NAME}, {mm_schemas.WriterEvent.END_INFER_TIME}",
        ]
        super().__init__(
            table_name=table_name,
            columns=columns,
            time_column=mm_schemas.WriterEvent.END_INFER_TIME,
            schema=schema,
            project=project,
            indexes=indexes,
        )
434
+
435
+
436
class Predictions(TimescaleDBSchema):
    """
    Schema for the predictions hypertable (latency, custom metrics and
    prediction-count estimates per endpoint).

    NOTE(review): removed a spurious ``@dataclass`` decorator — the class
    defines ``__init__`` itself and declares no dataclass fields, so the
    decorator only injected an ``__eq__`` under which all instances compare
    equal, which is almost certainly unintended.
    """

    def __init__(self, project: str, schema: Optional[str] = None):
        """
        :param project: Project name used to namespace the table.
        :param schema:  Target database schema (base-class default when omitted).
        """
        table_name = mm_schemas.TimescaleDBTables.PREDICTIONS
        columns = {
            mm_schemas.WriterEvent.END_INFER_TIME: _TimescaleDBColumnType(
                "TIMESTAMPTZ"
            ),
            mm_schemas.EventFieldType.LATENCY: _TimescaleDBColumnType(
                "DOUBLE PRECISION"
            ),
            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TimescaleDBColumnType(
                "VARCHAR", CUSTOM_METRICS_MAX_LENGTH
            ),
            mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT: _TimescaleDBColumnType(
                "DOUBLE PRECISION"
            ),
            mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT: _TimescaleDBColumnType(
                "INTEGER"
            ),
            mm_schemas.WriterEvent.ENDPOINT_ID: _TimescaleDBColumnType("VARCHAR", 64),
        }

        # Access paths: by endpoint, by time, and the composite of both.
        indexes = [
            mm_schemas.WriterEvent.ENDPOINT_ID,
            mm_schemas.WriterEvent.END_INFER_TIME,
            f"{mm_schemas.WriterEvent.END_INFER_TIME}, {mm_schemas.WriterEvent.ENDPOINT_ID}",
        ]
        super().__init__(
            table_name=table_name,
            columns=columns,
            time_column=mm_schemas.WriterEvent.END_INFER_TIME,
            schema=schema,
            project=project,
            indexes=indexes,
        )
474
+
475
+
476
class Errors(TimescaleDBSchema):
    """
    Schema for the model-errors hypertable; the time dimension is the error's
    event time.

    NOTE(review): removed a spurious ``@dataclass`` decorator — the class
    defines ``__init__`` itself and declares no dataclass fields, so the
    decorator only injected an ``__eq__`` under which all instances compare
    equal, which is almost certainly unintended.
    """

    def __init__(self, project: str, schema: Optional[str] = None):
        """
        :param project: Project name used to namespace the table.
        :param schema:  Target database schema (base-class default when omitted).
        """
        table_name = mm_schemas.TimescaleDBTables.ERRORS
        columns = {
            mm_schemas.EventFieldType.TIME: _TimescaleDBColumnType("TIMESTAMPTZ"),
            mm_schemas.EventFieldType.MODEL_ERROR: _TimescaleDBColumnType(
                "VARCHAR", MODEL_ERROR_MAX_LENGTH
            ),
            mm_schemas.WriterEvent.ENDPOINT_ID: _TimescaleDBColumnType("VARCHAR", 64),
            mm_schemas.EventFieldType.ERROR_TYPE: _TimescaleDBColumnType("VARCHAR", 64),
        }
        # Access paths: by endpoint, by error type, and by time.
        indexes = [
            mm_schemas.WriterEvent.ENDPOINT_ID,
            mm_schemas.EventFieldType.ERROR_TYPE,
            mm_schemas.EventFieldType.TIME,
        ]
        super().__init__(
            table_name=table_name,
            columns=columns,
            time_column=mm_schemas.EventFieldType.TIME,
            schema=schema,
            project=project,
            indexes=indexes,
        )
@@ -0,0 +1,163 @@
1
+ # Copyright 2025 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import mlrun
15
+ import mlrun.common.schemas.model_monitoring as mm_schemas
16
+ import mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema as timescaledb_schema
17
+ from mlrun.datastore.datastore_profile import DatastoreProfilePostgreSQL
18
+ from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
19
+ TimescaleDBConnection,
20
+ )
21
+
22
+
23
class TimescaleDBStreamProcessor:
    """
    Stream-processing glue for the TimescaleDB TSDB connector.

    Wires TimescaleDB-specific steps into the model-monitoring stream graph:
    a preprocessing step plus storey targets that batch-write prediction rows
    and model errors into the project's TimescaleDB hypertables. Each instance
    holds its own TimescaleDBConnection, which shares the global connection
    pool.
    """

    def __init__(
        self,
        project: str,
        profile: DatastoreProfilePostgreSQL,
        connection: TimescaleDBConnection,
    ):
        """
        :param project:    The project name.
        :param profile:    Datastore profile used to address the database
                           (targets are wired with ``ds://<profile name>`` URLs).
        :param connection: Shared TimescaleDBConnection instance.
        """
        self.project = project
        self.profile = profile
        # Injected shared connection (participates in the global pool).
        self._connection = connection
        # Pre-build the schema objects the stream steps will write to.
        self._init_tables()

    def _init_tables(self) -> None:
        """Build schema objects for the predictions and errors hypertables."""
        schema_name = (
            f"{timescaledb_schema._MODEL_MONITORING_SCHEMA}_{mlrun.mlconf.system_id}"
        )

        table_kwargs = {"project": self.project, "schema": schema_name}
        self.tables = {
            mm_schemas.TimescaleDBTables.PREDICTIONS: timescaledb_schema.Predictions(
                **table_kwargs
            ),
            mm_schemas.TimescaleDBTables.ERRORS: timescaledb_schema.Errors(
                **table_kwargs
            ),
        }

    def apply_monitoring_stream_steps(self, graph, **kwargs) -> None:
        """
        Add the TimescaleDB ingestion branch to the monitoring stream graph.

        A preprocessing step reshapes events for TimescaleDB, then a storey
        target batch-writes prediction rows (latency, custom metrics and
        count estimates) into the predictions hypertable.

        :param graph:  The stream processing graph to modify.
        :param kwargs: Optional batching overrides
                       (``tsdb_batching_max_events``, ``tsdb_batching_timeout_secs``).
        """
        # Reshape events into the TimescaleDB row format.
        graph.add_step(
            "mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_stream_graph_steps.ProcessBeforeTimescaleDB",
            name="ProcessBeforeTimescaleDB",
            after="FilterNOP",
        )

        # Batch-write the reshaped rows into the predictions hypertable.
        predictions_schema = self.tables[mm_schemas.TimescaleDBTables.PREDICTIONS]
        graph.add_step(
            "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
            name="TimescaleDBTarget",
            after="ProcessBeforeTimescaleDB",
            url=f"ds://{self.profile.name}",
            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
            table=predictions_schema.full_name(),
            columns=[
                mm_schemas.EventFieldType.LATENCY,
                mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
                mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT,
                mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT,
                mm_schemas.WriterEvent.ENDPOINT_ID,
            ],
            max_events=kwargs.get("tsdb_batching_max_events", 1000),
            flush_after_seconds=kwargs.get("tsdb_batching_timeout_secs", 30),
        )

    def handle_model_error(
        self,
        graph,
        tsdb_batching_max_events: int = 1000,
        tsdb_batching_timeout_secs: int = 30,
        **kwargs,
    ) -> None:
        """
        Add the error-handling branch to the stream processing graph.

        Extracts model errors from the stream and batch-writes them to the
        TimescaleDB errors hypertable for monitoring and alerting.

        :param graph:                      The stream processing graph to modify.
        :param tsdb_batching_max_events:   Maximum events per batch.
        :param tsdb_batching_timeout_secs: Batch timeout in seconds.
        :param kwargs:                     Additional configuration parameters
                                           (accepted for interface symmetry).
        """
        errors_schema = self.tables[mm_schemas.TimescaleDBTables.ERRORS]

        # Pull the error payload out of the failed event.
        graph.add_step(
            "mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_stream_graph_steps.TimescaleDBErrorExtractor",
            name="error_extractor",
            after="ForwardError",
        )

        # Batch-write the extracted errors into the errors hypertable.
        graph.add_step(
            "mlrun.datastore.storeytargets.TimescaleDBStoreyTarget",
            name="timescaledb_error",
            after="error_extractor",
            url=f"ds://{self.profile.name}",
            time_col=mm_schemas.EventFieldType.TIME,
            table=errors_schema.full_name(),
            columns=[
                mm_schemas.EventFieldType.MODEL_ERROR,
                mm_schemas.WriterEvent.ENDPOINT_ID,
                mm_schemas.EventFieldType.ERROR_TYPE,
            ],
            max_events=tsdb_batching_max_events,
            flush_after_seconds=tsdb_batching_timeout_secs,
        )