mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -2
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +7 -0
- mlrun/common/model_monitoring/helpers.py +41 -4
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +13 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +10 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +24 -0
- mlrun/common/schemas/hub.py +3 -2
- mlrun/common/schemas/model_monitoring/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +2 -2
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +53 -15
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +2 -3
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +49 -17
- mlrun/datastore/model_provider/huggingface_provider.py +6 -2
- mlrun/datastore/model_provider/model_provider.py +2 -2
- mlrun/datastore/model_provider/openai_provider.py +2 -2
- mlrun/datastore/s3.py +15 -16
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +16 -10
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +16 -3
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +36 -12
- mlrun/db/httpdb.py +316 -101
- mlrun/db/nopdb.py +29 -11
- mlrun/errors.py +4 -2
- mlrun/execution.py +11 -12
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +37 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +67 -76
- mlrun/hub/step.py +113 -0
- mlrun/launcher/base.py +2 -1
- mlrun/launcher/local.py +2 -1
- mlrun/model.py +12 -2
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +20 -6
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/controller.py +7 -17
- mlrun/model_monitoring/db/_schedules.py +2 -16
- mlrun/model_monitoring/db/_stats.py +2 -13
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +2 -1
- mlrun/model_monitoring/stream_processing.py +18 -16
- mlrun/model_monitoring/writer.py +4 -3
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -44
- mlrun/platforms/iguazio.py +1 -1
- mlrun/projects/operations.py +11 -10
- mlrun/projects/project.py +81 -82
- mlrun/run.py +4 -7
- mlrun/runtimes/__init__.py +2 -204
- mlrun/runtimes/base.py +89 -21
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +4 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/__init__.py +12 -8
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +200 -32
- mlrun/runtimes/nuclio/function.py +154 -49
- mlrun/runtimes/nuclio/serving.py +55 -42
- mlrun/runtimes/pod.py +59 -10
- mlrun/secrets.py +46 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +5 -5
- mlrun/serving/routers.py +3 -3
- mlrun/serving/server.py +46 -43
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +554 -207
- mlrun/serving/steps.py +1 -1
- mlrun/serving/system_steps.py +42 -33
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +89 -16
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/git.py +1 -1
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ class ObjectTSDBFactory(enum.Enum):
|
|
|
28
28
|
"""Enum class to handle the different TSDB connector type values for storing real time metrics"""
|
|
29
29
|
|
|
30
30
|
v3io_tsdb = "v3io-tsdb"
|
|
31
|
-
|
|
31
|
+
timescaledb = "postgresql"
|
|
32
32
|
|
|
33
33
|
def to_tsdb_connector(
|
|
34
34
|
self, project: str, profile: DatastoreProfile, **kwargs
|
|
@@ -50,12 +50,12 @@ class ObjectTSDBFactory(enum.Enum):
|
|
|
50
50
|
|
|
51
51
|
return V3IOTSDBConnector(project=project, **kwargs)
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
if self == self.timescaledb:
|
|
54
|
+
from .timescaledb.timescaledb_connector import TimescaleDBConnector
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
return TimescaleDBConnector(project=project, profile=profile, **kwargs)
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
raise mlrun.errors.MLRunInvalidMMStoreTypeError("Code should not reach here")
|
|
59
59
|
|
|
60
60
|
@classmethod
|
|
61
61
|
def _missing_(cls, value: typing.Any):
|
|
@@ -92,9 +92,11 @@ def get_tsdb_connector(
|
|
|
92
92
|
if isinstance(profile, mlrun.datastore.datastore_profile.DatastoreProfileV3io):
|
|
93
93
|
tsdb_connector_type = mlrun.common.schemas.model_monitoring.TSDBTarget.V3IO_TSDB
|
|
94
94
|
elif isinstance(
|
|
95
|
-
profile, mlrun.datastore.datastore_profile.
|
|
95
|
+
profile, mlrun.datastore.datastore_profile.DatastoreProfilePostgreSQL
|
|
96
96
|
):
|
|
97
|
-
tsdb_connector_type =
|
|
97
|
+
tsdb_connector_type = (
|
|
98
|
+
mlrun.common.schemas.model_monitoring.TSDBTarget.TimescaleDB
|
|
99
|
+
)
|
|
98
100
|
else:
|
|
99
101
|
extra_message = (
|
|
100
102
|
""
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
from datetime import datetime, timedelta
|
|
17
|
-
from typing import
|
|
17
|
+
from typing import ClassVar, Literal, Optional, Union
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import pydantic.v1
|
|
@@ -444,11 +444,9 @@ class TSDBConnector(ABC):
|
|
|
444
444
|
]
|
|
445
445
|
"""
|
|
446
446
|
|
|
447
|
-
|
|
447
|
+
def add_basic_metrics(
|
|
448
448
|
self,
|
|
449
449
|
model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
|
|
450
|
-
project: str,
|
|
451
|
-
run_in_threadpool: Callable,
|
|
452
450
|
metric_list: Optional[list[str]] = None,
|
|
453
451
|
) -> list[mlrun.common.schemas.ModelEndpoint]:
|
|
454
452
|
raise NotImplementedError()
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from datetime import datetime, timedelta
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
import mlrun.errors
|
|
21
|
+
import mlrun.utils
|
|
22
|
+
|
|
23
|
+
# Compiled regex pattern for parsing time intervals (e.g., "1h", "10m", "1d", "1w", "1M")
|
|
24
|
+
_INTERVAL_PATTERN = re.compile(r"(\d+)([mhdwM])")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class PreAggregateConfig:
    """Configuration for pre-aggregated tables and retention policies.

    All fields default to ``None`` and are replaced with fresh default
    containers in ``__post_init__``, so each instance gets its own mutable
    lists/dict and callers may pass ``None`` explicitly to mean
    "use the defaults".
    """

    # Intervals for which pre-aggregated tables are maintained (e.g. "10m", "1h").
    # NOTE: annotations fixed to Optional[...] — the previous `list[str] = None`
    # declared a non-optional type with a None default.
    aggregate_intervals: Optional[list[str]] = None
    # Aggregation functions materialized for each interval.
    agg_functions: Optional[list[str]] = None
    # Retention duration per resolution: "raw" plus each interval -> duration string.
    retention_policy: Optional[dict[str, str]] = None

    def __post_init__(self) -> None:
        # Coerce None to per-instance defaults (avoids shared mutable defaults).
        if self.aggregate_intervals is None:
            self.aggregate_intervals = ["10m", "1h", "6h", "1d", "1w", "1M"]

        if self.agg_functions is None:
            self.agg_functions = ["sum", "avg", "min", "max", "count", "last"]

        if self.retention_policy is None:
            self.retention_policy = {
                "raw": "7d",
                "10m": "30d",
                "1h": "1y",
                "6h": "1y",
                "1d": "5y",
                "1w": "5y",
                "1M": "5y",
            }
|
|
53
|
+
|
|
54
|
+
class PreAggregateManager:
    """Handles pre-aggregate validation, time alignment, and optimization decisions."""

    # Compiled regex for interval strings such as "10m", "1h", "1d", "1w", "1M".
    # Held on the class so the manager is self-contained.
    _INTERVAL_RE = re.compile(r"(\d+)([mhdwM])")

    def __init__(self, pre_aggregate_config: Optional["PreAggregateConfig"] = None):
        """
        Initialize the pre-aggregate handler.

        :param pre_aggregate_config: Configuration for pre-aggregated tables and operations.
                                     If None, all pre-aggregate operations will be disabled.
        """
        self._pre_aggregate_config = pre_aggregate_config

    def validate_interval_and_function(
        self, interval: Optional[str], agg_function: Optional[str]
    ) -> None:
        """Validate that interval and aggregation function are available in pre-aggregate config.

        :raises mlrun.errors.MLRunInvalidArgumentError: if either value is not covered
            by the configuration, or a value was given with no configuration present.
        """
        if not interval and not agg_function:
            return

        if not self._pre_aggregate_config:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Pre-aggregate configuration not available. Cannot use interval or agg_function parameters."
            )

        if interval and interval not in self._pre_aggregate_config.aggregate_intervals:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Interval '{interval}' not available in pre-aggregate configuration. "
                f"Available intervals: {self._pre_aggregate_config.aggregate_intervals}"
            )

        if (
            agg_function
            and agg_function not in self._pre_aggregate_config.agg_functions
        ):
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Aggregation function '{agg_function}' not available in pre-aggregate configuration. "
                f"Available functions: {self._pre_aggregate_config.agg_functions}"
            )

    def can_use_pre_aggregates(
        self, interval: Optional[str] = None, agg_funcs: Optional[list[str]] = None
    ) -> bool:
        """Check if pre-aggregates can be used for the given parameters.

        Returns False when no config exists, no interval was requested, or the
        interval/functions are not covered by the configuration.
        """
        if not self._pre_aggregate_config or not interval:
            return False

        if interval not in self._pre_aggregate_config.aggregate_intervals:
            return False

        if agg_funcs:
            return all(
                func in self._pre_aggregate_config.agg_functions for func in agg_funcs
            )

        return True

    def align_time_to_interval(
        self, dt: datetime, interval: str, align_start: bool = True
    ) -> datetime:
        """Align datetime to interval boundaries.

        Unparsable or empty intervals return ``dt`` unchanged rather than
        raising, so callers may pass arbitrary strings safely.

        :param dt:          The datetime to align.
        :param interval:    Interval string, e.g. "1h", "10m", "1d", "1w", "1M".
        :param align_start: If True return the interval's start boundary,
                            otherwise the end boundary (start + one interval).
        """
        if not interval:
            return dt

        match = self._INTERVAL_RE.match(interval)
        if not match:
            return dt

        amount, unit = int(match.group(1)), match.group(2)

        aligned_start = self._get_interval_start_boundary(dt, amount, unit)
        if align_start:
            return aligned_start

        # For end alignment, add the interval duration to the start boundary.
        return self._add_interval_to_datetime(aligned_start, amount, unit)

    def _get_interval_start_boundary(
        self, dt: datetime, amount: int, unit: str
    ) -> datetime:
        """Get the start boundary for the given interval.

        Days, weeks and months align to midnight / Monday / first-of-month
        regardless of ``amount``; minutes and hours snap to multiples of
        ``amount``. Unknown units return ``dt`` unchanged.
        """
        if unit == "m":  # minutes
            return dt.replace(second=0, microsecond=0) - timedelta(
                minutes=dt.minute % amount
            )
        elif unit == "h":  # hours
            return dt.replace(minute=0, second=0, microsecond=0) - timedelta(
                hours=dt.hour % amount
            )
        elif unit == "d":  # days
            return dt.replace(hour=0, minute=0, second=0, microsecond=0)
        elif unit == "w":  # weeks — align to Monday (start of week)
            days_since_monday = dt.weekday()
            return (dt - timedelta(days=days_since_monday)).replace(
                hour=0, minute=0, second=0, microsecond=0
            )
        elif unit == "M":  # months — align to the first of the month
            return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

        return dt

    def _add_interval_to_datetime(
        self, dt: datetime, amount: int, unit: str
    ) -> datetime:
        """Add the specified interval amount to a datetime.

        Month arithmetic honors ``amount`` (the previous implementation always
        added exactly one month) and clamps the day to the last valid day of
        the target month, so e.g. Jan 31 + 1M yields Feb 28/29 instead of
        raising ValueError. Unknown units return ``dt`` unchanged.
        """
        if unit == "m":  # minutes
            return dt + timedelta(minutes=amount)
        elif unit == "h":  # hours
            return dt + timedelta(hours=amount)
        elif unit == "d":  # days
            return dt + timedelta(days=amount)
        elif unit == "w":  # weeks
            return dt + timedelta(weeks=amount)
        elif unit == "M":  # months
            import calendar

            # Zero-based month index makes the year/month carry arithmetic trivial.
            month_index = dt.month - 1 + amount
            year = dt.year + month_index // 12
            month = month_index % 12 + 1
            # Clamp to the target month's last day (handles 29/30/31 and leap years).
            day = min(dt.day, calendar.monthrange(year, month)[1])
            return dt.replace(year=year, month=month, day=day)

        return dt

    def align_time_range(
        self, start: datetime, end: datetime, interval: Optional[str]
    ) -> tuple[datetime, datetime]:
        """Align both start and end times to interval boundaries.

        With no interval the range is returned unchanged.
        """
        if not interval:
            return start, end

        aligned_start = self.align_time_to_interval(start, interval, align_start=True)
        aligned_end = self.align_time_to_interval(end, interval, align_start=False)

        return aligned_start, aligned_end

    @staticmethod
    def get_start_end(
        start: Optional[datetime],
        end: Optional[datetime],
    ) -> tuple[datetime, datetime]:
        """
        Utility function for TSDB start/end format validation.

        :param start: Either None or datetime, None is handled as datetime.min(tz=timezone.utc)
        :param end:   Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
        :return: start datetime, end datetime
        :raises mlrun.errors.MLRunInvalidArgumentError: if either value is not a datetime
            after None substitution.
        """
        start = start or mlrun.utils.datetime_min()
        end = end or mlrun.utils.datetime_now()
        if not (isinstance(start, datetime) and isinstance(end, datetime)):
            raise mlrun.errors.MLRunInvalidArgumentError(
                "Both start and end must be datetime objects"
            )
        return start, end

    @property
    def config(self) -> Optional["PreAggregateConfig"]:
        """Get the current pre-aggregate configuration."""
        return self._pre_aggregate_config

    def is_pre_aggregates_enabled(self) -> bool:
        """Check if pre-aggregates are enabled (config is provided)."""
        return self._pre_aggregate_config is not None

    def get_available_intervals(self) -> list[str]:
        """Get list of available intervals for pre-aggregation (copy; empty if disabled)."""
        if not self._pre_aggregate_config:
            return []
        return self._pre_aggregate_config.aggregate_intervals.copy()

    def get_available_functions(self) -> list[str]:
        """Get list of available aggregation functions (copy; empty if disabled)."""
        if not self._pre_aggregate_config:
            return []
        return self._pre_aggregate_config.agg_functions.copy()

    def get_retention_policy(self) -> dict[str, str]:
        """Get the retention policy configuration (copy; empty if disabled)."""
        if not self._pre_aggregate_config:
            return {}
        return self._pre_aggregate_config.retention_policy.copy()
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
import mlrun.feature_store.steps
|
|
18
|
+
from mlrun.common.schemas.model_monitoring import EventFieldType
|
|
19
|
+
|
|
20
|
+
# Import the authoritative database schema constant
|
|
21
|
+
from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_schema import (
|
|
22
|
+
MODEL_ERROR_MAX_LENGTH,
|
|
23
|
+
)
|
|
24
|
+
from mlrun.utils import logger
|
|
25
|
+
|
|
26
|
+
# Error truncation log message
|
|
27
|
+
ERROR_TRUNCATION_MESSAGE = "Error message truncated for storage"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BaseErrorExtractor(mlrun.feature_store.steps.MapClass):
    """
    Shared error extraction implementation for TimescaleDB.

    Prepares events for insertion into the errors TSDB table.
    V3io has different requirements and uses its own implementation.
    """

    def do(self, event):
        # Normalize the raw error to a string; keep only the tail when it
        # exceeds the column limit of the errors table.
        error_text = str(event.get("error"))
        full_length = len(error_text)
        if full_length > MODEL_ERROR_MAX_LENGTH:
            error_text = error_text[-MODEL_ERROR_MAX_LENGTH:]
            logger.warning(
                ERROR_TRUNCATION_MESSAGE,
                endpoint_id=event.get(EventFieldType.ENDPOINT_ID),
                function_uri=event.get(EventFieldType.FUNCTION_URI),
                original_error_length=full_length,
                max_length=MODEL_ERROR_MAX_LENGTH,
                truncated_error=error_text,
            )

        # "when" carries the event time as an ISO-8601 string.
        event_time = datetime.fromisoformat(event.get("when"))
        endpoint = event[EventFieldType.ENDPOINT_ID]
        # The project name is the first path segment of the function URI.
        project_name = event[EventFieldType.FUNCTION_URI].split("/")[0]

        prepared = {
            EventFieldType.MODEL_ERROR: error_text,
            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
            EventFieldType.ENDPOINT_ID: endpoint,
            EventFieldType.TIME: event_time,
            EventFieldType.PROJECT: project_name,
            EventFieldType.TABLE_COLUMN: "_err_" + event.get(EventFieldType.ENDPOINT_ID),
        }
        logger.info("Write error to errors TSDB table", event=prepared)
        return prepared