mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (59)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/constants.py +3 -0
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/alert.py +3 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  6. mlrun/common/schemas/notification.py +1 -0
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +9 -9
  9. mlrun/datastore/alibaba_oss.py +3 -2
  10. mlrun/datastore/azure_blob.py +7 -9
  11. mlrun/datastore/base.py +13 -1
  12. mlrun/datastore/dbfs_store.py +3 -7
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +84 -29
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +3 -2
  17. mlrun/datastore/sources.py +54 -0
  18. mlrun/datastore/storeytargets.py +147 -0
  19. mlrun/datastore/targets.py +76 -122
  20. mlrun/datastore/v3io.py +1 -0
  21. mlrun/db/httpdb.py +6 -1
  22. mlrun/errors.py +8 -0
  23. mlrun/execution.py +7 -0
  24. mlrun/feature_store/api.py +5 -0
  25. mlrun/feature_store/retrieval/job.py +1 -0
  26. mlrun/model.py +24 -3
  27. mlrun/model_monitoring/api.py +10 -2
  28. mlrun/model_monitoring/applications/_application_steps.py +52 -34
  29. mlrun/model_monitoring/applications/context.py +206 -70
  30. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  31. mlrun/model_monitoring/controller.py +15 -12
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
  34. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
  37. mlrun/model_monitoring/helpers.py +54 -18
  38. mlrun/model_monitoring/stream_processing.py +10 -29
  39. mlrun/projects/pipelines.py +19 -30
  40. mlrun/projects/project.py +86 -67
  41. mlrun/run.py +8 -6
  42. mlrun/runtimes/__init__.py +4 -0
  43. mlrun/runtimes/nuclio/api_gateway.py +18 -0
  44. mlrun/runtimes/nuclio/application/application.py +150 -59
  45. mlrun/runtimes/nuclio/function.py +5 -11
  46. mlrun/runtimes/nuclio/serving.py +2 -2
  47. mlrun/runtimes/utils.py +16 -0
  48. mlrun/serving/routers.py +1 -1
  49. mlrun/serving/server.py +19 -5
  50. mlrun/serving/states.py +8 -0
  51. mlrun/serving/v2_serving.py +34 -26
  52. mlrun/utils/helpers.py +33 -2
  53. mlrun/utils/version/version.json +2 -2
  54. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
  55. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
  56. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  57. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  58. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  59. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
@@ -19,11 +19,11 @@ import numpy as np
 import pandas as pd
 
 import mlrun
+import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas
-from mlrun.common.schemas.model_monitoring import (
-    EventFieldType,
-)
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.data_types.infer
+import mlrun.model_monitoring
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
     ModelEndpointMonitoringMetricType,
@@ -35,7 +35,6 @@ from mlrun.utils import logger
 if typing.TYPE_CHECKING:
     from mlrun.db.base import RunDBInterface
     from mlrun.projects import MlrunProject
-import mlrun.common.schemas.model_monitoring.constants as mm_constants
 
 
 class _BatchDict(typing.TypedDict):
@@ -45,26 +44,29 @@ class _BatchDict(typing.TypedDict):
 
 
 def get_stream_path(
-    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
+    project: str,
+    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    stream_uri: typing.Optional[str] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
 
     :param project: Project name.
-    :param function_name: Application name. Default is model_monitoring_stream.
+    :param function_name: Application name. Default is model_monitoring_stream.
+    :param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
 
     :return: Monitoring stream path to the relevant application.
     """
 
-    stream_uri = mlrun.get_secret_or_env(
-        mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
+    stream_uri = stream_uri or mlrun.get_secret_or_env(
+        mm_constants.ProjectSecretKeys.STREAM_PATH
     )
 
     if not stream_uri or stream_uri == "v3io":
         # TODO : remove the first part of this condition in 1.9.0
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
+            kind=mm_constants.FileTargetKind.STREAM,
             target="online",
             function_name=function_name,
         )
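Note: judging by the functions involved, this and the following hunks come from mlrun/model_monitoring/helpers.py. The new stream_uri parameter lets a caller bypass the project-secret lookup. A minimal usage sketch; the project name and Kafka URI are illustrative, not taken from the diff:

    from mlrun.model_monitoring.helpers import get_stream_path

    # Resolved from the project secret or system configuration, as before:
    path = get_stream_path(project="demo")

    # New in rc41: an explicit URI wins over the secret lookup, unless it is
    # empty or the literal "v3io" (the legacy fallback slated for removal in 1.9.0):
    path = get_stream_path(
        project="demo",
        stream_uri="kafka://kafka-broker:9092?topic=monitoring-stream",  # illustrative
    )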
@@ -78,7 +80,7 @@ def get_stream_path(
 
 def get_monitoring_parquet_path(
     project: "MlrunProject",
-    kind: str = mlrun.common.schemas.model_monitoring.FileTargetKind.PARQUET,
+    kind: str = mm_constants.FileTargetKind.PARQUET,
 ) -> str:
     """Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
     project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
@@ -111,7 +113,7 @@ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
     """
 
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
         secret_provider=secret_provider,
     )
 
@@ -126,7 +128,7 @@ def get_tsdb_connection_string(
     """
 
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
         secret_provider=secret_provider,
     )
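Both getters above are thin wrappers around the same secret-or-env lookup. A sketch of the equivalent direct calls, using only names that appear in this diff:

    import mlrun
    import mlrun.common.schemas.model_monitoring.constants as mm_constants

    # Model endpoint store (KV/SQL) connection string:
    store_conn = mlrun.get_secret_or_env(
        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION
    )
    # Time series database connection string:
    tsdb_conn = mlrun.get_secret_or_env(
        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION
    )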
@@ -200,7 +202,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: current_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
         )
     else:
         try:
@@ -229,7 +231,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
         )
 
@@ -249,12 +251,11 @@ def calculate_inputs_statistics(
 
     # Use `DFDataInfer` to calculate the statistics over the inputs:
     inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
-        df=inputs,
-        options=mlrun.data_types.infer.InferOptions.Histogram,
+        df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
     )
 
     # Recalculate the histograms over the bins that are set in the sample-set of the end point:
-    for feature in inputs_statistics.keys():
+    for feature in list(inputs_statistics):
         if feature in sample_set_statistics:
             counts, bins = np.histogram(
                 inputs[feature].to_numpy(),
@@ -271,6 +272,9 @@ def calculate_inputs_statistics(
                     inputs_statistics[feature]["hist"]
                 )
             )
+        else:
+            # If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
+            inputs_statistics.pop(feature)
 
     return inputs_statistics
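The switch from inputs_statistics.keys() to list(inputs_statistics) is what makes the new else branch safe: the loop now pops entries from the dict it iterates over, and removing keys while iterating a live dict view raises RuntimeError. A standalone illustration of the idiom (plain Python, not mlrun code):

    stats = {"f1": {"hist": [1, 2]}, "f2": {}}
    sample_set_statistics = {"f1": {"hist": [3, 4]}}

    for feature in list(stats):  # iterate over a snapshot of the keys
        if feature not in sample_set_statistics:
            stats.pop(feature)  # safe: the snapshot is unaffected by the pop

    # Looping over stats.keys() directly while popping would raise
    # "RuntimeError: dictionary changed size during iteration".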
@@ -323,3 +327,35 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
         name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
         full_name=get_invocations_fqn(project),
     )
+
+
+def enrich_model_endpoint_with_model_uri(
+    model_endpoint: ModelEndpoint,
+    model_obj: mlrun.artifacts.ModelArtifact,
+):
+    """
+    Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
+    to the model object that includes the project, db_key, iter, and tree.
+    In addition, we verify that the model object is of type `ModelArtifact`.
+
+    :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
+    :param model_obj:      An object representing the model artifact.
+
+    :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
+    """
+    mlrun.utils.helpers.verify_field_of_type(
+        field_name="model_endpoint.spec.model_uri",
+        field_value=model_obj,
+        expected_type=mlrun.artifacts.ModelArtifact,
+    )
+
+    # Update model_uri with a unique reference to handle future changes
+    model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+        project=model_endpoint.metadata.project,
+        key=model_obj.db_key,
+        iter=model_obj.iter,
+        tree=model_obj.tree,
+    )
+    model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
+        kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
+    )
@@ -37,6 +37,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     ModelEndpointTarget,
     ProjectSecretKeys,
 )
+from mlrun.model_monitoring.db import StoreBase, TSDBConnector
 from mlrun.utils import logger
 
 
@@ -48,14 +49,12 @@ class EventStreamProcessor:
         parquet_batching_max_events: int,
         parquet_batching_timeout_secs: int,
         parquet_target: str,
-        sample_window: int = 10,
         aggregate_windows: typing.Optional[list[str]] = None,
-        aggregate_period: str = "30s",
+        aggregate_period: str = "5m",
         model_monitoring_access_key: str = None,
     ):
         # General configurations, mainly used for the storey steps in the future serving graph
         self.project = project
-        self.sample_window = sample_window
         self.aggregate_windows = aggregate_windows or ["5m", "1h"]
         self.aggregate_period = aggregate_period
@@ -133,7 +132,8 @@ class EventStreamProcessor:
     def apply_monitoring_serving_graph(
         self,
         fn: mlrun.runtimes.ServingRuntime,
-        secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
+        tsdb_connector: TSDBConnector,
+        endpoint_store: StoreBase,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -161,8 +161,8 @@ class EventStreamProcessor:
         using CE, the parquet target path is based on the defined MLRun artifact path.
 
         :param fn: A serving function.
-        :param secret_provider: An optional callable function that provides the connection string from the project
-                                secret.
+        :param tsdb_connector: Time series database connector.
+        :param endpoint_store: KV/SQL store used for endpoint data.
         """
 
         graph = typing.cast(
@@ -190,10 +190,6 @@ class EventStreamProcessor:
             _fn="(event.get('error') is not None)",
         )
 
-        tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-            project=self.project, secret_provider=secret_provider
-        )
-
         tsdb_connector.handle_model_error(
             graph,
         )
@@ -202,7 +198,7 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
-                after="FilterError",
+                after="extract_endpoint",  # TODO: change this to FilterError in ML-7456
                 full_event=True,
                 project=self.project,
             )
@@ -306,24 +302,9 @@ class EventStreamProcessor:
                 table=self.kv_path,
             )
 
-        store_object = mlrun.model_monitoring.get_store_object(
-            project=self.project, secret_provider=secret_provider
-        )
-        if store_object.type == ModelEndpointTarget.V3IO_NOSQL:
+        if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
             apply_infer_schema()
 
-        # Emits the event in window size of events based on sample_window size (10 by default)
-        def apply_storey_sample_window():
-            graph.add_step(
-                "storey.steps.SampleWindow",
-                name="sample",
-                after="Rename",
-                window_size=self.sample_window,
-                key=EventFieldType.ENDPOINT_ID,
-            )
-
-        apply_storey_sample_window()
-
         tsdb_connector.apply_monitoring_stream_steps(graph=graph)
 
         # Parquet branch
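apply_monitoring_serving_graph no longer builds its own connectors from a secret_provider; the caller injects them. A sketch of the new call shape, reusing the two factory calls that the removed lines used to make internally (processor, serving_fn, and secret_provider are assumed to exist):

    import mlrun.model_monitoring

    tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
        project="demo", secret_provider=secret_provider
    )
    endpoint_store = mlrun.model_monitoring.get_store_object(
        project="demo", secret_provider=secret_provider
    )
    processor.apply_monitoring_serving_graph(
        fn=serving_fn,  # a ServingRuntime
        tsdb_connector=tsdb_connector,
        endpoint_store=endpoint_store,
    )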
@@ -353,6 +334,7 @@ class EventStreamProcessor:
             index_cols=[EventFieldType.ENDPOINT_ID],
             key_bucketing_number=0,
             time_partitioning_granularity="hour",
+            time_field=EventFieldType.TIMESTAMP,
             partition_cols=["$key", "$year", "$month", "$day", "$hour"],
         )
 
@@ -527,9 +509,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # If error key has been found in the current event,
         # increase the error counter by 1 and raise the error description
         error = event.get("error")
-        if error:
+        if error:  # TODO: delete this in ML-7456
             self.error_count[endpoint_id] += 1
-            # TODO: write to tsdb / kv once in a while
             raise mlrun.errors.MLRunInvalidArgumentError(str(error))
 
         # Validate event fields
@@ -447,7 +447,6 @@ class _PipelineRunner(abc.ABC):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pass
 
@@ -567,7 +566,6 @@ class _KFPRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -585,7 +583,8 @@ class _KFPRunner(_PipelineRunner):
                 "Notifications will only be sent if you wait for pipeline completion. "
                 "To use the new notification behavior, use the remote pipeline runner."
             )
-            for notification in notifications:
+            # for start message, fallback to old notification behavior
+            for notification in notifications or []:
                 project.notifiers.add_notification(
                     notification.kind, notification.params
                 )
@@ -616,13 +615,12 @@ class _KFPRunner(_PipelineRunner):
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-                project.get_param("commit_id", None),
-                run_id,
-                True,
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name,
+            project.get_param("commit_id", None),
+            run_id,
+            True,
+        )
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
 
@@ -670,7 +668,6 @@ class _LocalRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -692,10 +689,9 @@ class _LocalRunner(_PipelineRunner):
             project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
 
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name, pipeline_id=workflow_id
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name, pipeline_id=workflow_id
+        )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
@@ -755,22 +751,10 @@ class _RemoteRunner(_PipelineRunner):
         namespace: str = None,
         source: str = None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> typing.Optional[_PipelineRunStatus]:
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None
 
-        # for start message, fallback to old notification behavior
-        if send_start_notification:
-            for notification in notifications or []:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
-                # if a notification with `when=running` is provided, it will be used explicitly and others
-                # will be ignored
-                if "running" in notification.when:
-                    break
-
         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
         inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -870,9 +854,6 @@ class _RemoteRunner(_PipelineRunner):
             state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
             state = mlrun_pipelines.common.models.RunStatuses.running
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-            )
         pipeline_context.clear()
         return _PipelineRunStatus(
             run_id=workflow_id,
@@ -1078,6 +1059,13 @@ def load_and_run(
     if load_only:
         return
 
+    # extract "start" notification if exists
+    start_notifications = [
+        notification
+        for notification in context.get_notifications()
+        if "running" in notification.when
+    ]
+
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(f"Running workflow {workflow_log_message} from remote")
     run = project.run(
@@ -1093,6 +1081,7 @@ def load_and_run(
         cleanup_ttl=cleanup_ttl,
         engine=engine,
         local=local,
+        notifications=start_notifications,
     )
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
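Taken together, the pipelines.py hunks drop the send_start_notification flag: instead, load_and_run pre-filters the run's notifications for those whose when list contains "running" and forwards only those to project.run(). A standalone sketch of that filter, with plain dicts standing in for the Notification objects returned by context.get_notifications():

    notifications = [
        {"kind": "slack", "when": ["running", "completed"]},
        {"kind": "git", "when": ["completed", "error"]},
    ]
    start_notifications = [n for n in notifications if "running" in n["when"]]
    # -> only the slack notification is forwarded as a "start" notification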