mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl
This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +6 -0
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +64 -9
- mlrun/db/nopdb.py +13 -0
- mlrun/launcher/local.py +13 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/pipelines.py +44 -24
- mlrun/projects/project.py +26 -7
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +0 -2
- mlrun/serving/server.py +122 -53
- mlrun/serving/states.py +128 -44
- mlrun/serving/system_steps.py +84 -58
- mlrun/utils/helpers.py +82 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
CHANGED

@@ -469,6 +469,7 @@ class TDEngineConnector(TSDBConnector):
         preform_agg_columns: Optional[list] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -496,6 +497,8 @@ class TDEngineConnector(TSDBConnector):
                                 if an empty list was provided The aggregation won't be performed.
         :param order_by:        The column or alias to preform ordering on the query.
         :param desc:            Whether or not to sort the results in descending order.
+        :param partition_by:    The column to partition the results by. Note that if interval is provided,
+                                `agg_funcs` must be provided as well.

         :return:    DataFrame with the provided attributes from the data collection.
         :raise:     MLRunInvalidArgumentError if query the provided table failed.
@@ -517,6 +520,7 @@ class TDEngineConnector(TSDBConnector):
             preform_agg_funcs_columns=preform_agg_columns,
             order_by=order_by,
             desc=desc,
+            partition_by=partition_by,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
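
For orientation, a hedged sketch of how the connector's own code might call `_get_records` with the new parameter (`_get_records` is internal API; the table and column names below are illustrative, not taken from `mm_schemas`):

from datetime import datetime, timedelta

# `connector` is assumed to be an already-constructed TDEngineConnector.
end = datetime.now()
start = end - timedelta(hours=6)
df = connector._get_records(
    table="app_results",         # illustrative super-table name
    start=start,
    end=end,
    interval="10m",              # aggregation window
    agg_funcs=["max"],           # per the new docstring, required with interval/partition_by
    columns=["result_status"],
    partition_by="endpoint_id",  # new in rc16: one aggregated series per partition value
)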
@@ -1205,6 +1209,39 @@ class TDEngineConnector(TSDBConnector):
             )
         )

+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ResultData.RESULT_STATUS,
+            filter_values=[
+                mm_schemas.ResultStatusApp.potential_detection.value,
+                mm_schemas.ResultStatusApp.detected.value,
+            ],
+        )
+        table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+        start, end, interval = self._prepare_aligned_start_end(start, end)
+
+        # get per time-interval x endpoint_id combination the max result status
+        df = self._get_records(
+            table=table,
+            start=start,
+            end=end,
+            interval=interval,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            filter_query=filter_query,
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+        )
+        if df.empty:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        df["_wstart"] = pd.to_datetime(df["_wstart"])
+        return self._df_to_drift_data(df)
+
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
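
The method reduces each (time window, endpoint) pair to the maximum result status within the window, then hands the frame to `_df_to_drift_data` (a shared helper that, per the file list, lands in `mlrun/model_monitoring/db/tsdb/base.py`). A hedged pandas sketch of the per-window counting that helper presumably performs, assuming the usual `ResultStatusApp` mapping (1 = potential_detection, 2 = detected):

import pandas as pd

df = pd.DataFrame(
    {
        "_wstart": pd.to_datetime(
            ["2025-01-01 00:00", "2025-01-01 00:00", "2025-01-01 00:10"]
        ),
        "max(result_status)": [1, 2, 2],  # max status per (window, endpoint)
    }
)
counts = (
    df.groupby("_wstart")["max(result_status)"]
    .agg(
        suspected=lambda s: int((s == 1).sum()),  # endpoints with potential detection
        detected=lambda s: int((s == 2).sum()),   # endpoints with confirmed detection
    )
    .reset_index()
)
print(counts)  # one row per window: suspected/detected endpoint counts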
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED

@@ -1450,3 +1450,28 @@ class V3IOTSDBConnector(TSDBConnector):
                 return metric_objects

         return build_metric_objects()
+
+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        table = mm_schemas.V3IOTSDBTables.APP_RESULTS
+        start, end, interval = self._prepare_aligned_start_end(start, end)
+
+        # get per time-interval x endpoint_id combination the max result status
+        df = self._get_records(
+            table=table,
+            start=start,
+            end=end,
+            interval=interval,
+            sliding_window_step=interval,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            agg_funcs=["max"],
+            group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+        )
+        if df.empty:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+        df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
+        df = df.reset_index(names="_wstart")
+        return self._df_to_drift_data(df)
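
Unlike the TDEngine implementation above, this variant passes no `filter_query`, so windows whose maximum status is below `potential_detection` come back from V3IO and are dropped client-side with the `>= 1` mask. A small pandas illustration of that post-filter (column name illustrative):

import pandas as pd

df = pd.DataFrame(
    {"max(result_status)": [0, 1, 2]},
    index=pd.to_datetime(["2025-01-01 00:00", "2025-01-01 00:10", "2025-01-01 00:20"]),
)
df = df[df["max(result_status)"] >= 1]  # keep only suspected/detected windows
df = df.reset_index(names="_wstart")    # mirror the diff: window start becomes a column
print(df)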
mlrun/model_monitoring/helpers.py
CHANGED

@@ -549,6 +549,10 @@ def _get_monitoring_schedules_folder_path(project: str) -> str:
     )


+def _get_monitoring_schedules_user_folder_path(out_path: str) -> str:
+    return os.path.join(out_path, mm_constants.FileTargetKind.MONITORING_SCHEDULES)
+
+
 def _get_monitoring_schedules_file_endpoint_path(
     *, project: str, endpoint_id: str
 ) -> str:
@@ -570,10 +574,7 @@ def get_monitoring_schedules_endpoint_data(
     )


-def get_monitoring_schedules_chief_data(
-    *,
-    project: str,
-) -> "DataItem":
+def get_monitoring_schedules_chief_data(*, project: str) -> "DataItem":
     """
     Get the model monitoring schedules' data item of the project's model endpoint.
     """
@@ -582,6 +583,19 @@ def get_monitoring_schedules_chief_data(
     )


+def get_monitoring_schedules_user_application_data(
+    *, out_path: str, application: str
+) -> "DataItem":
+    """
+    Get the model monitoring schedules' data item of user application runs.
+    """
+    return mlrun.datastore.store_manager.object(
+        _get_monitoring_schedules_file_user_application_path(
+            out_path=out_path, application=application
+        )
+    )
+
+
 def _get_monitoring_schedules_file_chief_path(
     *,
     project: str,
@@ -591,6 +605,14 @@ def _get_monitoring_schedules_file_chief_path(
     )


+def _get_monitoring_schedules_file_user_application_path(
+    *, out_path: str, application: str
+) -> str:
+    return os.path.join(
+        _get_monitoring_schedules_user_folder_path(out_path), f"{application}.json"
+    )
+
+
 def get_start_end(
     start: Union[datetime.datetime, None],
     end: Union[datetime.datetime, None],
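
The new helpers mirror the existing chief/endpoint path builders but root the schedules file under a run's `out_path` instead of the project folder. A hedged sketch of the resulting layout; the concrete value of `mm_constants.FileTargetKind.MONITORING_SCHEDULES` is assumed here to be "monitoring-schedules":

import os

MONITORING_SCHEDULES = "monitoring-schedules"  # assumption, see lead-in

def user_application_schedules_path(out_path: str, application: str) -> str:
    # same shape as _get_monitoring_schedules_file_user_application_path
    return os.path.join(out_path, MONITORING_SCHEDULES, f"{application}.json")

print(user_application_schedules_path("v3io:///projects/my-proj/artifacts", "my-app"))
# v3io:///projects/my-proj/artifacts/monitoring-schedules/my-app.json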
mlrun/projects/pipelines.py
CHANGED
@@ -1072,7 +1072,11 @@ def github_webhook(request):


 def rerun_workflow(
-    context: mlrun.execution.MLClientCtx,
+    context: mlrun.execution.MLClientCtx,
+    run_uid: str,
+    project_name: str,
+    original_runner_uid: str,
+    original_workflow_name: str,
 ):
     """
     Re-run a workflow by retrying a previously failed KFP pipeline.
@@ -1080,8 +1084,11 @@ def rerun_workflow(
     :param context: MLRun context.
     :param run_uid: The run UID of the original workflow to retry.
     :param project_name: The project name.
+    :param original_runner_uid: The original workflow runner UID.
+    :param original_workflow_name: The original workflow name.
     """
     db = mlrun.get_run_db()
+    new_pipeline_id = None

     try:
         # Invoke the KFP retry endpoint (direct-submit mode)
@@ -1096,6 +1103,24 @@ def rerun_workflow(
             rerun_of_workflow=run_uid,
         )

+        # Enqueue "running" notifications server-side for this RerunRunner run
+        db.push_run_notifications(context.uid, project_name)
+
+        context.set_label(
+            mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id
+        )
+        context.update_run()
+
+        context.log_result("workflow_id", new_pipeline_id)
+
+        pipeline = wait_for_pipeline_completion(
+            new_pipeline_id,
+            project=project_name,
+        )
+
+        final_state = pipeline["run"]["status"]
+        context.log_result("workflow_state", final_state, commit=True)
+
     except mlrun.errors.MLRunHTTPError as http_exc:
         logger.error(
             "Failed calling KFP retry API",
@@ -1104,33 +1129,28 @@ def rerun_workflow(
         )
         raise

-    # Enqueue "running" notifications server-side for this RerunRunner run
-    db.push_run_notifications(context.uid, project_name)
-
-    context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
-    context.update_run()
-
-    context.log_result("workflow_id", new_pipeline_id)
-
-    try:
-        pipeline = wait_for_pipeline_completion(
-            new_pipeline_id,
-            project=project_name,
-        )
     except Exception as exc:
-        logger.error(
-            "
+        logger.error(
+            "Error during rerun_workflow execution",
+            error=err_to_str(exc),
             rerun_pipeline_id=new_pipeline_id,
-            exc=err_to_str(exc),
         )
-
-    final_state = pipeline["run"]["status"]
-    context.log_result("workflow_state", final_state, commit=True)
+        raise

-
-
-
-
+    finally:
+        # Once the rerun has finished, clear the "retrying" label on the original runner
+        # so that subsequent retry requests can acquire the lock again.
+        db.set_run_retrying_status(
+            project=project_name,
+            name=original_workflow_name,
+            run_id=original_runner_uid,
+            retrying=False,
+        )
+
+    if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise mlrun.errors.MLRunRuntimeError(
+            f"Pipeline retry of {run_uid} finished in state={final_state}"
+        )


 def load_and_run(context, *args, **kwargs):
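
The net effect of the restructure: submission, labeling, and waiting now sit in a single `try`, the `except` blocks only log and re-raise, and the `finally` guarantees the retry lock on the original runner is released no matter how the rerun ends. A minimal sketch of the shape (illustrative, not mlrun's actual code):

def rerun(db, submit_retry, wait_for_completion):
    final_state = None
    try:
        pipeline_id = submit_retry()                    # submit the KFP retry
        final_state = wait_for_completion(pipeline_id)  # block until the pipeline ends
    except Exception:
        # log once, then propagate
        raise
    finally:
        # hypothetical stand-in for set_run_retrying_status(retrying=False)
        db.release_retry_lock()
    if final_state != "Succeeded":
        raise RuntimeError(f"retry finished in state={final_state}")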
mlrun/projects/project.py
CHANGED
@@ -1042,12 +1042,7 @@ class ProjectSpec(ModelObj):
                 artifact = artifact.to_dict()
             else:  # artifact is a dict
                 # imported/legacy artifacts don't have metadata,spec,status fields
-                key_field = (
-                    "key"
-                    if _is_imported_artifact(artifact)
-                    or mlrun.utils.is_legacy_artifact(artifact)
-                    else "metadata.key"
-                )
+                key_field = "key" if _is_imported_artifact(artifact) else "metadata.key"
                 key = mlrun.utils.get_in(artifact, key_field, "")
                 if not key:
                     raise ValueError(f'artifacts "{key_field}" must be specified')
@@ -5078,7 +5073,6 @@ class MlrunProject(ModelObj):
         :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used (will be removed in 1.10.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -5557,6 +5551,31 @@ class MlrunProject(ModelObj):
             **kwargs,
         )

+    def get_drift_over_time(
+        self,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+    ) -> mlrun.common.schemas.model_monitoring.ModelEndpointDriftValues:
+        """
+        Get drift counts over time for the project.
+
+        This method returns a list of tuples, each representing a time-interval (in a granularity set by the
+        duration of the given time range) and the number of suspected drifts and detected drifts in that interval.
+        For a range of 6 hours or less, the granularity is 10 minutes; for a range of 6 hours to 72 hours, the
+        granularity is 1 hour; and for a range of more than 72 hours, the granularity is 24 hours.
+
+        :param start: Start time of the range to retrieve drift counts from.
+        :param end: End time of the range to retrieve drift counts from.
+
+        :return: A ModelEndpointDriftValues object containing the drift counts over time.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.get_drift_over_time(
+            project=self.metadata.name,
+            start=start,
+            end=end,
+        )
+
     def _run_authenticated_git_action(
         self,
         action: Callable,
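
A hedged usage sketch for the new project-level API (project name illustrative; requires a reachable MLRun API with model monitoring enabled):

import datetime
import mlrun

project = mlrun.get_or_create_project("my-project")
drift = project.get_drift_over_time(
    start=datetime.datetime.now() - datetime.timedelta(hours=6),
    end=datetime.datetime.now(),
)
# With a 6-hour range, each entry covers a 10-minute window.
print(drift.values)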
mlrun/runtimes/daskjob.py
CHANGED
@@ -93,6 +93,9 @@ class DaskSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -123,6 +126,9 @@ class DaskSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.args = args
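
The same three keyword arguments are threaded through every `KubeResourceSpec` subclass in this release (the MPI, Nuclio, and Spark specs below receive identical hunks), so serving-related fields now survive spec construction on batch runtimes as well. A hedged sketch:

from mlrun.runtimes.daskjob import DaskSpec

# Values are illustrative; serialization relies on the _dict_fields additions in pod.py.
spec = DaskSpec(image="mlrun/mlrun", track_models=True, parameters={"threshold": 0.5})
assert spec.track_models is True
assert spec.to_dict().get("track_models") is True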
mlrun/runtimes/mpijob/abstract.py
CHANGED

@@ -55,6 +55,9 @@ class MPIResourceSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -85,6 +88,9 @@ class MPIResourceSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.mpi_args = mpi_args or [
             "-x",
mlrun/runtimes/mpijob/v1.py
CHANGED
@@ -50,6 +50,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -81,6 +84,9 @@ class MPIV1ResourceSpec(MPIResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.clean_pod_policy = clean_pod_policy or MPIJobV1CleanPodPolicies.default()
mlrun/runtimes/nuclio/application/application.py
CHANGED

@@ -400,8 +400,10 @@ class ApplicationRuntime(RemoteRuntime):
         # nuclio implementation detail - when providing the image and emptying out the source code and build source,
         # nuclio skips rebuilding the image and simply takes the prebuilt image
         self.spec.build.functionSourceCode = ""
+        self.spec.config.pop("spec.build.functionSourceCode", None)
         self.status.application_source = self.spec.build.source
         self.spec.build.source = ""
+        self.spec.config.pop("spec.build.source", None)

         # save the image in the status, so we won't repopulate the function source code
         self.status.container_image = image
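
Presumably `spec.config` can carry raw Nuclio overrides keyed by dotted paths, and such an override would win over the freshly emptied attribute, defeating the prebuilt-image shortcut; the paired pops clear both places:

# Paired cleanup as in the hunk above: structured field plus raw override.
# pop(..., None) avoids a KeyError when no override was ever set.
self.spec.build.functionSourceCode = ""
self.spec.config.pop("spec.build.functionSourceCode", None)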
mlrun/runtimes/nuclio/function.py
CHANGED

@@ -155,6 +155,9 @@ class NuclioSpec(KubeResourceSpec):
         state_thresholds=None,
         disable_default_http_trigger=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -185,6 +188,9 @@ class NuclioSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )

         self.base_spec = base_spec or {}
mlrun/runtimes/nuclio/serving.py
CHANGED
@@ -720,6 +720,7 @@ class ServingRuntime(RemoteRuntime):
             "track_models": self.spec.track_models,
             "default_content_type": self.spec.default_content_type,
             "model_endpoint_creation_task_name": self.spec.model_endpoint_creation_task_name,
+            # TODO: find another way to pass this (needed for local run)
             "filename": getattr(self.spec, "filename", None),
         }

@@ -788,17 +789,13 @@ class ServingRuntime(RemoteRuntime):
             monitoring_mock=self.spec.track_models,
         )

-
-
-
-
-        server.
-
-
-            self.spec.track_models,
-            server.context,
-            self.spec,
-        )
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            server.graph,
+            self.spec.track_models,
+            server.context,
+            self.spec,
+        )

         if workdir:
             os.chdir(old_workdir)
@@ -858,6 +855,7 @@ class ServingRuntime(RemoteRuntime):
             description=self.spec.description,
             workdir=self.spec.workdir,
             image_pull_secret=self.spec.image_pull_secret,
+            build=self.spec.build,
             node_name=self.spec.node_name,
             node_selector=self.spec.node_selector,
             affinity=self.spec.affinity,
@@ -868,6 +866,9 @@ class ServingRuntime(RemoteRuntime):
             security_context=self.spec.security_context,
             state_thresholds=self.spec.state_thresholds,
             serving_spec=self._get_serving_spec(),
+            track_models=self.spec.track_models,
+            parameters=self.spec.parameters,
+            graph=self.spec.graph,
         )
         job = KubejobRuntime(
             spec=spec,
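
Taken together, the spec that `ServingRuntime` hands to the derived `KubejobRuntime` now carries the build config plus the graph/monitoring fields, and the serialized serving spec includes the filename needed for local runs. An illustrative shape of that dict (values are examples, not defaults):

serving_spec = {
    "track_models": True,
    "default_content_type": "application/json",
    "model_endpoint_creation_task_name": None,
    "filename": "serving.py",  # flagged with a TODO above: needed for local run
}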
mlrun/runtimes/pod.py
CHANGED
@@ -104,6 +104,9 @@ class KubeResourceSpec(FunctionSpec):
         "security_context",
         "state_thresholds",
         "serving_spec",
+        "track_models",
+        "parameters",
+        "graph",
     ]
     _default_fields_to_strip = FunctionSpec._default_fields_to_strip + [
         "volumes",
@@ -180,6 +183,9 @@ class KubeResourceSpec(FunctionSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        track_models=None,
+        parameters=None,
+        graph=None,
     ):
         super().__init__(
             command=command,
@@ -226,6 +232,10 @@ class KubeResourceSpec(FunctionSpec):
             or mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
         )
         self.serving_spec = serving_spec
+        self.track_models = track_models
+        self.parameters = parameters
+        self._graph = None
+        self.graph = graph
         # Termination grace period is internal for runtimes that have a pod termination hook hence it is not in the
         # _dict_fields and doesn't have a setter.
         self._termination_grace_period_seconds = None
@@ -303,6 +313,17 @@ class KubeResourceSpec(FunctionSpec):
     def termination_grace_period_seconds(self) -> typing.Optional[int]:
         return self._termination_grace_period_seconds

+    @property
+    def graph(self):
+        """states graph, holding the serving workflow/DAG topology"""
+        return self._graph
+
+    @graph.setter
+    def graph(self, graph):
+        from ..serving.states import graph_root_setter
+
+        graph_root_setter(self, graph)
+
     def _serialize_field(
         self, struct: dict, field_name: typing.Optional[str] = None, strip: bool = False
     ) -> typing.Any:
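
The new `graph` property mirrors the serving spec: reads return the private `_graph`, and every assignment is routed through `mlrun.serving.states.graph_root_setter`, which normalizes the input before storing it. A simplified sketch of the delegation pattern (stand-in helper, not mlrun's actual normalization):

class SpecSketch:
    def __init__(self, graph=None):
        self._graph = None
        self.graph = graph  # assignment in __init__ already runs the setter

    @property
    def graph(self):
        return self._graph

    @graph.setter
    def graph(self, graph):
        # graph_root_setter would build the real topology object here
        self._graph = normalize_graph(graph) if graph is not None else None

def normalize_graph(graph):
    # hypothetical stand-in for mlrun.serving.states.graph_root_setter
    return dict(graph) if isinstance(graph, dict) else graph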
mlrun/runtimes/remotesparkjob.py
CHANGED
@@ -59,6 +59,9 @@ class RemoteSparkSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -89,6 +92,9 @@ class RemoteSparkSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )
         self.provider = provider
mlrun/runtimes/sparkjob/spark3job.py
CHANGED

@@ -169,6 +169,9 @@ class Spark3JobSpec(KubeResourceSpec):
         security_context=None,
         state_thresholds=None,
         serving_spec=None,
+        graph=None,
+        parameters=None,
+        track_models=None,
     ):
         super().__init__(
             command=command,
@@ -199,6 +202,9 @@ class Spark3JobSpec(KubeResourceSpec):
             security_context=security_context,
             state_thresholds=state_thresholds,
             serving_spec=serving_spec,
+            graph=graph,
+            parameters=parameters,
+            track_models=track_models,
         )

         self.driver_resources = driver_resources or {}
mlrun/runtimes/utils.py
CHANGED
@@ -445,8 +445,6 @@ def enrich_run_labels(
     labels_enrichment = {
         mlrun_constants.MLRunInternalLabels.owner: os.environ.get("V3IO_USERNAME")
         or getpass.getuser(),
-        # TODO: remove this in 1.10.0
-        mlrun_constants.MLRunInternalLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
     }

     # Resolve which label keys to enrich