mlrun 1.5.0rc1-py3-none-any.whl → 1.5.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (119)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +1 -40
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/functions.py +6 -1
  7. mlrun/api/api/endpoints/logs.py +17 -3
  8. mlrun/api/api/endpoints/pipelines.py +1 -5
  9. mlrun/api/api/endpoints/projects.py +88 -0
  10. mlrun/api/api/endpoints/runs.py +48 -6
  11. mlrun/api/api/endpoints/workflows.py +355 -0
  12. mlrun/api/api/utils.py +1 -1
  13. mlrun/api/crud/__init__.py +1 -0
  14. mlrun/api/crud/client_spec.py +3 -0
  15. mlrun/api/crud/model_monitoring/deployment.py +36 -7
  16. mlrun/api/crud/model_monitoring/grafana.py +1 -1
  17. mlrun/api/crud/model_monitoring/helpers.py +32 -2
  18. mlrun/api/crud/model_monitoring/model_endpoints.py +27 -5
  19. mlrun/api/crud/notifications.py +9 -4
  20. mlrun/api/crud/pipelines.py +4 -9
  21. mlrun/api/crud/runtime_resources.py +4 -3
  22. mlrun/api/crud/secrets.py +21 -0
  23. mlrun/api/crud/workflows.py +352 -0
  24. mlrun/api/db/base.py +16 -1
  25. mlrun/api/db/sqldb/db.py +97 -16
  26. mlrun/api/launcher.py +26 -7
  27. mlrun/api/main.py +3 -4
  28. mlrun/{mlutils → api/rundb}/__init__.py +2 -6
  29. mlrun/{db → api/rundb}/sqldb.py +35 -83
  30. mlrun/api/runtime_handlers/__init__.py +56 -0
  31. mlrun/api/runtime_handlers/base.py +1247 -0
  32. mlrun/api/runtime_handlers/daskjob.py +209 -0
  33. mlrun/api/runtime_handlers/kubejob.py +37 -0
  34. mlrun/api/runtime_handlers/mpijob.py +147 -0
  35. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  36. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  37. mlrun/api/utils/builder.py +1 -4
  38. mlrun/api/utils/clients/chief.py +14 -0
  39. mlrun/api/utils/scheduler.py +98 -15
  40. mlrun/api/utils/singletons/db.py +4 -0
  41. mlrun/artifacts/manager.py +1 -2
  42. mlrun/common/schemas/__init__.py +6 -0
  43. mlrun/common/schemas/auth.py +4 -1
  44. mlrun/common/schemas/client_spec.py +1 -1
  45. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  46. mlrun/common/schemas/model_monitoring/constants.py +11 -0
  47. mlrun/common/schemas/project.py +1 -0
  48. mlrun/common/schemas/runs.py +1 -8
  49. mlrun/common/schemas/schedule.py +1 -8
  50. mlrun/common/schemas/workflow.py +54 -0
  51. mlrun/config.py +42 -40
  52. mlrun/datastore/sources.py +1 -1
  53. mlrun/db/__init__.py +4 -68
  54. mlrun/db/base.py +12 -0
  55. mlrun/db/factory.py +65 -0
  56. mlrun/db/httpdb.py +175 -19
  57. mlrun/db/nopdb.py +4 -2
  58. mlrun/execution.py +4 -2
  59. mlrun/feature_store/__init__.py +1 -0
  60. mlrun/feature_store/api.py +1 -2
  61. mlrun/feature_store/feature_set.py +0 -10
  62. mlrun/feature_store/feature_vector.py +340 -2
  63. mlrun/feature_store/ingestion.py +5 -10
  64. mlrun/feature_store/retrieval/base.py +118 -104
  65. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  66. mlrun/feature_store/retrieval/job.py +4 -1
  67. mlrun/feature_store/retrieval/local_merger.py +18 -18
  68. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  69. mlrun/feature_store/retrieval/storey_merger.py +21 -15
  70. mlrun/kfpops.py +3 -9
  71. mlrun/launcher/base.py +3 -3
  72. mlrun/launcher/client.py +3 -2
  73. mlrun/launcher/factory.py +16 -13
  74. mlrun/lists.py +0 -11
  75. mlrun/model.py +9 -15
  76. mlrun/model_monitoring/helpers.py +15 -25
  77. mlrun/model_monitoring/model_monitoring_batch.py +72 -4
  78. mlrun/model_monitoring/prometheus.py +219 -0
  79. mlrun/model_monitoring/stores/__init__.py +15 -9
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +3 -1
  81. mlrun/model_monitoring/stream_processing.py +181 -29
  82. mlrun/package/packager.py +6 -8
  83. mlrun/package/packagers/default_packager.py +121 -10
  84. mlrun/platforms/__init__.py +0 -2
  85. mlrun/platforms/iguazio.py +0 -56
  86. mlrun/projects/pipelines.py +57 -158
  87. mlrun/projects/project.py +6 -32
  88. mlrun/render.py +1 -1
  89. mlrun/run.py +2 -124
  90. mlrun/runtimes/__init__.py +6 -42
  91. mlrun/runtimes/base.py +26 -1241
  92. mlrun/runtimes/daskjob.py +2 -198
  93. mlrun/runtimes/function.py +16 -5
  94. mlrun/runtimes/kubejob.py +5 -29
  95. mlrun/runtimes/mpijob/__init__.py +2 -2
  96. mlrun/runtimes/mpijob/abstract.py +10 -1
  97. mlrun/runtimes/mpijob/v1.py +0 -76
  98. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  99. mlrun/runtimes/nuclio.py +3 -2
  100. mlrun/runtimes/pod.py +0 -10
  101. mlrun/runtimes/remotesparkjob.py +1 -15
  102. mlrun/runtimes/serving.py +1 -1
  103. mlrun/runtimes/sparkjob/__init__.py +0 -1
  104. mlrun/runtimes/sparkjob/abstract.py +4 -131
  105. mlrun/serving/states.py +1 -1
  106. mlrun/utils/db.py +0 -2
  107. mlrun/utils/helpers.py +19 -13
  108. mlrun/utils/notifications/notification_pusher.py +5 -25
  109. mlrun/utils/regex.py +7 -2
  110. mlrun/utils/version/version.json +2 -2
  111. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +24 -23
  112. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +116 -107
  113. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  114. mlrun/mlutils/data.py +0 -160
  115. mlrun/mlutils/models.py +0 -78
  116. mlrun/mlutils/plots.py +0 -902
  117. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  118. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  119. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/api/api/endpoints/workflows.py ADDED
@@ -0,0 +1,355 @@
+ # Copyright 2018 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ import collections.abc
+ import copy
+ import traceback
+ import typing
+ from http import HTTPStatus
+ from typing import Dict
+
+ import fastapi
+ from fastapi.concurrency import run_in_threadpool
+ from sqlalchemy.orm import Session
+
+ import mlrun
+ import mlrun.api.api.deps
+ import mlrun.api.api.utils
+ import mlrun.api.crud
+ import mlrun.api.utils.auth.verifier
+ import mlrun.api.utils.clients.chief
+ import mlrun.api.utils.singletons.db
+ import mlrun.api.utils.singletons.project_member
+ import mlrun.common.schemas
+ import mlrun.projects.pipelines
+ from mlrun.api.api.utils import log_and_raise
+ from mlrun.utils.helpers import logger
+
+ router = fastapi.APIRouter()
+
+
+ @router.post(
+     "/projects/{project}/workflows/{name}/submit",
+     status_code=HTTPStatus.ACCEPTED.value,
+     response_model=mlrun.common.schemas.WorkflowResponse,
+ )
+ async def submit_workflow(
+     project: str,
+     name: str,
+     request: fastapi.Request,
+     workflow_request: mlrun.common.schemas.WorkflowRequest = mlrun.common.schemas.WorkflowRequest(),
+     auth_info: mlrun.common.schemas.AuthInfo = fastapi.Depends(
+         mlrun.api.api.deps.authenticate_request
+     ),
+     db_session: Session = fastapi.Depends(mlrun.api.api.deps.get_db_session),
+ ):
+     """
+     Submit a workflow of an existing project.
+     To support workflow scheduling, an auxiliary function called 'load_and_run' is used.
+     This function runs remotely (in a distinct pod), loads the project and then runs the workflow.
+     This way the workflow can either be run remotely with the workflow's engine, or the function
+     can be scheduled so that it loads the project and runs the workflow on each invocation.
+     Notice: when simply running a workflow, the returned run_id is the id of the auxiliary function's run.
+     To get the id and status of the workflow itself, use the `get_workflow_id` endpoint with the returned run id.
+
+     :param project:          name of the project
+     :param name:             name of the workflow
+     :param request:          fastapi request, for supporting rerouting to chief if needed
+     :param workflow_request: the request includes: the workflow spec, arguments for the workflow, an artifact
+                              path to use as the workflow's artifact target path, a source url of the project
+                              to override the existing one, a run name to override the default
+                              'workflow-runner-<workflow name>', and a kubernetes namespace if other than default
+     :param auth_info:        auth info of the request
+     :param db_session:       session that manages the current dialog with the database
+
+     :returns: response that contains the project name, workflow name, status,
+               run id (in case of a single run) and schedule (in case of scheduling)
+     """
+     project = await run_in_threadpool(
+         mlrun.api.utils.singletons.project_member.get_project_member().get_project,
+         db_session=db_session,
+         name=project,
+         leader_session=auth_info.session,
+     )
+
+     # check permission CREATE run
+     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
+         resource_type=mlrun.common.schemas.AuthorizationResourceTypes.run,
+         project_name=project.metadata.name,
+         resource_name=workflow_request.run_name or "",
+         action=mlrun.common.schemas.AuthorizationAction.create,
+         auth_info=auth_info,
+     )
+     # check permission READ workflow on project's workflow
+     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
+         resource_type=mlrun.common.schemas.AuthorizationResourceTypes.workflow,
+         project_name=project.metadata.name,
+         resource_name=name,
+         action=mlrun.common.schemas.AuthorizationAction.read,
+         auth_info=auth_info,
+     )
+     # check permission CREATE workflow on the new workflow's name
+     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
+         resource_type=mlrun.common.schemas.AuthorizationResourceTypes.workflow,
+         project_name=project.metadata.name,
+         # if no workflow spec was passed, create on the same name:
+         resource_name=getattr(workflow_request.spec, "name", name),
+         action=mlrun.common.schemas.AuthorizationAction.create,
+         auth_info=auth_info,
+     )
+     # re-route to chief in case of schedule
+     if (
+         _is_requested_schedule(name, workflow_request.spec, project)
+         and mlrun.mlconf.httpdb.clusterization.role
+         != mlrun.common.schemas.ClusterizationRole.chief
+     ):
+         chief_client = mlrun.api.utils.clients.chief.Client()
+         return await chief_client.submit_workflow(
+             project=project.metadata.name,
+             name=name,
+             request=request,
+             json=workflow_request.dict(),
+         )
+
+     workflow_spec = _fill_workflow_missing_fields_from_project(
+         project=project,
+         workflow_name=name,
+         spec=workflow_request.spec,
+         arguments=workflow_request.arguments,
+     )
+     updated_request = workflow_request.copy()
+     updated_request.spec = workflow_spec
+
+     # This function loads the project and runs the workflow remotely.
+     # In this way we can schedule workflows (by scheduling a job that runs the workflow)
+     workflow_runner = await run_in_threadpool(
+         mlrun.api.crud.WorkflowRunners().create_runner,
+         run_name=updated_request.run_name
+         or mlrun.mlconf.workflows.default_workflow_runner_name.format(
+             workflow_spec.name
+         ),
+         project=project.metadata.name,
+         db_session=db_session,
+         auth_info=auth_info,
+         image=workflow_spec.image
+         or project.spec.default_image
+         or mlrun.mlconf.default_base_image,
+     )
+
+     logger.debug(
+         "Saved function for running workflow",
+         project_name=workflow_runner.metadata.project,
+         function_name=workflow_runner.metadata.name,
+         workflow_name=workflow_spec.name,
+         arguments=workflow_spec.args,
+         source=updated_request.source or project.spec.source,
+         kind=workflow_runner.kind,
+         image=workflow_runner.spec.image,
+     )
+
+     run_uid = None
+     status = None
+     workflow_action = "schedule" if workflow_spec.schedule else "run"
+     try:
+         if workflow_spec.schedule:
+             await run_in_threadpool(
+                 mlrun.api.crud.WorkflowRunners().schedule,
+                 runner=workflow_runner,
+                 project=project,
+                 workflow_request=updated_request,
+                 db_session=db_session,
+                 auth_info=auth_info,
+             )
+             status = "scheduled"
+
+         else:
+             run = await run_in_threadpool(
+                 mlrun.api.crud.WorkflowRunners().run,
+                 runner=workflow_runner,
+                 project=project,
+                 workflow_request=updated_request,
+             )
+             status = mlrun.run.RunStatuses.running
+             run_uid = run.uid()
+     except Exception as error:
+         logger.error(traceback.format_exc())
+         log_and_raise(
+             reason="Workflow failed",
+             workflow_name=workflow_spec.name,
+             workflow_action=workflow_action,
+             error=mlrun.errors.err_to_str(error),
+         )
+
+     return mlrun.common.schemas.WorkflowResponse(
+         project=project.metadata.name,
+         name=workflow_spec.name,
+         status=status,
+         run_id=run_uid,
+         schedule=workflow_spec.schedule,
+     )
+
+
+ def _is_requested_schedule(
+     name: str,
+     workflow_spec: mlrun.common.schemas.WorkflowSpec,
+     project: mlrun.common.schemas.Project,
+ ) -> bool:
+     """
+     Check whether the workflow needs to be scheduled: either the request itself contains
+     schedule information, or the workflow predefined in the project contains a schedule.
+
+     :param name:          workflow name
+     :param workflow_spec: workflow spec input
+     :param project:       MLRun project that contains the workflow
+
+     :return: True if the workflow needs to be scheduled, False otherwise.
+     """
+     if workflow_spec:
+         return workflow_spec.schedule is not None
+
+     project_workflow = _get_workflow_by_name(project, name)
+     return bool(project_workflow.get("schedule"))
+
+
+ def _get_workflow_by_name(
+     project: mlrun.common.schemas.Project, name: str
+ ) -> typing.Optional[Dict]:
+     """
+     Get a workflow from a project.
+
+     :param project: MLRun project
+     :param name:    workflow name
+
+     :return: the workflow as a dict if the project has it, otherwise raises a bad request exception
+     """
+     for workflow in project.spec.workflows:
+         if workflow["name"] == name:
+             return workflow
+     log_and_raise(
+         reason=f"workflow {name} not found in project",
+     )
+
+
+ def _fill_workflow_missing_fields_from_project(
+     project: mlrun.common.schemas.Project,
+     workflow_name: str,
+     spec: mlrun.common.schemas.WorkflowSpec,
+     arguments: typing.Dict,
+ ) -> mlrun.common.schemas.WorkflowSpec:
+     """
+     Fill the workflow spec details from the project object, giving precedence to the spec.
+
+     :param project:       MLRun project that contains the workflow.
+     :param workflow_name: workflow name
+     :param spec:          workflow spec input
+     :param arguments:     arguments to the workflow
+
+     :return: completed workflow spec
+     """
+     # Verify that the workflow exists in the project:
+     workflow = _get_workflow_by_name(project, workflow_name)
+
+     if spec:
+         # Merge the workflow spec provided in the request with the existing workflow;
+         # the provided spec takes precedence over the existing workflow params
+         workflow = copy.deepcopy(workflow)
+         workflow = _update_dict(workflow, spec.dict())
+
+     workflow_spec = mlrun.common.schemas.WorkflowSpec(**workflow)
+     # Override arguments of the existing workflow:
+     if arguments:
+         workflow_spec.args = workflow_spec.args or {}
+         workflow_spec.args.update(arguments)
+
+     return workflow_spec
+
+
+ def _update_dict(dict_1: dict, dict_2: dict):
+     """
+     Update a dictionary with another, including nested dictionaries (recursively).
+
+     :param dict_1: The dict to update.
+     :param dict_2: The values of this dict take precedence over dict_1.
+     :return: the updated dict_1.
+     """
+     for key, val in dict_2.items():
+         if isinstance(val, collections.abc.Mapping):
+             dict_1[key] = _update_dict(dict_1.get(key, {}), val)
+         # Update only if the value exists, because on initialization of the
+         # WorkflowSpec object all unfilled fields get None values, and those
+         # None values are kept when converting the object to a dict.
+         elif val:
+             dict_1[key] = val
+     return dict_1
+
+
+ @router.get(
+     "/projects/{project}/workflows/{name}/runs/{uid}",
+     response_model=mlrun.common.schemas.GetWorkflowResponse,
+ )
+ async def get_workflow_id(
+     project: str,
+     name: str,
+     uid: str,
+     auth_info: mlrun.common.schemas.AuthInfo = fastapi.Depends(
+         mlrun.api.api.deps.authenticate_request
+     ),
+     db_session: Session = fastapi.Depends(mlrun.api.api.deps.get_db_session),
+     engine: str = "kfp",
+ ) -> mlrun.common.schemas.GetWorkflowResponse:
+     """
+     Retrieve the workflow id from the uid of the workflow runner.
+     When creating a remote workflow, an auxiliary function is created that is responsible for
+     actually running the workflow. Since the workflow uid is not known beforehand, only the run uid
+     of the auxiliary function, we have to wait until the running function logs the workflow id it created.
+     Because it is unknown how long the run will take to create the workflow, this is implemented
+     asynchronously: the client first gets the run uid and then pulls the workflow id from the run,
+     much like using a background task to query whether it has finished.
+     **Only** workflows executed by the remote engine are supported.
+
+     :param project:    name of the project
+     :param name:       name of the workflow
+     :param uid:        the id of the running job that runs the workflow
+     :param auth_info:  auth info of the request
+     :param db_session: session that manages the current dialog with the database
+     :param engine:     pipeline runner, for example: "kfp"
+
+     :returns: workflow id
+     """
+     # Check permission READ run:
+     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
+         mlrun.common.schemas.AuthorizationResourceTypes.run,
+         project,
+         uid,
+         mlrun.common.schemas.AuthorizationAction.read,
+         auth_info,
+     )
+     # Check permission READ workflow:
+     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
+         mlrun.common.schemas.AuthorizationResourceTypes.workflow,
+         project,
+         name,
+         mlrun.common.schemas.AuthorizationAction.read,
+         auth_info,
+     )
+
+     return await run_in_threadpool(
+         mlrun.api.crud.WorkflowRunners().get_workflow_id,
+         uid=uid,
+         project=project,
+         engine=engine,
+         db_session=db_session,
+     )
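
For orientation, here is a minimal client-side sketch of the two endpoints this new file adds. The base URL, project name, workflow name, and payload values are hypothetical and authentication is omitted; the body fields mirror the `WorkflowRequest` schema used above (spec, arguments, source, run_name).

```python
import requests

# hypothetical API base URL - depends on your MLRun deployment
MLRUN_API = "http://mlrun-api:8080/api/v1"

# fields mirror mlrun.common.schemas.WorkflowRequest; an empty body falls
# back to the workflow as predefined in the project
body = {"arguments": {"model_name": "my-model"}}

# POST .../workflows/{name}/submit returns the run id of the auxiliary
# "workflow runner" function, not the workflow id itself
resp = requests.post(
    f"{MLRUN_API}/projects/my-project/workflows/main/submit", json=body
)
resp.raise_for_status()
run_id = resp.json()["run_id"]

# GET .../workflows/{name}/runs/{uid} polls for the workflow id that the
# runner logs once the workflow is actually created (remote engine only)
workflow = requests.get(
    f"{MLRUN_API}/projects/my-project/workflows/main/runs/{run_id}",
    params={"engine": "kfp"},
)
print(workflow.json())
```
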
mlrun/api/api/utils.py CHANGED
@@ -37,12 +37,12 @@ import mlrun.errors
  import mlrun.runtimes.pod
  import mlrun.utils.helpers
  from mlrun.api.db.sqldb.db import SQLDB
+ from mlrun.api.rundb.sqldb import SQLRunDB
  from mlrun.api.utils.singletons.db import get_db
  from mlrun.api.utils.singletons.logs_dir import get_logs_dir
  from mlrun.api.utils.singletons.scheduler import get_scheduler
  from mlrun.common.helpers import parse_versioned_object_uri
  from mlrun.config import config
- from mlrun.db.sqldb import SQLDB as SQLRunDB
  from mlrun.errors import err_to_str
  from mlrun.run import import_function, new_function
  from mlrun.runtimes.utils import enrich_function_from_dict
mlrun/api/crud/__init__.py CHANGED
@@ -29,3 +29,4 @@ from .runs import Runs
  from .runtime_resources import RuntimeResources
  from .secrets import Secrets, SecretsClientType
  from .tags import Tags
+ from .workflows import WorkflowRunners
mlrun/api/crud/client_spec.py CHANGED
@@ -102,6 +102,9 @@ class ClientSpec(
              feature_store_data_prefixes=self._get_config_value_if_not_default(
                  "feature_store.data_prefixes"
              ),
+             model_endpoint_monitoring_store_type=self._get_config_value_if_not_default(
+                 "model_endpoint_monitoring.store_type"
+             ),
          )

      @staticmethod
mlrun/api/crud/model_monitoring/deployment.py CHANGED
@@ -28,6 +28,7 @@ import mlrun.model_monitoring.stream_processing
  import mlrun.model_monitoring.tracking_policy
  from mlrun import feature_store as fstore
  from mlrun.api.api import deps
+ from mlrun.api.crud.model_monitoring.helpers import Seconds, seconds2minutes
  from mlrun.utils import logger

  _MODEL_MONITORING_COMMON_PATH = pathlib.Path(__file__).parents[3] / "model_monitoring"
@@ -40,6 +41,24 @@ _MONITORING_BATCH_FUNCTION_PATH = (


  class MonitoringDeployment:
+     def __init__(
+         self,
+         parquet_batching_max_events: int = mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
+         max_parquet_save_interval: int = mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
+     ) -> None:
+         """
+         Initialize a MonitoringDeployment object, which handles the deployment of both the model monitoring
+         stream nuclio function and the scheduled batch drift job.
+
+         :param parquet_batching_max_events: Maximum number of events that will be used for writing the monitoring
+                                             parquet by the monitoring stream function.
+         :param max_parquet_save_interval:   Maximum number of seconds to hold events before they are written to the
+                                             monitoring parquet target. Note that this value is also used to offset
+                                             the schedule of the batch drift job.
+         """
+         self._parquet_batching_max_events = parquet_batching_max_events
+         self._max_parquet_save_interval = max_parquet_save_interval
+
      def deploy_monitoring_functions(
          self,
          project: str,
@@ -70,6 +89,7 @@ class MonitoringDeployment:
              db_session=db_session,
              auth_info=auth_info,
              tracking_policy=tracking_policy,
+             tracking_offset=Seconds(self._max_parquet_save_interval),
          )

      def deploy_model_monitoring_stream_processing(
@@ -79,7 +99,7 @@ class MonitoringDeployment:
          db_session: sqlalchemy.orm.Session,
          auth_info: mlrun.common.schemas.AuthInfo,
          tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
-     ):
+     ) -> None:
          """
          Deploying model monitoring stream real time nuclio function. The goal of this real time function is
          to monitor the log of the data stream. It is triggered when a new log entry is detected.
@@ -129,6 +149,9 @@ class MonitoringDeployment:
              parquet_target=parquet_target,
          )

+         # Add a label to the function - it will be used to identify the stream pod
+         fn.metadata.labels = {"type": "model-monitoring-stream"}
+
          mlrun.api.api.endpoints.functions._build_function(
              db_session=db_session, auth_info=auth_info, function=fn
          )
@@ -140,6 +163,7 @@ class MonitoringDeployment:
          db_session: sqlalchemy.orm.Session,
          auth_info: mlrun.common.schemas.AuthInfo,
          tracking_policy: mlrun.model_monitoring.tracking_policy.TrackingPolicy,
+         tracking_offset: Seconds = Seconds(0),
      ):
          """
          Deploying model monitoring batch job. The goal of this job is to identify drift in the data
@@ -152,6 +176,7 @@ class MonitoringDeployment:
          :param db_session:      A session that manages the current dialog with the database.
          :param auth_info:       The auth info of the request.
          :param tracking_policy: Model monitoring configurations.
+         :param tracking_offset: Offset for the tracking policy (for synchronization with the stream).
          """

          logger.info(
@@ -210,7 +235,8 @@ class MonitoringDeployment:
          data = {
              "task": task.to_dict(),
              "schedule": mlrun.api.crud.model_monitoring.helpers.convert_to_cron_string(
-                 tracking_policy.default_batch_intervals
+                 tracking_policy.default_batch_intervals,
+                 minute_delay=seconds2minutes(tracking_offset),
              ),
          }

@@ -247,11 +273,14 @@ class MonitoringDeployment:
          """

          # Initialize Stream Processor object
-         stream_processor = mlrun.model_monitoring.stream_processing.EventStreamProcessor(
-             project=project,
-             parquet_batching_max_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-             parquet_target=parquet_target,
-             model_monitoring_access_key=model_monitoring_access_key,
+         stream_processor = (
+             mlrun.model_monitoring.stream_processing.EventStreamProcessor(
+                 project=project,
+                 parquet_batching_max_events=self._parquet_batching_max_events,
+                 parquet_batching_timeout_secs=self._max_parquet_save_interval,
+                 parquet_target=parquet_target,
+                 model_monitoring_access_key=model_monitoring_access_key,
+             )
          )

          # Create a new serving function for the streaming process
mlrun/api/crud/model_monitoring/grafana.py CHANGED
@@ -149,7 +149,7 @@ async def grafana_list_endpoints(
          if (
              filter_router
              and endpoint.status.endpoint_type
-             == mlrun.common.model_monitoring.EndpointType.ROUTER
+             == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
          ):
              continue
          row = [
mlrun/api/crud/model_monitoring/helpers.py CHANGED
@@ -13,6 +13,7 @@
  # limitations under the License.
  #
  import json
+ import math
  import typing

  import sqlalchemy.orm
@@ -23,6 +24,16 @@ import mlrun.common.model_monitoring.helpers
  import mlrun.common.schemas.schedule
  import mlrun.errors

+ Seconds = typing.NewType("Seconds", int)
+ Minutes = typing.NewType("Minutes", int)
+
+ _SECONDS_IN_MINUTE: Seconds = Seconds(60)
+ _MINUTES_IN_HOUR: Minutes = Minutes(60)
+
+
+ def seconds2minutes(seconds: Seconds) -> Minutes:
+     return Minutes(math.ceil(seconds / _SECONDS_IN_MINUTE))
+

  def get_batching_interval_param(intervals_list: typing.List):
      """Convert each value in the intervals list into a float number. None
@@ -47,12 +58,31 @@ def get_batching_interval_param(intervals_list: typing.List):
      )


+ def _add_minutes_offset(
+     minute: typing.Optional[typing.Union[int, str]],
+     offset: Minutes,
+ ) -> typing.Optional[typing.Union[int, str]]:
+     """
+     :param minute: the minute specification in the cron schedule, e.g. "0".
+     :param offset: the offset in minutes to add to the cron minute specification.
+     :return: the minute cron with the offset applied (if supported).
+     """
+     if minute and (
+         (isinstance(minute, str) and str.isdigit(minute)) or isinstance(minute, int)
+     ):
+         minute = (int(minute) + offset) % _MINUTES_IN_HOUR
+     return minute
+
+
  def convert_to_cron_string(
      cron_trigger: mlrun.common.schemas.schedule.ScheduleCronTrigger,
-  ):
+     minute_delay: Minutes = Minutes(0),
+ ) -> str:
      """Convert the batch interval `ScheduleCronTrigger` into a cron trigger expression"""
      return "{} {} {} * *".format(
-         cron_trigger.minute, cron_trigger.hour, cron_trigger.day
+         _add_minutes_offset(cron_trigger.minute, minute_delay),
+         cron_trigger.hour,
+         cron_trigger.day,
      ).replace("None", "*")
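
Taken together, these helpers delay the batch drift job's cron schedule by the stream's parquet flush interval, rounded up to whole minutes. A small self-contained sketch of the arithmetic, re-deriving the behavior of the helpers above (the trigger values are hypothetical):

```python
import math

def seconds2minutes(seconds: int) -> int:
    # same ceiling division as the helper above: 90 seconds -> 2 minutes
    return math.ceil(seconds / 60)

def add_minutes_offset(minute, offset: int):
    # numeric cron minutes are shifted modulo 60; "*" and None pass through
    if minute and (isinstance(minute, int) or str(minute).isdigit()):
        return (int(minute) + offset) % 60
    return minute

# a batch trigger of minute="0", hour="*/1" with a 90-second parquet timeout:
delay = seconds2minutes(90)                           # -> 2
print(f"{add_minutes_offset('0', delay)} */1 * * *")  # -> "2 */1 * * *"
print(add_minutes_offset(59, delay))                  # wraps around: -> 1
print(add_minutes_offset("*", delay))                 # untouched: -> "*"
```

So the drift job runs two minutes past each hour, after the stream function has flushed its parquet window.
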
 
mlrun/api/crud/model_monitoring/model_endpoints.py CHANGED
@@ -22,6 +22,8 @@ import sqlalchemy.orm
  import mlrun.api.api.utils
  import mlrun.api.crud.model_monitoring.deployment
  import mlrun.api.crud.model_monitoring.helpers
+ import mlrun.api.crud.secrets
+ import mlrun.api.rundb.sqldb
  import mlrun.artifacts
  import mlrun.common.helpers
  import mlrun.common.schemas.model_monitoring
@@ -155,6 +157,9 @@ class ModelEndpoints:
          # Write the new model endpoint
          model_endpoint_store = get_model_endpoint_store(
              project=model_endpoint.metadata.project,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=model_endpoint.metadata.project
+             ),
          )
          model_endpoint_store.write_model_endpoint(endpoint=model_endpoint.flat_dict())

@@ -184,6 +189,9 @@ class ModelEndpoints:
          # Generate a model endpoint store object and apply the update process
          model_endpoint_store = get_model_endpoint_store(
              project=project,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=project
+             ),
          )
          model_endpoint_store.update_model_endpoint(
              endpoint_id=endpoint_id, attributes=attributes
@@ -203,7 +211,7 @@ class ModelEndpoints:
          model_endpoint: mlrun.common.schemas.ModelEndpoint,
          model_obj: mlrun.artifacts.ModelArtifact,
          db_session: sqlalchemy.orm.Session,
-         run_db: mlrun.db.sqldb.SQLDB,
+         run_db: mlrun.api.rundb.sqldb.SQLRunDB,
      ):
          """
          Create monitoring feature set with the relevant parquet target.
@@ -290,7 +298,6 @@ class ModelEndpoints:
          driver.update_resource_status("created")

          # Save the new feature set
-         feature_set._override_run_db(db_session)
          feature_set.save()
          logger.info(
              "Monitoring feature set created",
@@ -313,6 +320,9 @@ class ModelEndpoints:
          """
          model_endpoint_store = get_model_endpoint_store(
              project=project,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=project
+             ),
          )

          model_endpoint_store.delete_model_endpoint(endpoint_id=endpoint_id)
@@ -361,7 +371,11 @@ class ModelEndpoints:

          # Generate a model endpoint store object and get the model endpoint record as a dictionary
          model_endpoint_store = get_model_endpoint_store(
-             project=project, access_key=auth_info.data_session
+             project=project,
+             access_key=auth_info.data_session,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=project
+             ),
          )

          model_endpoint_record = model_endpoint_store.get_model_endpoint(
@@ -454,7 +468,11 @@ class ModelEndpoints:

          # Generate a model endpoint store object and get a list of model endpoint dictionaries
          endpoint_store = get_model_endpoint_store(
-             access_key=auth_info.data_session, project=project
+             access_key=auth_info.data_session,
+             project=project,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=project
+             ),
          )

          endpoint_dictionary_list = endpoint_store.list_model_endpoints(
@@ -523,7 +541,11 @@ class ModelEndpoints:

          # Generate a model endpoint store object and get a list of model endpoint dictionaries
          endpoint_store = get_model_endpoint_store(
-             access_key=auth_info.data_session, project=project_name
+             access_key=auth_info.data_session,
+             project=project_name,
+             secret_provider=mlrun.api.crud.secrets.get_project_secret_provider(
+                 project=project_name
+             ),
          )
          endpoints = endpoint_store.list_model_endpoints()
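
The recurring change in this file threads a project-scoped `secret_provider` into every `get_model_endpoint_store` call, so the store factory can resolve per-project credentials (such as a store connection string) instead of relying on process-wide configuration. A minimal sketch of the pattern, assuming the provider is simply a callable from secret key to value; the function bodies and the secret key below are hypothetical stand-ins, not MLRun's actual implementation:

```python
from typing import Callable, Dict, Optional, Tuple

# hypothetical in-memory stand-in for the project secret store
_SECRETS: Dict[Tuple[str, str], str] = {
    ("my-project", "ENDPOINT_STORE_CONNECTION"): "sqlite:///model-endpoints.db",
}

def get_project_secret_provider(project: str) -> Callable[[str], Optional[str]]:
    """Return a closure that resolves secrets scoped to one project."""
    def provider(key: str) -> Optional[str]:
        return _SECRETS.get((project, key))
    return provider

def get_model_endpoint_store(project: str, secret_provider=None) -> dict:
    # the factory can now resolve credentials lazily, per project
    connection = secret_provider("ENDPOINT_STORE_CONNECTION") if secret_provider else None
    return {"project": project, "connection": connection}

store = get_model_endpoint_store(
    project="my-project",
    secret_provider=get_project_secret_provider(project="my-project"),
)
print(store["connection"])  # -> sqlite:///model-endpoints.db
```
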
 
mlrun/api/crud/notifications.py CHANGED
@@ -34,13 +34,18 @@ class Notifications(
          notification_objects: typing.List[mlrun.model.Notification],
          run_uid: str,
          project: str = None,
+         mask_params: bool = True,
      ):
          project = project or mlrun.mlconf.default_project
-         notification_objects_to_store = (
-             mlrun.api.api.utils.validate_and_mask_notification_list(
-                 notification_objects, run_uid, project
+
+         # we don't mask the notification params when it's a status update as they are already masked
+         notification_objects_to_store = notification_objects
+         if mask_params:
+             notification_objects_to_store = (
+                 mlrun.api.api.utils.validate_and_mask_notification_list(
+                     notification_objects, run_uid, project
+                 )
              )
-         )

          mlrun.api.utils.singletons.db.get_db().store_run_notifications(
              session, notification_objects_to_store, run_uid, project
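
The new `mask_params` flag lets a caller skip re-masking when the notification params were already masked, e.g. when a run's status is updated and its notifications are stored again. A runnable stand-in demonstrating the added control flow (the masking function and data shapes are hypothetical simplifications of the code above):

```python
from typing import Dict, List

def validate_and_mask(notifications: List[Dict]) -> List[Dict]:
    # stand-in for mlrun.api.api.utils.validate_and_mask_notification_list
    return [{**n, "params": {"webhook": "***"}} for n in notifications]

def store_run_notifications(notifications: List[Dict], mask_params: bool = True) -> List[Dict]:
    # mirrors the control flow added above: mask on first store,
    # skip masking on status updates where params are already masked
    to_store = notifications
    if mask_params:
        to_store = validate_and_mask(notifications)
    return to_store

first = store_run_notifications([{"name": "n1", "params": {"webhook": "secret"}}])
print(first)   # params masked on the initial store
update = store_run_notifications(first, mask_params=False)
print(update)  # stored as-is: no double masking
```
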