PyPI - mlrun - Versions diffs - 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl - Mend

mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (93) hide show

mlrun/api/api/deps.py +14 -1
mlrun/api/api/endpoints/frontend_spec.py +0 -2
mlrun/api/api/endpoints/functions.py +15 -27
mlrun/api/api/endpoints/grafana_proxy.py +435 -74
mlrun/api/api/endpoints/healthz.py +5 -18
mlrun/api/api/endpoints/model_endpoints.py +33 -37
mlrun/api/api/utils.py +6 -13
mlrun/api/crud/__init__.py +14 -16
mlrun/api/crud/logs.py +5 -7
mlrun/api/crud/model_monitoring/__init__.py +2 -2
mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
mlrun/api/crud/pipelines.py +2 -3
mlrun/api/db/sqldb/models/models_mysql.py +52 -19
mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
mlrun/api/db/sqldb/session.py +19 -26
mlrun/api/schemas/__init__.py +2 -0
mlrun/api/schemas/constants.py +0 -13
mlrun/api/schemas/frontend_spec.py +0 -1
mlrun/api/schemas/model_endpoints.py +38 -195
mlrun/api/schemas/schedule.py +2 -2
mlrun/api/utils/clients/log_collector.py +5 -0
mlrun/builder.py +9 -41
mlrun/config.py +1 -76
mlrun/data_types/__init__.py +1 -6
mlrun/data_types/data_types.py +1 -3
mlrun/datastore/__init__.py +2 -9
mlrun/datastore/sources.py +20 -25
mlrun/datastore/store_resources.py +1 -1
mlrun/datastore/targets.py +34 -67
mlrun/datastore/utils.py +4 -26
mlrun/db/base.py +2 -4
mlrun/db/filedb.py +5 -13
mlrun/db/httpdb.py +32 -64
mlrun/db/sqldb.py +2 -4
mlrun/errors.py +0 -5
mlrun/execution.py +0 -2
mlrun/feature_store/api.py +8 -24
mlrun/feature_store/feature_set.py +6 -28
mlrun/feature_store/feature_vector.py +0 -2
mlrun/feature_store/ingestion.py +11 -8
mlrun/feature_store/retrieval/base.py +43 -271
mlrun/feature_store/retrieval/dask_merger.py +153 -55
mlrun/feature_store/retrieval/job.py +3 -12
mlrun/feature_store/retrieval/local_merger.py +130 -48
mlrun/feature_store/retrieval/spark_merger.py +125 -126
mlrun/features.py +2 -7
mlrun/model_monitoring/constants.py +6 -48
mlrun/model_monitoring/helpers.py +35 -118
mlrun/model_monitoring/model_monitoring_batch.py +260 -293
mlrun/model_monitoring/stream_processing_fs.py +253 -220
mlrun/platforms/iguazio.py +0 -33
mlrun/projects/project.py +72 -34
mlrun/runtimes/base.py +0 -5
mlrun/runtimes/daskjob.py +0 -2
mlrun/runtimes/function.py +3 -29
mlrun/runtimes/kubejob.py +15 -39
mlrun/runtimes/local.py +45 -7
mlrun/runtimes/mpijob/abstract.py +0 -2
mlrun/runtimes/mpijob/v1.py +0 -2
mlrun/runtimes/pod.py +0 -2
mlrun/runtimes/remotesparkjob.py +0 -2
mlrun/runtimes/serving.py +0 -6
mlrun/runtimes/sparkjob/abstract.py +2 -39
mlrun/runtimes/sparkjob/spark3job.py +0 -2
mlrun/serving/__init__.py +1 -2
mlrun/serving/routers.py +35 -35
mlrun/serving/server.py +12 -22
mlrun/serving/states.py +30 -162
mlrun/serving/v2_serving.py +10 -13
mlrun/utils/clones.py +1 -1
mlrun/utils/model_monitoring.py +96 -122
mlrun/utils/version/version.json +2 -2
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
mlrun/api/crud/model_monitoring/grafana.py +0 -427
mlrun/datastore/spark_udf.py +0 -40
mlrun/model_monitoring/__init__.py +0 -44
mlrun/model_monitoring/common.py +0 -112
mlrun/model_monitoring/model_endpoint.py +0 -141
mlrun/model_monitoring/stores/__init__.py +0 -106
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
mlrun/model_monitoring/stores/models/__init__.py +0 -23
mlrun/model_monitoring/stores/models/base.py +0 -18
mlrun/model_monitoring/stores/models/mysql.py +0 -100
mlrun/model_monitoring/stores/models/sqlite.py +0 -98
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
mlrun/utils/db.py +0 -52
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0

mlrun/api/api/endpoints/healthz.py CHANGED Viewed

@@ -12,33 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import http
 from fastapi import APIRouter
+import mlrun.api.crud
 import mlrun.api.schemas
-from mlrun.config import config as mlconfig
 router = APIRouter()
 @router.get(
     "/healthz",
-    status_code=http.HTTPStatus.OK.value,
+    response_model=mlrun.api.schemas.ClientSpec,
 )
 def health():
-    # offline is the initial state
-    # waiting for chief is set for workers waiting for chief to be ready and then clusterize against it
-    if mlconfig.httpdb.state in [
-        mlrun.api.schemas.APIStates.offline,
-        mlrun.api.schemas.APIStates.waiting_for_chief,
-    ]:
-        raise mlrun.errors.MLRunServiceUnavailableError()
-    return {
-        # for old `align_mlrun.sh` scripts expecting `version` in the response
-        # TODO: remove on mlrun >= 1.6.0
-        "version": mlconfig.version,
-        "status": "ok",
-    }
+    # TODO: From 0.7.0 client uses the /client-spec endpoint,
+    #  when this is the oldest relevant client, remove this logic from the healthz endpoint
+    return mlrun.api.crud.ClientSpec().get_client_spec()

mlrun/api/api/endpoints/model_endpoints.py CHANGED Viewed

@@ -33,7 +33,7 @@ router = APIRouter()
 @router.put(
     "/projects/{project}/model-endpoints/{endpoint_id}",
-    response_model=mlrun.api.schemas.ModelEndpoint,
+    status_code=HTTPStatus.NO_CONTENT.value,
 )
 async def create_or_patch(
     project: str,
@@ -43,9 +43,9 @@ async def create_or_patch(
         mlrun.api.api.deps.authenticate_request
     ),
     db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
-) -> mlrun.api.schemas.ModelEndpoint:
+):
     """
-    Either create or update the record of a given `ModelEndpoint` object.
+    Either create or updates the record of a given ModelEndpoint object.
     Leaving here for backwards compatibility.
     """
@@ -76,7 +76,7 @@ async def create_or_patch(
         )
     # Since the endpoint records are created automatically, at point of serving function deployment, we need to use
     # V3IO_ACCESS_KEY here
-    return await run_in_threadpool(
+    await run_in_threadpool(
         mlrun.api.crud.ModelEndpoints().create_or_patch,
         db_session=db_session,
         access_key=os.environ.get("V3IO_ACCESS_KEY"),
@@ -99,7 +99,7 @@ async def create_model_endpoint(
     db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
 ) -> mlrun.api.schemas.ModelEndpoint:
     """
-    Create a DB record of a given `ModelEndpoint` object.
+    Create a DB record of a given ModelEndpoint object.
     :param project:         The name of the project.
     :param endpoint_id:     The unique id of the model endpoint.
@@ -111,7 +111,6 @@ async def create_model_endpoint(
     :return: A Model endpoint object.
     """
     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
         resource_type=mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
         project_name=project,
@@ -150,13 +149,14 @@ async def patch_model_endpoint(
     ),
 ) -> mlrun.api.schemas.ModelEndpoint:
     """
-    Update a DB record of a given `ModelEndpoint` object.
+    Update a DB record of a given ModelEndpoint object.
     :param project:       The name of the project.
     :param endpoint_id:   The unique id of the model endpoint.
     :param attributes:    Attributes that will be updated. The input is provided in a json structure that will be
                           converted into a dictionary before applying the patch process. Note that the keys of
-                          the dictionary should exist in the DB target.
+                          dictionary should exist in the DB target. More details about the model endpoint available
+                          attributes can be found under :py:class:`~mlrun.api.schemas.ModelEndpoint`.
                           example::
@@ -245,7 +245,7 @@ async def list_model_endpoints(
     labels or top level. By default, when no filters are applied, all available endpoints for the given project will be
     listed.
-    If uids are passed: will return `ModelEndpointList` of endpoints with uid in uids
+    If uids are passed: will return ModelEndpointList of endpoints with uid in uids
     Labels can be used to filter on the existence of a label:
     api/projects/{project}/model-endpoints/?label=mylabel
@@ -264,11 +264,11 @@ async def list_model_endpoints(
     :param model:     The name of the model to filter by.
     :param function:  The name of the function to filter by.
     :param labels:    A list of labels to filter by. Label filters work by either filtering a specific value of a label
-                      (i.e. list("key=value")) or by looking for the existence of a given key (i.e. "key").
-    :param metrics:   A list of real-time metrics to return for each endpoint. There are pre-defined real-time metrics
-                      for model endpoints such as predictions_per_second and latency_avg_5m but also custom metrics
-                      defined by the user. Please note that these metrics are stored in the time series DB and the
-                      results will be appeared under model_endpoint.spec.metrics of each endpoint.
+                      (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key").
+    :param metrics:   A list of metrics to return for each endpoint. There are pre-defined metrics for model endpoints
+                      such as predictions_per_second and latency_avg_5m but also custom metrics defined by the user.
+                      Please note that these metrics are stored in the time series DB and the results will be appeared
+                      under model_endpoint.spec.metrics of each endpoint.
     :param start:     The start time of the metrics. Can be represented by a string containing an RFC 3339
                       time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
                       `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
@@ -276,9 +276,9 @@ async def list_model_endpoints(
                       time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
                       `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
     :param top_level: If True will return only routers and endpoint that are NOT children of any router.
-    :param uids:      Will return `ModelEndpointList` of endpoints with uid in uids.
+    :param uids:      Will return ModelEndpointList of endpoints with uid in uids.
-    :return: An object of `ModelEndpointList` which is literally a list of model endpoints along with some metadata. To
+    :return: An object of ModelEndpointList which is literally a list of model endpoints along with some metadata. To
              get a standard list of model endpoints use ModelEndpointList.endpoints.
     """
@@ -333,27 +333,23 @@ async def get_model_endpoint(
     """Get a single model endpoint object. You can apply different time series metrics that will be added to the
        result.
-    :param project:                    The name of the project
-    :param endpoint_id:                The unique id of the model endpoint.
-    :param start:                      The start time of the metrics. Can be represented by a string containing an
-                                       RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                                       `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
-                                       0 for the earliest time.
-    :param end:                        The end time of the metrics. Can be represented by a string containing an
-                                       RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                                       `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
-                                       0 for the earliest time.
-    :param metrics:                    A list of real-time metrics to return for the model endpoint. There are
-                                       pre-defined real-time metrics for model endpoints such as predictions_per_second
-                                       and latency_avg_5m but also custom metrics defined by the user. Please note that
-                                       these metrics are stored in the time series DB and the results will be
-                                       appeared under model_endpoint.spec.metrics.
-    :param feature_analysis:           When True, the base feature statistics and current feature statistics will
-                                       be added to the output of the resulting object.
-    :param auth_info:                  The auth info of the request
-    :return:  A `ModelEndpoint` object.
+    :param project:          The name of the project.
+    :param endpoint_id:      The unique id of the model endpoint.
+    :param start:            The start time of the metrics. Can be represented by a string containing an RFC 3339
+                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
+                             where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
+    :param end:              The end time of the metrics. Can be represented by a string containing an RFC 3339
+                             time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
+                             where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
+    :param metrics:          A list of metrics to return for the model endpoint. There are pre-defined metrics for model
+                             endpoints such as predictions_per_second and latency_avg_5m but also custom metrics
+                             defined by the user. Please note that these metrics are stored in the time series DB and
+                             the results will be appeared under model_endpoint.spec.metrics.
+    :param feature_analysis: When True, the base feature statistics and current feature statistics will be added to
+                             the output of the resulting object.
+    :param auth_info:        The auth info of the request.
+    :return: A ModelEndpoint object.
     """
     await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
         mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,

mlrun/api/api/utils.py CHANGED Viewed

@@ -98,18 +98,8 @@ def get_obj_path(schema, path, user=""):
         if not path.startswith(schema_prefix):
             path = f"{schema_prefix}{path}"
-    allowed_paths_list = get_allowed_path_prefixes_list()
-    if not any(path.startswith(allowed_path) for allowed_path in allowed_paths_list):
-        raise mlrun.errors.MLRunAccessDeniedError("Unauthorized path")
-    return path
-def get_allowed_path_prefixes_list() -> typing.List[str]:
-    """
-    Get list of allowed paths - v3io:// is always allowed, and also the real_path parameter if specified.
-    We never allow local files in the allowed paths list. Allowed paths must contain a schema (://).
-    """
-    real_path = config.httpdb.real_path
+    # Check if path is allowed - v3io:// is always allowed, and also the real_path parameter if specified.
+    # We never allow local files in the allowed paths list. Allowed paths must contain a schema (://)
     allowed_file_paths = config.httpdb.allowed_file_paths or ""
     allowed_paths_list = [
         path.strip() for path in allowed_file_paths.split(",") if "://" in path
@@ -117,7 +107,10 @@ def get_allowed_path_prefixes_list() -> typing.List[str]:
     if real_path:
         allowed_paths_list.append(real_path)
     allowed_paths_list.append("v3io://")
-    return allowed_paths_list
+    if not any(path.startswith(allowed_path) for allowed_path in allowed_paths_list):
+        raise mlrun.errors.MLRunAccessDeniedError("Unauthorized path")
+    return path
 def get_secrets(auth_info: mlrun.api.schemas.AuthInfo):

mlrun/api/crud/__init__.py CHANGED Viewed

@@ -12,19 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# flake8: noqa: F401  - this is until we take care of the F401 violations with respect to __all__ & sphinx
-from .artifacts import Artifacts
-from .client_spec import ClientSpec
-from .clusterization_spec import ClusterizationSpec
-from .feature_store import FeatureStore
-from .functions import Functions
-from .logs import Logs
-from .marketplace import Marketplace
-from .model_monitoring import ModelEndpoints
-from .pipelines import Pipelines
-from .projects import Projects
-from .runs import Runs
-from .runtime_resources import RuntimeResources
-from .secrets import Secrets, SecretsClientType
-from .tags import Tags
+from .artifacts import Artifacts  # noqa: F401
+from .client_spec import ClientSpec  # noqa: F401
+from .clusterization_spec import ClusterizationSpec  # noqa: F401
+from .feature_store import FeatureStore  # noqa: F401
+from .functions import Functions  # noqa: F401
+from .logs import Logs  # noqa: F401
+from .marketplace import Marketplace  # noqa: F401
+from .model_monitoring import ModelEndpoints, ModelEndpointStoreType  # noqa: F401
+from .pipelines import Pipelines  # noqa: F401
+from .projects import Projects  # noqa: F401
+from .runs import Runs  # noqa: F401
+from .runtime_resources import RuntimeResources  # noqa: F401
+from .secrets import Secrets, SecretsClientType  # noqa: F401
+from .tags import Tags  # noqa: F401

mlrun/api/crud/logs.py CHANGED Viewed

@@ -242,10 +242,7 @@ class Logs(
     def log_file_exists_for_run_uid(project: str, uid: str) -> (bool, pathlib.Path):
         """
         Checks if the log file exists for the given project and uid
-        There could be two types of log files:
-        1. Log file which was created by the legacy logger with the following file format - project/<run-uid>)
-        2. Log file which was created by the new logger with the following file format- /project/<run-uid>-<pod-name>
-        Therefore, we check if the log file exists for both formats
+        A Run's log file path is: /mlrun/logs/{project}/{uid}
         :param project: project name
         :param uid: run uid
         :return: True if the log file exists, False otherwise, and the log file path
@@ -253,9 +250,10 @@ class Logs(
         project_logs_dir = project_logs_path(project)
         if not project_logs_dir.exists():
             return False, None
-        for file in os.listdir(str(project_logs_dir)):
-            if file.startswith(uid):
-                return True, project_logs_dir / file
+        log_file = log_path(project, uid)
+        if log_file.exists():
+            return True, log_file
         return False, None

mlrun/api/crud/model_monitoring/__init__.py CHANGED Viewed

@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# flake8: noqa: F401 - this is until we take care of the F401 violations with respect to __all__ & sphinx
-from .model_endpoints import ModelEndpoints
+from .model_endpoint_store import ModelEndpointStoreType  # noqa: F401
+from .model_endpoints import ModelEndpoints  # noqa: F401

mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

Potentially problematic release.

mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl