mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (58)
  1. mlrun/common/constants.py +3 -0
  2. mlrun/common/db/sql_session.py +3 -2
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/api_gateway.py +6 -6
  5. mlrun/common/schemas/common.py +4 -4
  6. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +12 -12
  9. mlrun/datastore/alibaba_oss.py +1 -0
  10. mlrun/datastore/azure_blob.py +1 -6
  11. mlrun/datastore/base.py +12 -0
  12. mlrun/datastore/dbfs_store.py +1 -5
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +1 -9
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +1 -0
  17. mlrun/datastore/storeytargets.py +147 -0
  18. mlrun/datastore/targets.py +67 -69
  19. mlrun/datastore/v3io.py +1 -0
  20. mlrun/errors.py +7 -4
  21. mlrun/feature_store/feature_vector.py +3 -1
  22. mlrun/feature_store/retrieval/job.py +3 -1
  23. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  24. mlrun/model.py +1 -1
  25. mlrun/model_monitoring/api.py +1 -2
  26. mlrun/model_monitoring/applications/_application_steps.py +25 -43
  27. mlrun/model_monitoring/applications/context.py +206 -70
  28. mlrun/model_monitoring/controller.py +0 -1
  29. mlrun/model_monitoring/db/stores/__init__.py +3 -3
  30. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  31. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
  32. mlrun/model_monitoring/db/tsdb/__init__.py +3 -3
  33. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +18 -10
  34. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
  35. mlrun/model_monitoring/helpers.py +38 -1
  36. mlrun/model_monitoring/stream_processing.py +8 -26
  37. mlrun/package/packagers/default_packager.py +2 -2
  38. mlrun/projects/project.py +17 -16
  39. mlrun/runtimes/funcdoc.py +1 -1
  40. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  41. mlrun/runtimes/nuclio/application/application.py +131 -55
  42. mlrun/runtimes/nuclio/function.py +4 -10
  43. mlrun/runtimes/nuclio/serving.py +2 -2
  44. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  45. mlrun/runtimes/utils.py +16 -0
  46. mlrun/serving/routers.py +1 -1
  47. mlrun/serving/server.py +19 -5
  48. mlrun/serving/states.py +8 -0
  49. mlrun/serving/v2_serving.py +34 -26
  50. mlrun/utils/helpers.py +12 -2
  51. mlrun/utils/v3io_clients.py +2 -2
  52. mlrun/utils/version/version.json +2 -2
  53. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/METADATA +2 -2
  54. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/RECORD +58 -57
  55. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/WHEEL +1 -1
  56. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/LICENSE +0 -0
  57. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/entry_points.txt +0 -0
  58. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py CHANGED
@@ -65,6 +65,9 @@ class MLRunInternalLabels:
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"
     resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
     created = f"{MLRUN_LABEL_PREFIX}created"
+    producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
+    app_name = f"{MLRUN_LABEL_PREFIX}app-name"
+    endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
     host = "host"
     job_type = "job-type"
     kind = "kind"
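For orientation, the new label keys build on the module's MLRUN_LABEL_PREFIX. A quick sketch of the resulting label names, assuming the prefix is "mlrun/" as defined elsewhere in this module:

MLRUN_LABEL_PREFIX = "mlrun/"  # assumed value, mirroring mlrun.common.constants

producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
app_name = f"{MLRUN_LABEL_PREFIX}app-name"
endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
print(producer_type, app_name, endpoint_id)
# mlrun/producer-type mlrun/app-name mlrun/endpoint-id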
mlrun/common/db/sql_session.py CHANGED
@@ -11,13 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


 from sqlalchemy import create_engine
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session
-from sqlalchemy.orm import sessionmaker as SessionMaker
+from sqlalchemy.orm import (
+    sessionmaker as SessionMaker,  # noqa: N812 - `sessionmaker` is a class
+)

 from mlrun.config import config

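For context, sessionmaker is a class despite its lowercase name, which is why the CamelCase alias now carries an N812 suppression. A generic SQLAlchemy sketch of the factory pattern (not mlrun code; in-memory SQLite assumed):

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker as SessionMaker  # noqa: N812 - `sessionmaker` is a class

# hypothetical in-memory database, just to show the factory usage
engine = create_engine("sqlite:///:memory:")
session_factory = SessionMaker(bind=engine)

session = session_factory()  # each call produces a new Session bound to the engine
session.close()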
mlrun/common/helpers.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


 def parse_versioned_object_uri(
mlrun/common/schemas/api_gateway.py CHANGED
@@ -77,7 +77,7 @@ class APIGatewaySpec(_APIGatewayBaseModel):
     name: str
     description: Optional[str]
     path: Optional[str] = "/"
-    authenticationMode: Optional[APIGatewayAuthenticationMode] = (
+    authenticationMode: Optional[APIGatewayAuthenticationMode] = (  # noqa: N815 - for compatibility with Nuclio https://github.com/nuclio/nuclio/blob/672b8e36f9edd6e42b4685ec1d27cabae3c5f045/pkg/platform/types.go#L476
         APIGatewayAuthenticationMode.none
     )
     upstreams: list[APIGatewayUpstream]
@@ -103,11 +103,11 @@ class APIGateway(_APIGatewayBaseModel):
     ]

     def get_invoke_url(self):
-        return (
-            self.spec.host + self.spec.path
-            if self.spec.path and self.spec.host
-            else self.spec.host
-        ).rstrip("/")
+        if self.spec.host and self.spec.path:
+            return f"{self.spec.host.rstrip('/')}/{self.spec.path.lstrip('/')}".rstrip(
+                "/"
+            )
+        return self.spec.host.rstrip("/")

     def enrich_mlrun_names(self):
         self._enrich_api_gateway_mlrun_name()
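The rewritten get_invoke_url joins host and path with exactly one separator instead of concatenating them verbatim. A minimal standalone sketch of the same logic, using hypothetical values:

def join_invoke_url(host, path):
    # mirror of the updated logic: strip the trailing slash from the host,
    # the leading slash from the path, then drop any trailing slash
    if host and path:
        return f"{host.rstrip('/')}/{path.lstrip('/')}".rstrip("/")
    return host.rstrip("/")

print(join_invoke_url("gateway.example.com/", "/v1/"))  # gateway.example.com/v1
print(join_invoke_url("gateway.example.com", "/"))      # gateway.example.com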
mlrun/common/schemas/common.py CHANGED
@@ -11,16 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+
 import typing

 import pydantic


 class ImageBuilder(pydantic.BaseModel):
-    functionSourceCode: typing.Optional[str] = None
-    codeEntryType: typing.Optional[str] = None
-    codeEntryAttributes: typing.Optional[str] = None
+    functionSourceCode: typing.Optional[str] = None  # noqa: N815
+    codeEntryType: typing.Optional[str] = None  # noqa: N815
+    codeEntryAttributes: typing.Optional[str] = None  # noqa: N815
     source: typing.Optional[str] = None
     code_origin: typing.Optional[str] = None
     origin_filename: typing.Optional[str] = None
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -21,7 +21,6 @@ from typing import Any, NamedTuple, Optional
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra

-import mlrun.common.model_monitoring
 import mlrun.common.types

 from ..object import ObjectKind, ObjectSpec, ObjectStatus
mlrun/config.py CHANGED
@@ -863,7 +863,7 @@ class Config:
                 f"Unable to decode {attribute_path}"
             )
         parsed_attribute_value = json.loads(decoded_attribute_value)
-        if type(parsed_attribute_value) != expected_type:
+        if not isinstance(parsed_attribute_value, expected_type):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
             )
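isinstance also accepts subclasses of the expected type, which the previous exact-type comparison rejected; a small generic illustration (plain Python, not mlrun code):

class MyDict(dict):
    pass

value = MyDict(a=1)
print(type(value) == dict)      # False - exact type comparison ignores subclasses
print(isinstance(value, dict))  # True  - isinstance accepts subclasses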
mlrun/data_types/to_pandas.py CHANGED
@@ -19,9 +19,9 @@ import pandas as pd
 import semver


-def _toPandas(spark_df):
+def _to_pandas(spark_df):
     """
-    Modified version of spark DataFrame.toPandas()
+    Modified version of spark DataFrame.toPandas() -
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35

     The original code (which is only replaced in pyspark 3.5.0) fails with Pandas 2 installed, with the following error:
@@ -223,21 +223,21 @@ def _to_corrected_pandas_type(dt):
         TimestampType,
     )

-    if type(dt) == ByteType:
+    if isinstance(dt, ByteType):
         return np.int8
-    elif type(dt) == ShortType:
+    elif isinstance(dt, ShortType):
         return np.int16
-    elif type(dt) == IntegerType:
+    elif isinstance(dt, IntegerType):
         return np.int32
-    elif type(dt) == LongType:
+    elif isinstance(dt, LongType):
         return np.int64
-    elif type(dt) == FloatType:
+    elif isinstance(dt, FloatType):
         return np.float32
-    elif type(dt) == DoubleType:
+    elif isinstance(dt, DoubleType):
         return np.float64
-    elif type(dt) == BooleanType:
+    elif isinstance(dt, BooleanType):
         return bool
-    elif type(dt) == TimestampType:
+    elif isinstance(dt, TimestampType):
         return "datetime64[ns]"
     else:
         return None
@@ -262,9 +262,9 @@ def spark_df_to_pandas(spark_df):
                 )
                 type_conversion_dict[field.name] = "datetime64[ns]"

-        df = _toPandas(spark_df)
+        df = _to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return _toPandas(spark_df)
+        return _to_pandas(spark_df)
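A hedged usage sketch of the converter around the renamed helper, assuming pyspark is installed and the import path matches the file shown above:

from pyspark.sql import SparkSession

from mlrun.data_types.to_pandas import spark_df_to_pandas  # module path assumed from this diff

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark_df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])

# goes through the patched _to_pandas() path, which stays compatible with pandas 2
pdf = spark_df_to_pandas(spark_df)
print(pdf.dtypes)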
mlrun/datastore/alibaba_oss.py CHANGED
@@ -85,6 +85,7 @@ class OSSStore(DataStore):
         return oss.get_object(key).read()

     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
         oss.put_object(key, data)
mlrun/datastore/azure_blob.py CHANGED
@@ -189,12 +189,7 @@ class AzureBlobStore(DataStore):
                 "Append mode not supported for Azure blob datastore"
             )
         remote_path = self._convert_key_to_remote_path(key)
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError("Data type unknown. Unable to put in Azure!")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(remote_path, mode) as f:
             f.write(data)

mlrun/datastore/base.py CHANGED
@@ -157,6 +157,18 @@ class DataStore:
     def put(self, key, data, append=False):
         pass

+    def _prepare_put_data(self, data, append=False):
+        mode = "a" if append else "w"
+        if isinstance(data, bytearray):
+            data = bytes(data)
+
+        if isinstance(data, bytes):
+            return data, f"{mode}b"
+        elif isinstance(data, str):
+            return data, mode
+        else:
+            raise TypeError(f"Unable to put a value of type {type(self).__name__}")
+
     def stat(self, key):
         pass

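The new helper normalizes the payload and the open mode in one place, so each datastore's put can delegate to it instead of repeating the type checks. A standalone re-implementation of the mapping, for illustration only (not an mlrun import):

def prepare_put_data(data, append=False):
    # bytearray -> bytes, bytes -> binary mode, str -> text mode, anything else -> TypeError
    mode = "a" if append else "w"
    if isinstance(data, bytearray):
        data = bytes(data)
    if isinstance(data, bytes):
        return data, f"{mode}b"
    if isinstance(data, str):
        return data, mode
    raise TypeError("unsupported data type")

print(prepare_put_data(b"blob"))              # (b'blob', 'wb')
print(prepare_put_data("text", append=True))  # ('text', 'a')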
mlrun/datastore/dbfs_store.py CHANGED
@@ -130,11 +130,7 @@ class DBFSStore(DataStore):
                 "Append mode not supported for Databricks file system"
             )
         # can not use append mode because it overrides data.
-        mode = "w"
-        if isinstance(data, bytes):
-            mode += "b"
-        elif not isinstance(data, str):
-            raise TypeError(f"Unknown data type {type(data)}")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(key, mode) as f:
             f.write(data)

mlrun/datastore/filestore.py CHANGED
@@ -66,9 +66,7 @@ class FileStore(DataStore):
         dir_to_create = path.dirname(self._join(key))
         if dir_to_create:
             self._ensure_directory(dir_to_create)
-        mode = "a" if append else "w"
-        if isinstance(data, bytes):
-            mode = mode + "b"
+        data, mode = self._prepare_put_data(data, append)
         with open(self._join(key), mode) as fp:
             fp.write(data)
             fp.close()
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -131,15 +131,7 @@ class GoogleCloudStorageStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Append mode not supported for Google cloud storage datastore"
             )
-
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError(
-                "Data type unknown. Unable to put in Google cloud storage!"
-            )
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(path, mode) as f:
             f.write(data)

mlrun/datastore/redis.py CHANGED
@@ -126,6 +126,7 @@ class RedisStore(DataStore):

     def put(self, key, data, append=False):
         key = RedisStore.build_redis_key(key)
+        data, _ = self._prepare_put_data(data, append)
         if append:
             self.redis.append(key, data)
         else:
mlrun/datastore/s3.py CHANGED
@@ -183,6 +183,7 @@ class S3Store(DataStore):
         return obj.get()["Body"].read()

     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         self.s3.Object(bucket, key).put(Body=data)

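All of these put implementations are reached through mlrun's data-item abstraction; a hedged usage sketch, assuming mlrun.get_dataitem is available in your environment and the target path is writable:

import mlrun

# a local path here; s3://, gs://, az://, v3io://, dbfs:// URLs route to the
# corresponding DataStore subclass shown in this diff
item = mlrun.get_dataitem("/tmp/example-artifact.txt")

# str payloads open the target in text mode, bytes/bytearray in binary mode,
# as decided by DataStore._prepare_put_data
item.put("hello datastore")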
mlrun/datastore/storeytargets.py ADDED
@@ -0,0 +1,147 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import storey
+from mergedeep import merge
+from storey import V3ioDriver
+
+import mlrun
+import mlrun.model_monitoring.helpers
+from mlrun.datastore.base import DataStore
+
+from .utils import (
+    parse_kafka_url,
+)
+
+"""
+Storey targets expect storage_options, which may contain credentials.
+To avoid passing it openly within the graph, we use wrapper classes.
+"""
+
+
+def get_url_and_storage_options(path, external_storage_options=None):
+    store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+    storage_options = store.get_storage_options()
+    if storage_options and external_storage_options:
+        # merge external storage options with the store's storage options. storage_options takes precedence
+        storage_options = merge(external_storage_options, storage_options)
+    else:
+        storage_options = storage_options or external_storage_options
+    return url, DataStore._sanitize_storage_options(storage_options)
+
+
+class TDEngineStoreyTarget(storey.TDEngineTarget):
+    def __init__(self, *args, **kwargs):
+        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+        super().__init__(*args, **kwargs)
+
+
+class StoreyTargetUtils:
+    @staticmethod
+    def process_args_and_kwargs(args, kwargs):
+        args = list(args)
+        path = args[0] if args else kwargs.get("path")
+        external_storage_options = kwargs.get("storage_options")
+
+        url, storage_options = get_url_and_storage_options(
+            path, external_storage_options
+        )
+
+        if storage_options:
+            kwargs["storage_options"] = storage_options
+        if args:
+            args[0] = url
+        if "path" in kwargs:
+            kwargs["path"] = url
+        return args, kwargs
+
+
+class ParquetStoreyTarget(storey.ParquetTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class CSVStoreyTarget(storey.CSVTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class StreamStoreyTarget(storey.StreamTarget):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+
+        path = args[0] if args else kwargs.get("stream_path")
+        endpoint, storage_options = get_url_and_storage_options(path)
+
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+        access_key = storage_options.get("v3io_access_key")
+        storage = (
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
+        )
+
+        if storage_options:
+            kwargs["storage"] = storage
+        if args:
+            args[0] = endpoint
+        if "stream_path" in kwargs:
+            kwargs["stream_path"] = endpoint
+
+        super().__init__(*args, **kwargs)
+
+
+class KafkaStoreyTarget(storey.KafkaTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        attributes = kwargs.pop("attributes", None)
+        if path and path.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(path)
+            )
+            attributes = merge(attributes, datastore_profile.attributes())
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic = datastore_profile.topic
+        else:
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic, brokers = parse_kafka_url(path, brokers)
+
+        if not topic:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+        kwargs["brokers"] = brokers
+        kwargs["topic"] = topic
+        super().__init__(*args, **kwargs, **attributes)
+
+
+class NoSqlStoreyTarget(storey.NoSqlTarget):
+    pass
+
+
+class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+            path
+        )
+        kwargs["path"] = endpoint + "/" + uri
+        super().__init__(*args, **kwargs)
+
+
+class TSDBStoreyTarget(storey.TSDBTarget):
+    pass
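In get_url_and_storage_options, the store's own options win over caller-supplied ones because mergedeep.merge lets later sources override the destination. A small illustration with hypothetical option dicts:

from mergedeep import merge

external = {"key": "caller-key", "client_kwargs": {"region_name": "us-east-1"}}
from_store = {"key": "store-key", "secret": "store-secret"}

# deep merge: from_store values override external where keys collide
merged = merge(external, from_store)
print(merged)
# {'key': 'store-key', 'client_kwargs': {'region_name': 'us-east-1'}, 'secret': 'store-secret'}

The wrapper classes then hand the resolved URL and sanitized options to the underlying storey target, so credentials are looked up when the step is initialized rather than being serialized into the graph spec.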