mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/constants.py +3 -0
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/common.py +4 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +12 -12
- mlrun/datastore/alibaba_oss.py +1 -0
- mlrun/datastore/azure_blob.py +1 -6
- mlrun/datastore/base.py +12 -0
- mlrun/datastore/dbfs_store.py +1 -5
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +1 -9
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +1 -0
- mlrun/datastore/storeytargets.py +147 -0
- mlrun/datastore/targets.py +67 -69
- mlrun/datastore/v3io.py +1 -0
- mlrun/errors.py +7 -4
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +3 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/model.py +1 -1
- mlrun/model_monitoring/api.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +25 -43
- mlrun/model_monitoring/applications/context.py +206 -70
- mlrun/model_monitoring/controller.py +0 -1
- mlrun/model_monitoring/db/stores/__init__.py +3 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
- mlrun/model_monitoring/db/tsdb/__init__.py +3 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +18 -10
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
- mlrun/model_monitoring/helpers.py +38 -1
- mlrun/model_monitoring/stream_processing.py +8 -26
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/project.py +17 -16
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +9 -0
- mlrun/runtimes/nuclio/application/application.py +131 -55
- mlrun/runtimes/nuclio/function.py +4 -10
- mlrun/runtimes/nuclio/serving.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +16 -0
- mlrun/serving/routers.py +1 -1
- mlrun/serving/server.py +19 -5
- mlrun/serving/states.py +8 -0
- mlrun/serving/v2_serving.py +34 -26
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/RECORD +58 -57
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED

@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
 from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
-    parse_kafka_url,
     select_columns_from_df,
 )

@@ -928,8 +927,9 @@ class ParquetTarget(BaseStoreTarget):
             if time_unit == time_partitioning_granularity:
                 break

+        target_path = self.get_target_path()
         if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
-
+            target_path
         ):
             partition_cols = []

@@ -937,25 +937,16 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))

-        store, path_in_store, target_path = self._get_store_and_path()
-
-        storage_options = store.get_storage_options()
-        if storage_options and self.storage_options:
-            storage_options = merge(storage_options, self.storage_options)
-        else:
-            storage_options = storage_options or self.storage_options
-
         step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
             path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             update_last_written=featureset_status.update_last_written_for_target,
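Across targets.py the writer steps now point at wrapper classes in the new mlrun/datastore/storeytargets.py module (ParquetStoreyTarget, CSVStoreyTarget, StreamStoreyTarget, KafkaStoreyTarget, TSDBStoreyTarget, NoSqlStoreyTarget) rather than at storey classes, and the storage-options/path plumbing removed above presumably moves into those wrappers. What makes this work is the dotted class_name string, which the serving graph resolves to a class at build time. A generic sketch of that resolution (not mlrun's actual loader):

    import importlib

    def resolve_step_class(class_name: str):
        # Split "mlrun.datastore.storeytargets.ParquetStoreyTarget" into the
        # module path and the class attribute, then import and look it up.
        module_name, _, attr = class_name.rpartition(".")
        module = importlib.import_module(module_name)
        return getattr(module, attr)

    # Example: resolve_step_class("mlrun.datastore.storeytargets.ParquetStoreyTarget")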
@@ -1110,17 +1101,16 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        target_path = self.get_target_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
             path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=store.get_storage_options(),
             **self.attributes,
         )

@@ -1334,6 +1324,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
+    def _get_table_and_columns(self, features, key_columns):
         key_columns = list(key_columns.keys())
         table = self._resource.uri
         column_list = self._get_column_list(

@@ -1352,15 +1355,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
             col for col in column_list if col[0] not in aggregate_features
         ]

-
-            name=self.name or self.writer_step_name,
-            after=after,
-            graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
-            columns=column_list,
-            table=table,
-            **self.attributes,
-        )
+        return table, column_list

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
@@ -1483,11 +1478,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"

-
-
-
-    def _get_server_endpoint(self):
-        endpoint, uri = parse_path(self.get_target_path())
+    @staticmethod
+    def get_server_endpoint(path):
+        endpoint, uri = parse_path(path)
         endpoint = endpoint or mlrun.mlconf.redis.url
         if endpoint.startswith("ds://"):
             datastore_profile = datastore_profile_read(endpoint)

@@ -1504,8 +1497,13 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Provide Redis username and password only via secrets"
             )
-
-
+        credentials_prefix = mlrun.get_secret_or_env(key="CREDENTIALS_PREFIX")
+        user = mlrun.get_secret_or_env(
+            "REDIS_USER", default="", prefix=credentials_prefix
+        )
+        password = mlrun.get_secret_or_env(
+            "REDIS_PASSWORD", default="", prefix=credentials_prefix
+        )
         host = parsed_endpoint.hostname
         port = parsed_endpoint.port if parsed_endpoint.port else "6379"
         scheme = parsed_endpoint.scheme

@@ -1519,7 +1517,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         from storey import Table
         from storey.redis_driver import RedisDriver

-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())

         return Table(
             uri,

@@ -1528,7 +1526,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())
         parsed_endpoint = urlparse(endpoint)
         store, path_in_store, path = self._get_store_and_path()
         return {
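The private _get_server_endpoint instance method becomes a public static get_server_endpoint(path), so the two call sites above pass self.get_target_path() explicitly and other code can resolve a Redis endpoint without constructing a target; credentials are now read through mlrun.get_secret_or_env, honoring an optional CREDENTIALS_PREFIX secret. A small usage sketch (the Redis URL and path are placeholders):

    from mlrun.datastore.targets import RedisNoSqlTarget

    # Resolve the Redis endpoint and key prefix from a target path without
    # instantiating RedisNoSqlTarget (placeholder URL/path).
    endpoint, uri = RedisNoSqlTarget.get_server_endpoint(
        "redis://my-redis.example.com:6379/projects/demo/featuresets/my-set"
    )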
@@ -1560,6 +1558,28 @@ class RedisNoSqlTarget(NoSqlBaseTarget):

         return df

+    def add_writer_step(
+        self,
+        graph,
+        after,
+        features,
+        key_columns=None,
+        timestamp_key=None,
+        featureset_status=None,
+    ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            path=self.get_target_path(),
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+

 class StreamTarget(BaseStoreTarget):
     kind = TargetTypes.stream
@@ -1578,29 +1598,22 @@ class StreamTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
-        from storey import V3ioDriver
-
         key_columns = list(key_columns.keys())
-
-        if not path:
-            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
-        endpoint, uri = parse_path(path)
-        storage_options = store.get_storage_options()
-        access_key = storage_options.get("v3io_access_key")
+
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
+        stream_path = self.get_target_path()
+        if not stream_path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

         graph.add_step(
             name=self.name or "StreamTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
             columns=column_list,
-
-                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-            ),
-            stream_path=uri,
+            stream_path=stream_path,
             **self.attributes,
         )

@@ -1676,34 +1689,19 @@ class KafkaTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
-            datastore_profile = datastore_profile_read(self.path)
-            attributes = datastore_profile.attributes()
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic = datastore_profile.topic
-        else:
-            attributes = copy(self.attributes)
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+        path = self.get_target_path()

-        if not
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "KafkaTarget requires a path (topic)"
-            )
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")

         graph.add_step(
             name=self.name or "KafkaTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
             columns=column_list,
-
-
-            **attributes,
+            path=path,
+            attributes=self.attributes,
         )

     def purge(self):
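KafkaTarget stops resolving the topic and brokers itself and now hands the raw path plus its attributes to the KafkaStoreyTarget wrapper. The sketch below simply mirrors the deleted resolution logic and is only an assumption about where it now lives; the new storeytargets module is not shown in this diff:

    # Assumed new home of the broker/topic resolution removed above
    # (mlrun.datastore.storeytargets is not included in this diff).
    from copy import copy

    from mlrun.datastore.datastore_profile import datastore_profile_read
    from mlrun.datastore.utils import parse_kafka_url

    def resolve_topic_and_brokers(path: str, attributes: dict):
        if path.startswith("ds://"):
            profile = datastore_profile_read(path)
            attributes = profile.attributes()
            brokers = attributes.pop("brokers", attributes.pop("bootstrap_servers", None))
            topic = profile.topic
        else:
            attributes = copy(attributes)
            brokers = attributes.pop("brokers", attributes.pop("bootstrap_servers", None))
            topic, brokers = parse_kafka_url(path, brokers)
        return topic, brokers, attributes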
@@ -1740,7 +1738,7 @@ class TSDBTarget(BaseStoreTarget):

         graph.add_step(
             name=self.name or "TSDBTarget",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
             after=after,
             graph_shape="cylinder",
             path=uri,

@@ -2029,7 +2027,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
mlrun/datastore/v3io.py
CHANGED

@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
     ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        data, _ = self._prepare_put_data(data, append)
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         buffer_offset = 0
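The only change here routes the payload through a shared _prepare_put_data helper before the chunked upload. Its implementation is not part of this hunk (mlrun/datastore/base.py, which grows by 12 lines in this release, is the likely home); a hypothetical sketch of what such a helper would do:

    def _prepare_put_data(data, append: bool):
        # Hypothetical sketch: normalize str payloads to bytes so the chunked
        # upload can slice the buffer by byte offsets; the real helper lives in
        # the datastore base class and is not shown in this diff.
        if isinstance(data, str):
            data = data.encode("utf-8")
        return data, append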
mlrun/errors.py
CHANGED

@@ -29,11 +29,14 @@ class MLRunBaseError(Exception):
     pass


-class
+class MLRunTaskNotReadyError(MLRunBaseError):
     """indicate we are trying to read a value which is not ready
     or need to come from a job which is in progress"""


+MLRunTaskNotReady = MLRunTaskNotReadyError  # kept for BC only
+
+
 class MLRunHTTPError(MLRunBaseError, requests.HTTPError):
     def __init__(
         self,

@@ -205,15 +208,15 @@ class MLRunTimeoutError(MLRunHTTPStatusError, TimeoutError):
     error_status_code = HTTPStatus.GATEWAY_TIMEOUT.value


-class
+class MLRunInvalidMMStoreTypeError(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value


-class
+class MLRunStreamConnectionFailureError(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value


-class
+class MLRunTSDBConnectionFailureError(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value

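The renamed exception keeps its old name as a module-level alias, so existing except clauses and isinstance checks keep working. A quick illustration:

    import mlrun.errors

    # Both names refer to the same class after this change.
    assert mlrun.errors.MLRunTaskNotReady is mlrun.errors.MLRunTaskNotReadyError

    try:
        raise mlrun.errors.MLRunTaskNotReadyError("feature vector dataset is not ready")
    except mlrun.errors.MLRunTaskNotReady as exc:
        print(f"not ready yet: {exc}")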
mlrun/feature_store/feature_vector.py
CHANGED

@@ -1086,7 +1086,9 @@ class OfflineVectorResponse:
     def to_dataframe(self, to_pandas=True):
         """return result as dataframe"""
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         return self._merger.get_df(to_pandas=to_pandas)

     def to_parquet(self, target_path, **kw):
mlrun/feature_store/retrieval/job.py
CHANGED

@@ -156,7 +156,9 @@ class RemoteVectorResponse:

     def _is_ready(self):
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         self.vector.reload()

     def to_dataframe(self, columns=None, df_module=None, **kwargs):
mlrun/frameworks/sklearn/mlrun_interface.py
CHANGED

@@ -97,7 +97,7 @@ class SKLearnMLRunInterface(MLRunInterface, ABC):

         def wrapper(
             self: SKLearnTypes.ModelType,
-            X: SKLearnTypes.DatasetType,
+            X: SKLearnTypes.DatasetType,  # noqa: N803 - should be lowercase "x", kept for BC
             y: SKLearnTypes.DatasetType = None,
             *args,
             **kwargs,

@@ -124,7 +124,12 @@ class SKLearnMLRunInterface(MLRunInterface, ABC):

         return wrapper

-    def mlrun_predict(
+    def mlrun_predict(
+        self,
+        X: SKLearnTypes.DatasetType,  # noqa: N803 - should be lowercase "x", kept for BC
+        *args,
+        **kwargs,
+    ):
         """
         MLRun's wrapper for the common ML API predict method.
         """

@@ -136,7 +141,12 @@ class SKLearnMLRunInterface(MLRunInterface, ABC):

         return y_pred

-    def mlrun_predict_proba(
+    def mlrun_predict_proba(
+        self,
+        X: SKLearnTypes.DatasetType,  # noqa: N803 - should be lowercase "x", kept for BC
+        *args,
+        **kwargs,
+    ):
         """
         MLRun's wrapper for the common ML API predict_proba method.
         """
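The capitalized X parameter is kept deliberately (hence the noqa: N803 markers): renaming it to lowercase x would break any caller that passes the dataset by keyword. A minimal, mlrun-free illustration of the failure mode the rename would introduce:

    def predict_old(X):  # noqa: N803 - parameter name kept, as in the diff above
        return X

    def predict_new(x):  # hypothetical rename that the diff avoids
        return x

    predict_old(X=[1, 2, 3])       # works
    try:
        predict_new(X=[1, 2, 3])   # TypeError: unexpected keyword argument 'X'
    except TypeError as exc:
        print(exc)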
mlrun/model.py
CHANGED
mlrun/model_monitoring/api.py
CHANGED

@@ -147,8 +147,7 @@ def record_results(
                                    on the provided `endpoint_id`.
     :param function_name:          If a new model endpoint is created, use this function name for generating the
                                    function URI.
-    :param context:                MLRun context. Note that the context is required
-                                   following the batch drift job.
+    :param context:                MLRun context. Note that the context is required generating the model endpoint.
     :param infer_results_df:       DataFrame that will be stored under the model endpoint parquet target. Will be
                                    used for doing the drift analysis. Please make sure that the dataframe includes
                                    both feature names and label columns.
mlrun/model_monitoring/applications/_application_steps.py
CHANGED

@@ -11,19 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import json
-import
-from typing import Optional
+from typing import Any, Optional, Union

-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.alert as alert_objects
 import mlrun.common.schemas.model_monitoring.constants as mm_constant
 import mlrun.datastore
-import mlrun.
-import mlrun.utils.helpers
-import mlrun.utils.v3io_clients
+import mlrun.model_monitoring
 from mlrun.model_monitoring.helpers import get_stream_path
+from mlrun.serving import GraphContext
 from mlrun.serving.utils import StepToDict
 from mlrun.utils import logger

@@ -62,7 +59,7 @@ class _PushToMonitoringWriter(StepToDict):
         self,
         event: tuple[
             list[
-
+                Union[
                     ModelMonitoringApplicationResult, ModelMonitoringApplicationMetric
                 ]
             ],

@@ -121,50 +118,35 @@ class _PushToMonitoringWriter(StepToDict):


 class _PrepareMonitoringEvent(StepToDict):
-    def __init__(self, application_name: str):
+    def __init__(self, context: GraphContext, application_name: str) -> None:
         """
         Class for preparing the application event for the application step.

         :param application_name: Application name.
         """
+        self.graph_context = context
+        self.application_name = application_name
+        self.model_endpoints: dict[str, mlrun.model_monitoring.ModelEndpoint] = {}

-
-        self.model_endpoints = {}
-
-    def do(self, event: dict[str, dict]) -> MonitoringApplicationContext:
+    def do(self, event: dict[str, Any]) -> MonitoringApplicationContext:
         """
         Prepare the application event for the application step.

         :param event: Application event.
-        :return: Application
+        :return: Application context.
         """
-
-
-
-
-
-
-
-        application_context = MonitoringApplicationContext().from_dict(event)
+        application_context = MonitoringApplicationContext(
+            graph_context=self.graph_context,
+            application_name=self.application_name,
+            event=event,
+            model_endpoint_dict=self.model_endpoints,
+        )
+
         self.model_endpoints.setdefault(
             application_context.endpoint_id, application_context.model_endpoint
         )
-        return application_context

-
-def _create_mlrun_context(app_name: str):
-    artifact_path = mlrun.utils.helpers.template_artifact_path(
-        mlrun.mlconf.artifact_path, mlrun.mlconf.default_project
-    )
-    context = mlrun.get_or_create_ctx(
-        f"{app_name}-logger",
-        spec={
-            "metadata": {"labels": {"kind": mlrun.runtimes.RuntimeKinds.serving}},
-            "spec": {mlrun.utils.helpers.RunKeys.output_path: artifact_path},
-        },
-    )
-    context.__class__ = MonitoringApplicationContext
-    return context
+        return application_context


 class _ApplicationErrorHandler(StepToDict):

@@ -181,13 +163,13 @@ class _ApplicationErrorHandler(StepToDict):

         logger.error(f"Error in application step: {event}")

-        event_data =
+        event_data = alert_objects.Event(
             kind=alert_objects.EventKind.MM_APP_FAILED,
-            entity=
-
-
-
-
+            entity=alert_objects.EventEntities(
+                kind=alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
+                project=self.project,
+                ids=[f"{self.project}_{event.body.application_name}"],
+            ),
             value_dict={
                 "Error": event.error,
                 "Timestamp": event.timestamp,
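_PrepareMonitoringEvent now receives the serving GraphContext at construction time and builds the MonitoringApplicationContext directly from the event, replacing the removed _create_mlrun_context helper that patched a run context's class. A sketch of constructing the step as the new __init__ signature implies (how the serving graph actually supplies the context is not shown in this diff):

    from mlrun.model_monitoring.applications._application_steps import (
        _PrepareMonitoringEvent,
    )

    def build_prepare_step(graph_context, application_name: str):
        # graph_context is the mlrun.serving.GraphContext the runtime passes in.
        return _PrepareMonitoringEvent(
            context=graph_context, application_name=application_name
        )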