mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (58)
  1. mlrun/common/constants.py +3 -0
  2. mlrun/common/db/sql_session.py +3 -2
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/api_gateway.py +6 -6
  5. mlrun/common/schemas/common.py +4 -4
  6. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +12 -12
  9. mlrun/datastore/alibaba_oss.py +1 -0
  10. mlrun/datastore/azure_blob.py +1 -6
  11. mlrun/datastore/base.py +12 -0
  12. mlrun/datastore/dbfs_store.py +1 -5
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +1 -9
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +1 -0
  17. mlrun/datastore/storeytargets.py +147 -0
  18. mlrun/datastore/targets.py +67 -69
  19. mlrun/datastore/v3io.py +1 -0
  20. mlrun/errors.py +7 -4
  21. mlrun/feature_store/feature_vector.py +3 -1
  22. mlrun/feature_store/retrieval/job.py +3 -1
  23. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  24. mlrun/model.py +1 -1
  25. mlrun/model_monitoring/api.py +1 -2
  26. mlrun/model_monitoring/applications/_application_steps.py +25 -43
  27. mlrun/model_monitoring/applications/context.py +206 -70
  28. mlrun/model_monitoring/controller.py +0 -1
  29. mlrun/model_monitoring/db/stores/__init__.py +3 -3
  30. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  31. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
  32. mlrun/model_monitoring/db/tsdb/__init__.py +3 -3
  33. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +18 -10
  34. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
  35. mlrun/model_monitoring/helpers.py +38 -1
  36. mlrun/model_monitoring/stream_processing.py +8 -26
  37. mlrun/package/packagers/default_packager.py +2 -2
  38. mlrun/projects/project.py +17 -16
  39. mlrun/runtimes/funcdoc.py +1 -1
  40. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  41. mlrun/runtimes/nuclio/application/application.py +131 -55
  42. mlrun/runtimes/nuclio/function.py +4 -10
  43. mlrun/runtimes/nuclio/serving.py +2 -2
  44. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  45. mlrun/runtimes/utils.py +16 -0
  46. mlrun/serving/routers.py +1 -1
  47. mlrun/serving/server.py +19 -5
  48. mlrun/serving/states.py +8 -0
  49. mlrun/serving/v2_serving.py +34 -26
  50. mlrun/utils/helpers.py +12 -2
  51. mlrun/utils/v3io_clients.py +2 -2
  52. mlrun/utils/version/version.json +2 -2
  53. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/METADATA +2 -2
  54. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/RECORD +58 -57
  55. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/WHEEL +1 -1
  56. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/LICENSE +0 -0
  57. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/entry_points.txt +0 -0
  58. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED
@@ -24,6 +24,7 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
+from mlrun.common.schemas import EventFieldType
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
@@ -64,14 +65,17 @@ class V3IOTSDBConnector(TSDBConnector):
         self.container = container

         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self._frames_client: v3io_frames.client.ClientBase = (
-            self._get_v3io_frames_client(self.container)
-        )
-
+        self._frames_client: Optional[v3io_frames.client.ClientBase] = None
         self._init_tables_path()
+        self._create_table = create_table

-        if create_table:
-            self.create_tables()
+    @property
+    def frames_client(self) -> v3io_frames.client.ClientBase:
+        if not self._frames_client:
+            self._frames_client = self._get_v3io_frames_client(self.container)
+            if self._create_table:
+                self.create_tables()
+        return self._frames_client

     def _init_tables_path(self):
         self.tables = {}
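
The constructor change above swaps an eagerly created v3io-frames client for a lazily initialized `frames_client` property, so the connector no longer connects (or creates TSDB tables) until the client is first used. Below is a minimal, framework-free sketch of that lazy-initialization pattern; `LazyTSDBClientHolder`, `_connect`, and `_create_tables` are illustrative stand-ins, not mlrun APIs.

```python
# Sketch of the lazy-initialization pattern adopted above (illustrative names only).
from typing import Optional


class LazyTSDBClientHolder:
    def __init__(self, create_table: bool = False) -> None:
        self._client: Optional[object] = None   # deferred, like _frames_client above
        self._create_table = create_table

    @property
    def client(self) -> object:
        # The expensive connection happens on first access, not at construction time.
        if self._client is None:
            self._client = self._connect()
            if self._create_table:
                self._create_tables()
        return self._client

    def _connect(self) -> object:
        print("connecting to the TSDB backend...")
        return object()

    def _create_tables(self) -> None:
        print("creating tables...")


holder = LazyTSDBClientHolder(create_table=True)
_ = holder.client  # first access: connects and creates tables
_ = holder.client  # later accesses reuse the cached client
```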
@@ -151,7 +155,7 @@ class V3IOTSDBConnector(TSDBConnector):
         for table_name in application_tables:
             logger.info("Creating table in V3IO TSDB", table_name=table_name)
             table = self.tables[table_name]
-            self._frames_client.create(
+            self.frames_client.create(
                 backend=_TSDB_BE,
                 table=table,
                 if_exists=v3io_frames.IGNORE,
@@ -161,8 +165,9 @@ class V3IOTSDBConnector(TSDBConnector):
     def apply_monitoring_stream_steps(
         self,
         graph,
-        tsdb_batching_max_events: int = 10,
-        tsdb_batching_timeout_secs: int = 300,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        sample_window: int = 10,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -173,6 +178,7 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -197,17 +203,23 @@ class V3IOTSDBConnector(TSDBConnector):
             key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )

+        # Emits the event in window size of events based on sample_window size (10 by default)
+        graph.add_step(
+            "storey.steps.SampleWindow",
+            name="sample",
+            after="Rename",
+            window_size=sample_window,
+            key=EventFieldType.ENDPOINT_ID,
+        )
+
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
         # stats and details about the events

-        def apply_process_before_tsdb():
-            graph.add_step(
-                "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
-                name="ProcessBeforeTSDB",
-                after="sample",
-            )
-
-        apply_process_before_tsdb()
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
+            name="ProcessBeforeTSDB",
+            after="sample",
+        )

         # Unpacked keys from each dictionary and write to TSDB target
         def apply_filter_and_unpacked_keys(name, keys):
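
This hunk moves the per-endpoint sampling step into the TSDB connector and exposes it through the new `sample_window` parameter; the wrapper functions around `graph.add_step` are also inlined. Conceptually, `storey.steps.SampleWindow` forwards one event per window of `sample_window` events for each key. The sketch below is a framework-free illustration of that behavior under the stated assumption (one emitted event per window, keyed by endpoint ID); it is not storey's actual implementation.

```python
# Illustrative per-key downsampler: emit one event out of every `window_size` events.
from collections import defaultdict


class SampleWindowSketch:
    def __init__(self, window_size: int = 10) -> None:
        self.window_size = window_size
        self._counters = defaultdict(int)

    def process(self, key, event):
        count = self._counters[key]
        self._counters[key] = (count + 1) % self.window_size
        return event if count == 0 else None  # None means the event is dropped


sampler = SampleWindowSketch(window_size=10)
emitted = [
    event
    for i in range(25)
    if (event := sampler.process("endpoint-1", {"index": i})) is not None
]
print(len(emitted))  # 3 -> one event per window of 10, out of 25 events
```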
@@ -273,8 +285,8 @@ class V3IOTSDBConnector(TSDBConnector):
     def handle_model_error(
         self,
         graph,
-        tsdb_batching_max_events: int = 10,
-        tsdb_batching_timeout_secs: int = 60,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
         **kwargs,
     ) -> None:
         graph.add_step(
@@ -333,7 +345,7 @@ class V3IOTSDBConnector(TSDBConnector):
             raise ValueError(f"Invalid {kind = }")

         try:
-            self._frames_client.write(
+            self.frames_client.write(
                 backend=_TSDB_BE,
                 table=table,
                 dfs=pd.DataFrame.from_records([event]),
@@ -360,7 +372,7 @@ class V3IOTSDBConnector(TSDBConnector):
         tables = mm_schemas.V3IOTSDBTables.list()
         for table_to_delete in tables:
             try:
-                self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
+                self.frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
             except v3io_frames.DeleteError as e:
                 logger.warning(
                     f"Failed to delete TSDB table '{table}'",
@@ -476,7 +488,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self._frames_client.read(
+            df = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -579,7 +591,7 @@ class V3IOTSDBConnector(TSDBConnector):

         logger.debug("Querying V3IO TSDB", query=query)

-        df: pd.DataFrame = self._frames_client.read(
+        df: pd.DataFrame = self.frames_client.read(
             backend=_TSDB_BE,
             start=start,
             end=end,
mlrun/model_monitoring/helpers.py CHANGED
@@ -19,9 +19,11 @@ import numpy as np
 import pandas as pd

 import mlrun
+import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
+import mlrun.model_monitoring
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
     ModelEndpointMonitoringMetricType,
@@ -253,7 +255,7 @@ def calculate_inputs_statistics(
     )

     # Recalculate the histograms over the bins that are set in the sample-set of the end point:
-    for feature in inputs_statistics.keys():
+    for feature in list(inputs_statistics):
         if feature in sample_set_statistics:
             counts, bins = np.histogram(
                 inputs[feature].to_numpy(),
@@ -270,6 +272,9 @@ def calculate_inputs_statistics(
                     inputs_statistics[feature]["hist"]
                 )
             )
+        else:
+            # If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
+            inputs_statistics.pop(feature)

     return inputs_statistics
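
The loop header now materializes the keys with `list(inputs_statistics)` because the new `else` branch pops entries from that same dictionary; removing keys from a dict while iterating over it directly raises `RuntimeError`. A small self-contained illustration of why the snapshot is needed:

```python
# Iterating over a snapshot of the keys makes in-loop pop() safe.
stats = {
    "f1": {"hist": [1, 2, 3]},
    "f2": {},                   # no histogram -> should be dropped
    "f3": {"hist": [4, 5]},
}

for feature in list(stats):     # snapshot of the keys
    if "hist" not in stats[feature]:
        stats.pop(feature)      # mutating `stats` here is fine

print(sorted(stats))  # ['f1', 'f3']

# By contrast, `for feature in stats: stats.pop(feature)` raises
# "RuntimeError: dictionary changed size during iteration".
```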
@@ -322,3 +327,35 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
         name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
         full_name=get_invocations_fqn(project),
     )
+
+
+def enrich_model_endpoint_with_model_uri(
+    model_endpoint: ModelEndpoint,
+    model_obj: mlrun.artifacts.ModelArtifact,
+):
+    """
+    Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
+    to the model object that includes the project, db_key, iter, and tree.
+    In addition, we verify that the model object is of type `ModelArtifact`.
+
+    :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
+    :param model_obj:      An object representing the model artifact.
+
+    :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
+    """
+    mlrun.utils.helpers.verify_field_of_type(
+        field_name="model_endpoint.spec.model_uri",
+        field_value=model_obj,
+        expected_type=mlrun.artifacts.ModelArtifact,
+    )
+
+    # Update model_uri with a unique reference to handle future changes
+    model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+        project=model_endpoint.metadata.project,
+        key=model_obj.db_key,
+        iter=model_obj.iter,
+        tree=model_obj.tree,
+    )
+    model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
+        kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
+    )
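
A hedged usage sketch of the new helper. It assumes a configured MLRun environment, an existing `ModelEndpoint` object, and that `project.get_artifact` returns the corresponding `ModelArtifact`; only `enrich_model_endpoint_with_model_uri` itself comes from this diff.

```python
# Illustrative wiring only; `project` and `model_endpoint` are assumed to exist already.
from mlrun.model_monitoring.helpers import enrich_model_endpoint_with_model_uri


def attach_model_uri(project, model_endpoint, model_key: str) -> str:
    model_obj = project.get_artifact(model_key)  # must resolve to a ModelArtifact
    enrich_model_endpoint_with_model_uri(
        model_endpoint=model_endpoint, model_obj=model_obj
    )
    # The endpoint's model_uri now encodes project, db_key, iter, and tree.
    return model_endpoint.spec.model_uri
```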
mlrun/model_monitoring/stream_processing.py CHANGED
@@ -37,6 +37,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     ModelEndpointTarget,
     ProjectSecretKeys,
 )
+from mlrun.model_monitoring.db import StoreBase, TSDBConnector
 from mlrun.utils import logger


@@ -48,14 +49,12 @@ class EventStreamProcessor:
         parquet_batching_max_events: int,
         parquet_batching_timeout_secs: int,
         parquet_target: str,
-        sample_window: int = 10,
         aggregate_windows: typing.Optional[list[str]] = None,
-        aggregate_period: str = "30s",
+        aggregate_period: str = "5m",
         model_monitoring_access_key: str = None,
     ):
         # General configurations, mainly used for the storey steps in the future serving graph
         self.project = project
-        self.sample_window = sample_window
         self.aggregate_windows = aggregate_windows or ["5m", "1h"]
         self.aggregate_period = aggregate_period

@@ -133,7 +132,8 @@ class EventStreamProcessor:
     def apply_monitoring_serving_graph(
         self,
         fn: mlrun.runtimes.ServingRuntime,
-        secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
+        tsdb_connector: TSDBConnector,
+        endpoint_store: StoreBase,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -161,8 +161,8 @@ class EventStreamProcessor:
         using CE, the parquet target path is based on the defined MLRun artifact path.

         :param fn:             A serving function.
-        :param secret_provider: An optional callable function that provides the connection string from the project
-                                secret.
+        :param tsdb_connector: Time series database connector.
+        :param endpoint_store: KV/SQL store used for endpoint data.
         """

         graph = typing.cast(
@@ -190,10 +190,6 @@ class EventStreamProcessor:
             _fn="(event.get('error') is not None)",
         )

-        tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-            project=self.project, secret_provider=secret_provider
-        )
-
         tsdb_connector.handle_model_error(
             graph,
         )
@@ -306,24 +302,9 @@ class EventStreamProcessor:
             table=self.kv_path,
         )

-        store_object = mlrun.model_monitoring.get_store_object(
-            project=self.project, secret_provider=secret_provider
-        )
-        if store_object.type == ModelEndpointTarget.V3IO_NOSQL:
+        if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
             apply_infer_schema()

-        # Emits the event in window size of events based on sample_window size (10 by default)
-        def apply_storey_sample_window():
-            graph.add_step(
-                "storey.steps.SampleWindow",
-                name="sample",
-                after="Rename",
-                window_size=self.sample_window,
-                key=EventFieldType.ENDPOINT_ID,
-            )
-
-        apply_storey_sample_window()
-
         tsdb_connector.apply_monitoring_stream_steps(graph=graph)

         # Parquet branch
@@ -353,6 +334,7 @@ class EventStreamProcessor:
             index_cols=[EventFieldType.ENDPOINT_ID],
             key_bucketing_number=0,
             time_partitioning_granularity="hour",
+            time_field=EventFieldType.TIMESTAMP,
             partition_cols=["$key", "$year", "$month", "$day", "$hour"],
         )
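
With the new signature, the caller resolves the TSDB connector and the endpoint store (via the helpers that the removed lines used to call internally) and injects them into `apply_monitoring_serving_graph`. A hedged sketch of that call pattern; it assumes an initialized MLRun environment plus an existing `EventStreamProcessor` instance and serving function supplied by the caller.

```python
# Illustrative wiring only; processor, serving_fn, project_name and secret_provider
# are assumed to be provided by the surrounding (server-side) code.
import mlrun.model_monitoring


def wire_monitoring_graph(processor, serving_fn, project_name, secret_provider=None):
    tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
        project=project_name, secret_provider=secret_provider
    )
    endpoint_store = mlrun.model_monitoring.get_store_object(
        project=project_name, secret_provider=secret_provider
    )
    processor.apply_monitoring_serving_graph(
        fn=serving_fn,
        tsdb_connector=tsdb_connector,
        endpoint_store=endpoint_store,
    )
```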
mlrun/package/packagers/default_packager.py CHANGED
@@ -34,7 +34,7 @@ class _DefaultPackagerMeta(ABCMeta):
     dynamically generated docstring that will include a summary of the packager.
     """

-    def __new__(mcls, name: str, bases: tuple, namespace: dict, **kwargs):
+    def __new__(cls, name: str, bases: tuple, namespace: dict, **kwargs):
         """
         Create a new DefaultPackager metaclass that saves the original packager docstring to another attribute named
         `_packager_doc`.
@@ -48,7 +48,7 @@ class _DefaultPackagerMeta(ABCMeta):
         namespace["_packager_doc"] = namespace.get("__doc__", "")

         # Continue creating the metaclass:
-        return super().__new__(mcls, name, bases, namespace, **kwargs)
+        return super().__new__(cls, name, bases, namespace, **kwargs)

     @property
     def __doc__(cls: type["DefaultPackager"]) -> str:
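
The `mcls` to `cls` rename is purely stylistic (naming-lint driven); behavior is unchanged. For context, here is a minimal standalone sketch of the docstring-stashing metaclass pattern this class uses; `_DocStashMeta` and `Demo` are illustrative names.

```python
# Minimal sketch: the metaclass stashes the original docstring before the class is built.
from abc import ABCMeta


class _DocStashMeta(ABCMeta):
    def __new__(cls, name: str, bases: tuple, namespace: dict, **kwargs):
        namespace["_packager_doc"] = namespace.get("__doc__", "")
        return super().__new__(cls, name, bases, namespace, **kwargs)


class Demo(metaclass=_DocStashMeta):
    """Original docstring."""


print(Demo._packager_doc)  # -> Original docstring.
```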
mlrun/projects/project.py CHANGED
@@ -1557,15 +1557,15 @@ class MlrunProject(ModelObj):
         self,
         item,
         body=None,
-        tag="",
-        local_path="",
-        artifact_path=None,
-        format=None,
-        upload=None,
-        labels=None,
-        target_path=None,
+        tag: str = "",
+        local_path: str = "",
+        artifact_path: Optional[str] = None,
+        format: Optional[str] = None,
+        upload: Optional[bool] = None,
+        labels: Optional[dict[str, str]] = None,
+        target_path: Optional[str] = None,
         **kwargs,
-    ):
+    ) -> Artifact:
         """Log an output artifact and optionally upload it to datastore

         If the artifact already exists with the same key and tag, it will be overwritten.
@@ -1664,7 +1664,7 @@ class MlrunProject(ModelObj):
         stats=None,
         target_path="",
         extra_data=None,
-        label_column: str = None,
+        label_column: Optional[str] = None,
         **kwargs,
     ) -> DatasetArtifact:
         """
@@ -1741,15 +1741,15 @@ class MlrunProject(ModelObj):
         artifact_path=None,
         upload=None,
         labels=None,
-        inputs: list[Feature] = None,
-        outputs: list[Feature] = None,
-        feature_vector: str = None,
-        feature_weights: list = None,
+        inputs: Optional[list[Feature]] = None,
+        outputs: Optional[list[Feature]] = None,
+        feature_vector: Optional[str] = None,
+        feature_weights: Optional[list] = None,
         training_set=None,
         label_column=None,
         extra_data=None,
         **kwargs,
-    ):
+    ) -> ModelArtifact:
         """Log a model artifact and optionally upload it to datastore

         If the model already exists with the same key and tag, it will be overwritten.
@@ -3040,8 +3040,9 @@ class MlrunProject(ModelObj):
                 "Remote repo is not defined, use .create_remote() + push()"
             )

-        if engine not in ["remote"]:
-            # for remote runs we don't require the functions to be synced as they can be loaded dynamically during run
+        if engine not in ["remote"] and not schedule:
+            # For remote/scheduled runs we don't require the functions to be synced as they can be loaded dynamically
+            # during run
             self.sync_functions(always=sync)
             if not self.spec._function_objects:
                 raise ValueError(
mlrun/runtimes/funcdoc.py CHANGED
@@ -247,7 +247,7 @@ class ASTVisitor(ast.NodeVisitor):
         self.exprs.append(node)
         super().generic_visit(node)

-    def visit_FunctionDef(self, node):
+    def visit_FunctionDef(self, node):  # noqa: N802
         self.funcs.append(node)
         self.generic_visit(node)
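
The added `# noqa: N802` suppresses pep8-naming's function-name rule. It is needed because `ast.NodeVisitor` dispatches by the exact method name `visit_<NodeClassName>`, so `visit_FunctionDef` must keep its camelCase suffix. A self-contained illustration:

```python
# ast.NodeVisitor calls visit_FunctionDef for every ast.FunctionDef node it encounters.
import ast


class FunctionCollector(ast.NodeVisitor):
    def __init__(self) -> None:
        self.funcs = []

    def visit_FunctionDef(self, node):  # noqa: N802  (name is fixed by ast's dispatch)
        self.funcs.append(node.name)
        self.generic_visit(node)


tree = ast.parse("def foo():\n    def bar():\n        pass\n")
collector = FunctionCollector()
collector.visit(tree)
print(collector.funcs)  # ['foo', 'bar']
```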
mlrun/runtimes/nuclio/api_gateway.py CHANGED
@@ -578,6 +578,15 @@ class APIGateway(ModelObj):
             "true"
         )

+    def with_gateway_timeout(self, gateway_timeout: int):
+        """
+        Set gateway proxy connect/read/send timeout annotations
+        :param gateway_timeout: The timeout in seconds
+        """
+        mlrun.runtimes.utils.enrich_gateway_timeout_annotations(
+            self.metadata.annotations, gateway_timeout
+        )
+
     @classmethod
     def from_scheme(cls, api_gateway: schemas.APIGateway):
         project = api_gateway.metadata.labels.get(
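
`with_gateway_timeout` delegates to `mlrun.runtimes.utils.enrich_gateway_timeout_annotations` (mlrun/runtimes/utils.py also changed in this release). The exact annotation keys are not visible in this diff; the sketch below uses nginx-ingress-style proxy timeout keys purely as an assumption to illustrate the shape of such a helper, not the real implementation.

```python
# Hypothetical sketch; the real helper may use different annotation keys.
def enrich_gateway_timeout_annotations(annotations: dict, gateway_timeout: int) -> None:
    for key in (
        "nginx.ingress.kubernetes.io/proxy-connect-timeout",  # assumed key
        "nginx.ingress.kubernetes.io/proxy-read-timeout",     # assumed key
        "nginx.ingress.kubernetes.io/proxy-send-timeout",     # assumed key
    ):
        annotations[key] = str(gateway_timeout)


annotations: dict = {}
enrich_gateway_timeout_annotations(annotations, 120)
print(annotations)
```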