mlrun 1.5.0rc12__py3-none-any.whl → 1.5.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +31 -2
- mlrun/api/api/endpoints/functions.py +110 -52
- mlrun/api/crud/model_monitoring/deployment.py +208 -38
- mlrun/api/crud/model_monitoring/helpers.py +19 -6
- mlrun/api/crud/model_monitoring/model_endpoints.py +14 -1
- mlrun/api/db/sqldb/db.py +3 -1
- mlrun/api/utils/builder.py +2 -4
- mlrun/common/model_monitoring/helpers.py +19 -5
- mlrun/common/schemas/model_monitoring/constants.py +69 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/config.py +30 -12
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/sources.py +4 -30
- mlrun/datastore/targets.py +68 -31
- mlrun/db/httpdb.py +20 -6
- mlrun/feature_store/api.py +3 -31
- mlrun/feature_store/feature_vector.py +1 -1
- mlrun/feature_store/retrieval/base.py +8 -3
- mlrun/launcher/remote.py +3 -3
- mlrun/lists.py +11 -0
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +1 -1
- mlrun/model_monitoring/application.py +313 -0
- mlrun/model_monitoring/batch_application.py +526 -0
- mlrun/model_monitoring/batch_application_handler.py +32 -0
- mlrun/model_monitoring/evidently_application.py +89 -0
- mlrun/model_monitoring/helpers.py +39 -3
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +37 -0
- mlrun/model_monitoring/tracking_policy.py +4 -4
- mlrun/model_monitoring/writer.py +37 -0
- mlrun/projects/pipelines.py +38 -4
- mlrun/projects/project.py +257 -43
- mlrun/run.py +5 -2
- mlrun/runtimes/__init__.py +2 -0
- mlrun/runtimes/function.py +2 -1
- mlrun/utils/helpers.py +12 -0
- mlrun/utils/http.py +3 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/METADATA +5 -5
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/RECORD +45 -40
- /mlrun/model_monitoring/{model_monitoring_batch.py → batch.py} +0 -0
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/LICENSE +0 -0
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.5.0rc12.dist-info → mlrun-1.5.0rc13.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -77,6 +77,30 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
 
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+
+
+class ApplicationEvent:
+    APPLICATION_NAME = "application_name"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    SCHEDULE_TIME = "schedule_time"
+    LAST_REQUEST = "last_request"
+    ENDPOINT_ID = "endpoint_id"
+    OUTPUT_STREAM_URI = "output_stream_uri"
+
+
+class WriterEvent:
+    APPLICATION_NAME = "application_name"
+    ENDPOINT_ID = "endpoint_id"
+    SCHEDULE_TIME = "schedule_time"
+    RESULT_NAME = "result_name"
+    RESULT_VALUE = "result_value"
+    RESULT_KIND = "result_kind"
+    RESULT_STATUS = "result_status"
+    RESULT_EXTRA_DATA = "result_extra_data"
+
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -106,6 +130,7 @@ class ModelEndpointTarget:
 class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
+    PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
     STREAM_PATH = "STREAM_PATH"
 
@@ -120,6 +145,7 @@ class FileTargetKind:
     EVENTS = "events"
     STREAM = "stream"
     PARQUET = "parquet"
+    BATCH_CONTROLLER_PARQUET = "batch_controller_parquet"
     LOG_STREAM = "log_stream"
 
 
@@ -143,6 +169,22 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
+class MonitoringFunctionNames:
+    WRITER = "model-monitoring-writer"
+    BATCH = "model-monitoring-batch"
+    BATCH_APPLICATION = "model-monitoring-batch-application"
+    STREAM = None
+
+    @staticmethod
+    def all():
+        return [
+            MonitoringFunctionNames.WRITER,
+            MonitoringFunctionNames.STREAM,
+            MonitoringFunctionNames.BATCH,
+            MonitoringFunctionNames.BATCH_APPLICATION,
+        ]
+
+
 @dataclass
 class FunctionURI:
     project: str
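The new MonitoringFunctionNames class centralizes the names of the model-monitoring infrastructure functions (STREAM is None because the stream function keeps the project's default name). A minimal usage sketch, assuming the class is importable from the constants module shown in this diff:

    # Sketch: iterate over the monitoring infrastructure function names.
    from mlrun.common.schemas.model_monitoring.constants import MonitoringFunctionNames

    for name in MonitoringFunctionNames.all():
        # STREAM is None, so a caller would substitute the default stream name here
        print(name or "model-monitoring-stream")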
@@ -208,3 +250,30 @@ class DriftStatus(Enum):
     NO_DRIFT = "NO_DRIFT"
     DRIFT_DETECTED = "DRIFT_DETECTED"
     POSSIBLE_DRIFT = "POSSIBLE_DRIFT"
+
+
+class ResultKindApp(enum.Enum):
+    """
+    Enum for the result kind values
+    """
+
+    data_drift = 0
+    concept_drift = 1
+    model_performance = 2
+    system_performance = 3
+
+
+class ResultStatusApp(enum.Enum):
+    """
+    Enum for the result status values, detected means that the app detected some problem.
+    """
+
+    irrelevant = -1
+    no_detection = 0
+    potential_detection = 1
+    detected = 2
+
+
+class ModelMonitoringAppTag:
+    KEY = "type"
+    VAL = "model-monitoring-application"
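WriterEvent defines the field names the model-monitoring writer expects, while ResultKindApp and ResultStatusApp encode what a monitoring application reports. A hedged sketch of a result payload keyed with these constants; the endpoint id, schedule time, and result values are illustrative only:

    # Sketch only: a result payload keyed with the new WriterEvent constants.
    from mlrun.common.schemas.model_monitoring.constants import (
        ResultKindApp,
        ResultStatusApp,
        WriterEvent,
    )

    event = {
        WriterEvent.APPLICATION_NAME: "my-monitoring-app",
        WriterEvent.ENDPOINT_ID: "1234",
        WriterEvent.SCHEDULE_TIME: "2023-08-01T00:00:00Z",
        WriterEvent.RESULT_NAME: "data_drift_score",
        WriterEvent.RESULT_KIND: ResultKindApp.data_drift.value,
        WriterEvent.RESULT_VALUE: 0.42,
        WriterEvent.RESULT_STATUS: ResultStatusApp.potential_detection.value,
        WriterEvent.RESULT_EXTRA_DATA: "{}",
    }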
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -111,6 +111,16 @@ class ModelEndpointSpec(ObjectSpec):
         ),
     }
 
+    @validator("model_uri")
+    def validate_model_uri(cls, model_uri):
+        """Validate that the model uri includes the required prefix"""
+        prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
+        if prefix and prefix != mlrun.utils.helpers.StorePrefix.Model:
+            return mlrun.datastore.get_store_uri(
+                mlrun.utils.helpers.StorePrefix.Model, uri
+            )
+        return model_uri
+
 
 class Histogram(BaseModel):
     buckets: List[float]
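The new validator rewrites a model_uri that carries a non-model store prefix so that it uses the model prefix. A rough illustration of the intent, reusing the same mlrun helpers the validator calls; the example URI and its normalized form are assumptions:

    # Illustration of the validator's intent, using mlrun's store-URI helpers.
    import mlrun.datastore
    import mlrun.utils.helpers

    uri = "store://artifacts/my-proj/my-model:latest"  # illustrative input
    prefix, suffix = mlrun.datastore.parse_store_uri(uri)
    if prefix and prefix != mlrun.utils.helpers.StorePrefix.Model:
        uri = mlrun.datastore.get_store_uri(mlrun.utils.helpers.StorePrefix.Model, suffix)
    # uri now carries the model store prefix, e.g. "store://models/my-proj/my-model:latest"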
mlrun/config.py
CHANGED
@@ -403,6 +403,7 @@ default_config = {
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
+        "application_stream_args": {"shard_count": 3, "retention_period_hours": 24},
         "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
         # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
         # stream, and endpoints.
@@ -417,6 +418,7 @@ default_config = {
         # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10000,
         "parquet_batching_timeout_secs": timedelta(minutes=30).total_seconds(),
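The new default_http_sink_app template is resolved per monitoring application, the same way default_http_sink is resolved per project. A quick illustration of the substitution; the project and application names are made up:

    # The template from the config above, resolved for a sample project/application.
    template = "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080"
    print(template.format(project="fraud-demo", application_name="my-monitoring-app"))
    # -> http://nuclio-fraud-demo-my-monitoring-app.mlrun.svc.cluster.local:8080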
@@ -981,20 +983,22 @@ class Config:
         kind: str = "",
         target: str = "online",
         artifact_path: str = None,
+        application_name: str = None,
     ) -> str:
         """Get the full path from the configuration based on the provided project and kind.
 
-        :param project:
-        :param kind:
-        :param target:
-
-
-
-
-
-
-        :param artifact_path:
-
+        :param project: Project name.
+        :param kind: Kind of target path (e.g. events, log_stream, endpoints, etc.)
+        :param target: Can be either online or offline. If the target is online, then we try to get a specific
+                       path for the provided kind. If it doesn't exist, use the default path.
+                       If the target path is offline and the offline path is already a full path in the
+                       configuration, then the result will be that path as-is. If the offline path is a
+                       relative path, then the result will be based on the project artifact path and the
+                       offline relative path. If project artifact path wasn't provided, then we use MLRun
+                       artifact path instead.
+        :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
+                              relative artifact path will be taken from the global MLRun artifact path.
+        :param application_name: Application name, None for model_monitoring_stream.
 
         :return: Full configured path for the provided kind.
         """
@@ -1006,8 +1010,22 @@ class Config:
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
             return store_prefix_dict[kind].format(project=project)
+
+        if (
+            application_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+        ):
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                project=project,
+                kind=kind
+                if application_name is None
+                else f"{kind}-{application_name.lower()}",
+            )
         return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=project,
+            project=project,
+            kind=kind
+            if application_name is None
+            else f"{kind}-{application_name.lower()}",
         )
 
         # Get the current offline path from the configuration
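The effect of the new branch is that any application other than the stream function gets its own store prefix, derived from the kind plus the lower-cased application name. A standalone sketch of just that selection logic, decoupled from mlrun's Config object; the function name here is hypothetical:

    # Standalone sketch of the kind/application_name suffix selection added above.
    def resolve_kind(kind: str, application_name: str = None) -> str:
        # None means the default model-monitoring stream, which keeps the plain kind.
        return kind if application_name is None else f"{kind}-{application_name.lower()}"

    assert resolve_kind("parquet") == "parquet"
    assert resolve_kind("parquet", "MyApp") == "parquet-myapp"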
mlrun/datastore/__init__.py
CHANGED
mlrun/datastore/sources.py
CHANGED
@@ -138,7 +138,6 @@ class CSVSource(BaseSourceDriver):
     :parameter path: path to CSV file
     :parameter key_field: the CSV field to be used as the key for events. May be an int (field index) or string
         (field name) if with_header is True. Defaults to None (no key). Can be a list of keys.
-    :parameter time_field: DEPRECATED. Use parse_dates to parse timestamps.
     :parameter schedule: string to configure scheduling of the ingestion job.
     :parameter attributes: additional parameters to pass to storey. For example:
         attributes={"timestamp_format": '%Y%m%d%H'}
@@ -156,29 +155,13 @@ class CSVSource(BaseSourceDriver):
         path: str = None,
         attributes: Dict[str, str] = None,
         key_field: str = None,
-        time_field: str = None,
         schedule: str = None,
         parse_dates: Union[None, int, str, List[int], List[str]] = None,
         **kwargs,
     ):
-        super().__init__(
-
-
-        if time_field is not None:
-            warnings.warn(
-                "CSVSource's time_field parameter is deprecated in 1.3.0 and will be removed in 1.5.0. "
-                "Use parse_dates instead.",
-                # TODO: remove in 1.5.0
-                FutureWarning,
-            )
-        if isinstance(parse_dates, (int, str)):
-            parse_dates = [parse_dates]
-
-        if parse_dates is None:
-            parse_dates = [time_field]
-        elif time_field not in parse_dates:
-            parse_dates = copy(parse_dates)
-            parse_dates.append(time_field)
+        super().__init__(name, path, attributes, key_field, schedule=schedule, **kwargs)
+        if parse_dates and not isinstance(parse_dates, list):
+            parse_dates = [parse_dates]
         self._parse_dates = parse_dates
 
     def to_step(self, key_field=None, time_field=None, context=None):
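With the deprecated time_field parameter removed, timestamp parsing in CSVSource is requested through parse_dates only. A hedged usage sketch; the path and column names are illustrative:

    # Sketch: CSVSource after the removal of time_field; parse_dates does the parsing.
    from mlrun.datastore.sources import CSVSource

    source = CSVSource(
        name="events",
        path="v3io:///projects/my-proj/events.csv",  # illustrative path
        parse_dates=["timestamp"],                   # replaces the old time_field
        key_field="event_id",
    )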
@@ -724,16 +707,7 @@ class DataFrameSource:
 
     support_storey = True
 
-    def __init__(
-        self, df, key_field=None, time_field=None, context=None, iterator=False
-    ):
-        if time_field:
-            warnings.warn(
-                "DataFrameSource's time_field parameter has no effect. "
-                "It is deprecated in 1.3.0 and will be removed in 1.5.0",
-                FutureWarning,
-            )
-
+    def __init__(self, df, key_field=None, context=None, iterator=False):
         self._df = df
         if isinstance(key_field, str):
             self.key_field = [key_field]
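DataFrameSource likewise drops its no-op time_field argument, so construction is now just the dataframe plus an optional key field. A small sketch:

    # Sketch: DataFrameSource after the time_field removal.
    import pandas as pd
    from mlrun.datastore.sources import DataFrameSource

    df = pd.DataFrame({"event_id": [1, 2], "value": [0.1, 0.2]})
    source = DataFrameSource(df, key_field="event_id")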
mlrun/datastore/targets.py
CHANGED
@@ -484,6 +484,7 @@ class BaseStoreTarget(DataTargetBase):
         if hasattr(df, "rdd"):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
+            df = self.prepare_spark_df(df, key_column, timestamp_key, options)
             df.write.mode("overwrite").save(**options)
         elif hasattr(df, "dask"):
             dask_options = self.get_dask_options()
@@ -513,36 +514,41 @@ class BaseStoreTarget(DataTargetBase):
         dir = os.path.dirname(target_path)
         if dir:
             os.makedirs(dir, exist_ok=True)
-        partition_cols = []
-        if target_path.endswith(".parquet") or target_path.endswith(".pq"):
-            partition_cols = None
         target_df = df
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        partition_cols = None  # single parquet file
+        if not target_path.endswith(".parquet") and not target_path.endswith(
+            ".pq"
+        ):  # directory
+            partition_cols = []
+            if timestamp_key and (
+                self.partitioned or self.time_partitioning_granularity
+            ):
+                target_df = df.copy(deep=False)
+                time_partitioning_granularity = self.time_partitioning_granularity
+                if not time_partitioning_granularity and self.partitioned:
+                    time_partitioning_granularity = (
+                        mlrun.utils.helpers.DEFAULT_TIME_PARTITIONING_GRANULARITY
+                    )
+                for unit, fmt in [
+                    ("year", "%Y"),
+                    ("month", "%m"),
+                    ("day", "%d"),
+                    ("hour", "%H"),
+                    ("minute", "%M"),
+                ]:
+                    partition_cols.append(unit)
+                    target_df[unit] = pd.DatetimeIndex(
+                        target_df[timestamp_key]
+                    ).format(date_format=fmt)
+                    if unit == time_partitioning_granularity:
+                        break
+            # Partitioning will be performed on timestamp_key and then on self.partition_cols
+            # (We might want to give the user control on this order as additional functionality)
+            partition_cols += self.partition_cols or []
         storage_options = self._get_store().get_storage_options()
         self._write_dataframe(
             target_df,
-            storage_options,
+            self.storage_options or storage_options,
             target_path,
             partition_cols=partition_cols,
             **kwargs,
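For the pandas write path, directory-style parquet targets again derive time-unit partition columns (year/month/day, down to the configured granularity) from the timestamp key before writing. A hedged usage sketch with a ParquetTarget that requests daily partitioning; the target path is illustrative:

    # Sketch: a partitioned ParquetTarget; the write path above derives
    # year/month/day columns from the timestamp key before writing.
    from mlrun.datastore.targets import ParquetTarget

    target = ParquetTarget(
        name="offline",
        path="v3io:///projects/my-proj/offline/",  # directory => partitioned layout
        partitioned=True,
        time_partitioning_granularity="day",
    )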
@@ -690,7 +696,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
         return df
 
     def get_dask_options(self):
@@ -924,6 +930,37 @@ class ParquetTarget(BaseStoreTarget):
             return self.path.endswith(".parquet") or self.path.endswith(".pq")
         return False
 
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
+        # If partitioning by time, add the necessary columns
+        if (
+            timestamp_key
+            and isinstance(spark_options, dict)
+            and "partitionBy" in spark_options
+        ):
+            from pyspark.sql.functions import (
+                dayofmonth,
+                hour,
+                minute,
+                month,
+                second,
+                year,
+            )
+
+            time_unit_to_op = {
+                "year": year,
+                "month": month,
+                "day": dayofmonth,
+                "hour": hour,
+                "minute": minute,
+                "second": second,
+            }
+            timestamp_col = df[timestamp_key]
+            for partition in spark_options["partitionBy"]:
+                if partition not in df.columns and partition in time_unit_to_op:
+                    op = time_unit_to_op[partition]
+                    df = df.withColumn(partition, op(timestamp_col))
+        return df
+
 
 class CSVTarget(BaseStoreTarget):
     kind = TargetTypes.csv
@@ -973,7 +1010,7 @@ class CSVTarget(BaseStoreTarget):
             "header": "true",
         }
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         import pyspark.sql.functions as funcs
 
         for col_name, col_type in df.dtypes:
@@ -1067,7 +1104,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
@@ -1139,7 +1176,7 @@ class NoSqlTarget(NoSqlBaseTarget):
         spark_options["columnUpdate"] = True
         return spark_options
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         from pyspark.sql.functions import col
 
         spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
@@ -1232,7 +1269,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         endpoint, uri = self._get_server_endpoint()
         return endpoint
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         from pyspark.sql.functions import col
 
         spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
mlrun/db/httpdb.py
CHANGED
@@ -18,6 +18,7 @@ import tempfile
 import time
 import traceback
 import typing
+import warnings
 from datetime import datetime, timedelta
 from os import path, remove
 from typing import Dict, List, Optional, Union
@@ -1411,6 +1412,8 @@ class HTTPRunDB(RunDBInterface):
         namespace=None,
         artifact_path=None,
         ops=None,
+        # TODO: deprecated, remove in 1.6.0
+        ttl=None,
         cleanup_ttl=None,
     ):
         """Submit a KFP pipeline for execution.
@@ -1423,9 +1426,18 @@ class HTTPRunDB(RunDBInterface):
         :param namespace: Kubernetes namespace to execute the pipeline in.
         :param artifact_path: A path to artifacts used by this pipeline.
         :param ops: Transformers to apply on all ops in the pipeline.
+        :param ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the workflow
+            and all its resources are deleted) (deprecated, use cleanup_ttl instead)
         :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
             workflow and all its resources are deleted)
         """
+        if ttl:
+            warnings.warn(
+                "'ttl' is deprecated, use 'cleanup_ttl' instead. "
+                "This will be removed in 1.6.0",
+                # TODO: Remove this in 1.6.0
+                FutureWarning,
+            )
 
         if isinstance(pipeline, str):
             pipe_file = pipeline
@@ -1433,7 +1445,7 @@ class HTTPRunDB(RunDBInterface):
             pipe_file = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False).name
         conf = new_pipe_metadata(
             artifact_path=artifact_path,
-            cleanup_ttl=cleanup_ttl,
+            cleanup_ttl=cleanup_ttl or ttl,
             op_transformers=ops,
         )
         kfp.compiler.Compiler().compile(
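Callers that still pass ttl now get a FutureWarning and the value is folded into cleanup_ttl, so new code should pass cleanup_ttl directly. A hedged caller-side sketch; the submit_pipeline method name and its project argument are taken from mlrun's client API rather than from this hunk:

    # Sketch: migrating a pipeline submission from the deprecated ttl to cleanup_ttl.
    import mlrun

    db = mlrun.get_run_db()  # HTTPRunDB when connected to an MLRun API
    run_id = db.submit_pipeline(
        project="my-proj",
        pipeline="pipeline.yaml",  # a compiled KFP pipeline file
        cleanup_ttl=3600,          # previously: ttl=3600
    )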
@@ -1471,15 +1483,17 @@ class HTTPRunDB(RunDBInterface):
                 headers=headers,
             )
         except OSError as err:
-            logger.error(
-            raise OSError(f"
+            logger.error("Error: Cannot submit pipeline", err=err_to_str(err))
+            raise OSError(f"Error: Cannot submit pipeline, {err_to_str(err)}")
 
         if not resp.ok:
-            logger.error(
-            raise ValueError(f"
+            logger.error("Failed to submit pipeline", respones_text=resp.text)
+            raise ValueError(f"Failed to submit pipeline, {resp.text}")
 
         resp = resp.json()
-        logger.info(
+        logger.info(
+            "Pipeline submitted successfully", pipeline_name=resp["name"], id=resp["id"]
+        )
         return resp["id"]
 
     def list_pipelines(
mlrun/feature_store/api.py
CHANGED
@@ -975,37 +975,9 @@ def _ingest_with_spark(
     )
 
     df_to_write = df
-
-
-
-        from pyspark.sql.functions import (
-            dayofmonth,
-            hour,
-            minute,
-            month,
-            second,
-            year,
-        )
-
-        time_unit_to_op = {
-            "year": year,
-            "month": month,
-            "day": dayofmonth,
-            "hour": hour,
-            "minute": minute,
-            "second": second,
-        }
-        timestamp_col = df_to_write[timestamp_key]
-        for partition in spark_options["partitionBy"]:
-            if (
-                partition not in df_to_write.columns
-                and partition in time_unit_to_op
-            ):
-                op = time_unit_to_op[partition]
-                df_to_write = df_to_write.withColumn(
-                    partition, op(timestamp_col)
-                )
-    df_to_write = target.prepare_spark_df(df_to_write, key_columns)
+    df_to_write = target.prepare_spark_df(
+        df_to_write, key_columns, timestamp_key, spark_options
+    )
     if overwrite:
        df_to_write.write.mode("overwrite").save(**spark_options)
     else:
mlrun/feature_store/feature_vector.py
CHANGED
@@ -631,7 +631,7 @@ class FeatureVector(ModelObj):
         feature_set_fields: list of field (name, alias) per featureset
         """
         processed_features = {}  # dict of name to (featureset, feature object)
-        feature_set_objects = {}
+        feature_set_objects = self.feature_set_objects or {}
         index_keys = []
         feature_set_fields = collections.defaultdict(list)
         features = copy(self.spec.features)
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -136,7 +136,7 @@ class BaseMerger(abc.ABC):
             order_by=order_by,
         )
 
-    def _write_to_offline_target(self):
+    def _write_to_offline_target(self, timestamp_key=None):
         if self._target:
             is_persistent_vector = self.vector.metadata.name is not None
             if not self._target.path and not is_persistent_vector:
@@ -144,7 +144,12 @@ class BaseMerger(abc.ABC):
                     "target path was not specified"
                 )
             self._target.set_resource(self.vector)
-            size = self._target.write_dataframe(
+            size = self._target.write_dataframe(
+                self._result_df,
+                timestamp_key=timestamp_key
+                if not self._drop_indexes and timestamp_key not in self._drop_columns
+                else None,
+            )
             if is_persistent_vector:
                 target_status = self._target.update_resource_status("ready", size=size)
                 logger.info(f"wrote target: {target_status}")
@@ -361,7 +366,7 @@ class BaseMerger(abc.ABC):
         )
         self._order_by(order_by_active)
 
-        self._write_to_offline_target()
+        self._write_to_offline_target(timestamp_key=result_timestamp)
         return OfflineVectorResponse(self)
 
     def init_online_vector_service(
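The offline merger now forwards the result timestamp key to the target write, so offline feature-vector results written to a parquet target can be time partitioned. A hedged sketch; the vector URI and target path are illustrative:

    # Sketch: offline retrieval to a parquet target; the timestamp key of the
    # result is now passed through to the target's write_dataframe call.
    import mlrun.feature_store as fstore
    from mlrun.datastore.targets import ParquetTarget

    resp = fstore.get_offline_features(
        "store://feature-vectors/my-proj/my-vector",  # illustrative vector URI
        target=ParquetTarget(path="v3io:///projects/my-proj/vector-offline/"),
    )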
mlrun/launcher/remote.py
CHANGED
@@ -89,7 +89,7 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
 
         else:
             raise mlrun.errors.MLRunRuntimeError(
-                "
+                "Function image is not built/ready, set auto_build=True or use .deploy() method first"
             )
 
         if runtime.verbose:
@@ -122,11 +122,11 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             resp = db.submit_job(run, schedule=schedule)
             if schedule:
                 action = resp.pop("action", "created")
-                logger.info(f"
+                logger.info(f"Task schedule {action}", **resp)
                 return
 
         except (requests.HTTPError, Exception) as err:
-            logger.error(
+            logger.error("Failed remote run", error=mlrun.errors.err_to_str(err))
 
             if isinstance(err, requests.HTTPError):
                 runtime._handle_submit_job_http_error(err)
mlrun/lists.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings
 from copy import copy
 from typing import List
 
@@ -219,6 +220,16 @@ class ArtifactList(list):
         """return as a list of artifact objects"""
         return [dict_to_artifact(artifact) for artifact in self]
 
+    def objects(self) -> List[Artifact]:
+        """return as a list of artifact objects"""
+        warnings.warn(
+            "'objects' is deprecated in 1.3.0 and will be removed in 1.6.0. "
+            "Use 'to_objects' instead.",
+            # TODO: remove in 1.6.0
+            FutureWarning,
+        )
+        return [dict_to_artifact(artifact) for artifact in self]
+
     def dataitems(self) -> List["mlrun.DataItem"]:
         """return as a list of DataItem objects"""
         dataitems = []
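ArtifactList.objects() comes back only as a deprecated alias; to_objects() is the supported call. A small migration sketch, assuming a connected MLRun API and an illustrative project name:

    # Sketch: prefer to_objects() over the deprecated objects() alias.
    import mlrun

    artifacts = mlrun.get_run_db().list_artifacts(project="my-proj")
    artifact_objects = artifacts.to_objects()  # objects() now emits a FutureWarning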
mlrun/model_monitoring/__init__.py
CHANGED
@@ -15,7 +15,6 @@
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 # for backwards compatibility
 
-
 from .helpers import get_stream_path
 from .model_endpoint import ModelEndpoint
 from .stores import ModelEndpointStore, ModelEndpointStoreType, get_model_endpoint_store
mlrun/model_monitoring/api.py
CHANGED
@@ -28,9 +28,9 @@ from mlrun.common.schemas.model_monitoring import EventFieldType, ModelMonitorin
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import logger
 
+from .batch import VirtualDrift
 from .features_drift_table import FeaturesDriftTablePlot
 from .model_endpoint import ModelEndpoint
-from .model_monitoring_batch import VirtualDrift
 
 # A union of all supported dataset types:
 DatasetType = typing.Union[