mlrun 1.6.0rc6__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +32 -31
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +3 -3
- mlrun/datastore/base.py +9 -3
- mlrun/datastore/datastore.py +10 -7
- mlrun/datastore/datastore_profile.py +19 -2
- mlrun/datastore/dbfs_store.py +6 -6
- mlrun/datastore/s3.py +6 -2
- mlrun/datastore/sources.py +12 -2
- mlrun/datastore/targets.py +43 -20
- mlrun/db/httpdb.py +22 -0
- mlrun/feature_store/feature_set.py +5 -2
- mlrun/feature_store/retrieval/spark_merger.py +7 -1
- mlrun/kfpops.py +1 -1
- mlrun/launcher/client.py +1 -6
- mlrun/launcher/remote.py +5 -3
- mlrun/model.py +2 -2
- mlrun/model_monitoring/batch_application.py +61 -94
- mlrun/package/packager.py +115 -89
- mlrun/package/packagers/default_packager.py +66 -65
- mlrun/package/packagers/numpy_packagers.py +109 -62
- mlrun/package/packagers/pandas_packagers.py +12 -23
- mlrun/package/packagers/python_standard_library_packagers.py +35 -57
- mlrun/package/packagers_manager.py +16 -13
- mlrun/package/utils/_pickler.py +8 -18
- mlrun/package/utils/_supported_format.py +1 -1
- mlrun/projects/pipelines.py +63 -4
- mlrun/projects/project.py +34 -11
- mlrun/runtimes/__init__.py +6 -0
- mlrun/runtimes/base.py +12 -1
- mlrun/runtimes/daskjob.py +73 -5
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -0
- mlrun/runtimes/function.py +53 -4
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +9 -9
- mlrun/runtimes/pod.py +1 -1
- mlrun/runtimes/remotesparkjob.py +1 -0
- mlrun/runtimes/serving.py +11 -1
- mlrun/runtimes/sparkjob/spark3job.py +4 -1
- mlrun/runtimes/utils.py +1 -46
- mlrun/utils/helpers.py +1 -17
- mlrun/utils/notifications/notification_pusher.py +27 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +7 -6
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +50 -50
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Union
 from urllib.parse import urlparse

 import pandas as pd
+from mergedeep import merge

 import mlrun
 import mlrun.utils.helpers
@@ -293,6 +294,8 @@ def add_target_steps(graph, resource, targets, to_df=False, final_step=None):
         driver = get_target_driver(target, resource)
         table = driver.get_table_object() or table
         driver.update_resource_status()
+        if target.after_step:
+            target.attributes["infer_columns_from_data"] = True
         driver.add_writer_step(
             graph,
             target.after_step or final_step,
@@ -435,17 +438,20 @@ class BaseStoreTarget(DataTargetBase):
             prefix=self.credentials_prefix,
         )

-    def _get_store(self):
+    def _get_store_and_path(self):
         credentials_prefix_secrets = (
             {"CREDENTIALS_PREFIX": self.credentials_prefix}
             if self.credentials_prefix
             else None
         )
-        store, _ = mlrun.store_manager.get_or_create_store(
+        store, resolved_store_path = mlrun.store_manager.get_or_create_store(
             self.get_target_path(),
             credentials_prefix_secrets,
         )
-        return store
+        if self.get_target_path().startswith("ds://"):
+            return store, store.url + resolved_store_path
+        else:
+            return store, self.get_target_path()

     def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
         result = []
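The new `_get_store_and_path` helper returns the datastore object together with a resolved target path: `ds://` datastore-profile URLs are expanded into the store's own URL, while concrete URLs pass through unchanged. A minimal sketch of that resolution rule, assuming a store object exposing a `url` attribute (the `DummyStore` class and URLs below are illustrative, not mlrun API):

    class DummyStore:
        # Stand-in for the object returned by mlrun.store_manager.get_or_create_store
        def __init__(self, url):
            self.url = url

    def resolve_target_path(target_path, store, resolved_store_path):
        # ds:// URLs name a datastore profile; the concrete location is the
        # store's URL plus the path resolved from that profile.
        if target_path.startswith("ds://"):
            return store.url + resolved_store_path
        # Other schemes (s3://, v3io://, local paths) are already concrete.
        return target_path

    store = DummyStore("s3://my-bucket")
    print(resolve_target_path("ds://my-profile/data/set.parquet", store, "/data/set.parquet"))
    # -> s3://my-bucket/data/set.parquet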
@@ -494,17 +500,18 @@ class BaseStoreTarget(DataTargetBase):
             df.write.mode("overwrite").save(**options)
         elif hasattr(df, "dask"):
             dask_options = self.get_dask_options()
-            storage_options = self._get_store().get_storage_options()
+            store, target_path = self._get_store_and_path()
+            storage_options = store.get_storage_options()
             df = df.repartition(partition_size="100MB")
             try:
                 if dask_options["format"] == "parquet":
                     df.to_parquet(
-                        generate_path_with_chunk(self, chunk_id),
+                        generate_path_with_chunk(self, chunk_id, target_path),
                         storage_options=storage_options,
                     )
                 elif dask_options["format"] == "csv":
                     df.to_csv(
-                        generate_path_with_chunk(self, chunk_id),
+                        generate_path_with_chunk(self, chunk_id, target_path),
                         storage_options=storage_options,
                     )
                 else:
@@ -514,8 +521,9 @@ class BaseStoreTarget(DataTargetBase):
             except Exception as exc:
                 raise RuntimeError("Failed to write Dask Dataframe") from exc
         else:
-            target_path = generate_path_with_chunk(self, chunk_id)
-            file_system = self._get_store().get_filesystem(False)
+            store, target_path = self._get_store_and_path()
+            target_path = generate_path_with_chunk(self, chunk_id, target_path)
+            file_system = store.get_filesystem(False)
             if file_system.protocol == "file":
                 dir = os.path.dirname(target_path)
                 if dir:
@@ -551,10 +559,16 @@ class BaseStoreTarget(DataTargetBase):
             # Partitioning will be performed on timestamp_key and then on self.partition_cols
             # (We might want to give the user control on this order as additional functionality)
             partition_cols += self.partition_cols or []
-
+
+        storage_options = store.get_storage_options()
+        if storage_options and self.storage_options:
+            storage_options = merge(storage_options, self.storage_options)
+        else:
+            storage_options = storage_options or self.storage_options
+
         self._write_dataframe(
             target_df,
-            self.storage_options,
+            storage_options,
             target_path,
             partition_cols=partition_cols,
             **kwargs,
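Store-derived credentials and user-supplied `storage_options` are now deep-merged with mergedeep rather than one side replacing the other. A quick illustration of why a deep merge matters here (option keys are illustrative): `merge` mutates and returns its first argument, later sources win on conflicting keys, and nested dicts are combined instead of being replaced wholesale as a `dict.update`-style merge would do:

    from mergedeep import merge

    store_options = {"anon": False, "client_kwargs": {"region_name": "us-east-1"}}
    user_options = {"client_kwargs": {"endpoint_url": "http://localhost:9000"}}

    combined = merge(store_options, user_options)
    print(combined)
    # {'anon': False, 'client_kwargs': {'region_name': 'us-east-1',
    #                                   'endpoint_url': 'http://localhost:9000'}}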
@@ -603,6 +617,7 @@ class BaseStoreTarget(DataTargetBase):

         driver._resource = resource
         driver.run_id = spec.run_id
+        driver.after_step = spec.after_step
         return driver

     def get_table_object(self):
@@ -673,7 +688,8 @@ class BaseStoreTarget(DataTargetBase):
         raise NotImplementedError()

     def purge(self):
-        self._get_store().rm(self.get_target_path(), recursive=True)
+        store, target_path = self._get_store_and_path()
+        store.rm(target_path, recursive=True)

     def as_df(
         self,
@@ -860,18 +876,25 @@ class ParquetTarget(BaseStoreTarget):
             "update_last_written": featureset_status.update_last_written_for_target
         }

+        store, target_path = self._get_store_and_path()
+
+        storage_options = store.get_storage_options()
+        if storage_options and self.storage_options:
+            storage_options = merge(storage_options, self.storage_options)
+        else:
+            storage_options = storage_options or self.storage_options
+
         graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
             class_name="storey.ParquetTarget",
-            path=self.get_target_path(),
+            path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=self.storage_options
-            or self._get_store().get_storage_options(),
+            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             **self.attributes,
@@ -1009,17 +1032,17 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        store, target_path = self._get_store_and_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
             class_name="storey.CSVTarget",
-            path=self.get_target_path(),
+            path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=self._get_store().get_storage_options(),
+            storage_options=store.get_storage_options(),
             **self.attributes,
         )

@@ -1923,8 +1946,8 @@ def _get_target_path(driver, resource, run_id_mode=False):
     return f"{data_prefix}/{kind_prefix}/{name}{suffix}"


-def generate_path_with_chunk(target, chunk_id):
-    prefix, suffix = os.path.splitext(target.get_target_path())
+def generate_path_with_chunk(target, chunk_id, path):
+    prefix, suffix = os.path.splitext(path)
     if chunk_id and not target.partitioned and not target.time_partitioning_granularity:
         return f"{prefix}/{chunk_id:0>4}{suffix}"
-    return target.get_target_path()
+    return path
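Since `generate_path_with_chunk` now receives the already-resolved path instead of reading it off the target, its behavior is easy to check in isolation; a small demonstration with a stand-in target object:

    import os
    from types import SimpleNamespace

    def generate_path_with_chunk(target, chunk_id, path):
        prefix, suffix = os.path.splitext(path)
        if chunk_id and not target.partitioned and not target.time_partitioning_granularity:
            return f"{prefix}/{chunk_id:0>4}{suffix}"
        return path

    target = SimpleNamespace(partitioned=False, time_partitioning_granularity=None)
    print(generate_path_with_chunk(target, 7, "s3://bucket/sets/data.parquet"))
    # -> s3://bucket/sets/data/0007.parquet (zero-padded chunk file under the base name)
    print(generate_path_with_chunk(target, 0, "s3://bucket/sets/data.parquet"))
    # -> s3://bucket/sets/data.parquet (no chunk id: the path is returned unchanged)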
mlrun/db/httpdb.py
CHANGED
@@ -3143,6 +3143,21 @@ class HTTPRunDB(RunDBInterface):
             body=dict_to_json(authorization_verification_input.dict()),
         )

+    def list_api_gateways(self, project=None):
+        """
+        Returns a list of Nuclio api gateways
+        :param project: optional str parameter to filter by project, if not passed, default Nuclio's value is taken
+
+        :return: json with the list of Nuclio Api Gateways
+            (json example is here
+            https://github.com/nuclio/nuclio/blob/development/docs/reference/api/README.md#listing-all-api-gateways)
+        """
+        project = project or config.default_project
+        error = "list api gateways"
+        endpoint_path = f"projects/{project}/nuclio/api-gateways"
+        resp = self.api_call("GET", endpoint_path, error)
+        return resp.json()
+
     def trigger_migrations(self) -> Optional[mlrun.common.schemas.BackgroundTask]:
         """Trigger migrations (will do nothing if no migrations are needed) and wait for them to finish if actually
         triggered
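Assuming a reachable MLRun API server configured for Nuclio, the new method is available on the run-database client (the project name below is a placeholder):

    import mlrun

    db = mlrun.get_run_db()  # an HTTPRunDB when MLRUN_DBPATH points at an API server
    gateways = db.list_api_gateways(project="my-project")
    print(gateways)  # raw JSON dict, in the format of Nuclio's list-api-gateways response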
@@ -3238,6 +3253,7 @@ class HTTPRunDB(RunDBInterface):
         source: Optional[str] = None,
         run_name: Optional[str] = None,
         namespace: Optional[str] = None,
+        notifications: typing.List[mlrun.model.Notification] = None,
     ):
         """
         Submitting workflow for a remote execution.
@@ -3250,6 +3266,7 @@ class HTTPRunDB(RunDBInterface):
         :param source: source url of the project
         :param run_name: run name to override the default: 'workflow-runner-<workflow name>'
         :param namespace: kubernetes namespace if other than default
+        :param notifications: list of notifications to send when workflow execution is completed

         :returns: :py:class:`~mlrun.common.schemas.WorkflowResponse`.
         """
@@ -3281,6 +3298,11 @@ class HTTPRunDB(RunDBInterface):
         req["spec"] = workflow_spec
         req["spec"]["image"] = image
         req["spec"]["name"] = workflow_name
+        if notifications:
+            req["notifications"] = [
+                notification.to_dict() for notification in notifications
+            ]
+
         response = self.api_call(
             "POST",
             f"projects/{project}/workflows/{workflow_name}/submit",
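A sketch of passing the new notifications argument when submitting a remote workflow; the notification fields follow mlrun.model.Notification, while the webhook value and any names shown are placeholders:

    import mlrun
    from mlrun.model import Notification

    notification = Notification(
        kind="slack",
        name="workflow-done",
        message="workflow finished",
        severity="info",
        when=["completed", "error"],
        secret_params={"webhook": "https://hooks.slack.com/services/..."},  # placeholder
    )

    db = mlrun.get_run_db()
    # Other submit_workflow arguments (project, name, image, source, ...) omitted:
    # db.submit_workflow(..., notifications=[notification])

As the hunk above shows, each notification is serialized with to_dict() and placed under the request's "notifications" key.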
mlrun/feature_store/feature_set.py
CHANGED

@@ -16,6 +16,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union

 import pandas as pd
+import pytz
 from storey import EmitEveryEvent, EmitPolicy

 import mlrun
@@ -929,9 +930,11 @@ class FeatureSet(ModelObj):
         )
         df = self.spec.source.to_dataframe(
             columns=columns,
+            # overwrite `source.start_time` when the source is schedule.
             start_time=start_time
-            or pd.Timestamp.min,
-            end_time=end_time or pd.Timestamp.max,
+            or pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC),
+            end_time=end_time
+            or pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC),
             time_field=time_column,
             **kwargs,
         )
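The bounds default to timezone-aware UTC timestamps because pandas refuses to compare naive and aware datetimes, which is exactly what happens when a scheduled source carries tz-aware event times. A short demonstration of the failure mode and the new default:

    import pandas as pd
    import pytz

    naive_min = pd.Timestamp.min
    aware_now = pd.Timestamp.now(tz="UTC")

    try:
        naive_min < aware_now
    except TypeError as exc:
        print(exc)  # comparing tz-naive and tz-aware timestamps raises

    aware_min = pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC)
    print(aware_min < aware_now)  # True: both sides are UTC-aware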
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -172,11 +172,17 @@ class SparkFeatureMerger(BaseMerger):
         # when we upgrade pyspark, we should check whether this workaround is still necessary
         # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
         if semver.parse(pd.__version__)["major"] >= 2:
+            import pyspark.sql.functions as pyspark_functions
+
             type_conversion_dict = {}
             for field in df.schema.fields:
                 if str(field.dataType) == "TimestampType":
                     df = df.withColumn(
-                        field.name,
+                        field.name,
+                        pyspark_functions.date_format(
+                            pyspark_functions.to_timestamp(field.name),
+                            "yyyy-MM-dd'T'HH:mm:ss.SSS",
+                        ),
                     )
                     type_conversion_dict[field.name] = "datetime64[ns]"
         df = df.toPandas()
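The workaround renders Spark timestamps as ISO-like strings before toPandas(), then type_conversion_dict casts them back to datetime64[ns] on the pandas side. A minimal sketch of the same round trip, assuming a local pyspark installation (session, column name, and sample value are illustrative):

    import pyspark.sql.functions as F
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([("2024-01-02 03:04:05.678",)], ["ts"]).select(
        F.to_timestamp("ts").alias("ts")
    )

    # Format the timestamp as a string that pandas>=2 parses cleanly...
    df = df.withColumn(
        "ts", F.date_format(F.to_timestamp("ts"), "yyyy-MM-dd'T'HH:mm:ss.SSS")
    )
    # ...then restore the dtype after conversion.
    pdf = df.toPandas().astype({"ts": "datetime64[ns]"})
    print(pdf.dtypes)  # ts    datetime64[ns]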
mlrun/kfpops.py
CHANGED
@@ -93,7 +93,7 @@ def write_kfpmeta(struct):
             val = results[key]
             try:
                 path = "/".join([KFP_ARTIFACTS_DIR, key])
-                logger.info("
+                logger.info("Writing artifact output", path=path, val=val)
                 with open(path, "w") as fp:
                     fp.write(str(val))
             except Exception as exc:
mlrun/launcher/client.py
CHANGED
@@ -52,12 +52,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         if runtime.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
             return

-        build = runtime.spec.build
-        require_build = (
-            build.commands
-            or build.requirements
-            or (build.source and not build.load_source_on_run)
-        )
+        require_build = runtime.requires_build()
         image = runtime.spec.image
         # we allow users to not set an image, in that case we'll use the default
         if (
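Both launchers now delegate to a shared runtime.requires_build() predicate instead of duplicating the check inline. A hypothetical mirror of that predicate (not mlrun's exact code) with a stand-in build spec:

    from types import SimpleNamespace

    def requires_build(build) -> bool:
        # A build is needed when there are build commands or requirements, or
        # when source must be baked into the image rather than loaded at runtime.
        return bool(
            build.commands
            or build.requirements
            or (build.source and not build.load_source_on_run)
        )

    build = SimpleNamespace(
        commands=["pip install xgboost"], requirements=None,
        source=None, load_source_on_run=False,
    )
    print(requires_build(build))  # True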
mlrun/launcher/remote.py
CHANGED
@@ -90,9 +90,11 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             runtime.deploy(skip_deployed=True, show_on_failure=True)

         else:
-
-
-
+            if runtime.requires_build():
+                logger.warning(
+                    "Function image is not built/ready and function requires build - execution will fail. "
+                    "Need to set auto_build=True or use .deploy() method first"
+                )

         if runtime.verbose:
             logger.info(f"runspec:\n{run.to_yaml()}")
mlrun/model.py
CHANGED
@@ -922,7 +922,7 @@ class RunSpec(ModelObj):
         """
         Set the dictionary of k8s states (pod phase) to thresholds time strings.
         The state will be matched against the pod's status. The threshold should be a time string that conforms
-        to timelength python package standards and is at least 1
+        to timelength python package standards and is at least 1 minute (-1 for infinite). If the phase is active
         for longer than the threshold, the run will be marked as aborted and the pod will be deleted.
         See mlconf.function.spec.state_thresholds for the state options and default values.

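For reference, a thresholds dictionary matching the docstring's contract might look as follows; the state names and durations are examples only, and the authoritative list lives in mlconf.function.spec.state_thresholds:

    state_thresholds = {
        "pending_scheduled": "1h",   # abort runs stuck in scheduling for over an hour
        "executing": "24h",          # abort runs executing for over a day
        "image_pull_backoff": "-1",  # -1 disables the threshold (infinite)
    }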
@@ -1433,7 +1433,7 @@ class RunObject(RunTemplate):
         self.logs(watch=False)
         if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
             raise mlrun.errors.MLRunRuntimeError(
-                f"
+                f"Task {self.metadata.name} did not complete (state={state})"
             )

         return state
mlrun/model_monitoring/batch_application.py
CHANGED

@@ -11,25 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+
 import concurrent.futures
 import datetime
 import json
 import os
 import re
-from typing import
+from typing import Callable, Optional, Tuple

-import numpy as np
 import pandas as pd

 import mlrun
-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.utils.v3io_clients
 from mlrun.datastore import get_stream_pusher
 from mlrun.datastore.targets import ParquetTarget
 from mlrun.model_monitoring.batch import calculate_inputs_statistics
@@ -72,46 +67,39 @@ class BatchApplicationProcessor:

         # Get the batch interval range
         self.batch_dict = context.parameters[
-
+            mm_constants.EventFieldType.BATCH_INTERVALS_DICT
         ]

-        # TODO: This will be removed
+        # TODO: This will be removed once the job params can be parsed with different types
         # Convert batch dict string into a dictionary
         if isinstance(self.batch_dict, str):
             self._parse_batch_dict_str()
         # If provided, only model endpoints in that that list will be analyzed
         self.model_endpoints = context.parameters.get(
-
-        )
-        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = (
-            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
+            mm_constants.EventFieldType.MODEL_ENDPOINTS, None
         )
+        self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.parquet_directory = get_monitoring_parquet_path(
             project=project,
-            kind=
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         self.storage_options = None
         if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations(
-                model_monitoring_access_key=self.model_monitoring_access_key
-            )
+            self._initialize_v3io_configurations()
         elif self.parquet_directory.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()

-
-
-
-
-
-
-
-
-
-        self.
-
-        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = model_monitoring_access_key
+    @staticmethod
+    def _get_model_monitoring_access_key() -> Optional[str]:
+        access_key = os.getenv(mm_constants.ProjectSecretKeys.ACCESS_KEY)
+        # allow access key to be empty and don't fetch v3io access key if not needed
+        if access_key is None:
+            access_key = mlrun.mlconf.get_v3io_access_key()
+        return access_key
+
+    def _initialize_v3io_configurations(self) -> None:
+        self.v3io_framesd = mlrun.mlconf.v3io_framesd
+        self.v3io_api = mlrun.mlconf.v3io_api
         self.storage_options = dict(
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
@@ -126,9 +114,7 @@ class BatchApplicationProcessor:
             self.project
         ).list_model_monitoring_functions()
         if application:
-            applications_names = np.unique(
-                [app.metadata.name for app in application]
-            ).tolist()
+            applications_names = list({app.metadata.name for app in application})
         else:
             logger.info("There are no monitoring application found in this project")
             applications_names = []
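The numpy-based deduplication was replaced with a plain set comprehension, which also let the numpy import be dropped above; note that unlike np.unique, a set does not return the names sorted:

    from types import SimpleNamespace

    apps = [SimpleNamespace(metadata=SimpleNamespace(name=n)) for n in ("b", "a", "b")]
    names = list({app.metadata.name for app in apps})
    print(sorted(names))  # ['a', 'b'] -- duplicates removed; sort only if order matters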
@@ -144,26 +130,18 @@ class BatchApplicationProcessor:
         futures = []
         for endpoint in endpoints:
             if (
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
-                ]
-                and endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
-                ]
-                == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
+                endpoint[mm_constants.EventFieldType.ACTIVE]
+                and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                == mm_constants.ModelMonitoringMode.enabled.value
             ):
                 # Skip router endpoint:
                 if (
-                    int(
-                        endpoint[
-                            mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
-                        ]
-                    )
-                    == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
+                    int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                    == mm_constants.EndpointType.ROUTER
                 ):
                     # Router endpoint has no feature stats
                     logger.info(
-                        f"{endpoint[
+                        f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
                     )
                     continue
                 future = pool.submit(
@@ -184,10 +162,11 @@ class BatchApplicationProcessor:

         self._delete_old_parquet()

-    @staticmethod
+    @classmethod
     def model_endpoint_process(
+        cls,
         endpoint: dict,
-        applications_names: List[str],
+        applications_names: list[str],
         bath_dict: dict,
         project: str,
         parquet_directory: str,
@@ -207,20 +186,14 @@ class BatchApplicationProcessor:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.

         """
-        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
+        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
         try:
             # Getting batch interval start time and end time
-            start_time, end_time =
-                bath_dict
-            )
+            start_time, end_time = cls._get_interval_range(bath_dict)
             m_fs = fstore.get_feature_set(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
-            ]
+            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
             if labels:
                 if isinstance(labels, str):
                     labels = json.loads(labels)
@@ -232,7 +205,7 @@ class BatchApplicationProcessor:

             try:
                 # get sample data
-                df =
+                df = cls._get_sample_df(
                     m_fs,
                     endpoint_id,
                     end_time,
@@ -245,14 +218,10 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Not enough model events since the beginning of the batch interval",
                     featureset_name=m_fs.metadata.name,
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-                    start_time=
-
-                    ),
-                    end_time=str(datetime.datetime.now()),
+                    start_time=start_time,
+                    end_time=end_time,
                 )
                 return

@@ -264,9 +233,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Parquet not found, probably due to not enough model events",
                     # parquet_target=m_fs.status.targets[0].path, TODO:
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
                 return
@@ -282,15 +249,11 @@ class BatchApplicationProcessor:
             m_fs.save()

             # Get the timestamp of the latest request:
-            latest_request = df[
-                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
-            ].iloc[-1]
+            latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]

             # Get the feature stats from the model endpoint for reference data
             feature_stats = json.loads(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_STATS]
             )

             # Get the current stats:
@@ -300,7 +263,7 @@ class BatchApplicationProcessor:
             )

             # create and push data to all applications
-
+            cls._push_to_applications(
                 current_stats,
                 feature_stats,
                 parquet_directory,
@@ -314,22 +277,27 @@ class BatchApplicationProcessor:

         except FileNotFoundError as e:
             logger.error(
-                f"Exception for endpoint {endpoint[
+                f"Exception for endpoint {endpoint[mm_constants.EventFieldType.UID]}"
             )
             return endpoint_id, e

     @staticmethod
-    def _get_interval_range(
+    def _get_interval_range(
+        batch_dict: dict[str, int],
+        now_func: Callable[[], datetime.datetime] = datetime.datetime.now,
+    ) -> Tuple[datetime.datetime, datetime.datetime]:
         """Getting batch interval time range"""
         minutes, hours, days = (
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
+            batch_dict[mm_constants.EventFieldType.MINUTES],
+            batch_dict[mm_constants.EventFieldType.HOURS],
+            batch_dict[mm_constants.EventFieldType.DAYS],
+        )
+        end_time = now_func() - datetime.timedelta(
+            seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
         )
-        start_time = datetime.datetime.now() - datetime.timedelta(
+        start_time = end_time - datetime.timedelta(
             minutes=minutes, hours=hours, days=days
         )
-        end_time = datetime.datetime.now()
         return start_time, end_time

     def _parse_batch_dict_str(self):
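The reworked _get_interval_range now shifts end_time back by the parquet batching timeout, so late-arriving rows are not cut off, and threads an injectable now_func clock through the computation, which makes the window deterministic under test. A standalone paraphrase (the flat parameters replace mlrun's batch_dict and config lookup):

    import datetime

    def get_interval_range(minutes, hours, days, timeout_secs,
                           now_func=datetime.datetime.now):
        # End the window timeout_secs before "now"; start it one interval earlier.
        end_time = now_func() - datetime.timedelta(seconds=timeout_secs)
        start_time = end_time - datetime.timedelta(
            minutes=minutes, hours=hours, days=days
        )
        return start_time, end_time

    # Injecting a fixed clock pins the window for tests:
    fixed_now = lambda: datetime.datetime(2024, 1, 1, 12, 0, 0)
    print(get_interval_range(0, 1, 0, timeout_secs=60, now_func=fixed_now))
    # (datetime(2024, 1, 1, 10, 59), datetime(2024, 1, 1, 11, 59))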
@@ -357,7 +325,7 @@ class BatchApplicationProcessor:
             ("minute", "%M"),
         ]:
             schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
-        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"
+        endpoint_str = f"{mm_constants.EventFieldType.ENDPOINT_ID}={endpoint_id}"

         return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"
@@ -374,10 +342,11 @@ class BatchApplicationProcessor:

         base_directory = get_monitoring_parquet_path(
             project=self.project,
-            kind=
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         target = ParquetTarget(path=base_directory)
-        fs = target._get_store().get_filesystem()
+        store, _ = target._get_store_and_path()
+        fs = store.get_filesystem()

         try:
             # List all subdirectories in the base directory
@@ -452,7 +421,7 @@ class BatchApplicationProcessor:
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
-                application_name=
+                application_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
         }
         for app_name in applications_names:
@@ -500,9 +469,7 @@ class BatchApplicationProcessor:
            }  # to avoid exception when the taf is not latest
            entity_rows = pd.DataFrame(
                {
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
-                        endpoint_id
-                    ],
+                    mm_constants.EventFieldType.ENDPOINT_ID: [endpoint_id],
                    "scheduled_time": [end_time],
                }
            )
@@ -512,12 +479,12 @@ class BatchApplicationProcessor:
             entity_timestamp_column="scheduled_time",
             start_time=start_time,
             end_time=end_time,
-            timestamp_for_filtering=
+            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
             target=ParquetTarget(
                 path=parquet_directory,
                 time_partitioning_granularity="minute",
                 partition_cols=[
-
+                    mm_constants.EventFieldType.ENDPOINT_ID,
                 ],
                 storage_options=storage_options,
             ),