PyPI - mlrun - Versions diffs - 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl - Mend

mlrun 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (93)

mlrun/api/api/deps.py +14 -1
mlrun/api/api/endpoints/frontend_spec.py +0 -2
mlrun/api/api/endpoints/functions.py +15 -27
mlrun/api/api/endpoints/grafana_proxy.py +435 -74
mlrun/api/api/endpoints/healthz.py +5 -18
mlrun/api/api/endpoints/model_endpoints.py +33 -37
mlrun/api/api/utils.py +6 -13
mlrun/api/crud/__init__.py +14 -16
mlrun/api/crud/logs.py +5 -7
mlrun/api/crud/model_monitoring/__init__.py +2 -2
mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
mlrun/api/crud/pipelines.py +2 -3
mlrun/api/db/sqldb/models/models_mysql.py +52 -19
mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
mlrun/api/db/sqldb/session.py +19 -26
mlrun/api/schemas/__init__.py +2 -0
mlrun/api/schemas/constants.py +0 -13
mlrun/api/schemas/frontend_spec.py +0 -1
mlrun/api/schemas/model_endpoints.py +38 -195
mlrun/api/schemas/schedule.py +2 -2
mlrun/api/utils/clients/log_collector.py +5 -0
mlrun/builder.py +9 -41
mlrun/config.py +1 -76
mlrun/data_types/__init__.py +1 -6
mlrun/data_types/data_types.py +1 -3
mlrun/datastore/__init__.py +2 -9
mlrun/datastore/sources.py +20 -25
mlrun/datastore/store_resources.py +1 -1
mlrun/datastore/targets.py +34 -67
mlrun/datastore/utils.py +4 -26
mlrun/db/base.py +2 -4
mlrun/db/filedb.py +5 -13
mlrun/db/httpdb.py +32 -64
mlrun/db/sqldb.py +2 -4
mlrun/errors.py +0 -5
mlrun/execution.py +0 -2
mlrun/feature_store/api.py +8 -24
mlrun/feature_store/feature_set.py +6 -28
mlrun/feature_store/feature_vector.py +0 -2
mlrun/feature_store/ingestion.py +11 -8
mlrun/feature_store/retrieval/base.py +43 -271
mlrun/feature_store/retrieval/dask_merger.py +153 -55
mlrun/feature_store/retrieval/job.py +3 -12
mlrun/feature_store/retrieval/local_merger.py +130 -48
mlrun/feature_store/retrieval/spark_merger.py +125 -126
mlrun/features.py +2 -7
mlrun/model_monitoring/constants.py +6 -48
mlrun/model_monitoring/helpers.py +35 -118
mlrun/model_monitoring/model_monitoring_batch.py +260 -293
mlrun/model_monitoring/stream_processing_fs.py +253 -220
mlrun/platforms/iguazio.py +0 -33
mlrun/projects/project.py +72 -34
mlrun/runtimes/base.py +0 -5
mlrun/runtimes/daskjob.py +0 -2
mlrun/runtimes/function.py +3 -29
mlrun/runtimes/kubejob.py +15 -39
mlrun/runtimes/local.py +45 -7
mlrun/runtimes/mpijob/abstract.py +0 -2
mlrun/runtimes/mpijob/v1.py +0 -2
mlrun/runtimes/pod.py +0 -2
mlrun/runtimes/remotesparkjob.py +0 -2
mlrun/runtimes/serving.py +0 -6
mlrun/runtimes/sparkjob/abstract.py +2 -39
mlrun/runtimes/sparkjob/spark3job.py +0 -2
mlrun/serving/__init__.py +1 -2
mlrun/serving/routers.py +35 -35
mlrun/serving/server.py +12 -22
mlrun/serving/states.py +30 -162
mlrun/serving/v2_serving.py +10 -13
mlrun/utils/clones.py +1 -1
mlrun/utils/model_monitoring.py +96 -122
mlrun/utils/version/version.json +2 -2
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
mlrun/api/crud/model_monitoring/grafana.py +0 -427
mlrun/datastore/spark_udf.py +0 -40
mlrun/model_monitoring/__init__.py +0 -44
mlrun/model_monitoring/common.py +0 -112
mlrun/model_monitoring/model_endpoint.py +0 -141
mlrun/model_monitoring/stores/__init__.py +0 -106
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
mlrun/model_monitoring/stores/models/__init__.py +0 -23
mlrun/model_monitoring/stores/models/base.py +0 -18
mlrun/model_monitoring/stores/models/mysql.py +0 -100
mlrun/model_monitoring/stores/models/sqlite.py +0 -98
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
mlrun/utils/db.py +0 -52
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
{mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/model_monitoring_batch.py CHANGED Viewed

@@ -31,12 +31,11 @@ import mlrun
 import mlrun.api.schemas
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.model_monitoring
-import mlrun.model_monitoring.stores
 import mlrun.run
 import mlrun.utils.helpers
 import mlrun.utils.model_monitoring
 import mlrun.utils.v3io_clients
+from mlrun.model_monitoring.constants import EventFieldType
 from mlrun.utils import logger
@@ -462,7 +461,6 @@ def calculate_inputs_statistics(
     :returns: The calculated statistics of the inputs data.
     """
     # Use `DFDataInfer` to calculate the statistics over the inputs:
     inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
         df=inputs,
@@ -495,6 +493,8 @@ class BatchProcessor:
         self,
         context: mlrun.run.MLClientCtx,
         project: str,
+        model_monitoring_access_key: str,
+        v3io_access_key: str,
     ):
         """
@@ -502,16 +502,60 @@ class BatchProcessor:
         :param context:                     An MLRun context.
         :param project:                     Project name.
+        :param model_monitoring_access_key: Access key to apply the model monitoring process.
+        :param v3io_access_key:             Token key for v3io.
         """
         self.context = context
         self.project = project
+        self.v3io_access_key = v3io_access_key
+        self.model_monitoring_access_key = (
+            model_monitoring_access_key or v3io_access_key
+        )
         # Initialize virtual drift object
         self.virtual_drift = VirtualDrift(inf_capping=10)
+        # Define the required paths for the project objects.
+        # Note that the kv table, tsdb, and the input stream paths are located at the default location
+        # while the parquet path is located at the user-space location
+        template = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default
+        kv_path = template.format(project=self.project, kind="endpoints")
+        (
+            _,
+            self.kv_container,
+            self.kv_path,
+        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(kv_path)
+        tsdb_path = template.format(project=project, kind="events")
+        (
+            _,
+            self.tsdb_container,
+            self.tsdb_path,
+        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
+        stream_path = template.format(project=self.project, kind="log_stream")
+        (
+            _,
+            self.stream_container,
+            self.stream_path,
+        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
+        self.parquet_path = (
+            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                project=project, kind="parquet"
+            )
+        )
         logger.info(
             "Initializing BatchProcessor",
             project=project,
+            model_monitoring_access_key_initalized=bool(model_monitoring_access_key),
+            v3io_access_key_initialized=bool(v3io_access_key),
+            parquet_path=self.parquet_path,
+            kv_container=self.kv_container,
+            kv_path=self.kv_path,
+            tsdb_container=self.tsdb_container,
+            tsdb_path=self.tsdb_path,
+            stream_container=self.stream_container,
+            stream_path=self.stream_path,
         )
         # Get drift thresholds from the model monitoring configuration
@@ -523,54 +567,7 @@ class BatchProcessor:
         )
         # Get a runtime database
-        self.db = mlrun.model_monitoring.stores.get_model_endpoint_store(
-            project=project
-        )
-        if not mlrun.mlconf.is_ce_mode():
-            # TODO: Once there is a time series DB alternative in a non-CE deployment, we need to update this if
-            #  statement to be applied only for V3IO TSDB
-            self._initialize_v3io_configurations()
-        # If an error occurs, it will be raised using the following argument
-        self.exception = None
-        # Get the batch interval range
-        self.batch_dict = context.parameters[
-            mlrun.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
-        ]
-        # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
-        # Convert batch dict string into a dictionary
-        if isinstance(self.batch_dict, str):
-            self._parse_batch_dict_str()
-    def _initialize_v3io_configurations(self):
-        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = (
-            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
-        )
-        # Define the required paths for the project objects
-        tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project, kind=mlrun.model_monitoring.FileTargetKind.EVENTS
-        )
-        (
-            _,
-            self.tsdb_container,
-            self.tsdb_path,
-        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
-        # stream_path = template.format(project=self.project, kind="log_stream")
-        stream_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project,
-            kind=mlrun.model_monitoring.FileTargetKind.LOG_STREAM,
-        )
-        (
-            _,
-            self.stream_container,
-            self.stream_path,
-        ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(stream_path)
+        self.db = mlrun.get_run_db()
         # Get the frames clients based on the v3io configuration
         # it will be used later for writing the results into the tsdb
@@ -583,26 +580,33 @@ class BatchProcessor:
             token=self.v3io_access_key,
         )
+        # If an error occurs, it will be raised using the following argument
+        self.exception = None
+        # Get the batch interval range
+        self.batch_dict = context.parameters[EventFieldType.BATCH_INTERVALS_DICT]
+        # TODO: This will be removed in 1.2.0 once the job params can be parsed with different types
+        # Convert batch dict string into a dictionary
+        if isinstance(self.batch_dict, str):
+            self._parse_batch_dict_str()
     def post_init(self):
         """
         Preprocess of the batch processing.
         """
-        if not mlrun.mlconf.is_ce_mode():
-            # Create v3io stream based on the input stream
-            response = self.v3io.create_stream(
-                container=self.stream_container,
-                path=self.stream_path,
-                shard_count=1,
-                raise_for_status=v3io.dataplane.RaiseForStatus.never,
-                access_key=self.v3io_access_key,
-            )
+        # create v3io stream based on the input stream
+        response = self.v3io.create_stream(
+            container=self.stream_container,
+            path=self.stream_path,
+            shard_count=1,
+            raise_for_status=v3io.dataplane.RaiseForStatus.never,
+            access_key=self.v3io_access_key,
+        )
-            if not (
-                response.status_code == 400 and "ResourceInUse" in str(response.body)
-            ):
-                response.raise_for_status([409, 204, 403])
-        pass
+        if not (response.status_code == 400 and "ResourceInUse" in str(response.body)):
+            response.raise_for_status([409, 204, 403])
     def run(self):
         """
@@ -610,202 +614,231 @@ class BatchProcessor:
         """
         # Get model endpoints (each deployed project has at least 1 serving model):
         try:
-            endpoints = self.db.list_model_endpoints()
+            endpoints = self.db.list_model_endpoints(self.project)
         except Exception as e:
             logger.error("Failed to list endpoints", exc=e)
             return
-        for endpoint in endpoints:
+        active_endpoints = set()
+        for endpoint in endpoints.endpoints:
             if (
-                endpoint[mlrun.model_monitoring.EventFieldType.ACTIVE]
-                and endpoint[mlrun.model_monitoring.EventFieldType.MONITORING_MODE]
-                == mlrun.model_monitoring.ModelMonitoringMode.enabled.value
+                endpoint.spec.active
+                and endpoint.spec.monitoring_mode
+                == mlrun.api.schemas.ModelMonitoringMode.enabled.value
             ):
+                active_endpoints.add(endpoint.metadata.uid)
+        # perform drift analysis for each model endpoint
+        for endpoint_id in active_endpoints:
+            try:
+                # Get model endpoint object:
+                endpoint = self.db.get_model_endpoint(
+                    project=self.project, endpoint_id=endpoint_id
+                )
                 # Skip router endpoint:
                 if (
-                    int(endpoint[mlrun.model_monitoring.EventFieldType.ENDPOINT_TYPE])
-                    == mlrun.model_monitoring.EndpointType.ROUTER
+                    endpoint.status.endpoint_type
+                    == mlrun.utils.model_monitoring.EndpointType.ROUTER
                 ):
-                    # Router endpoint has no feature stats
-                    logger.info(
-                        f"{endpoint[mlrun.model_monitoring.EventFieldType.UID]} is router skipping"
-                    )
+                    # endpoint.status.feature_stats is None
+                    logger.info(f"{endpoint_id} is router skipping")
                     continue
-                self.update_drift_metrics(endpoint=endpoint)
-    def update_drift_metrics(self, endpoint: dict):
-        try:
-            # Convert feature set into dataframe and get the latest dataset
-            (
-                _,
-                serving_function_name,
-                _,
-                _,
-            ) = mlrun.utils.helpers.parse_versioned_object_uri(
-                endpoint[mlrun.model_monitoring.EventFieldType.FUNCTION_URI]
-            )
+                # convert feature set into dataframe and get the latest dataset
+                (
+                    _,
+                    serving_function_name,
+                    _,
+                    _,
+                ) = mlrun.utils.helpers.parse_versioned_object_uri(
+                    endpoint.spec.function_uri
+                )
-            model_name = endpoint[mlrun.model_monitoring.EventFieldType.MODEL].replace(
-                ":", "-"
-            )
+                model_name = endpoint.spec.model.replace(":", "-")
-            m_fs = fstore.get_feature_set(
-                f"store://feature-sets/{self.project}/monitoring-{serving_function_name}-{model_name}"
-            )
+                m_fs = fstore.get_feature_set(
+                    f"store://feature-sets/{self.project}/monitoring-{serving_function_name}-{model_name}"
+                )
-            # Getting batch interval start time and end time
-            start_time, end_time = self._get_interval_range()
+                # Getting batch interval start time and end time
+                start_time, end_time = self.get_interval_range()
-            try:
-                df = m_fs.to_dataframe(
-                    start_time=start_time,
-                    end_time=end_time,
-                    time_column=mlrun.model_monitoring.EventFieldType.TIMESTAMP,
-                )
+                try:
+                    df = m_fs.to_dataframe(
+                        start_time=start_time,
+                        end_time=end_time,
+                        time_column="timestamp",
+                    )
+                    if len(df) == 0:
+                        logger.warn(
+                            "Not enough model events since the beginning of the batch interval",
+                            parquet_target=m_fs.status.targets[0].path,
+                            endpoint=endpoint_id,
+                            min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
+                            start_time=str(
+                                datetime.datetime.now() - datetime.timedelta(hours=1)
+                            ),
+                            end_time=str(datetime.datetime.now()),
+                        )
+                        continue
-                if len(df) == 0:
+                # TODO: The below warn will be removed once the state of the Feature Store target is updated
+                #       as expected. In that case, the existence of the file will be checked before trying to get
+                #       the offline data from the feature set.
+                # Continue if not enough events provided since the deployment of the model endpoint
+                except FileNotFoundError:
                     logger.warn(
-                        "Not enough model events since the beginning of the batch interval",
+                        "Parquet not found, probably due to not enough model events",
                         parquet_target=m_fs.status.targets[0].path,
-                        endpoint=endpoint[mlrun.model_monitoring.EventFieldType.UID],
+                        endpoint=endpoint_id,
                         min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-                        start_time=str(
-                            datetime.datetime.now() - datetime.timedelta(hours=1)
-                        ),
-                        end_time=str(datetime.datetime.now()),
                     )
-                    return
-            # TODO: The below warn will be removed once the state of the Feature Store target is updated
-            #       as expected. In that case, the existence of the file will be checked before trying to get
-            #       the offline data from the feature set.
-            # Continue if not enough events provided since the deployment of the model endpoint
-            except FileNotFoundError:
-                logger.warn(
-                    "Parquet not found, probably due to not enough model events",
-                    parquet_target=m_fs.status.targets[0].path,
-                    endpoint=endpoint[mlrun.model_monitoring.EventFieldType.UID],
-                    min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-                )
-                return
-            # Get feature names from monitoring feature set
-            feature_names = [
-                feature_name["name"] for feature_name in m_fs.spec.features.to_dict()
-            ]
-            # Create DataFrame based on the input features
-            stats_columns = [
-                mlrun.model_monitoring.EventFieldType.TIMESTAMP,
-                *feature_names,
-            ]
+                    continue
-            # Add label names if provided
-            if endpoint[mlrun.model_monitoring.EventFieldType.LABEL_NAMES]:
-                labels = endpoint[mlrun.model_monitoring.EventFieldType.LABEL_NAMES]
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                stats_columns.extend(labels)
-            named_features_df = df[stats_columns].copy()
-            # Infer feature set stats and schema
-            fstore.api._infer_from_static_df(
-                named_features_df,
-                m_fs,
-                options=mlrun.data_types.infer.InferOptions.all_stats(),
-            )
+                # Get feature names from monitoring feature set
+                feature_names = [
+                    feature_name["name"]
+                    for feature_name in m_fs.spec.features.to_dict()
+                ]
+                # Create DataFrame based on the input features
+                stats_columns = [
+                    "timestamp",
+                    *feature_names,
+                ]
+                # Add label names if provided
+                if endpoint.spec.label_names:
+                    stats_columns.extend(endpoint.spec.label_names)
+                named_features_df = df[stats_columns].copy()
+                # Infer feature set stats and schema
+                fstore.api._infer_from_static_df(
+                    named_features_df,
+                    m_fs,
+                    options=mlrun.data_types.infer.InferOptions.all_stats(),
+                )
-            # Save feature set to apply changes
-            m_fs.save()
+                # Save feature set to apply changes
+                m_fs.save()
-            # Get the timestamp of the latest request:
-            timestamp = df[mlrun.model_monitoring.EventFieldType.TIMESTAMP].iloc[-1]
+                # Get the timestamp of the latest request:
+                timestamp = df["timestamp"].iloc[-1]
-            # Get the feature stats from the model endpoint for reference data
-            feature_stats = json.loads(
-                endpoint[mlrun.model_monitoring.EventFieldType.FEATURE_STATS]
-            )
+                # Get the current stats:
+                current_stats = calculate_inputs_statistics(
+                    sample_set_statistics=endpoint.status.feature_stats,
+                    inputs=named_features_df,
+                )
-            # Get the current stats:
-            current_stats = calculate_inputs_statistics(
-                sample_set_statistics=feature_stats,
-                inputs=named_features_df,
-            )
+                # Compute the drift based on the histogram of the current stats and the histogram of the original
+                # feature stats that can be found in the model endpoint object:
+                drift_result = self.virtual_drift.compute_drift_from_histograms(
+                    feature_stats=endpoint.status.feature_stats,
+                    current_stats=current_stats,
+                )
+                logger.info("Drift result", drift_result=drift_result)
-            # Compute the drift based on the histogram of the current stats and the histogram of the original
-            # feature stats that can be found in the model endpoint object:
-            drift_result = self.virtual_drift.compute_drift_from_histograms(
-                feature_stats=feature_stats,
-                current_stats=current_stats,
-            )
-            logger.info("Drift result", drift_result=drift_result)
-            # Get drift thresholds from the model configuration:
-            monitor_configuration = (
-                json.loads(
-                    endpoint[
-                        mlrun.model_monitoring.EventFieldType.MONITOR_CONFIGURATION
-                    ]
+                # Get drift thresholds from the model configuration:
+                monitor_configuration = endpoint.spec.monitor_configuration or {}
+                possible_drift = monitor_configuration.get(
+                    "possible_drift", self.default_possible_drift_threshold
+                )
+                drift_detected = monitor_configuration.get(
+                    "drift_detected", self.default_drift_detected_threshold
                 )
-                or {}
-            )
-            possible_drift = monitor_configuration.get(
-                "possible_drift", self.default_possible_drift_threshold
-            )
-            drift_detected = monitor_configuration.get(
-                "drift_detected", self.default_drift_detected_threshold
-            )
-            # Check for possible drift based on the results of the statistical metrics defined above:
-            drift_status, drift_measure = self.virtual_drift.check_for_drift(
-                metrics_results_dictionary=drift_result,
-                possible_drift_threshold=possible_drift,
-                drift_detected_threshold=drift_detected,
-            )
-            logger.info(
-                "Drift status",
-                endpoint_id=endpoint[mlrun.model_monitoring.EventFieldType.UID],
-                drift_status=drift_status.value,
-                drift_measure=drift_measure,
-            )
+                # Check for possible drift based on the results of the statistical metrics defined above:
+                drift_status, drift_measure = self.virtual_drift.check_for_drift(
+                    metrics_results_dictionary=drift_result,
+                    possible_drift_threshold=possible_drift,
+                    drift_detected_threshold=drift_detected,
+                )
+                logger.info(
+                    "Drift status",
+                    endpoint_id=endpoint_id,
+                    drift_status=drift_status.value,
+                    drift_measure=drift_measure,
+                )
-            attributes = {
-                "current_stats": json.dumps(current_stats),
-                "drift_measures": json.dumps(drift_result),
-                "drift_status": drift_status.value,
-            }
+                # If drift was detected, add the results to the input stream
+                if (
+                    drift_status == DriftStatus.POSSIBLE_DRIFT
+                    or drift_status == DriftStatus.DRIFT_DETECTED
+                ):
+                    self.v3io.stream.put_records(
+                        container=self.stream_container,
+                        stream_path=self.stream_path,
+                        records=[
+                            {
+                                "data": json.dumps(
+                                    {
+                                        "endpoint_id": endpoint_id,
+                                        "drift_status": drift_status.value,
+                                        "drift_measure": drift_measure,
+                                        "drift_per_feature": {**drift_result},
+                                    }
+                                )
+                            }
+                        ],
+                    )
-            self.db.update_model_endpoint(
-                endpoint_id=endpoint[mlrun.model_monitoring.EventFieldType.UID],
-                attributes=attributes,
-            )
+                attributes = {
+                    "current_stats": json.dumps(current_stats),
+                    "drift_measures": json.dumps(drift_result),
+                    "drift_status": drift_status.value,
+                }
-            if not mlrun.mlconf.is_ce_mode():
-                # Update drift results in TSDB
-                self._update_drift_in_input_stream(
-                    endpoint_id=endpoint[mlrun.model_monitoring.EventFieldType.UID],
-                    drift_status=drift_status,
-                    drift_measure=drift_measure,
-                    drift_result=drift_result,
-                    timestamp=timestamp,
-                )
-                logger.info(
-                    "Done updating drift measures",
-                    endpoint_id=endpoint[mlrun.model_monitoring.EventFieldType.UID],
+                self.db.patch_model_endpoint(
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                    attributes=attributes,
                 )
-        except Exception as e:
-            logger.error(
-                f"Exception for endpoint {endpoint[mlrun.model_monitoring.EventFieldType.UID]}"
-            )
-            self.exception = e
+                # Update the results in tsdb:
+                tsdb_drift_measures = {
+                    "endpoint_id": endpoint_id,
+                    "timestamp": pd.to_datetime(
+                        timestamp,
+                        format=EventFieldType.TIME_FORMAT,
+                    ),
+                    "record_type": "drift_measures",
+                    "tvd_mean": drift_result["tvd_mean"],
+                    "kld_mean": drift_result["kld_mean"],
+                    "hellinger_mean": drift_result["hellinger_mean"],
+                }
+                try:
+                    self.frames.write(
+                        backend="tsdb",
+                        table=self.tsdb_path,
+                        dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
+                        index_cols=["timestamp", "endpoint_id", "record_type"],
+                    )
+                except v3io_frames.errors.Error as err:
+                    logger.warn(
+                        "Could not write drift measures to TSDB",
+                        err=err,
+                        tsdb_path=self.tsdb_path,
+                        endpoint=endpoint_id,
+                    )
+                logger.info("Done updating drift measures", endpoint_id=endpoint_id)
-    def _get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
+            except Exception as e:
+                logger.error(f"Exception for endpoint {endpoint_id}")
+                self.exception = e
+    def get_interval_range(self) -> Tuple[datetime.datetime, datetime.datetime]:
         """Getting batch interval time range"""
         minutes, hours, days = (
-            self.batch_dict[mlrun.model_monitoring.EventFieldType.MINUTES],
-            self.batch_dict[mlrun.model_monitoring.EventFieldType.HOURS],
-            self.batch_dict[mlrun.model_monitoring.EventFieldType.DAYS],
+            self.batch_dict[EventFieldType.MINUTES],
+            self.batch_dict[EventFieldType.HOURS],
+            self.batch_dict[EventFieldType.DAYS],
         )
         start_time = datetime.datetime.now() - datetime.timedelta(
             minutes=minutes, hours=hours, days=days
@@ -825,79 +858,13 @@ class BatchProcessor:
             pair_list = pair.split(":")
             self.batch_dict[pair_list[0]] = float(pair_list[1])
-    def _update_drift_in_input_stream(
-        self,
-        endpoint_id: str,
-        drift_status: DriftStatus,
-        drift_measure: float,
-        drift_result: Dict[str, Dict[str, Any]],
-        timestamp: pd._libs.tslibs.timestamps.Timestamp,
-    ):
-        """Update drift results in input stream.
-        :param endpoint_id:   The unique id of the model endpoint.
-        :param drift_status:  Drift status result. Possible values can be found under DriftStatus enum class.
-        :param drift_measure: The drift result (float) based on the mean of the Total Variance Distance and the
-                              Hellinger distance.
-        :param drift_result:  A dictionary that includes the drift results for each feature.
-        :param timestamp:     Pandas Timestamp value.
-        """
-        if (
-            drift_status == DriftStatus.POSSIBLE_DRIFT
-            or drift_status == DriftStatus.DRIFT_DETECTED
-        ):
-            self.v3io.stream.put_records(
-                container=self.stream_container,
-                stream_path=self.stream_path,
-                records=[
-                    {
-                        "data": json.dumps(
-                            {
-                                "endpoint_id": endpoint_id,
-                                "drift_status": drift_status.value,
-                                "drift_measure": drift_measure,
-                                "drift_per_feature": {**drift_result},
-                            }
-                        )
-                    }
-                ],
-            )
-        # Update the results in tsdb:
-        tsdb_drift_measures = {
-            "endpoint_id": endpoint_id,
-            "timestamp": pd.to_datetime(
-                timestamp,
-                format=mlrun.model_monitoring.EventFieldType.TIME_FORMAT,
-            ),
-            "record_type": "drift_measures",
-            "tvd_mean": drift_result["tvd_mean"],
-            "kld_mean": drift_result["kld_mean"],
-            "hellinger_mean": drift_result["hellinger_mean"],
-        }
-        try:
-            self.frames.write(
-                backend="tsdb",
-                table=self.tsdb_path,
-                dfs=pd.DataFrame.from_dict([tsdb_drift_measures]),
-                index_cols=["timestamp", "endpoint_id", "record_type"],
-            )
-        except v3io_frames.errors.Error as err:
-            logger.warn(
-                "Could not write drift measures to TSDB",
-                err=err,
-                tsdb_path=self.tsdb_path,
-                endpoint=endpoint_id,
-            )
 def handler(context: mlrun.run.MLClientCtx):
     batch_processor = BatchProcessor(
         context=context,
         project=context.project,
+        model_monitoring_access_key=os.environ.get("MODEL_MONITORING_ACCESS_KEY"),
+        v3io_access_key=os.environ.get("V3IO_ACCESS_KEY"),
     )
     batch_processor.post_init()
     batch_processor.run()

mlrun 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl

Potentially problematic release.

mlrun 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl