mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import json
|
|
16
15
|
from dataclasses import dataclass
|
|
17
16
|
from typing import Final, Optional, Protocol, Union, cast
|
|
18
17
|
|
|
@@ -25,10 +24,10 @@ import mlrun.model_monitoring.applications.context as mm_context
|
|
|
25
24
|
import mlrun.model_monitoring.applications.results as mm_results
|
|
26
25
|
import mlrun.model_monitoring.features_drift_table as mm_drift_table
|
|
27
26
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
28
|
-
EventFieldType,
|
|
29
27
|
HistogramDataDriftApplicationConstants,
|
|
30
28
|
ResultKindApp,
|
|
31
29
|
ResultStatusApp,
|
|
30
|
+
StatsKind,
|
|
32
31
|
)
|
|
33
32
|
from mlrun.model_monitoring.applications import (
|
|
34
33
|
ModelMonitoringApplicationBase,
|
|
@@ -103,23 +102,28 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
103
102
|
Each metric is calculated over all the features individually and the mean is taken as the metric value.
|
|
104
103
|
The average of Hellinger and total variance distance is taken as the result.
|
|
105
104
|
|
|
106
|
-
The application
|
|
105
|
+
The application can log two artifacts (disabled by default due to performance issues):
|
|
107
106
|
|
|
108
|
-
*
|
|
109
|
-
*
|
|
107
|
+
* JSON with the general drift value per feature.
|
|
108
|
+
* Plotly table with the various metrics and histograms per feature.
|
|
110
109
|
|
|
111
|
-
This application is deployed by default when calling
|
|
110
|
+
This application is deployed by default when calling
|
|
111
|
+
:py:func:`~mlrun.projects.MlrunProject.enable_model_monitoring`.
|
|
112
|
+
To avoid it, pass :code:`deploy_histogram_data_drift_app=False`.
|
|
112
113
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
To avoid it, pass `deploy_histogram_data_drift_app=False`.
|
|
114
|
+
If you want to change the application defaults, such as the classifier or which artifacts to produce, you
|
|
115
|
+
need to inherit from this class and deploy it as any other model monitoring application.
|
|
116
|
+
Please make sure to keep the default application name. This ensures that the full functionality of the application,
|
|
117
|
+
including the statistics view in the UI, is available.
|
|
118
118
|
"""
|
|
119
119
|
|
|
120
120
|
NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
|
|
121
121
|
|
|
122
122
|
_REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
|
|
123
|
+
_STATS_TYPES: tuple[StatsKind, StatsKind] = (
|
|
124
|
+
StatsKind.CURRENT_STATS,
|
|
125
|
+
StatsKind.DRIFT_MEASURES,
|
|
126
|
+
)
|
|
123
127
|
|
|
124
128
|
metrics: list[type[HistogramDistanceMetric]] = [
|
|
125
129
|
HellingerDistance,
|
|
@@ -127,16 +131,26 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
127
131
|
TotalVarianceDistance,
|
|
128
132
|
]
|
|
129
133
|
|
|
130
|
-
def __init__(
|
|
134
|
+
def __init__(
|
|
135
|
+
self,
|
|
136
|
+
value_classifier: Optional[ValueClassifier] = None,
|
|
137
|
+
produce_json_artifact: bool = False,
|
|
138
|
+
produce_plotly_artifact: bool = False,
|
|
139
|
+
) -> None:
|
|
131
140
|
"""
|
|
132
|
-
:param value_classifier:
|
|
133
|
-
|
|
141
|
+
:param value_classifier: Classifier object that adheres to the :py:class:`~ValueClassifier` protocol.
|
|
142
|
+
If not provided, the default :py:class:`~DataDriftClassifier` is used.
|
|
143
|
+
:param produce_json_artifact: Whether to produce the JSON artifact or not, ``False`` by default.
|
|
144
|
+
:param produce_plotly_artifact: Whether to produce the Plotly artifact or not, ``False`` by default.
|
|
134
145
|
"""
|
|
135
146
|
self._value_classifier = value_classifier or DataDriftClassifier()
|
|
136
147
|
assert self._REQUIRED_METRICS <= set(
|
|
137
148
|
self.metrics
|
|
138
149
|
), "TVD and Hellinger distance are required for the general data drift result"
|
|
139
150
|
|
|
151
|
+
self._produce_json_artifact = produce_json_artifact
|
|
152
|
+
self._produce_plotly_artifact = produce_plotly_artifact
|
|
153
|
+
|
|
140
154
|
def _compute_metrics_per_feature(
|
|
141
155
|
self, monitoring_context: mm_context.MonitoringApplicationContext
|
|
142
156
|
) -> DataFrame:
|
|
@@ -167,10 +181,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
167
181
|
return metrics_per_feature
|
|
168
182
|
|
|
169
183
|
def _get_general_drift_result(
|
|
170
|
-
self,
|
|
171
|
-
metrics: list[mm_results.ModelMonitoringApplicationMetric],
|
|
172
|
-
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
173
|
-
metrics_per_feature: DataFrame,
|
|
184
|
+
self, metrics: list[mm_results.ModelMonitoringApplicationMetric]
|
|
174
185
|
) -> mm_results.ModelMonitoringApplicationResult:
|
|
175
186
|
"""Get the general drift result from the metrics list"""
|
|
176
187
|
value = cast(
|
|
@@ -189,21 +200,12 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
189
200
|
)
|
|
190
201
|
|
|
191
202
|
status = self._value_classifier.value_to_status(value)
|
|
203
|
+
|
|
192
204
|
return mm_results.ModelMonitoringApplicationResult(
|
|
193
205
|
name=HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
|
|
194
206
|
value=value,
|
|
195
207
|
kind=ResultKindApp.data_drift,
|
|
196
208
|
status=status,
|
|
197
|
-
extra_data={
|
|
198
|
-
EventFieldType.CURRENT_STATS: json.dumps(
|
|
199
|
-
monitoring_context.sample_df_stats
|
|
200
|
-
),
|
|
201
|
-
EventFieldType.DRIFT_MEASURES: json.dumps(
|
|
202
|
-
metrics_per_feature.T.to_dict()
|
|
203
|
-
| {metric.name: metric.value for metric in metrics}
|
|
204
|
-
),
|
|
205
|
-
EventFieldType.DRIFT_STATUS: status.value,
|
|
206
|
-
},
|
|
207
209
|
)
|
|
208
210
|
|
|
209
211
|
@staticmethod
|
|
@@ -225,6 +227,36 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
225
227
|
|
|
226
228
|
return metrics
|
|
227
229
|
|
|
230
|
+
@staticmethod
|
|
231
|
+
def _get_stats(
|
|
232
|
+
metrics: list[mm_results.ModelMonitoringApplicationMetric],
|
|
233
|
+
metrics_per_feature: DataFrame,
|
|
234
|
+
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
235
|
+
) -> list[mm_results._ModelMonitoringApplicationStats]:
|
|
236
|
+
"""
|
|
237
|
+
Return a list of the statistics.
|
|
238
|
+
|
|
239
|
+
:param metrics: the calculated metrics
|
|
240
|
+
:param metrics_per_feature: metric calculated per feature
|
|
241
|
+
:param monitoring_context: context object for current monitoring application
|
|
242
|
+
:returns: list of mm_results._ModelMonitoringApplicationStats for histogram data drift application
|
|
243
|
+
"""
|
|
244
|
+
stats = []
|
|
245
|
+
for stats_type in HistogramDataDriftApplication._STATS_TYPES:
|
|
246
|
+
stats.append(
|
|
247
|
+
mm_results._ModelMonitoringApplicationStats(
|
|
248
|
+
name=stats_type,
|
|
249
|
+
stats=metrics_per_feature.T.to_dict()
|
|
250
|
+
| {metric.name: metric.value for metric in metrics}
|
|
251
|
+
if stats_type == StatsKind.DRIFT_MEASURES
|
|
252
|
+
else monitoring_context.sample_df_stats,
|
|
253
|
+
timestamp=monitoring_context.end_infer_time.isoformat(
|
|
254
|
+
sep=" ", timespec="microseconds"
|
|
255
|
+
),
|
|
256
|
+
)
|
|
257
|
+
)
|
|
258
|
+
return stats
|
|
259
|
+
|
|
228
260
|
@staticmethod
|
|
229
261
|
def _get_shared_features_sample_stats(
|
|
230
262
|
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
@@ -275,55 +307,55 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
275
307
|
cast(str, key): (self._value_classifier.value_to_status(value), value)
|
|
276
308
|
for key, value in drift_per_feature_values.items()
|
|
277
309
|
}
|
|
278
|
-
monitoring_context.logger.debug("
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
drift_results=drift_results,
|
|
285
|
-
)
|
|
310
|
+
monitoring_context.logger.debug("Producing plotly artifact")
|
|
311
|
+
artifact = mm_drift_table.FeaturesDriftTablePlot().produce(
|
|
312
|
+
sample_set_statistics=sample_set_statistics,
|
|
313
|
+
inputs_statistics=inputs_statistics,
|
|
314
|
+
metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
|
|
315
|
+
drift_results=drift_results,
|
|
286
316
|
)
|
|
317
|
+
monitoring_context.logger.debug("Logging plotly artifact")
|
|
318
|
+
monitoring_context.log_artifact(artifact)
|
|
287
319
|
monitoring_context.logger.debug("Logged plotly artifact successfully")
|
|
288
320
|
|
|
289
321
|
def _log_drift_artifacts(
|
|
290
322
|
self,
|
|
291
323
|
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
292
324
|
metrics_per_feature: DataFrame,
|
|
293
|
-
log_json_artifact: bool = True,
|
|
294
325
|
) -> None:
|
|
295
326
|
"""Log JSON and Plotly drift data per feature artifacts"""
|
|
327
|
+
if not self._produce_json_artifact and not self._produce_plotly_artifact:
|
|
328
|
+
return
|
|
329
|
+
|
|
296
330
|
drift_per_feature_values = metrics_per_feature[
|
|
297
331
|
[HellingerDistance.NAME, TotalVarianceDistance.NAME]
|
|
298
332
|
].mean(axis=1)
|
|
299
333
|
|
|
300
|
-
if
|
|
334
|
+
if self._produce_json_artifact:
|
|
301
335
|
self._log_json_artifact(drift_per_feature_values, monitoring_context)
|
|
302
336
|
|
|
303
|
-
self.
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
337
|
+
if self._produce_plotly_artifact:
|
|
338
|
+
self._log_plotly_table_artifact(
|
|
339
|
+
sample_set_statistics=self._get_shared_features_sample_stats(
|
|
340
|
+
monitoring_context
|
|
341
|
+
),
|
|
342
|
+
inputs_statistics=monitoring_context.feature_stats,
|
|
343
|
+
metrics_per_feature=metrics_per_feature,
|
|
344
|
+
drift_per_feature_values=drift_per_feature_values,
|
|
345
|
+
monitoring_context=monitoring_context,
|
|
346
|
+
)
|
|
312
347
|
|
|
313
348
|
def do_tracking(
|
|
314
|
-
self,
|
|
315
|
-
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
349
|
+
self, monitoring_context: mm_context.MonitoringApplicationContext
|
|
316
350
|
) -> list[
|
|
317
351
|
Union[
|
|
318
352
|
mm_results.ModelMonitoringApplicationResult,
|
|
319
353
|
mm_results.ModelMonitoringApplicationMetric,
|
|
354
|
+
mm_results._ModelMonitoringApplicationStats,
|
|
320
355
|
]
|
|
321
356
|
]:
|
|
322
357
|
"""
|
|
323
358
|
Calculate and return the data drift metrics, averaged over the features.
|
|
324
|
-
|
|
325
|
-
Refer to `ModelMonitoringApplicationBaseV2` for the meaning of the
|
|
326
|
-
function arguments.
|
|
327
359
|
"""
|
|
328
360
|
monitoring_context.logger.debug("Starting to run the application")
|
|
329
361
|
if not monitoring_context.feature_stats:
|
|
@@ -342,13 +374,14 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
|
|
|
342
374
|
)
|
|
343
375
|
monitoring_context.logger.debug("Computing average per metric")
|
|
344
376
|
metrics = self._get_metrics(metrics_per_feature)
|
|
345
|
-
result = self._get_general_drift_result(
|
|
377
|
+
result = self._get_general_drift_result(metrics=metrics)
|
|
378
|
+
stats = self._get_stats(
|
|
346
379
|
metrics=metrics,
|
|
347
380
|
monitoring_context=monitoring_context,
|
|
348
381
|
metrics_per_feature=metrics_per_feature,
|
|
349
382
|
)
|
|
350
|
-
|
|
383
|
+
metrics_result_and_stats = metrics + [result] + stats
|
|
351
384
|
monitoring_context.logger.debug(
|
|
352
|
-
"Finished running the application", results=
|
|
385
|
+
"Finished running the application", results=metrics_result_and_stats
|
|
353
386
|
)
|
|
354
|
-
return
|
|
387
|
+
return metrics_result_and_stats
|
|
@@ -17,20 +17,26 @@ import json
|
|
|
17
17
|
import re
|
|
18
18
|
from abc import ABC, abstractmethod
|
|
19
19
|
|
|
20
|
+
from pydantic.v1 import validator
|
|
21
|
+
from pydantic.v1.dataclasses import dataclass
|
|
22
|
+
|
|
20
23
|
import mlrun.common.helpers
|
|
21
24
|
import mlrun.common.model_monitoring.helpers
|
|
22
|
-
import mlrun.common.schemas.model_monitoring.constants as
|
|
25
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
23
26
|
import mlrun.utils.v3io_clients
|
|
27
|
+
from mlrun.utils import logger
|
|
28
|
+
|
|
29
|
+
_RESULT_EXTRA_DATA_MAX_SIZE = 998
|
|
24
30
|
|
|
25
31
|
|
|
26
32
|
class _ModelMonitoringApplicationDataRes(ABC):
|
|
27
33
|
name: str
|
|
28
34
|
|
|
29
35
|
def __post_init__(self):
|
|
30
|
-
pat = re.compile(
|
|
36
|
+
pat = re.compile(mm_constants.RESULT_NAME_PATTERN)
|
|
31
37
|
if not re.fullmatch(pat, self.name):
|
|
32
38
|
raise mlrun.errors.MLRunValueError(
|
|
33
|
-
"Attribute name must comply with the regex `
|
|
39
|
+
f"Attribute name must comply with the regex `{mm_constants.RESULT_NAME_PATTERN}`"
|
|
34
40
|
)
|
|
35
41
|
|
|
36
42
|
@abstractmethod
|
|
@@ -38,7 +44,7 @@ class _ModelMonitoringApplicationDataRes(ABC):
|
|
|
38
44
|
raise NotImplementedError
|
|
39
45
|
|
|
40
46
|
|
|
41
|
-
@
|
|
47
|
+
@dataclass
|
|
42
48
|
class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
|
|
43
49
|
"""
|
|
44
50
|
Class representing the result of a custom model monitoring application.
|
|
@@ -49,13 +55,16 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
|
|
|
49
55
|
:param value: (float) Value of the application result.
|
|
50
56
|
:param kind: (ResultKindApp) Kind of application result.
|
|
51
57
|
:param status: (ResultStatusApp) Status of the application result.
|
|
52
|
-
:param extra_data: (dict) Extra data associated with the application result.
|
|
58
|
+
:param extra_data: (dict) Extra data associated with the application result. Note that if the extra data is
|
|
59
|
+
exceeding the maximum size of 998 characters, it will be ignored and a message will
|
|
60
|
+
be logged. In this case, we recommend logging the extra data as a separate artifact or
|
|
61
|
+
shortening it.
|
|
53
62
|
"""
|
|
54
63
|
|
|
55
64
|
name: str
|
|
56
65
|
value: float
|
|
57
|
-
kind:
|
|
58
|
-
status:
|
|
66
|
+
kind: mm_constants.ResultKindApp
|
|
67
|
+
status: mm_constants.ResultStatusApp
|
|
59
68
|
extra_data: dict = dataclasses.field(default_factory=dict)
|
|
60
69
|
|
|
61
70
|
def to_dict(self):
|
|
@@ -65,15 +74,30 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
|
|
|
65
74
|
:returns: (dict) Dictionary representation of the result.
|
|
66
75
|
"""
|
|
67
76
|
return {
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
77
|
+
mm_constants.ResultData.RESULT_NAME: self.name,
|
|
78
|
+
mm_constants.ResultData.RESULT_VALUE: self.value,
|
|
79
|
+
mm_constants.ResultData.RESULT_KIND: self.kind.value,
|
|
80
|
+
mm_constants.ResultData.RESULT_STATUS: self.status.value,
|
|
81
|
+
mm_constants.ResultData.RESULT_EXTRA_DATA: json.dumps(self.extra_data),
|
|
73
82
|
}
|
|
74
83
|
|
|
84
|
+
@validator("extra_data")
|
|
85
|
+
@classmethod
|
|
86
|
+
def validate_extra_data_len(cls, result_extra_data: dict):
|
|
87
|
+
"""Ensure that the extra data is not exceeding the maximum size which is important to avoid
|
|
88
|
+
possible storage issues."""
|
|
89
|
+
extra_data_len = len(json.dumps(result_extra_data))
|
|
90
|
+
if extra_data_len > _RESULT_EXTRA_DATA_MAX_SIZE:
|
|
91
|
+
logger.warning(
|
|
92
|
+
f"Extra data is too long and won't be stored: {extra_data_len} characters while the maximum "
|
|
93
|
+
f"is {_RESULT_EXTRA_DATA_MAX_SIZE} characters."
|
|
94
|
+
f"Please shorten the extra data or log it as a separate artifact."
|
|
95
|
+
)
|
|
96
|
+
return {}
|
|
97
|
+
return result_extra_data
|
|
75
98
|
|
|
76
|
-
|
|
99
|
+
|
|
100
|
+
@dataclass
|
|
77
101
|
class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
|
|
78
102
|
"""
|
|
79
103
|
Class representing a single metric of a custom model monitoring application.
|
|
@@ -94,6 +118,34 @@ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
|
|
|
94
118
|
:returns: (dict) Dictionary representation of the result.
|
|
95
119
|
"""
|
|
96
120
|
return {
|
|
97
|
-
|
|
98
|
-
|
|
121
|
+
mm_constants.MetricData.METRIC_NAME: self.name,
|
|
122
|
+
mm_constants.MetricData.METRIC_VALUE: self.value,
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclasses.dataclass
|
|
127
|
+
class _ModelMonitoringApplicationStats(_ModelMonitoringApplicationDataRes):
|
|
128
|
+
"""
|
|
129
|
+
Class representing the stats of histogram data drift application.
|
|
130
|
+
|
|
131
|
+
:param name: (mm_constants.StatsKind) Enum value identifying the kind of stats data of the event
|
|
132
|
+
:param timestamp: (str) ISO format representation of the timestamp at which the event took place
|
|
133
|
+
:param stats: (dict) Dictionary representation of the stats calculated for the event
|
|
134
|
+
|
|
135
|
+
"""
|
|
136
|
+
|
|
137
|
+
name: mm_constants.StatsKind
|
|
138
|
+
timestamp: str
|
|
139
|
+
stats: dict = dataclasses.field(default_factory=dict)
|
|
140
|
+
|
|
141
|
+
def to_dict(self):
|
|
142
|
+
"""
|
|
143
|
+
Convert the object to a dictionary format suitable for writing.
|
|
144
|
+
|
|
145
|
+
:returns: (dict) Dictionary representation of the result.
|
|
146
|
+
"""
|
|
147
|
+
return {
|
|
148
|
+
mm_constants.StatsData.STATS_NAME: self.name,
|
|
149
|
+
mm_constants.StatsData.STATS: self.stats,
|
|
150
|
+
mm_constants.StatsData.TIMESTAMP: self.timestamp,
|
|
99
151
|
}
|