mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +46 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +334 -0
- mlrun/artifacts/manager.py +15 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +7 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +66 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +67 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +21 -21
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +194 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +208 -82
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1237 -386
- mlrun/db/nopdb.py +201 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +61 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +1 -2
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +152 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +75 -38
- mlrun/projects/project.py +865 -206
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -11
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +74 -65
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +66 -18
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +3 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
- mlrun-1.8.0rc8.dist-info/RECORD +347 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc4.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from datetime import datetime, timezone
|
|
15
|
+
from datetime import datetime, timedelta, timezone
|
|
16
16
|
from io import StringIO
|
|
17
17
|
from typing import Literal, Optional, Union
|
|
18
18
|
|
|
@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
168
168
|
tsdb_batching_max_events: int = 1000,
|
|
169
169
|
tsdb_batching_timeout_secs: int = 30,
|
|
170
170
|
sample_window: int = 10,
|
|
171
|
+
aggregate_windows: Optional[list[str]] = None,
|
|
172
|
+
aggregate_period: str = "1m",
|
|
173
|
+
**kwarg,
|
|
171
174
|
):
|
|
172
175
|
"""
|
|
173
176
|
Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
|
|
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
178
181
|
- endpoint_features (Prediction and feature names and values)
|
|
179
182
|
- custom_metrics (user-defined metrics)
|
|
180
183
|
"""
|
|
184
|
+
aggregate_windows = aggregate_windows or ["5m", "1h"]
|
|
181
185
|
|
|
186
|
+
# Calculate number of predictions and average latency
|
|
187
|
+
def apply_storey_aggregations():
|
|
188
|
+
# Calculate number of predictions for each window (5 min and 1 hour by default)
|
|
189
|
+
graph.add_step(
|
|
190
|
+
class_name="storey.AggregateByKey",
|
|
191
|
+
aggregates=[
|
|
192
|
+
{
|
|
193
|
+
"name": EventFieldType.LATENCY,
|
|
194
|
+
"column": EventFieldType.LATENCY,
|
|
195
|
+
"operations": ["count", "avg"],
|
|
196
|
+
"windows": aggregate_windows,
|
|
197
|
+
"period": aggregate_period,
|
|
198
|
+
}
|
|
199
|
+
],
|
|
200
|
+
name=EventFieldType.LATENCY,
|
|
201
|
+
after="MapFeatureNames",
|
|
202
|
+
step_name="Aggregates",
|
|
203
|
+
table=".",
|
|
204
|
+
key_field=EventFieldType.ENDPOINT_ID,
|
|
205
|
+
)
|
|
206
|
+
# Calculate average latency time for each window (5 min and 1 hour by default)
|
|
207
|
+
graph.add_step(
|
|
208
|
+
class_name="storey.Rename",
|
|
209
|
+
mapping={
|
|
210
|
+
"latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
|
|
211
|
+
"latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
|
|
212
|
+
},
|
|
213
|
+
name="Rename",
|
|
214
|
+
after=EventFieldType.LATENCY,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
apply_storey_aggregations()
|
|
182
218
|
# Write latency per prediction, labeled by endpoint ID only
|
|
183
219
|
graph.add_step(
|
|
184
220
|
"storey.TSDBTarget",
|
|
@@ -310,6 +346,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
310
346
|
],
|
|
311
347
|
index_cols=[
|
|
312
348
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
349
|
+
mm_schemas.EventFieldType.ERROR_TYPE,
|
|
313
350
|
],
|
|
314
351
|
max_events=tsdb_batching_max_events,
|
|
315
352
|
flush_after_seconds=tsdb_batching_timeout_secs,
|
|
@@ -338,9 +375,6 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
338
375
|
elif kind == mm_schemas.WriterEventKind.RESULT:
|
|
339
376
|
table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
|
|
340
377
|
index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
|
|
341
|
-
event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
|
|
342
|
-
# TODO: remove this when extra data is supported (ML-7460)
|
|
343
|
-
event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
|
|
344
378
|
else:
|
|
345
379
|
raise ValueError(f"Invalid {kind = }")
|
|
346
380
|
|
|
@@ -544,6 +578,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
544
578
|
end: datetime,
|
|
545
579
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
546
580
|
type: Literal["metrics", "results"] = "results",
|
|
581
|
+
with_result_extra_data: bool = False,
|
|
547
582
|
) -> Union[
|
|
548
583
|
list[
|
|
549
584
|
Union[
|
|
@@ -565,6 +600,12 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
565
600
|
"""
|
|
566
601
|
|
|
567
602
|
if type == "metrics":
|
|
603
|
+
if with_result_extra_data:
|
|
604
|
+
logger.warning(
|
|
605
|
+
"The 'with_result_extra_data' parameter is not supported for metrics, just for results",
|
|
606
|
+
project=self.project,
|
|
607
|
+
endpoint_id=endpoint_id,
|
|
608
|
+
)
|
|
568
609
|
table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
|
|
569
610
|
name = mm_schemas.MetricData.METRIC_NAME
|
|
570
611
|
columns = [mm_schemas.MetricData.METRIC_VALUE]
|
|
@@ -577,6 +618,8 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
577
618
|
mm_schemas.ResultData.RESULT_STATUS,
|
|
578
619
|
mm_schemas.ResultData.RESULT_KIND,
|
|
579
620
|
]
|
|
621
|
+
if with_result_extra_data:
|
|
622
|
+
columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
|
|
580
623
|
df_handler = self.df_to_results_values
|
|
581
624
|
else:
|
|
582
625
|
raise ValueError(f"Invalid {type = }")
|
|
@@ -605,6 +648,9 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
605
648
|
endpoint_id=endpoint_id,
|
|
606
649
|
is_empty=df.empty,
|
|
607
650
|
)
|
|
651
|
+
if not with_result_extra_data and type == "results":
|
|
652
|
+
# Set the extra data to an empty string if it's not requested
|
|
653
|
+
df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
|
|
608
654
|
|
|
609
655
|
return df_handler(df=df, metrics=metrics, project=self.project)
|
|
610
656
|
|
|
@@ -700,12 +746,13 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
700
746
|
def get_last_request(
|
|
701
747
|
self,
|
|
702
748
|
endpoint_ids: Union[str, list[str]],
|
|
703
|
-
start:
|
|
704
|
-
end:
|
|
749
|
+
start: Optional[datetime] = None,
|
|
750
|
+
end: Optional[datetime] = None,
|
|
705
751
|
) -> pd.DataFrame:
|
|
706
752
|
endpoint_ids = (
|
|
707
753
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
708
754
|
)
|
|
755
|
+
start, end = self._get_start_end(start, end)
|
|
709
756
|
df = self._get_records(
|
|
710
757
|
table=mm_schemas.FileTargetKind.PREDICTIONS,
|
|
711
758
|
start=start,
|
|
@@ -734,12 +781,14 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
734
781
|
def get_drift_status(
|
|
735
782
|
self,
|
|
736
783
|
endpoint_ids: Union[str, list[str]],
|
|
737
|
-
start:
|
|
738
|
-
end:
|
|
784
|
+
start: Optional[datetime] = None,
|
|
785
|
+
end: Optional[datetime] = None,
|
|
739
786
|
) -> pd.DataFrame:
|
|
740
787
|
endpoint_ids = (
|
|
741
788
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
742
789
|
)
|
|
790
|
+
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
791
|
+
start, end = self._get_start_end(start, end)
|
|
743
792
|
df = self._get_records(
|
|
744
793
|
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
745
794
|
start=start,
|
|
@@ -758,9 +807,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
758
807
|
def get_metrics_metadata(
|
|
759
808
|
self,
|
|
760
809
|
endpoint_id: str,
|
|
761
|
-
start:
|
|
762
|
-
end:
|
|
810
|
+
start: Optional[datetime] = None,
|
|
811
|
+
end: Optional[datetime] = None,
|
|
763
812
|
) -> pd.DataFrame:
|
|
813
|
+
start, end = self._get_start_end(start, end)
|
|
764
814
|
df = self._get_records(
|
|
765
815
|
table=mm_schemas.V3IOTSDBTables.METRICS,
|
|
766
816
|
start=start,
|
|
@@ -778,9 +828,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
778
828
|
def get_results_metadata(
|
|
779
829
|
self,
|
|
780
830
|
endpoint_id: str,
|
|
781
|
-
start:
|
|
782
|
-
end:
|
|
831
|
+
start: Optional[datetime] = None,
|
|
832
|
+
end: Optional[datetime] = None,
|
|
783
833
|
) -> pd.DataFrame:
|
|
834
|
+
start, end = self._get_start_end(start, end)
|
|
784
835
|
df = self._get_records(
|
|
785
836
|
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
786
837
|
start=start,
|
|
@@ -803,18 +854,20 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
803
854
|
def get_error_count(
|
|
804
855
|
self,
|
|
805
856
|
endpoint_ids: Union[str, list[str]],
|
|
806
|
-
start:
|
|
807
|
-
end:
|
|
857
|
+
start: Optional[datetime] = None,
|
|
858
|
+
end: Optional[datetime] = None,
|
|
808
859
|
) -> pd.DataFrame:
|
|
809
860
|
endpoint_ids = (
|
|
810
861
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
811
862
|
)
|
|
863
|
+
start, end = self._get_start_end(start, end)
|
|
812
864
|
df = self._get_records(
|
|
813
865
|
table=mm_schemas.FileTargetKind.ERRORS,
|
|
814
866
|
start=start,
|
|
815
867
|
end=end,
|
|
816
868
|
columns=[mm_schemas.EventFieldType.ERROR_COUNT],
|
|
817
|
-
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})"
|
|
869
|
+
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
|
|
870
|
+
f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
|
|
818
871
|
agg_funcs=["count"],
|
|
819
872
|
)
|
|
820
873
|
if not df.empty:
|
|
@@ -830,12 +883,14 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
830
883
|
def get_avg_latency(
|
|
831
884
|
self,
|
|
832
885
|
endpoint_ids: Union[str, list[str]],
|
|
833
|
-
start:
|
|
834
|
-
end:
|
|
886
|
+
start: Optional[datetime] = None,
|
|
887
|
+
end: Optional[datetime] = None,
|
|
835
888
|
) -> pd.DataFrame:
|
|
836
889
|
endpoint_ids = (
|
|
837
890
|
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
838
891
|
)
|
|
892
|
+
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
893
|
+
start, end = self._get_start_end(start, end)
|
|
839
894
|
df = self._get_records(
|
|
840
895
|
table=mm_schemas.FileTargetKind.PREDICTIONS,
|
|
841
896
|
start=start,
|
|
@@ -846,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
846
901
|
)
|
|
847
902
|
if not df.empty:
|
|
848
903
|
df.dropna(inplace=True)
|
|
904
|
+
df.rename(
|
|
905
|
+
columns={
|
|
906
|
+
f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
|
|
907
|
+
},
|
|
908
|
+
inplace=True,
|
|
909
|
+
)
|
|
849
910
|
return df.reset_index(drop=True)
|
|
@@ -13,26 +13,32 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import datetime
|
|
16
|
+
import os
|
|
16
17
|
import typing
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
import pandas as pd
|
|
20
21
|
|
|
21
22
|
if typing.TYPE_CHECKING:
|
|
23
|
+
from mlrun.datastore import DataItem
|
|
22
24
|
from mlrun.db.base import RunDBInterface
|
|
23
25
|
from mlrun.projects import MlrunProject
|
|
24
26
|
|
|
27
|
+
from fnmatch import fnmatchcase
|
|
28
|
+
from typing import Optional
|
|
29
|
+
|
|
25
30
|
import mlrun
|
|
26
31
|
import mlrun.artifacts
|
|
27
32
|
import mlrun.common.model_monitoring.helpers
|
|
28
33
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
29
34
|
import mlrun.data_types.infer
|
|
30
35
|
import mlrun.model_monitoring
|
|
36
|
+
import mlrun.utils.helpers
|
|
37
|
+
from mlrun.common.schemas import ModelEndpoint
|
|
31
38
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
32
39
|
ModelEndpointMonitoringMetric,
|
|
33
40
|
_compose_full_name,
|
|
34
41
|
)
|
|
35
|
-
from mlrun.model_monitoring.model_endpoint import ModelEndpoint
|
|
36
42
|
from mlrun.utils import logger
|
|
37
43
|
|
|
38
44
|
|
|
@@ -42,6 +48,70 @@ class _BatchDict(typing.TypedDict):
|
|
|
42
48
|
days: int
|
|
43
49
|
|
|
44
50
|
|
|
51
|
+
def _is_results_regex_match(
|
|
52
|
+
existing_result_name: Optional[str],
|
|
53
|
+
result_name_filters: Optional[list[str]],
|
|
54
|
+
) -> bool:
|
|
55
|
+
if existing_result_name.count(".") != 3 or any(
|
|
56
|
+
part == "" for part in existing_result_name.split(".")
|
|
57
|
+
):
|
|
58
|
+
logger.warning(
|
|
59
|
+
f"_is_results_regex_match: existing_result_name illegal, will be ignored."
|
|
60
|
+
f" existing_result_name: {existing_result_name}"
|
|
61
|
+
)
|
|
62
|
+
return False
|
|
63
|
+
existing_result_name = ".".join(existing_result_name.split(".")[i] for i in [1, 3])
|
|
64
|
+
for result_name_filter in result_name_filters:
|
|
65
|
+
if fnmatchcase(existing_result_name, result_name_filter):
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def filter_results_by_regex(
|
|
71
|
+
existing_result_names: Optional[list[str]] = None,
|
|
72
|
+
result_name_filters: Optional[list[str]] = None,
|
|
73
|
+
) -> list[str]:
|
|
74
|
+
"""
|
|
75
|
+
Filter a list of existing result names by a list of filters.
|
|
76
|
+
|
|
77
|
+
This function returns only the results that match the filters provided. If no filters are given,
|
|
78
|
+
it returns all results. Invalid inputs are ignored.
|
|
79
|
+
|
|
80
|
+
:param existing_result_names: List of existing results' fully qualified names (FQNs)
|
|
81
|
+
in the format: endpoint_id.app_name.type.name.
|
|
82
|
+
Example: mep1.app1.result.metric1
|
|
83
|
+
:param result_name_filters: List of filters in the format: app.result_name.
|
|
84
|
+
Wildcards can be used, such as app.result* or *.result
|
|
85
|
+
|
|
86
|
+
:return: List of FQNs of the matching results
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
if not result_name_filters:
|
|
90
|
+
return existing_result_names
|
|
91
|
+
|
|
92
|
+
if not existing_result_names:
|
|
93
|
+
return []
|
|
94
|
+
|
|
95
|
+
# filters validations
|
|
96
|
+
validated_filters = []
|
|
97
|
+
for result_name_filter in result_name_filters:
|
|
98
|
+
if result_name_filter.count(".") != 1:
|
|
99
|
+
logger.warning(
|
|
100
|
+
f"filter_results_by_regex: result_name_filter illegal, will be ignored."
|
|
101
|
+
f"Filter: {result_name_filter}"
|
|
102
|
+
)
|
|
103
|
+
else:
|
|
104
|
+
validated_filters.append(result_name_filter)
|
|
105
|
+
filtered_metrics_names = []
|
|
106
|
+
for existing_result_name in existing_result_names:
|
|
107
|
+
if _is_results_regex_match(
|
|
108
|
+
existing_result_name=existing_result_name,
|
|
109
|
+
result_name_filters=validated_filters,
|
|
110
|
+
):
|
|
111
|
+
filtered_metrics_names.append(existing_result_name)
|
|
112
|
+
return filtered_metrics_names
|
|
113
|
+
|
|
114
|
+
|
|
45
115
|
def get_stream_path(
|
|
46
116
|
project: str,
|
|
47
117
|
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
@@ -98,19 +168,64 @@ def get_monitoring_parquet_path(
|
|
|
98
168
|
return parquet_path
|
|
99
169
|
|
|
100
170
|
|
|
101
|
-
def
|
|
102
|
-
|
|
103
|
-
|
|
171
|
+
def get_monitoring_stats_directory_path(
|
|
172
|
+
project: str,
|
|
173
|
+
kind: str = mm_constants.FileTargetKind.STATS,
|
|
174
|
+
) -> str:
|
|
175
|
+
"""
|
|
176
|
+
Get model monitoring stats target for the current project and kind. The stats target path is based on the
|
|
177
|
+
project artifact path. If project artifact path is not defined, the stats target path will be based on MLRun
|
|
178
|
+
artifact path.
|
|
179
|
+
:param project: Project object.
|
|
180
|
+
:param kind: indicate the kind of the stats path
|
|
181
|
+
:return: Monitoring stats target path.
|
|
182
|
+
"""
|
|
183
|
+
stats_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
184
|
+
project=project,
|
|
185
|
+
kind=kind,
|
|
186
|
+
)
|
|
187
|
+
return stats_path
|
|
104
188
|
|
|
105
|
-
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
106
189
|
|
|
107
|
-
|
|
190
|
+
def _get_monitoring_current_stats_file_path(project: str, endpoint_id: str) -> str:
|
|
191
|
+
return os.path.join(
|
|
192
|
+
get_monitoring_stats_directory_path(project),
|
|
193
|
+
f"{endpoint_id}_current_stats.json",
|
|
194
|
+
)
|
|
195
|
+
|
|
108
196
|
|
|
197
|
+
def _get_monitoring_drift_measures_file_path(project: str, endpoint_id: str) -> str:
|
|
198
|
+
return os.path.join(
|
|
199
|
+
get_monitoring_stats_directory_path(project),
|
|
200
|
+
f"{endpoint_id}_drift_measures.json",
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def get_monitoring_current_stats_data(project: str, endpoint_id: str) -> "DataItem":
|
|
205
|
+
"""
|
|
206
|
+
getter for data item of current stats for project and endpoint
|
|
207
|
+
:param project: project name str
|
|
208
|
+
:param endpoint_id: endpoint id str
|
|
209
|
+
:return: DataItem
|
|
109
210
|
"""
|
|
211
|
+
return mlrun.datastore.store_manager.object(
|
|
212
|
+
_get_monitoring_current_stats_file_path(
|
|
213
|
+
project=project, endpoint_id=endpoint_id
|
|
214
|
+
)
|
|
215
|
+
)
|
|
110
216
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
217
|
+
|
|
218
|
+
def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataItem":
|
|
219
|
+
"""
|
|
220
|
+
getter for data item of drift measures for project and endpoint
|
|
221
|
+
:param project: project name str
|
|
222
|
+
:param endpoint_id: endpoint id str
|
|
223
|
+
:return: DataItem
|
|
224
|
+
"""
|
|
225
|
+
return mlrun.datastore.store_manager.object(
|
|
226
|
+
_get_monitoring_drift_measures_file_path(
|
|
227
|
+
project=project, endpoint_id=endpoint_id
|
|
228
|
+
)
|
|
114
229
|
)
|
|
115
230
|
|
|
116
231
|
|
|
@@ -186,19 +301,24 @@ def update_model_endpoint_last_request(
|
|
|
186
301
|
:param current_request: current request time
|
|
187
302
|
:param db: DB interface.
|
|
188
303
|
"""
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
304
|
+
is_batch_endpoint = (
|
|
305
|
+
model_endpoint.metadata.endpoint_type == mm_constants.EndpointType.BATCH_EP
|
|
306
|
+
)
|
|
307
|
+
if not is_batch_endpoint:
|
|
192
308
|
logger.info(
|
|
193
309
|
"Update model endpoint last request time (EP with serving)",
|
|
194
310
|
project=project,
|
|
195
311
|
endpoint_id=model_endpoint.metadata.uid,
|
|
312
|
+
name=model_endpoint.metadata.name,
|
|
313
|
+
function_name=model_endpoint.spec.function_name,
|
|
196
314
|
last_request=model_endpoint.status.last_request,
|
|
197
315
|
current_request=current_request,
|
|
198
316
|
)
|
|
199
317
|
db.patch_model_endpoint(
|
|
200
318
|
project=project,
|
|
201
319
|
endpoint_id=model_endpoint.metadata.uid,
|
|
320
|
+
name=model_endpoint.metadata.name,
|
|
321
|
+
function_name=model_endpoint.spec.function_name,
|
|
202
322
|
attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
|
|
203
323
|
)
|
|
204
324
|
else: # model endpoint without any serving function - close the window "manually"
|
|
@@ -217,7 +337,7 @@ def update_model_endpoint_last_request(
|
|
|
217
337
|
+ datetime.timedelta(
|
|
218
338
|
seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
|
|
219
339
|
)
|
|
220
|
-
)
|
|
340
|
+
)
|
|
221
341
|
logger.info(
|
|
222
342
|
"Bumping model endpoint last request time (EP without serving)",
|
|
223
343
|
project=project,
|
|
@@ -229,6 +349,8 @@ def update_model_endpoint_last_request(
|
|
|
229
349
|
db.patch_model_endpoint(
|
|
230
350
|
project=project,
|
|
231
351
|
endpoint_id=model_endpoint.metadata.uid,
|
|
352
|
+
name=model_endpoint.metadata.name,
|
|
353
|
+
function_name=model_endpoint.spec.function_name,
|
|
232
354
|
attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
233
355
|
)
|
|
234
356
|
|
|
@@ -270,17 +392,6 @@ def calculate_inputs_statistics(
|
|
|
270
392
|
return inputs_statistics
|
|
271
393
|
|
|
272
394
|
|
|
273
|
-
def get_endpoint_record(
|
|
274
|
-
project: str,
|
|
275
|
-
endpoint_id: str,
|
|
276
|
-
secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
|
|
277
|
-
) -> dict[str, typing.Any]:
|
|
278
|
-
model_endpoint_store = mlrun.model_monitoring.get_store_object(
|
|
279
|
-
project=project, secret_provider=secret_provider
|
|
280
|
-
)
|
|
281
|
-
return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
|
|
282
|
-
|
|
283
|
-
|
|
284
395
|
def get_result_instance_fqn(
|
|
285
396
|
model_endpoint_id: str, app_name: str, result_name: str
|
|
286
397
|
) -> str:
|
|
@@ -320,33 +431,25 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
|
|
|
320
431
|
)
|
|
321
432
|
|
|
322
433
|
|
|
323
|
-
def
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
In addition, we verify that the model object is of type `ModelArtifact`.
|
|
434
|
+
def _get_monitoring_schedules_folder_path(project: str) -> str:
|
|
435
|
+
return typing.cast(
|
|
436
|
+
str,
|
|
437
|
+
mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
438
|
+
project=project, kind=mm_constants.FileTargetKind.MONITORING_SCHEDULES
|
|
439
|
+
),
|
|
440
|
+
)
|
|
331
441
|
|
|
332
|
-
:param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
|
|
333
|
-
:param model_obj: An object representing the model artifact.
|
|
334
442
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
field_name="model_endpoint.spec.model_uri",
|
|
339
|
-
field_value=model_obj,
|
|
340
|
-
expected_type=mlrun.artifacts.ModelArtifact,
|
|
443
|
+
def _get_monitoring_schedules_file_path(*, project: str, endpoint_id: str) -> str:
|
|
444
|
+
return os.path.join(
|
|
445
|
+
_get_monitoring_schedules_folder_path(project), f"{endpoint_id}.json"
|
|
341
446
|
)
|
|
342
447
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
|
|
351
|
-
kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
|
|
448
|
+
|
|
449
|
+
def get_monitoring_schedules_data(*, project: str, endpoint_id: str) -> "DataItem":
|
|
450
|
+
"""
|
|
451
|
+
Get the model monitoring schedules' data item of the project's model endpoint.
|
|
452
|
+
"""
|
|
453
|
+
return mlrun.datastore.store_manager.object(
|
|
454
|
+
_get_monitoring_schedules_file_path(project=project, endpoint_id=endpoint_id)
|
|
352
455
|
)
|