mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +55 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +378 -0
- mlrun/artifacts/manager.py +26 -17
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +67 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +68 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +5 -3
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/targets.py +23 -22
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +229 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +213 -83
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1265 -387
- mlrun/db/nopdb.py +205 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +72 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +0 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +151 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +71 -36
- mlrun/projects/project.py +890 -220
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -13
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +105 -72
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +63 -19
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +11 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
- mlrun-1.8.0rc11.dist-info/RECORD +347 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc10.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py

@@ -13,12 +13,14 @@
 # limitations under the License.
 
 import json
+from datetime import datetime
 
 import mlrun.feature_store.steps
 from mlrun.common.schemas.model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
 )
+from mlrun.utils import logger
 
 
 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):

@@ -40,3 +42,34 @@ class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
         event[EventFieldType.TABLE_COLUMN] = "_" + event.get(EventFieldType.ENDPOINT_ID)
 
         return event
+
+
+class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Prepare the event for insertion into the TDEngine error table
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        error = str(event.get("error"))
+        if len(error) > 1000:
+            error = error[-1000:]
+            logger.warning(
+                f"Error message exceeds 1000 chars: The error message writen to TSDB will be it last "
+                f"1000 chars, Error: {error}",
+                event=event,
+            )
+        timestamp = datetime.fromisoformat(event.get("when"))
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        event = {
+            EventFieldType.MODEL_ERROR: error,
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
+            EventFieldType.ENDPOINT_ID: endpoint_id,
+            EventFieldType.TIME: timestamp,
+            EventFieldType.PROJECT: event[EventFieldType.FUNCTION_URI].split("/")[0],
+            EventFieldType.TABLE_COLUMN: "_err_"
+            + event.get(EventFieldType.ENDPOINT_ID),
+        }
+        logger.info("Write error to errors TSDB table", event=event)
+        return event
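For orientation, here is a hypothetical run of the new ErrorExtractor.do() transformation above. The dictionary keys mirror the EventFieldType constants used in the diff, assuming they resolve to the snake_case strings shown; the input event itself is made up for the example.

# Hypothetical illustration only; field-name strings are assumptions.
from datetime import datetime

raw_event = {
    "error": "ValueError: bad input shape",
    "when": "2024-11-05T10:30:00+00:00",
    "endpoint_id": "ep123",
    "function_uri": "my-project/my-function",
}

error = str(raw_event.get("error"))[-1000:]  # at most the last 1000 chars survive

flattened = {
    "model_error": error,
    "error_type": "infer_error",
    "endpoint_id": raw_event["endpoint_id"],
    "time": datetime.fromisoformat(raw_event["when"]),
    "project": raw_event["function_uri"].split("/")[0],  # project is the URI prefix
    "table_column": "_err_" + raw_event["endpoint_id"],  # per-endpoint sub-table column
}
print(flattened)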
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone
 
 import pandas as pd
 import taosws

@@ -90,6 +89,9 @@ class TDEngineConnector(TSDBConnector):
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
                 project=self.project, database=self.database
             ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                project=self.project, database=self.database
+            ),
         }
 
     def create_tables(self):

@@ -122,7 +124,6 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric

@@ -163,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val
 
-    def apply_monitoring_stream_steps(self, graph):
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in

@@ -196,7 +197,6 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             max_events=1000,

@@ -209,8 +209,37 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
-    def handle_model_error(
-
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )
 
     def delete_tsdb_resources(self):
         """

@@ -265,6 +294,10 @@ class TDEngineConnector(TSDBConnector):
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.

@@ -284,6 +317,14 @@ class TDEngineConnector(TSDBConnector):
                                      `sliding_window_step` is provided, interval must be provided as well. Provided
                                      as a string in the format of '1m', '1h', etc.
         :param timestamp_column:     The column name that holds the timestamp index.
+        :param group_by:             The column name to group by. Note that if `group_by` is provided, aggregation
+                                     functions must bg provided
+        :param preform_agg_columns:  The columns to preform aggregation on.
+                                     notice that all aggregation functions provided will preform on those columns.
+                                     If not provided The default behavior is to preform on all columns in columns,
+                                     if an empty list was provided The aggregation won't be performed.
+        :param order_by:             The column or alias to preform ordering on the query.
+        :param desc:                 Whether or not to sort the results in descending order.
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise:  MLRunInvalidArgumentError if query the provided table failed.

@@ -301,6 +342,10 @@ class TDEngineConnector(TSDBConnector):
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:

@@ -323,6 +368,7 @@ class TDEngineConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[

@@ -340,6 +386,12 @@ class TDEngineConnector(TSDBConnector):
             timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]

@@ -353,6 +405,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(

@@ -389,6 +443,10 @@ class TDEngineConnector(TSDBConnector):
             is_empty=df.empty,
         )
 
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)
 
     def read_predictions(
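The new with_result_extra_data flag threads through read_metrics_data in both connectors: the extra-data column is only selected for results, and when the flag is off the column is kept but blanked. A minimal, runnable sketch of that post-processing, assuming the ResultData.* constants resolve to the snake_case column names shown (the sample rows are hypothetical):

import pandas as pd

# Hypothetical application-result rows as read back from the TSDB.
df = pd.DataFrame(
    {
        "result_name": ["data_drift", "data_drift"],
        "result_value": [0.12, 0.41],
        "result_status": [0, 2],
        "result_kind": [0, 0],
        "result_extra_data": ['{"bins": [1, 2]}', '{"bins": [3, 4]}'],
    }
)

with_result_extra_data = False  # the new keyword's default
if not with_result_extra_data:
    # Mirrors the connectors' behavior: the column stays, the payload is blanked.
    df["result_extra_data"] = ""
print(df)

For type="metrics" the flag is ignored, and the connectors log a warning instead.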
@@ -452,51 +510,219 @@
 
     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df
 
     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
 
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
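A side note on the filter construction that recurs in these getters: the endpoint filter is built by slicing the repr of the Python list, which yields a quoted, comma-separated sequence. A quick runnable check of what that produces:

# How filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})" expands.
endpoint_ids = ["ep-1", "ep-2"]
endpoint_ids = endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]

filter_query = f"endpoint_id IN({str(endpoint_ids)[1:-1]})"
print(filter_query)  # -> endpoint_id IN('ep-1', 'ep-2')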
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 from typing import Literal, Optional, Union
 

@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
         tsdb_batching_max_events: int = 1000,
         tsdb_batching_timeout_secs: int = 30,
         sample_window: int = 10,
+        aggregate_windows: Optional[list[str]] = None,
+        aggregate_period: str = "1m",
+        **kwarg,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+        aggregate_windows = aggregate_windows or ["5m", "1h"]
 
+        # Calculate number of predictions and average latency
+        def apply_storey_aggregations():
+            # Calculate number of predictions for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.AggregateByKey",
+                aggregates=[
+                    {
+                        "name": EventFieldType.LATENCY,
+                        "column": EventFieldType.LATENCY,
+                        "operations": ["count", "avg"],
+                        "windows": aggregate_windows,
+                        "period": aggregate_period,
+                    }
+                ],
+                name=EventFieldType.LATENCY,
+                after="MapFeatureNames",
+                step_name="Aggregates",
+                table=".",
+                key_field=EventFieldType.ENDPOINT_ID,
+            )
+            # Calculate average latency time for each window (5 min and 1 hour by default)
+            graph.add_step(
+                class_name="storey.Rename",
+                mapping={
+                    "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
+                    "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
+                },
+                name="Rename",
+                after=EventFieldType.LATENCY,
+            )
+
+        apply_storey_aggregations()
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
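The Rename step above leans on storey's aggregation naming convention: AggregateByKey emits one field per "{name}_{operation}_{window}", so the single "latency" aggregate yields both counts and averages per window. A toy illustration (the right-hand rename targets are assumed values for the EventLiveStats.* constants):

# Field names storey.AggregateByKey produces for the step above.
name = "latency"
operations = ["count", "avg"]
windows = ["5m", "1h"]  # the default aggregate_windows

emitted = [f"{name}_{op}_{window}" for op in operations for window in windows]
print(emitted)
# ['latency_count_5m', 'latency_count_1h', 'latency_avg_5m', 'latency_avg_1h']

# The storey.Rename step then exposes the count fields under dashboard names:
rename_mapping = {
    "latency_count_5m": "predictions_count_5m",  # EventLiveStats.PREDICTIONS_COUNT_5M (assumed value)
    "latency_count_1h": "predictions_count_1h",  # EventLiveStats.PREDICTIONS_COUNT_1H (assumed value)
}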
@@ -310,6 +346,7 @@ class V3IOTSDBConnector(TSDBConnector):
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,

@@ -338,9 +375,6 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
-            # TODO: remove this when extra data is supported (ML-7460)
-            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")
 

@@ -544,6 +578,7 @@ class V3IOTSDBConnector(TSDBConnector):
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: Literal["metrics", "results"] = "results",
+        with_result_extra_data: bool = False,
     ) -> Union[
         list[
             Union[

@@ -565,6 +600,12 @@ class V3IOTSDBConnector(TSDBConnector):
         """
 
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
             columns = [mm_schemas.MetricData.METRIC_VALUE]

@@ -577,6 +618,8 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")

@@ -605,6 +648,9 @@ class V3IOTSDBConnector(TSDBConnector):
             endpoint_id=endpoint_id,
             is_empty=df.empty,
         )
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
 
         return df_handler(df=df, metrics=metrics, project=self.project)
 

@@ -700,12 +746,13 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,

@@ -734,12 +781,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,

@@ -758,9 +807,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.METRICS,
             start=start,

@@ -778,9 +828,10 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,

@@ -803,18 +854,20 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
             agg_funcs=["count"],
         )
         if not df.empty:

@@ -830,12 +883,14 @@ class V3IOTSDBConnector(TSDBConnector):
     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,

@@ -846,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         if not df.empty:
             df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
+                },
+                inplace=True,
+            )
         return df.reset_index(drop=True)