mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc2__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release. This version of mlrun might be problematic.
- mlrun/__init__.py +18 -18
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +19 -12
- mlrun/artifacts/__init__.py +0 -2
- mlrun/artifacts/base.py +34 -11
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/manager.py +13 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +6 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/common/formatters/model_endpoint.py +30 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -3
- mlrun/common/model_monitoring/helpers.py +1 -1
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +7 -2
- mlrun/common/schemas/alert.py +31 -18
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +7 -13
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -2
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +62 -12
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -6
- mlrun/common/schemas/notification.py +18 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +22 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +65 -15
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +0 -1
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +4 -4
- mlrun/data_types/to_pandas.py +2 -11
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +20 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +51 -49
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +22 -23
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +170 -64
- mlrun/db/factory.py +3 -0
- mlrun/db/httpdb.py +986 -238
- mlrun/db/nopdb.py +155 -57
- mlrun/errors.py +2 -2
- mlrun/execution.py +55 -29
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +110 -46
- mlrun/model_monitoring/__init__.py +1 -2
- mlrun/model_monitoring/api.py +6 -6
- mlrun/model_monitoring/applications/_application_steps.py +13 -15
- mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +185 -223
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/stores/__init__.py +1 -1
- mlrun/model_monitoring/db/stores/base/store.py +6 -65
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
- mlrun/model_monitoring/db/tsdb/base.py +76 -24
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +253 -28
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
- mlrun/model_monitoring/helpers.py +91 -1
- mlrun/model_monitoring/model_endpoint.py +4 -2
- mlrun/model_monitoring/stream_processing.py +16 -13
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +47 -26
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +3 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +34 -35
- mlrun/projects/project.py +535 -182
- mlrun/run.py +13 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +13 -13
- mlrun/runtimes/nuclio/serving.py +9 -9
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +27 -27
- mlrun/serving/server.py +1 -1
- mlrun/serving/states.py +76 -71
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +4 -4
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/helpers.py +70 -16
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +149 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +18 -22
- mlrun/utils/notifications/notification_pusher.py +43 -31
- mlrun/utils/regex.py +3 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +18 -14
- mlrun-1.8.0rc2.dist-info/RECORD +358 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -13,8 +13,7 @@
 # limitations under the License.

 import typing
-from datetime import datetime
-from typing import Union
+from datetime import datetime, timedelta, timezone

 import pandas as pd
 import taosws
@@ -90,6 +89,9 @@ class TDEngineConnector(TSDBConnector):
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
                 project=self.project, database=self.database
             ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                project=self.project, database=self.database
+            ),
         }

     def create_tables(self):
@@ -122,7 +124,6 @@
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)

         else:
             # Write a new metric
@@ -196,7 +197,6 @@
                 mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             max_events=1000,
@@ -209,8 +209,37 @@
             after="ProcessBeforeTDEngine",
         )

-    def handle_model_error(
-
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "storey.TDEngineTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=self._tdengine_connection_string,
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )

     def delete_tsdb_resources(self):
         """
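The new handle_model_error branch wires two steps into the monitoring stream graph: an ErrorExtractor that reshapes failed events, followed by a storey.TDEngineTarget that batches them into the new ERRORS supertable. Below is a minimal, self-contained sketch of that wiring pattern; Graph is a stand-in that only records the declared steps (the real object comes from mlrun/storey), so it runs without a TDEngine instance:

    # Stand-in graph that records step declarations instead of executing them.
    class Graph:
        def __init__(self):
            self.steps = []

        def add_step(self, class_name, name=None, after=None, **options):
            self.steps.append(
                {"class": class_name, "name": name, "after": after, **options}
            )

    graph = Graph()
    graph.add_step(
        "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
        name="error_extractor",
        after="ForwardError",
    )
    graph.add_step(
        "storey.TDEngineTarget",
        name="tsdb_error",
        after="error_extractor",
        max_events=1000,  # flush to TDEngine after this many buffered events
        flush_after_seconds=30,  # or after this much time, whichever comes first
    )
    for step in graph.steps:
        print(step["name"], "<-", step["after"])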
@@ -265,6 +294,10 @@
         limit: typing.Optional[int] = None,
         sliding_window_step: typing.Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: typing.Optional[typing.Union[list[str], str]] = None,
+        preform_agg_columns: typing.Optional[list] = None,
+        order_by: typing.Optional[str] = None,
+        desc: typing.Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -284,6 +317,14 @@
                                      `sliding_window_step` is provided, interval must be provided as well. Provided
                                      as a string in the format of '1m', '1h', etc.
         :param timestamp_column: The column name that holds the timestamp index.
+        :param group_by: The column name to group by. Note that if `group_by` is provided, aggregation
+                         functions must bg provided
+        :param preform_agg_columns: The columns to preform aggregation on.
+                         notice that all aggregation functions provided will preform on those columns.
+                         If not provided The default behavior is to preform on all columns in columns,
+                         if an empty list was provided The aggregation won't be performed.
+        :param order_by: The column or alias to preform ordering on the query.
+        :param desc: Whether or not to sort the results in descending order.

         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunInvalidArgumentError if query the provided table failed.
@@ -301,6 +342,10 @@
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
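The four new _get_records parameters are handed down to the TDEngine query builder (note the rename on the way: preform_agg_columns becomes preform_agg_funcs_columns). The helper below is an illustrative, self-contained approximation of the SQL shape these parameters produce; the real builder lives in tdengine_schemas and may differ in detail:

    def build_query(table, columns, agg_funcs=None, preform_agg_columns=None,
                    group_by=None, order_by=None, desc=None):
        # Aggregate over the requested subset; default is all selected columns.
        agg_cols = preform_agg_columns if preform_agg_columns is not None else columns
        select = list(columns)
        if agg_funcs and agg_cols:
            select = [f"{f}({c})" for f in agg_funcs for c in agg_cols] + [
                c for c in columns if c not in agg_cols
            ]
        query = f"SELECT {', '.join(select)} FROM {table}"
        if group_by:
            group_cols = group_by if isinstance(group_by, list) else [group_by]
            query += " GROUP BY " + ", ".join(group_cols)
        if order_by:
            query += f" ORDER BY {order_by}" + (" DESC" if desc else "")
        return query

    print(build_query("predictions", ["time", "latency"], agg_funcs=["last"],
                      preform_agg_columns=["time"], group_by="endpoint_id"))
    # SELECT last(time), latency FROM predictions GROUP BY endpoint_id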
@@ -323,6 +368,7 @@
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: typing.Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
     ) -> typing.Union[
         list[
             typing.Union[
@@ -340,6 +386,12 @@
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
@@ -353,6 +405,8 @@
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -389,6 +443,10 @@
             is_empty=df.empty,
         )

+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)

     def read_predictions(
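With the default with_result_extra_data=False, the returned results frame keeps its extra-data column but the payload is blanked before the handler runs. A minimal pandas illustration (the literal column name behind ResultData.RESULT_EXTRA_DATA is assumed here):

    import pandas as pd

    df = pd.DataFrame(
        {
            "result_name": ["data_drift"],
            "result_value": [0.12],
            "result_extra_data": ['{"hist": [1, 2, 3]}'],
        }
    )
    with_result_extra_data = False
    if not with_result_extra_data:
        # Keep the column shape, drop the payload -- mirrors the connector's guard.
        df["result_extra_data"] = ""
    print(df)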
@@ -452,51 +510,218 @@

     def get_last_request(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
+                mm_schemas.EventFieldType.LAST_REQUEST
+            ].map(
+                lambda last_request: datetime.strptime(
+                    last_request, "%Y-%m-%d %H:%M:%S.%f %z"
+                ).astimezone(tz=timezone.utc)
+            )
+        return df

     def get_drift_status(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_results_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=f"endpoint_id='{endpoint_id}'",
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_error_count(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_avg_latency(
         self,
-        endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        endpoint_ids: typing.Union[str, list[str]],
+        start: typing.Optional[datetime] = None,
+        end: typing.Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
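Two idioms recur throughout these new getters and are worth seeing in isolation. First, str(list)[1:-1] renders a Python list as a quoted, comma-separated IN() argument. Second, TDEngine returns last(time) as a string, which is parsed and normalized to UTC before it is exposed as last_request. Both are shown standalone with placeholder values:

    from datetime import datetime, timezone

    # 1. List-to-IN() rendering used in the filter queries:
    endpoint_ids = ["ep-1", "ep-2"]
    print(f"endpoint_id IN({str(endpoint_ids)[1:-1]})")
    # endpoint_id IN('ep-1', 'ep-2')

    # 2. Timestamp normalization applied to the last(time) column:
    raw = "2024-11-05 13:42:07.123456 +02:00"
    parsed = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S.%f %z").astimezone(
        tz=timezone.utc
    )
    print(parsed.isoformat())  # 2024-11-05T11:42:07.123456+00:00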
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
         event = {
             EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
             EventFieldType.ENDPOINT_ID: endpoint_id,
             EventFieldType.TIMESTAMP: timestamp,
             EventFieldType.ERROR_COUNT: 1.0,
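With the added line, an event emitted by ErrorExtractor now carries an explicit error type tag. A sketch of the resulting dict, assuming the EventFieldType constants resolve to the snake_case strings used as column names elsewhere in this diff:

    error_event = {
        "model_error": "ValueError: unexpected input",  # EventFieldType.MODEL_ERROR
        "error_type": "infer_error",  # EventFieldType.ERROR_TYPE (new)
        "endpoint_id": "ep-1",  # EventFieldType.ENDPOINT_ID
        "timestamp": "2024-11-05 11:42:07.123456+00:00",  # EventFieldType.TIMESTAMP
        "error_count": 1.0,  # EventFieldType.ERROR_COUNT
    }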
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from io import StringIO
 from typing import Literal, Optional, Union

@@ -310,6 +310,7 @@ class V3IOTSDBConnector(TSDBConnector):
             ],
             index_cols=[
                 mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
             ],
             max_events=tsdb_batching_max_events,
             flush_after_seconds=tsdb_batching_timeout_secs,
@@ -338,9 +339,6 @@
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
-            # TODO: remove this when extra data is supported (ML-7460)
-            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")

@@ -544,6 +542,7 @@
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
         type: Literal["metrics", "results"] = "results",
+        with_result_extra_data: bool = False,
     ) -> Union[
         list[
             Union[
@@ -565,6 +564,12 @@
         """

         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
             name = mm_schemas.MetricData.METRIC_NAME
             columns = [mm_schemas.MetricData.METRIC_VALUE]
@@ -577,6 +582,8 @@
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise ValueError(f"Invalid {type = }")
@@ -605,6 +612,9 @@
             endpoint_id=endpoint_id,
             is_empty=df.empty,
         )
+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""

         return df_handler(df=df, metrics=metrics, project=self.project)

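From here on, every getter in both connectors funnels its time window through self._get_start_end(start, end) before querying. The helper itself is outside this diff view; a plausible sketch of the normalization such a helper performs (fill defaults, keep UTC, sanity-check the order) is:

    from datetime import datetime, timezone
    from typing import Optional

    def _get_start_end(
        start: Optional[datetime], end: Optional[datetime]
    ) -> tuple[datetime, datetime]:
        # Hypothetical defaults: epoch..now in UTC when unset.
        end = end or datetime.now(tz=timezone.utc)
        start = start or datetime.fromtimestamp(0, tz=timezone.utc)
        if start > end:
            raise ValueError("start must not be after end")
        return start, end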
@@ -700,12 +710,13 @@
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
@@ -734,12 +745,14 @@
     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
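get_drift_status is the one getter with its own default window: when start is omitted it looks back 24 hours. The standalone equivalent, with datetime_now() standing in for mlrun.utils.datetime_now():

    from datetime import datetime, timedelta, timezone

    def datetime_now() -> datetime:
        return datetime.now(tz=timezone.utc)

    start = None
    start = start or (datetime_now() - timedelta(hours=24))
    print(start)  # timezone-aware instant, 24 hours before now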
@@ -758,9 +771,10 @@
     def get_metrics_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.METRICS,
             start=start,
@@ -778,9 +792,10 @@
     def get_results_metadata(
         self,
         endpoint_id: str,
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -803,18 +818,20 @@
     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.ERROR_COUNT],
-            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})"
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
+            f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
             agg_funcs=["count"],
         )
         if not df.empty:
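The reworked error-count filter restricts the count to inference errors. Rendered with placeholder values, and assuming ERROR_TYPE and INFER_ERROR resolve to 'error_type' and 'infer_error', the composed filter string looks like this:

    endpoint_ids = ["ep-1", "ep-2"]
    error_type_field, infer_error = "error_type", "infer_error"
    filter_query = (
        f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
        f"AND {error_type_field} == '{infer_error}'"
    )
    print(filter_query)
    # endpoint_id IN('ep-1', 'ep-2') AND error_type == 'infer_error'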
@@ -830,12 +847,13 @@
     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
+        start, end = self._get_start_end(start, end)
         df = self._get_records(
             table=mm_schemas.FileTargetKind.PREDICTIONS,
             start=start,
mlrun/model_monitoring/helpers.py

@@ -13,12 +13,14 @@
 # limitations under the License.

 import datetime
+import os
 import typing

 import numpy as np
 import pandas as pd

 if typing.TYPE_CHECKING:
+    from mlrun.datastore import DataItem
     from mlrun.db.base import RunDBInterface
     from mlrun.projects import MlrunProject

@@ -28,6 +30,7 @@ import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.model_monitoring
+import mlrun.utils.helpers
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
     _compose_full_name,
@@ -98,7 +101,70 @@ def get_monitoring_parquet_path(
     return parquet_path


-def
+def get_monitoring_stats_directory_path(
+    project: str,
+    kind: str = mm_constants.FileTargetKind.STATS,
+) -> str:
+    """
+    Get model monitoring stats target for the current project and kind. The stats target path is based on the
+    project artifact path. If project artifact path is not defined, the stats target path will be based on MLRun
+    artifact path.
+    :param project: Project object.
+    :param kind: indicate the kind of the stats path
+    :return: Monitoring stats target path.
+    """
+    stats_path = mlrun.mlconf.get_model_monitoring_file_target_path(
+        project=project,
+        kind=kind,
+    )
+    return stats_path
+
+
+def _get_monitoring_current_stats_file_path(project: str, endpoint_id: str) -> str:
+    return os.path.join(
+        get_monitoring_stats_directory_path(project),
+        f"{endpoint_id}_current_stats.json",
+    )
+
+
+def _get_monitoring_drift_measures_file_path(project: str, endpoint_id: str) -> str:
+    return os.path.join(
+        get_monitoring_stats_directory_path(project),
+        f"{endpoint_id}_drift_measures.json",
+    )
+
+
+def get_monitoring_current_stats_data(project: str, endpoint_id: str) -> "DataItem":
+    """
+    getter for data item of current stats for project and endpoint
+    :param project: project name str
+    :param endpoint_id: endpoint id str
+    :return: DataItem
+    """
+    return mlrun.datastore.store_manager.object(
+        _get_monitoring_current_stats_file_path(
+            project=project, endpoint_id=endpoint_id
+        )
+    )
+
+
+def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataItem":
+    """
+    getter for data item of drift measures for project and endpoint
+    :param project: project name str
+    :param endpoint_id: endpoint id str
+    :return: DataItem
+    """
+    return mlrun.datastore.store_manager.object(
+        _get_monitoring_drift_measures_file_path(
+            project=project, endpoint_id=endpoint_id
+        )
+    )
+
+
+def get_connection_string(
+    secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
+) -> str:
     """Get endpoint store connection string from the project secret. If wasn't set, take it from the system
     configurations.

@@ -350,3 +416,27 @@ def enrich_model_endpoint_with_model_uri(
     model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
         kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
     )
+
+
+def _get_monitoring_schedules_folder_path(project: str) -> str:
+    return typing.cast(
+        str,
+        mlrun.mlconf.get_model_monitoring_file_target_path(
+            project=project, kind=mm_constants.FileTargetKind.MONITORING_SCHEDULES
+        ),
+    )
+
+
+def _get_monitoring_schedules_file_path(*, project: str, endpoint_id: str) -> str:
+    return os.path.join(
+        _get_monitoring_schedules_folder_path(project), f"{endpoint_id}.json"
+    )
+
+
+def get_monitoring_schedules_data(*, project: str, endpoint_id: str) -> "DataItem":
+    """
+    Get the model monitoring schedules' data item of the project's model endpoint.
+    """
+    return mlrun.datastore.store_manager.object(
+        _get_monitoring_schedules_file_path(project=project, endpoint_id=endpoint_id)
+    )