mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +46 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +334 -0
- mlrun/artifacts/manager.py +15 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +7 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +66 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +67 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +21 -21
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +194 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +208 -82
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1237 -386
- mlrun/db/nopdb.py +201 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +61 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +1 -2
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +152 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +75 -38
- mlrun/projects/project.py +865 -206
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -11
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +74 -65
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +66 -18
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +3 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
- mlrun-1.8.0rc8.dist-info/RECORD +347 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc4.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
|
@@ -13,8 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
|
-
from datetime import datetime
|
|
17
|
-
from typing import Union
|
|
16
|
+
from datetime import datetime, timedelta, timezone
|
|
18
17
|
|
|
19
18
|
import pandas as pd
|
|
20
19
|
import taosws
|
|
@@ -82,13 +81,16 @@ class TDEngineConnector(TSDBConnector):
|
|
|
82
81
|
"""Initialize the super tables for the TSDB."""
|
|
83
82
|
self.tables = {
|
|
84
83
|
mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
|
|
85
|
-
self.database
|
|
84
|
+
project=self.project, database=self.database
|
|
86
85
|
),
|
|
87
86
|
mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
|
|
88
|
-
self.database
|
|
87
|
+
project=self.project, database=self.database
|
|
89
88
|
),
|
|
90
89
|
mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
|
|
91
|
-
self.database
|
|
90
|
+
project=self.project, database=self.database
|
|
91
|
+
),
|
|
92
|
+
mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
|
|
93
|
+
project=self.project, database=self.database
|
|
92
94
|
),
|
|
93
95
|
}
|
|
94
96
|
|
|
@@ -112,11 +114,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
112
114
|
"""
|
|
113
115
|
|
|
114
116
|
table_name = (
|
|
115
|
-
f"{self.project}_"
|
|
116
117
|
f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
|
|
117
|
-
f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}
|
|
118
|
+
f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
|
|
118
119
|
)
|
|
119
|
-
event[mm_schemas.EventFieldType.PROJECT] = self.project
|
|
120
120
|
|
|
121
121
|
if kind == mm_schemas.WriterEventKind.RESULT:
|
|
122
122
|
# Write a new result
|
|
@@ -124,7 +124,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
124
124
|
table_name = (
|
|
125
125
|
f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
|
|
126
126
|
).replace("-", "_")
|
|
127
|
-
event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
|
|
128
127
|
|
|
129
128
|
else:
|
|
130
129
|
# Write a new metric
|
|
@@ -165,7 +164,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
165
164
|
def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
|
|
166
165
|
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
167
166
|
|
|
168
|
-
def apply_monitoring_stream_steps(self, graph):
|
|
167
|
+
def apply_monitoring_stream_steps(self, graph, **kwarg):
|
|
169
168
|
"""
|
|
170
169
|
Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
|
|
171
170
|
different key metric dictionaries. This data is being used by the monitoring dashboards in
|
|
@@ -187,7 +186,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
187
186
|
name=name,
|
|
188
187
|
after=after,
|
|
189
188
|
url=self._tdengine_connection_string,
|
|
190
|
-
supertable=
|
|
189
|
+
supertable=self.tables[
|
|
190
|
+
mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
191
|
+
].super_table,
|
|
191
192
|
table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
|
|
192
193
|
time_col=mm_schemas.EventFieldType.TIME,
|
|
193
194
|
database=self.database,
|
|
@@ -196,7 +197,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
196
197
|
mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
|
|
197
198
|
],
|
|
198
199
|
tag_cols=[
|
|
199
|
-
mm_schemas.EventFieldType.PROJECT,
|
|
200
200
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
201
201
|
],
|
|
202
202
|
max_events=1000,
|
|
@@ -209,8 +209,37 @@ class TDEngineConnector(TSDBConnector):
|
|
|
209
209
|
after="ProcessBeforeTDEngine",
|
|
210
210
|
)
|
|
211
211
|
|
|
212
|
-
def handle_model_error(
|
|
213
|
-
|
|
212
|
+
def handle_model_error(
|
|
213
|
+
self,
|
|
214
|
+
graph,
|
|
215
|
+
tsdb_batching_max_events: int = 1000,
|
|
216
|
+
tsdb_batching_timeout_secs: int = 30,
|
|
217
|
+
**kwargs,
|
|
218
|
+
) -> None:
|
|
219
|
+
graph.add_step(
|
|
220
|
+
"mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
|
|
221
|
+
name="error_extractor",
|
|
222
|
+
after="ForwardError",
|
|
223
|
+
)
|
|
224
|
+
graph.add_step(
|
|
225
|
+
"storey.TDEngineTarget",
|
|
226
|
+
name="tsdb_error",
|
|
227
|
+
after="error_extractor",
|
|
228
|
+
url=self._tdengine_connection_string,
|
|
229
|
+
supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
|
|
230
|
+
table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
|
|
231
|
+
time_col=mm_schemas.EventFieldType.TIME,
|
|
232
|
+
database=self.database,
|
|
233
|
+
columns=[
|
|
234
|
+
mm_schemas.EventFieldType.MODEL_ERROR,
|
|
235
|
+
],
|
|
236
|
+
tag_cols=[
|
|
237
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
238
|
+
mm_schemas.EventFieldType.ERROR_TYPE,
|
|
239
|
+
],
|
|
240
|
+
max_events=tsdb_batching_max_events,
|
|
241
|
+
flush_after_seconds=tsdb_batching_timeout_secs,
|
|
242
|
+
)
|
|
214
243
|
|
|
215
244
|
def delete_tsdb_resources(self):
|
|
216
245
|
"""
|
|
@@ -220,22 +249,23 @@ class TDEngineConnector(TSDBConnector):
|
|
|
220
249
|
"Deleting all project resources using the TDEngine connector",
|
|
221
250
|
project=self.project,
|
|
222
251
|
)
|
|
252
|
+
drop_statements = []
|
|
223
253
|
for table in self.tables:
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
254
|
+
drop_statements.append(self.tables[table].drop_supertable_query())
|
|
255
|
+
|
|
256
|
+
try:
|
|
257
|
+
self.connection.run(
|
|
258
|
+
statements=drop_statements,
|
|
229
259
|
timeout=self._timeout,
|
|
230
260
|
retries=self._retries,
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
261
|
+
)
|
|
262
|
+
except Exception as e:
|
|
263
|
+
logger.warning(
|
|
264
|
+
"Failed to drop TDEngine tables. You may need to drop them manually. "
|
|
265
|
+
"These can be found under the following supertables: app_results, "
|
|
266
|
+
"metrics, and predictions.",
|
|
267
|
+
project=self.project,
|
|
268
|
+
error=mlrun.errors.err_to_str(e),
|
|
239
269
|
)
|
|
240
270
|
logger.debug(
|
|
241
271
|
"Deleted all project resources using the TDEngine connector",
|
|
@@ -264,6 +294,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
264
294
|
limit: typing.Optional[int] = None,
|
|
265
295
|
sliding_window_step: typing.Optional[str] = None,
|
|
266
296
|
timestamp_column: str = mm_schemas.EventFieldType.TIME,
|
|
297
|
+
group_by: typing.Optional[typing.Union[list[str], str]] = None,
|
|
298
|
+
preform_agg_columns: typing.Optional[list] = None,
|
|
299
|
+
order_by: typing.Optional[str] = None,
|
|
300
|
+
desc: typing.Optional[bool] = None,
|
|
267
301
|
) -> pd.DataFrame:
|
|
268
302
|
"""
|
|
269
303
|
Getting records from TSDB data collection.
|
|
@@ -283,18 +317,19 @@ class TDEngineConnector(TSDBConnector):
|
|
|
283
317
|
`sliding_window_step` is provided, interval must be provided as well. Provided
|
|
284
318
|
as a string in the format of '1m', '1h', etc.
|
|
285
319
|
:param timestamp_column: The column name that holds the timestamp index.
|
|
320
|
+
:param group_by: The column name to group by. Note that if `group_by` is provided, aggregation
|
|
321
|
+
functions must be provided.
|
|
322
|
+
:param preform_agg_columns: The columns to perform aggregation on.
|
|
323
|
+
Note that all provided aggregation functions are performed on those columns.
|
|
324
|
+
If not provided, the default behavior is to perform the aggregation on all columns in `columns`,
|
|
325
|
+
if an empty list is provided, the aggregation won't be performed.
|
|
326
|
+
:param order_by: The column or alias to perform ordering on in the query.
|
|
327
|
+
:param desc: Whether or not to sort the results in descending order.
|
|
286
328
|
|
|
287
329
|
:return: DataFrame with the provided attributes from the data collection.
|
|
288
330
|
:raise: MLRunInvalidArgumentError if query the provided table failed.
|
|
289
331
|
"""
|
|
290
332
|
|
|
291
|
-
project_condition = f"project = '{self.project}'"
|
|
292
|
-
filter_query = (
|
|
293
|
-
f"({filter_query}) AND ({project_condition})"
|
|
294
|
-
if filter_query
|
|
295
|
-
else project_condition
|
|
296
|
-
)
|
|
297
|
-
|
|
298
333
|
full_query = tdengine_schemas.TDEngineSchema._get_records_query(
|
|
299
334
|
table=table,
|
|
300
335
|
start=start,
|
|
@@ -307,6 +342,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
307
342
|
sliding_window_step=sliding_window_step,
|
|
308
343
|
timestamp_column=timestamp_column,
|
|
309
344
|
database=self.database,
|
|
345
|
+
group_by=group_by,
|
|
346
|
+
preform_agg_funcs_columns=preform_agg_columns,
|
|
347
|
+
order_by=order_by,
|
|
348
|
+
desc=desc,
|
|
310
349
|
)
|
|
311
350
|
logger.debug("Querying TDEngine", query=full_query)
|
|
312
351
|
try:
|
|
@@ -329,6 +368,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
329
368
|
end: datetime,
|
|
330
369
|
metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
|
|
331
370
|
type: typing.Literal["metrics", "results"],
|
|
371
|
+
with_result_extra_data: bool = False,
|
|
332
372
|
) -> typing.Union[
|
|
333
373
|
list[
|
|
334
374
|
typing.Union[
|
|
@@ -346,12 +386,18 @@ class TDEngineConnector(TSDBConnector):
|
|
|
346
386
|
timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
|
|
347
387
|
columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
|
|
348
388
|
if type == "metrics":
|
|
349
|
-
|
|
389
|
+
if with_result_extra_data:
|
|
390
|
+
logger.warning(
|
|
391
|
+
"The 'with_result_extra_data' parameter is not supported for metrics, just for results",
|
|
392
|
+
project=self.project,
|
|
393
|
+
endpoint_id=endpoint_id,
|
|
394
|
+
)
|
|
395
|
+
table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
|
|
350
396
|
name = mm_schemas.MetricData.METRIC_NAME
|
|
351
397
|
columns += [name, mm_schemas.MetricData.METRIC_VALUE]
|
|
352
398
|
df_handler = self.df_to_metrics_values
|
|
353
399
|
elif type == "results":
|
|
354
|
-
table = mm_schemas.TDEngineSuperTables.APP_RESULTS
|
|
400
|
+
table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
|
|
355
401
|
name = mm_schemas.ResultData.RESULT_NAME
|
|
356
402
|
columns += [
|
|
357
403
|
name,
|
|
@@ -359,6 +405,8 @@ class TDEngineConnector(TSDBConnector):
|
|
|
359
405
|
mm_schemas.ResultData.RESULT_STATUS,
|
|
360
406
|
mm_schemas.ResultData.RESULT_KIND,
|
|
361
407
|
]
|
|
408
|
+
if with_result_extra_data:
|
|
409
|
+
columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
|
|
362
410
|
df_handler = self.df_to_results_values
|
|
363
411
|
else:
|
|
364
412
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -395,6 +443,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
395
443
|
is_empty=df.empty,
|
|
396
444
|
)
|
|
397
445
|
|
|
446
|
+
if not with_result_extra_data and type == "results":
|
|
447
|
+
# Set the extra data to an empty string if it's not requested
|
|
448
|
+
df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
|
|
449
|
+
|
|
398
450
|
return df_handler(df=df, metrics=metrics, project=self.project)
|
|
399
451
|
|
|
400
452
|
def read_predictions(
|
|
@@ -417,7 +469,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
417
469
|
"both or neither of `aggregation_window` and `agg_funcs` must be provided"
|
|
418
470
|
)
|
|
419
471
|
df = self._get_records(
|
|
420
|
-
table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
|
|
472
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
|
|
421
473
|
start=start,
|
|
422
474
|
end=end,
|
|
423
475
|
columns=[mm_schemas.EventFieldType.LATENCY],
|
|
@@ -458,51 +510,219 @@ class TDEngineConnector(TSDBConnector):
|
|
|
458
510
|
|
|
459
511
|
def get_last_request(
|
|
460
512
|
self,
|
|
461
|
-
endpoint_ids: Union[str, list[str]],
|
|
462
|
-
start:
|
|
463
|
-
end:
|
|
513
|
+
endpoint_ids: typing.Union[str, list[str]],
|
|
514
|
+
start: typing.Optional[datetime] = None,
|
|
515
|
+
end: typing.Optional[datetime] = None,
|
|
464
516
|
) -> pd.DataFrame:
|
|
465
|
-
|
|
517
|
+
endpoint_ids = (
|
|
518
|
+
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
519
|
+
)
|
|
520
|
+
start, end = self._get_start_end(start, end)
|
|
521
|
+
df = self._get_records(
|
|
522
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
|
|
523
|
+
start=start,
|
|
524
|
+
end=end,
|
|
525
|
+
columns=[
|
|
526
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
527
|
+
mm_schemas.EventFieldType.TIME,
|
|
528
|
+
mm_schemas.EventFieldType.LATENCY,
|
|
529
|
+
],
|
|
530
|
+
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
|
|
531
|
+
timestamp_column=mm_schemas.EventFieldType.TIME,
|
|
532
|
+
agg_funcs=["last"],
|
|
533
|
+
group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
534
|
+
preform_agg_columns=[mm_schemas.EventFieldType.TIME],
|
|
535
|
+
)
|
|
536
|
+
if not df.empty:
|
|
537
|
+
df.dropna(inplace=True)
|
|
538
|
+
df.rename(
|
|
539
|
+
columns={
|
|
540
|
+
f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
|
|
541
|
+
f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
|
|
542
|
+
},
|
|
543
|
+
inplace=True,
|
|
544
|
+
)
|
|
545
|
+
df[mm_schemas.EventFieldType.LAST_REQUEST] = df[
|
|
546
|
+
mm_schemas.EventFieldType.LAST_REQUEST
|
|
547
|
+
].map(
|
|
548
|
+
lambda last_request: datetime.strptime(
|
|
549
|
+
last_request, "%Y-%m-%d %H:%M:%S.%f %z"
|
|
550
|
+
).astimezone(tz=timezone.utc)
|
|
551
|
+
)
|
|
552
|
+
return df
|
|
466
553
|
|
|
467
554
|
def get_drift_status(
|
|
468
555
|
self,
|
|
469
|
-
endpoint_ids: Union[str, list[str]],
|
|
470
|
-
start:
|
|
471
|
-
end:
|
|
556
|
+
endpoint_ids: typing.Union[str, list[str]],
|
|
557
|
+
start: typing.Optional[datetime] = None,
|
|
558
|
+
end: typing.Optional[datetime] = None,
|
|
472
559
|
) -> pd.DataFrame:
|
|
473
|
-
|
|
560
|
+
endpoint_ids = (
|
|
561
|
+
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
562
|
+
)
|
|
563
|
+
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
564
|
+
start, end = self._get_start_end(start, end)
|
|
565
|
+
df = self._get_records(
|
|
566
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
|
|
567
|
+
start=start,
|
|
568
|
+
end=end,
|
|
569
|
+
columns=[
|
|
570
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
571
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
572
|
+
],
|
|
573
|
+
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
|
|
574
|
+
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
575
|
+
agg_funcs=["max"],
|
|
576
|
+
group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
577
|
+
preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
|
|
578
|
+
)
|
|
579
|
+
df.rename(
|
|
580
|
+
columns={
|
|
581
|
+
f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
|
|
582
|
+
},
|
|
583
|
+
inplace=True,
|
|
584
|
+
)
|
|
585
|
+
if not df.empty:
|
|
586
|
+
df.dropna(inplace=True)
|
|
587
|
+
return df
|
|
474
588
|
|
|
475
589
|
def get_metrics_metadata(
|
|
476
590
|
self,
|
|
477
591
|
endpoint_id: str,
|
|
478
|
-
start:
|
|
479
|
-
end:
|
|
592
|
+
start: typing.Optional[datetime] = None,
|
|
593
|
+
end: typing.Optional[datetime] = None,
|
|
480
594
|
) -> pd.DataFrame:
|
|
481
|
-
|
|
595
|
+
start, end = self._get_start_end(start, end)
|
|
596
|
+
df = self._get_records(
|
|
597
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
|
|
598
|
+
start=start,
|
|
599
|
+
end=end,
|
|
600
|
+
columns=[
|
|
601
|
+
mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
602
|
+
mm_schemas.MetricData.METRIC_NAME,
|
|
603
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
604
|
+
],
|
|
605
|
+
filter_query=f"endpoint_id='{endpoint_id}'",
|
|
606
|
+
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
607
|
+
group_by=[
|
|
608
|
+
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
609
|
+
mm_schemas.MetricData.METRIC_NAME,
|
|
610
|
+
],
|
|
611
|
+
agg_funcs=["last"],
|
|
612
|
+
)
|
|
613
|
+
df.rename(
|
|
614
|
+
columns={
|
|
615
|
+
f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
616
|
+
f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
|
|
617
|
+
f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
618
|
+
},
|
|
619
|
+
inplace=True,
|
|
620
|
+
)
|
|
621
|
+
if not df.empty:
|
|
622
|
+
df.dropna(inplace=True)
|
|
623
|
+
return df
|
|
482
624
|
|
|
483
625
|
def get_results_metadata(
|
|
484
626
|
self,
|
|
485
627
|
endpoint_id: str,
|
|
486
|
-
start:
|
|
487
|
-
end:
|
|
628
|
+
start: typing.Optional[datetime] = None,
|
|
629
|
+
end: typing.Optional[datetime] = None,
|
|
488
630
|
) -> pd.DataFrame:
|
|
489
|
-
|
|
631
|
+
start, end = self._get_start_end(start, end)
|
|
632
|
+
df = self._get_records(
|
|
633
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
|
|
634
|
+
start=start,
|
|
635
|
+
end=end,
|
|
636
|
+
columns=[
|
|
637
|
+
mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
638
|
+
mm_schemas.ResultData.RESULT_NAME,
|
|
639
|
+
mm_schemas.ResultData.RESULT_KIND,
|
|
640
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
641
|
+
],
|
|
642
|
+
filter_query=f"endpoint_id='{endpoint_id}'",
|
|
643
|
+
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
644
|
+
group_by=[
|
|
645
|
+
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
646
|
+
mm_schemas.ResultData.RESULT_NAME,
|
|
647
|
+
],
|
|
648
|
+
agg_funcs=["last"],
|
|
649
|
+
)
|
|
650
|
+
df.rename(
|
|
651
|
+
columns={
|
|
652
|
+
f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
653
|
+
f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
|
|
654
|
+
f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
|
|
655
|
+
f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
656
|
+
},
|
|
657
|
+
inplace=True,
|
|
658
|
+
)
|
|
659
|
+
if not df.empty:
|
|
660
|
+
df.dropna(inplace=True)
|
|
661
|
+
return df
|
|
490
662
|
|
|
491
663
|
def get_error_count(
|
|
492
664
|
self,
|
|
493
|
-
endpoint_ids: Union[str, list[str]],
|
|
494
|
-
start:
|
|
495
|
-
end:
|
|
665
|
+
endpoint_ids: typing.Union[str, list[str]],
|
|
666
|
+
start: typing.Optional[datetime] = None,
|
|
667
|
+
end: typing.Optional[datetime] = None,
|
|
496
668
|
) -> pd.DataFrame:
|
|
497
|
-
|
|
669
|
+
endpoint_ids = (
|
|
670
|
+
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
671
|
+
)
|
|
672
|
+
start, end = self._get_start_end(start, end)
|
|
673
|
+
df = self._get_records(
|
|
674
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
|
|
675
|
+
start=start,
|
|
676
|
+
end=end,
|
|
677
|
+
columns=[
|
|
678
|
+
mm_schemas.EventFieldType.MODEL_ERROR,
|
|
679
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
680
|
+
],
|
|
681
|
+
agg_funcs=["count"],
|
|
682
|
+
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
|
|
683
|
+
f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'",
|
|
684
|
+
group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
685
|
+
preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
|
|
686
|
+
)
|
|
687
|
+
df.rename(
|
|
688
|
+
columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
|
|
689
|
+
inplace=True,
|
|
690
|
+
)
|
|
691
|
+
if not df.empty:
|
|
692
|
+
df.dropna(inplace=True)
|
|
693
|
+
return df
|
|
498
694
|
|
|
499
695
|
def get_avg_latency(
|
|
500
696
|
self,
|
|
501
|
-
endpoint_ids: Union[str, list[str]],
|
|
502
|
-
start:
|
|
503
|
-
end:
|
|
697
|
+
endpoint_ids: typing.Union[str, list[str]],
|
|
698
|
+
start: typing.Optional[datetime] = None,
|
|
699
|
+
end: typing.Optional[datetime] = None,
|
|
504
700
|
) -> pd.DataFrame:
|
|
505
|
-
|
|
701
|
+
endpoint_ids = (
|
|
702
|
+
endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
|
|
703
|
+
)
|
|
704
|
+
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
705
|
+
start, end = self._get_start_end(start, end)
|
|
706
|
+
df = self._get_records(
|
|
707
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
|
|
708
|
+
start=start,
|
|
709
|
+
end=end,
|
|
710
|
+
columns=[
|
|
711
|
+
mm_schemas.EventFieldType.LATENCY,
|
|
712
|
+
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
713
|
+
],
|
|
714
|
+
agg_funcs=["avg"],
|
|
715
|
+
filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
|
|
716
|
+
group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
717
|
+
preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
|
|
718
|
+
)
|
|
719
|
+
df.rename(
|
|
720
|
+
columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
|
|
721
|
+
inplace=True,
|
|
722
|
+
)
|
|
723
|
+
if not df.empty:
|
|
724
|
+
df.dropna(inplace=True)
|
|
725
|
+
return df
|
|
506
726
|
|
|
507
727
|
# Note: this function serves as a reference for checking the TSDB for the existence of a metric.
|
|
508
728
|
#
|
|
@@ -150,6 +150,7 @@ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
|
|
|
150
150
|
endpoint_id = event[EventFieldType.ENDPOINT_ID]
|
|
151
151
|
event = {
|
|
152
152
|
EventFieldType.MODEL_ERROR: str(error),
|
|
153
|
+
EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
|
|
153
154
|
EventFieldType.ENDPOINT_ID: endpoint_id,
|
|
154
155
|
EventFieldType.TIMESTAMP: timestamp,
|
|
155
156
|
EventFieldType.ERROR_COUNT: 1.0,
|