mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/alerts/alert.py +30 -27
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/notification.py +1 -0
- mlrun/datastore/alibaba_oss.py +2 -2
- mlrun/datastore/azure_blob.py +6 -3
- mlrun/datastore/base.py +1 -1
- mlrun/datastore/dbfs_store.py +2 -2
- mlrun/datastore/google_cloud_storage.py +83 -20
- mlrun/datastore/s3.py +2 -2
- mlrun/datastore/sources.py +54 -0
- mlrun/datastore/targets.py +9 -53
- mlrun/db/httpdb.py +6 -1
- mlrun/errors.py +8 -0
- mlrun/execution.py +7 -0
- mlrun/feature_store/api.py +5 -0
- mlrun/feature_store/retrieval/job.py +1 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/api.py +9 -0
- mlrun/model_monitoring/applications/_application_steps.py +36 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
- mlrun/model_monitoring/controller.py +15 -11
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +5 -5
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +35 -7
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -1
- mlrun/model_monitoring/helpers.py +16 -17
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/projects/pipelines.py +19 -30
- mlrun/projects/project.py +69 -51
- mlrun/run.py +8 -6
- mlrun/runtimes/__init__.py +4 -0
- mlrun/runtimes/nuclio/api_gateway.py +9 -0
- mlrun/runtimes/nuclio/application/application.py +112 -54
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/utils/helpers.py +33 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/METADATA +8 -11
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/RECORD +42 -42
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/top_level.txt +0 -0
mlrun/model.py
CHANGED
|
@@ -679,7 +679,24 @@ class ImageBuilder(ModelObj):
|
|
|
679
679
|
|
|
680
680
|
|
|
681
681
|
class Notification(ModelObj):
|
|
682
|
-
"""Notification
|
|
682
|
+
"""Notification object
|
|
683
|
+
|
|
684
|
+
:param kind: notification implementation kind - slack, webhook, etc.
|
|
685
|
+
:param name: for logging and identification
|
|
686
|
+
:param message: message content in the notification
|
|
687
|
+
:param severity: severity to display in the notification
|
|
688
|
+
:param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
|
|
689
|
+
:param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
|
|
690
|
+
to evaluate if the notification should be sent in addition to the 'when' statuses.
|
|
691
|
+
e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
|
|
692
|
+
:param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
|
|
693
|
+
git repository details, etc.)
|
|
694
|
+
:param secret_params: secret parameters for the notification implementation, same as params but will be stored
|
|
695
|
+
in a k8s secret and passed as a secret reference to the implementation.
|
|
696
|
+
:param status: notification status - pending, sent, error
|
|
697
|
+
:param sent_time: time the notification was sent
|
|
698
|
+
:param reason: failure reason if the notification failed to send
|
|
699
|
+
"""
|
|
683
700
|
|
|
684
701
|
def __init__(
|
|
685
702
|
self,
|
|
@@ -1468,7 +1485,11 @@ class RunObject(RunTemplate):
|
|
|
1468
1485
|
@property
|
|
1469
1486
|
def error(self) -> str:
|
|
1470
1487
|
"""error string if failed"""
|
|
1471
|
-
if
|
|
1488
|
+
if (
|
|
1489
|
+
self.status
|
|
1490
|
+
and self.status.state
|
|
1491
|
+
in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
1492
|
+
):
|
|
1472
1493
|
unknown_error = ""
|
|
1473
1494
|
if (
|
|
1474
1495
|
self.status.state
|
|
@@ -1484,8 +1505,8 @@ class RunObject(RunTemplate):
|
|
|
1484
1505
|
|
|
1485
1506
|
return (
|
|
1486
1507
|
self.status.error
|
|
1487
|
-
or self.status.reason
|
|
1488
1508
|
or self.status.status_text
|
|
1509
|
+
or self.status.reason
|
|
1489
1510
|
or unknown_error
|
|
1490
1511
|
)
|
|
1491
1512
|
return ""
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -616,7 +616,16 @@ def _create_model_monitoring_function_base(
|
|
|
616
616
|
app_step = prepare_step.to(class_name=application_class, **application_kwargs)
|
|
617
617
|
else:
|
|
618
618
|
app_step = prepare_step.to(class_name=application_class)
|
|
619
|
+
|
|
619
620
|
app_step.__class__ = mlrun.serving.MonitoringApplicationStep
|
|
621
|
+
|
|
622
|
+
app_step.error_handler(
|
|
623
|
+
name="ApplicationErrorHandler",
|
|
624
|
+
class_name="mlrun.model_monitoring.applications._application_steps._ApplicationErrorHandler",
|
|
625
|
+
full_event=True,
|
|
626
|
+
project=project,
|
|
627
|
+
)
|
|
628
|
+
|
|
620
629
|
app_step.to(
|
|
621
630
|
class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
|
|
622
631
|
name="PushToMonitoringWriter",
|
|
@@ -17,6 +17,7 @@ from typing import Optional
|
|
|
17
17
|
|
|
18
18
|
import mlrun.common.helpers
|
|
19
19
|
import mlrun.common.model_monitoring.helpers
|
|
20
|
+
import mlrun.common.schemas.alert as alert_objects
|
|
20
21
|
import mlrun.common.schemas.model_monitoring.constants as mm_constant
|
|
21
22
|
import mlrun.datastore
|
|
22
23
|
import mlrun.serving
|
|
@@ -164,3 +165,38 @@ class _PrepareMonitoringEvent(StepToDict):
|
|
|
164
165
|
)
|
|
165
166
|
context.__class__ = MonitoringApplicationContext
|
|
166
167
|
return context
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class _ApplicationErrorHandler(StepToDict):
|
|
171
|
+
def __init__(self, project: str, name: Optional[str] = None):
|
|
172
|
+
self.project = project
|
|
173
|
+
self.name = name or "ApplicationErrorHandler"
|
|
174
|
+
|
|
175
|
+
def do(self, event):
|
|
176
|
+
"""
|
|
177
|
+
Handle model monitoring application error. This step will generate an event, describing the error.
|
|
178
|
+
|
|
179
|
+
:param event: Application event.
|
|
180
|
+
"""
|
|
181
|
+
|
|
182
|
+
logger.error(f"Error in application step: {event}")
|
|
183
|
+
|
|
184
|
+
event_data = mlrun.common.schemas.Event(
|
|
185
|
+
kind=alert_objects.EventKind.MM_APP_FAILED,
|
|
186
|
+
entity={
|
|
187
|
+
"kind": alert_objects.EventEntityKind.MODEL_MONITORING_APPLICATION,
|
|
188
|
+
"project": self.project,
|
|
189
|
+
"ids": [f"{self.project}_{event.body.application_name}"],
|
|
190
|
+
},
|
|
191
|
+
value_dict={
|
|
192
|
+
"Error": event.error,
|
|
193
|
+
"Timestamp": event.timestamp,
|
|
194
|
+
"Application Class": event.body.application_name,
|
|
195
|
+
"Endpoint ID": event.body.endpoint_id,
|
|
196
|
+
},
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
mlrun.get_run_db().generate_event(
|
|
200
|
+
name=alert_objects.EventKind.MM_APP_FAILED, event_data=event_data
|
|
201
|
+
)
|
|
202
|
+
logger.info("Event generated successfully")
|
|
@@ -91,7 +91,9 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
91
91
|
"""
|
|
92
92
|
MLRun's default data drift application for model monitoring.
|
|
93
93
|
|
|
94
|
-
The application expects tabular numerical data, and calculates three metrics over the features' histograms.
|
|
94
|
+
The application expects tabular numerical data, and calculates three metrics over the shared features' histograms.
|
|
95
|
+
The metrics are calculated on features that have reference data from the training dataset. When there is no
|
|
96
|
+
reference data (`feature_stats`), this application sends a warning log and does nothing.
|
|
95
97
|
The three metrics are:
|
|
96
98
|
|
|
97
99
|
* Hellinger distance.
|
|
@@ -112,6 +114,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
112
114
|
|
|
113
115
|
project.enable_model_monitoring()
|
|
114
116
|
|
|
117
|
+
To avoid it, pass `deploy_histogram_data_drift_app=False`.
|
|
115
118
|
"""
|
|
116
119
|
|
|
117
120
|
NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME
|
|
@@ -223,19 +226,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
223
226
|
return metrics
|
|
224
227
|
|
|
225
228
|
@staticmethod
|
|
226
|
-
def
|
|
227
|
-
|
|
229
|
+
def _get_shared_features_sample_stats(
|
|
230
|
+
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
228
231
|
) -> mlrun.common.model_monitoring.helpers.FeatureStats:
|
|
229
232
|
"""
|
|
230
|
-
|
|
231
|
-
in the plotly artifact
|
|
233
|
+
Filter out features without reference data in `feature_stats`, e.g. `timestamp`.
|
|
232
234
|
"""
|
|
233
|
-
|
|
234
|
-
|
|
235
|
+
return mlrun.common.model_monitoring.helpers.FeatureStats(
|
|
236
|
+
{
|
|
237
|
+
key: monitoring_context.sample_df_stats[key]
|
|
238
|
+
for key in monitoring_context.feature_stats
|
|
239
|
+
}
|
|
235
240
|
)
|
|
236
|
-
if EventFieldType.TIMESTAMP in sample_set_statistics:
|
|
237
|
-
del sample_set_statistics[EventFieldType.TIMESTAMP]
|
|
238
|
-
return sample_set_statistics
|
|
239
241
|
|
|
240
242
|
@staticmethod
|
|
241
243
|
def _log_json_artifact(
|
|
@@ -299,8 +301,8 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
299
301
|
self._log_json_artifact(drift_per_feature_values, monitoring_context)
|
|
300
302
|
|
|
301
303
|
self._log_plotly_table_artifact(
|
|
302
|
-
sample_set_statistics=self.
|
|
303
|
-
monitoring_context
|
|
304
|
+
sample_set_statistics=self._get_shared_features_sample_stats(
|
|
305
|
+
monitoring_context
|
|
304
306
|
),
|
|
305
307
|
inputs_statistics=monitoring_context.feature_stats,
|
|
306
308
|
metrics_per_feature=metrics_per_feature,
|
|
@@ -325,7 +327,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
|
|
|
325
327
|
"""
|
|
326
328
|
monitoring_context.logger.debug("Starting to run the application")
|
|
327
329
|
if not monitoring_context.feature_stats:
|
|
328
|
-
monitoring_context.logger.
|
|
330
|
+
monitoring_context.logger.warning(
|
|
329
331
|
"No feature statistics found, skipping the application. \n"
|
|
330
332
|
"In order to run the application, training set must be provided when logging the model."
|
|
331
333
|
)
|
|
@@ -335,19 +335,23 @@ class MonitoringApplicationController:
|
|
|
335
335
|
return
|
|
336
336
|
monitoring_functions = self.project_obj.list_model_monitoring_functions()
|
|
337
337
|
if monitoring_functions:
|
|
338
|
-
# Gets only application in ready state
|
|
339
338
|
applications_names = list(
|
|
340
|
-
{
|
|
341
|
-
app.metadata.name
|
|
342
|
-
for app in monitoring_functions
|
|
343
|
-
if (
|
|
344
|
-
app.status.state == "ready"
|
|
345
|
-
# workaround for the default app, as its `status.state` is `None`
|
|
346
|
-
or app.metadata.name
|
|
347
|
-
== mm_constants.HistogramDataDriftApplicationConstants.NAME
|
|
348
|
-
)
|
|
349
|
-
}
|
|
339
|
+
{app.metadata.name for app in monitoring_functions}
|
|
350
340
|
)
|
|
341
|
+
# if monitoring_functions: - TODO : ML-7700
|
|
342
|
+
# Gets only application in ready state
|
|
343
|
+
# applications_names = list(
|
|
344
|
+
# {
|
|
345
|
+
# app.metadata.name
|
|
346
|
+
# for app in monitoring_functions
|
|
347
|
+
# if (
|
|
348
|
+
# app.status.state == "ready"
|
|
349
|
+
# # workaround for the default app, as its `status.state` is `None`
|
|
350
|
+
# or app.metadata.name
|
|
351
|
+
# == mm_constants.HistogramDataDriftApplicationConstants.NAME
|
|
352
|
+
# )
|
|
353
|
+
# }
|
|
354
|
+
# )
|
|
351
355
|
if not applications_names:
|
|
352
356
|
logger.info("No monitoring functions found", project=self.project)
|
|
353
357
|
return
|
|
@@ -34,11 +34,11 @@ fields_to_encode_decode = [
|
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
_METRIC_FIELDS: list[str] = [
|
|
37
|
-
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
38
|
-
mm_schemas.MetricData.METRIC_NAME,
|
|
39
|
-
mm_schemas.MetricData.METRIC_VALUE,
|
|
40
|
-
mm_schemas.WriterEvent.START_INFER_TIME,
|
|
41
|
-
mm_schemas.WriterEvent.END_INFER_TIME,
|
|
37
|
+
mm_schemas.WriterEvent.APPLICATION_NAME.value,
|
|
38
|
+
mm_schemas.MetricData.METRIC_NAME.value,
|
|
39
|
+
mm_schemas.MetricData.METRIC_VALUE.value,
|
|
40
|
+
mm_schemas.WriterEvent.START_INFER_TIME.value,
|
|
41
|
+
mm_schemas.WriterEvent.END_INFER_TIME.value,
|
|
42
42
|
]
|
|
43
43
|
|
|
44
44
|
|
|
@@ -17,6 +17,8 @@ from dataclasses import dataclass
|
|
|
17
17
|
from io import StringIO
|
|
18
18
|
from typing import Optional, Union
|
|
19
19
|
|
|
20
|
+
import taosws
|
|
21
|
+
|
|
20
22
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
21
23
|
import mlrun.common.types
|
|
22
24
|
|
|
@@ -28,6 +30,9 @@ class _TDEngineColumnType:
|
|
|
28
30
|
self.data_type = data_type
|
|
29
31
|
self.length = length
|
|
30
32
|
|
|
33
|
+
def values_to_column(self, values):
|
|
34
|
+
raise NotImplementedError()
|
|
35
|
+
|
|
31
36
|
def __str__(self):
|
|
32
37
|
if self.length is not None:
|
|
33
38
|
return f"{self.data_type}({self.length})"
|
|
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
|
|
|
44
49
|
BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
|
|
45
50
|
|
|
46
51
|
|
|
52
|
+
def values_to_column(values, column_type):
|
|
53
|
+
if column_type == _TDEngineColumn.TIMESTAMP:
|
|
54
|
+
timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
|
|
55
|
+
return taosws.millis_timestamps_to_column(timestamps)
|
|
56
|
+
if column_type == _TDEngineColumn.FLOAT:
|
|
57
|
+
return taosws.floats_to_column(values)
|
|
58
|
+
if column_type == _TDEngineColumn.INT:
|
|
59
|
+
return taosws.ints_to_column(values)
|
|
60
|
+
if column_type == _TDEngineColumn.BINARY_40:
|
|
61
|
+
return taosws.binary_to_column(values)
|
|
62
|
+
if column_type == _TDEngineColumn.BINARY_64:
|
|
63
|
+
return taosws.binary_to_column(values)
|
|
64
|
+
if column_type == _TDEngineColumn.BINARY_10000:
|
|
65
|
+
return taosws.binary_to_column(values)
|
|
66
|
+
|
|
67
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
68
|
+
f"unsupported column type '{column_type}'"
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
47
72
|
@dataclass
|
|
48
73
|
class TDEngineSchema:
|
|
49
74
|
"""
|
|
@@ -55,13 +80,14 @@ class TDEngineSchema:
|
|
|
55
80
|
def __init__(
|
|
56
81
|
self,
|
|
57
82
|
super_table: str,
|
|
58
|
-
columns: dict[str,
|
|
83
|
+
columns: dict[str, _TDEngineColumn],
|
|
59
84
|
tags: dict[str, str],
|
|
85
|
+
database: Optional[str] = None,
|
|
60
86
|
):
|
|
61
87
|
self.super_table = super_table
|
|
62
88
|
self.columns = columns
|
|
63
89
|
self.tags = tags
|
|
64
|
-
self.database = _MODEL_MONITORING_DATABASE
|
|
90
|
+
self.database = database or _MODEL_MONITORING_DATABASE
|
|
65
91
|
|
|
66
92
|
def _create_super_table_query(self) -> str:
|
|
67
93
|
columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
|
|
@@ -83,11 +109,23 @@ class TDEngineSchema:
|
|
|
83
109
|
|
|
84
110
|
def _insert_subtable_query(
|
|
85
111
|
self,
|
|
112
|
+
connection: taosws.Connection,
|
|
86
113
|
subtable: str,
|
|
87
114
|
values: dict[str, Union[str, int, float, datetime.datetime]],
|
|
88
|
-
) ->
|
|
89
|
-
|
|
90
|
-
|
|
115
|
+
) -> taosws.TaosStmt:
|
|
116
|
+
stmt = connection.statement()
|
|
117
|
+
question_marks = ", ".join("?" * len(self.columns))
|
|
118
|
+
stmt.prepare(f"INSERT INTO ? VALUES ({question_marks});")
|
|
119
|
+
stmt.set_tbname_tags(subtable, [])
|
|
120
|
+
|
|
121
|
+
bind_params = []
|
|
122
|
+
|
|
123
|
+
for col_name, col_type in self.columns.items():
|
|
124
|
+
val = values[col_name]
|
|
125
|
+
bind_params.append(values_to_column([val], col_type))
|
|
126
|
+
|
|
127
|
+
stmt.bind_param(bind_params)
|
|
128
|
+
return stmt
|
|
91
129
|
|
|
92
130
|
def _delete_subtable_query(
|
|
93
131
|
self,
|
|
@@ -188,53 +226,53 @@ class TDEngineSchema:
|
|
|
188
226
|
|
|
189
227
|
@dataclass
|
|
190
228
|
class AppResultTable(TDEngineSchema):
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
database = _MODEL_MONITORING_DATABASE
|
|
229
|
+
def __init__(self, database: Optional[str] = None):
|
|
230
|
+
super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
|
|
231
|
+
columns = {
|
|
232
|
+
mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
233
|
+
mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
234
|
+
mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
|
|
235
|
+
mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
|
|
236
|
+
}
|
|
237
|
+
tags = {
|
|
238
|
+
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
239
|
+
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
240
|
+
mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
|
|
241
|
+
mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
|
|
242
|
+
mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
|
|
243
|
+
}
|
|
244
|
+
super().__init__(super_table, columns, tags, database)
|
|
208
245
|
|
|
209
246
|
|
|
210
247
|
@dataclass
|
|
211
248
|
class Metrics(TDEngineSchema):
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
249
|
+
def __init__(self, database: Optional[str] = None):
|
|
250
|
+
super_table = mm_schemas.TDEngineSuperTables.METRICS
|
|
251
|
+
columns = {
|
|
252
|
+
mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
253
|
+
mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
254
|
+
mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
|
|
255
|
+
}
|
|
256
|
+
tags = {
|
|
257
|
+
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
258
|
+
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
259
|
+
mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
|
|
260
|
+
mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
|
|
261
|
+
}
|
|
262
|
+
super().__init__(super_table, columns, tags, database)
|
|
226
263
|
|
|
227
264
|
|
|
228
265
|
@dataclass
|
|
229
266
|
class Predictions(TDEngineSchema):
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
267
|
+
def __init__(self, database: Optional[str] = None):
|
|
268
|
+
super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
269
|
+
columns = {
|
|
270
|
+
mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
|
|
271
|
+
mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
|
|
272
|
+
mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
|
|
273
|
+
}
|
|
274
|
+
tags = {
|
|
275
|
+
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
276
|
+
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
277
|
+
}
|
|
278
|
+
super().__init__(super_table, columns, tags, database)
|
|
@@ -58,15 +58,26 @@ class TDEngineConnector(TSDBConnector):
|
|
|
58
58
|
except taosws.QueryError:
|
|
59
59
|
# Database already exists
|
|
60
60
|
pass
|
|
61
|
-
|
|
61
|
+
try:
|
|
62
|
+
conn.execute(f"USE {self.database}")
|
|
63
|
+
except taosws.QueryError as e:
|
|
64
|
+
raise mlrun.errors.MLRunTSDBConnectionFailure(
|
|
65
|
+
f"Failed to use TDEngine database {self.database}, {mlrun.errors.err_to_str(e)}"
|
|
66
|
+
)
|
|
62
67
|
return conn
|
|
63
68
|
|
|
64
69
|
def _init_super_tables(self):
|
|
65
70
|
"""Initialize the super tables for the TSDB."""
|
|
66
71
|
self.tables = {
|
|
67
|
-
mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
|
|
68
|
-
|
|
69
|
-
|
|
72
|
+
mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
|
|
73
|
+
self.database
|
|
74
|
+
),
|
|
75
|
+
mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
|
|
76
|
+
self.database
|
|
77
|
+
),
|
|
78
|
+
mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
|
|
79
|
+
self.database
|
|
80
|
+
),
|
|
70
81
|
}
|
|
71
82
|
|
|
72
83
|
def create_tables(self):
|
|
@@ -97,6 +108,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
97
108
|
table_name = (
|
|
98
109
|
f"{table_name}_" f"{event[mm_schemas.ResultData.RESULT_NAME]}"
|
|
99
110
|
).replace("-", "_")
|
|
111
|
+
event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
|
|
100
112
|
|
|
101
113
|
else:
|
|
102
114
|
# Write a new metric
|
|
@@ -105,14 +117,30 @@ class TDEngineConnector(TSDBConnector):
|
|
|
105
117
|
f"{table_name}_" f"{event[mm_schemas.MetricData.METRIC_NAME]}"
|
|
106
118
|
).replace("-", "_")
|
|
107
119
|
|
|
120
|
+
# Convert the datetime strings to datetime objects
|
|
121
|
+
event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
|
|
122
|
+
val=event[mm_schemas.WriterEvent.END_INFER_TIME]
|
|
123
|
+
)
|
|
124
|
+
event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
|
|
125
|
+
val=event[mm_schemas.WriterEvent.START_INFER_TIME]
|
|
126
|
+
)
|
|
127
|
+
|
|
108
128
|
create_table_query = table._create_subtable_query(
|
|
109
129
|
subtable=table_name, values=event
|
|
110
130
|
)
|
|
111
131
|
self._connection.execute(create_table_query)
|
|
112
|
-
|
|
113
|
-
|
|
132
|
+
|
|
133
|
+
insert_statement = table._insert_subtable_query(
|
|
134
|
+
self._connection,
|
|
135
|
+
subtable=table_name,
|
|
136
|
+
values=event,
|
|
114
137
|
)
|
|
115
|
-
|
|
138
|
+
insert_statement.add_batch()
|
|
139
|
+
insert_statement.execute()
|
|
140
|
+
|
|
141
|
+
@staticmethod
|
|
142
|
+
def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
|
|
143
|
+
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
116
144
|
|
|
117
145
|
def apply_monitoring_stream_steps(self, graph):
|
|
118
146
|
"""
|
|
@@ -326,7 +326,9 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
326
326
|
elif kind == mm_schemas.WriterEventKind.RESULT:
|
|
327
327
|
table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
|
|
328
328
|
index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
|
|
329
|
-
|
|
329
|
+
event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
|
|
330
|
+
# TODO: remove this when extra data is supported (ML-7460)
|
|
331
|
+
event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
|
|
330
332
|
else:
|
|
331
333
|
raise ValueError(f"Invalid {kind = }")
|
|
332
334
|
|
|
@@ -20,10 +20,8 @@ import pandas as pd
|
|
|
20
20
|
|
|
21
21
|
import mlrun
|
|
22
22
|
import mlrun.common.model_monitoring.helpers
|
|
23
|
-
import mlrun.common.schemas
|
|
24
|
-
|
|
25
|
-
EventFieldType,
|
|
26
|
-
)
|
|
23
|
+
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
24
|
+
import mlrun.data_types.infer
|
|
27
25
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
28
26
|
ModelEndpointMonitoringMetric,
|
|
29
27
|
ModelEndpointMonitoringMetricType,
|
|
@@ -35,7 +33,6 @@ from mlrun.utils import logger
|
|
|
35
33
|
if typing.TYPE_CHECKING:
|
|
36
34
|
from mlrun.db.base import RunDBInterface
|
|
37
35
|
from mlrun.projects import MlrunProject
|
|
38
|
-
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
39
36
|
|
|
40
37
|
|
|
41
38
|
class _BatchDict(typing.TypedDict):
|
|
@@ -45,26 +42,29 @@ class _BatchDict(typing.TypedDict):
|
|
|
45
42
|
|
|
46
43
|
|
|
47
44
|
def get_stream_path(
|
|
48
|
-
project: str,
|
|
45
|
+
project: str,
|
|
46
|
+
function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
|
|
47
|
+
stream_uri: typing.Optional[str] = None,
|
|
49
48
|
) -> str:
|
|
50
49
|
"""
|
|
51
50
|
Get stream path from the project secret. If it wasn't set, take it from the system configuration.
|
|
52
51
|
|
|
53
52
|
:param project: Project name.
|
|
54
|
-
:param function_name:
|
|
53
|
+
:param function_name: Application name. Default is model_monitoring_stream.
|
|
54
|
+
:param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
|
|
55
55
|
|
|
56
56
|
:return: Monitoring stream path to the relevant application.
|
|
57
57
|
"""
|
|
58
58
|
|
|
59
|
-
stream_uri = mlrun.get_secret_or_env(
|
|
60
|
-
|
|
59
|
+
stream_uri = stream_uri or mlrun.get_secret_or_env(
|
|
60
|
+
mm_constants.ProjectSecretKeys.STREAM_PATH
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
if not stream_uri or stream_uri == "v3io":
|
|
64
64
|
# TODO : remove the first part of this condition in 1.9.0
|
|
65
65
|
stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
66
66
|
project=project,
|
|
67
|
-
kind=
|
|
67
|
+
kind=mm_constants.FileTargetKind.STREAM,
|
|
68
68
|
target="online",
|
|
69
69
|
function_name=function_name,
|
|
70
70
|
)
|
|
@@ -78,7 +78,7 @@ def get_stream_path(
|
|
|
78
78
|
|
|
79
79
|
def get_monitoring_parquet_path(
|
|
80
80
|
project: "MlrunProject",
|
|
81
|
-
kind: str =
|
|
81
|
+
kind: str = mm_constants.FileTargetKind.PARQUET,
|
|
82
82
|
) -> str:
|
|
83
83
|
"""Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
|
|
84
84
|
project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
|
|
@@ -111,7 +111,7 @@ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -
|
|
|
111
111
|
"""
|
|
112
112
|
|
|
113
113
|
return mlrun.get_secret_or_env(
|
|
114
|
-
key=
|
|
114
|
+
key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
|
|
115
115
|
secret_provider=secret_provider,
|
|
116
116
|
)
|
|
117
117
|
|
|
@@ -126,7 +126,7 @@ def get_tsdb_connection_string(
|
|
|
126
126
|
"""
|
|
127
127
|
|
|
128
128
|
return mlrun.get_secret_or_env(
|
|
129
|
-
key=
|
|
129
|
+
key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
|
|
130
130
|
secret_provider=secret_provider,
|
|
131
131
|
)
|
|
132
132
|
|
|
@@ -200,7 +200,7 @@ def update_model_endpoint_last_request(
|
|
|
200
200
|
db.patch_model_endpoint(
|
|
201
201
|
project=project,
|
|
202
202
|
endpoint_id=model_endpoint.metadata.uid,
|
|
203
|
-
attributes={EventFieldType.LAST_REQUEST: current_request},
|
|
203
|
+
attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
|
|
204
204
|
)
|
|
205
205
|
else:
|
|
206
206
|
try:
|
|
@@ -229,7 +229,7 @@ def update_model_endpoint_last_request(
|
|
|
229
229
|
db.patch_model_endpoint(
|
|
230
230
|
project=project,
|
|
231
231
|
endpoint_id=model_endpoint.metadata.uid,
|
|
232
|
-
attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
232
|
+
attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
|
|
233
233
|
)
|
|
234
234
|
|
|
235
235
|
|
|
@@ -249,8 +249,7 @@ def calculate_inputs_statistics(
|
|
|
249
249
|
|
|
250
250
|
# Use `DFDataInfer` to calculate the statistics over the inputs:
|
|
251
251
|
inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
|
|
252
|
-
df=inputs,
|
|
253
|
-
options=mlrun.data_types.infer.InferOptions.Histogram,
|
|
252
|
+
df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
|
|
254
253
|
)
|
|
255
254
|
|
|
256
255
|
# Recalculate the histograms over the bins that are set in the sample-set of the end point:
|
|
@@ -202,7 +202,7 @@ class EventStreamProcessor:
|
|
|
202
202
|
def apply_process_endpoint_event():
|
|
203
203
|
graph.add_step(
|
|
204
204
|
"ProcessEndpointEvent",
|
|
205
|
-
after="
|
|
205
|
+
after="extract_endpoint", # TODO: change this to FilterError in ML-7456
|
|
206
206
|
full_event=True,
|
|
207
207
|
project=self.project,
|
|
208
208
|
)
|
|
@@ -527,9 +527,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
527
527
|
# If error key has been found in the current event,
|
|
528
528
|
# increase the error counter by 1 and raise the error description
|
|
529
529
|
error = event.get("error")
|
|
530
|
-
if error:
|
|
530
|
+
if error: # TODO: delete this in ML-7456
|
|
531
531
|
self.error_count[endpoint_id] += 1
|
|
532
|
-
# TODO: write to tsdb / kv once in a while
|
|
533
532
|
raise mlrun.errors.MLRunInvalidArgumentError(str(error))
|
|
534
533
|
|
|
535
534
|
# Validate event fields
|