mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
--- a/mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
+++ b/mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
@@ -17,6 +17,8 @@ from dataclasses import dataclass
 from io import StringIO
 from typing import Optional, Union
 
+import taosws
+
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 
@@ -28,6 +30,9 @@ class _TDEngineColumnType:
         self.data_type = data_type
         self.length = length
 
+    def values_to_column(self, values):
+        raise NotImplementedError()
+
     def __str__(self):
         if self.length is not None:
             return f"{self.data_type}({self.length})"
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
 
 
+def values_to_column(values, column_type):
+    if column_type == _TDEngineColumn.TIMESTAMP:
+        timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
+        return taosws.millis_timestamps_to_column(timestamps)
+    if column_type == _TDEngineColumn.FLOAT:
+        return taosws.floats_to_column(values)
+    if column_type == _TDEngineColumn.INT:
+        return taosws.ints_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_40:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_64:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_10000:
+        return taosws.binary_to_column(values)
+
+    raise mlrun.errors.MLRunInvalidArgumentError(
+        f"unsupported column type '{column_type}'"
+    )
+
+
 @dataclass
 class TDEngineSchema:
     """
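For orientation, the new module-level values_to_column helper is what turns plain Python values into taosws bind-parameter columns. A minimal sketch of the underlying conversions, assuming the taosws package is installed (the values and the timestamp are made up; the taosws calls are the same ones the hunk uses):

    import datetime

    import taosws

    values = [datetime.datetime(2024, 7, 1, 12, 0)]
    # TIMESTAMP columns go through epoch milliseconds, as in the helper above
    ts_col = taosws.millis_timestamps_to_column(
        [round(v.timestamp() * 1000) for v in values]
    )
    float_col = taosws.floats_to_column([0.95])       # FLOAT columns
    binary_col = taosws.binary_to_column(["ep-123"])  # all BINARY_* widths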
@@ -55,39 +80,53 @@ class TDEngineSchema:
     def __init__(
         self,
         super_table: str,
-        columns: dict[str,
+        columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
+        database: Optional[str] = None,
     ):
         self.super_table = super_table
         self.columns = columns
         self.tags = tags
-        self.database = _MODEL_MONITORING_DATABASE
+        self.database = database or _MODEL_MONITORING_DATABASE
 
     def _create_super_table_query(self) -> str:
         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
         tags = ", ".join(f"{col} {val}" for col, val in self.tags.items())
         return f"CREATE STABLE if NOT EXISTS {self.database}.{self.super_table} ({columns}) TAGS ({tags});"
 
-    def
+    def _create_subtable_sql(
         self,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
     ) -> str:
         try:
-
+            tags = ", ".join(f"'{values[val]}'" for val in self.tags)
         except KeyError:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"values must contain all tags: {self.tags.keys()}"
             )
-        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({
+        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
 
-
-
+    @staticmethod
+    def _insert_subtable_stmt(
+        statement: taosws.TaosStmt,
+        columns: dict[str, _TDEngineColumn],
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
-    ) ->
-
-
+    ) -> taosws.TaosStmt:
+        question_marks = ", ".join("?" * len(columns))
+        statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        statement.set_tbname(subtable)
+
+        bind_params = []
+
+        for col_name, col_type in columns.items():
+            val = values[col_name]
+            bind_params.append(values_to_column([val], col_type))
+
+        statement.bind_param(bind_params)
+        statement.add_batch()
+        return statement
 
     def _delete_subtable_query(
         self,
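Taken together, these helpers generate the subtable DDL and the prepared insert. A rough sketch of what they produce for a toy schema, assuming the module is importable (all names and values here are hypothetical; the output strings follow the f-strings above):

    schema = TDEngineSchema(
        super_table="toy_stable",
        columns={"ts": _TDEngineColumn.TIMESTAMP, "value": _TDEngineColumn.FLOAT},
        tags={"project": "BINARY(64)"},
        database="toy_db",
    )
    schema._create_super_table_query()
    # roughly: CREATE STABLE if NOT EXISTS toy_db.toy_stable (ts TIMESTAMP, value FLOAT) TAGS (project BINARY(64));
    schema._create_subtable_sql(subtable="toy_sub", values={"project": "demo"})
    # roughly: CREATE TABLE if NOT EXISTS toy_db.toy_sub USING toy_stable TAGS ('demo');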
@@ -125,8 +164,8 @@ class TDEngineSchema:
     @staticmethod
     def _get_records_query(
         table: str,
-        start: datetime,
-        end: datetime,
+        start: datetime.datetime,
+        end: datetime.datetime,
         columns_to_filter: list[str] = None,
         filter_query: Optional[str] = None,
         interval: Optional[str] = None,
@@ -173,7 +212,7 @@ class TDEngineSchema:
         if filter_query:
             query.write(f"{filter_query} AND ")
         if start:
-            query.write(f"{timestamp_column} >= '{start}'
+            query.write(f"{timestamp_column} >= '{start}' AND ")
         if end:
             query.write(f"{timestamp_column} <= '{end}'")
         if interval:
@@ -188,53 +227,53 @@
 
 @dataclass
 class AppResultTable(TDEngineSchema):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
+            mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Metrics(TDEngineSchema):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.METRICS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Predictions(TDEngineSchema):
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
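The practical effect of this hunk: the three schema classes moved from fixed class attributes to __init__ methods that take the target database, so a per-project database can be injected while the old default still applies. A usage sketch (the database name is made up):

    table = AppResultTable(database="mlrun_mm_myproject")
    assert table.database == "mlrun_mm_myproject"
    fallback = AppResultTable()  # falls back to _MODEL_MONITORING_DATABASE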
--- a/mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py
+++ b/mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import json
 
@@ -21,8 +20,6 @@ from mlrun.common.schemas.model_monitoring import (
     EventKeyMetrics,
 )
 
-_TABLE_COLUMN = "table_column"
-
 
 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
--- a/mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
+++ b/mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
@@ -14,14 +14,20 @@
 
 import typing
 from datetime import datetime
+from typing import Union
 
 import pandas as pd
 import taosws
+from taoswswrap.tdengine_connection import (
+    Statement,
+    TDEngineConnection,
+)
 
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
 import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
 from mlrun.model_monitoring.db import TSDBConnector
+from mlrun.model_monitoring.db.tsdb.tdengine.schemas import TDEngineSchema
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
 
@@ -46,39 +52,50 @@ class TDEngineConnector(TSDBConnector):
         )
         self._tdengine_connection_string = kwargs.get("connection_string")
         self.database = database
-
+
+        self._connection = None
         self._init_super_tables()
 
-
+    @property
+    def connection(self) -> TDEngineConnection:
+        if not self._connection:
+            self._connection = self._create_connection()
+        return self._connection
+
+    def _create_connection(self) -> TDEngineConnection:
         """Establish a connection to the TSDB server."""
-
-
-
-
-
-        pass
-        conn.execute(f"USE {self.database}")
+        logger.debug("Creating a new connection to TDEngine", project=self.project)
+        conn = TDEngineConnection(self._tdengine_connection_string)
+        conn.run(statements=f"CREATE DATABASE IF NOT EXISTS {self.database}")
+        conn.prefix_statements = [f"USE {self.database}"]
+        logger.debug("Connected to TDEngine", project=self.project)
         return conn
 
     def _init_super_tables(self):
         """Initialize the super tables for the TSDB."""
         self.tables = {
-            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
-
-
+            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                self.database
+            ),
         }
 
     def create_tables(self):
         """Create TDEngine supertables."""
         for table in self.tables:
             create_table_query = self.tables[table]._create_super_table_query()
-            self.
+            self.connection.run(statements=create_table_query)
 
     def write_application_event(
         self,
         event: dict,
         kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
-    ):
+    ) -> None:
         """
         Write a single result or metric to TSDB.
         """
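The connection is now created lazily: the first access to .connection opens a TDEngineConnection, creates the database if needed, and registers a USE prefix that is prepended to every later run() call. A standalone sketch of that pattern, relying only on the taoswswrap calls this diff itself uses (the DSN and database name are made up):

    from taoswswrap.tdengine_connection import TDEngineConnection

    conn = TDEngineConnection("taosws://root:taosdata@localhost:6041")  # hypothetical DSN
    conn.run(statements="CREATE DATABASE IF NOT EXISTS mlrun_mm")
    conn.prefix_statements = ["USE mlrun_mm"]  # applied before every subsequent run()
    result = conn.run(query="SHOW TABLES")
    print(result.data)  # the diff reads .data and .fields off this result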
@@ -94,24 +111,46 @@ class TDEngineConnector(TSDBConnector):
             # Write a new result
             table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS]
             table_name = (
-                f"{table_name}_
+                f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS]
             table_name = (
-                f"{table_name}_
+                f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
             ).replace("-", "_")
 
-
-
+        # Escape the table name for case-sensitivity (ML-7908)
+        # https://github.com/taosdata/taos-connector-python/issues/260
+        table_name = f"`{table_name}`"
+
+        # Convert the datetime strings to datetime objects
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+        )
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+
+        create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
+
+        insert_statement = Statement(
+            TDEngineSchema._insert_subtable_stmt,
+            dict(columns=table.columns, subtable=table_name, values=event),
         )
-
-
-
+
+        self.connection.run(
+            statements=[
+                create_table_sql,
+                insert_statement,
+            ]
         )
-
+
+    @staticmethod
+    def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+        return datetime.fromisoformat(val) if isinstance(val, str) else val
 
     def apply_monitoring_stream_steps(self, graph):
         """
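The subtable name is derived from the base name plus the result or metric name, with hyphens replaced and the whole identifier backtick-quoted so TDEngine preserves its case (the ML-7908 reference above). For example (values made up):

    table_name = "app_results_myproject_ep1"  # base name built earlier in the method
    result_name = "data-drift"
    table_name = f"{table_name}_{result_name}".replace("-", "_")
    table_name = f"`{table_name}`"
    # -> `app_results_myproject_ep1_data_drift`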
@@ -147,7 +186,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
-            max_events=
+            max_events=1000,
+            flush_after_seconds=30,
         )
 
         apply_process_before_tsdb()
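A max_events cap alone means a batch can sit indefinitely on a quiet endpoint; the added flush_after_seconds bounds that latency. If the step behaves like storey's Batch (an assumption here; storey is the engine behind mlrun serving graphs), the semantics can be demonstrated standalone:

    import storey

    flow = storey.build_flow(
        [
            storey.SyncEmitSource(),
            storey.Batch(max_events=1000, flush_after_seconds=30),
            storey.Reduce([], lambda acc, batch: acc + [len(batch)]),
        ]
    )
    controller = flow.run()
    for i in range(2500):
        controller.emit({"i": i})
    controller.terminate()
    print(controller.await_termination())
    # [1000, 1000, 500]; the partial batch is flushed on terminate here,
    # or after 30 seconds on a live stream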
@@ -156,22 +196,31 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
+    def handle_model_error(self, graph, **kwargs) -> None:
+        pass
+
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
         """
+        logger.debug(
+            "Deleting all project resources using the TDEngine connector",
+            project=self.project,
+        )
         for table in self.tables:
             get_subtable_names_query = self.tables[table]._get_subtables_query(
                 values={mm_schemas.EventFieldType.PROJECT: self.project}
             )
-            subtables = self.
+            subtables = self.connection.run(query=get_subtable_names_query).data
+            drop_statements = []
             for subtable in subtables:
-
-                subtable=subtable[0]
+                drop_statements.append(
+                    self.tables[table]._drop_subtable_query(subtable=subtable[0])
                 )
-
-            logger.
-
+            self.connection.run(statements=drop_statements)
+        logger.debug(
+            "Deleted all project resources using the TDEngine connector",
+            project=self.project,
         )
 
     def get_model_endpoint_real_time_metrics(
@@ -222,7 +271,7 @@ class TDEngineConnector(TSDBConnector):
 
         project_condition = f"project = '{self.project}'"
         filter_query = (
-            f"{filter_query} AND {project_condition}"
+            f"({filter_query}) AND ({project_condition})"
            if filter_query
            else project_condition
        )
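The added parentheses are not cosmetic: SQL AND binds tighter than OR, so an unparenthesized filter containing OR would be regrouped. A quick illustration with made-up values:

    filter_query = "a='1' OR a='2'"
    project_condition = "project = 'demo'"

    f"{filter_query} AND {project_condition}"
    # "a='1' OR a='2' AND project = 'demo'"  -> parsed as a='1' OR (a='2' AND ...), wrong
    f"({filter_query}) AND ({project_condition})"
    # "(a='1' OR a='2') AND (project = 'demo')"  -> the intended scoping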
@@ -240,17 +289,16 @@ class TDEngineConnector(TSDBConnector):
             timestamp_column=timestamp_column,
             database=self.database,
         )
+        logger.debug("Querying TDEngine", query=full_query)
         try:
-            query_result = self.
+            query_result = self.connection.run(query=full_query)
         except taosws.QueryError as e:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Failed to query table {table} in database {self.database}, {str(e)}"
             )
-        columns = []
-        for column in query_result.fields:
-            columns.append(column.name())
 
-
+        df_columns = [field.name for field in query_result.fields]
+        return pd.DataFrame(query_result.data, columns=df_columns)
 
     def read_metrics_data(
         self,
@@ -274,13 +322,22 @@ class TDEngineConnector(TSDBConnector):
             ],
         ],
     ]:
+        timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
+        columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
             table = mm_schemas.TDEngineSuperTables.METRICS
             name = mm_schemas.MetricData.METRIC_NAME
+            columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
             table = mm_schemas.TDEngineSuperTables.APP_RESULTS
             name = mm_schemas.ResultData.RESULT_NAME
+            columns += [
+                name,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_KIND,
+            ]
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -289,18 +346,19 @@ class TDEngineConnector(TSDBConnector):
 
         metrics_condition = " OR ".join(
             [
-                f"({mm_schemas.WriterEvent.APPLICATION_NAME}
+                f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{metric.app}' AND {name}='{metric.name}')"
                 for metric in metrics
             ]
         )
-        filter_query = f"endpoint_id='{endpoint_id}' AND ({metrics_condition})"
+        filter_query = f"(endpoint_id='{endpoint_id}') AND ({metrics_condition})"
 
         df = self._get_records(
             table=table,
             start=start,
             end=end,
             filter_query=filter_query,
-            timestamp_column=
+            timestamp_column=timestamp_column,
+            columns=columns,
         )
 
         df[mm_schemas.WriterEvent.END_INFER_TIME] = pd.to_datetime(
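The generated filter is easiest to read with concrete inputs. A runnable sketch with made-up stand-ins for the metric objects (real calls pass objects exposing .app and .name):

    from types import SimpleNamespace

    metrics = [
        SimpleNamespace(app="app1", name="latency"),
        SimpleNamespace(app="app2", name="accuracy"),
    ]
    name = "metric_name"  # or result_name, depending on the queried type
    endpoint_id = "ep-123"

    metrics_condition = " OR ".join(
        f"(application_name='{m.app}' AND {name}='{m.name}')" for m in metrics
    )
    filter_query = f"(endpoint_id='{endpoint_id}') AND ({metrics_condition})"
    # (endpoint_id='ep-123') AND ((application_name='app1' AND metric_name='latency')
    # OR (application_name='app2' AND metric_name='accuracy'))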
@@ -377,6 +435,54 @@ class TDEngineConnector(TSDBConnector):
             ),  # pyright: ignore[reportArgumentType]
         )
 
+    def get_last_request(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_drift_status(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "now-24h",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_metrics_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_results_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_error_count(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_avg_latency(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
--- a/mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py
+++ b/mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from datetime import datetime
+from typing import Any
 
 import mlrun.feature_store.steps
 from mlrun.common.schemas.model_monitoring import (
@@ -19,6 +20,25 @@ from mlrun.common.schemas.model_monitoring import (
     EventKeyMetrics,
     EventLiveStats,
 )
+from mlrun.utils import logger
+
+
+def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
+    """
+    Normalize user defined keys - input data to a model and its predictions,
+    to a form V3IO frames tolerates.
+
+    The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+    """
+    prefix = "_"
+
+    def norm_key(key: str) -> str:
+        key = key.replace("-", "_")  # hyphens `-` are not allowed
+        if key and key[0].isdigit():  # starting with a digit is not allowed
+            return prefix + key
+        return key
+
+    return {norm_key(k): v for k, v in event.items()}
 
 
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
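A worked example of the normalization (feature names made up): hyphens become underscores and a leading digit gets an underscore prefix, matching the regex in the docstring.

    _normalize_dict_for_v3io_frames({"f1": 0.1, "petal-width": 1.3, "3rd_feature": 7})
    # -> {'f1': 0.1, 'petal_width': 1.3, '_3rd_feature': 7}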
@@ -68,8 +88,8 @@ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
         # endpoint_features includes the event values of each feature and prediction
         endpoint_features = {
             EventFieldType.RECORD_TYPE: EventKeyMetrics.ENDPOINT_FEATURES,
-            **event[EventFieldType.NAMED_PREDICTIONS],
-            **event[EventFieldType.NAMED_FEATURES],
+            **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_PREDICTIONS]),
+            **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_FEATURES]),
             **base_event,
         }
         # Create a dictionary that includes both base_metrics and endpoint_features
@@ -115,3 +135,24 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
             else:
                 unpacked[key] = new_event[key]
         return unpacked if unpacked else None
+
+
+class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Prepare the event for insertion into the errors TSDB table.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        error = event.get("error")
+        timestamp = datetime.fromisoformat(event.get("when"))
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        event = {
+            EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ENDPOINT_ID: endpoint_id,
+            EventFieldType.TIMESTAMP: timestamp,
+            EventFieldType.ERROR_COUNT: 1.0,
+        }
+        logger.info("Write error to errors TSDB table", event=event)
+        return event