mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
Files changed (135)
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -17,6 +17,8 @@ from dataclasses import dataclass
 from io import StringIO
 from typing import Optional, Union
 
+import taosws
+
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 
@@ -28,6 +30,9 @@ class _TDEngineColumnType:
         self.data_type = data_type
         self.length = length
 
+    def values_to_column(self, values):
+        raise NotImplementedError()
+
     def __str__(self):
         if self.length is not None:
             return f"{self.data_type}({self.length})"
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
 
 
+def values_to_column(values, column_type):
+    if column_type == _TDEngineColumn.TIMESTAMP:
+        timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
+        return taosws.millis_timestamps_to_column(timestamps)
+    if column_type == _TDEngineColumn.FLOAT:
+        return taosws.floats_to_column(values)
+    if column_type == _TDEngineColumn.INT:
+        return taosws.ints_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_40:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_64:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_10000:
+        return taosws.binary_to_column(values)
+
+    raise mlrun.errors.MLRunInvalidArgumentError(
+        f"unsupported column type '{column_type}'"
+    )
+
+
 @dataclass
 class TDEngineSchema:
     """
@@ -55,39 +80,53 @@ class TDEngineSchema:
     def __init__(
         self,
         super_table: str,
-        columns: dict[str, str],
+        columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
+        database: Optional[str] = None,
     ):
         self.super_table = super_table
         self.columns = columns
         self.tags = tags
-        self.database = _MODEL_MONITORING_DATABASE
+        self.database = database or _MODEL_MONITORING_DATABASE
 
     def _create_super_table_query(self) -> str:
         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
         tags = ", ".join(f"{col} {val}" for col, val in self.tags.items())
         return f"CREATE STABLE if NOT EXISTS {self.database}.{self.super_table} ({columns}) TAGS ({tags});"
 
-    def _create_subtable_query(
+    def _create_subtable_sql(
         self,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
     ) -> str:
         try:
-            values = ", ".join(f"'{values[val]}'" for val in self.tags)
+            tags = ", ".join(f"'{values[val]}'" for val in self.tags)
         except KeyError:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"values must contain all tags: {self.tags.keys()}"
             )
-        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({values});"
+        return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
 
-    def _insert_subtable_query(
-        self,
+    @staticmethod
+    def _insert_subtable_stmt(
+        statement: taosws.TaosStmt,
+        columns: dict[str, _TDEngineColumn],
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
-    ) -> str:
-        values = ", ".join(f"'{values[val]}'" for val in self.columns)
-        return f"INSERT INTO {self.database}.{subtable} VALUES ({values});"
+    ) -> taosws.TaosStmt:
+        question_marks = ", ".join("?" * len(columns))
+        statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        statement.set_tbname(subtable)
+
+        bind_params = []
+
+        for col_name, col_type in columns.items():
+            val = values[col_name]
+            bind_params.append(values_to_column([val], col_type))
+
+        statement.bind_param(bind_params)
+        statement.add_batch()
+        return statement
 
     def _delete_subtable_query(
         self,
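
This replaces the f-string INSERT built by _insert_subtable_query with a prepared statement: the SQL contains only ? placeholders, the subtable name is set via set_tbname, and each value is bound through values_to_column, so quoting and type conversion are handled by the driver rather than by string formatting. A rough sketch of the flow, assuming stmt is a taosws.TaosStmt obtained from an open connection (the stmt and event variables here are hypothetical):

    schema = AppResultTable()
    stmt = ...    # a taosws.TaosStmt from an open connection (assumption)
    event = ...   # dict keyed by the schema's column and tag names (assumption)
    TDEngineSchema._insert_subtable_stmt(
        stmt,
        columns=schema.columns,
        subtable="my_subtable",
        values=event,
    )
    # Execution of the prepared statement is driven by the caller/wrapper.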
@@ -125,8 +164,8 @@ class TDEngineSchema:
     @staticmethod
     def _get_records_query(
         table: str,
-        start: datetime,
-        end: datetime,
+        start: datetime.datetime,
+        end: datetime.datetime,
         columns_to_filter: list[str] = None,
         filter_query: Optional[str] = None,
         interval: Optional[str] = None,
@@ -173,7 +212,7 @@ class TDEngineSchema:
         if filter_query:
             query.write(f"{filter_query} AND ")
         if start:
-            query.write(f"{timestamp_column} >= '{start}'" + " AND ")
+            query.write(f"{timestamp_column} >= '{start}' AND ")
         if end:
             query.write(f"{timestamp_column} <= '{end}'")
         if interval:
@@ -188,53 +227,53 @@ class TDEngineSchema:
 
 @dataclass
 class AppResultTable(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
-        mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
-        mm_schemas.ResultData.CURRENT_STATS: _TDEngineColumn.BINARY_10000,
-    }
-
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
+            mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Metrics(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.METRICS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
-    }
-
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.METRICS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Predictions(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
-    columns = {
-        mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
-        mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
-    }
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
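
Since the three schema classes now build their column and tag maps in __init__ and delegate to TDEngineSchema.__init__, each instance can target a per-connector database instead of the hardcoded _MODEL_MONITORING_DATABASE. A brief sketch (the database name is illustrative):

    table = AppResultTable(database="my_monitoring_db")  # name is made up
    ddl = table._create_super_table_query()
    # -> "CREATE STABLE if NOT EXISTS my_monitoring_db.<app-results super-table> (...) TAGS (...);"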
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import json
 
@@ -21,8 +20,6 @@ from mlrun.common.schemas.model_monitoring import (
     EventKeyMetrics,
 )
 
-_TABLE_COLUMN = "table_column"
-
 
 class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -14,14 +14,20 @@
 
 import typing
 from datetime import datetime
+from typing import Union
 
 import pandas as pd
 import taosws
+from taoswswrap.tdengine_connection import (
+    Statement,
+    TDEngineConnection,
+)
 
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
 import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
 from mlrun.model_monitoring.db import TSDBConnector
+from mlrun.model_monitoring.db.tsdb.tdengine.schemas import TDEngineSchema
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
 
@@ -46,39 +52,50 @@ class TDEngineConnector(TSDBConnector):
         )
         self._tdengine_connection_string = kwargs.get("connection_string")
         self.database = database
-        self._connection = self._create_connection()
+
+        self._connection = None
         self._init_super_tables()
 
-    def _create_connection(self):
+    @property
+    def connection(self) -> TDEngineConnection:
+        if not self._connection:
+            self._connection = self._create_connection()
+        return self._connection
+
+    def _create_connection(self) -> TDEngineConnection:
         """Establish a connection to the TSDB server."""
-        conn = taosws.connect(self._tdengine_connection_string)
-        try:
-            conn.execute(f"CREATE DATABASE {self.database}")
-        except taosws.QueryError:
-            # Database already exists
-            pass
-        conn.execute(f"USE {self.database}")
+        logger.debug("Creating a new connection to TDEngine", project=self.project)
+        conn = TDEngineConnection(self._tdengine_connection_string)
+        conn.run(statements=f"CREATE DATABASE IF NOT EXISTS {self.database}")
+        conn.prefix_statements = [f"USE {self.database}"]
+        logger.debug("Connected to TDEngine", project=self.project)
         return conn
 
     def _init_super_tables(self):
         """Initialize the super tables for the TSDB."""
         self.tables = {
-            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(),
-            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(),
-            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(),
+            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                self.database
+            ),
         }
 
     def create_tables(self):
         """Create TDEngine supertables."""
         for table in self.tables:
             create_table_query = self.tables[table]._create_super_table_query()
-            self._connection.execute(create_table_query)
+            self.connection.run(statements=create_table_query)
 
     def write_application_event(
         self,
         event: dict,
         kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
-    ):
+    ) -> None:
         """
         Write a single result or metric to TSDB.
         """
@@ -94,24 +111,46 @@ class TDEngineConnector(TSDBConnector):
             # Write a new result
             table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS]
             table_name = (
-                f"{table_name}_" f"{event[mm_schemas.ResultData.RESULT_NAME]}"
+                f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS]
             table_name = (
-                f"{table_name}_" f"{event[mm_schemas.MetricData.METRIC_NAME]}"
+                f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
             ).replace("-", "_")
 
-        create_table_query = table._create_subtable_query(
-            subtable=table_name, values=event
+        # Escape the table name for case-sensitivity (ML-7908)
+        # https://github.com/taosdata/taos-connector-python/issues/260
+        table_name = f"`{table_name}`"
+
+        # Convert the datetime strings to datetime objects
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+        )
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+
+        create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
+
+        insert_statement = Statement(
+            TDEngineSchema._insert_subtable_stmt,
+            dict(columns=table.columns, subtable=table_name, values=event),
         )
-        self._connection.execute(create_table_query)
-        insert_table_query = table._insert_subtable_query(
-            subtable=table_name, values=event
+
+        self.connection.run(
+            statements=[
+                create_table_sql,
+                insert_statement,
+            ]
         )
-        self._connection.execute(insert_table_query)
+
+    @staticmethod
+    def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+        return datetime.fromisoformat(val) if isinstance(val, str) else val
 
     def apply_monitoring_stream_steps(self, graph):
         """
@@ -147,7 +186,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
-            max_events=10,
+            max_events=1000,
+            flush_after_seconds=30,
         )
 
         apply_process_before_tsdb()
@@ -156,22 +196,31 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
+    def handle_model_error(self, graph, **kwargs) -> None:
+        pass
+
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
         """
+        logger.debug(
+            "Deleting all project resources using the TDEngine connector",
+            project=self.project,
+        )
         for table in self.tables:
            get_subtable_names_query = self.tables[table]._get_subtables_query(
                 values={mm_schemas.EventFieldType.PROJECT: self.project}
             )
-            subtables = self._connection.query(get_subtable_names_query)
+            subtables = self.connection.run(query=get_subtable_names_query).data
+            drop_statements = []
             for subtable in subtables:
-                drop_query = self.tables[table]._drop_subtable_query(
-                    subtable=subtable[0]
+                drop_statements.append(
+                    self.tables[table]._drop_subtable_query(subtable=subtable[0])
                 )
-                self._connection.execute(drop_query)
-        logger.info(
-            f"Deleted all project resources in the TSDB connector for project {self.project}"
+            self.connection.run(statements=drop_statements)
+        logger.debug(
+            "Deleted all project resources using the TDEngine connector",
+            project=self.project,
         )
 
     def get_model_endpoint_real_time_metrics(
@@ -222,7 +271,7 @@ class TDEngineConnector(TSDBConnector):
 
         project_condition = f"project = '{self.project}'"
         filter_query = (
-            f"{filter_query} AND {project_condition}"
+            f"({filter_query}) AND ({project_condition})"
             if filter_query
             else project_condition
         )
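
Parenthesizing both sides guards against SQL operator precedence: AND binds tighter than OR, so a caller-supplied filter containing OR would otherwise be regrouped. An illustration of the composition (the filter values are made up):

    filter_query = "a = 1 OR b = 2"
    project_condition = "project = 'p1'"

    f"{filter_query} AND {project_condition}"
    # -> "a = 1 OR b = 2 AND project = 'p1'"
    #    parsed as: a = 1 OR (b = 2 AND project = 'p1')

    f"({filter_query}) AND ({project_condition})"
    # -> "(a = 1 OR b = 2) AND (project = 'p1')"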
@@ -240,17 +289,16 @@ class TDEngineConnector(TSDBConnector):
             timestamp_column=timestamp_column,
             database=self.database,
         )
+        logger.debug("Querying TDEngine", query=full_query)
         try:
-            query_result = self._connection.query(full_query)
+            query_result = self.connection.run(query=full_query)
         except taosws.QueryError as e:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Failed to query table {table} in database {self.database}, {str(e)}"
             )
-        columns = []
-        for column in query_result.fields:
-            columns.append(column.name())
 
-        return pd.DataFrame(query_result, columns=columns)
+        df_columns = [field.name for field in query_result.fields]
+        return pd.DataFrame(query_result.data, columns=df_columns)
 
     def read_metrics_data(
         self,
@@ -274,13 +322,22 @@ class TDEngineConnector(TSDBConnector):
             ],
         ],
     ]:
+        timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
+        columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
             table = mm_schemas.TDEngineSuperTables.METRICS
             name = mm_schemas.MetricData.METRIC_NAME
+            columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
             table = mm_schemas.TDEngineSuperTables.APP_RESULTS
             name = mm_schemas.ResultData.RESULT_NAME
+            columns += [
+                name,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_KIND,
+            ]
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -289,18 +346,19 @@ class TDEngineConnector(TSDBConnector):
 
         metrics_condition = " OR ".join(
             [
-                f"({mm_schemas.WriterEvent.APPLICATION_NAME} = '{metric.app}' AND {name} = '{metric.name}')"
+                f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{metric.app}' AND {name}='{metric.name}')"
                 for metric in metrics
             ]
         )
-        filter_query = f"endpoint_id='{endpoint_id}' AND ({metrics_condition})"
+        filter_query = f"(endpoint_id='{endpoint_id}') AND ({metrics_condition})"
 
         df = self._get_records(
             table=table,
             start=start,
             end=end,
             filter_query=filter_query,
-            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            timestamp_column=timestamp_column,
+            columns=columns,
         )
 
         df[mm_schemas.WriterEvent.END_INFER_TIME] = pd.to_datetime(
@@ -377,6 +435,54 @@ class TDEngineConnector(TSDBConnector):
             ),  # pyright: ignore[reportArgumentType]
         )
 
+    def get_last_request(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_drift_status(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "now-24h",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_metrics_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_results_metadata(
+        self,
+        endpoint_id: str,
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_error_count(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
+    def get_avg_latency(
+        self,
+        endpoint_ids: Union[str, list[str]],
+        start: Union[datetime, str] = "0",
+        end: Union[datetime, str] = "now",
+    ) -> pd.DataFrame:
+        pass
+
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+from datetime import datetime
+from typing import Any
 
 import mlrun.feature_store.steps
 from mlrun.common.schemas.model_monitoring import (
@@ -19,6 +20,25 @@ from mlrun.common.schemas.model_monitoring import (
     EventKeyMetrics,
     EventLiveStats,
 )
+from mlrun.utils import logger
+
+
+def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
+    """
+    Normalize user defined keys - input data to a model and its predictions,
+    to a form V3IO frames tolerates.
+
+    The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+    """
+    prefix = "_"
+
+    def norm_key(key: str) -> str:
+        key = key.replace("-", "_")  # hyphens `-` are not allowed
+        if key and key[0].isdigit():  # starting with a digit is not allowed
+            return prefix + key
+        return key
+
+    return {norm_key(k): v for k, v in event.items()}
 
 
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
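
V3IO frames only tolerates keys matching '^[a-zA-Z_:]([a-zA-Z0-9_:])*$', so the helper rewrites hyphens to underscores and prefixes keys that start with a digit. For example (the feature names are made up):

    _normalize_dict_for_v3io_frames({"f1-score": 0.9, "1st_feature": 3.0})
    # -> {"f1_score": 0.9, "_1st_feature": 3.0}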
@@ -68,8 +88,8 @@ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
         # endpoint_features includes the event values of each feature and prediction
         endpoint_features = {
             EventFieldType.RECORD_TYPE: EventKeyMetrics.ENDPOINT_FEATURES,
-            **event[EventFieldType.NAMED_PREDICTIONS],
-            **event[EventFieldType.NAMED_FEATURES],
+            **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_PREDICTIONS]),
+            **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_FEATURES]),
             **base_event,
         }
         # Create a dictionary that includes both base_metrics and endpoint_features
@@ -115,3 +135,24 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
             else:
                 unpacked[key] = new_event[key]
         return unpacked if unpacked else None
+
+
+class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Prepare the event for insertion into the errors TSDB table.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        error = event.get("error")
+        timestamp = datetime.fromisoformat(event.get("when"))
+        endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        event = {
+            EventFieldType.MODEL_ERROR: str(error),
+            EventFieldType.ENDPOINT_ID: endpoint_id,
+            EventFieldType.TIMESTAMP: timestamp,
+            EventFieldType.ERROR_COUNT: 1.0,
+        }
+        logger.info("Write error to errors TSDB table", event=event)
+        return event
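
ErrorExtractor reduces a failed-prediction event to the four fields the errors table stores. A sketch of the transformation on a made-up input (the concrete key strings behind the EventFieldType constants are assumptions):

    incoming = {
        "error": ValueError("bad input"),
        "when": "2024-07-01T12:00:00+00:00",
        "endpoint_id": "ep-123",  # key per EventFieldType.ENDPOINT_ID (assumed)
    }
    # ErrorExtractor().do(incoming) returns roughly:
    # {
    #     <EventFieldType.MODEL_ERROR>: "bad input",
    #     <EventFieldType.ENDPOINT_ID>: "ep-123",
    #     <EventFieldType.TIMESTAMP>: datetime(2024, 7, 1, 12, 0, tzinfo=...),
    #     <EventFieldType.ERROR_COUNT>: 1.0,
    # }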