mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py (file removed)
@@ -1,344 +0,0 @@
- # Copyright 2024 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import datetime
- from dataclasses import dataclass
- from io import StringIO
- from typing import Optional, Union
-
- import taosws
-
- import mlrun.common.schemas.model_monitoring as mm_schemas
- import mlrun.common.types
-
- _MODEL_MONITORING_DATABASE = "mlrun_model_monitoring"
-
-
- class _TDEngineColumnType:
-     def __init__(self, data_type: str, length: Optional[int] = None):
-         self.data_type = data_type
-         self.length = length
-
-     def values_to_column(self, values):
-         raise NotImplementedError()
-
-     def __str__(self):
-         if self.length is not None:
-             return f"{self.data_type}({self.length})"
-         else:
-             return self.data_type
-
-
- class _TDEngineColumn(mlrun.common.types.StrEnum):
-     TIMESTAMP = _TDEngineColumnType("TIMESTAMP")
-     FLOAT = _TDEngineColumnType("FLOAT")
-     INT = _TDEngineColumnType("INT")
-     BINARY_40 = _TDEngineColumnType("BINARY", 40)
-     BINARY_64 = _TDEngineColumnType("BINARY", 64)
-     BINARY_1000 = _TDEngineColumnType("BINARY", 1000)
-
-
- def values_to_column(values, column_type):
-     if column_type == _TDEngineColumn.TIMESTAMP:
-         timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
-         return taosws.millis_timestamps_to_column(timestamps)
-     if column_type == _TDEngineColumn.FLOAT:
-         return taosws.floats_to_column(values)
-     if column_type == _TDEngineColumn.INT:
-         return taosws.ints_to_column(values)
-     if column_type == _TDEngineColumn.BINARY_40:
-         return taosws.binary_to_column(values)
-     if column_type == _TDEngineColumn.BINARY_64:
-         return taosws.binary_to_column(values)
-     if column_type == _TDEngineColumn.BINARY_1000:
-         return taosws.binary_to_column(values)
-
-     raise mlrun.errors.MLRunInvalidArgumentError(
-         f"unsupported column type '{column_type}'"
-     )
-
-
- @dataclass
- class TDEngineSchema:
-     """
-     A class to represent a supertable schema in TDengine. Using this schema, you can generate the relevant queries to
-     create, insert, delete and query data from TDengine. At the moment, there are 3 schemas: AppResultTable,
-     Metrics, and Predictions.
-     """
-
-     def __init__(
-         self,
-         super_table: str,
-         columns: dict[str, _TDEngineColumn],
-         tags: dict[str, str],
-         project: str,
-         database: Optional[str] = None,
-     ):
-         self.super_table = f"{super_table}_{project.replace('-', '_')}"
-         self.columns = columns
-         self.tags = tags
-         self.database = database or _MODEL_MONITORING_DATABASE
-
-     def _create_super_table_query(self) -> str:
-         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
-         tags = ", ".join(f"{col} {val}" for col, val in self.tags.items())
-         return f"CREATE STABLE if NOT EXISTS {self.database}.{self.super_table} ({columns}) TAGS ({tags});"
-
-     def _create_subtable_sql(
-         self,
-         subtable: str,
-         values: dict[str, Union[str, int, float, datetime.datetime]],
-     ) -> str:
-         try:
-             tags = ", ".join(f"'{values[val]}'" for val in self.tags)
-         except KeyError:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 f"values must contain all tags: {self.tags.keys()}"
-             )
-         return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
-
-     def _delete_subtable_query(
-         self,
-         subtable: str,
-         values: dict[str, Union[str, int, float, datetime.datetime]],
-     ) -> str:
-         values = " AND ".join(
-             f"{val} LIKE '{values[val]}'" for val in self.tags if val in values
-         )
-         if not values:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 f"values must contain at least one tag: {self.tags.keys()}"
-             )
-         return f"DELETE FROM {self.database}.{subtable} WHERE {values};"
-
-     def drop_subtable_query(
-         self,
-         subtable: str,
-     ) -> str:
-         return f"DROP TABLE if EXISTS {self.database}.`{subtable}`;"
-
-     def drop_supertable_query(self) -> str:
-         return f"DROP STABLE if EXISTS {self.database}.{self.super_table};"
-
-     def _get_subtables_query_by_tag(
-         self,
-         filter_tag: str,
-         filter_values: list[str],
-         operator: str = "OR",
-     ) -> str:
-         if filter_tag not in self.tags:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 f"`filter_tag` must be one of the tags: {self.tags.keys()}"
-             )
-
-         values = f" {operator} ".join(
-             f"{filter_tag} LIKE '{val}'" for val in filter_values
-         )
-
-         return f"SELECT DISTINCT tbname FROM {self.database}.{self.super_table} WHERE {values};"
-
-     @staticmethod
-     def _get_records_query(
-         table: str,
-         start: datetime.datetime,
-         end: datetime.datetime,
-         columns_to_filter: Optional[list[str]] = None,
-         filter_query: Optional[str] = None,
-         interval: Optional[str] = None,
-         limit: int = 0,
-         agg_funcs: Optional[list] = None,
-         sliding_window_step: Optional[str] = None,
-         timestamp_column: str = "time",
-         database: str = _MODEL_MONITORING_DATABASE,
-         group_by: Optional[Union[list[str], str]] = None,
-         preform_agg_funcs_columns: Optional[list[str]] = None,
-         order_by: Optional[str] = None,
-         desc: Optional[bool] = None,
-         partition_by: Optional[str] = None,
-     ) -> str:
-         if agg_funcs and not columns_to_filter:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "`columns_to_filter` must be provided when using aggregate functions"
-             )
-
-         # if aggregate function or interval is provided, the other must be provided as well
-         if interval and not agg_funcs:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "`agg_funcs` must be provided when using interval"
-             )
-         if partition_by and not agg_funcs:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "`agg_funcs` must be provided when using partition by"
-             )
-         if sliding_window_step and not interval:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "`interval` must be provided when using sliding window"
-             )
-         if group_by and not agg_funcs:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "aggregate functions must be provided when using group by"
-             )
-         if desc and not order_by:
-             raise mlrun.errors.MLRunInvalidArgumentError(
-                 "`order_by` must be provided when using descending"
-             )
-
-         with StringIO() as query:
-             query.write("SELECT ")
-             if interval:
-                 query.write("_wstart, _wend, ")
-             if agg_funcs:
-                 preform_agg_funcs_columns = (
-                     columns_to_filter
-                     if preform_agg_funcs_columns is None
-                     else preform_agg_funcs_columns
-                 )
-                 query.write(
-                     ", ".join(
-                         [
-                             f"{a}({col})"
-                             if col.upper()
-                             in map(
-                                 str.upper, preform_agg_funcs_columns
-                             )  # Case-insensitive check
-                             else f"{col}"
-                             for a in agg_funcs
-                             for col in columns_to_filter
-                         ]
-                     )
-                 )
-             elif columns_to_filter:
-                 query.write(", ".join(columns_to_filter))
-             else:
-                 query.write("*")
-             query.write(f" FROM {database}.{table}")
-
-             if any([filter_query, start, end]):
-                 query.write(" WHERE ")
-                 if filter_query:
-                     query.write(f"{filter_query} AND ")
-                 if start:
-                     query.write(f"{timestamp_column} >= '{start}' AND ")
-                 if end:
-                     query.write(f"{timestamp_column} <= '{end}'")
-             if group_by:
-                 if isinstance(group_by, list):
-                     group_by = ", ".join(group_by)
-                 query.write(f" GROUP BY {group_by}")
-             if partition_by:
-                 query.write(f" PARTITION BY {partition_by}")
-             if order_by:
-                 desc = " DESC" if desc else ""
-                 query.write(f" ORDER BY {order_by}{desc}")
-             if interval:
-                 query.write(f" INTERVAL({interval})")
-             if sliding_window_step:
-                 query.write(f" SLIDING({sliding_window_step})")
-             if limit:
-                 query.write(f" LIMIT {limit}")
-             query.write(";")
-             return query.getvalue()
-
-
- @dataclass
- class AppResultTable(TDEngineSchema):
-     def __init__(self, project: str, database: Optional[str] = None):
-         super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
-         columns = {
-             mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
-             mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
-             mm_schemas.ResultData.RESULT_EXTRA_DATA: _TDEngineColumn.BINARY_1000,
-         }
-         tags = {
-             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-             mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-             mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
-             mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
-         }
-         super().__init__(
-             super_table=super_table,
-             columns=columns,
-             tags=tags,
-             database=database,
-             project=project,
-         )
-
-
- @dataclass
- class Metrics(TDEngineSchema):
-     def __init__(self, project: str, database: Optional[str] = None):
-         super_table = mm_schemas.TDEngineSuperTables.METRICS
-         columns = {
-             mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
-         }
-         tags = {
-             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-             mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-             mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
-         }
-         super().__init__(
-             super_table=super_table,
-             columns=columns,
-             tags=tags,
-             database=database,
-             project=project,
-         )
-
-
- @dataclass
- class Predictions(TDEngineSchema):
-     def __init__(self, project: str, database: Optional[str] = None):
-         super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
-         columns = {
-             mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
-             mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_1000,
-             mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT: _TDEngineColumn.FLOAT,
-             mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT: _TDEngineColumn.INT,
-         }
-         tags = {
-             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-         }
-         super().__init__(
-             super_table=super_table,
-             columns=columns,
-             tags=tags,
-             database=database,
-             project=project,
-         )
-
-
- @dataclass
- class Errors(TDEngineSchema):
-     def __init__(self, project: str, database: Optional[str] = None):
-         super_table = mm_schemas.TDEngineSuperTables.ERRORS
-         columns = {
-             mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
-             mm_schemas.EventFieldType.MODEL_ERROR: _TDEngineColumn.BINARY_1000,
-         }
-         tags = {
-             mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-             mm_schemas.EventFieldType.ERROR_TYPE: _TDEngineColumn.BINARY_64,
-         }
-         super().__init__(
-             super_table=super_table,
-             columns=columns,
-             tags=tags,
-             database=database,
-             project=project,
-         )
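
To make the removed API above concrete, here is a minimal usage sketch of how its schema classes generated SQL. The SQL shown in the comments is illustrative only: the actual supertable and column names come from the mm_schemas constants, so the concrete strings are assumptions, not captured output.

    import datetime

    # Metrics and TDEngineSchema are the classes from the removed module above.
    metrics = Metrics(project="my-project")

    # Supertable DDL, roughly:
    # CREATE STABLE if NOT EXISTS mlrun_model_monitoring.metrics_my_project (...) TAGS (...);
    print(metrics._create_super_table_query())

    # Windowed aggregation over a time range, roughly:
    # SELECT _wstart, _wend, avg(metric_value) FROM mlrun_model_monitoring.<supertable>
    #     WHERE time >= '...' AND time <= '...' INTERVAL(10m);
    print(
        metrics._get_records_query(
            table=metrics.super_table,
            start=datetime.datetime(2025, 1, 1),
            end=datetime.datetime(2025, 1, 2),
            columns_to_filter=["metric_value"],  # assumed value of MetricData.METRIC_VALUE
            agg_funcs=["avg"],
            interval="10m",
        )
    )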
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py (file removed)
@@ -1,75 +0,0 @@
- # Copyright 2024 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import json
- from datetime import datetime
-
- import mlrun.feature_store.steps
- from mlrun.common.schemas.model_monitoring import (
-     EventFieldType,
-     EventKeyMetrics,
- )
- from mlrun.utils import logger
-
-
- class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
-     def __init__(self, **kwargs):
-         """
-         Process the data before writing to TDEngine. This step creates the relevant keys for the TDEngine table,
-         including project name, custom metrics, time column, and table name column.
-
-         :returns: Event as a dictionary which will be written into the TDEngine Predictions table.
-         """
-         super().__init__(**kwargs)
-
-     def do(self, event):
-         event[EventFieldType.PROJECT] = event[EventFieldType.FUNCTION_URI].split("/")[0]
-         event[EventKeyMetrics.CUSTOM_METRICS] = json.dumps(
-             event.get(EventFieldType.METRICS, {})
-         )
-         event[EventFieldType.TIME] = event.get(EventFieldType.TIMESTAMP)
-         event[EventFieldType.TABLE_COLUMN] = "_" + event.get(EventFieldType.ENDPOINT_ID)
-
-         return event
-
-
- class ErrorExtractor(mlrun.feature_store.steps.MapClass):
-     def __init__(self, **kwargs):
-         """
-         Prepare the event for insertion into the TDEngine error table
-         """
-         super().__init__(**kwargs)
-
-     def do(self, event):
-         error = str(event.get("error"))
-         if len(error) > 1000:
-             error = error[-1000:]
-             logger.warning(
-                 f"Error message exceeds 1000 chars: the error message written to the TSDB will be its last "
-                 f"1000 chars, Error: {error}",
-                 event=event,
-             )
-         timestamp = datetime.fromisoformat(event.get("when"))
-         endpoint_id = event[EventFieldType.ENDPOINT_ID]
-         event = {
-             EventFieldType.MODEL_ERROR: error,
-             EventFieldType.ERROR_TYPE: EventFieldType.INFER_ERROR,
-             EventFieldType.ENDPOINT_ID: endpoint_id,
-             EventFieldType.TIME: timestamp,
-             EventFieldType.PROJECT: event[EventFieldType.FUNCTION_URI].split("/")[0],
-             EventFieldType.TABLE_COLUMN: "_err_"
-             + event.get(EventFieldType.ENDPOINT_ID),
-         }
-         logger.info("Write error to errors TSDB table", event=event)
-         return event
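
Because both removed steps are plain `MapClass` transforms, their effect is easiest to see on a bare event dict. A minimal sketch, assuming the `EventFieldType`/`EventKeyMetrics` constants resolve to plain strings such as "function_uri", "endpoint_id", "timestamp", and "metrics" (those concrete values are assumptions here, not taken from the diff):

    step = ProcessBeforeTDEngine()
    out = step.do(
        {
            "function_uri": "my-project/my-function",  # project is parsed from here
            "endpoint_id": "abc123",
            "timestamp": "2025-01-01T00:00:00",
            "metrics": {"accuracy": 0.97},
        }
    )
    # The step adds the project name, a JSON-serialized custom-metrics string,
    # the time column, and a per-endpoint subtable name ("_abc123").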
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py (file removed)
@@ -1,281 +0,0 @@
- # Copyright 2025 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import time
- from collections.abc import Callable
- from enum import Enum
- from typing import Any, Final, Optional, Union
-
- import taosws
- from taosws import TaosStmt
-
- import mlrun
- from mlrun.utils import logger
-
-
- class _StrEnum(str, Enum):
-     pass
-
-
- class TimestampPrecision(_StrEnum):
-     ms = "ms"  # milliseconds
-     us = "us"  # microseconds
-     ns = "ns"  # nanoseconds
-
-
- _TS_PRECISION_TO_FACTOR_AND_FUNC: Final[
-     dict[TimestampPrecision, tuple[int, Callable[[list[int]], taosws.PyColumnView]]]
- ] = {
-     TimestampPrecision.ms: (10**3, taosws.millis_timestamps_to_column),
-     TimestampPrecision.us: (10**6, taosws.micros_timestamps_to_column),
-     TimestampPrecision.ns: (10**9, taosws.nanos_timestamps_to_column),
- }
-
-
- class QueryResult:
-     def __init__(self, data, fields):
-         self.data = data
-         self.fields = fields
-
-     def __eq__(self, other):
-         return self.data == other.data and self.fields == other.fields
-
-     def __repr__(self):
-         return f"QueryResult({self.data}, {self.fields})"
-
-
- class Field:
-     def __init__(self, name, type, bytes):
-         self.name = name
-         self.type = type
-         self.bytes = bytes
-
-     def __eq__(self, other):
-         return (
-             self.name == other.name
-             and self.type == other.type
-             and self.bytes == other.bytes
-         )
-
-     def __repr__(self):
-         return f"Field({self.name}, {self.type}, {self.bytes})"
-
-
- class TDEngineError(Exception):
-     pass
-
-
- class ErrorResult:
-     def __init__(self, tb, err):
-         self.tb = tb
-         self.err = err
-
-
- def _get_timestamp_column(
-     values: list, timestamp_precision: TimestampPrecision
- ) -> taosws.PyColumnView:
-     factor, to_col_func = _TS_PRECISION_TO_FACTOR_AND_FUNC[timestamp_precision]
-     timestamps = [round(timestamp.timestamp() * factor) for timestamp in values]
-     return to_col_func(timestamps)
-
-
- def values_to_column(
-     values: list,
-     column_type: str,
-     timestamp_precision: TimestampPrecision = TimestampPrecision.ms,
- ) -> taosws.PyColumnView:
-     if column_type == "TIMESTAMP":
-         return _get_timestamp_column(values, timestamp_precision)
-     if column_type == "FLOAT":
-         return taosws.floats_to_column(values)
-     if column_type == "INT":
-         return taosws.ints_to_column(values)
-     if column_type.startswith("BINARY"):
-         return taosws.binary_to_column(values)
-
-     raise NotImplementedError(f"Unsupported column type '{column_type}'")
-
-
- class Statement:
-     def __init__(
-         self,
-         columns: dict[str, str],
-         subtable: str,
-         values: dict[str, Any],
-         timestamp_precision: str = TimestampPrecision.ms,
-     ) -> None:
-         self.columns = columns
-         self.subtable = subtable
-         self.values = values
-         self.timestamp_precision = TimestampPrecision[timestamp_precision]
-
-     def prepare(self, statement: TaosStmt) -> TaosStmt:
-         question_marks = ", ".join("?" * len(self.columns))
-         statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
-         statement.set_tbname(self.subtable)
-
-         bind_params = []
-
-         for col_name, col_type in self.columns.items():
-             val = self.values[col_name]
-             bind_params.append(
-                 values_to_column(
-                     [val], col_type, timestamp_precision=self.timestamp_precision
-                 )
-             )
-
-         statement.bind_param(bind_params)
-         statement.add_batch()
-         return statement
-
-
- class TDEngineConnection:
-     def __init__(self, connection_string, max_retries=3, retry_delay=0.5):
-         self._connection_string = connection_string
-         self.prefix_statements = []
-         self._max_retries = max_retries
-         self._retry_delay = retry_delay
-
-         self._conn = self._create_connection()
-
-     def _create_connection(self):
-         """Create a new TDEngine connection."""
-         return taosws.connect(self._connection_string)
-
-     def _reconnect(self):
-         """Close current connection and create a new one."""
-         try:
-             if hasattr(self, "_conn") and self._conn:
-                 self._conn.close()
-         except Exception as e:
-             logger.warning(f"Error closing connection during reconnect: {e}")
-
-         self._conn = self._create_connection()
-         logger.info("Successfully reconnected to TDEngine")
-
-     def _execute_with_retry(self, operation, operation_name, *args, **kwargs):
-         """
-         Execute an operation with retry logic for connection failures.
-
-         :param operation: The function to execute
-         :param operation_name: Name of the operation for logging
-         :param args: Arguments to pass to the operation
-         :param kwargs: Keyword arguments to pass to the operation
-         :return: Result of the operation
-         """
-         last_exception = None
-
-         for attempt in range(self._max_retries + 1):  # +1 for initial attempt
-             try:
-                 return operation(*args, **kwargs)
-
-             except taosws.Error as e:
-                 last_exception = e
-
-                 if attempt < self._max_retries:
-                     logger.warning(
-                         f"Connection error during {operation_name} "
-                         f"(attempt {attempt + 1}/{self._max_retries + 1}): {e}. "
-                         f"Retrying in {self._retry_delay} seconds..."
-                     )
-
-                     # Wait before retrying
-                     time.sleep(self._retry_delay)
-
-                     # Reconnect
-                     try:
-                         self._reconnect()
-                     except Exception as reconnect_error:
-                         logger.error(f"Failed to reconnect: {reconnect_error}")
-                         if attempt == self._max_retries - 1:
-                             # Last attempt, raise the reconnection error
-                             raise TDEngineError(
-                                 f"Failed to reconnect after {operation_name} failure: {reconnect_error}"
-                             ) from reconnect_error
-                     continue
-                 else:
-                     # Max retries exceeded
-                     logger.error(
-                         f"Max retries ({self._max_retries}) exceeded for {operation_name}"
-                     )
-                     break
-
-             except Exception as e:
-                 # Non-TDEngine error, don't retry
-                 raise TDEngineError(
-                     f"Unexpected error during {operation_name}: {e}"
-                 ) from e
-
-         # If we get here, all retries failed
-         raise TDEngineError(
-             f"Failed to {operation_name} after {self._max_retries} retries: {last_exception}"
-         ) from last_exception
-
-     def _execute_statement(self, statement):
-         """Execute a single statement (string or Statement object)."""
-         if isinstance(statement, Statement):
-             prepared_statement = statement.prepare(self._conn.statement())
-             prepared_statement.execute()
-         else:
-             self._conn.execute(statement)
-
-     def _execute_query(self, query):
-         """Execute a query and return the result."""
-         return self._conn.query(query)
-
-     def run(
-         self,
-         statements: Optional[Union[str, Statement, list[Union[str, Statement]]]] = None,
-         query: Optional[str] = None,
-     ) -> Optional[QueryResult]:
-         statements = statements or []
-         if not isinstance(statements, list):
-             statements = [statements]
-
-         # Execute all statements with retry logic
-         all_statements = self.prefix_statements + statements
-         for i, statement in enumerate(all_statements):
-             operation_name = f"execute statement {i + 1}/{len(all_statements)}"
-             if isinstance(statement, Statement):
-                 operation_name += " (prepared)"
-             else:
-                 operation_name += f" `{statement}`"
-
-             self._execute_with_retry(self._execute_statement, operation_name, statement)
-
-         if not query:
-             return None
-
-         # Execute query with retry logic
-         res = self._execute_with_retry(
-             self._execute_query, f"execute query `{query}`", query
-         )
-
-         # Process results
-         fields = [
-             Field(field.name(), field.type(), field.bytes()) for field in res.fields
-         ]
-
-         return QueryResult(list(res), fields)
-
-     def close(self):
-         """Close the connection."""
-         try:
-             if self._conn:
-                 self._conn.close()
-                 logger.debug("TDEngine connection closed")
-                 self._conn = None
-         except Exception as e:
-             logger.warning(
-                 f"Error closing TDEngine connection: {mlrun.errors.err_to_str(e)}"
-             )
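
Finally, a minimal usage sketch of the removed connection wrapper, assuming a reachable TDengine websocket endpoint (the DSN, database, table, and column names below are placeholders, not values from the diff):

    import datetime

    conn = TDEngineConnection("taosws://root:taosdata@localhost:6041")

    # Plain SQL strings go through the retry wrapper as-is.
    conn.run(statements="CREATE DATABASE IF NOT EXISTS mlrun_model_monitoring;")

    # Prepared inserts are built with the Statement helper, which binds one
    # column view per column via values_to_column().
    stmt = Statement(
        columns={"time": "TIMESTAMP", "latency": "FLOAT"},
        subtable="mlrun_model_monitoring._abc123",
        values={"time": datetime.datetime.now(), "latency": 12.5},
    )
    conn.run(statements=stmt)

    # Queries return a QueryResult holding the rows plus field metadata.
    result = conn.run(query="SELECT COUNT(*) FROM mlrun_model_monitoring._abc123;")
    print(result.data, result.fields)

    conn.close()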