mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
@@ -0,0 +1,522 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import typing
+ from datetime import datetime
+ from typing import Union
+
+ import pandas as pd
+ import taosws
+ from taoswswrap.tdengine_connection import (
+     Statement,
+     TDEngineConnection,
+ )
+
+ import mlrun.common.schemas.model_monitoring as mm_schemas
+ import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
+ import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
+ from mlrun.model_monitoring.db import TSDBConnector
+ from mlrun.model_monitoring.db.tsdb.tdengine.schemas import TDEngineSchema
+ from mlrun.model_monitoring.helpers import get_invocations_fqn
+ from mlrun.utils import logger
+
+
+ class TDEngineConnector(TSDBConnector):
+     """
+     Handles the TSDB operations when the TSDB connector is of type TDEngine.
+     """
+
+     type: str = mm_schemas.TSDBTarget.TDEngine
+
+     def __init__(
+         self,
+         project: str,
+         database: str = tdengine_schemas._MODEL_MONITORING_DATABASE,
+         **kwargs,
+     ):
+         super().__init__(project=project)
+         if "connection_string" not in kwargs:
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 "connection_string is a required parameter for TDEngineConnector."
+             )
+         self._tdengine_connection_string = kwargs.get("connection_string")
+         self.database = database
+
+         self._connection = None
+         self._init_super_tables()
+
+         self._timeout = mlrun.mlconf.model_endpoint_monitoring.tdengine.timeout
+         self._retries = mlrun.mlconf.model_endpoint_monitoring.tdengine.retries
+
+     @property
+     def connection(self) -> TDEngineConnection:
+         if not self._connection:
+             self._connection = self._create_connection()
+         return self._connection
+
+     def _create_connection(self) -> TDEngineConnection:
+         """Establish a connection to the TSDB server."""
+         logger.debug("Creating a new connection to TDEngine", project=self.project)
+         conn = TDEngineConnection(self._tdengine_connection_string)
+         conn.run(
+             statements=f"CREATE DATABASE IF NOT EXISTS {self.database}",
+             timeout=self._timeout,
+             retries=self._retries,
+         )
+         conn.prefix_statements = [f"USE {self.database}"]
+         logger.debug("Connected to TDEngine", project=self.project)
+         return conn
+
+     def _init_super_tables(self):
+         """Initialize the supertables for the TSDB."""
+         self.tables = {
+             mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                 project=self.project, database=self.database
+             ),
+             mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                 project=self.project, database=self.database
+             ),
+             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                 project=self.project, database=self.database
+             ),
+         }
+
+     def create_tables(self):
+         """Create TDEngine supertables."""
+         for table in self.tables:
+             create_table_query = self.tables[table]._create_super_table_query()
+             self.connection.run(
+                 statements=create_table_query,
+                 timeout=self._timeout,
+                 retries=self._retries,
+             )
+
+     def write_application_event(
+         self,
+         event: dict,
+         kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
+     ) -> None:
+         """
+         Write a single result or metric to the TSDB.
+         """
+
+         table_name = (
+             f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
+             f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+         )
+
+         if kind == mm_schemas.WriterEventKind.RESULT:
+             # Write a new result
+             table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS]
+             table_name = (
+                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
+             ).replace("-", "_")
+             event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+
+         else:
+             # Write a new metric
+             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS]
+             table_name = (
+                 f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
+             ).replace("-", "_")
+
+         # Escape the table name for case-sensitivity (ML-7908)
+         # https://github.com/taosdata/taos-connector-python/issues/260
+         table_name = f"`{table_name}`"
+
+         # Convert the datetime strings to datetime objects
+         event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+             val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+         )
+         event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+             val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+         )
+
+         create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
+
+         insert_statement = Statement(
+             TDEngineSchema._insert_subtable_stmt,
+             dict(columns=table.columns, subtable=table_name, values=event),
+         )
+
+         self.connection.run(
+             statements=[
+                 create_table_sql,
+                 insert_statement,
+             ],
+             timeout=self._timeout,
+             retries=self._retries,
+         )
+
+     @staticmethod
+     def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+         return datetime.fromisoformat(val) if isinstance(val, str) else val
+
+     def apply_monitoring_stream_steps(self, graph):
+         """
+         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data
+         of different key-metric dictionaries. This data is used by the monitoring dashboards in
+         Grafana. At the moment, we store two types of data:
+         - prediction latency.
+         - custom metrics.
+         """
+
+         def apply_process_before_tsdb():
+             graph.add_step(
+                 "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ProcessBeforeTDEngine",
+                 name="ProcessBeforeTDEngine",
+                 after="MapFeatureNames",
+             )
+
+         def apply_tdengine_target(name, after):
+             graph.add_step(
+                 "storey.TDEngineTarget",
+                 name=name,
+                 after=after,
+                 url=self._tdengine_connection_string,
+                 supertable=self.tables[
+                     mm_schemas.TDEngineSuperTables.PREDICTIONS
+                 ].super_table,
+                 table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+                 time_col=mm_schemas.EventFieldType.TIME,
+                 database=self.database,
+                 columns=[
+                     mm_schemas.EventFieldType.LATENCY,
+                     mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
+                 ],
+                 tag_cols=[
+                     mm_schemas.EventFieldType.PROJECT,
+                     mm_schemas.EventFieldType.ENDPOINT_ID,
+                 ],
+                 max_events=1000,
+                 flush_after_seconds=30,
+             )
+
+         apply_process_before_tsdb()
+         apply_tdengine_target(
+             name="TDEngineTarget",
+             after="ProcessBeforeTDEngine",
+         )
+
+     def handle_model_error(self, graph, **kwargs) -> None:
+         pass
+
+     def delete_tsdb_resources(self):
+         """
+         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
+         """
+         logger.debug(
+             "Deleting all project resources using the TDEngine connector",
+             project=self.project,
+         )
+         drop_statements = []
+         for table in self.tables:
+             drop_statements.append(self.tables[table].drop_supertable_query())
+
+         try:
+             self.connection.run(
+                 statements=drop_statements,
+                 timeout=self._timeout,
+                 retries=self._retries,
+             )
+         except Exception as e:
+             logger.warning(
+                 "Failed to drop TDEngine tables. You may need to drop them manually. "
+                 "These can be found under the following supertables: app_results, "
+                 "metrics, and predictions.",
+                 project=self.project,
+                 error=mlrun.errors.err_to_str(e),
+             )
+         logger.debug(
+             "Deleted all project resources using the TDEngine connector",
+             project=self.project,
+         )
+
+     def get_model_endpoint_real_time_metrics(
+         self,
+         endpoint_id: str,
+         metrics: list[str],
+         start: str,
+         end: str,
+     ) -> dict[str, list[tuple[str, float]]]:
+         # Not implemented, use get_records() instead
+         pass
+
+     def _get_records(
+         self,
+         table: str,
+         start: datetime,
+         end: datetime,
+         columns: typing.Optional[list[str]] = None,
+         filter_query: typing.Optional[str] = None,
+         interval: typing.Optional[str] = None,
+         agg_funcs: typing.Optional[list] = None,
+         limit: typing.Optional[int] = None,
+         sliding_window_step: typing.Optional[str] = None,
+         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+     ) -> pd.DataFrame:
+         """
+         Get records from the TSDB data collection.
+         :param table:               Either a supertable or a subtable name.
+         :param start:               The start time of the metrics.
+         :param end:                 The end time of the metrics.
+         :param columns:             Columns to include in the result.
+         :param filter_query:        Optional filter expression as a string. TDengine supports SQL-like syntax.
+         :param interval:            The interval to aggregate the data by. Note that if `interval` is provided,
+                                     `agg_funcs` must be provided as well. Provided as a string in the format of
+                                     '1m', '1h', etc.
+         :param agg_funcs:           The aggregation functions to apply on the columns. Note that if `agg_funcs` is
+                                     provided, `interval` must be provided as well. Provided as a list of strings in
+                                     the format of ['sum', 'avg', 'count', ...].
+         :param limit:               The maximum number of records to return.
+         :param sliding_window_step: The time step by which the time window moves forward. Note that if
+                                     `sliding_window_step` is provided, `interval` must be provided as well. Provided
+                                     as a string in the format of '1m', '1h', etc.
+         :param timestamp_column:    The column name that holds the timestamp index.
+
+         :return: DataFrame with the provided attributes from the data collection.
+         :raise:  MLRunInvalidArgumentError if querying the provided table fails.
+         """
+
+         full_query = tdengine_schemas.TDEngineSchema._get_records_query(
+             table=table,
+             start=start,
+             end=end,
+             columns_to_filter=columns,
+             filter_query=filter_query,
+             interval=interval,
+             limit=limit,
+             agg_funcs=agg_funcs,
+             sliding_window_step=sliding_window_step,
+             timestamp_column=timestamp_column,
+             database=self.database,
+         )
+         logger.debug("Querying TDEngine", query=full_query)
+         try:
+             query_result = self.connection.run(
+                 query=full_query, timeout=self._timeout, retries=self._retries
+             )
+         except taosws.QueryError as e:
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 f"Failed to query table {table} in database {self.database}, {str(e)}"
+             )
+
+         df_columns = [field.name for field in query_result.fields]
+         return pd.DataFrame(query_result.data, columns=df_columns)
+
+     def read_metrics_data(
+         self,
+         *,
+         endpoint_id: str,
+         start: datetime,
+         end: datetime,
+         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
+         type: typing.Literal["metrics", "results"],
+     ) -> typing.Union[
+         list[
+             typing.Union[
+                 mm_schemas.ModelEndpointMonitoringResultValues,
+                 mm_schemas.ModelEndpointMonitoringMetricNoData,
+             ],
+         ],
+         list[
+             typing.Union[
+                 mm_schemas.ModelEndpointMonitoringMetricValues,
+                 mm_schemas.ModelEndpointMonitoringMetricNoData,
+             ],
+         ],
+     ]:
+         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
+         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
+         if type == "metrics":
+             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
+             name = mm_schemas.MetricData.METRIC_NAME
+             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
+             df_handler = self.df_to_metrics_values
+         elif type == "results":
+             table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+             name = mm_schemas.ResultData.RESULT_NAME
+             columns += [
+                 name,
+                 mm_schemas.ResultData.RESULT_VALUE,
+                 mm_schemas.ResultData.RESULT_STATUS,
+                 mm_schemas.ResultData.RESULT_KIND,
+             ]
+             df_handler = self.df_to_results_values
+         else:
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 f"Invalid type {type}, must be either 'metrics' or 'results'."
+             )
+
+         metrics_condition = " OR ".join(
+             [
+                 f"({mm_schemas.WriterEvent.APPLICATION_NAME}='{metric.app}' AND {name}='{metric.name}')"
+                 for metric in metrics
+             ]
+         )
+         filter_query = f"(endpoint_id='{endpoint_id}') AND ({metrics_condition})"
+
+         df = self._get_records(
+             table=table,
+             start=start,
+             end=end,
+             filter_query=filter_query,
+             timestamp_column=timestamp_column,
+             columns=columns,
+         )
+
+         df[mm_schemas.WriterEvent.END_INFER_TIME] = pd.to_datetime(
+             df[mm_schemas.WriterEvent.END_INFER_TIME]
+         )
+         df.set_index(mm_schemas.WriterEvent.END_INFER_TIME, inplace=True)
+
+         logger.debug(
+             "Converting a DataFrame to a list of metrics or results values",
+             table=table,
+             project=self.project,
+             endpoint_id=endpoint_id,
+             is_empty=df.empty,
+         )
+
+         return df_handler(df=df, metrics=metrics, project=self.project)
+
+     def read_predictions(
+         self,
+         *,
+         endpoint_id: str,
+         start: datetime,
+         end: datetime,
+         aggregation_window: typing.Optional[str] = None,
+         agg_funcs: typing.Optional[list] = None,
+         limit: typing.Optional[int] = None,
+     ) -> typing.Union[
+         mm_schemas.ModelEndpointMonitoringMetricValues,
+         mm_schemas.ModelEndpointMonitoringMetricNoData,
+     ]:
+         if (agg_funcs and not aggregation_window) or (
+             aggregation_window and not agg_funcs
+         ):
+             raise mlrun.errors.MLRunInvalidArgumentError(
+                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
+             )
+         df = self._get_records(
+             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+             start=start,
+             end=end,
+             columns=[mm_schemas.EventFieldType.LATENCY],
+             filter_query=f"endpoint_id='{endpoint_id}'",
+             agg_funcs=agg_funcs,
+             interval=aggregation_window,
+             limit=limit,
+         )
+
+         full_name = get_invocations_fqn(self.project)
+
+         if df.empty:
+             return mm_schemas.ModelEndpointMonitoringMetricNoData(
+                 full_name=full_name,
+                 type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
+             )
+
+         if aggregation_window:
+             # The _wend column, which represents the end time of each window, will be used as the time index
+             df["_wend"] = pd.to_datetime(df["_wend"])
+             df.set_index("_wend", inplace=True)
+
+         latency_column = (
+             f"{agg_funcs[0]}({mm_schemas.EventFieldType.LATENCY})"
+             if agg_funcs
+             else mm_schemas.EventFieldType.LATENCY
+         )
+
+         return mm_schemas.ModelEndpointMonitoringMetricValues(
+             full_name=full_name,
+             values=list(
+                 zip(
+                     df.index,
+                     df[latency_column],
+                 )
+             ),  # pyright: ignore[reportArgumentType]
+         )
+
+     def get_last_request(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     def get_drift_status(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "now-24h",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     def get_metrics_metadata(
+         self,
+         endpoint_id: str,
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     def get_results_metadata(
+         self,
+         endpoint_id: str,
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     def get_error_count(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     def get_avg_latency(
+         self,
+         endpoint_ids: Union[str, list[str]],
+         start: Union[datetime, str] = "0",
+         end: Union[datetime, str] = "now",
+     ) -> pd.DataFrame:
+         pass
+
+     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
+     #
+     # def read_prediction_metric_for_endpoint_if_exists(
+     #     self, endpoint_id: str
+     # ) -> typing.Optional[mm_schemas.ModelEndpointMonitoringMetric]:
+     #     """
+     #     Read the "invocations" metric for the provided model endpoint, and return the metric object
+     #     if it exists.
+     #
+     #     :param endpoint_id: The model endpoint identifier.
+     #     :return:            `None` if the invocations metric does not exist, otherwise return the
+     #                         corresponding metric object.
+     #     """
+     #     # Read just one record, because we just want to check if there is any data for this endpoint_id
+     #     predictions = self.read_predictions(
+     #         endpoint_id=endpoint_id,
+     #         start=datetime.min,
+     #         end=mlrun.utils.now_date(),
+     #         limit=1,
+     #     )
+     #     if predictions:
+     #         return get_invocations_metric(self.project)
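
For orientation, here is a minimal usage sketch of the new TDEngine connector (not part of the published diff). The connection string, project name, and endpoint ID are hypothetical placeholders; the calls themselves (TDEngineConnector, create_tables, read_predictions) are the ones added above, and the sketch assumes a reachable TDengine instance with the taoswswrap dependency installed:

    from datetime import datetime, timedelta, timezone

    from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connector import (
        TDEngineConnector,
    )

    # Hypothetical taosws URL; use whatever your TDengine deployment exposes.
    connector = TDEngineConnector(
        project="my-project",
        connection_string="taosws://root:taosdata@localhost:6041",
    )
    connector.create_tables()  # creates the app_results, metrics, and predictions supertables

    # Average per-endpoint latency over the last day, in 10-minute windows.
    # Per the connector's contract, `aggregation_window` and `agg_funcs` must be
    # provided together (or both omitted).
    end = datetime.now(tz=timezone.utc)
    latency = connector.read_predictions(
        endpoint_id="my-endpoint-id",
        start=end - timedelta(days=1),
        end=end,
        aggregation_window="10m",
        agg_funcs=["avg"],
    )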
mlrun/model_monitoring/db/tsdb/v3io/__init__.py
@@ -0,0 +1,15 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .v3io_connector import V3IOTSDBConnector
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py
@@ -0,0 +1,158 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from datetime import datetime
+ from typing import Any
+
+ import mlrun.feature_store.steps
+ from mlrun.common.schemas.model_monitoring import (
+     EventFieldType,
+     EventKeyMetrics,
+     EventLiveStats,
+ )
+ from mlrun.utils import logger
+
+
+ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
+     """
+     Normalize user-defined keys - the input data to a model and its predictions -
+     to a form that V3IO frames tolerates.
+
+     The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+     """
+     prefix = "_"
+
+     def norm_key(key: str) -> str:
+         key = key.replace("-", "_")  # hyphens `-` are not allowed
+         if key and key[0].isdigit():  # starting with a digit is not allowed
+             return prefix + key
+         return key
+
+     return {norm_key(k): v for k, v in event.items()}
+
+
+ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
+     def __init__(self, **kwargs):
+         """
+         Process the data before writing to the TSDB. This step creates a dictionary that includes three
+         dictionaries, each of which contains important details and stats about the events:
+         1. base_metrics: stats about the average latency and the number of predictions over time. It is based on
+            storey.AggregateByKey, which was executed in step 5.
+         2. endpoint_features: feature names and values, along with the prediction names and values.
+         3. custom_metrics (opt): optional metrics provided by the user.
+         :returns: Dictionary of 2-3 dictionaries that contain stats and details about the events.
+         """
+         super().__init__(**kwargs)
+
+     def do(self, event):
+         # Compute predictions per second
+         event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
+             float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
+         )
+         base_fields = [
+             EventFieldType.TIMESTAMP,
+             EventFieldType.ENDPOINT_ID,
+             EventFieldType.ENDPOINT_TYPE,
+         ]
+
+         # Get the event timestamp and endpoint_id
+         base_event = {k: event[k] for k in base_fields}
+
+         # base_metrics includes the stats about the average latency and the number of predictions over time
+         base_metrics = {
+             EventFieldType.RECORD_TYPE: EventKeyMetrics.BASE_METRICS,
+             EventLiveStats.PREDICTIONS_PER_SECOND: event[
+                 EventLiveStats.PREDICTIONS_PER_SECOND
+             ],
+             EventLiveStats.PREDICTIONS_COUNT_5M: event[
+                 EventLiveStats.PREDICTIONS_COUNT_5M
+             ],
+             EventLiveStats.PREDICTIONS_COUNT_1H: event[
+                 EventLiveStats.PREDICTIONS_COUNT_1H
+             ],
+             EventLiveStats.LATENCY_AVG_5M: event[EventLiveStats.LATENCY_AVG_5M],
+             EventLiveStats.LATENCY_AVG_1H: event[EventLiveStats.LATENCY_AVG_1H],
+             **base_event,
+         }
+
+         # endpoint_features includes the event values of each feature and prediction
+         endpoint_features = {
+             EventFieldType.RECORD_TYPE: EventKeyMetrics.ENDPOINT_FEATURES,
+             **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_PREDICTIONS]),
+             **_normalize_dict_for_v3io_frames(event[EventFieldType.NAMED_FEATURES]),
+             **base_event,
+         }
+         # Create a dictionary that includes both base_metrics and endpoint_features
+         processed = {
+             EventKeyMetrics.BASE_METRICS: base_metrics,
+             EventKeyMetrics.ENDPOINT_FEATURES: endpoint_features,
+         }
+
+         # If metrics are provided, add another dictionary with the custom_metrics values
+         if event[EventFieldType.METRICS]:
+             processed[EventKeyMetrics.CUSTOM_METRICS] = {
+                 EventFieldType.RECORD_TYPE: EventKeyMetrics.CUSTOM_METRICS,
+                 **event[EventFieldType.METRICS],
+                 **base_event,
+             }
+
+         return processed
+
+
+ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
+     def __init__(self, keys, **kwargs):
+         """
+         Create an unpacked event dictionary based on the provided key metrics (base_metrics, endpoint_features,
+         or custom_metrics). Note that the next step of the TSDB target requires an unpacked dictionary.
+         :param keys: List of key metrics.
+         :returns: An unpacked dictionary of the event, filtered by the provided key metrics.
+         """
+         super().__init__(**kwargs)
+         self.keys = keys
+
+     def do(self, event):
+         # Keep only the relevant dictionaries, based on the provided keys
+         new_event = {}
+         for key in self.keys:
+             if key in event:
+                 new_event[key] = event[key]
+
+         # Create the unpacked dictionary
+         unpacked = {}
+         for key in new_event.keys():
+             if key in self.keys:
+                 unpacked = {**unpacked, **new_event[key]}
+             else:
+                 unpacked[key] = new_event[key]
+         return unpacked if unpacked else None
+
+
+ class ErrorExtractor(mlrun.feature_store.steps.MapClass):
+     def __init__(self, **kwargs):
+         """
+         Prepare the event for insertion into the errors TSDB table.
+         """
+         super().__init__(**kwargs)
+
+     def do(self, event):
+         error = event.get("error")
+         timestamp = datetime.fromisoformat(event.get("when"))
+         endpoint_id = event[EventFieldType.ENDPOINT_ID]
+         event = {
+             EventFieldType.MODEL_ERROR: str(error),
+             EventFieldType.ENDPOINT_ID: endpoint_id,
+             EventFieldType.TIMESTAMP: timestamp,
+             EventFieldType.ERROR_COUNT: 1.0,
+         }
+         logger.info("Write error to errors TSDB table", event=event)
+         return event
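
To make the key-normalization contract above concrete, here is a small self-contained sketch (not part of the diff) that mirrors the rules in _normalize_dict_for_v3io_frames: hyphens are replaced with underscores, and keys starting with a digit get a "_" prefix, so the results match '^[a-zA-Z_:]([a-zA-Z0-9_:])*$':

    def norm_key(key: str) -> str:
        # Hyphens are not allowed in V3IO frames column names.
        key = key.replace("-", "_")
        # Keys must not start with a digit, so prefix those with "_".
        return "_" + key if key and key[0].isdigit() else key

    event = {"f1-score": 0.9, "3rd_feature": 1.2, "latency": 5.0}
    print({norm_key(k): v for k, v in event.items()})
    # -> {'f1_score': 0.9, '_3rd_feature': 1.2, 'latency': 5.0}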