mlrun 1.7.1rc4__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (257) hide show
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +1 -2
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +75 -38
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +66 -18
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
  243. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  244. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  245. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  246. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  247. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  248. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  249. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  250. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  251. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  252. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  253. mlrun/model_monitoring/model_endpoint.py +0 -118
  254. mlrun-1.7.1rc4.dist-info/RECORD +0 -351
  255. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  256. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,156 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from contextlib import AbstractContextManager
17
+ from types import TracebackType
18
+ from typing import Final, Optional
19
+
20
+ import botocore.exceptions
21
+
22
+ import mlrun.common.schemas
23
+ import mlrun.errors
24
+ import mlrun.model_monitoring.helpers
25
+ from mlrun.utils import logger
26
+
27
+
28
class ModelMonitoringSchedulesFile(AbstractContextManager):
    """
    Context manager around the JSON schedules file of a single model endpoint.

    Entering the context loads the file content into memory, `get_application_time` and
    `update_application_time` work on the in-memory copy, and leaving the context writes the
    in-memory copy back to the file.
    """

    # Shared class-level default - always hand out a copy, never the object itself.
    DEFAULT_SCHEDULES: Final = {}
    INITIAL_CONTENT = json.dumps(DEFAULT_SCHEDULES)
    ENCODING = "utf-8"

    def __init__(self, project: str, endpoint_id: str) -> None:
        """
        Initialize applications monitoring schedules file object.
        The JSON file stores a dictionary of registered application name as key and Unix timestamp as value.
        When working with the schedules data, use this class as a context manager to read and write the data.

        :param project:     The project name.
        :param endpoint_id: The endpoint ID.
        """
        # `self._item` is the persistent version of the monitoring schedules.
        self._item = mlrun.model_monitoring.helpers.get_monitoring_schedules_data(
            project=project, endpoint_id=endpoint_id
        )
        self._path = self._item.url
        self._fs = self._item.store.filesystem
        # `self._schedules` is an in-memory copy of the DB for all the applications for
        # the same model endpoint.
        self._schedules: dict[str, int] = self.DEFAULT_SCHEDULES.copy()
        # Does `self._schedules` hold the content of `self._item`?
        self._open_schedules = False

    @classmethod
    def from_model_endpoint(
        cls, model_endpoint: "mlrun.common.schemas.ModelEndpoint"
    ) -> "ModelMonitoringSchedulesFile":
        """
        Build a schedules file object from a model endpoint.

        :param model_endpoint: The model endpoint; its `metadata.project` and `metadata.uid` are used.
        :return: A schedules file object for that endpoint.
        """
        return cls(
            project=model_endpoint.metadata.project,
            endpoint_id=model_endpoint.metadata.uid,
        )

    def create(self) -> None:
        """Create a schedules file with initial content - an empty dictionary"""
        logger.debug("Creating model monitoring schedules file", path=self._item.url)
        self._item.put(self.INITIAL_CONTENT)

    def delete(self) -> None:
        """Delete schedules file if it exists"""
        if (
            self._fs is None  # In-memory store
            or self._fs.exists(self._path)
        ):
            logger.debug(
                "Deleting model monitoring schedules file", path=self._item.url
            )
            self._item.delete()
        else:
            logger.debug(
                "Model monitoring schedules file does not exist, nothing to delete",
                path=self._item.url,
            )

    def _open(self) -> None:
        """
        Load the persisted schedules into memory and mark the object as open.

        Every caught error is re-raised. A log record is added first unless the error is a
        `botocore` client error whose code is not "NoSuchKey".
        """
        try:
            content = self._item.get()
        except (
            mlrun.errors.MLRunNotFoundError,
            # Different errors are raised for S3 or local storage, see ML-8042
            botocore.exceptions.ClientError,
            FileNotFoundError,
        ) as err:
            if (
                isinstance(err, botocore.exceptions.ClientError)
                # Add a log only to "NoSuchKey" errors codes - equivalent to `FileNotFoundError`
                and err.response["Error"]["Code"] != "NoSuchKey"
            ):
                raise

            logger.exception(
                "The schedules file was not found. It should have been created "
                "as a part of the model endpoint's creation",
                path=self._path,
            )
            raise

        if isinstance(content, bytes):
            content = content.decode(encoding=self.ENCODING)
        self._schedules = json.loads(content)
        self._open_schedules = True

    def _close(self) -> None:
        """Write the in-memory schedules back to the file and reset the in-memory state."""
        try:
            self._item.put(json.dumps(self._schedules))
        finally:
            # Reset even when the write fails, so the object is not left "open" outside
            # of the context. Use a copy so the shared class-level default is never
            # aliased by an instance (same as in `__init__`).
            self._schedules = self.DEFAULT_SCHEDULES.copy()
            self._open_schedules = False

    def __enter__(self) -> "ModelMonitoringSchedulesFile":
        self._open()
        return super().__enter__()

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> Optional[bool]:
        # The schedules are written back whether or not the `with` body raised.
        # Nothing is returned, so an exception from the body is not suppressed.
        self._close()

    def _check_open_schedules(self) -> None:
        """Raise `MLRunValueError` when the schedules were not loaded through the context manager."""
        if not self._open_schedules:
            raise mlrun.errors.MLRunValueError(
                "Open the schedules file as a context manager first"
            )

    def get_application_time(self, application: str) -> Optional[int]:
        """
        Get the timestamp stored for an application.

        :param application: The application name.
        :return: The stored timestamp, or `None` when the application has no entry.
        """
        self._check_open_schedules()
        return self._schedules.get(application)

    def update_application_time(self, application: str, timestamp: int) -> None:
        """
        Set the timestamp of an application in the in-memory schedules.
        The value is persisted when the context is closed.

        :param application: The application name.
        :param timestamp:   The timestamp to store.
        """
        self._check_open_schedules()
        self._schedules[application] = timestamp
143
+
144
def delete_model_monitoring_schedules_folder(project: str) -> None:
    """
    Delete the model monitoring schedules folder of the project, if it exists.

    :param project: The project name.
    :raise mlrun.errors.MLRunValueError: When the folder's store has no file-system.
    """
    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
        project
    )
    fs = mlrun.datastore.store_manager.object(folder).store.filesystem

    if fs is None:  # In-memory store
        raise mlrun.errors.MLRunValueError(
            "Cannot delete a folder without a file-system"
        )

    # Nothing to do when the folder is missing
    if fs and fs.exists(folder):
        logger.debug("Deleting model monitoring schedules folder", folder=folder)
        fs.rm(folder, recursive=True)
@@ -0,0 +1,189 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import abc
15
+ import json
16
+ from abc import abstractmethod
17
+ from datetime import datetime, timezone
18
+ from typing import cast
19
+
20
+ import botocore.exceptions
21
+ import fsspec
22
+
23
+ import mlrun.datastore.base
24
+ from mlrun.common.schemas.model_monitoring.constants import StatsKind
25
+ from mlrun.model_monitoring.helpers import (
26
+ get_monitoring_current_stats_data,
27
+ get_monitoring_drift_measures_data,
28
+ get_monitoring_stats_directory_path,
29
+ )
30
+ from mlrun.utils import logger
31
+
32
+
33
class ModelMonitoringStatsFile(abc.ABC):
    """
    Abstract class for a model monitoring stats file object.
    The JSON file stores a dictionary with two keys: "data" - the stats dictionary, and
    "timestamp" - an ISO-formatted timestamp string.
    Use `read` and `write` to load and overwrite the file content.
    """

    def __init__(self, item: "mlrun.datastore.base.DataItem", file_type: str):
        """
        :param item:      The data item of the stats file.
        :param file_type: The kind of the stats file, used in the log messages.
        """
        self._path = item.url
        self._item = item
        self._file_type = file_type
        self._fs = cast(fsspec.AbstractFileSystem, self._item.store.filesystem)

    def create(self) -> None:
        """Create a json file with initial content - empty data and the current time as timestamp"""
        logger.debug(
            f"Creating model monitoring {self._file_type} file", path=self._item.url
        )
        self._item.put(
            json.dumps(
                {
                    "data": dict(),
                    "timestamp": mlrun.utils.datetime_now().isoformat(
                        sep=" ", timespec="microseconds"
                    ),
                }
            )
        )

    def delete(self) -> None:
        """Delete json file if it exists"""
        if (
            # In-memory store - there is no file-system to query, same handling as in
            # the model monitoring schedules file
            self._fs is None
            or self._fs.exists(self._path)
        ):
            logger.debug(
                f"Deleting model monitoring {self._file_type} file", path=self._item.url
            )
            self._item.delete()
        else:
            logger.debug(
                f"Model monitoring {self._file_type} file does not exist, nothing to delete",
                path=self._item.url,
            )

    def read(self) -> tuple[dict, datetime]:
        """
        Read the stats data and timestamp saved in file

        :return: tuple of the stats data dictionary and the saved timestamp as a datetime
                 converted to UTC. The timestamp is `None` when the file has no "timestamp" key.
        """
        # Only the read itself can raise the not-found errors handled below
        try:
            raw_content = self._item.get()
        except (
            mlrun.errors.MLRunNotFoundError,
            # Different errors are raised for S3 or local storage, see ML-8042
            botocore.exceptions.ClientError,
            FileNotFoundError,
        ) as err:
            if (
                isinstance(err, botocore.exceptions.ClientError)
                # Add a log only to "NoSuchKey" errors codes - equivalent to `FileNotFoundError`
                and err.response["Error"]["Code"] != "NoSuchKey"
            ):
                raise

            logger.exception(
                "The Stats file was not found. It should have been created "
                "as a part of the model endpoint's creation",
                path=self._path,
                error=err,
            )
            raise

        # The content may already be a string, decode only binary content
        if isinstance(raw_content, (bytes, bytearray)):
            raw_content = raw_content.decode()
        content = json.loads(raw_content)

        timestamp = content.get("timestamp")
        if timestamp is not None:
            timestamp = datetime.fromisoformat(timestamp).astimezone(tz=timezone.utc)
        return content.get("data"), timestamp

    def write(self, stats: dict, timestamp: datetime) -> None:
        """
        Write stats data to file overwrite the existing file

        :param stats:     dictionary with the stats data
        :param timestamp: datetime object with the timestamp of last entry point for the stats calculation
        """
        content = {
            "data": stats,
            "timestamp": timestamp.isoformat(sep=" ", timespec="microseconds"),
        }
        self._item.put(json.dumps(content))

    @classmethod
    @abstractmethod
    def from_model_endpoint(
        cls, model_endpoint: "mlrun.common.schemas.ModelEndpoint"
    ) -> "ModelMonitoringStatsFile":
        """
        Return ModelMonitoringStatsFile child object using ModelEndpoint metadata

        :param model_endpoint: The current model endpoint to get a stats object for
        :return: ModelMonitoringStatsFile child object instance
        """
        pass
133
+
134
+
135
class ModelMonitoringCurrentStatsFile(ModelMonitoringStatsFile):
    """Stats file object for the current stats of a model endpoint."""

    def __init__(self, project: str, endpoint_id: str) -> None:
        """
        Initialize File object specific for current stats.

        :param project:     (str) Project name
        :param endpoint_id: (str) Endpoint identifier (`from_model_endpoint` passes the
                            endpoint's `metadata.uid`)
        """
        stats_item = get_monitoring_current_stats_data(project, endpoint_id)
        file_type = StatsKind.CURRENT_STATS.value
        super().__init__(stats_item, file_type)

    @classmethod
    def from_model_endpoint(
        cls, model_endpoint: mlrun.common.schemas.ModelEndpoint
    ) -> "ModelMonitoringCurrentStatsFile":
        """
        Build the current stats file object of the given model endpoint.

        :param model_endpoint: The model endpoint to get a current stats object for
        :return: ModelMonitoringCurrentStatsFile instance
        """
        project = model_endpoint.metadata.project
        endpoint_id = model_endpoint.metadata.uid
        return cls(project=project, endpoint_id=endpoint_id)
155
+
156
+
157
class ModelMonitoringDriftMeasuresFile(ModelMonitoringStatsFile):
    """Stats file object for the drift measures of a model endpoint."""

    def __init__(self, project: str, endpoint_id: str) -> None:
        """
        Initialize File object specific for drift measures.

        :param project:     (str) Project name
        :param endpoint_id: (str) Endpoint identifier (`from_model_endpoint` passes the
                            endpoint's `metadata.uid`)
        """
        measures_item = get_monitoring_drift_measures_data(project, endpoint_id)
        file_type = StatsKind.DRIFT_MEASURES.value
        super().__init__(measures_item, file_type)

    @classmethod
    def from_model_endpoint(
        cls, model_endpoint: mlrun.common.schemas.ModelEndpoint
    ) -> "ModelMonitoringDriftMeasuresFile":
        """
        Build the drift measures file object of the given model endpoint.

        :param model_endpoint: The model endpoint to get a drift measures object for
        :return: ModelMonitoringDriftMeasuresFile instance
        """
        project = model_endpoint.metadata.project
        endpoint_id = model_endpoint.metadata.uid
        return cls(project=project, endpoint_id=endpoint_id)
177
+
178
+
179
def delete_model_monitoring_stats_folder(project: str) -> None:
    """
    Delete the model monitoring stats folder of the project, if it exists.

    :param project: The project name.
    :raise mlrun.errors.MLRunValueError: When the folder's store has no file-system.
    """
    folder = get_monitoring_stats_directory_path(project)
    fs = mlrun.datastore.store_manager.object(folder).store.filesystem

    if fs is None:  # In-memory store
        raise mlrun.errors.MLRunValueError(
            "Cannot delete a folder without a file-system"
        )

    # Nothing to do when the folder is missing
    if fs and fs.exists(folder):
        logger.debug("Deleting model monitoring stats folder", folder=folder)
        fs.rm(folder, recursive=True)
@@ -15,10 +15,9 @@
15
15
  import typing
16
16
  from abc import ABC, abstractmethod
17
17
  from datetime import datetime
18
- from typing import Union
19
18
 
20
19
  import pandas as pd
21
- import pydantic
20
+ import pydantic.v1
22
21
 
23
22
  import mlrun.common.schemas.model_monitoring as mm_schemas
24
23
  import mlrun.model_monitoring.db.tsdb.helpers
@@ -48,7 +47,7 @@ class TSDBConnector(ABC):
48
47
  self.project = project
49
48
 
50
49
  @abstractmethod
51
- def apply_monitoring_stream_steps(self, graph) -> None:
50
+ def apply_monitoring_stream_steps(self, graph, **kwargs) -> None:
52
51
  """
53
52
  Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
54
53
  different key metric dictionaries. This data is being used by the monitoring dashboards in
@@ -132,6 +131,7 @@ class TSDBConnector(ABC):
132
131
  end: datetime,
133
132
  metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
134
133
  type: typing.Literal["metrics", "results"],
134
+ with_result_extra_data: bool,
135
135
  ) -> typing.Union[
136
136
  list[
137
137
  typing.Union[
@@ -150,11 +150,13 @@ class TSDBConnector(ABC):
150
150
  Read metrics OR results from the TSDB and return as a list.
151
151
 
152
152
  :param endpoint_id: The model endpoint identifier.
153
- :param start: The start time of the query.
154
- :param end: The end time of the query.
155
- :param metrics: The list of metrics to get the values for.
156
- :param type: "metrics" or "results" - the type of each item in metrics.
157
- :return: A list of result values or a list of metric values.
153
+ :param start: The start time of the query.
154
+ :param end: The end time of the query.
155
+ :param metrics: The list of metrics to get the values for.
156
+ :param type: "metrics" or "results" - the type of each item in metrics.
157
+ :param with_result_extra_data: Whether to include the extra data in the results, relevant only when
158
+ `type="results"`.
159
+ :return: A list of result values or a list of metric values.
158
160
  """
159
161
 
160
162
  @abstractmethod
@@ -193,9 +195,9 @@ class TSDBConnector(ABC):
193
195
  @abstractmethod
194
196
  def get_last_request(
195
197
  self,
196
- endpoint_ids: Union[str, list[str]],
197
- start: Union[datetime, str] = "0",
198
- end: Union[datetime, str] = "now",
198
+ endpoint_ids: typing.Union[str, list[str]],
199
+ start: typing.Optional[datetime] = None,
200
+ end: typing.Optional[datetime] = None,
199
201
  ) -> pd.DataFrame:
200
202
  """
201
203
  Fetches data from the predictions TSDB table and returns the most recent request
@@ -212,9 +214,9 @@ class TSDBConnector(ABC):
212
214
  @abstractmethod
213
215
  def get_drift_status(
214
216
  self,
215
- endpoint_ids: Union[str, list[str]],
216
- start: Union[datetime, str] = "now-24h",
217
- end: Union[datetime, str] = "now",
217
+ endpoint_ids: typing.Union[str, list[str]],
218
+ start: typing.Optional[datetime] = None,
219
+ end: typing.Optional[datetime] = None,
218
220
  ) -> pd.DataFrame:
219
221
  """
220
222
  Fetches data from the app-results TSDB table and returns the highest status among all
@@ -233,8 +235,8 @@ class TSDBConnector(ABC):
233
235
  def get_metrics_metadata(
234
236
  self,
235
237
  endpoint_id: str,
236
- start: Union[datetime, str] = "0",
237
- end: Union[datetime, str] = "now",
238
+ start: typing.Optional[datetime] = None,
239
+ end: typing.Optional[datetime] = None,
238
240
  ) -> pd.DataFrame:
239
241
  """
240
242
  Fetches distinct metrics metadata from the metrics TSDB table for a specified model endpoint.
@@ -251,8 +253,8 @@ class TSDBConnector(ABC):
251
253
  def get_results_metadata(
252
254
  self,
253
255
  endpoint_id: str,
254
- start: Union[datetime, str] = "0",
255
- end: Union[datetime, str] = "now",
256
+ start: typing.Optional[datetime] = None,
257
+ end: typing.Optional[datetime] = None,
256
258
  ) -> pd.DataFrame:
257
259
  """
258
260
  Fetches distinct results metadata from the app-results TSDB table for a specified model endpoint.
@@ -268,9 +270,9 @@ class TSDBConnector(ABC):
268
270
  @abstractmethod
269
271
  def get_error_count(
270
272
  self,
271
- endpoint_ids: Union[str, list[str]],
272
- start: Union[datetime, str] = "0",
273
- end: Union[datetime, str] = "now",
273
+ endpoint_ids: typing.Union[str, list[str]],
274
+ start: typing.Optional[datetime] = None,
275
+ end: typing.Optional[datetime] = None,
274
276
  ) -> pd.DataFrame:
275
277
  """
276
278
  Fetches data from the error TSDB table and returns the error count for each specified endpoint.
@@ -286,12 +288,13 @@ class TSDBConnector(ABC):
286
288
  @abstractmethod
287
289
  def get_avg_latency(
288
290
  self,
289
- endpoint_ids: Union[str, list[str]],
290
- start: Union[datetime, str] = "0",
291
- end: Union[datetime, str] = "now",
291
+ endpoint_ids: typing.Union[str, list[str]],
292
+ start: typing.Optional[datetime] = None,
293
+ end: typing.Optional[datetime] = None,
292
294
  ) -> pd.DataFrame:
293
295
  """
294
296
  Fetches data from the predictions TSDB table and returns the average latency for each specified endpoint
297
+ in the provided time range, which by default is the last 24 hours.
295
298
 
296
299
  :param endpoint_ids: A list of model endpoint identifiers.
297
300
  :param start: The start time for the query.
@@ -420,11 +423,12 @@ class TSDBConnector(ABC):
420
423
  sub_df.index,
421
424
  sub_df[mm_schemas.ResultData.RESULT_VALUE],
422
425
  sub_df[mm_schemas.ResultData.RESULT_STATUS],
426
+ sub_df[mm_schemas.ResultData.RESULT_EXTRA_DATA],
423
427
  )
424
428
  ), # pyright: ignore[reportArgumentType]
425
429
  )
426
430
  )
427
- except pydantic.ValidationError:
431
+ except pydantic.v1.ValidationError:
428
432
  logger.exception(
429
433
  "Failed to convert data-frame into `ModelEndpointMonitoringResultValues`",
430
434
  full_name=full_name,
@@ -446,3 +450,52 @@ class TSDBConnector(ABC):
446
450
  )
447
451
 
448
452
  return metrics_values
453
+
454
@staticmethod
def df_to_metrics_list(
    *,
    df: pd.DataFrame,
    project: str,
    type: str,
) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
    """
    Convert a TSDB metrics DataFrame into a list of mm metrics objects, one per row.

    :param df:      The DataFrame to parse.
    :param project: The project name.
    :param type:    The type of the metrics (either "result" or "metric").

    :return:        A list of mm metrics objects.
    """
    rows = df.to_dict("records")
    return [
        mm_schemas.ModelEndpointMonitoringMetric(
            project=project,
            type=type,
            app=row.get(mm_schemas.WriterEvent.APPLICATION_NAME),
            # Fall back to the metric name when the row has no result name
            name=row.get(mm_schemas.ResultData.RESULT_NAME)
            or row.get(mm_schemas.MetricData.METRIC_NAME),
            kind=row.get(mm_schemas.ResultData.RESULT_KIND),
        )
        for row in rows
    ]
483
+
484
@staticmethod
def _get_start_end(
    start: typing.Union[datetime, None],
    end: typing.Union[datetime, None],
) -> tuple[datetime, datetime]:
    """
    Fill in the default TSDB query time range and validate it.

    :param start: Either None or datetime, a missing value is replaced by `mlrun.utils.datetime_min()`
    :param end:   Either None or datetime, a missing value is replaced by `mlrun.utils.datetime_now()`
    :return:      start datetime, end datetime
    :raise mlrun.errors.MLRunInvalidArgumentError: When start or end is not a datetime object.
    """
    if not start:
        start = mlrun.utils.datetime_min()
    if not end:
        end = mlrun.utils.datetime_now()

    if isinstance(start, datetime) and isinstance(end, datetime):
        return start, end

    raise mlrun.errors.MLRunInvalidArgumentError(
        "Both start and end must be datetime objects"
    )