mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (291)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +26 -112
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +46 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +47 -48
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +69 -0
  13. mlrun/common/db/sql_session.py +2 -3
  14. mlrun/common/formatters/__init__.py +19 -0
  15. mlrun/common/formatters/artifact.py +21 -0
  16. mlrun/common/formatters/base.py +78 -0
  17. mlrun/common/formatters/function.py +41 -0
  18. mlrun/common/formatters/pipeline.py +53 -0
  19. mlrun/common/formatters/project.py +51 -0
  20. mlrun/common/helpers.py +1 -2
  21. mlrun/common/model_monitoring/helpers.py +9 -5
  22. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  23. mlrun/common/schemas/__init__.py +24 -4
  24. mlrun/common/schemas/alert.py +203 -0
  25. mlrun/common/schemas/api_gateway.py +148 -0
  26. mlrun/common/schemas/artifact.py +18 -8
  27. mlrun/common/schemas/auth.py +11 -5
  28. mlrun/common/schemas/background_task.py +1 -1
  29. mlrun/common/schemas/client_spec.py +4 -1
  30. mlrun/common/schemas/feature_store.py +16 -16
  31. mlrun/common/schemas/frontend_spec.py +8 -7
  32. mlrun/common/schemas/function.py +5 -1
  33. mlrun/common/schemas/hub.py +11 -18
  34. mlrun/common/schemas/memory_reports.py +2 -2
  35. mlrun/common/schemas/model_monitoring/__init__.py +18 -3
  36. mlrun/common/schemas/model_monitoring/constants.py +83 -26
  37. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  38. mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
  39. mlrun/common/schemas/notification.py +4 -4
  40. mlrun/common/schemas/object.py +2 -2
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +1 -10
  43. mlrun/common/schemas/project.py +24 -23
  44. mlrun/common/schemas/runtime_resource.py +8 -12
  45. mlrun/common/schemas/schedule.py +3 -3
  46. mlrun/common/schemas/tag.py +1 -2
  47. mlrun/common/schemas/workflow.py +2 -2
  48. mlrun/common/types.py +7 -1
  49. mlrun/config.py +54 -17
  50. mlrun/data_types/to_pandas.py +10 -12
  51. mlrun/datastore/__init__.py +5 -8
  52. mlrun/datastore/alibaba_oss.py +130 -0
  53. mlrun/datastore/azure_blob.py +17 -5
  54. mlrun/datastore/base.py +62 -39
  55. mlrun/datastore/datastore.py +28 -9
  56. mlrun/datastore/datastore_profile.py +146 -20
  57. mlrun/datastore/filestore.py +0 -1
  58. mlrun/datastore/google_cloud_storage.py +6 -2
  59. mlrun/datastore/hdfs.py +56 -0
  60. mlrun/datastore/inmem.py +2 -2
  61. mlrun/datastore/redis.py +6 -2
  62. mlrun/datastore/s3.py +9 -0
  63. mlrun/datastore/snowflake_utils.py +43 -0
  64. mlrun/datastore/sources.py +201 -96
  65. mlrun/datastore/spark_utils.py +1 -2
  66. mlrun/datastore/store_resources.py +7 -7
  67. mlrun/datastore/targets.py +358 -104
  68. mlrun/datastore/utils.py +72 -58
  69. mlrun/datastore/v3io.py +5 -1
  70. mlrun/db/base.py +185 -35
  71. mlrun/db/factory.py +1 -1
  72. mlrun/db/httpdb.py +614 -179
  73. mlrun/db/nopdb.py +210 -26
  74. mlrun/errors.py +12 -1
  75. mlrun/execution.py +41 -24
  76. mlrun/feature_store/__init__.py +0 -2
  77. mlrun/feature_store/api.py +40 -72
  78. mlrun/feature_store/common.py +1 -1
  79. mlrun/feature_store/feature_set.py +76 -55
  80. mlrun/feature_store/feature_vector.py +28 -30
  81. mlrun/feature_store/ingestion.py +7 -6
  82. mlrun/feature_store/retrieval/base.py +16 -11
  83. mlrun/feature_store/retrieval/conversion.py +11 -13
  84. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  85. mlrun/feature_store/retrieval/job.py +9 -3
  86. mlrun/feature_store/retrieval/local_merger.py +2 -0
  87. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  88. mlrun/feature_store/steps.py +37 -34
  89. mlrun/features.py +9 -20
  90. mlrun/frameworks/_common/artifacts_library.py +9 -9
  91. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  92. mlrun/frameworks/_common/model_handler.py +48 -48
  93. mlrun/frameworks/_common/plan.py +2 -3
  94. mlrun/frameworks/_common/producer.py +3 -4
  95. mlrun/frameworks/_common/utils.py +5 -5
  96. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  97. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  98. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  99. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  100. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  101. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  102. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  103. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  104. mlrun/frameworks/_ml_common/plan.py +1 -1
  105. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  109. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  110. mlrun/frameworks/_ml_common/utils.py +4 -4
  111. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  112. mlrun/frameworks/huggingface/model_server.py +4 -4
  113. mlrun/frameworks/lgbm/__init__.py +33 -33
  114. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  117. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  119. mlrun/frameworks/lgbm/model_handler.py +10 -10
  120. mlrun/frameworks/lgbm/model_server.py +6 -6
  121. mlrun/frameworks/lgbm/utils.py +5 -5
  122. mlrun/frameworks/onnx/dataset.py +8 -8
  123. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  124. mlrun/frameworks/onnx/model_handler.py +6 -6
  125. mlrun/frameworks/onnx/model_server.py +7 -7
  126. mlrun/frameworks/parallel_coordinates.py +4 -3
  127. mlrun/frameworks/pytorch/__init__.py +18 -18
  128. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  129. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  130. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  131. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  132. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  133. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  134. mlrun/frameworks/pytorch/model_handler.py +17 -17
  135. mlrun/frameworks/pytorch/model_server.py +7 -7
  136. mlrun/frameworks/sklearn/__init__.py +13 -13
  137. mlrun/frameworks/sklearn/estimator.py +4 -4
  138. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  139. mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
  140. mlrun/frameworks/sklearn/model_handler.py +2 -2
  141. mlrun/frameworks/tf_keras/__init__.py +10 -7
  142. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  143. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  144. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  145. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  146. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  147. mlrun/frameworks/tf_keras/model_server.py +6 -6
  148. mlrun/frameworks/xgboost/__init__.py +13 -13
  149. mlrun/frameworks/xgboost/model_handler.py +6 -6
  150. mlrun/k8s_utils.py +14 -16
  151. mlrun/launcher/__init__.py +1 -1
  152. mlrun/launcher/base.py +16 -15
  153. mlrun/launcher/client.py +8 -6
  154. mlrun/launcher/factory.py +1 -1
  155. mlrun/launcher/local.py +17 -11
  156. mlrun/launcher/remote.py +16 -10
  157. mlrun/lists.py +7 -6
  158. mlrun/model.py +238 -73
  159. mlrun/model_monitoring/__init__.py +1 -1
  160. mlrun/model_monitoring/api.py +138 -315
  161. mlrun/model_monitoring/application.py +5 -296
  162. mlrun/model_monitoring/applications/__init__.py +24 -0
  163. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  164. mlrun/model_monitoring/applications/base.py +282 -0
  165. mlrun/model_monitoring/applications/context.py +214 -0
  166. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  167. mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
  168. mlrun/model_monitoring/applications/results.py +99 -0
  169. mlrun/model_monitoring/controller.py +104 -84
  170. mlrun/model_monitoring/controller_handler.py +13 -5
  171. mlrun/model_monitoring/db/__init__.py +18 -0
  172. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  173. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  174. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
  175. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  176. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  177. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  178. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  179. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  180. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  181. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  182. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
  183. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  184. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  185. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  187. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  188. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  189. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  190. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  191. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  192. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  193. mlrun/model_monitoring/evidently_application.py +6 -118
  194. mlrun/model_monitoring/features_drift_table.py +134 -106
  195. mlrun/model_monitoring/helpers.py +127 -28
  196. mlrun/model_monitoring/metrics/__init__.py +13 -0
  197. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  198. mlrun/model_monitoring/model_endpoint.py +3 -2
  199. mlrun/model_monitoring/prometheus.py +1 -4
  200. mlrun/model_monitoring/stream_processing.py +62 -231
  201. mlrun/model_monitoring/tracking_policy.py +9 -2
  202. mlrun/model_monitoring/writer.py +152 -124
  203. mlrun/package/__init__.py +6 -6
  204. mlrun/package/context_handler.py +5 -5
  205. mlrun/package/packager.py +7 -7
  206. mlrun/package/packagers/default_packager.py +6 -6
  207. mlrun/package/packagers/numpy_packagers.py +15 -15
  208. mlrun/package/packagers/pandas_packagers.py +5 -5
  209. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  210. mlrun/package/packagers_manager.py +19 -23
  211. mlrun/package/utils/_formatter.py +6 -6
  212. mlrun/package/utils/_pickler.py +2 -2
  213. mlrun/package/utils/_supported_format.py +4 -4
  214. mlrun/package/utils/log_hint_utils.py +2 -2
  215. mlrun/package/utils/type_hint_utils.py +4 -9
  216. mlrun/platforms/__init__.py +11 -10
  217. mlrun/platforms/iguazio.py +24 -203
  218. mlrun/projects/operations.py +35 -21
  219. mlrun/projects/pipelines.py +68 -99
  220. mlrun/projects/project.py +830 -266
  221. mlrun/render.py +3 -11
  222. mlrun/run.py +162 -166
  223. mlrun/runtimes/__init__.py +62 -7
  224. mlrun/runtimes/base.py +39 -32
  225. mlrun/runtimes/daskjob.py +8 -8
  226. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  227. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
  228. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  229. mlrun/runtimes/funcdoc.py +0 -28
  230. mlrun/runtimes/function_reference.py +1 -1
  231. mlrun/runtimes/kubejob.py +28 -122
  232. mlrun/runtimes/local.py +6 -3
  233. mlrun/runtimes/mpijob/__init__.py +0 -20
  234. mlrun/runtimes/mpijob/abstract.py +9 -10
  235. mlrun/runtimes/mpijob/v1.py +1 -1
  236. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  237. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  238. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  239. mlrun/runtimes/nuclio/application/application.py +523 -0
  240. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  241. mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
  242. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  243. mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
  244. mlrun/runtimes/pod.py +286 -88
  245. mlrun/runtimes/remotesparkjob.py +2 -2
  246. mlrun/runtimes/sparkjob/spark3job.py +51 -34
  247. mlrun/runtimes/utils.py +7 -75
  248. mlrun/secrets.py +9 -5
  249. mlrun/serving/remote.py +2 -7
  250. mlrun/serving/routers.py +13 -10
  251. mlrun/serving/server.py +22 -26
  252. mlrun/serving/states.py +99 -25
  253. mlrun/serving/utils.py +3 -3
  254. mlrun/serving/v1_serving.py +6 -7
  255. mlrun/serving/v2_serving.py +59 -20
  256. mlrun/track/tracker.py +2 -1
  257. mlrun/track/tracker_manager.py +3 -3
  258. mlrun/track/trackers/mlflow_tracker.py +1 -2
  259. mlrun/utils/async_http.py +5 -7
  260. mlrun/utils/azure_vault.py +1 -1
  261. mlrun/utils/clones.py +1 -2
  262. mlrun/utils/condition_evaluator.py +3 -3
  263. mlrun/utils/db.py +3 -3
  264. mlrun/utils/helpers.py +183 -197
  265. mlrun/utils/http.py +2 -5
  266. mlrun/utils/logger.py +76 -14
  267. mlrun/utils/notifications/notification/__init__.py +17 -12
  268. mlrun/utils/notifications/notification/base.py +14 -2
  269. mlrun/utils/notifications/notification/console.py +2 -0
  270. mlrun/utils/notifications/notification/git.py +3 -1
  271. mlrun/utils/notifications/notification/ipython.py +3 -1
  272. mlrun/utils/notifications/notification/slack.py +101 -21
  273. mlrun/utils/notifications/notification/webhook.py +11 -1
  274. mlrun/utils/notifications/notification_pusher.py +155 -30
  275. mlrun/utils/retryer.py +208 -0
  276. mlrun/utils/singleton.py +1 -1
  277. mlrun/utils/v3io_clients.py +2 -4
  278. mlrun/utils/version/version.json +2 -2
  279. mlrun/utils/version/version.py +2 -6
  280. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
  281. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  282. mlrun/kfpops.py +0 -868
  283. mlrun/model_monitoring/batch.py +0 -1095
  284. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  285. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  286. mlrun/platforms/other.py +0 -306
  287. mlrun-1.6.4rc2.dist-info/RECORD +0 -314
  288. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  289. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
  290. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  291. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
@@ -17,26 +17,28 @@ import datetime
17
17
  import json
18
18
  import os
19
19
  import re
20
- from typing import Any, Iterator, NamedTuple, Optional, Union, cast
20
+ from collections.abc import Iterator
21
+ from typing import Any, NamedTuple, Optional, Union, cast
21
22
 
22
- from v3io.dataplane.response import HttpResponseError
23
+ import nuclio
23
24
 
24
25
  import mlrun
25
26
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
26
27
  import mlrun.data_types.infer
27
28
  import mlrun.feature_store as fstore
29
+ import mlrun.model_monitoring.db.stores
28
30
  from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
29
31
  from mlrun.datastore import get_stream_pusher
30
32
  from mlrun.datastore.targets import ParquetTarget
31
- from mlrun.model_monitoring.batch import calculate_inputs_statistics
33
+ from mlrun.errors import err_to_str
32
34
  from mlrun.model_monitoring.helpers import (
33
35
  _BatchDict,
34
36
  batch_dict2timedelta,
37
+ calculate_inputs_statistics,
35
38
  get_monitoring_parquet_path,
36
39
  get_stream_path,
37
40
  )
38
- from mlrun.utils import create_logger, datetime_now, logger
39
- from mlrun.utils.v3io_clients import get_v3io_client
41
+ from mlrun.utils import datetime_now, logger
40
42
 
41
43
 
42
44
  class _Interval(NamedTuple):
@@ -45,8 +47,6 @@ class _Interval(NamedTuple):
45
47
 
46
48
 
47
49
  class _BatchWindow:
48
- V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
49
-
50
50
  def __init__(
51
51
  self,
52
52
  project: str,
@@ -62,27 +62,22 @@ class _BatchWindow:
62
62
  All the time values are in seconds.
63
63
  The start and stop time are in seconds since the epoch.
64
64
  """
65
+ self.project = project
65
66
  self._endpoint = endpoint
66
67
  self._application = application
67
68
  self._first_request = first_request
68
- self._kv_storage = get_v3io_client(
69
- endpoint=mlrun.mlconf.v3io_api,
70
- # Avoid noisy warning logs before the KV table is created
71
- logger=create_logger(name="v3io_client", level="error"),
72
- ).kv
73
- self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
74
69
  self._stop = last_updated
75
70
  self._step = timedelta_seconds
71
+ self._db = mlrun.model_monitoring.get_store_object(project=self.project)
76
72
  self._start = self._get_last_analyzed()
77
73
 
78
74
  def _get_last_analyzed(self) -> Optional[int]:
79
75
  try:
80
- data = self._kv_storage.get(
81
- container=self._v3io_container,
82
- table_path=self._endpoint,
83
- key=self._application,
76
+ last_analyzed = self._db.get_last_analyzed(
77
+ endpoint_id=self._endpoint,
78
+ application_name=self._application,
84
79
  )
85
- except HttpResponseError as err:
80
+ except mlrun.errors.MLRunNotFoundError:
86
81
  logger.info(
87
82
  "No last analyzed time was found for this endpoint and "
88
83
  "application, as this is probably the first time this "
@@ -93,7 +88,7 @@ class _BatchWindow:
93
88
  first_request=self._first_request,
94
89
  last_updated=self._stop,
95
90
  )
96
- logger.debug("Error while getting last analyzed time", err=err)
91
+
97
92
  if self._first_request and self._stop:
98
93
  # TODO : Change the timedelta according to the policy.
99
94
  first_period_in_seconds = max(
@@ -105,7 +100,6 @@ class _BatchWindow:
105
100
  )
106
101
  return self._first_request
107
102
 
108
- last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
109
103
  logger.info(
110
104
  "Got the last analyzed time for this endpoint and application",
111
105
  endpoint=self._endpoint,
@@ -121,11 +115,11 @@ class _BatchWindow:
121
115
  application=self._application,
122
116
  last_analyzed=last_analyzed,
123
117
  )
124
- self._kv_storage.put(
125
- container=self._v3io_container,
126
- table_path=self._endpoint,
127
- key=self._application,
128
- attributes={mm_constants.SchedulingKeys.LAST_ANALYZED: last_analyzed},
118
+
119
+ self._db.update_last_analyzed(
120
+ endpoint_id=self._endpoint,
121
+ application_name=self._application,
122
+ last_analyzed=last_analyzed,
129
123
  )
130
124
 
131
125
  def get_intervals(
@@ -281,33 +275,33 @@ class MonitoringApplicationController:
281
275
 
282
276
  def __init__(
283
277
  self,
284
- context: mlrun.run.MLClientCtx,
278
+ mlrun_context: mlrun.run.MLClientCtx,
285
279
  project: str,
286
280
  ):
287
281
  """
288
282
  Initialize Monitoring Application Processor object.
289
283
 
290
- :param context: An MLRun context.
284
+ :param mlrun_context: An MLRun context.
291
285
  :param project: Project name.
292
286
  """
293
- self.context = context
287
+ self.context = mlrun_context
294
288
  self.project = project
295
289
  self.project_obj = mlrun.get_or_create_project(project)
296
290
 
297
- context.logger.debug(f"Initializing {self.__class__.__name__}", project=project)
291
+ mlrun_context.logger.debug(
292
+ f"Initializing {self.__class__.__name__}", project=project
293
+ )
298
294
 
299
- self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)
295
+ self.db = mlrun.model_monitoring.get_store_object(project=project)
300
296
 
301
297
  self._batch_window_generator = _BatchWindowGenerator(
302
- batch_dict=context.parameters[
303
- mm_constants.EventFieldType.BATCH_INTERVALS_DICT
304
- ]
298
+ batch_dict=json.loads(
299
+ mlrun.get_secret_or_env(
300
+ mm_constants.EventFieldType.BATCH_INTERVALS_DICT
301
+ )
302
+ )
305
303
  )
306
304
 
307
- # If provided, only model endpoints in that that list will be analyzed
308
- self.model_endpoints = context.parameters.get(
309
- mm_constants.EventFieldType.MODEL_ENDPOINTS, None
310
- )
311
305
  self.model_monitoring_access_key = self._get_model_monitoring_access_key()
312
306
  self.parquet_directory = get_monitoring_parquet_path(
313
307
  self.project_obj,
@@ -334,66 +328,91 @@ class MonitoringApplicationController:
334
328
  v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
335
329
  )
336
330
 
337
- def run(self):
331
+ def run(self, event: nuclio.Event):
338
332
  """
339
333
  Main method for run all the relevant monitoring applications on each endpoint
334
+
335
+ :param event: trigger event
340
336
  """
337
+ logger.info("Start running monitoring controller")
341
338
  try:
342
- endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
339
+ applications_names = []
340
+ endpoints = self.db.list_model_endpoints()
341
+ if not endpoints:
342
+ self.context.logger.info(
343
+ "No model endpoints found", project=self.project
344
+ )
345
+ return
343
346
  monitoring_functions = self.project_obj.list_model_monitoring_functions()
344
347
  if monitoring_functions:
348
+ # Gets only application in ready state
345
349
  applications_names = list(
346
- {app.metadata.name for app in monitoring_functions}
350
+ {
351
+ app.metadata.name
352
+ for app in monitoring_functions
353
+ if (
354
+ app.status.state == "ready"
355
+ # workaround for the default app, as its `status.state` is `None`
356
+ or app.metadata.name
357
+ == mm_constants.HistogramDataDriftApplicationConstants.NAME
358
+ )
359
+ }
347
360
  )
348
- else:
361
+ if not applications_names:
349
362
  self.context.logger.info(
350
363
  "No monitoring functions found", project=self.project
351
364
  )
352
- applications_names = []
365
+ return
366
+ self.context.logger.info(
367
+ "Starting to iterate over the applications",
368
+ applications=applications_names,
369
+ )
353
370
 
354
371
  except Exception as e:
355
- self.context.logger.error("Failed to list endpoints", exc=e)
356
- return
357
- if endpoints and applications_names:
358
- # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
359
- pool = concurrent.futures.ProcessPoolExecutor(
360
- max_workers=min(len(endpoints), 10),
372
+ self.context.logger.error(
373
+ "Failed to list endpoints and monitoring applications",
374
+ exc=err_to_str(e),
361
375
  )
362
- futures = []
363
- for endpoint in endpoints:
376
+ return
377
+ # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
378
+ pool = concurrent.futures.ProcessPoolExecutor(
379
+ max_workers=min(len(endpoints), 10),
380
+ )
381
+ futures = []
382
+ for endpoint in endpoints:
383
+ if (
384
+ endpoint[mm_constants.EventFieldType.ACTIVE]
385
+ and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
386
+ == mm_constants.ModelMonitoringMode.enabled.value
387
+ ):
388
+ # Skip router endpoint:
364
389
  if (
365
- endpoint[mm_constants.EventFieldType.ACTIVE]
366
- and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
367
- == mm_constants.ModelMonitoringMode.enabled.value
390
+ int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
391
+ == mm_constants.EndpointType.ROUTER
368
392
  ):
369
- # Skip router endpoint:
370
- if (
371
- int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
372
- == mm_constants.EndpointType.ROUTER
373
- ):
374
- # Router endpoint has no feature stats
375
- logger.info(
376
- f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
377
- )
378
- continue
379
- future = pool.submit(
380
- MonitoringApplicationController.model_endpoint_process,
381
- endpoint=endpoint,
382
- applications_names=applications_names,
383
- batch_window_generator=self._batch_window_generator,
384
- project=self.project,
385
- parquet_directory=self.parquet_directory,
386
- storage_options=self.storage_options,
387
- model_monitoring_access_key=self.model_monitoring_access_key,
393
+ # Router endpoint has no feature stats
394
+ logger.info(
395
+ f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
388
396
  )
389
- futures.append(future)
397
+ continue
398
+ future = pool.submit(
399
+ MonitoringApplicationController.model_endpoint_process,
400
+ endpoint=endpoint,
401
+ applications_names=applications_names,
402
+ batch_window_generator=self._batch_window_generator,
403
+ project=self.project,
404
+ parquet_directory=self.parquet_directory,
405
+ storage_options=self.storage_options,
406
+ model_monitoring_access_key=self.model_monitoring_access_key,
407
+ )
408
+ futures.append(future)
390
409
 
391
- for future in concurrent.futures.as_completed(futures):
392
- result = future.result()
393
- if result:
394
- self.context.log_results(result)
410
+ for future in concurrent.futures.as_completed(futures):
411
+ result = future.result()
412
+ if result:
413
+ self.context.log_results(result)
395
414
 
396
- self._delete_old_parquet(endpoints=endpoints)
415
+ self._delete_old_parquet(endpoints=endpoints)
397
416
 
398
417
  @classmethod
399
418
  def model_endpoint_process(
@@ -438,6 +457,7 @@ class MonitoringApplicationController:
438
457
  )
439
458
 
440
459
  for start_infer_time, end_infer_time in batch_window.get_intervals():
460
+ # start - TODO : delete in 1.9.0 (V1 app deprecation)
441
461
  try:
442
462
  # Get application sample data
443
463
  offline_response = cls._get_sample_df(
@@ -483,10 +503,9 @@ class MonitoringApplicationController:
483
503
 
484
504
  # Get the current stats:
485
505
  current_stats = calculate_inputs_statistics(
486
- sample_set_statistics=feature_stats,
487
- inputs=df,
506
+ sample_set_statistics=feature_stats, inputs=df
488
507
  )
489
-
508
+ # end - TODO : delete in 1.9.0 (V1 app deprecation)
490
509
  cls._push_to_applications(
491
510
  current_stats=current_stats,
492
511
  feature_stats=feature_stats,
@@ -517,7 +536,7 @@ class MonitoringApplicationController:
517
536
  """
518
537
  if self.parquet_directory.startswith("v3io:///"):
519
538
  # create fs with access to the user side (under projects)
520
- store, _ = mlrun.store_manager.get_or_create_store(
539
+ store, _, _ = mlrun.store_manager.get_or_create_store(
521
540
  self.parquet_directory,
522
541
  {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
523
542
  )
@@ -593,12 +612,13 @@ class MonitoringApplicationController:
593
612
  mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
594
613
  mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
595
614
  project=project,
596
- application_name=mm_constants.MonitoringFunctionNames.WRITER,
615
+ function_name=mm_constants.MonitoringFunctionNames.WRITER,
597
616
  ),
617
+ mm_constants.ApplicationEvent.MLRUN_CONTEXT: {}, # TODO : for future use by ad-hoc batch infer
598
618
  }
599
619
  for app_name in applications_names:
600
620
  data.update({mm_constants.ApplicationEvent.APPLICATION_NAME: app_name})
601
- stream_uri = get_stream_path(project=project, application_name=app_name)
621
+ stream_uri = get_stream_path(project=project, function_name=app_name)
602
622
 
603
623
  logger.info(
604
624
  f"push endpoint_id {endpoint_id} to {app_name} by stream :{stream_uri}"
@@ -11,19 +11,27 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import nuclio
14
15
 
15
16
  import mlrun
16
17
  from mlrun.model_monitoring.controller import MonitoringApplicationController
17
18
 
18
19
 
19
- def handler(context: mlrun.run.MLClientCtx) -> None:
20
+ def handler(context: nuclio.Context, event: nuclio.Event) -> None:
20
21
  """
21
22
  Run model monitoring application processor
22
23
 
23
- :param context: the MLRun context
24
+ :param context: the Nuclio context
25
+ :param event: trigger event
24
26
  """
27
+ context.user_data.monitor_app_controller.run(event)
28
+
29
+
30
+ def init_context(context):
31
+ mlrun_context = mlrun.get_or_create_ctx("model_monitoring_controller")
32
+ mlrun_context.logger.info("Initialize monitoring app controller")
25
33
  monitor_app_controller = MonitoringApplicationController(
26
- context=context,
27
- project=context.project,
34
+ mlrun_context=mlrun_context,
35
+ project=mlrun_context.project,
28
36
  )
29
- monitor_app_controller.run()
37
+ setattr(context.user_data, "monitor_app_controller", monitor_app_controller)
@@ -0,0 +1,18 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .stores import ObjectStoreFactory, get_store_object
16
+ from .stores.base import StoreBase
17
+ from .tsdb import get_tsdb_connector
18
+ from .tsdb.base import TSDBConnector
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Iguazio
1
+ # Copyright 2024 Iguazio
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,64 +12,56 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
-
17
15
  import enum
18
16
  import typing
17
+ import warnings
19
18
 
20
19
  import mlrun.common.schemas.secret
21
20
  import mlrun.errors
22
21
 
23
- from .model_endpoint_store import ModelEndpointStore
22
+ from .base import StoreBase
24
23
 
25
24
 
26
- class ModelEndpointStoreType(enum.Enum):
27
- """Enum class to handle the different store type values for saving a model endpoint record."""
25
+ class ObjectStoreFactory(enum.Enum):
26
+ """Enum class to handle the different store type values for saving model monitoring records."""
28
27
 
29
28
  v3io_nosql = "v3io-nosql"
30
29
  SQL = "sql"
31
30
 
32
- def to_endpoint_store(
31
+ def to_object_store(
33
32
  self,
34
33
  project: str,
35
34
  access_key: str = None,
36
- endpoint_store_connection: str = None,
37
35
  secret_provider: typing.Callable = None,
38
- ) -> ModelEndpointStore:
36
+ ) -> StoreBase:
39
37
  """
40
- Return a ModelEndpointStore object based on the provided enum value.
41
-
42
- :param project: The name of the project.
43
- :param access_key: Access key with permission to the DB table. Note that if access key is None
44
- and the endpoint target is from type KV then the access key will be
45
- retrieved from the environment variable.
46
- :param endpoint_store_connection: A valid connection string for model endpoint target. Contains several
47
- key-value pairs that required for the database connection.
48
- e.g. A root user with password 1234, tries to connect a schema called
49
- mlrun within a local MySQL DB instance:
50
- 'mysql+pymysql://root:1234@localhost:3306/mlrun'.
38
+ Return a StoreBase object based on the provided enum value.
39
+
40
+ :param project: The name of the project.
41
+ :param access_key: Access key with permission to the DB table. Note that if access key is None
42
+ and the endpoint target is from type KV then the access key will be
43
+ retrieved from the environment variable.
51
44
  :param secret_provider: An optional secret provider to get the connection string secret.
52
45
 
53
- :return: `ModelEndpointStore` object.
46
+ :return: `StoreBase` object.
54
47
 
55
48
  """
56
49
 
57
- if self.value == ModelEndpointStoreType.v3io_nosql.value:
58
- from .kv_model_endpoint_store import KVModelEndpointStore
50
+ if self == self.v3io_nosql:
51
+ from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
59
52
 
60
53
  # Get V3IO access key from env
61
54
  access_key = access_key or mlrun.mlconf.get_v3io_access_key()
62
55
 
63
- return KVModelEndpointStore(project=project, access_key=access_key)
56
+ return KVStoreBase(project=project, access_key=access_key)
64
57
 
65
58
  # Assuming SQL store target if store type is not KV.
66
59
  # Update these lines once there are more than two store target types.
67
60
 
68
- from .sql_model_endpoint_store import SQLModelEndpointStore
61
+ from mlrun.model_monitoring.db.stores.sqldb.sql_store import SQLStoreBase
69
62
 
70
- return SQLModelEndpointStore(
63
+ return SQLStoreBase(
71
64
  project=project,
72
- sql_connection_string=endpoint_store_connection,
73
65
  secret_provider=secret_provider,
74
66
  )
75
67
 
@@ -88,7 +80,24 @@ def get_model_endpoint_store(
88
80
  project: str,
89
81
  access_key: str = None,
90
82
  secret_provider: typing.Callable = None,
91
- ) -> ModelEndpointStore:
83
+ ) -> StoreBase:
84
+ # Leaving here for backwards compatibility
85
+ warnings.warn(
86
+ "The 'get_model_endpoint_store' function is deprecated and will be removed in 1.9.0. "
87
+ "Please use `get_store_object` instead.",
88
+ # TODO: remove in 1.9.0
89
+ FutureWarning,
90
+ )
91
+ return get_store_object(
92
+ project=project, access_key=access_key, secret_provider=secret_provider
93
+ )
94
+
95
+
96
+ def get_store_object(
97
+ project: str,
98
+ access_key: str = None,
99
+ secret_provider: typing.Callable = None,
100
+ ) -> StoreBase:
92
101
  """
93
102
  Getting the DB target type based on mlrun.config.model_endpoint_monitoring.store_type.
94
103
 
@@ -96,16 +105,14 @@ def get_model_endpoint_store(
96
105
  :param access_key: Access key with permission to the DB table.
97
106
  :param secret_provider: An optional secret provider to get the connection string secret.
98
107
 
99
- :return: `ModelEndpointStore` object. Using this object, the user can apply different operations on the
100
- model endpoint record such as write, update, get and delete.
108
+ :return: `StoreBase` object. Using this object, the user can apply different operations on the
109
+ model monitoring record such as write, update, get and delete a model endpoint.
101
110
  """
102
111
 
103
- # Get store type value from ModelEndpointStoreType enum class
104
- model_endpoint_store_type = ModelEndpointStoreType(
105
- mlrun.mlconf.model_endpoint_monitoring.store_type
106
- )
112
+ # Get store type value from ObjectStoreFactory enum class
113
+ store_type = ObjectStoreFactory(mlrun.mlconf.model_endpoint_monitoring.store_type)
107
114
 
108
- # Convert into model endpoint store target object
109
- return model_endpoint_store_type.to_endpoint_store(
115
+ # Convert into store target object
116
+ return store_type.to_object_store(
110
117
  project=project, access_key=access_key, secret_provider=secret_provider
111
118
  )
@@ -0,0 +1,15 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .store import StoreBase