mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +71 -36
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +54 -16
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +21 -16
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  243. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  244. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  245. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  246. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  247. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  248. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  249. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  250. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  251. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  252. mlrun/model_monitoring/model_endpoint.py +0 -118
  253. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  254. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  255. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +0 -0
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,6 @@
14
14
 
15
15
  import collections
16
16
  import datetime
17
- import json
18
17
  import os
19
18
  import typing
20
19
 
@@ -30,14 +29,12 @@ import mlrun.model_monitoring.db
30
29
  import mlrun.serving.states
31
30
  import mlrun.utils
32
31
  from mlrun.common.schemas.model_monitoring.constants import (
32
+ EndpointType,
33
33
  EventFieldType,
34
- EventKeyMetrics,
35
- EventLiveStats,
36
34
  FileTargetKind,
37
- ModelEndpointTarget,
38
35
  ProjectSecretKeys,
39
36
  )
40
- from mlrun.model_monitoring.db import StoreBase, TSDBConnector
37
+ from mlrun.model_monitoring.db import TSDBConnector
41
38
  from mlrun.utils import logger
42
39
 
43
40
 
@@ -51,7 +48,7 @@ class EventStreamProcessor:
51
48
  parquet_target: str,
52
49
  aggregate_windows: typing.Optional[list[str]] = None,
53
50
  aggregate_period: str = "5m",
54
- model_monitoring_access_key: str = None,
51
+ model_monitoring_access_key: typing.Optional[str] = None,
55
52
  ):
56
53
  # General configurations, mainly used for the storey steps in the future serving graph
57
54
  self.project = project
@@ -85,7 +82,7 @@ class EventStreamProcessor:
85
82
  v3io_access_key: typing.Optional[str] = None,
86
83
  v3io_framesd: typing.Optional[str] = None,
87
84
  v3io_api: typing.Optional[str] = None,
88
- model_monitoring_access_key: str = None,
85
+ model_monitoring_access_key: typing.Optional[str] = None,
89
86
  ):
90
87
  # Get the V3IO configurations
91
88
  self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
@@ -101,18 +98,6 @@ class EventStreamProcessor:
101
98
  v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
102
99
  )
103
100
 
104
- # KV path
105
- kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
106
- project=self.project, kind=FileTargetKind.ENDPOINTS
107
- )
108
- (
109
- _,
110
- self.kv_container,
111
- self.kv_path,
112
- ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
113
- kv_path
114
- )
115
-
116
101
  # TSDB path and configurations
117
102
  tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
118
103
  project=self.project, kind=FileTargetKind.EVENTS
@@ -133,7 +118,6 @@ class EventStreamProcessor:
133
118
  self,
134
119
  fn: mlrun.runtimes.ServingRuntime,
135
120
  tsdb_connector: TSDBConnector,
136
- endpoint_store: StoreBase,
137
121
  ) -> None:
138
122
  """
139
123
  Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -162,31 +146,23 @@ class EventStreamProcessor:
162
146
 
163
147
  :param fn: A serving function.
164
148
  :param tsdb_connector: Time series database connector.
165
- :param endpoint_store: KV/SQL store used for endpoint data.
166
149
  """
167
150
 
168
151
  graph = typing.cast(
169
152
  mlrun.serving.states.RootFlowStep,
170
153
  fn.set_topology(mlrun.serving.states.StepKinds.flow),
171
154
  )
172
- graph.add_step(
173
- "ExtractEndpointID",
174
- "extract_endpoint",
175
- full_event=True,
176
- )
177
155
 
178
156
  # split the graph between event with error vs valid event
179
157
  graph.add_step(
180
158
  "storey.Filter",
181
159
  "FilterError",
182
- after="extract_endpoint",
183
160
  _fn="(event.get('error') is None)",
184
161
  )
185
162
 
186
163
  graph.add_step(
187
164
  "storey.Filter",
188
165
  "ForwardError",
189
- after="extract_endpoint",
190
166
  _fn="(event.get('error') is not None)",
191
167
  )
192
168
 
@@ -198,7 +174,7 @@ class EventStreamProcessor:
198
174
  def apply_process_endpoint_event():
199
175
  graph.add_step(
200
176
  "ProcessEndpointEvent",
201
- after="extract_endpoint", # TODO: change this to FilterError in ML-7456
177
+ after="FilterError",
202
178
  full_event=True,
203
179
  project=self.project,
204
180
  )
@@ -233,79 +209,11 @@ class EventStreamProcessor:
233
209
  )
234
210
 
235
211
  apply_map_feature_names()
236
-
237
- # Calculate number of predictions and average latency
238
- def apply_storey_aggregations():
239
- # Calculate number of predictions for each window (5 min and 1 hour by default)
240
- graph.add_step(
241
- class_name="storey.AggregateByKey",
242
- aggregates=[
243
- {
244
- "name": EventFieldType.LATENCY,
245
- "column": EventFieldType.LATENCY,
246
- "operations": ["count", "avg"],
247
- "windows": self.aggregate_windows,
248
- "period": self.aggregate_period,
249
- }
250
- ],
251
- name=EventFieldType.LATENCY,
252
- after="MapFeatureNames",
253
- step_name="Aggregates",
254
- table=".",
255
- key_field=EventFieldType.ENDPOINT_ID,
256
- )
257
- # Calculate average latency time for each window (5 min and 1 hour by default)
258
- graph.add_step(
259
- class_name="storey.Rename",
260
- mapping={
261
- "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M,
262
- "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H,
263
- },
264
- name="Rename",
265
- after=EventFieldType.LATENCY,
266
- )
267
-
268
- apply_storey_aggregations()
269
-
270
- # KV/SQL branch
271
- # Filter relevant keys from the event before writing the data into the database table
272
- def apply_process_before_endpoint_update():
273
- graph.add_step(
274
- "ProcessBeforeEndpointUpdate",
275
- name="ProcessBeforeEndpointUpdate",
276
- after="Rename",
277
- )
278
-
279
- apply_process_before_endpoint_update()
280
-
281
- # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
282
- # about average latency and the amount of predictions over time
283
- def apply_update_endpoint():
284
- graph.add_step(
285
- "UpdateEndpoint",
286
- name="UpdateEndpoint",
287
- after="ProcessBeforeEndpointUpdate",
288
- project=self.project,
289
- )
290
-
291
- apply_update_endpoint()
292
-
293
- # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
294
- # which will be used by Grafana monitoring dashboards
295
- def apply_infer_schema():
296
- graph.add_step(
297
- "InferSchema",
298
- name="InferSchema",
299
- after="UpdateEndpoint",
300
- v3io_framesd=self.v3io_framesd,
301
- container=self.kv_container,
302
- table=self.kv_path,
303
- )
304
-
305
- if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
306
- apply_infer_schema()
307
-
308
- tsdb_connector.apply_monitoring_stream_steps(graph=graph)
212
+ tsdb_connector.apply_monitoring_stream_steps(
213
+ graph=graph,
214
+ aggregate_windows=self.aggregate_windows,
215
+ aggregate_period=self.aggregate_period,
216
+ )
309
217
 
310
218
  # Parquet branch
311
219
  # Filter and validate different keys before writing the data to Parquet target
@@ -341,91 +249,6 @@ class EventStreamProcessor:
341
249
  apply_parquet_target()
342
250
 
343
251
 
344
- class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
345
- def __init__(self, **kwargs):
346
- """
347
- Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
348
- Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
349
- average latency and the number of predictions (per 5min and 1hour).
350
-
351
- :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
352
- """
353
- super().__init__(**kwargs)
354
-
355
- def do(self, event):
356
- # Compute prediction per second
357
- event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
358
- float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
359
- )
360
- # Filter relevant keys
361
- e = {
362
- k: event[k]
363
- for k in [
364
- EventFieldType.FUNCTION_URI,
365
- EventFieldType.MODEL,
366
- EventFieldType.MODEL_CLASS,
367
- EventFieldType.ENDPOINT_ID,
368
- EventFieldType.LABELS,
369
- EventFieldType.FIRST_REQUEST,
370
- EventFieldType.LAST_REQUEST,
371
- EventFieldType.ERROR_COUNT,
372
- ]
373
- }
374
-
375
- # Add generic metrics statistics
376
- generic_metrics = {
377
- k: event[k]
378
- for k in [
379
- EventLiveStats.LATENCY_AVG_5M,
380
- EventLiveStats.LATENCY_AVG_1H,
381
- EventLiveStats.PREDICTIONS_PER_SECOND,
382
- EventLiveStats.PREDICTIONS_COUNT_5M,
383
- EventLiveStats.PREDICTIONS_COUNT_1H,
384
- ]
385
- }
386
-
387
- e[EventFieldType.METRICS] = json.dumps(
388
- {EventKeyMetrics.GENERIC: generic_metrics}
389
- )
390
-
391
- # Write labels as json string as required by the DB format
392
- e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
393
-
394
- return e
395
-
396
-
397
- class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
398
- def __init__(self, **kwargs) -> None:
399
- """
400
- Generate the model endpoint ID based on the event parameters and attach it to the event.
401
- """
402
- super().__init__(**kwargs)
403
-
404
- def do(self, full_event) -> typing.Union[storey.Event, None]:
405
- # Getting model version and function uri from event
406
- # and use them for retrieving the endpoint_id
407
- function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
408
- if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
409
- return None
410
-
411
- model = full_event.body.get(EventFieldType.MODEL)
412
- if not is_not_none(model, [EventFieldType.MODEL]):
413
- return None
414
-
415
- version = full_event.body.get(EventFieldType.VERSION)
416
- versioned_model = f"{model}:{version}" if version else f"{model}:latest"
417
-
418
- endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
419
- function_uri=function_uri,
420
- versioned_model=versioned_model,
421
- )
422
-
423
- endpoint_id = str(endpoint_id)
424
- full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
425
- full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
426
- return full_event
427
-
428
-
429
252
  class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
430
253
  def __init__(self, **kwargs):
431
254
  """
@@ -498,20 +321,27 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
498
321
 
499
322
  def do(self, full_event):
500
323
  event = full_event.body
324
+ # Getting model version and function uri from event
325
+ # and use them for retrieving the endpoint_id
326
+ function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
327
+ if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
328
+ return None
329
+
330
+ model = full_event.body.get(EventFieldType.MODEL)
331
+ if not is_not_none(model, [EventFieldType.MODEL]):
332
+ return None
501
333
 
502
- versioned_model = event[EventFieldType.VERSIONED_MODEL]
334
+ version = full_event.body.get(EventFieldType.VERSION)
335
+ versioned_model = f"{model}:{version}" if version else f"{model}:latest"
336
+
337
+ full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
503
338
  endpoint_id = event[EventFieldType.ENDPOINT_ID]
504
- function_uri = event[EventFieldType.FUNCTION_URI]
505
339
 
506
340
  # In case this process fails, resume state from existing record
507
- self.resume_state(endpoint_id)
508
-
509
- # If error key has been found in the current event,
510
- # increase the error counter by 1 and raise the error description
511
- error = event.get("error")
512
- if error: # TODO: delete this in ML-7456
513
- self.error_count[endpoint_id] += 1
514
- raise mlrun.errors.MLRunInvalidArgumentError(str(error))
341
+ self.resume_state(
342
+ endpoint_id,
343
+ full_event.body.get(EventFieldType.MODEL),
344
+ )
515
345
 
516
346
  # Validate event fields
517
347
  model_class = event.get("model_class") or event.get("class")
@@ -535,11 +365,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
535
365
  # Set time for the first request of the current endpoint
536
366
  self.first_request[endpoint_id] = timestamp
537
367
 
538
- # Validate that the request time of the current event is later than the previous request time
539
- self._validate_last_request_timestamp(
540
- endpoint_id=endpoint_id, timestamp=timestamp
541
- )
542
-
543
368
  # Set time for the last reqeust of the current endpoint
544
369
  self.last_request[endpoint_id] = timestamp
545
370
 
@@ -609,6 +434,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
609
434
  {
610
435
  EventFieldType.FUNCTION_URI: function_uri,
611
436
  EventFieldType.MODEL: versioned_model,
437
+ EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
612
438
  EventFieldType.MODEL_CLASS: model_class,
613
439
  EventFieldType.TIMESTAMP: timestamp,
614
440
  EventFieldType.ENDPOINT_ID: endpoint_id,
@@ -635,33 +461,19 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
635
461
  storey_event = storey.Event(body=events, key=endpoint_id)
636
462
  return storey_event
637
463
 
638
- def _validate_last_request_timestamp(self, endpoint_id: str, timestamp: str):
639
- """Validate that the request time of the current event is later than the previous request time that has
640
- already been processed.
641
-
642
- :param endpoint_id: The unique id of the model endpoint.
643
- :param timestamp: Event request time as a string.
644
-
645
- :raise MLRunPreconditionFailedError: If the request time of the current is later than the previous request time.
646
- """
647
-
648
- if (
649
- endpoint_id in self.last_request
650
- and self.last_request[endpoint_id] > timestamp
651
- ):
652
- logger.error(
653
- f"current event request time {timestamp} is earlier than the last request time "
654
- f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
655
- )
656
-
657
- def resume_state(self, endpoint_id):
464
+ def resume_state(self, endpoint_id, endpoint_name):
658
465
  # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
659
466
  # left them
660
467
  if endpoint_id not in self.endpoints:
661
468
  logger.info("Trying to resume state", endpoint_id=endpoint_id)
662
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
663
- project=self.project,
664
- endpoint_id=endpoint_id,
469
+ endpoint_record = (
470
+ mlrun.db.get_run_db()
471
+ .get_model_endpoint(
472
+ project=self.project,
473
+ endpoint_id=endpoint_id,
474
+ name=endpoint_name,
475
+ )
476
+ .flat_dict()
665
477
  )
666
478
 
667
479
  # If model endpoint found, get first_request, last_request and error_count values
@@ -735,6 +547,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
735
547
  # and labels columns were not found in the current event
736
548
  self.feature_names = {}
737
549
  self.label_columns = {}
550
+ self.first_request = {}
738
551
 
739
552
  # Dictionary to manage the model endpoint types - important for the V3IO TSDB
740
553
  self.endpoint_type = {}
@@ -766,23 +579,29 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
766
579
  if isinstance(feature_value, int):
767
580
  feature_values[index] = float(feature_value)
768
581
 
582
+ attributes_to_update = {}
583
+ endpoint_record = None
769
584
  # Get feature names and label columns
770
585
  if endpoint_id not in self.feature_names:
771
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
772
- project=self.project,
773
- endpoint_id=endpoint_id,
586
+ endpoint_record = (
587
+ mlrun.db.get_run_db()
588
+ .get_model_endpoint(
589
+ project=self.project,
590
+ endpoint_id=endpoint_id,
591
+ name=event[EventFieldType.ENDPOINT_NAME],
592
+ )
593
+ .flat_dict()
774
594
  )
775
595
  feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
776
- feature_names = json.loads(feature_names) if feature_names else None
777
596
 
778
597
  label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
779
- label_columns = json.loads(label_columns) if label_columns else None
780
598
 
781
599
  # If feature names were not found,
782
600
  # try to retrieve them from the previous events of the current process
783
601
  if not feature_names and self._infer_columns_from_data:
784
602
  feature_names = self._infer_feature_names_from_data(event)
785
603
 
604
+ endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
786
605
  if not feature_names:
787
606
  logger.warn(
788
607
  "Feature names are not initialized, they will be automatically generated",
@@ -793,19 +612,14 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
793
612
  ]
794
613
 
795
614
  # Update the endpoint record with the generated features
796
- update_endpoint_record(
797
- project=self.project,
798
- endpoint_id=endpoint_id,
799
- attributes={
800
- EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
801
- },
802
- )
615
+ attributes_to_update[EventFieldType.FEATURE_NAMES] = feature_names
803
616
 
804
- update_monitoring_feature_set(
805
- endpoint_record=endpoint_record,
806
- feature_names=feature_names,
807
- feature_values=feature_values,
808
- )
617
+ if endpoint_type != EndpointType.ROUTER.value:
618
+ update_monitoring_feature_set(
619
+ endpoint_record=endpoint_record,
620
+ feature_names=feature_names,
621
+ feature_values=feature_values,
622
+ )
809
623
 
810
624
  # Similar process with label columns
811
625
  if not label_columns and self._infer_columns_from_data:
@@ -819,17 +633,13 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
819
633
  label_columns = [
820
634
  f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
821
635
  ]
822
-
823
- update_endpoint_record(
824
- project=self.project,
825
- endpoint_id=endpoint_id,
826
- attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
827
- )
828
- update_monitoring_feature_set(
829
- endpoint_record=endpoint_record,
830
- feature_names=label_columns,
831
- feature_values=label_values,
832
- )
636
+ attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
637
+ if endpoint_type != EndpointType.ROUTER.value:
638
+ update_monitoring_feature_set(
639
+ endpoint_record=endpoint_record,
640
+ feature_names=label_columns,
641
+ feature_values=label_values,
642
+ )
833
643
 
834
644
  self.label_columns[endpoint_id] = label_columns
835
645
  self.feature_names[endpoint_id] = feature_names
@@ -842,9 +652,39 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
842
652
  )
843
653
 
844
654
  # Update the endpoint type within the endpoint types dictionary
845
- endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
846
655
  self.endpoint_type[endpoint_id] = endpoint_type
847
656
 
657
+ # Update the first request time in the endpoint record
658
+ if endpoint_id not in self.first_request:
659
+ endpoint_record = endpoint_record or (
660
+ mlrun.db.get_run_db()
661
+ .get_model_endpoint(
662
+ project=self.project,
663
+ endpoint_id=endpoint_id,
664
+ name=event[EventFieldType.ENDPOINT_NAME],
665
+ )
666
+ .flat_dict()
667
+ )
668
+ if not endpoint_record.get(EventFieldType.FIRST_REQUEST):
669
+ attributes_to_update[EventFieldType.FIRST_REQUEST] = (
670
+ mlrun.utils.enrich_datetime_with_tz_info(
671
+ event[EventFieldType.FIRST_REQUEST]
672
+ )
673
+ )
674
+ self.first_request[endpoint_id] = True
675
+ if attributes_to_update:
676
+ logger.info(
677
+ "Updating endpoint record",
678
+ endpoint_id=endpoint_id,
679
+ attributes=attributes_to_update,
680
+ )
681
+ update_endpoint_record(
682
+ project=self.project,
683
+ endpoint_id=endpoint_id,
684
+ attributes=attributes_to_update,
685
+ endpoint_name=event[EventFieldType.ENDPOINT_NAME],
686
+ )
687
+
848
688
  # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
849
689
  feature_names = self.feature_names[endpoint_id]
850
690
  self._map_dictionary_values(
@@ -895,30 +735,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
895
735
  event[mapping_dictionary][name] = value
896
736
 
897
737
 
898
- class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
899
- def __init__(self, project: str, **kwargs):
900
- """
901
- Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
902
- the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
903
- such as "Model Monitoring - Performance" which can be found in Grafana.
904
-
905
- :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
906
- """
907
- super().__init__(**kwargs)
908
- self.project = project
909
-
910
- def do(self, event: dict):
911
- # Remove labels from the event
912
- event.pop(EventFieldType.LABELS)
913
-
914
- update_endpoint_record(
915
- project=self.project,
916
- endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
917
- attributes=event,
918
- )
919
- return event
920
-
921
-
922
738
  class InferSchema(mlrun.feature_store.steps.MapClass):
923
739
  def __init__(
924
740
  self,
@@ -963,14 +779,14 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
963
779
  def update_endpoint_record(
964
780
  project: str,
965
781
  endpoint_id: str,
782
+ endpoint_name: str,
966
783
  attributes: dict,
967
784
  ):
968
- model_endpoint_store = mlrun.model_monitoring.get_store_object(
785
+ mlrun.db.get_run_db().patch_model_endpoint(
969
786
  project=project,
970
- )
971
-
972
- model_endpoint_store.update_model_endpoint(
973
- endpoint_id=endpoint_id, attributes=attributes
787
+ endpoint_id=endpoint_id,
788
+ attributes=attributes,
789
+ name=endpoint_name,
974
790
  )
975
791
 
976
792
 
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import warnings
16
- from typing import Union
16
+ from typing import Optional, Union
17
17
 
18
18
  import mlrun.common.schemas.schedule
19
19
  import mlrun.model
@@ -74,7 +74,9 @@ class TrackingPolicy(mlrun.model.ModelObj):
74
74
  self.default_controller_image = default_controller_image
75
75
 
76
76
  @classmethod
77
- def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
77
+ def from_dict(
78
+ cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
79
+ ):
78
80
  new_obj = super().from_dict(
79
81
  struct, fields=cls._dict_fields, deprecated_fields=deprecated_fields
80
82
  )
@@ -102,7 +104,12 @@ class TrackingPolicy(mlrun.model.ModelObj):
102
104
  )
103
105
  return new_obj
104
106
 
105
- def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
107
+ def to_dict(
108
+ self,
109
+ fields: Optional[list] = None,
110
+ exclude: Optional[list] = None,
111
+ strip: bool = False,
112
+ ):
106
113
  struct = super().to_dict(
107
114
  fields,
108
115
  exclude=[