mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc4.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -12,32 +12,24 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import collections
16
15
  import datetime
17
- import json
18
- import os
19
16
  import typing
20
17
 
21
- import storey
22
-
23
18
  import mlrun
24
19
  import mlrun.common.model_monitoring.helpers
25
- import mlrun.config
26
- import mlrun.datastore.targets
27
20
  import mlrun.feature_store as fstore
28
21
  import mlrun.feature_store.steps
29
- import mlrun.model_monitoring.db
30
22
  import mlrun.serving.states
31
23
  import mlrun.utils
32
24
  from mlrun.common.schemas.model_monitoring.constants import (
25
+ ControllerEvent,
26
+ ControllerEventKind,
27
+ EndpointType,
33
28
  EventFieldType,
34
- EventKeyMetrics,
35
- EventLiveStats,
36
29
  FileTargetKind,
37
- ModelEndpointTarget,
38
30
  ProjectSecretKeys,
39
31
  )
40
- from mlrun.model_monitoring.db import StoreBase, TSDBConnector
32
+ from mlrun.model_monitoring.db import TSDBConnector
41
33
  from mlrun.utils import logger
42
34
 
43
35
 
@@ -51,7 +43,7 @@ class EventStreamProcessor:
51
43
  parquet_target: str,
52
44
  aggregate_windows: typing.Optional[list[str]] = None,
53
45
  aggregate_period: str = "5m",
54
- model_monitoring_access_key: str = None,
46
+ model_monitoring_access_key: typing.Optional[str] = None,
55
47
  ):
56
48
  # General configurations, mainly used for the storey steps in the future serving graph
57
49
  self.project = project
@@ -69,14 +61,11 @@ class EventStreamProcessor:
69
61
  parquet_batching_max_events=self.parquet_batching_max_events,
70
62
  )
71
63
 
72
- self.storage_options = None
73
64
  self.tsdb_configurations = {}
74
65
  if not mlrun.mlconf.is_ce_mode():
75
66
  self._initialize_v3io_configurations(
76
67
  model_monitoring_access_key=model_monitoring_access_key
77
68
  )
78
- elif self.parquet_path.startswith("s3://"):
79
- self.storage_options = mlrun.mlconf.get_s3_storage_options()
80
69
 
81
70
  def _initialize_v3io_configurations(
82
71
  self,
@@ -85,33 +74,18 @@ class EventStreamProcessor:
85
74
  v3io_access_key: typing.Optional[str] = None,
86
75
  v3io_framesd: typing.Optional[str] = None,
87
76
  v3io_api: typing.Optional[str] = None,
88
- model_monitoring_access_key: str = None,
77
+ model_monitoring_access_key: typing.Optional[str] = None,
89
78
  ):
90
79
  # Get the V3IO configurations
91
80
  self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
92
81
  self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
93
82
 
94
- self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
83
+ self.v3io_access_key = v3io_access_key or mlrun.mlconf.get_v3io_access_key()
95
84
  self.model_monitoring_access_key = (
96
85
  model_monitoring_access_key
97
- or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
86
+ or mlrun.get_secret_or_env(ProjectSecretKeys.ACCESS_KEY)
98
87
  or self.v3io_access_key
99
88
  )
100
- self.storage_options = dict(
101
- v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
102
- )
103
-
104
- # KV path
105
- kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
106
- project=self.project, kind=FileTargetKind.ENDPOINTS
107
- )
108
- (
109
- _,
110
- self.kv_container,
111
- self.kv_path,
112
- ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
113
- kv_path
114
- )
115
89
 
116
90
  # TSDB path and configurations
117
91
  tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
@@ -133,7 +107,7 @@ class EventStreamProcessor:
133
107
  self,
134
108
  fn: mlrun.runtimes.ServingRuntime,
135
109
  tsdb_connector: TSDBConnector,
136
- endpoint_store: StoreBase,
110
+ controller_stream_uri: str,
137
111
  ) -> None:
138
112
  """
139
113
  Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -162,31 +136,25 @@ class EventStreamProcessor:
162
136
 
163
137
  :param fn: A serving function.
164
138
  :param tsdb_connector: Time series database connector.
165
- :param endpoint_store: KV/SQL store used for endpoint data.
139
+ :param controller_stream_uri: The controller stream URI. Runs on server api pod so needed to be provided as
140
+ input
166
141
  """
167
142
 
168
143
  graph = typing.cast(
169
144
  mlrun.serving.states.RootFlowStep,
170
- fn.set_topology(mlrun.serving.states.StepKinds.flow),
171
- )
172
- graph.add_step(
173
- "ExtractEndpointID",
174
- "extract_endpoint",
175
- full_event=True,
145
+ fn.set_topology(mlrun.serving.states.StepKinds.flow, engine="async"),
176
146
  )
177
147
 
178
148
  # split the graph between event with error vs valid event
179
149
  graph.add_step(
180
150
  "storey.Filter",
181
151
  "FilterError",
182
- after="extract_endpoint",
183
152
  _fn="(event.get('error') is None)",
184
153
  )
185
154
 
186
155
  graph.add_step(
187
156
  "storey.Filter",
188
157
  "ForwardError",
189
- after="extract_endpoint",
190
158
  _fn="(event.get('error') is not None)",
191
159
  )
192
160
 
@@ -198,7 +166,7 @@ class EventStreamProcessor:
198
166
  def apply_process_endpoint_event():
199
167
  graph.add_step(
200
168
  "ProcessEndpointEvent",
201
- after="extract_endpoint", # TODO: change this to FilterError in ML-7456
169
+ after="FilterError",
202
170
  full_event=True,
203
171
  project=self.project,
204
172
  )
@@ -233,79 +201,25 @@ class EventStreamProcessor:
233
201
  )
234
202
 
235
203
  apply_map_feature_names()
204
+ # split the graph between event with error vs valid event
205
+ graph.add_step(
206
+ "storey.Filter",
207
+ "FilterNOP",
208
+ after="MapFeatureNames",
209
+ _fn="(event.get('kind', " ") != 'nop_event')",
210
+ )
211
+ graph.add_step(
212
+ "storey.Filter",
213
+ "ForwardNOP",
214
+ after="MapFeatureNames",
215
+ _fn="(event.get('kind', " ") == 'nop_event')",
216
+ )
236
217
 
237
- # Calculate number of predictions and average latency
238
- def apply_storey_aggregations():
239
- # Calculate number of predictions for each window (5 min and 1 hour by default)
240
- graph.add_step(
241
- class_name="storey.AggregateByKey",
242
- aggregates=[
243
- {
244
- "name": EventFieldType.LATENCY,
245
- "column": EventFieldType.LATENCY,
246
- "operations": ["count", "avg"],
247
- "windows": self.aggregate_windows,
248
- "period": self.aggregate_period,
249
- }
250
- ],
251
- name=EventFieldType.LATENCY,
252
- after="MapFeatureNames",
253
- step_name="Aggregates",
254
- table=".",
255
- key_field=EventFieldType.ENDPOINT_ID,
256
- )
257
- # Calculate average latency time for each window (5 min and 1 hour by default)
258
- graph.add_step(
259
- class_name="storey.Rename",
260
- mapping={
261
- "latency_count_5m": EventLiveStats.PREDICTIONS_COUNT_5M,
262
- "latency_count_1h": EventLiveStats.PREDICTIONS_COUNT_1H,
263
- },
264
- name="Rename",
265
- after=EventFieldType.LATENCY,
266
- )
267
-
268
- apply_storey_aggregations()
269
-
270
- # KV/SQL branch
271
- # Filter relevant keys from the event before writing the data into the database table
272
- def apply_process_before_endpoint_update():
273
- graph.add_step(
274
- "ProcessBeforeEndpointUpdate",
275
- name="ProcessBeforeEndpointUpdate",
276
- after="Rename",
277
- )
278
-
279
- apply_process_before_endpoint_update()
280
-
281
- # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
282
- # about average latency and the amount of predictions over time
283
- def apply_update_endpoint():
284
- graph.add_step(
285
- "UpdateEndpoint",
286
- name="UpdateEndpoint",
287
- after="ProcessBeforeEndpointUpdate",
288
- project=self.project,
289
- )
290
-
291
- apply_update_endpoint()
292
-
293
- # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
294
- # which will be used by Grafana monitoring dashboards
295
- def apply_infer_schema():
296
- graph.add_step(
297
- "InferSchema",
298
- name="InferSchema",
299
- after="UpdateEndpoint",
300
- v3io_framesd=self.v3io_framesd,
301
- container=self.kv_container,
302
- table=self.kv_path,
303
- )
304
-
305
- if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
306
- apply_infer_schema()
307
-
308
- tsdb_connector.apply_monitoring_stream_steps(graph=graph)
218
+ tsdb_connector.apply_monitoring_stream_steps(
219
+ graph=graph,
220
+ aggregate_windows=self.aggregate_windows,
221
+ aggregate_period=self.aggregate_period,
222
+ )
309
223
 
310
224
  # Parquet branch
311
225
  # Filter and validate different keys before writing the data to Parquet target
@@ -313,7 +227,7 @@ class EventStreamProcessor:
313
227
  graph.add_step(
314
228
  "ProcessBeforeParquet",
315
229
  name="ProcessBeforeParquet",
316
- after="MapFeatureNames",
230
+ after="FilterNOP",
317
231
  _fn="(event)",
318
232
  )
319
233
 
@@ -322,12 +236,12 @@ class EventStreamProcessor:
322
236
  # Write the Parquet target file, partitioned by key (endpoint_id) and time.
323
237
  def apply_parquet_target():
324
238
  graph.add_step(
325
- "storey.ParquetTarget",
239
+ "mlrun.datastore.storeytargets.ParquetStoreyTarget",
240
+ alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
326
241
  name="ParquetTarget",
327
242
  after="ProcessBeforeParquet",
328
243
  graph_shape="cylinder",
329
244
  path=self.parquet_path,
330
- storage_options=self.storage_options,
331
245
  max_events=self.parquet_batching_max_events,
332
246
  flush_after_seconds=self.parquet_batching_timeout_secs,
333
247
  attributes={"infer_columns_from_data": True},
@@ -340,90 +254,20 @@ class EventStreamProcessor:
340
254
 
341
255
  apply_parquet_target()
342
256
 
257
+ # controller branch
258
+ def apply_push_controller_stream(stream_uri: str):
259
+ graph.add_step(
260
+ ">>",
261
+ "controller_stream",
262
+ path=stream_uri,
263
+ sharding_func=ControllerEvent.ENDPOINT_ID,
264
+ after="ForwardNOP",
265
+ # Force using the pipeline key instead of the one in the profile in case of v3io profile.
266
+ # In case of Kafka, this parameter will be ignored.
267
+ alternative_v3io_access_key="V3IO_ACCESS_KEY",
268
+ )
343
269
 
344
- class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
345
- def __init__(self, **kwargs):
346
- """
347
- Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
348
- Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
349
- average latency and the number of predictions (per 5min and 1hour).
350
-
351
- :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
352
- """
353
- super().__init__(**kwargs)
354
-
355
- def do(self, event):
356
- # Compute prediction per second
357
- event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
358
- float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
359
- )
360
- # Filter relevant keys
361
- e = {
362
- k: event[k]
363
- for k in [
364
- EventFieldType.FUNCTION_URI,
365
- EventFieldType.MODEL,
366
- EventFieldType.MODEL_CLASS,
367
- EventFieldType.ENDPOINT_ID,
368
- EventFieldType.LABELS,
369
- EventFieldType.FIRST_REQUEST,
370
- EventFieldType.LAST_REQUEST,
371
- EventFieldType.ERROR_COUNT,
372
- ]
373
- }
374
-
375
- # Add generic metrics statistics
376
- generic_metrics = {
377
- k: event[k]
378
- for k in [
379
- EventLiveStats.LATENCY_AVG_5M,
380
- EventLiveStats.LATENCY_AVG_1H,
381
- EventLiveStats.PREDICTIONS_PER_SECOND,
382
- EventLiveStats.PREDICTIONS_COUNT_5M,
383
- EventLiveStats.PREDICTIONS_COUNT_1H,
384
- ]
385
- }
386
-
387
- e[EventFieldType.METRICS] = json.dumps(
388
- {EventKeyMetrics.GENERIC: generic_metrics}
389
- )
390
-
391
- # Write labels as json string as required by the DB format
392
- e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
393
-
394
- return e
395
-
396
-
397
- class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
398
- def __init__(self, **kwargs) -> None:
399
- """
400
- Generate the model endpoint ID based on the event parameters and attach it to the event.
401
- """
402
- super().__init__(**kwargs)
403
-
404
- def do(self, full_event) -> typing.Union[storey.Event, None]:
405
- # Getting model version and function uri from event
406
- # and use them for retrieving the endpoint_id
407
- function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
408
- if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
409
- return None
410
-
411
- model = full_event.body.get(EventFieldType.MODEL)
412
- if not is_not_none(model, [EventFieldType.MODEL]):
413
- return None
414
-
415
- version = full_event.body.get(EventFieldType.VERSION)
416
- versioned_model = f"{model}:{version}" if version else f"{model}:latest"
417
-
418
- endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
419
- function_uri=function_uri,
420
- versioned_model=versioned_model,
421
- )
422
-
423
- endpoint_id = str(endpoint_id)
424
- full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
425
- full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
426
- return full_event
270
+ apply_push_controller_stream(controller_stream_uri)
427
271
 
428
272
 
429
273
  class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
@@ -490,28 +334,34 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
490
334
  self.first_request: dict[str, str] = dict()
491
335
  self.last_request: dict[str, str] = dict()
492
336
 
493
- # Number of errors (value) per endpoint (key)
494
- self.error_count: dict[str, int] = collections.defaultdict(int)
495
-
496
337
  # Set of endpoints in the current events
497
338
  self.endpoints: set[str] = set()
498
339
 
499
340
  def do(self, full_event):
500
341
  event = full_event.body
342
+ if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
343
+ logger.debug(
344
+ "Skipped nop event inside of ProcessEndpointEvent", event=event
345
+ )
346
+ full_event.body = [event]
347
+ return full_event
348
+ # Getting model version and function uri from event
349
+ # and use them for retrieving the endpoint_id
350
+ function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
351
+ if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
352
+ return None
353
+
354
+ model = full_event.body.get(EventFieldType.MODEL)
355
+ if not is_not_none(model, [EventFieldType.MODEL]):
356
+ return None
501
357
 
502
- versioned_model = event[EventFieldType.VERSIONED_MODEL]
503
358
  endpoint_id = event[EventFieldType.ENDPOINT_ID]
504
- function_uri = event[EventFieldType.FUNCTION_URI]
505
359
 
506
360
  # In case this process fails, resume state from existing record
507
- self.resume_state(endpoint_id)
508
-
509
- # If error key has been found in the current event,
510
- # increase the error counter by 1 and raise the error description
511
- error = event.get("error")
512
- if error: # TODO: delete this in ML-7456
513
- self.error_count[endpoint_id] += 1
514
- raise mlrun.errors.MLRunInvalidArgumentError(str(error))
361
+ self.resume_state(
362
+ endpoint_id=endpoint_id,
363
+ endpoint_name=full_event.body.get(EventFieldType.MODEL),
364
+ )
515
365
 
516
366
  # Validate event fields
517
367
  model_class = event.get("model_class") or event.get("class")
@@ -524,10 +374,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
524
374
  predictions = event.get("resp", {}).get("outputs")
525
375
 
526
376
  if not self.is_valid(
527
- endpoint_id,
528
- is_not_none,
529
- timestamp,
530
- ["when"],
377
+ validation_function=is_not_none,
378
+ field=timestamp,
379
+ dict_path=["when"],
531
380
  ):
532
381
  return None
533
382
 
@@ -535,45 +384,33 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
535
384
  # Set time for the first request of the current endpoint
536
385
  self.first_request[endpoint_id] = timestamp
537
386
 
538
- # Validate that the request time of the current event is later than the previous request time
539
- self._validate_last_request_timestamp(
540
- endpoint_id=endpoint_id, timestamp=timestamp
541
- )
542
-
543
- # Set time for the last reqeust of the current endpoint
544
- self.last_request[endpoint_id] = timestamp
545
-
546
387
  if not self.is_valid(
547
- endpoint_id,
548
- is_not_none,
549
- request_id,
550
- ["request", "id"],
388
+ validation_function=is_not_none,
389
+ field=request_id,
390
+ dict_path=["request", "id"],
551
391
  ):
552
392
  return None
553
393
  if not self.is_valid(
554
- endpoint_id,
555
- is_not_none,
556
- latency,
557
- ["microsec"],
394
+ validation_function=is_not_none,
395
+ field=latency,
396
+ dict_path=["microsec"],
558
397
  ):
559
398
  return None
560
399
  if not self.is_valid(
561
- endpoint_id,
562
- is_not_none,
563
- features,
564
- ["request", "inputs"],
400
+ validation_function=is_not_none,
401
+ field=features,
402
+ dict_path=["request", "inputs"],
565
403
  ):
566
404
  return None
567
405
  if not self.is_valid(
568
- endpoint_id,
569
- is_not_none,
570
- predictions,
571
- ["resp", "outputs"],
406
+ validation_function=is_not_none,
407
+ field=predictions,
408
+ dict_path=["resp", "outputs"],
572
409
  ):
573
410
  return None
574
411
 
575
412
  # Convert timestamp to a datetime object
576
- timestamp = datetime.datetime.fromisoformat(timestamp)
413
+ timestamp_obj = datetime.datetime.fromisoformat(timestamp)
577
414
 
578
415
  # Separate each model invocation into sub events that will be stored as dictionary
579
416
  # in list of events. This list will be used as the body for the storey event.
@@ -605,96 +442,93 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
605
442
  if not isinstance(feature, list):
606
443
  feature = [feature]
607
444
 
445
+ effective_sample_count, estimated_prediction_count = (
446
+ self._get_effective_and_estimated_counts(event=event)
447
+ )
448
+
608
449
  events.append(
609
450
  {
610
451
  EventFieldType.FUNCTION_URI: function_uri,
611
- EventFieldType.MODEL: versioned_model,
452
+ EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
612
453
  EventFieldType.MODEL_CLASS: model_class,
613
- EventFieldType.TIMESTAMP: timestamp,
454
+ EventFieldType.TIMESTAMP: timestamp_obj,
614
455
  EventFieldType.ENDPOINT_ID: endpoint_id,
615
456
  EventFieldType.REQUEST_ID: request_id,
616
457
  EventFieldType.LATENCY: latency,
617
458
  EventFieldType.FEATURES: feature,
618
459
  EventFieldType.PREDICTION: prediction,
619
460
  EventFieldType.FIRST_REQUEST: self.first_request[endpoint_id],
620
- EventFieldType.LAST_REQUEST: self.last_request[endpoint_id],
461
+ EventFieldType.LAST_REQUEST: timestamp,
621
462
  EventFieldType.LAST_REQUEST_TIMESTAMP: mlrun.utils.enrich_datetime_with_tz_info(
622
- self.last_request[endpoint_id]
463
+ timestamp
623
464
  ).timestamp(),
624
- EventFieldType.ERROR_COUNT: self.error_count[endpoint_id],
625
465
  EventFieldType.LABELS: event.get(EventFieldType.LABELS, {}),
626
466
  EventFieldType.METRICS: event.get(EventFieldType.METRICS, {}),
627
467
  EventFieldType.ENTITIES: event.get("request", {}).get(
628
468
  EventFieldType.ENTITIES, {}
629
469
  ),
470
+ EventFieldType.EFFECTIVE_SAMPLE_COUNT: effective_sample_count,
471
+ EventFieldType.ESTIMATED_PREDICTION_COUNT: estimated_prediction_count,
630
472
  }
631
473
  )
632
474
 
633
475
  # Create a storey event object with list of events, based on endpoint_id which will be used
634
476
  # in the upcoming steps
635
- storey_event = storey.Event(body=events, key=endpoint_id)
636
- return storey_event
637
-
638
- def _validate_last_request_timestamp(self, endpoint_id: str, timestamp: str):
639
- """Validate that the request time of the current event is later than the previous request time that has
640
- already been processed.
641
-
642
- :param endpoint_id: The unique id of the model endpoint.
643
- :param timestamp: Event request time as a string.
644
-
645
- :raise MLRunPreconditionFailedError: If the request time of the current is later than the previous request time.
646
- """
647
-
648
- if (
649
- endpoint_id in self.last_request
650
- and self.last_request[endpoint_id] > timestamp
651
- ):
652
- logger.error(
653
- f"current event request time {timestamp} is earlier than the last request time "
654
- f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
655
- )
477
+ full_event.key = endpoint_id
478
+ full_event.body = events
479
+ return full_event
656
480
 
657
- def resume_state(self, endpoint_id):
481
+ def resume_state(self, endpoint_id, endpoint_name):
658
482
  # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
659
483
  # left them
660
484
  if endpoint_id not in self.endpoints:
661
485
  logger.info("Trying to resume state", endpoint_id=endpoint_id)
662
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
663
- project=self.project,
664
- endpoint_id=endpoint_id,
486
+ endpoint_record = (
487
+ mlrun.db.get_run_db()
488
+ .get_model_endpoint(
489
+ project=self.project,
490
+ endpoint_id=endpoint_id,
491
+ name=endpoint_name,
492
+ tsdb_metrics=False,
493
+ )
494
+ .flat_dict()
665
495
  )
666
496
 
667
- # If model endpoint found, get first_request, last_request and error_count values
497
+ # If model endpoint found, get first_request & last_request values
668
498
  if endpoint_record:
669
499
  first_request = endpoint_record.get(EventFieldType.FIRST_REQUEST)
670
500
 
671
501
  if first_request:
672
502
  self.first_request[endpoint_id] = first_request
673
503
 
674
- last_request = endpoint_record.get(EventFieldType.LAST_REQUEST)
675
- if last_request:
676
- self.last_request[endpoint_id] = last_request
677
-
678
- error_count = endpoint_record.get(EventFieldType.ERROR_COUNT)
679
-
680
- if error_count:
681
- self.error_count[endpoint_id] = int(error_count)
682
-
683
504
  # add endpoint to endpoints set
684
505
  self.endpoints.add(endpoint_id)
685
506
 
686
507
  def is_valid(
687
508
  self,
688
- endpoint_id: str,
689
509
  validation_function,
690
510
  field: typing.Any,
691
511
  dict_path: list[str],
692
512
  ):
693
513
  if validation_function(field, dict_path):
694
514
  return True
695
- self.error_count[endpoint_id] += 1
515
+
696
516
  return False
697
517
 
518
+ @staticmethod
519
+ def _get_effective_and_estimated_counts(event):
520
+ """
521
+ Calculate the `effective_sample_count` and the `estimated_prediction_count` based on the event's
522
+ sampling percentage. These values will be stored in the TSDB target.
523
+ Note that In non-batch serving, the `effective_sample_count` is always set to 1. In addition, when the sampling
524
+ percentage is 100%, the `estimated_prediction_count` is equal to the `effective_sample_count`.
525
+ """
526
+ effective_sample_count = event.get(EventFieldType.EFFECTIVE_SAMPLE_COUNT, 1)
527
+ estimated_prediction_count = effective_sample_count * (
528
+ 100 / event.get(EventFieldType.SAMPLING_PERCENTAGE, 100)
529
+ )
530
+ return effective_sample_count, estimated_prediction_count
531
+
698
532
 
699
533
  def is_not_none(field: typing.Any, dict_path: list[str]):
700
534
  if field is not None:
@@ -735,6 +569,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
735
569
  # and labels columns were not found in the current event
736
570
  self.feature_names = {}
737
571
  self.label_columns = {}
572
+ self.first_request = {}
738
573
 
739
574
  # Dictionary to manage the model endpoint types - important for the V3IO TSDB
740
575
  self.endpoint_type = {}
@@ -756,6 +591,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
756
591
  return None
757
592
 
758
593
  def do(self, event: dict):
594
+ if event.get(ControllerEvent.KIND, "") == ControllerEventKind.NOP_EVENT:
595
+ return event
759
596
  endpoint_id = event[EventFieldType.ENDPOINT_ID]
760
597
 
761
598
  feature_values = event[EventFieldType.FEATURES]
@@ -766,23 +603,30 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
766
603
  if isinstance(feature_value, int):
767
604
  feature_values[index] = float(feature_value)
768
605
 
606
+ attributes_to_update = {}
607
+ endpoint_record = None
769
608
  # Get feature names and label columns
770
609
  if endpoint_id not in self.feature_names:
771
- endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
772
- project=self.project,
773
- endpoint_id=endpoint_id,
610
+ endpoint_record = (
611
+ mlrun.db.get_run_db()
612
+ .get_model_endpoint(
613
+ project=self.project,
614
+ endpoint_id=endpoint_id,
615
+ name=event[EventFieldType.ENDPOINT_NAME],
616
+ tsdb_metrics=False,
617
+ )
618
+ .flat_dict()
774
619
  )
775
620
  feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
776
- feature_names = json.loads(feature_names) if feature_names else None
777
621
 
778
622
  label_columns = endpoint_record.get(EventFieldType.LABEL_NAMES)
779
- label_columns = json.loads(label_columns) if label_columns else None
780
623
 
781
624
  # If feature names were not found,
782
625
  # try to retrieve them from the previous events of the current process
783
626
  if not feature_names and self._infer_columns_from_data:
784
627
  feature_names = self._infer_feature_names_from_data(event)
785
628
 
629
+ endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
786
630
  if not feature_names:
787
631
  logger.warn(
788
632
  "Feature names are not initialized, they will be automatically generated",
@@ -793,19 +637,14 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
793
637
  ]
794
638
 
795
639
  # Update the endpoint record with the generated features
796
- update_endpoint_record(
797
- project=self.project,
798
- endpoint_id=endpoint_id,
799
- attributes={
800
- EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
801
- },
802
- )
640
+ attributes_to_update[EventFieldType.FEATURE_NAMES] = feature_names
803
641
 
804
- update_monitoring_feature_set(
805
- endpoint_record=endpoint_record,
806
- feature_names=feature_names,
807
- feature_values=feature_values,
808
- )
642
+ if endpoint_type != EndpointType.ROUTER.value:
643
+ update_monitoring_feature_set(
644
+ endpoint_record=endpoint_record,
645
+ feature_names=feature_names,
646
+ feature_values=feature_values,
647
+ )
809
648
 
810
649
  # Similar process with label columns
811
650
  if not label_columns and self._infer_columns_from_data:
@@ -819,17 +658,13 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
819
658
  label_columns = [
820
659
  f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
821
660
  ]
822
-
823
- update_endpoint_record(
824
- project=self.project,
825
- endpoint_id=endpoint_id,
826
- attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
827
- )
828
- update_monitoring_feature_set(
829
- endpoint_record=endpoint_record,
830
- feature_names=label_columns,
831
- feature_values=label_values,
832
- )
661
+ attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns
662
+ if endpoint_type != EndpointType.ROUTER.value:
663
+ update_monitoring_feature_set(
664
+ endpoint_record=endpoint_record,
665
+ feature_names=label_columns,
666
+ feature_values=label_values,
667
+ )
833
668
 
834
669
  self.label_columns[endpoint_id] = label_columns
835
670
  self.feature_names[endpoint_id] = feature_names
@@ -842,9 +677,41 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
842
677
  )
843
678
 
844
679
  # Update the endpoint type within the endpoint types dictionary
845
- endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
846
680
  self.endpoint_type[endpoint_id] = endpoint_type
847
681
 
682
+ # Update the first request time in the endpoint record
683
+ if endpoint_id not in self.first_request:
684
+ endpoint_record = endpoint_record or (
685
+ mlrun.db.get_run_db()
686
+ .get_model_endpoint(
687
+ project=self.project,
688
+ endpoint_id=endpoint_id,
689
+ name=event[EventFieldType.ENDPOINT_NAME],
690
+ tsdb_metrics=False,
691
+ )
692
+ .flat_dict()
693
+ )
694
+ if not endpoint_record.get(EventFieldType.FIRST_REQUEST):
695
+ attributes_to_update[EventFieldType.FIRST_REQUEST] = (
696
+ mlrun.utils.enrich_datetime_with_tz_info(
697
+ event[EventFieldType.FIRST_REQUEST]
698
+ )
699
+ )
700
+ self.first_request[endpoint_id] = True
701
+
702
+ if attributes_to_update:
703
+ logger.info(
704
+ "Updating endpoint record",
705
+ endpoint_id=endpoint_id,
706
+ attributes=attributes_to_update,
707
+ )
708
+ update_endpoint_record(
709
+ project=self.project,
710
+ endpoint_id=endpoint_id,
711
+ attributes=attributes_to_update,
712
+ endpoint_name=event[EventFieldType.ENDPOINT_NAME],
713
+ )
714
+
848
715
  # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
849
716
  feature_names = self.feature_names[endpoint_id]
850
717
  self._map_dictionary_values(
@@ -890,35 +757,13 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
890
757
 
891
758
  """
892
759
  event[mapping_dictionary] = {}
760
+ diff = len(named_iters) - len(values_iters)
761
+ values_iters += [None] * diff
893
762
  for name, value in zip(named_iters, values_iters):
894
763
  event[name] = value
895
764
  event[mapping_dictionary][name] = value
896
765
 
897
766
 
898
- class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
899
- def __init__(self, project: str, **kwargs):
900
- """
901
- Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
902
- the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
903
- such as "Model Monitoring - Performance" which can be found in Grafana.
904
-
905
- :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
906
- """
907
- super().__init__(**kwargs)
908
- self.project = project
909
-
910
- def do(self, event: dict):
911
- # Remove labels from the event
912
- event.pop(EventFieldType.LABELS)
913
-
914
- update_endpoint_record(
915
- project=self.project,
916
- endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
917
- attributes=event,
918
- )
919
- return event
920
-
921
-
922
767
  class InferSchema(mlrun.feature_store.steps.MapClass):
923
768
  def __init__(
924
769
  self,
@@ -963,14 +808,14 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
963
808
  def update_endpoint_record(
964
809
  project: str,
965
810
  endpoint_id: str,
811
+ endpoint_name: str,
966
812
  attributes: dict,
967
813
  ):
968
- model_endpoint_store = mlrun.model_monitoring.get_store_object(
814
+ mlrun.db.get_run_db().patch_model_endpoint(
969
815
  project=project,
970
- )
971
-
972
- model_endpoint_store.update_model_endpoint(
973
- endpoint_id=endpoint_id, attributes=attributes
816
+ endpoint_id=endpoint_id,
817
+ attributes=attributes,
818
+ name=endpoint_name,
974
819
  )
975
820
 
976
821