mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (160) hide show
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +169 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +30 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +26 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/__init__.py +6 -0
  64. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  65. mlrun/frameworks/parallel_coordinates.py +2 -1
  66. mlrun/frameworks/tf_keras/__init__.py +4 -1
  67. mlrun/k8s_utils.py +10 -11
  68. mlrun/launcher/base.py +4 -3
  69. mlrun/launcher/client.py +5 -3
  70. mlrun/launcher/local.py +12 -2
  71. mlrun/launcher/remote.py +9 -2
  72. mlrun/lists.py +6 -2
  73. mlrun/model.py +47 -21
  74. mlrun/model_monitoring/__init__.py +1 -1
  75. mlrun/model_monitoring/api.py +42 -18
  76. mlrun/model_monitoring/application.py +5 -305
  77. mlrun/model_monitoring/applications/__init__.py +11 -0
  78. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  79. mlrun/model_monitoring/applications/base.py +280 -0
  80. mlrun/model_monitoring/applications/context.py +214 -0
  81. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  82. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  83. mlrun/model_monitoring/applications/results.py +99 -0
  84. mlrun/model_monitoring/controller.py +3 -1
  85. mlrun/model_monitoring/db/__init__.py +2 -0
  86. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  87. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  88. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  89. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  90. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  91. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  92. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  93. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  94. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  95. mlrun/model_monitoring/db/tsdb/base.py +316 -0
  96. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
  104. mlrun/model_monitoring/evidently_application.py +6 -118
  105. mlrun/model_monitoring/helpers.py +63 -1
  106. mlrun/model_monitoring/model_endpoint.py +3 -2
  107. mlrun/model_monitoring/stream_processing.py +57 -216
  108. mlrun/model_monitoring/writer.py +134 -124
  109. mlrun/package/__init__.py +13 -1
  110. mlrun/package/packagers/__init__.py +6 -1
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +24 -12
  115. mlrun/projects/pipelines.py +79 -102
  116. mlrun/projects/project.py +271 -103
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -46
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +14 -7
  121. mlrun/runtimes/daskjob.py +1 -0
  122. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  123. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  124. mlrun/runtimes/funcdoc.py +0 -28
  125. mlrun/runtimes/kubejob.py +2 -1
  126. mlrun/runtimes/local.py +12 -3
  127. mlrun/runtimes/mpijob/__init__.py +0 -20
  128. mlrun/runtimes/mpijob/v1.py +1 -1
  129. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  130. mlrun/runtimes/nuclio/application/application.py +170 -8
  131. mlrun/runtimes/nuclio/function.py +39 -49
  132. mlrun/runtimes/pod.py +16 -36
  133. mlrun/runtimes/remotesparkjob.py +9 -3
  134. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  135. mlrun/runtimes/utils.py +6 -45
  136. mlrun/serving/__init__.py +8 -1
  137. mlrun/serving/server.py +2 -1
  138. mlrun/serving/states.py +51 -8
  139. mlrun/serving/utils.py +19 -11
  140. mlrun/serving/v2_serving.py +5 -1
  141. mlrun/track/tracker.py +2 -1
  142. mlrun/utils/async_http.py +25 -5
  143. mlrun/utils/helpers.py +157 -83
  144. mlrun/utils/logger.py +39 -7
  145. mlrun/utils/notifications/notification/__init__.py +14 -9
  146. mlrun/utils/notifications/notification/base.py +1 -1
  147. mlrun/utils/notifications/notification/slack.py +34 -7
  148. mlrun/utils/notifications/notification/webhook.py +1 -1
  149. mlrun/utils/notifications/notification_pusher.py +147 -16
  150. mlrun/utils/regex.py +9 -0
  151. mlrun/utils/v3io_clients.py +0 -1
  152. mlrun/utils/version/version.json +2 -2
  153. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
  154. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
  155. mlrun/kfpops.py +0 -865
  156. mlrun/platforms/other.py +0 -305
  157. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
  158. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
  159. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
  160. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,6 @@ import mlrun.model_monitoring.db
30
30
  import mlrun.model_monitoring.prometheus
31
31
  import mlrun.serving.states
32
32
  import mlrun.utils
33
- import mlrun.utils.v3io_clients
34
33
  from mlrun.common.schemas.model_monitoring.constants import (
35
34
  EventFieldType,
36
35
  EventKeyMetrics,
@@ -40,7 +39,6 @@ from mlrun.common.schemas.model_monitoring.constants import (
40
39
  ProjectSecretKeys,
41
40
  PrometheusEndpoints,
42
41
  )
43
- from mlrun.model_monitoring.helpers import get_endpoint_record
44
42
  from mlrun.utils import logger
45
43
 
46
44
 
@@ -79,6 +77,7 @@ class EventStreamProcessor:
79
77
  )
80
78
 
81
79
  self.storage_options = None
80
+ self.tsdb_configurations = {}
82
81
  if not mlrun.mlconf.is_ce_mode():
83
82
  self._initialize_v3io_configurations(
84
83
  model_monitoring_access_key=model_monitoring_access_key
@@ -137,33 +136,38 @@ class EventStreamProcessor:
137
136
  self.tsdb_batching_max_events = tsdb_batching_max_events
138
137
  self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
139
138
 
140
- def apply_monitoring_serving_graph(self, fn: mlrun.runtimes.ServingRuntime) -> None:
139
+ def apply_monitoring_serving_graph(
140
+ self,
141
+ fn: mlrun.runtimes.ServingRuntime,
142
+ tsdb_service_provider: typing.Optional[typing.Callable] = None,
143
+ ) -> None:
141
144
  """
142
- Apply monitoring serving graph to a given serving function. The following serving graph includes about 20 steps
143
- of different operations that are executed on the events from the model server. Each event has
144
- metadata (function_uri, timestamp, class, etc.) but also inputs and predictions from the model server.
145
- Throughout the serving graph, the results are written to 3 different databases:
146
- 1. KV/SQL (steps 9-11): Stores metadata and stats about the average latency and the amount of predictions over
147
- time per endpoint. for example the amount of predictions of endpoint x in the last 5 min. This data is used
148
- by the monitoring dashboards in grafana. The model endpoints table also contains data on the model endpoint
149
- from other processes, such as current_stats that is being calculated by the monitoring batch job
150
- process. If the target is from type KV, then the model endpoints table can be found under
151
- v3io:///users/pipelines/project-name/model-endpoints/endpoints/. If the target is SQL, then the table
152
- is stored within the database that was defined in the provided connection string and can be found
153
- under mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection.
154
- 2. V3IO TSDB/Prometheus (steps 13-21): Stores live data of different key metric dictionaries in tsdb target.
155
- This data is being used by the monitoring dashboards in grafana. If using V3IO TSDB (steps 13-19), results
145
+ Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
146
+ parts that each one of them includes several steps of different operations that are executed on the events from
147
+ the model server.
148
+ Each event has metadata (function_uri, timestamp, class, etc.) but also inputs, predictions and optional
149
+ metrics from the model server.
150
+ In this first part, the serving graph processes the event and splits it into sub-events. This part also includes
151
+ validation of the event data and adding important details to the event such as endpoint_id.
152
+ In the next parts, the serving graph stores data to 3 different targets:
153
+ 1. KV/SQL: Metadata and basic stats about the average latency and the amount of predictions over
154
+ time per endpoint. For example, the amount of predictions of endpoint x in the last 5 min. The model
155
+ endpoints table also contains data on the model endpoint from other processes, such as feature_stats that
156
+ represents sample statistics from the training data. If the target is from type KV, then the model endpoints
157
+ table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/. If the target is
158
+ SQL, then the table is stored within the database that was defined in the provided connection string.
159
+ 2. TSDB: live data of different key metric dictionaries in tsdb target.
160
+ This data is being used by the monitoring dashboards in grafana. If using V3IO TSDB, results
156
161
  can be found under v3io:///users/pipelines/project-name/model-endpoints/events/. In that case, we generate
157
162
  3 different key metric dictionaries: base_metrics (average latency and predictions over time),
158
163
  endpoint_features (Prediction and feature names and values), and custom_metrics (user-defined metrics).
159
- If using Prometheus (steps 20-21), we update metrics in the Prometheus registry that is stored in the
160
- monitoring stream local memory.
161
- 3. Parquet (steps 22-23): This Parquet file includes the required data for the model monitoring batch job
162
- that run every hour by default. If defined, the parquet target path can be found under
163
- mlrun.mlconf.model_endpoint_monitoring.offline. Otherwise, the default parquet path is under
164
- mlrun.mlconf.model_endpoint_monitoring.user_space.
164
+ 3. Parquet: This Parquet file includes the required data for the model monitoring applications. If defined,
165
+ the parquet target path can be found under mlrun.mlconf.model_endpoint_monitoring.offline. Otherwise,
166
+ the default parquet path is under mlrun.mlconf.model_endpoint_monitoring.user_space. Note that if you are
167
+ using CE, the parquet target path is based on the defined MLRun artifact path.
165
168
 
166
169
  :param fn: A serving function.
170
+ :param tsdb_service_provider: An optional callable function that provides the TSDB connection string.
167
171
  """
168
172
 
169
173
  graph = typing.cast(
@@ -171,7 +175,7 @@ class EventStreamProcessor:
171
175
  fn.set_topology(mlrun.serving.states.StepKinds.flow),
172
176
  )
173
177
 
174
- # Step 1 - Event routing based on the provided path
178
+ # Event routing based on the provided path
175
179
  def apply_event_routing():
176
180
  typing.cast(
177
181
  mlrun.serving.TaskStep,
@@ -184,7 +188,7 @@ class EventStreamProcessor:
184
188
 
185
189
  apply_event_routing()
186
190
 
187
- # Step 2 - Filter out events with '-' in the path basename from going forward
191
+ # Filter out events with '-' in the path basename from going forward
188
192
  # through the next steps of the stream graph
189
193
  def apply_storey_filter_stream_events():
190
194
  # Filter events with Prometheus endpoints path
@@ -197,7 +201,7 @@ class EventStreamProcessor:
197
201
 
198
202
  apply_storey_filter_stream_events()
199
203
 
200
- # Step 3 - Process endpoint event: splitting into sub-events and validate event data
204
+ # Process endpoint event: splitting into sub-events and validate event data
201
205
  def apply_process_endpoint_event():
202
206
  graph.add_step(
203
207
  "ProcessEndpointEvent",
@@ -208,7 +212,7 @@ class EventStreamProcessor:
208
212
 
209
213
  apply_process_endpoint_event()
210
214
 
211
- # Steps 4,5 - Applying Storey operations of filtering and flatten
215
+ # Applying Storey operations of filtering and flatten
212
216
  def apply_storey_filter_and_flatmap():
213
217
  # Remove none values from each event
214
218
  graph.add_step(
@@ -225,7 +229,7 @@ class EventStreamProcessor:
225
229
 
226
230
  apply_storey_filter_and_flatmap()
227
231
 
228
- # Step 6 - Validating feature names and map each feature to its value
232
+ # Validating feature names and map each feature to its value
229
233
  def apply_map_feature_names():
230
234
  graph.add_step(
231
235
  "MapFeatureNames",
@@ -237,9 +241,9 @@ class EventStreamProcessor:
237
241
 
238
242
  apply_map_feature_names()
239
243
 
240
- # Step 7 - Calculate number of predictions and average latency
244
+ # Calculate number of predictions and average latency
241
245
  def apply_storey_aggregations():
242
- # Step 7.1 - Calculate number of predictions for each window (5 min and 1 hour by default)
246
+ # Calculate number of predictions for each window (5 min and 1 hour by default)
243
247
  graph.add_step(
244
248
  class_name="storey.AggregateByKey",
245
249
  aggregates=[
@@ -257,7 +261,7 @@ class EventStreamProcessor:
257
261
  table=".",
258
262
  key_field=EventFieldType.ENDPOINT_ID,
259
263
  )
260
- # Step 7.2 - Calculate average latency time for each window (5 min and 1 hour by default)
264
+ # Calculate average latency time for each window (5 min and 1 hour by default)
261
265
  graph.add_step(
262
266
  class_name="storey.Rename",
263
267
  mapping={
@@ -270,8 +274,8 @@ class EventStreamProcessor:
270
274
 
271
275
  apply_storey_aggregations()
272
276
 
273
- # Steps 8-10 - KV/SQL branch
274
- # Step 8 - Filter relevant keys from the event before writing the data into the database table
277
+ # KV/SQL branch
278
+ # Filter relevant keys from the event before writing the data into the database table
275
279
  def apply_process_before_endpoint_update():
276
280
  graph.add_step(
277
281
  "ProcessBeforeEndpointUpdate",
@@ -281,7 +285,7 @@ class EventStreamProcessor:
281
285
 
282
286
  apply_process_before_endpoint_update()
283
287
 
284
- # Step 9 - Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
288
+ # Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
285
289
  # about average latency and the amount of predictions over time
286
290
  def apply_update_endpoint():
287
291
  graph.add_step(
@@ -294,7 +298,7 @@ class EventStreamProcessor:
294
298
 
295
299
  apply_update_endpoint()
296
300
 
297
- # Step 10 (only for KV target) - Apply infer_schema on the model endpoints table for generating schema file
301
+ # (only for V3IO KV target) - Apply infer_schema on the model endpoints table for generating schema file
298
302
  # which will be used by Grafana monitoring dashboards
299
303
  def apply_infer_schema():
300
304
  graph.add_step(
@@ -309,7 +313,7 @@ class EventStreamProcessor:
309
313
  if self.model_endpoint_store_target == ModelEndpointTarget.V3IO_NOSQL:
310
314
  apply_infer_schema()
311
315
 
312
- # Step 11 - Emits the event in window size of events based on sample_window size (10 by default)
316
+ # Emits the event in window size of events based on sample_window size (10 by default)
313
317
  def apply_storey_sample_window():
314
318
  graph.add_step(
315
319
  "storey.steps.SampleWindow",
@@ -321,84 +325,16 @@ class EventStreamProcessor:
321
325
 
322
326
  apply_storey_sample_window()
323
327
 
324
- # Steps 12-19 - TSDB branch (skip to Prometheus if in CE env)
325
- # Steps 20-21 - Prometheus branch
328
+ # TSDB branch (skip to Prometheus if in CE env)
326
329
  if not mlrun.mlconf.is_ce_mode():
327
- # TSDB branch
328
-
329
- # Step 12 - Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
330
- # stats and details about the events
331
- def apply_process_before_tsdb():
332
- graph.add_step(
333
- "ProcessBeforeTSDB", name="ProcessBeforeTSDB", after="sample"
334
- )
335
-
336
- apply_process_before_tsdb()
337
-
338
- # Steps 13-19: - Unpacked keys from each dictionary and write to TSDB target
339
- def apply_filter_and_unpacked_keys(name, keys):
340
- graph.add_step(
341
- "FilterAndUnpackKeys",
342
- name=name,
343
- after="ProcessBeforeTSDB",
344
- keys=[keys],
345
- )
346
-
347
- def apply_tsdb_target(name, after):
348
- graph.add_step(
349
- "storey.TSDBTarget",
350
- name=name,
351
- after=after,
352
- path=self.tsdb_path,
353
- rate="10/m",
354
- time_col=EventFieldType.TIMESTAMP,
355
- container=self.tsdb_container,
356
- v3io_frames=self.v3io_framesd,
357
- infer_columns_from_data=True,
358
- index_cols=[
359
- EventFieldType.ENDPOINT_ID,
360
- EventFieldType.RECORD_TYPE,
361
- EventFieldType.ENDPOINT_TYPE,
362
- ],
363
- max_events=self.tsdb_batching_max_events,
364
- flush_after_seconds=self.tsdb_batching_timeout_secs,
365
- key=EventFieldType.ENDPOINT_ID,
366
- )
367
-
368
- # Steps 13-14 - unpacked base_metrics dictionary
369
- apply_filter_and_unpacked_keys(
370
- name="FilterAndUnpackKeys1",
371
- keys=EventKeyMetrics.BASE_METRICS,
372
- )
373
- apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
374
-
375
- # Steps 15-16 - unpacked endpoint_features dictionary
376
- apply_filter_and_unpacked_keys(
377
- name="FilterAndUnpackKeys2",
378
- keys=EventKeyMetrics.ENDPOINT_FEATURES,
330
+ tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
331
+ project=self.project, secret_provider=tsdb_service_provider
379
332
  )
380
- apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
333
+ tsdb_connector.apply_monitoring_stream_steps(graph=graph)
381
334
 
382
- # Steps 17-19 - unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
383
- apply_filter_and_unpacked_keys(
384
- name="FilterAndUnpackKeys3",
385
- keys=EventKeyMetrics.CUSTOM_METRICS,
386
- )
387
-
388
- def apply_storey_filter():
389
- graph.add_step(
390
- "storey.Filter",
391
- "FilterNotNone",
392
- after="FilterAndUnpackKeys3",
393
- _fn="(event is not None)",
394
- )
395
-
396
- apply_storey_filter()
397
- apply_tsdb_target(name="tsdb3", after="FilterNotNone")
398
335
  else:
399
- # Prometheus branch
400
-
401
- # Step 20 - Increase the prediction counter by 1 and update the latency value
336
+ # Prometheus
337
+ # Increase the prediction counter by 1 and update the latency value
402
338
  graph.add_step(
403
339
  "IncCounter",
404
340
  name="IncCounter",
@@ -406,7 +342,7 @@ class EventStreamProcessor:
406
342
  project=self.project,
407
343
  )
408
344
 
409
- # Step 21 - Record a sample of features and labels
345
+ # Record a sample of features and labels
410
346
  def apply_record_features_to_prometheus():
411
347
  graph.add_step(
412
348
  "RecordFeatures",
@@ -417,8 +353,8 @@ class EventStreamProcessor:
417
353
 
418
354
  apply_record_features_to_prometheus()
419
355
 
420
- # Steps 22-23 - Parquet branch
421
- # Step 22 - Filter and validate different keys before writing the data to Parquet target
356
+ # Parquet branch
357
+ # Filter and validate different keys before writing the data to Parquet target
422
358
  def apply_process_before_parquet():
423
359
  graph.add_step(
424
360
  "ProcessBeforeParquet",
@@ -429,7 +365,7 @@ class EventStreamProcessor:
429
365
 
430
366
  apply_process_before_parquet()
431
367
 
432
- # Step 23 - Write the Parquet target file, partitioned by key (endpoint_id) and time.
368
+ # Write the Parquet target file, partitioned by key (endpoint_id) and time.
433
369
  def apply_parquet_target():
434
370
  graph.add_step(
435
371
  "storey.ParquetTarget",
@@ -503,76 +439,6 @@ class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
503
439
  return e
504
440
 
505
441
 
506
- class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
507
- def __init__(self, **kwargs):
508
- """
509
- Process the data before writing to TSDB. This step creates a dictionary that includes 3 different dictionaries
510
- that each one of them contains important details and stats about the events:
511
- 1. base_metrics: stats about the average latency and the amount of predictions over time. It is based on
512
- storey.AggregateByKey which was executed in step 5.
513
- 2. endpoint_features: feature names and values along with the prediction names and value.
514
- 3. custom_metric (opt): optional metrics provided by the user.
515
-
516
- :returns: Dictionary of 2-3 dictionaries that contains stats and details about the events.
517
-
518
- """
519
- super().__init__(**kwargs)
520
-
521
- def do(self, event):
522
- # Compute prediction per second
523
- event[EventLiveStats.PREDICTIONS_PER_SECOND] = (
524
- float(event[EventLiveStats.PREDICTIONS_COUNT_5M]) / 300
525
- )
526
- base_fields = [
527
- EventFieldType.TIMESTAMP,
528
- EventFieldType.ENDPOINT_ID,
529
- EventFieldType.ENDPOINT_TYPE,
530
- ]
531
-
532
- # Getting event timestamp and endpoint_id
533
- base_event = {k: event[k] for k in base_fields}
534
-
535
- # base_metrics includes the stats about the average latency and the amount of predictions over time
536
- base_metrics = {
537
- EventFieldType.RECORD_TYPE: EventKeyMetrics.BASE_METRICS,
538
- EventLiveStats.PREDICTIONS_PER_SECOND: event[
539
- EventLiveStats.PREDICTIONS_PER_SECOND
540
- ],
541
- EventLiveStats.PREDICTIONS_COUNT_5M: event[
542
- EventLiveStats.PREDICTIONS_COUNT_5M
543
- ],
544
- EventLiveStats.PREDICTIONS_COUNT_1H: event[
545
- EventLiveStats.PREDICTIONS_COUNT_1H
546
- ],
547
- EventLiveStats.LATENCY_AVG_5M: event[EventLiveStats.LATENCY_AVG_5M],
548
- EventLiveStats.LATENCY_AVG_1H: event[EventLiveStats.LATENCY_AVG_1H],
549
- **base_event,
550
- }
551
-
552
- # endpoint_features includes the event values of each feature and prediction
553
- endpoint_features = {
554
- EventFieldType.RECORD_TYPE: EventKeyMetrics.ENDPOINT_FEATURES,
555
- **event[EventFieldType.NAMED_PREDICTIONS],
556
- **event[EventFieldType.NAMED_FEATURES],
557
- **base_event,
558
- }
559
- # Create a dictionary that includes both base_metrics and endpoint_features
560
- processed = {
561
- EventKeyMetrics.BASE_METRICS: base_metrics,
562
- EventKeyMetrics.ENDPOINT_FEATURES: endpoint_features,
563
- }
564
-
565
- # If metrics provided, add another dictionary if custom_metrics values
566
- if event[EventFieldType.METRICS]:
567
- processed[EventKeyMetrics.CUSTOM_METRICS] = {
568
- EventFieldType.RECORD_TYPE: EventKeyMetrics.CUSTOM_METRICS,
569
- **event[EventFieldType.METRICS],
570
- **base_event,
571
- }
572
-
573
- return processed
574
-
575
-
576
442
  class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
577
443
  def __init__(self, **kwargs):
578
444
  """
@@ -807,7 +673,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
807
673
  # left them
808
674
  if endpoint_id not in self.endpoints:
809
675
  logger.info("Trying to resume state", endpoint_id=endpoint_id)
810
- endpoint_record = get_endpoint_record(
676
+ endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
811
677
  project=self.project,
812
678
  endpoint_id=endpoint_id,
813
679
  )
@@ -853,36 +719,6 @@ def is_not_none(field: typing.Any, dict_path: list[str]):
853
719
  return False
854
720
 
855
721
 
856
- class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
857
- def __init__(self, keys, **kwargs):
858
- """
859
- Create unpacked event dictionary based on provided key metrics (base_metrics, endpoint_features,
860
- or custom_metric). Please note that the next step of the TSDB target requires an unpacked dictionary.
861
-
862
- :param keys: list of key metrics.
863
-
864
- :returns: An unpacked dictionary of event filtered by the provided key metrics.
865
- """
866
- super().__init__(**kwargs)
867
- self.keys = keys
868
-
869
- def do(self, event):
870
- # Keep only the relevant dictionary based on the provided keys
871
- new_event = {}
872
- for key in self.keys:
873
- if key in event:
874
- new_event[key] = event[key]
875
-
876
- # Create unpacked dictionary
877
- unpacked = {}
878
- for key in new_event.keys():
879
- if key in self.keys:
880
- unpacked = {**unpacked, **new_event[key]}
881
- else:
882
- unpacked[key] = new_event[key]
883
- return unpacked if unpacked else None
884
-
885
-
886
722
  class MapFeatureNames(mlrun.feature_store.steps.MapClass):
887
723
  def __init__(
888
724
  self,
@@ -940,7 +776,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
940
776
  label_values = event[EventFieldType.PREDICTION]
941
777
  # Get feature names and label columns
942
778
  if endpoint_id not in self.feature_names:
943
- endpoint_record = get_endpoint_record(
779
+ endpoint_record = mlrun.model_monitoring.helpers.get_endpoint_record(
944
780
  project=self.project,
945
781
  endpoint_id=endpoint_id,
946
782
  )
@@ -1081,6 +917,9 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
1081
917
  self.model_endpoint_store_target = model_endpoint_store_target
1082
918
 
1083
919
  def do(self, event: dict):
920
+ # Remove labels from the event
921
+ event.pop(EventFieldType.LABELS)
922
+
1084
923
  update_endpoint_record(
1085
924
  project=self.project,
1086
925
  endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1118,6 +957,8 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
1118
957
  def do(self, event: dict):
1119
958
  key_set = set(event.keys())
1120
959
  if not key_set.issubset(self.keys):
960
+ import mlrun.utils.v3io_clients
961
+
1121
962
  self.keys.update(key_set)
1122
963
  # Apply infer_schema on the kv table for generating the schema file
1123
964
  mlrun.utils.v3io_clients.get_frames_client(