mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (167)
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
@@ -25,10 +25,12 @@ from mlrun.utils import logger
25
25
 
26
26
  def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
27
27
  """
28
- Normalize user defined keys - input data to a model and its predictions,
29
- to a form V3IO frames tolerates.
28
+ Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
30
29
 
31
- The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
30
+ - Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
31
+ - Replace invalid characters (e.g., '-') with '_'.
32
+ - Prefix keys starting with digits with '_'.
33
+ - Flatten nested dictionaries by joining the normalized keys with ':' (e.g., 'parent:child'), normalizing keys recursively.
32
34
  """
33
35
  prefix = "_"
34
36
 
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
38
40
  return prefix + key
39
41
  return key
40
42
 
41
- return {norm_key(k): v for k, v in event.items()}
43
+ def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
44
+ items = {}
45
+ for k, v in d.items():
46
+ new_key = norm_key(k)
47
+ full_key = f"{parent_key}:{new_key}" if parent_key else new_key
48
+ if isinstance(v, dict):
49
+ items.update(flatten_dict(v, full_key))
50
+ else:
51
+ items[full_key] = v
52
+ return items
53
+
54
+ return flatten_dict(event)
42
55
 
43
56
 
44
57
  class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
@@ -138,11 +151,9 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
138
151
 
139
152
 
140
153
  class ErrorExtractor(mlrun.feature_store.steps.MapClass):
141
- def __init__(self, **kwargs):
142
- """
143
- Prepare the event for insertion into the errors TSDB table.
144
- """
145
- super().__init__(**kwargs)
154
+ """
155
+ Prepare the event for insertion into the errors TSDB table.
156
+ """
146
157
 
147
158
  def do(self, event):
148
159
  error = event.get("error")
@@ -12,9 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import math
15
+ from collections import defaultdict
15
16
  from datetime import datetime, timedelta
16
17
  from io import StringIO
17
- from typing import Callable, Literal, Optional, Union
18
+ from typing import Literal, Optional, Union
18
19
 
19
20
  import pandas as pd
20
21
  import v3io_frames
@@ -25,6 +26,7 @@ import mlrun.common.schemas.model_monitoring as mm_schemas
25
26
  import mlrun.feature_store.steps
26
27
  import mlrun.utils.v3io_clients
27
28
  from mlrun.common.schemas import EventFieldType
29
+ from mlrun.config import config
28
30
  from mlrun.model_monitoring.db import TSDBConnector
29
31
  from mlrun.model_monitoring.helpers import get_invocations_fqn, get_start_end
30
32
  from mlrun.utils import logger
@@ -224,6 +226,24 @@ class V3IOTSDBConnector(TSDBConnector):
224
226
  - endpoint_features (Prediction and feature names and values)
225
227
  - custom_metrics (user-defined metrics)
226
228
  """
229
+
230
+ def apply_list_to_single_dict():
231
+ graph.add_step(
232
+ "storey.Map",
233
+ "MapListToSingleDict",
234
+ after="FilterNOP",
235
+ _fn="(event[0] if isinstance(event, list) else event)",
236
+ )
237
+ graph.add_step(
238
+ "mlrun.model_monitoring.stream_processing.MapFeatureNames",
239
+ name="MapFeatureNamesTSDB",
240
+ infer_columns_from_data=True,
241
+ project=self.project,
242
+ after="MapListToSingleDict",
243
+ )
244
+
245
+ apply_list_to_single_dict()
246
+
227
247
  aggregate_windows = aggregate_windows or ["5m", "1h"]
228
248
 
229
249
  # Calculate number of predictions and average latency
@@ -241,7 +261,7 @@ class V3IOTSDBConnector(TSDBConnector):
241
261
  }
242
262
  ],
243
263
  name=EventFieldType.LATENCY,
244
- after="FilterNOP",
264
+ after="MapFeatureNamesTSDB",
245
265
  step_name="Aggregates",
246
266
  table=".",
247
267
  key_field=EventFieldType.ENDPOINT_ID,
@@ -262,7 +282,7 @@ class V3IOTSDBConnector(TSDBConnector):
262
282
  graph.add_step(
263
283
  "storey.TSDBTarget",
264
284
  name="tsdb_predictions",
265
- after="FilterNOP",
285
+ after="MapFeatureNamesTSDB",
266
286
  path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS]}",
267
287
  time_col=mm_schemas.EventFieldType.TIMESTAMP,
268
288
  container=self.container,
@@ -369,6 +389,49 @@ class V3IOTSDBConnector(TSDBConnector):
369
389
  apply_storey_filter()
370
390
  apply_tsdb_target(name="tsdb3", after="FilterNotNone")
371
391
 
392
+ def apply_writer_steps(self, graph, after, **kwargs) -> None:
393
+ graph.add_step(
394
+ "storey.TSDBTarget",
395
+ name="tsdb_metrics",
396
+ after=after,
397
+ path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.METRICS]}",
398
+ time_col=mm_schemas.WriterEvent.END_INFER_TIME,
399
+ container=self.container,
400
+ v3io_frames=self.v3io_framesd,
401
+ infer_columns_from_data=True,
402
+ graph_shape="cylinder",
403
+ index_cols=[
404
+ mm_schemas.WriterEvent.APPLICATION_NAME,
405
+ mm_schemas.WriterEvent.ENDPOINT_NAME,
406
+ mm_schemas.WriterEvent.ENDPOINT_ID,
407
+ mm_schemas.MetricData.METRIC_NAME,
408
+ ],
409
+ max_events=config.model_endpoint_monitoring.writer_graph.max_events,
410
+ flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
411
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
412
+ )
413
+
414
+ graph.add_step(
415
+ "storey.TSDBTarget",
416
+ name="tsdb_app_results",
417
+ after=after,
418
+ path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]}",
419
+ time_col=mm_schemas.WriterEvent.END_INFER_TIME,
420
+ container=self.container,
421
+ v3io_frames=self.v3io_framesd,
422
+ infer_columns_from_data=True,
423
+ graph_shape="cylinder",
424
+ index_cols=[
425
+ mm_schemas.WriterEvent.APPLICATION_NAME,
426
+ mm_schemas.WriterEvent.ENDPOINT_NAME,
427
+ mm_schemas.WriterEvent.ENDPOINT_ID,
428
+ mm_schemas.ResultData.RESULT_NAME,
429
+ ],
430
+ max_events=config.model_endpoint_monitoring.writer_graph.max_events,
431
+ flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
432
+ key=mm_schemas.EventFieldType.ENDPOINT_ID,
433
+ )
434
+
372
435
  def handle_model_error(
373
436
  self,
374
437
  graph,
@@ -492,7 +555,8 @@ class V3IOTSDBConnector(TSDBConnector):
492
555
  # Split the endpoint ids into chunks to avoid exceeding the v3io-engine filter-expression limit
493
556
  for i in range(0, len(endpoint_ids), V3IO_FRAMESD_MEPS_LIMIT):
494
557
  endpoint_id_chunk = endpoint_ids[i : i + V3IO_FRAMESD_MEPS_LIMIT]
495
- filter_query = f"endpoint_id IN({str(endpoint_id_chunk)[1:-1]}) "
558
+ endpoints_list = "', '".join(endpoint_id_chunk)
559
+ filter_query = f"endpoint_id IN('{endpoints_list}')"
496
560
  for table in tables:
497
561
  try:
498
562
  self.frames_client.delete(
@@ -532,6 +596,43 @@ class V3IOTSDBConnector(TSDBConnector):
532
596
  project=self.project,
533
597
  )
534
598
 
599
+ def delete_application_records(
600
+ self, application_name: str, endpoint_ids: Optional[list[str]] = None
601
+ ) -> None:
602
+ """
603
+ Delete application records from the TSDB for the given model endpoints or all if ``endpoint_ids`` is ``None``.
604
+ """
605
+ base_filter_query = f"application_name=='{application_name}'"
606
+
607
+ filter_queries: list[str] = []
608
+ if endpoint_ids:
609
+ for i in range(0, len(endpoint_ids), V3IO_FRAMESD_MEPS_LIMIT):
610
+ endpoint_id_chunk = endpoint_ids[i : i + V3IO_FRAMESD_MEPS_LIMIT]
611
+ endpoints_list = "', '".join(endpoint_id_chunk)
612
+ filter_queries.append(
613
+ f"{base_filter_query} AND endpoint_id IN ('{endpoints_list}')"
614
+ )
615
+ else:
616
+ filter_queries = [base_filter_query]
617
+
618
+ for table in [
619
+ self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS],
620
+ self.tables[mm_schemas.V3IOTSDBTables.METRICS],
621
+ ]:
622
+ logger.debug(
623
+ "Deleting application records from TSDB",
624
+ table=table,
625
+ filter_queries=filter_queries,
626
+ project=self.project,
627
+ )
628
+ for filter_query in filter_queries:
629
+ self.frames_client.delete(
630
+ backend=_TSDB_BE,
631
+ table=table,
632
+ filter=filter_query,
633
+ start="0",
634
+ )
635
+
535
636
  def get_model_endpoint_real_time_metrics(
536
637
  self, endpoint_id: str, metrics: list[str], start: str, end: str
537
638
  ) -> dict[str, list[tuple[str, float]]]:
@@ -935,6 +1036,9 @@ class V3IOTSDBConnector(TSDBConnector):
935
1036
  start: Optional[datetime] = None,
936
1037
  end: Optional[datetime] = None,
937
1038
  ) -> dict[str, float]:
1039
+ if not endpoint_ids:
1040
+ return {}
1041
+
938
1042
  # Get the last request timestamp for each endpoint from the KV table.
939
1043
  # The result of the query is a list of dictionaries,
940
1044
  # each dictionary contains the endpoint id and the last request timestamp.
@@ -1145,11 +1249,9 @@ class V3IOTSDBConnector(TSDBConnector):
1145
1249
  )
1146
1250
  return df.reset_index(drop=True)
1147
1251
 
1148
- async def add_basic_metrics(
1252
+ def add_basic_metrics(
1149
1253
  self,
1150
1254
  model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
1151
- project: str,
1152
- run_in_threadpool: Callable,
1153
1255
  metric_list: Optional[list[str]] = None,
1154
1256
  ) -> list[mlrun.common.schemas.ModelEndpoint]:
1155
1257
  """
@@ -1157,8 +1259,6 @@ class V3IOTSDBConnector(TSDBConnector):
1157
1259
 
1158
1260
  :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
1159
1261
  be filled with the relevant basic metrics.
1160
- :param project: The name of the project.
1161
- :param run_in_threadpool: A function that runs another function in a thread pool.
1162
1262
  :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
1163
1263
 
1164
1264
  :return: A list of `ModelEndpoint` objects filled with the relevant basic metrics.
@@ -1187,8 +1287,7 @@ class V3IOTSDBConnector(TSDBConnector):
1187
1287
  function,
1188
1288
  _,
1189
1289
  ) in metric_name_to_function_and_column_name.items():
1190
- metric_name_to_result[metric_name] = await run_in_threadpool(
1191
- function,
1290
+ metric_name_to_result[metric_name] = function(
1192
1291
  endpoint_ids=uids,
1193
1292
  get_raw=True,
1194
1293
  )
@@ -1259,7 +1358,7 @@ class V3IOTSDBConnector(TSDBConnector):
1259
1358
  else:
1260
1359
  filter_query = app_filter_query
1261
1360
 
1262
- df = self._get_records(
1361
+ raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
1263
1362
  table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
1264
1363
  start=start,
1265
1364
  end=end,
@@ -1268,39 +1367,33 @@ class V3IOTSDBConnector(TSDBConnector):
1268
1367
  mm_schemas.ResultData.RESULT_STATUS,
1269
1368
  ],
1270
1369
  filter_query=filter_query,
1370
+ get_raw=True,
1271
1371
  )
1272
1372
 
1273
- # filter result status
1274
- if result_status_list and not df.empty:
1275
- df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
1276
-
1277
- if df.empty:
1373
+ if not raw_frames:
1278
1374
  return {}
1279
- else:
1280
- # convert application name to lower case
1281
- df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
1282
- mm_schemas.ApplicationEvent.APPLICATION_NAME
1283
- ].str.lower()
1284
-
1285
- df = (
1286
- df[
1287
- [
1288
- mm_schemas.ApplicationEvent.APPLICATION_NAME,
1289
- mm_schemas.ResultData.RESULT_STATUS,
1290
- mm_schemas.ResultData.RESULT_VALUE,
1291
- ]
1292
- ]
1293
- .groupby(
1294
- [
1295
- mm_schemas.ApplicationEvent.APPLICATION_NAME,
1296
- mm_schemas.ResultData.RESULT_STATUS,
1297
- ],
1298
- observed=True,
1299
- )
1300
- .count()
1301
- )
1302
1375
 
1303
- return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
1376
+ # Count occurrences by (application_name, result_status) from RawFrame objects
1377
+ count_dict = {}
1378
+
1379
+ for frame in raw_frames:
1380
+ # Extract column data from each RawFrame
1381
+ app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
1382
+ 0
1383
+ ]
1384
+ statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
1385
+
1386
+ for status in statuses:
1387
+ # Filter by result status if specified
1388
+ if result_status_list and status not in result_status_list:
1389
+ continue
1390
+
1391
+ # Convert application name to lower case
1392
+ key = (app_name.lower(), status)
1393
+
1394
+ # Update the count in the dictionary
1395
+ count_dict[key] = count_dict.get(key, 0) + 1
1396
+ return count_dict
1304
1397
 
1305
1398
  def count_processed_model_endpoints(
1306
1399
  self,
@@ -1459,19 +1552,110 @@ class V3IOTSDBConnector(TSDBConnector):
1459
1552
  table = mm_schemas.V3IOTSDBTables.APP_RESULTS
1460
1553
  start, end, interval = self._prepare_aligned_start_end(start, end)
1461
1554
 
1462
- # get per time-interval x endpoint_id combination the max result status
1463
- df = self._get_records(
1555
+ raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
1464
1556
  table=table,
1465
1557
  start=start,
1466
1558
  end=end,
1467
- interval=interval,
1468
- sliding_window_step=interval,
1469
1559
  columns=[mm_schemas.ResultData.RESULT_STATUS],
1470
- agg_funcs=["max"],
1471
- group_by=mm_schemas.WriterEvent.ENDPOINT_ID,
1560
+ get_raw=True,
1472
1561
  )
1473
- if df.empty:
1562
+
1563
+ if not raw_frames:
1474
1564
  return mm_schemas.ModelEndpointDriftValues(values=[])
1475
- df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
1476
- df = df.reset_index(names="_wstart")
1477
- return self._df_to_drift_data(df)
1565
+
1566
+ # Combine aggregation, filtering, and conversion in one pass
1567
+ drift_values = self._process_drifted_endpoints_data(
1568
+ raw_frames=raw_frames, start=start, end=end, interval=interval
1569
+ )
1570
+
1571
+ return drift_values
1572
+
1573
+ @staticmethod
1574
+ def _process_drifted_endpoints_data(
1575
+ raw_frames: list[v3io_frames.client.RawFrame],
1576
+ start: datetime,
1577
+ end: datetime,
1578
+ interval: str,
1579
+ ) -> mm_schemas.ModelEndpointDriftValues:
1580
+ """
1581
+ Optimized single-pass processing of drift data from RawFrame objects.
1582
+ Combines aggregation, filtering, and conversion into one operation.
1583
+
1584
+ :param raw_frames: List of RawFrame objects containing drift data.
1585
+ :param start: Start datetime for filtering data.
1586
+ :param end: End datetime for filtering data.
1587
+ :param interval: Time interval string (e.g., '5min') for aggregation
1588
+
1589
+ :returns: ModelEndpointDriftValues with counts of suspected and detected per timestamp
1590
+ """
1591
+
1592
+ if not raw_frames:
1593
+ return mm_schemas.ModelEndpointDriftValues(values=[])
1594
+
1595
+ # Pre-compute constants
1596
+ interval_td = pd.Timedelta(interval)
1597
+ interval_ns = interval_td.value # nanoseconds for integer arithmetic
1598
+ start_ns = pd.Timestamp(start).value
1599
+ end_ns = pd.Timestamp(end).value
1600
+
1601
+ suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
1602
+ detected_val = mm_schemas.constants.ResultStatusApp.detected.value
1603
+
1604
+ # Single dictionary to track: bucket_start_ns -> {endpoint_id -> max_status}
1605
+ # This allows us to calculate max per endpoint per bucket in one pass
1606
+ bucket_endpoint_status = defaultdict(dict)
1607
+
1608
+ for frame in raw_frames:
1609
+ endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
1610
+ result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
1611
+ timestamps = frame.indices()[0].times
1612
+
1613
+ for status, timestamp_ns in zip(result_statuses, timestamps):
1614
+ # Early skip: invalid status or outside time range
1615
+ if status is None or math.isnan(status) or status < 1:
1616
+ continue
1617
+ if not (start_ns <= timestamp_ns < end_ns):
1618
+ continue
1619
+
1620
+ # Calculate bucket using integer arithmetic
1621
+ bucket_index = (timestamp_ns - start_ns) // interval_ns
1622
+ bucket_start_ns = start_ns + (bucket_index * interval_ns)
1623
+
1624
+ # Initialize bucket if needed
1625
+ bucket = bucket_endpoint_status[bucket_start_ns]
1626
+ bucket[endpoint_id] = max(bucket.get(endpoint_id, status), status)
1627
+
1628
+ if not bucket_endpoint_status:
1629
+ return mm_schemas.ModelEndpointDriftValues(values=[])
1630
+
1631
+ # Second pass: count suspected/detected per timestamp bucket
1632
+ # Structure: bucket_start_ns -> {count_suspected, count_detected}
1633
+ timestamp_counts = {}
1634
+
1635
+ for bucket_start_ns, endpoint_statuses in bucket_endpoint_status.items():
1636
+ count_suspected = 0
1637
+ count_detected = 0
1638
+
1639
+ for status in endpoint_statuses.values():
1640
+ if status == suspected_val:
1641
+ count_suspected += 1
1642
+ elif status == detected_val:
1643
+ count_detected += 1
1644
+
1645
+ # Only store if there are counts
1646
+ if count_suspected > 0 or count_detected > 0:
1647
+ timestamp_counts[bucket_start_ns] = (count_suspected, count_detected)
1648
+
1649
+ # Convert to final format (sorted by timestamp)
1650
+ values = [
1651
+ (
1652
+ pd.Timestamp(bucket_ns, unit="ns", tz="UTC").to_pydatetime(),
1653
+ count_suspected,
1654
+ count_detected,
1655
+ )
1656
+ for bucket_ns, (count_suspected, count_detected) in sorted(
1657
+ timestamp_counts.items()
1658
+ )
1659
+ ]
1660
+
1661
+ return mm_schemas.ModelEndpointDriftValues(values=values)
@@ -14,7 +14,8 @@
14
14
 
15
15
  import functools
16
16
  import sys
17
- from typing import Callable, Union
17
+ from collections.abc import Callable
18
+ from typing import Union
18
19
 
19
20
  import numpy as np
20
21
  import plotly.graph_objects as go
@@ -15,8 +15,9 @@
15
15
  import datetime
16
16
  import functools
17
17
  import os
18
+ from collections.abc import Callable
18
19
  from fnmatch import fnmatchcase
19
- from typing import TYPE_CHECKING, Callable, Optional, TypedDict, Union, cast
20
+ from typing import TYPE_CHECKING, Optional, TypedDict, Union, cast
20
21
 
21
22
  import numpy as np
22
23
  import pandas as pd
@@ -143,7 +144,7 @@ def get_stream_path(
143
144
  return stream_uri.replace("v3io://", f"ds://{profile.name}")
144
145
 
145
146
  elif isinstance(
146
- profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
147
+ profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
147
148
  ):
148
149
  topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
149
150
  project=project, function_name=function_name
@@ -152,7 +153,7 @@ def get_stream_path(
152
153
  else:
153
154
  raise mlrun.errors.MLRunValueError(
154
155
  f"Received an unexpected stream profile type: {type(profile)}\n"
155
- "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
156
+ "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
156
157
  )
157
158
 
158
159
 
@@ -300,7 +301,7 @@ def _get_v3io_output_stream(
300
301
 
301
302
  def _get_kafka_output_stream(
302
303
  *,
303
- kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
304
+ kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
304
305
  project: str,
305
306
  function_name: str,
306
307
  mock: bool = False,
@@ -356,7 +357,7 @@ def get_output_stream(
356
357
  )
357
358
 
358
359
  elif isinstance(
359
- profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
360
+ profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
360
361
  ):
361
362
  return _get_kafka_output_stream(
362
363
  kafka_profile=profile,
@@ -368,7 +369,7 @@ def get_output_stream(
368
369
  else:
369
370
  raise mlrun.errors.MLRunValueError(
370
371
  f"Received an unexpected stream profile type: {type(profile)}\n"
371
- "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
372
+ "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
372
373
  )
373
374
 
374
375
 
@@ -659,3 +660,26 @@ def get_start_end(
659
660
  )
660
661
 
661
662
  return start, end
663
+
664
+
665
+ def validate_time_range(
666
+ start: Optional[datetime.datetime] = None, end: Optional[datetime.datetime] = None
667
+ ) -> tuple[datetime.datetime, datetime.datetime]:
668
+ """
669
+ Validate the start and end parameters and set default values if needed.
670
+ :param start: Either None or datetime; None is handled as `end` - timedelta(days=1), where `end` is the resolved end time
671
+ :param end: Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
672
+ :return: start datetime, end datetime
673
+ """
674
+ end = end or mlrun.utils.helpers.datetime_now()
675
+ start = start or (end - datetime.timedelta(days=1))
676
+ if start.tzinfo is None or end.tzinfo is None:
677
+ raise mlrun.errors.MLRunInvalidArgumentTypeError(
678
+ "Custom start and end times must contain the timezone."
679
+ )
680
+ if start > end:
681
+ raise mlrun.errors.MLRunInvalidArgumentError(
682
+ "The start time must be before the end time. Note that if end time is not provided, "
683
+ "the current time is used by default."
684
+ )
685
+ return start, end
@@ -200,9 +200,25 @@ class EventStreamProcessor:
200
200
  after="ProcessEndpointEvent",
201
201
  )
202
202
 
203
+ # split the graph between NOP events and regular events
204
+ graph.add_step(
205
+ "storey.Filter",
206
+ "FilterNOP",
207
+ after="filter_none",
208
+ _fn="(not (isinstance(event, dict) and event.get('kind', "
209
+ ") == 'nop_event'))",
210
+ )
211
+ graph.add_step(
212
+ "storey.Filter",
213
+ "ForwardNOP",
214
+ after="filter_none",
215
+ _fn="(isinstance(event, dict) and event.get('kind', "
216
+ ") == 'nop_event')",
217
+ )
218
+
203
219
  # flatten the events
204
220
  graph.add_step(
205
- "storey.FlatMap", "flatten_events", _fn="(event)", after="filter_none"
221
+ "storey.FlatMap", "flatten_events", _fn="(event)", after="FilterNOP"
206
222
  )
207
223
 
208
224
  apply_storey_filter_and_flatmap()
@@ -218,19 +234,6 @@ class EventStreamProcessor:
218
234
  )
219
235
 
220
236
  apply_map_feature_names()
221
- # split the graph between event with error vs valid event
222
- graph.add_step(
223
- "storey.Filter",
224
- "FilterNOP",
225
- after="MapFeatureNames",
226
- _fn="(event.get('kind', " ") != 'nop_event')",
227
- )
228
- graph.add_step(
229
- "storey.Filter",
230
- "ForwardNOP",
231
- after="MapFeatureNames",
232
- _fn="(event.get('kind', " ") == 'nop_event')",
233
- )
234
237
 
235
238
  tsdb_connector.apply_monitoring_stream_steps(
236
239
  graph=graph,
@@ -244,7 +247,7 @@ class EventStreamProcessor:
244
247
  graph.add_step(
245
248
  "ProcessBeforeParquet",
246
249
  name="ProcessBeforeParquet",
247
- after="FilterNOP",
250
+ after="MapFeatureNames",
248
251
  _fn="(event)",
249
252
  )
250
253
 
@@ -370,7 +373,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
370
373
  logger.debug(
371
374
  "Skipped nop event inside of ProcessEndpointEvent", event=event
372
375
  )
373
- full_event.body = [event]
374
376
  return full_event
375
377
  # Getting model version and function uri from event
376
378
  # and use them for retrieving the endpoint_id
@@ -396,6 +398,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
396
398
  request_id = event.get("request", {}).get("id") or event.get("resp", {}).get(
397
399
  "id"
398
400
  )
401
+ feature_names = event.get("request", {}).get("input_schema")
402
+ labels_names = event.get("resp", {}).get("output_schema")
399
403
  latency = event.get("microsec")
400
404
  features = event.get("request", {}).get("inputs")
401
405
  predictions = event.get("resp", {}).get("outputs")
@@ -496,6 +500,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
496
500
  ),
497
501
  EventFieldType.EFFECTIVE_SAMPLE_COUNT: effective_sample_count,
498
502
  EventFieldType.ESTIMATED_PREDICTION_COUNT: estimated_prediction_count,
503
+ EventFieldType.FEATURE_NAMES: feature_names,
504
+ EventFieldType.LABEL_NAMES: labels_names,
499
505
  }
500
506
  )
501
507
 
@@ -602,19 +608,19 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
602
608
  self.endpoint_type = {}
603
609
 
604
610
  def _infer_feature_names_from_data(self, event):
605
- for endpoint_id in self.feature_names:
606
- if len(self.feature_names[endpoint_id]) >= len(
607
- event[EventFieldType.FEATURES]
608
- ):
609
- return self.feature_names[endpoint_id]
611
+ endpoint_id = event[EventFieldType.ENDPOINT_ID]
612
+ if endpoint_id in self.feature_names and len(
613
+ self.feature_names[endpoint_id]
614
+ ) >= len(event[EventFieldType.FEATURES]):
615
+ return self.feature_names[endpoint_id]
610
616
  return None
611
617
 
612
618
  def _infer_label_columns_from_data(self, event):
613
- for endpoint_id in self.label_columns:
614
- if len(self.label_columns[endpoint_id]) >= len(
615
- event[EventFieldType.PREDICTION]
616
- ):
617
- return self.label_columns[endpoint_id]
619
+ endpoint_id = event[EventFieldType.ENDPOINT_ID]
620
+ if endpoint_id in self.label_columns and len(
621
+ self.label_columns[endpoint_id]
622
+ ) >= len(event[EventFieldType.PREDICTION]):
623
+ return self.label_columns[endpoint_id]
618
624
  return None
619
625
 
620
626
  def do(self, event: dict):
@@ -659,7 +665,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
659
665
  "Feature names are not initialized, they will be automatically generated",
660
666
  endpoint_id=endpoint_id,
661
667
  )
662
- feature_names = [
668
+ feature_names = event.get(EventFieldType.FEATURE_NAMES) or [
663
669
  f"f{i}" for i, _ in enumerate(event[EventFieldType.FEATURES])
664
670
  ]
665
671
 
@@ -682,7 +688,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
682
688
  "label column names are not initialized, they will be automatically generated",
683
689
  endpoint_id=endpoint_id,
684
690
  )
685
- label_columns = [
691
+ label_columns = event.get(EventFieldType.LABEL_NAMES) or [
686
692
  f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
687
693
  ]
688
694
  attributes_to_update[EventFieldType.LABEL_NAMES] = label_columns