mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (150)
  1. mlrun/__init__.py +3 -2
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/plots.py +1 -1
  5. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  6. mlrun/auth/nuclio.py +89 -0
  7. mlrun/auth/providers.py +429 -0
  8. mlrun/auth/utils.py +415 -0
  9. mlrun/common/constants.py +7 -0
  10. mlrun/common/model_monitoring/helpers.py +41 -4
  11. mlrun/common/runtimes/constants.py +28 -0
  12. mlrun/common/schemas/__init__.py +13 -3
  13. mlrun/common/schemas/alert.py +2 -2
  14. mlrun/common/schemas/api_gateway.py +3 -0
  15. mlrun/common/schemas/auth.py +10 -10
  16. mlrun/common/schemas/client_spec.py +4 -0
  17. mlrun/common/schemas/constants.py +25 -0
  18. mlrun/common/schemas/frontend_spec.py +1 -8
  19. mlrun/common/schemas/function.py +24 -0
  20. mlrun/common/schemas/hub.py +3 -2
  21. mlrun/common/schemas/model_monitoring/__init__.py +1 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +2 -2
  23. mlrun/common/schemas/secret.py +17 -2
  24. mlrun/common/secrets.py +95 -1
  25. mlrun/common/types.py +10 -10
  26. mlrun/config.py +53 -15
  27. mlrun/data_types/infer.py +2 -2
  28. mlrun/datastore/__init__.py +2 -3
  29. mlrun/datastore/base.py +274 -10
  30. mlrun/datastore/datastore.py +1 -1
  31. mlrun/datastore/datastore_profile.py +49 -17
  32. mlrun/datastore/model_provider/huggingface_provider.py +6 -2
  33. mlrun/datastore/model_provider/model_provider.py +2 -2
  34. mlrun/datastore/model_provider/openai_provider.py +2 -2
  35. mlrun/datastore/s3.py +15 -16
  36. mlrun/datastore/sources.py +1 -1
  37. mlrun/datastore/store_resources.py +4 -4
  38. mlrun/datastore/storeytargets.py +16 -10
  39. mlrun/datastore/targets.py +1 -1
  40. mlrun/datastore/utils.py +16 -3
  41. mlrun/datastore/v3io.py +1 -1
  42. mlrun/db/base.py +36 -12
  43. mlrun/db/httpdb.py +316 -101
  44. mlrun/db/nopdb.py +29 -11
  45. mlrun/errors.py +4 -2
  46. mlrun/execution.py +11 -12
  47. mlrun/feature_store/api.py +1 -1
  48. mlrun/feature_store/common.py +1 -1
  49. mlrun/feature_store/feature_vector_utils.py +1 -1
  50. mlrun/feature_store/steps.py +8 -6
  51. mlrun/frameworks/_common/utils.py +3 -3
  52. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  53. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  54. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  55. mlrun/frameworks/_ml_common/utils.py +2 -1
  56. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  57. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  58. mlrun/frameworks/onnx/dataset.py +2 -1
  59. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  60. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  61. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  62. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  63. mlrun/frameworks/pytorch/utils.py +2 -1
  64. mlrun/frameworks/sklearn/metric.py +2 -1
  65. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  66. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  67. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  68. mlrun/hub/__init__.py +37 -0
  69. mlrun/hub/base.py +142 -0
  70. mlrun/hub/module.py +67 -76
  71. mlrun/hub/step.py +113 -0
  72. mlrun/launcher/base.py +2 -1
  73. mlrun/launcher/local.py +2 -1
  74. mlrun/model.py +12 -2
  75. mlrun/model_monitoring/__init__.py +0 -1
  76. mlrun/model_monitoring/api.py +2 -2
  77. mlrun/model_monitoring/applications/base.py +20 -6
  78. mlrun/model_monitoring/applications/context.py +1 -0
  79. mlrun/model_monitoring/controller.py +7 -17
  80. mlrun/model_monitoring/db/_schedules.py +2 -16
  81. mlrun/model_monitoring/db/_stats.py +2 -13
  82. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  83. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  84. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  85. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  86. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  87. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  88. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  89. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  90. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  91. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  92. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  93. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  94. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  95. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  98. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
  99. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
  100. mlrun/model_monitoring/features_drift_table.py +2 -1
  101. mlrun/model_monitoring/helpers.py +2 -1
  102. mlrun/model_monitoring/stream_processing.py +18 -16
  103. mlrun/model_monitoring/writer.py +4 -3
  104. mlrun/package/__init__.py +2 -1
  105. mlrun/platforms/__init__.py +0 -44
  106. mlrun/platforms/iguazio.py +1 -1
  107. mlrun/projects/operations.py +11 -10
  108. mlrun/projects/project.py +81 -82
  109. mlrun/run.py +4 -7
  110. mlrun/runtimes/__init__.py +2 -204
  111. mlrun/runtimes/base.py +89 -21
  112. mlrun/runtimes/constants.py +225 -0
  113. mlrun/runtimes/daskjob.py +4 -2
  114. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  115. mlrun/runtimes/mounts.py +5 -0
  116. mlrun/runtimes/nuclio/__init__.py +12 -8
  117. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  118. mlrun/runtimes/nuclio/application/application.py +200 -32
  119. mlrun/runtimes/nuclio/function.py +154 -49
  120. mlrun/runtimes/nuclio/serving.py +55 -42
  121. mlrun/runtimes/pod.py +59 -10
  122. mlrun/secrets.py +46 -2
  123. mlrun/serving/__init__.py +2 -0
  124. mlrun/serving/remote.py +5 -5
  125. mlrun/serving/routers.py +3 -3
  126. mlrun/serving/server.py +46 -43
  127. mlrun/serving/serving_wrapper.py +6 -2
  128. mlrun/serving/states.py +554 -207
  129. mlrun/serving/steps.py +1 -1
  130. mlrun/serving/system_steps.py +42 -33
  131. mlrun/track/trackers/mlflow_tracker.py +29 -31
  132. mlrun/utils/helpers.py +89 -16
  133. mlrun/utils/http.py +9 -2
  134. mlrun/utils/notifications/notification/git.py +1 -1
  135. mlrun/utils/notifications/notification/mail.py +39 -16
  136. mlrun/utils/notifications/notification_pusher.py +2 -2
  137. mlrun/utils/version/version.json +2 -2
  138. mlrun/utils/version/version.py +3 -4
  139. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
  140. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
  141. mlrun/db/auth_utils.py +0 -152
  142. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
  143. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  144. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
  146. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
  147. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  148. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  149. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  150. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
@@ -12,9 +12,10 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import math
+ from collections import defaultdict
  from datetime import datetime, timedelta
  from io import StringIO
- from typing import Callable, Literal, Optional, Union
+ from typing import Literal, Optional, Union

  import pandas as pd
  import v3io_frames
@@ -225,6 +226,24 @@ class V3IOTSDBConnector(TSDBConnector):
      - endpoint_features (Prediction and feature names and values)
      - custom_metrics (user-defined metrics)
      """
+
+     def apply_list_to_single_dict():
+         graph.add_step(
+             "storey.Map",
+             "MapListToSingleDict",
+             after="FilterNOP",
+             _fn="(event[0] if isinstance(event, list) else event)",
+         )
+         graph.add_step(
+             "mlrun.model_monitoring.stream_processing.MapFeatureNames",
+             name="MapFeatureNamesTSDB",
+             infer_columns_from_data=True,
+             project=self.project,
+             after="MapListToSingleDict",
+         )
+
+     apply_list_to_single_dict()
+
      aggregate_windows = aggregate_windows or ["5m", "1h"]

      # Calculate number of predictions and average latency
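A minimal standalone sketch of what the MapListToSingleDict expression above does: unwrap a single-event list and pass anything else through (the helper name and sample values here are hypothetical, not part of the diff):

    # Sketch only: behavior of "(event[0] if isinstance(event, list) else event)"
    def map_list_to_single_dict(event):
        return event[0] if isinstance(event, list) else event

    print(map_list_to_single_dict([{"endpoint_id": "ep-1"}]))  # {'endpoint_id': 'ep-1'}
    print(map_list_to_single_dict({"endpoint_id": "ep-1"}))    # {'endpoint_id': 'ep-1'}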
@@ -242,7 +261,7 @@ class V3IOTSDBConnector(TSDBConnector):
          }
      ],
      name=EventFieldType.LATENCY,
-     after="FilterNOP",
+     after="MapFeatureNamesTSDB",
      step_name="Aggregates",
      table=".",
      key_field=EventFieldType.ENDPOINT_ID,
@@ -263,7 +282,7 @@ class V3IOTSDBConnector(TSDBConnector):
  graph.add_step(
      "storey.TSDBTarget",
      name="tsdb_predictions",
-     after="FilterNOP",
+     after="MapFeatureNamesTSDB",
      path=f"{self.container}/{self.tables[mm_schemas.V3IOTSDBTables.PREDICTIONS]}",
      time_col=mm_schemas.EventFieldType.TIMESTAMP,
      container=self.container,
@@ -1230,11 +1249,9 @@ class V3IOTSDBConnector(TSDBConnector):
      )
      return df.reset_index(drop=True)

- async def add_basic_metrics(
+ def add_basic_metrics(
      self,
      model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-     project: str,
-     run_in_threadpool: Callable,
      metric_list: Optional[list[str]] = None,
  ) -> list[mlrun.common.schemas.ModelEndpoint]:
      """
@@ -1242,8 +1259,6 @@ class V3IOTSDBConnector(TSDBConnector):

  :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
      be filled with the relevant basic metrics.
- :param project: The name of the project.
- :param run_in_threadpool: A function that runs another function in a thread pool.
  :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.

  :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1272,8 +1287,7 @@ class V3IOTSDBConnector(TSDBConnector):
      function,
      _,
  ) in metric_name_to_function_and_column_name.items():
-     metric_name_to_result[metric_name] = await run_in_threadpool(
-         function,
+     metric_name_to_result[metric_name] = function(
          endpoint_ids=uids,
          get_raw=True,
      )
@@ -1344,7 +1358,7 @@ class V3IOTSDBConnector(TSDBConnector):
  else:
      filter_query = app_filter_query

- df = self._get_records(
+ raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
      table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
      start=start,
      end=end,
@@ -1353,39 +1367,33 @@ class V3IOTSDBConnector(TSDBConnector):
          mm_schemas.ResultData.RESULT_STATUS,
      ],
      filter_query=filter_query,
+     get_raw=True,
  )

- # filter result status
- if result_status_list and not df.empty:
-     df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
-
- if df.empty:
+ if not raw_frames:
      return {}
- else:
-     # convert application name to lower case
-     df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
-         mm_schemas.ApplicationEvent.APPLICATION_NAME
-     ].str.lower()
-
-     df = (
-         df[
-             [
-                 mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                 mm_schemas.ResultData.RESULT_STATUS,
-                 mm_schemas.ResultData.RESULT_VALUE,
-             ]
-         ]
-         .groupby(
-             [
-                 mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                 mm_schemas.ResultData.RESULT_STATUS,
-             ],
-             observed=True,
-         )
-         .count()
-     )

-     return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
+ # Count occurrences by (application_name, result_status) from RawFrame objects
+ count_dict = {}
+
+ for frame in raw_frames:
+     # Extract column data from each RawFrame
+     app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
+         0
+     ]
+     statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+
+     for status in statuses:
+         # Filter by result status if specified
+         if result_status_list and status not in result_status_list:
+             continue
+
+         # Convert application name to lower case
+         key = (app_name.lower(), status)
+
+         # Update the count in the dictionary
+         count_dict[key] = count_dict.get(key, 0) + 1
+ return count_dict

  def count_processed_model_endpoints(
      self,
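The replacement above drops the pandas groupby in favor of a plain tally keyed by (application_name.lower(), result_status); a standalone equivalent using collections.Counter, with made-up sample pairs, just to show the effect of the lowercasing:

    # Sketch only: the new counting logic is an ordinary (app_name, status) tally.
    from collections import Counter

    pairs = [("MyApp", 2), ("myapp", 2), ("other-app", 0)]  # hypothetical (app, status) rows
    count_dict = Counter((app.lower(), status) for app, status in pairs)
    print(count_dict[("myapp", 2)])  # 2 -- "MyApp" and "myapp" merge into one key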
@@ -1543,51 +1551,111 @@ class V3IOTSDBConnector(TSDBConnector):
  ) -> mm_schemas.ModelEndpointDriftValues:
      table = mm_schemas.V3IOTSDBTables.APP_RESULTS
      start, end, interval = self._prepare_aligned_start_end(start, end)
-     df = self._get_records(
+
+     raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
          table=table,
          start=start,
          end=end,
          columns=[mm_schemas.ResultData.RESULT_STATUS],
+         get_raw=True,
      )
-     df = self._aggregate_raw_drift_data(df, start, end, interval)
-     if df.empty:
+
+     if not raw_frames:
          return mm_schemas.ModelEndpointDriftValues(values=[])
-     df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
-     return self._df_to_drift_data(df)
+
+     # Combine aggregation, filtering, and conversion in one pass
+     drift_values = self._process_drifted_endpoints_data(
+         raw_frames=raw_frames, start=start, end=end, interval=interval
+     )
+
+     return drift_values

  @staticmethod
- def _aggregate_raw_drift_data(
-     df: pd.DataFrame, start: datetime, end: datetime, interval: str
- ) -> pd.DataFrame:
-     if df.empty:
-         return df
-     if not isinstance(df.index, pd.DatetimeIndex):
-         raise TypeError("Expected a DatetimeIndex on the DataFrame (time index).")
-     df[EventFieldType.ENDPOINT_ID] = (
-         df[EventFieldType.ENDPOINT_ID].astype("string").str.strip()
-     )  # remove extra data carried by the category dtype
-     window = df.loc[
-         (df.index >= start) & (df.index < end),
-         [mm_schemas.ResultData.RESULT_STATUS, EventFieldType.ENDPOINT_ID],
-     ]
-     out = (
-         window.groupby(
-             [
-                 EventFieldType.ENDPOINT_ID,
-                 pd.Grouper(
-                     freq=interval, origin=start, label="left", closed="left"
-                 ),
-             ]
-             # align to start, [start, end) intervals
-         )[mm_schemas.ResultData.RESULT_STATUS]
-         .max()
-         .reset_index()
-         .rename(
-             columns={
-                 mm_schemas.ResultData.RESULT_STATUS: f"max({mm_schemas.ResultData.RESULT_STATUS})"
-             }
+ def _process_drifted_endpoints_data(
+     raw_frames: list[v3io_frames.client.RawFrame],
+     start: datetime,
+     end: datetime,
+     interval: str,
+ ) -> mm_schemas.ModelEndpointDriftValues:
+     """
+     Optimized single-pass processing of drift data from RawFrame objects.
+     Combines aggregation, filtering, and conversion into one operation.
+
+     :param raw_frames: List of RawFrame objects containing drift data.
+     :param start: Start datetime for filtering data.
+     :param end: End datetime for filtering data.
+     :param interval: Time interval string (e.g., '5min') for aggregation
+
+     :returns: ModelEndpointDriftValues with counts of suspected and detected per timestamp
+     """
+
+     if not raw_frames:
+         return mm_schemas.ModelEndpointDriftValues(values=[])
+
+     # Pre-compute constants
+     interval_td = pd.Timedelta(interval)
+     interval_ns = interval_td.value  # nanoseconds for integer arithmetic
+     start_ns = pd.Timestamp(start).value
+     end_ns = pd.Timestamp(end).value
+
+     suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+     detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+
+     # Single dictionary to track: bucket_start_ns -> {endpoint_id -> max_status}
+     # This allows us to calculate max per endpoint per bucket in one pass
+     bucket_endpoint_status = defaultdict(dict)
+
+     for frame in raw_frames:
+         endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
+         result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+         timestamps = frame.indices()[0].times
+
+         for status, timestamp_ns in zip(result_statuses, timestamps):
+             # Early skip: invalid status or outside time range
+             if status is None or math.isnan(status) or status < 1:
+                 continue
+             if not (start_ns <= timestamp_ns < end_ns):
+                 continue
+
+             # Calculate bucket using integer arithmetic
+             bucket_index = (timestamp_ns - start_ns) // interval_ns
+             bucket_start_ns = start_ns + (bucket_index * interval_ns)
+
+             # Initialize bucket if needed
+             bucket = bucket_endpoint_status[bucket_start_ns]
+             bucket[endpoint_id] = max(bucket.get(endpoint_id, status), status)
+
+     if not bucket_endpoint_status:
+         return mm_schemas.ModelEndpointDriftValues(values=[])
+
+     # Second pass: count suspected/detected per timestamp bucket
+     # Structure: bucket_start_ns -> {count_suspected, count_detected}
+     timestamp_counts = {}
+
+     for bucket_start_ns, endpoint_statuses in bucket_endpoint_status.items():
+         count_suspected = 0
+         count_detected = 0
+
+         for status in endpoint_statuses.values():
+             if status == suspected_val:
+                 count_suspected += 1
+             elif status == detected_val:
+                 count_detected += 1
+
+         # Only store if there are counts
+         if count_suspected > 0 or count_detected > 0:
+             timestamp_counts[bucket_start_ns] = (count_suspected, count_detected)
+
+     # Convert to final format (sorted by timestamp)
+     values = [
+         (
+             pd.Timestamp(bucket_ns, unit="ns", tz="UTC").to_pydatetime(),
+             count_suspected,
+             count_detected,
          )
-     )
-     return out.rename(
-         columns={"time": "_wstart"}
-     )  # rename datetime column to _wstart to align with the tdengine result
+         for bucket_ns, (count_suspected, count_detected) in sorted(
+             timestamp_counts.items()
+         )
+     ]
+
+     return mm_schemas.ModelEndpointDriftValues(values=values)
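The new `_process_drifted_endpoints_data` assigns each result timestamp to a fixed `[start, start + interval)` bucket using integer nanosecond arithmetic; a minimal standalone sketch of that bucketing, with timestamps chosen purely for illustration:

    # Sketch only: integer-nanosecond bucketing as used above.
    import pandas as pd

    start_ns = pd.Timestamp("2024-01-01T00:00:00Z").value  # ns since epoch
    interval_ns = pd.Timedelta("5min").value
    event_ns = pd.Timestamp("2024-01-01T00:12:30Z").value

    bucket_index = (event_ns - start_ns) // interval_ns        # -> 2
    bucket_start_ns = start_ns + bucket_index * interval_ns
    print(pd.Timestamp(bucket_start_ns, unit="ns", tz="UTC"))  # 2024-01-01 00:10:00+00:00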
@@ -14,7 +14,8 @@

  import functools
  import sys
- from typing import Callable, Union
+ from collections.abc import Callable
+ from typing import Union

  import numpy as np
  import plotly.graph_objects as go
@@ -15,8 +15,9 @@
  import datetime
  import functools
  import os
+ from collections.abc import Callable
  from fnmatch import fnmatchcase
- from typing import TYPE_CHECKING, Callable, Optional, TypedDict, Union, cast
+ from typing import TYPE_CHECKING, Optional, TypedDict, Union, cast

  import numpy as np
  import pandas as pd
@@ -200,9 +200,25 @@ class EventStreamProcessor:
      after="ProcessEndpointEvent",
  )

+ # split the graph between NOP event to regular event
+ graph.add_step(
+     "storey.Filter",
+     "FilterNOP",
+     after="filter_none",
+     _fn="(not (isinstance(event, dict) and event.get('kind', "
+     ") == 'nop_event'))",
+ )
+ graph.add_step(
+     "storey.Filter",
+     "ForwardNOP",
+     after="filter_none",
+     _fn="(isinstance(event, dict) and event.get('kind', "
+     ") == 'nop_event')",
+ )
+
  # flatten the events
  graph.add_step(
-     "storey.FlatMap", "flatten_events", _fn="(event)", after="filter_none"
+     "storey.FlatMap", "flatten_events", _fn="(event)", after="FilterNOP"
  )

  apply_storey_filter_and_flatmap()
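The two new Filter steps split the stream on the same predicate, negated on one side; a small plain-Python illustration of what those `_fn` expressions evaluate to (the sample events are hypothetical):

    # Sketch only: how the FilterNOP / ForwardNOP predicates partition events.
    events = [
        {"kind": "nop_event"},                      # keep-alive style event
        {"endpoint_id": "ep-1", "inputs": [1, 2]},  # regular monitoring event
    ]

    def is_nop(e):
        return isinstance(e, dict) and e.get("kind", "") == "nop_event"

    forward_nop = [e for e in events if is_nop(e)]       # ForwardNOP branch
    filter_nop = [e for e in events if not is_nop(e)]    # FilterNOP branch
    print(len(filter_nop), len(forward_nop))             # 1 1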
@@ -218,19 +234,6 @@ class EventStreamProcessor:
  )

  apply_map_feature_names()
- # split the graph between event with error vs valid event
- graph.add_step(
-     "storey.Filter",
-     "FilterNOP",
-     after="MapFeatureNames",
-     _fn="(event.get('kind', " ") != 'nop_event')",
- )
- graph.add_step(
-     "storey.Filter",
-     "ForwardNOP",
-     after="MapFeatureNames",
-     _fn="(event.get('kind', " ") == 'nop_event')",
- )

  tsdb_connector.apply_monitoring_stream_steps(
      graph=graph,
@@ -244,7 +247,7 @@ class EventStreamProcessor:
  graph.add_step(
      "ProcessBeforeParquet",
      name="ProcessBeforeParquet",
-     after="FilterNOP",
+     after="MapFeatureNames",
      _fn="(event)",
  )

@@ -370,7 +373,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
  logger.debug(
      "Skipped nop event inside of ProcessEndpointEvent", event=event
  )
- full_event.body = [event]
  return full_event
  # Getting model version and function uri from event
  # and use them for retrieving the endpoint_id
@@ -14,8 +14,9 @@

  import json
  import typing
- from datetime import datetime, timezone
- from typing import Any, Callable, NewType, Optional
+ from collections.abc import Callable
+ from datetime import UTC, datetime
+ from typing import Any, NewType, Optional

  import storey

@@ -171,7 +172,7 @@ class ModelMonitoringWriter(StepToDict):
  )
  stat_kind = event.get(StatsData.STATS_NAME)
  data, timestamp_str = event.get(StatsData.STATS), event.get(StatsData.TIMESTAMP)
- timestamp = datetime.fromisoformat(timestamp_str).astimezone(tz=timezone.utc)
+ timestamp = datetime.fromisoformat(timestamp_str).astimezone(tz=UTC)
  if stat_kind == StatsKind.CURRENT_STATS.value:
      ModelMonitoringCurrentStatsFile(self.project, endpoint_id).write(
          data, timestamp
mlrun/package/__init__.py CHANGED
@@ -15,7 +15,8 @@
  import functools
  import inspect
  from collections import OrderedDict
- from typing import Callable, Optional, Union
+ from collections.abc import Callable
+ from typing import Optional, Union

  from ..config import config
  from .context_handler import ContextHandler
mlrun/platforms/__init__.py CHANGED
@@ -25,50 +25,6 @@ from .iguazio import (
  )


- # TODO: Remove in 1.11.0
- class _DeprecationHelper:
-     """A helper class to deprecate old schemas"""
-
-     def __init__(self, new_target: str, version="1.8.0"):
-         self._new_target = new_target
-         self._version = version
-
-     def __call__(self, *args, **kwargs):
-         self._warn()
-         return self._lazy_load()(*args, **kwargs)
-
-     def __getattr__(self, attr):
-         self._warn()
-         return getattr(self._lazy_load(), attr)
-
-     def _lazy_load(self, *args, **kwargs):
-         import mlrun.runtimes.mounts as mlrun_mounts
-
-         return getattr(mlrun_mounts, self._new_target)
-
-     def _warn(self):
-         warnings.warn(
-             f"mlrun.platforms.{self._new_target} is deprecated since version {self._version}, "
-             f"and will be removed in 1.11.0. Use mlrun.runtimes.mounts.{self._new_target} instead.",
-             FutureWarning,
-         )
-
-
- # TODO: Remove in 1.11.0
- # For backwards compatibility
- VolumeMount = _DeprecationHelper("VolumeMount")
- auto_mount = _DeprecationHelper("auto_mount")
- mount_configmap = _DeprecationHelper("mount_configmap")
- mount_hostpath = _DeprecationHelper("mount_hostpath")
- mount_pvc = _DeprecationHelper("mount_pvc")
- mount_s3 = _DeprecationHelper("mount_s3")
- mount_secret = _DeprecationHelper("mount_secret")
- mount_v3io = _DeprecationHelper("mount_v3io")
- set_env_variables = _DeprecationHelper("set_env_variables")
- v3io_cred = _DeprecationHelper("v3io_cred")
- # eof 'For backwards compatibility'
-
-
  def watch_stream(
      url,
      shard_ids: Optional[list] = None,
mlrun/platforms/iguazio.py CHANGED
@@ -140,7 +140,7 @@ class OutputStream:
  self._lazy_init()

  def dump_record(rec):
-     if not isinstance(rec, (str, bytes)):
+     if not isinstance(rec, str | bytes):
          return dict_to_json(rec)
      return str(rec)

mlrun/projects/operations.py CHANGED
@@ -85,17 +85,17 @@ def run_function(
  ) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
      """Run a local or remote task as part of a local/kubeflow pipeline

-     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
-     function can be specified as an object or by name (str), when the function is specified by name it is looked up
-     in the current project eliminating the need to redefine/edit functions.
+     run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow.
+     The function can be specified as an object or by name (str). When the function is specified by name it is looked up
+     in the current project, eliminating the need to redefine/edit functions.

-     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
+     When functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
      e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
-     project runs provide additional notifications/reporting and exception handling.
-     inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
-     some behavior may differ between regular runs and deferred KFP runs.
+     Project runs provide additional notifications/reporting and exception handling.
+     Inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG.
+     Some behavior may differ between regular runs and deferred KFP runs.

-     example (use with function object)::
+     Example (use with function object)::

          LABELS = "is_error"
          MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
@@ -107,7 +107,7 @@ def run_function(
          inputs={"dataset": DATA_PATH},
      )

-     example (use with project)::
+     Example (use with project)::

          # create a project with two functions (local and from hub)
          project = mlrun.new_project(project_name, "./proj)
@@ -119,7 +119,7 @@ def run_function(
          run2 = run_function("train", params={"label_columns": LABELS, "model_class": MODEL_CLASS},
                              inputs={"dataset": run1.outputs["data"]})

-     example (use in pipeline)::
+     Example (use in pipeline)::

          @dsl.pipeline(name="test pipeline", description="test")
          def my_pipe(url=""):
@@ -405,6 +405,7 @@ def deploy_function(
      :param project_object: override the project object to use, will default to the project set in the runtime context.
      """
      engine, function = _get_engine_and_function(function, project_object)
+     # TODO in ML-11599 need to handle redeployment with different auth token name
      if function.kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
          raise mlrun.errors.MLRunInvalidArgumentError(
              "deploy is used with real-time functions, for other kinds use build_function()"