mlrun 1.10.0rc37__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (33)
  1. mlrun/artifacts/document.py +6 -1
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/model_monitoring/helpers.py +1 -1
  4. mlrun/common/schemas/model_monitoring/constants.py +0 -2
  5. mlrun/common/secrets.py +22 -1
  6. mlrun/launcher/local.py +2 -0
  7. mlrun/model.py +7 -1
  8. mlrun/model_monitoring/api.py +3 -2
  9. mlrun/model_monitoring/applications/base.py +6 -3
  10. mlrun/model_monitoring/applications/context.py +1 -0
  11. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  12. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +17 -11
  13. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +154 -76
  14. mlrun/projects/project.py +15 -2
  15. mlrun/run.py +26 -1
  16. mlrun/runtimes/__init__.py +18 -0
  17. mlrun/runtimes/base.py +3 -0
  18. mlrun/runtimes/local.py +5 -2
  19. mlrun/runtimes/mounts.py +5 -0
  20. mlrun/runtimes/nuclio/application/application.py +2 -0
  21. mlrun/runtimes/nuclio/function.py +14 -0
  22. mlrun/runtimes/nuclio/serving.py +67 -4
  23. mlrun/runtimes/pod.py +59 -10
  24. mlrun/serving/server.py +42 -10
  25. mlrun/serving/states.py +75 -26
  26. mlrun/utils/helpers.py +86 -10
  27. mlrun/utils/version/version.json +2 -2
  28. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA +3 -3
  29. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/RECORD +33 -33
  30. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/WHEEL +0 -0
  31. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/entry_points.txt +0 -0
  32. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/licenses/LICENSE +0 -0
  33. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/top_level.txt +0 -0
mlrun/artifacts/document.py CHANGED
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
         self,
         splitter: Optional["TextSplitter"] = None,  # noqa: F821
     ) -> list["Document"]:  # noqa: F821
-        from langchain.schema import Document
+        # Try new langchain 1.0+ import path first
+        try:
+            from langchain_core.documents import Document
+        except ImportError:
+            # Fall back to old langchain <1.0 import path
+            from langchain.schema import Document

        """
        Create LC documents from the artifact
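
Note: in langchain 1.0 the Document class is no longer re-exported from langchain.schema and lives in the separate langchain-core package, so the artifact now tries the new location first and falls back to the legacy import. A minimal standalone sketch of the same pattern (the make_document helper is illustrative, not part of mlrun):

    # Prefer the langchain>=1.0 location, fall back to the pre-1.0 one.
    try:
        from langchain_core.documents import Document  # langchain-core (langchain 1.0+)
    except ImportError:
        from langchain.schema import Document  # legacy langchain < 1.0

    def make_document(text: str, **metadata) -> "Document":
        # Document(page_content=..., metadata=...) works the same under both import paths.
        return Document(page_content=text, metadata=metadata)
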
mlrun/common/constants.py CHANGED
@@ -27,6 +27,12 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"

+# Kubernetes DNS-1123 label name length limit
+K8S_DNS_1123_LABEL_MAX_LENGTH = 63
+
+
+RESERVED_BATCH_JOB_SUFFIX = "-batch"
+
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
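
Note: K8S_DNS_1123_LABEL_MAX_LENGTH backs the validate_function_name call added in mlrun/run.py further down, and RESERVED_BATCH_JOB_SUFFIX replaces the private _RESERVED_EVALUATE_FUNCTION_SUFFIX removed from the model-monitoring constants. The validator itself lives in mlrun/utils/helpers.py, whose hunk is not included in this diff; a hedged sketch of what a DNS-1123 label check could look like:

    import re

    from mlrun.common.constants import K8S_DNS_1123_LABEL_MAX_LENGTH

    # Sketch only: the real validate_function_name in mlrun/utils/helpers.py may raise an
    # mlrun-specific error type and cover more cases than this minimal check.
    _DNS_1123_LABEL = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$")

    def validate_function_name(name: str) -> None:
        if len(name) > K8S_DNS_1123_LABEL_MAX_LENGTH or not _DNS_1123_LABEL.match(name):
            raise ValueError(
                f"'{name}' is not a valid DNS-1123 label "
                f"(max {K8S_DNS_1123_LABEL_MAX_LENGTH} chars, lowercase alphanumerics and '-')"
            )
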
mlrun/common/model_monitoring/helpers.py CHANGED
@@ -170,6 +170,6 @@ def log_background_task_state(
         f"Model endpoint creation task is still in progress with the current state: "
         f"{background_task_state}. Events will not be monitored for the next "
         f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-        function_name=server.function.name,
+        function_name=server.function_name,
         background_task_check_timestamp=background_task_check_timestamp.isoformat(),
     )
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -486,8 +486,6 @@ class ModelMonitoringLabels:

 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]

-_RESERVED_EVALUATE_FUNCTION_SUFFIX = "-batch"
-

 class ModelEndpointMonitoringMetricType(StrEnum):
     RESULT = "result"
mlrun/common/secrets.py CHANGED
@@ -11,10 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import re
 from abc import ABC, abstractmethod

 import mlrun.common.schemas
+from mlrun.config import config as mlconf
+
+_AUTH_SECRET_NAME_TEMPLATE = re.escape(
+    mlconf.secret_stores.kubernetes.auth_secret_name.format(
+        hashed_access_key="",
+    )
+)
+AUTH_SECRET_PATTERN = re.compile(f"^{_AUTH_SECRET_NAME_TEMPLATE}.*")
+
+
+def validate_not_forbidden_secret(secret_name: str) -> None:
+    """
+    Forbid client-supplied references to internal MLRun auth/project secrets.
+    No-op when running inside the API server (API enrichments are allowed).
+    """
+    if not secret_name or mlrun.config.is_running_as_api():
+        return
+    if AUTH_SECRET_PATTERN.match(secret_name):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Forbidden secret '{secret_name}' matches MLRun auth-secret pattern."
+        )


 class SecretProviderInterface(ABC):
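
Note: a short client-side usage sketch of the new guard; the first call passes, the second is rejected because the name matches the MLRun auth-secret prefix (the concrete secret names below are illustrative):

    import mlrun.common.secrets
    import mlrun.errors

    # A user-managed secret name passes silently.
    mlrun.common.secrets.validate_not_forbidden_secret("my-azure-key-vault-creds")

    # A name matching the internal auth-secret pattern (e.g. "mlrun-auth-secrets.<hash>")
    # raises before any request leaves the client.
    try:
        mlrun.common.secrets.validate_not_forbidden_secret("mlrun-auth-secrets.0a1b2c3d")
    except mlrun.errors.MLRunInvalidArgumentError as exc:
        print(exc)
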
mlrun/launcher/local.py CHANGED
@@ -243,6 +243,8 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):

         # if the handler has module prefix force "local" (vs "handler") runtime
         kind = "local" if isinstance(handler, str) and "." in handler else ""
+
+        # Create temporary local function for execution
         fn = mlrun.new_function(meta.name, command=command, args=args, kind=kind)
         fn.metadata = meta
         setattr(fn, "_is_run_local", True)
mlrun/model.py CHANGED
@@ -29,6 +29,7 @@ import pydantic.v1.error_wrappers
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.notification
+import mlrun.common.secrets
 import mlrun.utils.regex

 from .utils import (
@@ -1616,7 +1617,12 @@ class RunTemplate(ModelObj):

        :returns: The RunTemplate object
        """
-
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}

mlrun/model_monitoring/api.py CHANGED
@@ -563,9 +563,10 @@ def _create_model_monitoring_function_base(
             "An application cannot have the following names: "
             f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
-    if name and name.endswith(mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX):
+    _, has_valid_suffix, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(name)
+    if name and not has_valid_suffix:
         raise mlrun.errors.MLRunValueError(
-            "Model monitoring application names cannot end with `-batch`"
+            f"Model monitoring application names cannot end with `{suffix}`"
         )
    if func is None:
        func = ""
mlrun/model_monitoring/applications/base.py CHANGED
@@ -799,10 +799,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
                 "Please choose another `func_name`."
             )
-        if not job_name.endswith(mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX):
-            job_name += mm_constants._RESERVED_EVALUATE_FUNCTION_SUFFIX
+        job_name, was_renamed, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(
+            job_name
+        )
+        if was_renamed:
             mlrun.utils.logger.info(
-                'Changing function name - adding `"-batch"` suffix', func_name=job_name
+                f'Changing function name - adding `"{suffix}"` suffix',
+                func_name=job_name,
             )

        return job_name
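
Note: both call sites above rely on a new ensure_batch_job_suffix helper in mlrun/utils/helpers.py (changed in this release but its hunk is not shown here). From its usage it returns a (name, suffix_was_added, suffix) triple; a hedged sketch consistent with both callers, using the RESERVED_BATCH_JOB_SUFFIX constant added in mlrun/common/constants.py:

    from mlrun.common.constants import RESERVED_BATCH_JOB_SUFFIX

    def ensure_batch_job_suffix(name: str) -> tuple[str, bool, str]:
        # Sketch only: the actual implementation lives in mlrun/utils/helpers.py.
        # Returns the (possibly renamed) name, whether the suffix was appended, and the suffix.
        if name and not name.endswith(RESERVED_BATCH_JOB_SUFFIX):
            return name + RESERVED_BATCH_JOB_SUFFIX, True, RESERVED_BATCH_JOB_SUFFIX
        return name, False, RESERVED_BATCH_JOB_SUFFIX

Under that reading, _create_model_monitoring_function_base rejects application names that already end with `-batch`, while the evaluate job path silently appends the suffix and logs the rename.
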
mlrun/model_monitoring/applications/context.py CHANGED
@@ -249,6 +249,7 @@ class MonitoringApplicationContext:
                 project=self.project_name,
                 endpoint_id=self.endpoint_id,
                 feature_analysis=True,
+                tsdb_metrics=False,
             )
             return self._model_endpoint

mlrun/model_monitoring/db/tsdb/base.py CHANGED
@@ -14,7 +14,7 @@

 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from typing import Callable, ClassVar, Literal, Optional, Union
+from typing import ClassVar, Literal, Optional, Union

 import pandas as pd
 import pydantic.v1
@@ -444,11 +444,9 @@ class TSDBConnector(ABC):
         ]
         """

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py CHANGED
@@ -14,7 +14,7 @@

 import threading
 from datetime import datetime, timedelta
-from typing import Callable, Final, Literal, Optional, Union
+from typing import Final, Literal, Optional, Union

 import pandas as pd
 import taosws
@@ -55,14 +55,12 @@ class TDEngineConnector(TSDBConnector):
     """

     type: str = mm_schemas.TSDBTarget.TDEngine
-    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"

     def __init__(
         self,
         project: str,
         profile: DatastoreProfile,
         timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
-        **kwargs,
     ):
         super().__init__(project=project)

@@ -72,6 +70,15 @@ class TDEngineConnector(TSDBConnector):
             timestamp_precision
         )

+        if not mlrun.mlconf.system_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "system_id is not set in mlrun.mlconf. "
+                "TDEngineConnector requires system_id to be configured for database name construction. "
+                "Please ensure MLRun configuration is properly loaded before creating TDEngineConnector."
+            )
+        self.database = (
+            f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
+        )
         self._init_super_tables()

     @property
@@ -1050,7 +1057,6 @@ class TDEngineConnector(TSDBConnector):
         ]
     ):
         metric_objects = []
-
         if not df_results.empty:
             df_results.rename(
                 columns={
@@ -1062,7 +1068,9 @@
                 metric_objects.append(
                     mm_schemas.ApplicationResultRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         result_name=row[mm_schemas.ResultData.RESULT_NAME],
                         kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -1082,7 +1090,9 @@
                 metric_objects.append(
                     mm_schemas.ApplicationMetricRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                         value=row[mm_schemas.MetricData.METRIC_VALUE],
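
Note: the .replace(" +", "+") normalization in the two hunks above is needed because datetime.fromisoformat rejects a space between the time and the UTC offset; the timestamp strings returned here can carry that space, and collapsing it turns them into valid ISO-8601 input. A minimal illustration (the sample string is made up):

    from datetime import datetime

    raw = "2025-01-01 12:00:00.000000 +00:00"  # offset separated by a space: fromisoformat raises ValueError
    fixed = raw.replace(" +", "+")             # "2025-01-01 12:00:00.000000+00:00"
    print(datetime.fromisoformat(fixed))
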
@@ -1241,11 +1251,9 @@ class TDEngineConnector(TSDBConnector):
         df.dropna(inplace=True)
         return df

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1253,8 +1261,6 @@ class TDEngineConnector(TSDBConnector):

         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.

         :return: A list of `ModelEndpointMonitoringMetric` objects.
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py CHANGED
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from io import StringIO
-from typing import Callable, Literal, Optional, Union
+from typing import Literal, Optional, Union

 import pandas as pd
 import v3io_frames
@@ -1230,11 +1230,9 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         return df.reset_index(drop=True)

-    async def add_basic_metrics(
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1242,8 +1240,6 @@

         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.

         :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1272,8 +1268,7 @@
             function,
             _,
         ) in metric_name_to_function_and_column_name.items():
-            metric_name_to_result[metric_name] = await run_in_threadpool(
-                function,
+            metric_name_to_result[metric_name] = function(
                 endpoint_ids=uids,
                 get_raw=True,
             )
@@ -1344,7 +1339,7 @@
         else:
             filter_query = app_filter_query

-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -1353,39 +1348,33 @@
                 mm_schemas.ResultData.RESULT_STATUS,
             ],
             filter_query=filter_query,
+            get_raw=True,
         )

-        # filter result status
-        if result_status_list and not df.empty:
-            df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
-
-        if df.empty:
+        if not raw_frames:
             return {}
-        else:
-            # convert application name to lower case
-            df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
-                mm_schemas.ApplicationEvent.APPLICATION_NAME
-            ].str.lower()
-
-            df = (
-                df[
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                        mm_schemas.ResultData.RESULT_VALUE,
-                    ]
-                ]
-                .groupby(
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                    ],
-                    observed=True,
-                )
-                .count()
-            )

-        return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
+        # Count occurrences by (application_name, result_status) from RawFrame objects
+        count_dict = {}
+
+        for frame in raw_frames:
+            # Extract column data from each RawFrame
+            app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
+                0
+            ]
+            statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+
+            for status in statuses:
+                # Filter by result status if specified
+                if result_status_list and status not in result_status_list:
+                    continue
+
+                # Convert application name to lower case
+                key = (app_name.lower(), status)
+
+                # Update the count in the dictionary
+                count_dict[key] = count_dict.get(key, 0) + 1
+        return count_dict

     def count_processed_model_endpoints(
         self,
@@ -1543,51 +1532,140 @@
     ) -> mm_schemas.ModelEndpointDriftValues:
         table = mm_schemas.V3IOTSDBTables.APP_RESULTS
         start, end, interval = self._prepare_aligned_start_end(start, end)
-        df = self._get_records(
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=table,
             start=start,
             end=end,
             columns=[mm_schemas.ResultData.RESULT_STATUS],
+            get_raw=True,
         )
-        df = self._aggregate_raw_drift_data(df, start, end, interval)
-        if df.empty:
+
+        if not raw_frames:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        aggregated_data = self._aggregate_raw_drift_data(
+            raw_frames=raw_frames, start=start, end=end, interval=interval
+        )
+        if not aggregated_data:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        # Filter to only include entries with max result_status >= 1
+        filtered_data = [
+            (endpoint_id, timestamp, max_status)
+            for endpoint_id, timestamp, max_status in aggregated_data
+            if max_status >= 1
+        ]
+
+        if not filtered_data:
             return mm_schemas.ModelEndpointDriftValues(values=[])
-        df = df[df[f"max({mm_schemas.ResultData.RESULT_STATUS})"] >= 1]
-        return self._df_to_drift_data(df)
+
+        return self._convert_drift_data_to_values(aggregated_data=filtered_data)

     @staticmethod
     def _aggregate_raw_drift_data(
-        df: pd.DataFrame, start: datetime, end: datetime, interval: str
-    ) -> pd.DataFrame:
-        if df.empty:
-            return df
-        if not isinstance(df.index, pd.DatetimeIndex):
-            raise TypeError("Expected a DatetimeIndex on the DataFrame (time index).")
-        df[EventFieldType.ENDPOINT_ID] = (
-            df[EventFieldType.ENDPOINT_ID].astype("string").str.strip()
-        )  # remove extra data carried by the category dtype
-        window = df.loc[
-            (df.index >= start) & (df.index < end),
-            [mm_schemas.ResultData.RESULT_STATUS, EventFieldType.ENDPOINT_ID],
-        ]
-        out = (
-            window.groupby(
-                [
-                    EventFieldType.ENDPOINT_ID,
-                    pd.Grouper(
-                        freq=interval, origin=start, label="left", closed="left"
-                    ),
-                ]
-                # align to start, [start, end) intervals
-            )[mm_schemas.ResultData.RESULT_STATUS]
-            .max()
-            .reset_index()
-            .rename(
-                columns={
-                    mm_schemas.ResultData.RESULT_STATUS: f"max({mm_schemas.ResultData.RESULT_STATUS})"
-                }
-            )
-        )
-        return out.rename(
-            columns={"time": "_wstart"}
-        )  # rename datetime column to _wstart to align with the tdengine result
+        raw_frames: list[v3io_frames.client.RawFrame],
+        start: datetime,
+        end: datetime,
+        interval: str,
+    ) -> list[tuple[str, datetime, float]]:
+        """
+        Aggregate raw drift data from RawFrame objects.
+
+        :param raw_frames: List of RawFrame objects containing drift data.
+        :param start:      Start datetime for filtering data.
+        :param end:        End datetime for filtering data.
+        :param interval:   Time interval string (e.g., '5min') for aggregation.
+
+        :returns: list of tuples: (endpoint_id, timestamp, max_result_status)
+        """
+        if not raw_frames:
+            return []
+
+        # Parse interval to get timedelta
+        interval_td = pd.Timedelta(interval)
+
+        # Collect all data points from RawFrame objects
+        data_points = []
+        for frame in raw_frames:
+            endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
+            result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+            timestamps = frame.indices()[0].times
+
+            # Combine data from this frame
+            for i, (status, timestamp) in enumerate(zip(result_statuses, timestamps)):
+                # V3IO TSDB returns timestamps in nanoseconds
+                timestamp_dt = pd.Timestamp(
+                    timestamp, unit="ns", tzinfo=timezone.utc
+                ).to_pydatetime()
+
+                # Filter by time window
+                if start <= timestamp_dt < end:
+                    data_points.append((endpoint_id, timestamp_dt, status))
+
+        if not data_points:
+            return []
+
+        # Group by endpoint_id and time intervals, then find max status
+        # Create time buckets aligned to start
+        grouped_data = {}
+        for endpoint_id, timestamp, status in data_points:
+            # Calculate which interval bucket this timestamp falls into
+            time_diff = timestamp - start
+            bucket_index = int(time_diff / interval_td)
+            bucket_start = start + (bucket_index * interval_td)
+
+            key = (endpoint_id, bucket_start)
+            if key not in grouped_data:
+                grouped_data[key] = status
+            else:
+                # Keep the maximum status value
+                grouped_data[key] = max(grouped_data[key], status)
+
+        # Convert to list of tuples
+        result = [
+            (endpoint_id, timestamp, max_status)
+            for (endpoint_id, timestamp), max_status in grouped_data.items()
+        ]
+
+        return result
+
+    @staticmethod
+    def _convert_drift_data_to_values(
+        aggregated_data: list[tuple[str, datetime, float]],
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Convert aggregated drift data to ModelEndpointDriftValues format.
+
+        :param aggregated_data: List of tuples (endpoint_id, timestamp, max_result_status)
+        :return: ModelEndpointDriftValues with counts of suspected and detected per timestamp
+        """
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+
+        # Group by timestamp and result status, then count occurrences
+        timestamp_status_counts = {}
+        for _, timestamp, max_status in aggregated_data:
+            key = (timestamp, max_status)
+            timestamp_status_counts[key] = timestamp_status_counts.get(key, 0) + 1
+
+        # Organize by timestamp with counts for suspected and detected
+        timestamp_counts = {}
+        for (timestamp, status), count in timestamp_status_counts.items():
+            if timestamp not in timestamp_counts:
+                timestamp_counts[timestamp] = {
+                    "count_suspected": 0,
+                    "count_detected": 0,
+                }
+
+            if status == suspected_val:
+                timestamp_counts[timestamp]["count_suspected"] = count
+            elif status == detected_val:
+                timestamp_counts[timestamp]["count_detected"] = count
+
+        # Convert to the expected format: list of (timestamp, count_suspected, count_detected)
+        values = [
+            (timestamp, counts["count_suspected"], counts["count_detected"])
+            for timestamp, counts in sorted(timestamp_counts.items())
+        ]
+
+        return mm_schemas.ModelEndpointDriftValues(values=values)
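
Note: the bucketing in _aggregate_raw_drift_data replaces the previous pandas Grouper: each timestamp is assigned to a fixed-width window aligned to start, and the maximum result status per (endpoint, window) is kept. A small worked example of the bucket arithmetic (values are illustrative):

    from datetime import datetime, timezone
    import pandas as pd

    start = datetime(2025, 1, 1, 0, 0, tzinfo=timezone.utc)
    interval_td = pd.Timedelta("5min")

    # A point at 00:12:30 falls into bucket index 2, i.e. the [00:10, 00:15) window.
    timestamp = datetime(2025, 1, 1, 0, 12, 30, tzinfo=timezone.utc)
    bucket_index = int((timestamp - start) / interval_td)   # -> 2
    bucket_start = start + bucket_index * interval_td       # -> 2025-01-01 00:10:00+00:00
    print(bucket_index, bucket_start)
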
mlrun/projects/project.py CHANGED
@@ -45,6 +45,7 @@ import mlrun.common.runtimes.constants
 import mlrun.common.schemas.alert
 import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.common.secrets
 import mlrun.datastore.datastore_profile
 import mlrun.db
 import mlrun.errors
@@ -3418,7 +3419,12 @@ class MlrunProject(ModelObj):
         self._initialized = True
         return self.spec._function_objects

-    def with_secrets(self, kind, source, prefix=""):
+    def with_secrets(
+        self,
+        kind,
+        source,
+        prefix="",
+    ):
         """register a secrets source (file, env or dict)

         read secrets from a source provider to be used in workflows, example::
@@ -3440,12 +3446,19 @@

         This will enable access to all secrets in vault registered to the current project.

-        :param kind: secret type (file, inline, env, vault)
+        :param kind: secret type (file, inline, env, vault, azure_vault)
         :param source: secret data or link (see example)
         :param prefix: add a prefix to the keys in this source

         :returns: project object
         """
+        # Block using mlrun-auth-secrets.* via azure_vault's k8s_secret param (client-side only)
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )

         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.name, "secrets": source}
mlrun/run.py CHANGED
@@ -118,7 +118,25 @@ def function_to_module(code="", workdir=None, secrets=None, silent=False):
         raise ValueError("nothing to run, specify command or function")

     command = os.path.join(workdir or "", command)
-    mod_name = mlrun.utils.helpers.get_module_name_from_path(command)
+
+    source_file_path_object, working_dir_path_object = (
+        mlrun.utils.helpers.get_source_and_working_dir_paths(command)
+    )
+    if source_file_path_object.is_relative_to(working_dir_path_object):
+        mod_name = mlrun.utils.helpers.get_relative_module_name_from_path(
+            source_file_path_object, working_dir_path_object
+        )
+    elif source_file_path_object.is_relative_to(
+        pathlib.Path(tempfile.gettempdir()).resolve()
+    ):
+        mod_name = Path(command).stem
+    else:
+        raise mlrun.errors.MLRunRuntimeError(
+            f"Cannot run source file '{command}': it must be located either under the current working "
+            f"directory ('{working_dir_path_object}') or the system temporary directory ('{tempfile.gettempdir()}'). "
+            f"This is required when running with local=True."
+        )
+
     spec = imputil.spec_from_file_location(mod_name, command)
     if spec is None:
         raise OSError(f"cannot import from {command!r}")
@@ -537,6 +555,7 @@

     # make sure function name is valid
     name = mlrun.utils.helpers.normalize_name(name)
+    mlrun.utils.helpers.validate_function_name(name)

     runner.metadata.name = name
     runner.metadata.project = (
@@ -576,6 +595,7 @@
     )
     runner.prepare_image_for_deploy()
+
     return runner


@@ -780,6 +800,9 @@ def code_to_function(
             kind=sub_kind,
             ignored_tags=ignored_tags,
         )
+
+        mlrun.utils.helpers.validate_function_name(name)
+
         spec["spec"]["env"].append(
             {
                 "name": "MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK",
@@ -832,6 +855,7 @@
         runtime.spec.build.code_origin = code_origin
         runtime.spec.build.origin_filename = filename or (name + ".ipynb")
         update_common(runtime, spec)
+
         return runtime

     if kind is None or kind in ["", "Function"]:
@@ -845,6 +869,7 @@

     if not name:
         raise ValueError("name must be specified")
+
     h = get_in(spec, "spec.handler", "").split(":")
     runtime.handler = h[0] if len(h) <= 1 else h[1]
     runtime.metadata = get_in(spec, "spec.metadata")