mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (50)
  1. mlrun/artifacts/llm_prompt.py +8 -1
  2. mlrun/common/model_monitoring/helpers.py +86 -0
  3. mlrun/common/schemas/hub.py +25 -18
  4. mlrun/common/schemas/model_monitoring/constants.py +1 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
  6. mlrun/config.py +2 -3
  7. mlrun/datastore/__init__.py +2 -2
  8. mlrun/datastore/azure_blob.py +66 -43
  9. mlrun/datastore/datastore_profile.py +35 -5
  10. mlrun/datastore/model_provider/huggingface_provider.py +122 -30
  11. mlrun/datastore/model_provider/model_provider.py +62 -4
  12. mlrun/datastore/model_provider/openai_provider.py +114 -43
  13. mlrun/datastore/s3.py +24 -2
  14. mlrun/datastore/storeytargets.py +2 -3
  15. mlrun/db/base.py +15 -1
  16. mlrun/db/httpdb.py +17 -6
  17. mlrun/db/nopdb.py +14 -0
  18. mlrun/k8s_utils.py +0 -14
  19. mlrun/model_monitoring/api.py +2 -2
  20. mlrun/model_monitoring/applications/base.py +37 -10
  21. mlrun/model_monitoring/applications/context.py +1 -4
  22. mlrun/model_monitoring/controller.py +15 -5
  23. mlrun/model_monitoring/db/_schedules.py +2 -4
  24. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
  25. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  26. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
  27. mlrun/model_monitoring/helpers.py +5 -5
  28. mlrun/platforms/iguazio.py +7 -3
  29. mlrun/projects/project.py +33 -29
  30. mlrun/runtimes/base.py +0 -3
  31. mlrun/runtimes/mounts.py +15 -2
  32. mlrun/runtimes/nuclio/__init__.py +1 -0
  33. mlrun/runtimes/nuclio/application/application.py +11 -2
  34. mlrun/runtimes/nuclio/function.py +10 -0
  35. mlrun/runtimes/nuclio/serving.py +4 -0
  36. mlrun/runtimes/pod.py +153 -11
  37. mlrun/runtimes/utils.py +22 -5
  38. mlrun/serving/routers.py +23 -41
  39. mlrun/serving/server.py +26 -14
  40. mlrun/serving/states.py +3 -3
  41. mlrun/serving/system_steps.py +52 -29
  42. mlrun/serving/v2_serving.py +9 -10
  43. mlrun/utils/helpers.py +5 -2
  44. mlrun/utils/version/version.json +2 -2
  45. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
  46. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
  47. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
  48. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
  49. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
  50. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/db/base.py CHANGED
@@ -722,7 +722,7 @@ class RunDBInterface(ABC):
         tsdb_metrics: bool = False,
         metric_list: Optional[list[str]] = None,
         top_level: bool = False,
-        mode: Optional[mlrun.common.schemas.EndpointMode] = None,
+        modes: Optional[list[mm_constants.EndpointMode]] = None,
         uids: Optional[list[str]] = None,
         latest_only: bool = False,
     ) -> mlrun.common.schemas.ModelEndpointList:
@@ -792,6 +792,7 @@ class RunDBInterface(ABC):
         version: Optional[str] = None,
         tag: Optional[str] = None,
         force_refresh: bool = False,
+        object_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
     ):
         pass
 
@@ -803,6 +804,19 @@ class RunDBInterface(ABC):
         version: Optional[str] = None,
         tag: str = "latest",
         force_refresh: bool = False,
+        item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
+    ):
+        pass
+
+    @abstractmethod
+    def get_hub_asset(
+        self,
+        source_name: str,
+        item_name: str,
+        asset_name: str,
+        version: Optional[str] = None,
+        tag: str = "latest",
+        item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
     ):
         pass
 
mlrun/db/httpdb.py CHANGED
@@ -45,6 +45,7 @@ import mlrun.runtimes.nuclio.api_gateway
 import mlrun.runtimes.nuclio.function
 import mlrun.utils
 from mlrun.alerts.alert import AlertConfig
+from mlrun.common.schemas.hub import HubSourceType
 from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
 from mlrun.errors import MLRunInvalidArgumentError, err_to_str
 from mlrun.secrets import get_secret_or_env
@@ -3770,7 +3771,7 @@ class HTTPRunDB(RunDBInterface):
         tsdb_metrics: bool = False,
         metric_list: Optional[list[str]] = None,
         top_level: bool = False,
-        mode: mm_constants.EndpointMode = None,
+        modes: Optional[list[mm_constants.EndpointMode]] = None,
         uids: Optional[list[str]] = None,
         latest_only: bool = False,
     ) -> mlrun.common.schemas.ModelEndpointList:
@@ -3791,8 +3792,8 @@ class HTTPRunDB(RunDBInterface):
                             If tsdb_metrics=False, this parameter will be ignored and no tsdb metrics
                             will be included.
         :param top_level: Whether to return only top level model endpoints.
-        :param mode: Specifies the mode of the model endpoint. Can be "real-time" (0), "batch" (1), or
-                     both if set to None.
+        :param modes: Specifies the modes of the model endpoints. Can be "real-time" (0), "batch" (1),
+                      "batch_legacy" (2). If set to None, all are included.
         :param uids: A list of unique ids to filter by.
         :param latest_only: Whether to return only the latest model endpoint version.
         :return: A list of model endpoints.
@@ -3801,6 +3802,8 @@ class HTTPRunDB(RunDBInterface):
         labels = self._parse_labels(labels)
         if names and isinstance(names, str):
             names = [names]
+        if isinstance(modes, mm_constants.EndpointMode):
+            modes = [modes]
         response = self.api_call(
             method=mlrun.common.types.HTTPMethod.GET,
             path=path,
@@ -3816,7 +3819,7 @@ class HTTPRunDB(RunDBInterface):
                 "tsdb-metrics": tsdb_metrics,
                 "metric": metric_list,
                 "top-level": top_level,
-                "mode": mode,
+                "mode": modes,
                 "uid": uids,
                 "latest-only": latest_only,
             },
@@ -4079,7 +4082,7 @@ class HTTPRunDB(RunDBInterface):
         response = self.api_call(
             method=mlrun.common.types.HTTPMethod.DELETE,
             path=f"projects/{project}/model-monitoring/functions",
-            params={"functions": functions},
+            params={"function": functions},
         )
         deletion_failed = False
         if response.status_code == http.HTTPStatus.ACCEPTED:
@@ -4359,6 +4362,7 @@ class HTTPRunDB(RunDBInterface):
         version: Optional[str] = None,
         tag: Optional[str] = None,
         force_refresh: bool = False,
+        object_type: HubSourceType = HubSourceType.functions,
     ):
         """
         Retrieve the item catalog for a specified hub source.
@@ -4371,6 +4375,7 @@ class HTTPRunDB(RunDBInterface):
                               rather than rely on cached information which may exist from previous get requests. For example,
                               if the source was re-built,
                               this will make the server get the updated information. Default is ``False``.
+        :param object_type: Type of object to retrieve from the hub source (e.g: functions, modules).
         :returns: :py:class:`~mlrun.common.schemas.hub.HubCatalog` object, which is essentially a list
                   of :py:class:`~mlrun.common.schemas.hub.HubItem` entries.
         """
@@ -4379,6 +4384,7 @@ class HTTPRunDB(RunDBInterface):
             "version": version,
             "tag": tag,
             "force-refresh": force_refresh,
+            "object_type": object_type,
         }
         response = self.api_call(method="GET", path=path, params=params)
         return mlrun.common.schemas.HubCatalog(**response.json())
@@ -4390,6 +4396,7 @@ class HTTPRunDB(RunDBInterface):
         version: Optional[str] = None,
         tag: str = "latest",
         force_refresh: bool = False,
+        item_type: HubSourceType = HubSourceType.functions,
     ):
         """
         Retrieve a specific hub item.
@@ -4401,6 +4408,7 @@ class HTTPRunDB(RunDBInterface):
         :param force_refresh: Make the server fetch the information from the actual hub
                               source, rather than
                               rely on cached information. Default is ``False``.
+        :param item_type: The type of item to retrieve from the hub source (e.g: functions, modules).
         :returns: :py:class:`~mlrun.common.schemas.hub.HubItem`.
         """
         path = (f"hub/sources/{source_name}/items/{item_name}",)
@@ -4408,6 +4416,7 @@ class HTTPRunDB(RunDBInterface):
             "version": version,
             "tag": tag,
             "force-refresh": force_refresh,
+            "item_type": item_type,
         }
         response = self.api_call(method="GET", path=path, params=params)
         return mlrun.common.schemas.HubItem(**response.json())
@@ -4419,6 +4428,7 @@ class HTTPRunDB(RunDBInterface):
         asset_name: str,
         version: Optional[str] = None,
         tag: str = "latest",
+        item_type: HubSourceType = HubSourceType.functions,
     ):
         """
         Get hub asset from item.
@@ -4428,13 +4438,14 @@ class HTTPRunDB(RunDBInterface):
         :param asset_name: Name of the asset to retrieve.
         :param version: Get a specific version of the item. Default is ``None``.
         :param tag: Get a specific version of the item identified by tag. Default is ``latest``.
-
+        :param item_type: The type of item to retrieve from the hub source (e.g: functions, modules).
         :returns: http response with the asset in the content attribute
         """
         path = f"hub/sources/{source_name}/items/{item_name}/assets/{asset_name}"
         params = {
             "version": version,
             "tag": tag,
+            "item_type": item_type,
         }
         response = self.api_call(method="GET", path=path, params=params)
         return response
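
To make the API changes above concrete, here is a hedged usage sketch that is not part of the diff: the new `modes` filter on `list_model_endpoints` and the new hub type selectors. The project name, hub source name, and item/asset names are placeholders; `EndpointMode.REAL_TIME`, `EndpointMode.BATCH_LEGACY`, and `HubSourceType.functions` are the members this diff confirms.

# Hedged usage sketch (not from the diff): exercising the new `modes` filter and
# the hub type selectors. Project, source, item, and asset names are placeholders.
import mlrun
import mlrun.common.schemas.model_monitoring.constants as mm_constants
from mlrun.common.schemas.hub import HubSourceType

project = mlrun.get_or_create_project("my-project")  # placeholder project name

# Filter endpoints by one or more modes; a bare EndpointMode is wrapped into a
# list client-side (see the isinstance check added to list_model_endpoints above).
endpoints = project.list_model_endpoints(
    tsdb_metrics=False,
    modes=[
        mm_constants.EndpointMode.REAL_TIME,
        mm_constants.EndpointMode.BATCH_LEGACY,
    ],
).endpoints

# Hub calls now accept an object/item type, defaulting to functions.
db = mlrun.get_run_db()
catalog = db.get_hub_catalog(source_name="default", object_type=HubSourceType.functions)
item = db.get_hub_item(
    source_name="default", item_name="describe", item_type=HubSourceType.functions
)
asset = db.get_hub_asset(
    source_name="default",
    item_name="describe",
    asset_name="example",
    item_type=HubSourceType.functions,
)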
mlrun/db/nopdb.py CHANGED
@@ -626,6 +626,7 @@ class NopDB(RunDBInterface):
         tsdb_metrics: bool = False,
         metric_list: Optional[list[str]] = None,
         top_level: bool = False,
+        modes: Optional[list[mm_constants.EndpointMode]] = None,
         uids: Optional[list[str]] = None,
         latest_only: bool = False,
     ) -> mlrun.common.schemas.ModelEndpointList:
@@ -688,6 +689,7 @@ class NopDB(RunDBInterface):
         version: Optional[str] = None,
         tag: Optional[str] = None,
         force_refresh: bool = False,
+        object_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
     ):
         pass
 
@@ -699,6 +701,18 @@ class NopDB(RunDBInterface):
         version: Optional[str] = None,
         tag: str = "latest",
         force_refresh: bool = False,
+        item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
+    ):
+        pass
+
+    def get_hub_asset(
+        self,
+        source_name: str,
+        item_name: str,
+        asset_name: str,
+        version: Optional[str] = None,
+        tag: str = "latest",
+        item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
     ):
         pass
 
mlrun/k8s_utils.py CHANGED
@@ -470,20 +470,6 @@ def _handle_allow_mode(
     list[kubernetes.client.V1Toleration],
     typing.Optional[kubernetes.client.V1Affinity],
 ]:
-    for op in [
-        mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value,
-        mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value,
-    ]:
-        affinity = _prune_affinity_node_selector_requirement(
-            generate_preemptible_node_selector_requirements(op),
-            affinity=affinity,
-        )
-
-    node_selector = _prune_node_selector(
-        mlconfig.get_preemptible_node_selector(),
-        enriched_node_selector=node_selector,
-    )
-
     tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
     return node_selector, tolerations, affinity
 
@@ -350,8 +350,8 @@ def _generate_model_endpoint(
             project=project,
             name=model_endpoint_name,
             endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
-            # Due to backwards compatibility, old batch model endpoint will be analyzed as real time endpoint
-            mode=mlrun.common.schemas.model_monitoring.EndpointMode.REAL_TIME,
+            # Due to backwards compatibility, this endpoint will be created as a legacy batch endpoint.
+            mode=mlrun.common.schemas.model_monitoring.EndpointMode.BATCH_LEGACY,
         ),
         spec=mlrun.common.schemas.ModelEndpointSpec(
             function_name=function_name or "function",
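
The `BATCH_LEGACY` mode used here (and accepted by the new `modes` filter) is new in this release; `model_monitoring/constants.py` gains a single line in the file list above. Below is a hedged sketch of what the enum plausibly looks like, inferred from the "real-time" (0), "batch" (1), "batch_legacy" (2) wording in the `list_model_endpoints` docstring; the actual definition is not shown in this diff.

# Assumed shape of EndpointMode, inferred from the docstrings in this diff; the real
# definition lives in mlrun/common/schemas/model_monitoring/constants.py.
import enum


class EndpointMode(enum.IntEnum):
    REAL_TIME = 0
    BATCH = 1
    BATCH_LEGACY = 2  # the value this release appears to add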
@@ -18,7 +18,7 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterator
 from contextlib import contextmanager, nullcontext
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from typing import Any, Literal, Optional, Union, cast
 
 import pandas as pd
@@ -347,6 +347,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                         feature_stats=feature_stats,
                     )
                 )
+
+                if (
+                    monitoring_context.endpoint_id
+                    and monitoring_context.sample_df.empty
+                ):
+                    # The current sample is empty
+                    context.logger.warning(
+                        "No sample data available for tracking",
+                        application_name=application_name,
+                        endpoint_id=monitoring_context.endpoint_id,
+                        start_time=monitoring_context.start_infer_time,
+                        end_time=monitoring_context.end_infer_time,
+                    )
+                    return
+
                 result = self.do_tracking(monitoring_context)
                 endpoints_output[monitoring_context.endpoint_id].append(
                     (monitoring_context, result)
@@ -591,6 +606,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         start_dt = datetime.fromisoformat(start)
         end_dt = datetime.fromisoformat(end)
 
+        # If `start_dt` and `end_dt` do not include time zone information - change them to UTC
+        if (start_dt.tzinfo is None) and (end_dt.tzinfo is None):
+            start_dt = start_dt.replace(tzinfo=timezone.utc)
+            end_dt = end_dt.replace(tzinfo=timezone.utc)
+        elif (start_dt.tzinfo is None) or (end_dt.tzinfo is None):
+            raise mlrun.errors.MLRunValueError(
+                "The start and end times must either both include time zone information or both be naive (no time "
+                f"zone). Asserting the above failed, aborting the evaluate request: start={start}, end={end}."
+            )
+
         if existing_data_handling != ExistingDataHandling.delete_all:
             start_dt = cls._validate_monotonically_increasing_data(
                 application_schedules=application_schedules,
@@ -841,7 +866,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
         model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
 
-        This function has default values for all of its arguments. You should be change them when you want to pass
+        This function has default values for all of its arguments. You should change them when you want to pass
         data to the application.
 
         :param func_path: The path to the function. If ``None``, the current notebook is used.
@@ -858,6 +883,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param reference_data: Pandas data-frame or :py:class:`~mlrun.artifacts.dataset.DatasetArtifact` URI as
                                the reference dataset.
                                When set, its statistics override the model endpoint's feature statistics.
+                               You do not need to have a model endpoint to use this option.
         :param image: Docker image to run the job on (when running remotely).
         :param with_repo: Whether to clone the current repo to the build source.
         :param class_handler: The relative path to the class, useful when using Git sources or code from images.
@@ -878,8 +904,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param start: The start time of the endpoint's data, not included.
                       If you want the model endpoint's data at ``start`` included, you need to subtract a
                       small ``datetime.timedelta`` from it.
-                      Make sure to include the time zone when constructing `datetime.datetime` objects
-                      manually.
+                      Make sure to include the time zone when constructing ``datetime.datetime`` objects
+                      manually. When both ``start`` and ``end`` times do not include a time zone, they will
+                      be treated as UTC.
         :param end: The end time of the endpoint's data, included.
                     Please note: when ``start`` and ``end`` are set, they create a left-open time interval
                     ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -902,13 +929,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                       if ``endpoints`` are passed.
                       Note: the model monitoring infrastructure must be up for the writing to work.
         :param existing_data_handling:
-                      How to handle the existing application data for the model endpoints when writing the
-                      new data. Relevant only when ``write_output=True``. The default is
-                      ``"fail_on_overlap"``. The options are:
+                      How to handle the existing application data for the model endpoints when writing
+                      new data whose requested ``start`` time precedes the ``end`` time of a previous run
+                      that also wrote to the database. Relevant only when ``write_output=True``.
+                      The options are:
 
-                      - ``"fail_on_overlap"``: when the requested ``start`` time precedes the
-                        ``end`` time of a previous run that also wrote to the database - an error is raised.
-                      - ``"skip_overlap"``: when the previously described situation occurs, the relevant
+                      - ``"fail_on_overlap"``: Default. An error is raised.
+                      - ``"skip_overlap"``: the overlapping data is ignored and the
                         time window is cut so that it starts at the earliest possible time after ``start``.
                       - ``"delete_all"``: delete all the data that was written by the application to the
                         model endpoints, regardless of the time window, and write the new data.
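
The time-zone handling added to `evaluate` above boils down to a small rule: timestamps that are both naive are coerced to UTC, while a mixed naive/aware pair is rejected. A minimal standalone sketch of that rule follows; `_normalize_window` is an invented helper name, only the logic mirrors the hunk.

# Illustrative re-statement of the tz rule added to ModelMonitoringApplicationBase.evaluate;
# `_normalize_window` is a made-up helper name, not part of mlrun.
from datetime import datetime, timezone


def _normalize_window(start: str, end: str) -> tuple[datetime, datetime]:
    start_dt = datetime.fromisoformat(start)
    end_dt = datetime.fromisoformat(end)
    if (start_dt.tzinfo is None) and (end_dt.tzinfo is None):
        # Both naive: treat as UTC, matching the new default
        return start_dt.replace(tzinfo=timezone.utc), end_dt.replace(tzinfo=timezone.utc)
    if (start_dt.tzinfo is None) or (end_dt.tzinfo is None):
        # Mixed naive/aware: rejected, matching the new MLRunValueError
        raise ValueError("start and end must both be tz-aware or both be naive")
    return start_dt, end_dt


# Naive inputs are treated as UTC:
print(_normalize_window("2024-01-01T00:00:00", "2024-01-01T01:00:00"))
# Mixed inputs raise:
# _normalize_window("2024-01-01T00:00:00", "2024-01-01T01:00:00+00:00")  -> ValueError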
@@ -24,15 +24,12 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.errors
 import mlrun.feature_store as fstore
 import mlrun.feature_store.feature_set as fs
-import mlrun.features
 import mlrun.serving
 import mlrun.utils
 from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
 from mlrun.common.model_monitoring.helpers import FeatureStats
 from mlrun.common.schemas import ModelEndpoint
-from mlrun.model_monitoring.helpers import (
-    calculate_inputs_statistics,
-)
+from mlrun.model_monitoring.helpers import calculate_inputs_statistics
 
 
 class _ArtifactsLogger(Protocol):
@@ -801,8 +801,17 @@ class MonitoringApplicationController:
         logger.info("Starting monitoring controller chief")
         applications_names = []
         endpoints = self.project_obj.list_model_endpoints(
-            tsdb_metrics=False, mode=mm_constants.EndpointMode.REAL_TIME
+            tsdb_metrics=False,
+            modes=[
+                mm_constants.EndpointMode.REAL_TIME,
+                mm_constants.EndpointMode.BATCH_LEGACY,
+            ],
         ).endpoints
+
+        if not endpoints:
+            logger.info("No model endpoints found", project=self.project)
+            return
+
         last_request_dict = self.tsdb_connector.get_last_request(
             endpoint_ids=[mep.metadata.uid for mep in endpoints]
         )
@@ -811,9 +820,6 @@ class MonitoringApplicationController:
             mm_constants.EventFieldType.ENDPOINT_ID
         )[mm_constants.ModelEndpointSchema.LAST_REQUEST].to_dict()
 
-        if not endpoints:
-            logger.info("No model endpoints found", project=self.project)
-            return
         monitoring_functions = self.project_obj.list_model_monitoring_functions()
         if monitoring_functions:
             # if monitoring_functions: - TODO : ML-7700
@@ -859,7 +865,11 @@ class MonitoringApplicationController:
         for endpoint in endpoints:
             last_request = last_request_dict.get(endpoint.metadata.uid, None)
             if isinstance(last_request, float):
-                last_request = pd.to_datetime(last_request, unit="ms", utc=True)
+                last_request = datetime.datetime.fromtimestamp(
+                    last_request, tz=datetime.timezone.utc
+                )
+            elif isinstance(last_request, pd.Timestamp):
+                last_request = last_request.to_pydatetime()
             endpoint.status.last_request = (
                 last_request or endpoint.status.last_request
             )
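
The controller now normalizes `last_request` values that arrive either as raw floats or as `pd.Timestamp` objects. A standalone sketch of the same conversion, assuming the float values are Unix epoch seconds (which is what `datetime.fromtimestamp` expects); the sample values are made up.

# Standalone sketch of the last_request conversion from the controller hunk above.
import datetime

import pandas as pd


def to_last_request(value):
    # Floats are read as Unix epoch seconds and made tz-aware in UTC
    if isinstance(value, float):
        return datetime.datetime.fromtimestamp(value, tz=datetime.timezone.utc)
    # pandas Timestamps (e.g., from a TSDB query result) become plain datetimes
    if isinstance(value, pd.Timestamp):
        return value.to_pydatetime()
    return value


print(to_last_request(1700000000.0))
print(to_last_request(pd.Timestamp("2024-01-01T00:00:00Z")))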
@@ -16,7 +16,7 @@ import json
 import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
-from datetime import datetime, timezone
+from datetime import datetime
 from types import TracebackType
 from typing import TYPE_CHECKING, Final, Optional
 
@@ -281,9 +281,7 @@ class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
         self, endpoint_uid: str, last_analyzed: datetime
     ) -> None:
         self._check_open_schedules()
-        self._schedules[endpoint_uid] = last_analyzed.astimezone(
-            timezone.utc
-        ).isoformat()
+        self._schedules[endpoint_uid] = last_analyzed.isoformat()
 
     def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
         self._check_open_schedules()
@@ -721,7 +721,9 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, dict[str, float]]:
+        if not endpoint_ids:
+            return {}
         filter_query = self._generate_filter_query(
             filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
             filter_values=endpoint_ids,
@@ -25,10 +25,12 @@ from mlrun.utils import logger
 
 def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
     """
-    Normalize user defined keys - input data to a model and its predictions,
-    to a form V3IO frames tolerates.
+    Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
 
-    The dictionary keys should conform to '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'.
+    - Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
+    - Replace invalid characters (e.g., '-') with '_'.
+    - Prefix keys starting with digits with '_'.
+    - Flatten nested dictionaries using dot notation, while normalizing keys recursively.
     """
     prefix = "_"
 
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
             return prefix + key
         return key
 
-    return {norm_key(k): v for k, v in event.items()}
+    def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
+        items = {}
+        for k, v in d.items():
+            new_key = norm_key(k)
+            full_key = f"{parent_key}.{new_key}" if parent_key else new_key
+            if isinstance(v, dict):
+                items.update(flatten_dict(v, full_key))
+            else:
+                items[full_key] = v
+        return items
+
+    return flatten_dict(event)
 
 
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
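
To see what the new flattening does to an event, here is a self-contained re-implementation based on the updated docstring and the `flatten_dict` hunk; the key-cleaning regex is an assumption (the body of `norm_key` is not fully shown in this diff), and the sample event is invented.

# Re-implementation of the normalization/flattening behaviour described above,
# with a made-up event. The regex-based cleaning is an assumption.
import re
from typing import Any

_INVALID = re.compile(r"[^a-zA-Z0-9_:]")


def normalize_for_frames(event: dict[str, Any]) -> dict[str, Any]:
    def norm_key(key: str) -> str:
        key = _INVALID.sub("_", key)  # replace invalid characters such as '-'
        return "_" + key if key and key[0].isdigit() else key  # prefix leading digits

    def flatten(d: dict[str, Any], parent: str = "") -> dict[str, Any]:
        items: dict[str, Any] = {}
        for k, v in d.items():
            full = f"{parent}.{norm_key(k)}" if parent else norm_key(k)
            if isinstance(v, dict):
                items.update(flatten(v, full))  # recurse into nested dicts
            else:
                items[full] = v
        return items

    return flatten(event)


print(normalize_for_frames({"per-class": {"1st": 0.9, "2nd": 0.1}, "label": "cat"}))
# -> {'per_class._1st': 0.9, 'per_class._2nd': 0.1, 'label': 'cat'}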
@@ -973,6 +973,9 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> dict[str, float]:
+        if not endpoint_ids:
+            return {}
+
         # Get the last request timestamp for each endpoint from the KV table.
         # The result of the query is a list of dictionaries,
         # each dictionary contains the endpoint id and the last request timestamp.
@@ -143,7 +143,7 @@ def get_stream_path(
         return stream_uri.replace("v3io://", f"ds://{profile.name}")
 
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
     ):
         topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
             project=project, function_name=function_name
@@ -152,7 +152,7 @@ def get_stream_path(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
         )
 
 
@@ -300,7 +300,7 @@ def _get_v3io_output_stream(
 
 def _get_kafka_output_stream(
     *,
-    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
+    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
     project: str,
     function_name: str,
     mock: bool = False,
@@ -356,7 +356,7 @@ def get_output_stream(
         )
 
     elif isinstance(
-        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource
+        profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
    ):
         return _get_kafka_output_stream(
             kafka_profile=profile,
@@ -368,7 +368,7 @@ def get_output_stream(
     else:
         raise mlrun.errors.MLRunValueError(
             f"Received an unexpected stream profile type: {type(profile)}\n"
-            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
        )
 
 
@@ -96,7 +96,11 @@ class OutputStream:
         if access_key:
             v3io_client_kwargs["access_key"] = access_key
 
-        self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+        if not mock:
+            self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+        else:
+            self._v3io_client = None
+
         self._container, self._stream_path = split_path(stream_path)
         self._shards = shards
         self._retention_in_hours = retention_in_hours
@@ -105,7 +109,7 @@ class OutputStream:
         self._mock = mock
         self._mock_queue = []
 
-    def create_stream(self):
+    def create_stream(self) -> None:
         # this import creates an import loop via the utils module, so putting it in execution path
         from mlrun.utils.helpers import logger
 
@@ -210,7 +214,7 @@ class KafkaOutputStream:
         self._initialized = False
 
     def _lazy_init(self):
-        if self._initialized:
+        if self._initialized or self._mock:
             return
 
         import kafka
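
Both stream wrappers now short-circuit in mock mode: `OutputStream` skips constructing the v3io client and `KafkaOutputStream._lazy_init` returns before importing `kafka`. A minimal sketch of that guard pattern with an invented class; only the pattern mirrors the diff.

# Minimal illustration of the mock-guard pattern both output streams now use;
# class name and attributes are invented, only the pattern mirrors the diff.
class MockableStream:
    def __init__(self, mock: bool = False):
        self._mock = mock
        self._client = None
        self._initialized = False

    def _lazy_init(self) -> None:
        # In mock mode nothing is created, so tests never need a real backend
        if self._initialized or self._mock:
            return
        self._client = object()  # stand-in for v3io/kafka client construction
        self._initialized = True

    def push(self, data) -> None:
        self._lazy_init()
        if self._mock:
            print("mock push:", data)
            return
        # real client usage would go here


MockableStream(mock=True).push({"x": 1})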