mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic; see the advisory linked from the original diff page for more details.
- mlrun/artifacts/llm_prompt.py +8 -1
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/hub.py +25 -18
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/config.py +2 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +35 -5
- mlrun/datastore/model_provider/huggingface_provider.py +122 -30
- mlrun/datastore/model_provider/model_provider.py +62 -4
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/db/base.py +15 -1
- mlrun/db/httpdb.py +17 -6
- mlrun/db/nopdb.py +14 -0
- mlrun/k8s_utils.py +0 -14
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +37 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +15 -5
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
- mlrun/model_monitoring/helpers.py +5 -5
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +33 -29
- mlrun/runtimes/base.py +0 -3
- mlrun/runtimes/mounts.py +15 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/pod.py +153 -11
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +26 -14
- mlrun/serving/states.py +3 -3
- mlrun/serving/system_steps.py +52 -29
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/db/base.py
CHANGED
|
@@ -722,7 +722,7 @@ class RunDBInterface(ABC):
|
|
|
722
722
|
tsdb_metrics: bool = False,
|
|
723
723
|
metric_list: Optional[list[str]] = None,
|
|
724
724
|
top_level: bool = False,
|
|
725
|
-
|
|
725
|
+
modes: Optional[list[mm_constants.EndpointMode]] = None,
|
|
726
726
|
uids: Optional[list[str]] = None,
|
|
727
727
|
latest_only: bool = False,
|
|
728
728
|
) -> mlrun.common.schemas.ModelEndpointList:
|
|
@@ -792,6 +792,7 @@ class RunDBInterface(ABC):
|
|
|
792
792
|
version: Optional[str] = None,
|
|
793
793
|
tag: Optional[str] = None,
|
|
794
794
|
force_refresh: bool = False,
|
|
795
|
+
object_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
795
796
|
):
|
|
796
797
|
pass
|
|
797
798
|
|
|
@@ -803,6 +804,19 @@ class RunDBInterface(ABC):
|
|
|
803
804
|
version: Optional[str] = None,
|
|
804
805
|
tag: str = "latest",
|
|
805
806
|
force_refresh: bool = False,
|
|
807
|
+
item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
808
|
+
):
|
|
809
|
+
pass
|
|
810
|
+
|
|
811
|
+
@abstractmethod
|
|
812
|
+
def get_hub_asset(
|
|
813
|
+
self,
|
|
814
|
+
source_name: str,
|
|
815
|
+
item_name: str,
|
|
816
|
+
asset_name: str,
|
|
817
|
+
version: Optional[str] = None,
|
|
818
|
+
tag: str = "latest",
|
|
819
|
+
item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
806
820
|
):
|
|
807
821
|
pass
|
|
808
822
|
|
mlrun/db/httpdb.py
CHANGED
|
@@ -45,6 +45,7 @@ import mlrun.runtimes.nuclio.api_gateway
|
|
|
45
45
|
import mlrun.runtimes.nuclio.function
|
|
46
46
|
import mlrun.utils
|
|
47
47
|
from mlrun.alerts.alert import AlertConfig
|
|
48
|
+
from mlrun.common.schemas.hub import HubSourceType
|
|
48
49
|
from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
|
|
49
50
|
from mlrun.errors import MLRunInvalidArgumentError, err_to_str
|
|
50
51
|
from mlrun.secrets import get_secret_or_env
|
|
@@ -3770,7 +3771,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3770
3771
|
tsdb_metrics: bool = False,
|
|
3771
3772
|
metric_list: Optional[list[str]] = None,
|
|
3772
3773
|
top_level: bool = False,
|
|
3773
|
-
|
|
3774
|
+
modes: Optional[list[mm_constants.EndpointMode]] = None,
|
|
3774
3775
|
uids: Optional[list[str]] = None,
|
|
3775
3776
|
latest_only: bool = False,
|
|
3776
3777
|
) -> mlrun.common.schemas.ModelEndpointList:
|
|
@@ -3791,8 +3792,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3791
3792
|
If tsdb_metrics=False, this parameter will be ignored and no tsdb metrics
|
|
3792
3793
|
will be included.
|
|
3793
3794
|
:param top_level: Whether to return only top level model endpoints.
|
|
3794
|
-
:param
|
|
3795
|
-
|
|
3795
|
+
:param modes: Specifies the modes of the model endpoints. Can be "real-time" (0), "batch" (1),
|
|
3796
|
+
"batch_legacy" (2). If set to None, all are included.
|
|
3796
3797
|
:param uids: A list of unique ids to filter by.
|
|
3797
3798
|
:param latest_only: Whether to return only the latest model endpoint version.
|
|
3798
3799
|
:return: A list of model endpoints.
|
|
@@ -3801,6 +3802,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3801
3802
|
labels = self._parse_labels(labels)
|
|
3802
3803
|
if names and isinstance(names, str):
|
|
3803
3804
|
names = [names]
|
|
3805
|
+
if isinstance(modes, mm_constants.EndpointMode):
|
|
3806
|
+
modes = [modes]
|
|
3804
3807
|
response = self.api_call(
|
|
3805
3808
|
method=mlrun.common.types.HTTPMethod.GET,
|
|
3806
3809
|
path=path,
|
|
@@ -3816,7 +3819,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3816
3819
|
"tsdb-metrics": tsdb_metrics,
|
|
3817
3820
|
"metric": metric_list,
|
|
3818
3821
|
"top-level": top_level,
|
|
3819
|
-
"mode":
|
|
3822
|
+
"mode": modes,
|
|
3820
3823
|
"uid": uids,
|
|
3821
3824
|
"latest-only": latest_only,
|
|
3822
3825
|
},
|
|
@@ -4079,7 +4082,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4079
4082
|
response = self.api_call(
|
|
4080
4083
|
method=mlrun.common.types.HTTPMethod.DELETE,
|
|
4081
4084
|
path=f"projects/{project}/model-monitoring/functions",
|
|
4082
|
-
params={"
|
|
4085
|
+
params={"function": functions},
|
|
4083
4086
|
)
|
|
4084
4087
|
deletion_failed = False
|
|
4085
4088
|
if response.status_code == http.HTTPStatus.ACCEPTED:
|
|
@@ -4359,6 +4362,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4359
4362
|
version: Optional[str] = None,
|
|
4360
4363
|
tag: Optional[str] = None,
|
|
4361
4364
|
force_refresh: bool = False,
|
|
4365
|
+
object_type: HubSourceType = HubSourceType.functions,
|
|
4362
4366
|
):
|
|
4363
4367
|
"""
|
|
4364
4368
|
Retrieve the item catalog for a specified hub source.
|
|
@@ -4371,6 +4375,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4371
4375
|
rather than rely on cached information which may exist from previous get requests. For example,
|
|
4372
4376
|
if the source was re-built,
|
|
4373
4377
|
this will make the server get the updated information. Default is ``False``.
|
|
4378
|
+
:param object_type: Type of object to retrieve from the hub source (e.g: functions, modules).
|
|
4374
4379
|
:returns: :py:class:`~mlrun.common.schemas.hub.HubCatalog` object, which is essentially a list
|
|
4375
4380
|
of :py:class:`~mlrun.common.schemas.hub.HubItem` entries.
|
|
4376
4381
|
"""
|
|
@@ -4379,6 +4384,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4379
4384
|
"version": version,
|
|
4380
4385
|
"tag": tag,
|
|
4381
4386
|
"force-refresh": force_refresh,
|
|
4387
|
+
"object_type": object_type,
|
|
4382
4388
|
}
|
|
4383
4389
|
response = self.api_call(method="GET", path=path, params=params)
|
|
4384
4390
|
return mlrun.common.schemas.HubCatalog(**response.json())
|
|
@@ -4390,6 +4396,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4390
4396
|
version: Optional[str] = None,
|
|
4391
4397
|
tag: str = "latest",
|
|
4392
4398
|
force_refresh: bool = False,
|
|
4399
|
+
item_type: HubSourceType = HubSourceType.functions,
|
|
4393
4400
|
):
|
|
4394
4401
|
"""
|
|
4395
4402
|
Retrieve a specific hub item.
|
|
@@ -4401,6 +4408,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4401
4408
|
:param force_refresh: Make the server fetch the information from the actual hub
|
|
4402
4409
|
source, rather than
|
|
4403
4410
|
rely on cached information. Default is ``False``.
|
|
4411
|
+
:param item_type: The type of item to retrieve from the hub source (e.g: functions, modules).
|
|
4404
4412
|
:returns: :py:class:`~mlrun.common.schemas.hub.HubItem`.
|
|
4405
4413
|
"""
|
|
4406
4414
|
path = (f"hub/sources/{source_name}/items/{item_name}",)
|
|
@@ -4408,6 +4416,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4408
4416
|
"version": version,
|
|
4409
4417
|
"tag": tag,
|
|
4410
4418
|
"force-refresh": force_refresh,
|
|
4419
|
+
"item_type": item_type,
|
|
4411
4420
|
}
|
|
4412
4421
|
response = self.api_call(method="GET", path=path, params=params)
|
|
4413
4422
|
return mlrun.common.schemas.HubItem(**response.json())
|
|
@@ -4419,6 +4428,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4419
4428
|
asset_name: str,
|
|
4420
4429
|
version: Optional[str] = None,
|
|
4421
4430
|
tag: str = "latest",
|
|
4431
|
+
item_type: HubSourceType = HubSourceType.functions,
|
|
4422
4432
|
):
|
|
4423
4433
|
"""
|
|
4424
4434
|
Get hub asset from item.
|
|
@@ -4428,13 +4438,14 @@ class HTTPRunDB(RunDBInterface):
|
|
|
4428
4438
|
:param asset_name: Name of the asset to retrieve.
|
|
4429
4439
|
:param version: Get a specific version of the item. Default is ``None``.
|
|
4430
4440
|
:param tag: Get a specific version of the item identified by tag. Default is ``latest``.
|
|
4431
|
-
|
|
4441
|
+
:param item_type: The type of item to retrieve from the hub source (e.g: functions, modules).
|
|
4432
4442
|
:returns: http response with the asset in the content attribute
|
|
4433
4443
|
"""
|
|
4434
4444
|
path = f"hub/sources/{source_name}/items/{item_name}/assets/{asset_name}"
|
|
4435
4445
|
params = {
|
|
4436
4446
|
"version": version,
|
|
4437
4447
|
"tag": tag,
|
|
4448
|
+
"item_type": item_type,
|
|
4438
4449
|
}
|
|
4439
4450
|
response = self.api_call(method="GET", path=path, params=params)
|
|
4440
4451
|
return response
|
mlrun/db/nopdb.py
CHANGED
|
@@ -626,6 +626,7 @@ class NopDB(RunDBInterface):
|
|
|
626
626
|
tsdb_metrics: bool = False,
|
|
627
627
|
metric_list: Optional[list[str]] = None,
|
|
628
628
|
top_level: bool = False,
|
|
629
|
+
modes: Optional[list[mm_constants.EndpointMode]] = None,
|
|
629
630
|
uids: Optional[list[str]] = None,
|
|
630
631
|
latest_only: bool = False,
|
|
631
632
|
) -> mlrun.common.schemas.ModelEndpointList:
|
|
@@ -688,6 +689,7 @@ class NopDB(RunDBInterface):
|
|
|
688
689
|
version: Optional[str] = None,
|
|
689
690
|
tag: Optional[str] = None,
|
|
690
691
|
force_refresh: bool = False,
|
|
692
|
+
object_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
691
693
|
):
|
|
692
694
|
pass
|
|
693
695
|
|
|
@@ -699,6 +701,18 @@ class NopDB(RunDBInterface):
|
|
|
699
701
|
version: Optional[str] = None,
|
|
700
702
|
tag: str = "latest",
|
|
701
703
|
force_refresh: bool = False,
|
|
704
|
+
item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
705
|
+
):
|
|
706
|
+
pass
|
|
707
|
+
|
|
708
|
+
def get_hub_asset(
|
|
709
|
+
self,
|
|
710
|
+
source_name: str,
|
|
711
|
+
item_name: str,
|
|
712
|
+
asset_name: str,
|
|
713
|
+
version: Optional[str] = None,
|
|
714
|
+
tag: str = "latest",
|
|
715
|
+
item_type: mlrun.common.schemas.hub.HubSourceType = mlrun.common.schemas.hub.HubSourceType.functions,
|
|
702
716
|
):
|
|
703
717
|
pass
|
|
704
718
|
|
mlrun/k8s_utils.py
CHANGED
|
@@ -470,20 +470,6 @@ def _handle_allow_mode(
|
|
|
470
470
|
list[kubernetes.client.V1Toleration],
|
|
471
471
|
typing.Optional[kubernetes.client.V1Affinity],
|
|
472
472
|
]:
|
|
473
|
-
for op in [
|
|
474
|
-
mlrun.common.schemas.NodeSelectorOperator.node_selector_op_not_in.value,
|
|
475
|
-
mlrun.common.schemas.NodeSelectorOperator.node_selector_op_in.value,
|
|
476
|
-
]:
|
|
477
|
-
affinity = _prune_affinity_node_selector_requirement(
|
|
478
|
-
generate_preemptible_node_selector_requirements(op),
|
|
479
|
-
affinity=affinity,
|
|
480
|
-
)
|
|
481
|
-
|
|
482
|
-
node_selector = _prune_node_selector(
|
|
483
|
-
mlconfig.get_preemptible_node_selector(),
|
|
484
|
-
enriched_node_selector=node_selector,
|
|
485
|
-
)
|
|
486
|
-
|
|
487
473
|
tolerations = _merge_tolerations(tolerations, preemptible_tolerations)
|
|
488
474
|
return node_selector, tolerations, affinity
|
|
489
475
|
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -350,8 +350,8 @@ def _generate_model_endpoint(
|
|
|
350
350
|
project=project,
|
|
351
351
|
name=model_endpoint_name,
|
|
352
352
|
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
|
|
353
|
-
# Due to backwards compatibility,
|
|
354
|
-
mode=mlrun.common.schemas.model_monitoring.EndpointMode.
|
|
353
|
+
# Due to backwards compatibility, this endpoint will be created as a legacy batch endpoint.
|
|
354
|
+
mode=mlrun.common.schemas.model_monitoring.EndpointMode.BATCH_LEGACY,
|
|
355
355
|
),
|
|
356
356
|
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
357
357
|
function_name=function_name or "function",
|
|
mlrun/model_monitoring/applications/base.py
CHANGED
|
@@ -18,7 +18,7 @@ from abc import ABC, abstractmethod
|
|
|
18
18
|
from collections import defaultdict
|
|
19
19
|
from collections.abc import Iterator
|
|
20
20
|
from contextlib import contextmanager, nullcontext
|
|
21
|
-
from datetime import datetime, timedelta
|
|
21
|
+
from datetime import datetime, timedelta, timezone
|
|
22
22
|
from typing import Any, Literal, Optional, Union, cast
|
|
23
23
|
|
|
24
24
|
import pandas as pd
|
|
@@ -347,6 +347,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
347
347
|
feature_stats=feature_stats,
|
|
348
348
|
)
|
|
349
349
|
)
|
|
350
|
+
|
|
351
|
+
if (
|
|
352
|
+
monitoring_context.endpoint_id
|
|
353
|
+
and monitoring_context.sample_df.empty
|
|
354
|
+
):
|
|
355
|
+
# The current sample is empty
|
|
356
|
+
context.logger.warning(
|
|
357
|
+
"No sample data available for tracking",
|
|
358
|
+
application_name=application_name,
|
|
359
|
+
endpoint_id=monitoring_context.endpoint_id,
|
|
360
|
+
start_time=monitoring_context.start_infer_time,
|
|
361
|
+
end_time=monitoring_context.end_infer_time,
|
|
362
|
+
)
|
|
363
|
+
return
|
|
364
|
+
|
|
350
365
|
result = self.do_tracking(monitoring_context)
|
|
351
366
|
endpoints_output[monitoring_context.endpoint_id].append(
|
|
352
367
|
(monitoring_context, result)
|
|
@@ -591,6 +606,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
591
606
|
start_dt = datetime.fromisoformat(start)
|
|
592
607
|
end_dt = datetime.fromisoformat(end)
|
|
593
608
|
|
|
609
|
+
# If `start_dt` and `end_dt` do not include time zone information - change them to UTC
|
|
610
|
+
if (start_dt.tzinfo is None) and (end_dt.tzinfo is None):
|
|
611
|
+
start_dt = start_dt.replace(tzinfo=timezone.utc)
|
|
612
|
+
end_dt = end_dt.replace(tzinfo=timezone.utc)
|
|
613
|
+
elif (start_dt.tzinfo is None) or (end_dt.tzinfo is None):
|
|
614
|
+
raise mlrun.errors.MLRunValueError(
|
|
615
|
+
"The start and end times must either both include time zone information or both be naive (no time "
|
|
616
|
+
f"zone). Asserting the above failed, aborting the evaluate request: start={start}, end={end}."
|
|
617
|
+
)
|
|
618
|
+
|
|
594
619
|
if existing_data_handling != ExistingDataHandling.delete_all:
|
|
595
620
|
start_dt = cls._validate_monotonically_increasing_data(
|
|
596
621
|
application_schedules=application_schedules,
|
|
@@ -841,7 +866,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
841
866
|
:py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
|
|
842
867
|
model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
|
|
843
868
|
|
|
844
|
-
This function has default values for all of its arguments. You should
|
|
869
|
+
This function has default values for all of its arguments. You should change them when you want to pass
|
|
845
870
|
data to the application.
|
|
846
871
|
|
|
847
872
|
:param func_path: The path to the function. If ``None``, the current notebook is used.
|
|
@@ -858,6 +883,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
858
883
|
:param reference_data: Pandas data-frame or :py:class:`~mlrun.artifacts.dataset.DatasetArtifact` URI as
|
|
859
884
|
the reference dataset.
|
|
860
885
|
When set, its statistics override the model endpoint's feature statistics.
|
|
886
|
+
You do not need to have a model endpoint to use this option.
|
|
861
887
|
:param image: Docker image to run the job on (when running remotely).
|
|
862
888
|
:param with_repo: Whether to clone the current repo to the build source.
|
|
863
889
|
:param class_handler: The relative path to the class, useful when using Git sources or code from images.
|
|
@@ -878,8 +904,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
878
904
|
:param start: The start time of the endpoint's data, not included.
|
|
879
905
|
If you want the model endpoint's data at ``start`` included, you need to subtract a
|
|
880
906
|
small ``datetime.timedelta`` from it.
|
|
881
|
-
Make sure to include the time zone when constructing
|
|
882
|
-
manually.
|
|
907
|
+
Make sure to include the time zone when constructing ``datetime.datetime`` objects
|
|
908
|
+
manually. When both ``start`` and ``end`` times do not include a time zone, they will
|
|
909
|
+
be treated as UTC.
|
|
883
910
|
:param end: The end time of the endpoint's data, included.
|
|
884
911
|
Please note: when ``start`` and ``end`` are set, they create a left-open time interval
|
|
885
912
|
("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
|
|
@@ -902,13 +929,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
902
929
|
if ``endpoints`` are passed.
|
|
903
930
|
Note: the model monitoring infrastructure must be up for the writing to work.
|
|
904
931
|
:param existing_data_handling:
|
|
905
|
-
How to handle the existing application data for the model endpoints when writing
|
|
906
|
-
new data
|
|
907
|
-
|
|
932
|
+
How to handle the existing application data for the model endpoints when writing
|
|
933
|
+
new data whose requested ``start`` time precedes the ``end`` time of a previous run
|
|
934
|
+
that also wrote to the database. Relevant only when ``write_output=True``.
|
|
935
|
+
The options are:
|
|
908
936
|
|
|
909
|
-
- ``"fail_on_overlap"``:
|
|
910
|
-
|
|
911
|
-
- ``"skip_overlap"``: when the previously described situation occurs, the relevant
|
|
937
|
+
- ``"fail_on_overlap"``: Default. An error is raised.
|
|
938
|
+
- ``"skip_overlap"``: the overlapping data is ignored and the
|
|
912
939
|
time window is cut so that it starts at the earliest possible time after ``start``.
|
|
913
940
|
- ``"delete_all"``: delete all the data that was written by the application to the
|
|
914
941
|
model endpoints, regardless of the time window, and write the new data.
|
|
mlrun/model_monitoring/applications/context.py
CHANGED
|
@@ -24,15 +24,12 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
|
24
24
|
import mlrun.errors
|
|
25
25
|
import mlrun.feature_store as fstore
|
|
26
26
|
import mlrun.feature_store.feature_set as fs
|
|
27
|
-
import mlrun.features
|
|
28
27
|
import mlrun.serving
|
|
29
28
|
import mlrun.utils
|
|
30
29
|
from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
|
|
31
30
|
from mlrun.common.model_monitoring.helpers import FeatureStats
|
|
32
31
|
from mlrun.common.schemas import ModelEndpoint
|
|
33
|
-
from mlrun.model_monitoring.helpers import
|
|
34
|
-
calculate_inputs_statistics,
|
|
35
|
-
)
|
|
32
|
+
from mlrun.model_monitoring.helpers import calculate_inputs_statistics
|
|
36
33
|
|
|
37
34
|
|
|
38
35
|
class _ArtifactsLogger(Protocol):
|
|
mlrun/model_monitoring/controller.py
CHANGED
|
@@ -801,8 +801,17 @@ class MonitoringApplicationController:
|
|
|
801
801
|
logger.info("Starting monitoring controller chief")
|
|
802
802
|
applications_names = []
|
|
803
803
|
endpoints = self.project_obj.list_model_endpoints(
|
|
804
|
-
tsdb_metrics=False,
|
|
804
|
+
tsdb_metrics=False,
|
|
805
|
+
modes=[
|
|
806
|
+
mm_constants.EndpointMode.REAL_TIME,
|
|
807
|
+
mm_constants.EndpointMode.BATCH_LEGACY,
|
|
808
|
+
],
|
|
805
809
|
).endpoints
|
|
810
|
+
|
|
811
|
+
if not endpoints:
|
|
812
|
+
logger.info("No model endpoints found", project=self.project)
|
|
813
|
+
return
|
|
814
|
+
|
|
806
815
|
last_request_dict = self.tsdb_connector.get_last_request(
|
|
807
816
|
endpoint_ids=[mep.metadata.uid for mep in endpoints]
|
|
808
817
|
)
|
|
@@ -811,9 +820,6 @@ class MonitoringApplicationController:
|
|
|
811
820
|
mm_constants.EventFieldType.ENDPOINT_ID
|
|
812
821
|
)[mm_constants.ModelEndpointSchema.LAST_REQUEST].to_dict()
|
|
813
822
|
|
|
814
|
-
if not endpoints:
|
|
815
|
-
logger.info("No model endpoints found", project=self.project)
|
|
816
|
-
return
|
|
817
823
|
monitoring_functions = self.project_obj.list_model_monitoring_functions()
|
|
818
824
|
if monitoring_functions:
|
|
819
825
|
# if monitoring_functions: - TODO : ML-7700
|
|
@@ -859,7 +865,11 @@ class MonitoringApplicationController:
|
|
|
859
865
|
for endpoint in endpoints:
|
|
860
866
|
last_request = last_request_dict.get(endpoint.metadata.uid, None)
|
|
861
867
|
if isinstance(last_request, float):
|
|
862
|
-
last_request =
|
|
868
|
+
last_request = datetime.datetime.fromtimestamp(
|
|
869
|
+
last_request, tz=datetime.timezone.utc
|
|
870
|
+
)
|
|
871
|
+
elif isinstance(last_request, pd.Timestamp):
|
|
872
|
+
last_request = last_request.to_pydatetime()
|
|
863
873
|
endpoint.status.last_request = (
|
|
864
874
|
last_request or endpoint.status.last_request
|
|
865
875
|
)
|
|
mlrun/model_monitoring/db/_schedules.py
CHANGED
|
@@ -16,7 +16,7 @@ import json
|
|
|
16
16
|
import sys
|
|
17
17
|
from abc import ABC, abstractmethod
|
|
18
18
|
from contextlib import AbstractContextManager
|
|
19
|
-
from datetime import datetime
|
|
19
|
+
from datetime import datetime
|
|
20
20
|
from types import TracebackType
|
|
21
21
|
from typing import TYPE_CHECKING, Final, Optional
|
|
22
22
|
|
|
@@ -281,9 +281,7 @@ class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
|
|
|
281
281
|
self, endpoint_uid: str, last_analyzed: datetime
|
|
282
282
|
) -> None:
|
|
283
283
|
self._check_open_schedules()
|
|
284
|
-
self._schedules[endpoint_uid] = last_analyzed.
|
|
285
|
-
timezone.utc
|
|
286
|
-
).isoformat()
|
|
284
|
+
self._schedules[endpoint_uid] = last_analyzed.isoformat()
|
|
287
285
|
|
|
288
286
|
def delete_endpoints_last_analyzed(self, endpoint_uids: list[str]) -> None:
|
|
289
287
|
self._check_open_schedules()
|
|
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
CHANGED
|
@@ -721,7 +721,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
721
721
|
endpoint_ids: Union[str, list[str]],
|
|
722
722
|
start: Optional[datetime] = None,
|
|
723
723
|
end: Optional[datetime] = None,
|
|
724
|
-
) -> pd.DataFrame:
|
|
724
|
+
) -> Union[pd.DataFrame, dict[str, float]]:
|
|
725
|
+
if not endpoint_ids:
|
|
726
|
+
return {}
|
|
725
727
|
filter_query = self._generate_filter_query(
|
|
726
728
|
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
727
729
|
filter_values=endpoint_ids,
|
|
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py
CHANGED
|
@@ -25,10 +25,12 @@ from mlrun.utils import logger
|
|
|
25
25
|
|
|
26
26
|
def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
|
|
27
27
|
"""
|
|
28
|
-
Normalize user
|
|
29
|
-
to a form V3IO frames tolerates.
|
|
28
|
+
Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
|
|
30
29
|
|
|
31
|
-
|
|
30
|
+
- Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
|
|
31
|
+
- Replace invalid characters (e.g., '-') with '_'.
|
|
32
|
+
- Prefix keys starting with digits with '_'.
|
|
33
|
+
- Flatten nested dictionaries using dot notation, while normalizing keys recursively.
|
|
32
34
|
"""
|
|
33
35
|
prefix = "_"
|
|
34
36
|
|
|
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
|
|
|
38
40
|
return prefix + key
|
|
39
41
|
return key
|
|
40
42
|
|
|
41
|
-
|
|
43
|
+
def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
|
|
44
|
+
items = {}
|
|
45
|
+
for k, v in d.items():
|
|
46
|
+
new_key = norm_key(k)
|
|
47
|
+
full_key = f"{parent_key}.{new_key}" if parent_key else new_key
|
|
48
|
+
if isinstance(v, dict):
|
|
49
|
+
items.update(flatten_dict(v, full_key))
|
|
50
|
+
else:
|
|
51
|
+
items[full_key] = v
|
|
52
|
+
return items
|
|
53
|
+
|
|
54
|
+
return flatten_dict(event)
|
|
42
55
|
|
|
43
56
|
|
|
44
57
|
class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
|
|
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED
|
@@ -973,6 +973,9 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
973
973
|
start: Optional[datetime] = None,
|
|
974
974
|
end: Optional[datetime] = None,
|
|
975
975
|
) -> dict[str, float]:
|
|
976
|
+
if not endpoint_ids:
|
|
977
|
+
return {}
|
|
978
|
+
|
|
976
979
|
# Get the last request timestamp for each endpoint from the KV table.
|
|
977
980
|
# The result of the query is a list of dictionaries,
|
|
978
981
|
# each dictionary contains the endpoint id and the last request timestamp.
|
|
mlrun/model_monitoring/helpers.py
CHANGED
|
@@ -143,7 +143,7 @@ def get_stream_path(
|
|
|
143
143
|
return stream_uri.replace("v3io://", f"ds://{profile.name}")
|
|
144
144
|
|
|
145
145
|
elif isinstance(
|
|
146
|
-
profile, mlrun.datastore.datastore_profile.
|
|
146
|
+
profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
|
|
147
147
|
):
|
|
148
148
|
topic = mlrun.common.model_monitoring.helpers.get_kafka_topic(
|
|
149
149
|
project=project, function_name=function_name
|
|
@@ -152,7 +152,7 @@ def get_stream_path(
|
|
|
152
152
|
else:
|
|
153
153
|
raise mlrun.errors.MLRunValueError(
|
|
154
154
|
f"Received an unexpected stream profile type: {type(profile)}\n"
|
|
155
|
-
"Expects `DatastoreProfileV3io` or `
|
|
155
|
+
"Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
|
|
156
156
|
)
|
|
157
157
|
|
|
158
158
|
|
|
@@ -300,7 +300,7 @@ def _get_v3io_output_stream(
|
|
|
300
300
|
|
|
301
301
|
def _get_kafka_output_stream(
|
|
302
302
|
*,
|
|
303
|
-
kafka_profile: mlrun.datastore.datastore_profile.
|
|
303
|
+
kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream,
|
|
304
304
|
project: str,
|
|
305
305
|
function_name: str,
|
|
306
306
|
mock: bool = False,
|
|
@@ -356,7 +356,7 @@ def get_output_stream(
|
|
|
356
356
|
)
|
|
357
357
|
|
|
358
358
|
elif isinstance(
|
|
359
|
-
profile, mlrun.datastore.datastore_profile.
|
|
359
|
+
profile, mlrun.datastore.datastore_profile.DatastoreProfileKafkaStream
|
|
360
360
|
):
|
|
361
361
|
return _get_kafka_output_stream(
|
|
362
362
|
kafka_profile=profile,
|
|
@@ -368,7 +368,7 @@ def get_output_stream(
|
|
|
368
368
|
else:
|
|
369
369
|
raise mlrun.errors.MLRunValueError(
|
|
370
370
|
f"Received an unexpected stream profile type: {type(profile)}\n"
|
|
371
|
-
"Expects `DatastoreProfileV3io` or `
|
|
371
|
+
"Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaStream`."
|
|
372
372
|
)
|
|
373
373
|
|
|
374
374
|
|
mlrun/platforms/iguazio.py
CHANGED
|
@@ -96,7 +96,11 @@ class OutputStream:
|
|
|
96
96
|
if access_key:
|
|
97
97
|
v3io_client_kwargs["access_key"] = access_key
|
|
98
98
|
|
|
99
|
-
|
|
99
|
+
if not mock:
|
|
100
|
+
self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
|
|
101
|
+
else:
|
|
102
|
+
self._v3io_client = None
|
|
103
|
+
|
|
100
104
|
self._container, self._stream_path = split_path(stream_path)
|
|
101
105
|
self._shards = shards
|
|
102
106
|
self._retention_in_hours = retention_in_hours
|
|
@@ -105,7 +109,7 @@ class OutputStream:
|
|
|
105
109
|
self._mock = mock
|
|
106
110
|
self._mock_queue = []
|
|
107
111
|
|
|
108
|
-
def create_stream(self):
|
|
112
|
+
def create_stream(self) -> None:
|
|
109
113
|
# this import creates an import loop via the utils module, so putting it in execution path
|
|
110
114
|
from mlrun.utils.helpers import logger
|
|
111
115
|
|
|
@@ -210,7 +214,7 @@ class KafkaOutputStream:
|
|
|
210
214
|
self._initialized = False
|
|
211
215
|
|
|
212
216
|
def _lazy_init(self):
|
|
213
|
-
if self._initialized:
|
|
217
|
+
if self._initialized or self._mock:
|
|
214
218
|
return
|
|
215
219
|
|
|
216
220
|
import kafka
|