mlrun 1.8.0rc29__py3-none-any.whl → 1.8.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +2 -34
- mlrun/api/schemas/__init__.py +1 -6
- mlrun/artifacts/document.py +3 -3
- mlrun/artifacts/manager.py +1 -0
- mlrun/artifacts/model.py +3 -3
- mlrun/common/model_monitoring/helpers.py +16 -7
- mlrun/common/runtimes/constants.py +5 -0
- mlrun/common/schemas/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/__init__.py +0 -2
- mlrun/common/schemas/model_monitoring/constants.py +4 -7
- mlrun/common/schemas/model_monitoring/grafana.py +17 -11
- mlrun/config.py +9 -36
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/sources.py +14 -13
- mlrun/datastore/storeytargets.py +20 -3
- mlrun/db/httpdb.py +4 -30
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +16 -0
- mlrun/model_monitoring/api.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +23 -37
- mlrun/model_monitoring/applications/base.py +55 -40
- mlrun/model_monitoring/applications/context.py +0 -3
- mlrun/model_monitoring/applications/results.py +16 -16
- mlrun/model_monitoring/controller.py +35 -31
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -5
- mlrun/model_monitoring/db/tsdb/base.py +60 -39
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +122 -53
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +140 -14
- mlrun/model_monitoring/helpers.py +124 -16
- mlrun/model_monitoring/stream_processing.py +6 -21
- mlrun/projects/pipelines.py +11 -3
- mlrun/projects/project.py +104 -115
- mlrun/run.py +2 -2
- mlrun/runtimes/nuclio/function.py +4 -2
- mlrun/serving/routers.py +3 -4
- mlrun/serving/server.py +10 -8
- mlrun/serving/states.py +12 -2
- mlrun/serving/v2_serving.py +25 -20
- mlrun/utils/async_http.py +32 -19
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/logger.py +14 -10
- mlrun/utils/notifications/notification_pusher.py +25 -0
- mlrun/utils/regex.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/METADATA +4 -4
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/RECORD +50 -50
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc29.dist-info → mlrun-1.8.0rc31.dist-info}/top_level.txt +0 -0
|
@@ -11,10 +11,11 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import asyncio
|
|
15
|
+
import math
|
|
15
16
|
from datetime import datetime, timedelta, timezone
|
|
16
17
|
from io import StringIO
|
|
17
|
-
from typing import Literal, Optional, Union
|
|
18
|
+
from typing import Callable, Literal, Optional, Union
|
|
18
19
|
|
|
19
20
|
import pandas as pd
|
|
20
21
|
import v3io_frames
|
|
@@ -491,8 +492,9 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
491
492
|
interval: Optional[str] = None,
|
|
492
493
|
agg_funcs: Optional[list[str]] = None,
|
|
493
494
|
sliding_window_step: Optional[str] = None,
|
|
495
|
+
get_raw: bool = False,
|
|
494
496
|
**kwargs,
|
|
495
|
-
) -> pd.DataFrame:
|
|
497
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
496
498
|
"""
|
|
497
499
|
Getting records from V3IO TSDB data collection.
|
|
498
500
|
:param table: Path to the collection to query.
|
|
@@ -517,6 +519,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
517
519
|
`sliding_window_step` is provided, interval must be provided as well. Provided
|
|
518
520
|
as a string in the format of '1m', '1h', etc.
|
|
519
521
|
:param kwargs: Additional keyword arguments passed to the read method of frames client.
|
|
522
|
+
:param get_raw: Whether to return the request as raw frames rather than a pandas dataframe.
|
|
523
|
+
Defaults to False. This can greatly improve performance when a dataframe isn't
|
|
524
|
+
needed.
|
|
525
|
+
|
|
520
526
|
:return: DataFrame with the provided attributes from the data collection.
|
|
521
527
|
:raise: MLRunNotFoundError if the provided table wasn't found.
|
|
522
528
|
"""
|
|
@@ -530,7 +536,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
530
536
|
aggregators = ",".join(agg_funcs) if agg_funcs else None
|
|
531
537
|
table_path = self.tables[table]
|
|
532
538
|
try:
|
|
533
|
-
|
|
539
|
+
res = self.frames_client.read(
|
|
534
540
|
backend=_TSDB_BE,
|
|
535
541
|
table=table_path,
|
|
536
542
|
start=start,
|
|
@@ -540,15 +546,18 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
540
546
|
aggregation_window=interval,
|
|
541
547
|
aggregators=aggregators,
|
|
542
548
|
step=sliding_window_step,
|
|
549
|
+
get_raw=get_raw,
|
|
543
550
|
**kwargs,
|
|
544
551
|
)
|
|
552
|
+
if get_raw:
|
|
553
|
+
res = list(res)
|
|
545
554
|
except v3io_frames.Error as err:
|
|
546
555
|
if _is_no_schema_error(err):
|
|
547
|
-
return pd.DataFrame()
|
|
556
|
+
return [] if get_raw else pd.DataFrame()
|
|
548
557
|
else:
|
|
549
558
|
raise err
|
|
550
559
|
|
|
551
|
-
return
|
|
560
|
+
return res
|
|
552
561
|
|
|
553
562
|
def _get_v3io_source_directory(self) -> str:
|
|
554
563
|
"""
|
|
@@ -778,16 +787,23 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
778
787
|
endpoint_ids: Union[str, list[str]],
|
|
779
788
|
start: Optional[datetime] = None,
|
|
780
789
|
end: Optional[datetime] = None,
|
|
781
|
-
|
|
790
|
+
get_raw: bool = False,
|
|
791
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
782
792
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
783
793
|
start, end = self._get_start_end(start, end)
|
|
784
|
-
|
|
794
|
+
res = self._get_records(
|
|
785
795
|
table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
|
|
786
796
|
start=start,
|
|
787
797
|
end=end,
|
|
788
798
|
filter_query=filter_query,
|
|
789
799
|
agg_funcs=["last"],
|
|
800
|
+
get_raw=get_raw,
|
|
790
801
|
)
|
|
802
|
+
|
|
803
|
+
if get_raw:
|
|
804
|
+
return res
|
|
805
|
+
|
|
806
|
+
df = res
|
|
791
807
|
if not df.empty:
|
|
792
808
|
df.rename(
|
|
793
809
|
columns={
|
|
@@ -811,11 +827,12 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
811
827
|
endpoint_ids: Union[str, list[str]],
|
|
812
828
|
start: Optional[datetime] = None,
|
|
813
829
|
end: Optional[datetime] = None,
|
|
814
|
-
|
|
830
|
+
get_raw: bool = False,
|
|
831
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
815
832
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
816
833
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
817
834
|
start, end = self._get_start_end(start, end)
|
|
818
|
-
|
|
835
|
+
res = self._get_records(
|
|
819
836
|
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
820
837
|
start=start,
|
|
821
838
|
end=end,
|
|
@@ -823,7 +840,12 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
823
840
|
filter_query=filter_query,
|
|
824
841
|
agg_funcs=["max"],
|
|
825
842
|
group_by="endpoint_id",
|
|
843
|
+
get_raw=get_raw,
|
|
826
844
|
)
|
|
845
|
+
if get_raw:
|
|
846
|
+
return res
|
|
847
|
+
|
|
848
|
+
df = res
|
|
827
849
|
if not df.empty:
|
|
828
850
|
df.columns = [
|
|
829
851
|
col[len("max(") : -1] if "max(" in col else col for col in df.columns
|
|
@@ -884,21 +906,28 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
884
906
|
endpoint_ids: Union[str, list[str]],
|
|
885
907
|
start: Optional[datetime] = None,
|
|
886
908
|
end: Optional[datetime] = None,
|
|
887
|
-
|
|
909
|
+
get_raw: bool = False,
|
|
910
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
888
911
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
889
912
|
if filter_query:
|
|
890
913
|
filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
891
914
|
else:
|
|
892
915
|
filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
893
916
|
start, end = self._get_start_end(start, end)
|
|
894
|
-
|
|
917
|
+
res = self._get_records(
|
|
895
918
|
table=mm_schemas.FileTargetKind.ERRORS,
|
|
896
919
|
start=start,
|
|
897
920
|
end=end,
|
|
898
921
|
columns=[mm_schemas.EventFieldType.ERROR_COUNT],
|
|
899
922
|
filter_query=filter_query,
|
|
900
923
|
agg_funcs=["count"],
|
|
924
|
+
get_raw=get_raw,
|
|
901
925
|
)
|
|
926
|
+
|
|
927
|
+
if get_raw:
|
|
928
|
+
return res
|
|
929
|
+
|
|
930
|
+
df = res
|
|
902
931
|
if not df.empty:
|
|
903
932
|
df.rename(
|
|
904
933
|
columns={
|
|
@@ -914,18 +943,25 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
914
943
|
endpoint_ids: Union[str, list[str]],
|
|
915
944
|
start: Optional[datetime] = None,
|
|
916
945
|
end: Optional[datetime] = None,
|
|
917
|
-
|
|
946
|
+
get_raw: bool = False,
|
|
947
|
+
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
918
948
|
filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
|
|
919
949
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
920
950
|
start, end = self._get_start_end(start, end)
|
|
921
|
-
|
|
951
|
+
res = self._get_records(
|
|
922
952
|
table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
|
|
923
953
|
start=start,
|
|
924
954
|
end=end,
|
|
925
955
|
columns=[mm_schemas.EventFieldType.LATENCY],
|
|
926
956
|
filter_query=filter_query,
|
|
927
957
|
agg_funcs=["avg"],
|
|
958
|
+
get_raw=get_raw,
|
|
928
959
|
)
|
|
960
|
+
|
|
961
|
+
if get_raw:
|
|
962
|
+
return res
|
|
963
|
+
|
|
964
|
+
df = res
|
|
929
965
|
if not df.empty:
|
|
930
966
|
df.dropna(inplace=True)
|
|
931
967
|
df.rename(
|
|
@@ -935,3 +971,93 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
935
971
|
inplace=True,
|
|
936
972
|
)
|
|
937
973
|
return df.reset_index(drop=True)
|
|
974
|
+
|
|
975
|
+
async def add_basic_metrics(
    self,
    model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
    project: str,
    run_in_threadpool: Callable,
) -> list[mlrun.common.schemas.ModelEndpoint]:
    """
    Fetch basic metrics from V3IO TSDB and add them to MEP objects.

    The four queries (error count, last request, average latency and drift status)
    are issued with `get_raw=True` through `run_in_threadpool` and awaited together.
    Each returned value is then written onto the `status` of the matching endpoint.
    Values that are `None` or NaN, and rows whose endpoint id is not part of
    `model_endpoint_objects`, are skipped.

    :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                    be filled with the relevant basic metrics.
    :param project:                The name of the project. Not read by this implementation.
    :param run_in_threadpool:      A function that runs another function in a thread pool.
                                    Called as `run_in_threadpool(func, **kwargs)` and expected
                                    to return an awaitable.

    :return: The `ModelEndpoint` objects (one per distinct uid), updated in place.
    """

    uids = []
    model_endpoint_objects_by_uid = {}
    for model_endpoint_object in model_endpoint_objects:
        uid = model_endpoint_object.metadata.uid
        uids.append(uid)
        model_endpoint_objects_by_uid[uid] = model_endpoint_object

    # One blocking TSDB query per metric; run them concurrently off the event loop.
    # The unpacking order below must follow the order of this list.
    queries = (
        self.get_error_count,
        self.get_last_request,
        self.get_avg_latency,
        self.get_drift_status,
    )
    coroutines = [
        run_in_threadpool(query, endpoint_ids=uids, get_raw=True)
        for query in queries
    ]

    (
        error_count_res,
        last_request_res,
        avg_latency_res,
        drift_status_res,
    ) = await asyncio.gather(*coroutines)

    def add_metric(
        metric: str,
        column_name: str,
        frames: list,
    ):
        # Copy `column_name` values from the raw frames to `status.<metric>`
        for frame in frames:
            endpoint_ids = frame.column_data("endpoint_id")
            metric_data = frame.column_data(column_name)
            for index, endpoint_id in enumerate(endpoint_ids):
                mep = model_endpoint_objects_by_uid.get(endpoint_id)
                value = metric_data[index]
                if mep and value is not None and not math.isnan(value):
                    setattr(mep.status, metric, value)

    # (status attribute, aggregated column in the raw frame, frames holding that column).
    # Each attribute must be paired with the result of its own query: the average
    # latency comes from `avg(latency)` and the drift status from `max(result_status)`.
    for metric, column_name, frames in (
        ("error_count", "count(error_count)", error_count_res),
        ("last_request", "last(last_request_timestamp)", last_request_res),
        ("avg_latency", "avg(latency)", avg_latency_res),
        ("result_status", "max(result_status)", drift_status_res),
    ):
        add_metric(metric, column_name, frames)

    return list(model_endpoint_objects_by_uid.values())
|
|
@@ -16,7 +16,7 @@ import datetime
|
|
|
16
16
|
import functools
|
|
17
17
|
import os
|
|
18
18
|
from fnmatch import fnmatchcase
|
|
19
|
-
from typing import TYPE_CHECKING, Callable, Optional, TypedDict, cast
|
|
19
|
+
from typing import TYPE_CHECKING, Callable, Optional, TypedDict, Union, cast
|
|
20
20
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
import pandas as pd
|
|
@@ -28,6 +28,7 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
|
28
28
|
import mlrun.data_types.infer
|
|
29
29
|
import mlrun.datastore.datastore_profile
|
|
30
30
|
import mlrun.model_monitoring
|
|
31
|
+
import mlrun.platforms.iguazio
|
|
31
32
|
import mlrun.utils.helpers
|
|
32
33
|
from mlrun.common.schemas import ModelEndpoint
|
|
33
34
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
@@ -245,21 +246,6 @@ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataI
|
|
|
245
246
|
)
|
|
246
247
|
|
|
247
248
|
|
|
248
|
-
def get_tsdb_connection_string(
|
|
249
|
-
secret_provider: Optional[Callable[[str], str]] = None,
|
|
250
|
-
) -> str:
|
|
251
|
-
"""Get TSDB connection string from the project secret. If wasn't set, take it from the system
|
|
252
|
-
configurations.
|
|
253
|
-
:param secret_provider: An optional secret provider to get the connection string secret.
|
|
254
|
-
:return: Valid TSDB connection string.
|
|
255
|
-
"""
|
|
256
|
-
|
|
257
|
-
return mlrun.get_secret_or_env(
|
|
258
|
-
key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
|
|
259
|
-
secret_provider=secret_provider,
|
|
260
|
-
)
|
|
261
|
-
|
|
262
|
-
|
|
263
249
|
def _get_profile(
|
|
264
250
|
project: str,
|
|
265
251
|
secret_provider: Optional[Callable[[str], str]],
|
|
@@ -294,6 +280,112 @@ _get_stream_profile = functools.partial(
|
|
|
294
280
|
)
|
|
295
281
|
|
|
296
282
|
|
|
283
|
+
def _get_v3io_output_stream(
    *,
    v3io_profile: mlrun.datastore.datastore_profile.DatastoreProfileV3io,
    project: str,
    function_name: str,
    v3io_access_key: Optional[str],
    mock: bool = False,
) -> mlrun.platforms.iguazio.OutputStream:
    """
    Build a V3IO output stream for a model monitoring function.

    :param v3io_profile:    V3IO datastore profile; its access key is the fallback credential.
    :param project:         Project name.
    :param function_name:   Name of the function the stream belongs to.
    :param v3io_access_key: Access key that, when truthy, takes precedence over the profile's key.
    :param mock:            Passed through to `OutputStream`.

    :return: An `OutputStream` over the project's online monitoring stream path.
    """
    iguazio = mlrun.platforms.iguazio

    # The stream location is derived from the system configuration, not from the profile
    endpoint, stream_path = iguazio.parse_path(
        mlrun.mlconf.get_model_monitoring_file_target_path(
            project=project,
            kind=mm_constants.FileTargetKind.STREAM,
            target="online",
            function_name=function_name,
        )
    )

    access_key = v3io_access_key or v3io_profile.v3io_access_key
    return iguazio.OutputStream(
        stream_path,
        endpoint=endpoint,
        access_key=access_key,
        mock=mock,
    )
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _get_kafka_output_stream(
    *,
    kafka_profile: mlrun.datastore.datastore_profile.DatastoreProfileKafkaSource,
    project: str,
    function_name: str,
    mock: bool = False,
) -> mlrun.platforms.iguazio.KafkaOutputStream:
    """
    Build a Kafka output stream for a model monitoring function.

    :param kafka_profile: Kafka source datastore profile providing the brokers and attributes.
    :param project:       Project name, used to resolve the topic.
    :param function_name: Function name, used to resolve the topic.
    :param mock:          Passed through to `KafkaOutputStream`.

    :return: A `KafkaOutputStream` for the resolved topic.
    """
    attributes = kafka_profile.attributes()
    options = attributes.get("producer_options", {})

    if "sasl" in attributes:
        credentials = attributes["sasl"]
        # SASL settings are merged into (and override) any producer options of the profile
        options.update(
            security_protocol="SASL_PLAINTEXT",
            sasl_mechanism=credentials["mechanism"],
            sasl_plain_username=credentials["user"],
            sasl_plain_password=credentials["password"],
        )

    return mlrun.platforms.iguazio.KafkaOutputStream(
        brokers=kafka_profile.brokers,
        topic=mlrun.common.model_monitoring.helpers.get_kafka_topic(
            project=project, function_name=function_name
        ),
        producer_options=options,
        mock=mock,
    )
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def get_output_stream(
    project: str,
    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
    secret_provider: Optional[Callable[[str], str]] = None,
    profile: Optional[mlrun.datastore.datastore_profile.DatastoreProfile] = None,
    v3io_access_key: Optional[str] = None,
    mock: bool = False,
) -> Union[
    mlrun.platforms.iguazio.OutputStream, mlrun.platforms.iguazio.KafkaOutputStream
]:
    """
    Get the output stream object of a model monitoring function, according to the stream profile.

    :param project:         Project name.
    :param function_name:   Application name. Default is model_monitoring_stream.
    :param secret_provider: Optional secret provider, used to resolve the stream profile
                            when `profile` is not given.
    :param profile:         Optional datastore profile of the stream (V3IO/KafkaSource profile).
    :param v3io_access_key: Optional V3IO access key. Only used for a V3IO profile.
    :param mock:            Should the output stream be mocked or not.
    :return:                A V3IO `OutputStream` or a `KafkaOutputStream`, matching the profile type.
    :raise:                 MLRunValueError if the profile is neither a V3IO nor a Kafka source profile.
    """
    profiles = mlrun.datastore.datastore_profile

    if not profile:
        profile = _get_stream_profile(project=project, secret_provider=secret_provider)

    if isinstance(profile, profiles.DatastoreProfileV3io):
        return _get_v3io_output_stream(
            v3io_profile=profile,
            project=project,
            function_name=function_name,
            v3io_access_key=v3io_access_key,
            mock=mock,
        )

    if isinstance(profile, profiles.DatastoreProfileKafkaSource):
        return _get_kafka_output_stream(
            kafka_profile=profile,
            project=project,
            function_name=function_name,
            mock=mock,
        )

    raise mlrun.errors.MLRunValueError(
        f"Received an unexpected stream profile type: {type(profile)}\n"
        "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
    )
|
|
387
|
+
|
|
388
|
+
|
|
297
389
|
def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
|
|
298
390
|
"""
|
|
299
391
|
Convert a batch dictionary to timedelta.
|
|
@@ -447,6 +539,22 @@ def get_result_instance_fqn(
|
|
|
447
539
|
return f"{model_endpoint_id}.{app_name}.result.{result_name}"
|
|
448
540
|
|
|
449
541
|
|
|
542
|
+
def get_alert_name_from_result_fqn(result_fqn: str) -> str:
    """
    Build an alert name out of a result fully-qualified name.

    :param result_fqn: current get_result_instance_fqn format: `{model_endpoint_id}.{app_name}.result.{result_name}`

    :return: shorter fqn without forbidden alert characters:
             `{model_endpoint_id}_{app_name}_{result_name}`.
    :raise:  MLRunValueError if `result_fqn` does not have exactly four dot-separated
             components, or if the third component is not `result`.
    """
    # Parse once; the validated parts are reused to build the name
    parts = result_fqn.split(".")
    if len(parts) != 4 or parts[2] != "result":
        raise mlrun.errors.MLRunValueError(
            f"result_fqn: {result_fqn} is not in the correct format: {{model_endpoint_id}}.{{app_name}}."
            f"result.{{result_name}}"
        )
    # Name format cannot contain "."
    # The third component is always `result`, so it is not necessary for checking uniqueness.
    model_endpoint_id, app_name, _, result_name = parts
    return "_".join((model_endpoint_id, app_name, result_name))
|
|
556
|
+
|
|
557
|
+
|
|
450
558
|
def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
|
|
451
559
|
return get_result_instance_fqn(
|
|
452
560
|
model_endpoint_id,
|
|
@@ -13,18 +13,14 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import datetime
|
|
16
|
-
import os
|
|
17
16
|
import typing
|
|
18
17
|
|
|
19
18
|
import storey
|
|
20
19
|
|
|
21
20
|
import mlrun
|
|
22
21
|
import mlrun.common.model_monitoring.helpers
|
|
23
|
-
import mlrun.config
|
|
24
|
-
import mlrun.datastore.targets
|
|
25
22
|
import mlrun.feature_store as fstore
|
|
26
23
|
import mlrun.feature_store.steps
|
|
27
|
-
import mlrun.model_monitoring.db
|
|
28
24
|
import mlrun.serving.states
|
|
29
25
|
import mlrun.utils
|
|
30
26
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
@@ -68,14 +64,11 @@ class EventStreamProcessor:
|
|
|
68
64
|
parquet_batching_max_events=self.parquet_batching_max_events,
|
|
69
65
|
)
|
|
70
66
|
|
|
71
|
-
self.storage_options = None
|
|
72
67
|
self.tsdb_configurations = {}
|
|
73
68
|
if not mlrun.mlconf.is_ce_mode():
|
|
74
69
|
self._initialize_v3io_configurations(
|
|
75
70
|
model_monitoring_access_key=model_monitoring_access_key
|
|
76
71
|
)
|
|
77
|
-
elif self.parquet_path.startswith("s3://"):
|
|
78
|
-
self.storage_options = mlrun.mlconf.get_s3_storage_options()
|
|
79
72
|
|
|
80
73
|
def _initialize_v3io_configurations(
|
|
81
74
|
self,
|
|
@@ -90,17 +83,12 @@ class EventStreamProcessor:
|
|
|
90
83
|
self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
|
|
91
84
|
self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
|
|
92
85
|
|
|
93
|
-
self.v3io_access_key = v3io_access_key or mlrun.
|
|
94
|
-
"V3IO_ACCESS_KEY"
|
|
95
|
-
)
|
|
86
|
+
self.v3io_access_key = v3io_access_key or mlrun.mlconf.get_v3io_access_key()
|
|
96
87
|
self.model_monitoring_access_key = (
|
|
97
88
|
model_monitoring_access_key
|
|
98
|
-
or
|
|
89
|
+
or mlrun.get_secret_or_env(ProjectSecretKeys.ACCESS_KEY)
|
|
99
90
|
or self.v3io_access_key
|
|
100
91
|
)
|
|
101
|
-
self.storage_options = dict(
|
|
102
|
-
v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
|
|
103
|
-
)
|
|
104
92
|
|
|
105
93
|
# TSDB path and configurations
|
|
106
94
|
tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
@@ -251,12 +239,12 @@ class EventStreamProcessor:
|
|
|
251
239
|
# Write the Parquet target file, partitioned by key (endpoint_id) and time.
|
|
252
240
|
def apply_parquet_target():
|
|
253
241
|
graph.add_step(
|
|
254
|
-
"
|
|
242
|
+
"mlrun.datastore.storeytargets.ParquetStoreyTarget",
|
|
243
|
+
alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
|
|
255
244
|
name="ParquetTarget",
|
|
256
245
|
after="ProcessBeforeParquet",
|
|
257
246
|
graph_shape="cylinder",
|
|
258
247
|
path=self.parquet_path,
|
|
259
|
-
storage_options=self.storage_options,
|
|
260
248
|
max_events=self.parquet_batching_max_events,
|
|
261
249
|
flush_after_seconds=self.parquet_batching_timeout_secs,
|
|
262
250
|
attributes={"infer_columns_from_data": True},
|
|
@@ -392,10 +380,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
392
380
|
if not is_not_none(model, [EventFieldType.MODEL]):
|
|
393
381
|
return None
|
|
394
382
|
|
|
395
|
-
version = full_event.body.get(EventFieldType.VERSION)
|
|
396
|
-
versioned_model = f"{model}:{version}" if version else f"{model}:latest"
|
|
397
|
-
|
|
398
|
-
full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
|
|
399
383
|
endpoint_id = event[EventFieldType.ENDPOINT_ID]
|
|
400
384
|
|
|
401
385
|
# In case this process fails, resume state from existing record
|
|
@@ -493,7 +477,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
493
477
|
events.append(
|
|
494
478
|
{
|
|
495
479
|
EventFieldType.FUNCTION_URI: function_uri,
|
|
496
|
-
EventFieldType.MODEL: versioned_model,
|
|
497
480
|
EventFieldType.ENDPOINT_NAME: event.get(EventFieldType.MODEL),
|
|
498
481
|
EventFieldType.MODEL_CLASS: model_class,
|
|
499
482
|
EventFieldType.TIMESTAMP: timestamp,
|
|
@@ -802,6 +785,8 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
|
|
|
802
785
|
|
|
803
786
|
"""
|
|
804
787
|
event[mapping_dictionary] = {}
|
|
788
|
+
diff = len(named_iters) - len(values_iters)
|
|
789
|
+
values_iters += [None] * diff
|
|
805
790
|
for name, value in zip(named_iters, values_iters):
|
|
806
791
|
event[name] = value
|
|
807
792
|
event[mapping_dictionary][name] = value
|
mlrun/projects/pipelines.py
CHANGED
|
@@ -615,13 +615,21 @@ class _KFPRunner(_PipelineRunner):
|
|
|
615
615
|
"Notifications will only be sent if you wait for pipeline completion. "
|
|
616
616
|
"Some of the features (like setting message or severity level) are not supported."
|
|
617
617
|
)
|
|
618
|
-
# for start message, fallback to old notification behavior
|
|
619
618
|
for notification in notifications or []:
|
|
620
619
|
params = notification.params
|
|
621
620
|
params.update(notification.secret_params)
|
|
622
|
-
project.notifiers.add_notification(
|
|
621
|
+
project.notifiers.add_notification(
|
|
622
|
+
notification_type=notification.kind,
|
|
623
|
+
params=params,
|
|
624
|
+
name=notification.name,
|
|
625
|
+
message=notification.message,
|
|
626
|
+
severity=notification.severity,
|
|
627
|
+
when=notification.when,
|
|
628
|
+
condition=notification.condition,
|
|
629
|
+
secret_params=notification.secret_params,
|
|
630
|
+
)
|
|
623
631
|
|
|
624
|
-
project.spec.notifications =
|
|
632
|
+
project.spec.notifications = project.notifiers.server_notifications
|
|
625
633
|
|
|
626
634
|
run_id = _run_pipeline(
|
|
627
635
|
workflow_handler,
|