mlrun 1.10.0rc10__py3-none-any.whl → 1.10.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/constants.py +11 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/functions.py +2 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +19 -1
- mlrun/common/schemas/serving.py +1 -0
- mlrun/common/schemas/workflow.py +3 -2
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +4 -2
- mlrun/datastore/datastore.py +46 -14
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/s3.py +16 -5
- mlrun/datastore/sources.py +2 -2
- mlrun/datastore/targets.py +2 -2
- mlrun/db/__init__.py +0 -1
- mlrun/db/base.py +12 -0
- mlrun/db/httpdb.py +35 -0
- mlrun/db/nopdb.py +10 -0
- mlrun/execution.py +12 -0
- mlrun/frameworks/tf_keras/mlrun_interface.py +7 -18
- mlrun/launcher/base.py +1 -0
- mlrun/launcher/client.py +1 -0
- mlrun/launcher/local.py +4 -0
- mlrun/model.py +15 -4
- mlrun/model_monitoring/applications/base.py +74 -56
- mlrun/model_monitoring/db/tsdb/base.py +52 -19
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +179 -11
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +26 -11
- mlrun/model_monitoring/helpers.py +48 -0
- mlrun/projects/pipelines.py +12 -3
- mlrun/projects/project.py +30 -0
- mlrun/runtimes/daskjob.py +2 -0
- mlrun/runtimes/kubejob.py +4 -0
- mlrun/runtimes/mpijob/abstract.py +2 -0
- mlrun/runtimes/mpijob/v1.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +59 -0
- mlrun/runtimes/pod.py +3 -0
- mlrun/runtimes/remotesparkjob.py +2 -0
- mlrun/runtimes/sparkjob/spark3job.py +2 -0
- mlrun/serving/server.py +97 -3
- mlrun/serving/states.py +146 -38
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/METADATA +13 -6
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/RECORD +49 -51
- mlrun/db/sql_types.py +0 -160
- mlrun/utils/db.py +0 -71
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/top_level.txt +0 -0

mlrun/model_monitoring/applications/base.py

@@ -19,7 +19,7 @@ from collections import defaultdict
 from collections.abc import Iterator
 from contextlib import contextmanager
 from datetime import datetime, timedelta
-from typing import Any, Optional, Union, cast
+from typing import Any, Literal, Optional, Union, cast
 
 import pandas as pd
 
@@ -223,7 +223,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         context: "mlrun.MLClientCtx",
         sample_data: Optional[pd.DataFrame] = None,
         reference_data: Optional[pd.DataFrame] = None,
-        endpoints:
+        endpoints: Union[
+            list[tuple[str, str]], list[list[str]], list[str], Literal["all"], None
+        ] = None,
         start: Optional[str] = None,
         end: Optional[str] = None,
         base_period: Optional[int] = None,
@@ -280,10 +282,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             return result
 
         if endpoints is not None:
+            resolved_endpoints = self._handle_endpoints_type_evaluate(
+                project=project, endpoints=endpoints
+            )
             for window_start, window_end in self._window_generator(
                 start, end, base_period
             ):
-                for endpoint_name, endpoint_id in
+                for endpoint_name, endpoint_id in resolved_endpoints:
                     result = call_do_tracking(
                         event={
                             mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
@@ -306,52 +311,63 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
 
     @staticmethod
     def _handle_endpoints_type_evaluate(
-        project:
-        endpoints: Union[
-
-
-
-
-
-
-
-
-
-
+        project: "mlrun.MlrunProject",
+        endpoints: Union[
+            list[tuple[str, str]], list[list[str]], list[str], Literal["all"]
+        ],
+    ) -> Union[list[tuple[str, str]], list[list[str]]]:
+        if not endpoints:
+            raise mlrun.errors.MLRunValueError(
+                "The endpoints list cannot be empty. If you want to run on all the endpoints, "
+                'use `endpoints="all"`.'
+            )
+
+        if isinstance(endpoints, list) and isinstance(endpoints[0], (tuple, list)):
+            return endpoints
+
+        if not (isinstance(endpoints, list) and isinstance(endpoints[0], str)):
+            if isinstance(endpoints, str):
+                if endpoints != "all":
+                    raise mlrun.errors.MLRunValueError(
+                        'A string input for `endpoints` can only be "all" for all the model endpoints in '
+                        "the project. If you want to select a single model endpoint with the given name, "
+                        f'use a list: `endpoints=["{endpoints}"]`.'
                     )
-
+            else:
+                raise mlrun.errors.MLRunValueError(
+                    f"Could not resolve endpoints as list of [(name, uid)], {endpoints=}"
                 )
-        if endpoints_list:
-            list_endpoints_result = [
-                (endpoint.metadata.name, endpoint.metadata.uid)
-                for endpoint in endpoints_list
-            ]
-            retrieve_ep_names = list(
-                map(lambda endpoint: endpoint[0], list_endpoints_result)
-            )
-            missing = set(
-                [endpoints] if isinstance(endpoints, str) else endpoints
-            ) - set(retrieve_ep_names)
-            if missing:
-                logger.warning(
-                    "Could not list all the required endpoints.",
-                    missing_endpoint=missing,
-                    endpoints=list_endpoints_result,
-                )
-            endpoints = list_endpoints_result
-        else:
-            raise mlrun.errors.MLRunNotFoundError(
-                f"Did not find any model_endpoint named ' {endpoints}'"
-            )
 
-
-
-
-
-
-
-
+        if endpoints == "all":
+            endpoint_names = None
+        else:
+            endpoint_names = endpoints
+
+        endpoints_list = project.list_model_endpoints(
+            names=endpoint_names, latest_only=True
+        ).endpoints
+        if endpoints_list:
+            list_endpoints_result = [
+                (endpoint.metadata.name, endpoint.metadata.uid)
+                for endpoint in endpoints_list
+            ]
+            if endpoints != "all":
+                missing = set(endpoints) - {
+                    endpoint[0] for endpoint in list_endpoints_result
+                }
+                if missing:
+                    logger.warning(
+                        "Could not list all the required endpoints",
+                        missing_endpoint=missing,
+                        endpoints=list_endpoints_result,
+                    )
+            return list_endpoints_result
+        else:
+            if endpoints != "all":
+                err_msg_suffix = f" named '{endpoints}'"
+            raise mlrun.errors.MLRunNotFoundError(
+                f"Did not find any model endpoints {err_msg_suffix}"
+            )
 
     @staticmethod
     def _window_generator(
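For reference, the rewritten helper normalizes every accepted `endpoints` value into a list of `(name, uid)` pairs before the evaluation loop runs, and rejects ambiguous inputs early. A minimal sketch of the expected resolution behavior; the project and endpoint names below are hypothetical, and the private helper is called directly only for illustration:

```python
import mlrun
from mlrun.model_monitoring.applications import ModelMonitoringApplicationBase

project = mlrun.get_or_create_project("my-project")  # hypothetical project name

# Already-resolved (name, uid) pairs are returned unchanged.
# A list of names is looked up via project.list_model_endpoints(names=..., latest_only=True).
# The literal string "all" resolves to every model endpoint in the project.
for endpoints in (
    [("churn-model", "1234abcd")],   # hypothetical (name, uid) pair
    ["churn-model", "fraud-model"],  # names only
    "all",
):
    pairs = ModelMonitoringApplicationBase._handle_endpoints_type_evaluate(
        project=project, endpoints=endpoints
    )
    print(pairs)  # a list of (name, uid) tuples

# An empty list, or any string other than "all", raises mlrun.errors.MLRunValueError.
```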
@@ -546,7 +562,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         class_handler: Optional[str] = None,
         requirements: Optional[Union[str, list[str]]] = None,
         requirements_file: str = "",
-        endpoints:
+        endpoints: Union[list[tuple[str, str]], list[str], Literal["all"], None] = None,
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
         base_period: Optional[int] = None,
@@ -577,10 +593,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param class_handler: The relative path to the class, useful when using Git sources or code from images.
         :param requirements: List of Python requirements to be installed in the image.
         :param requirements_file: Path to a Python requirements file to be installed in the image.
-        :param endpoints:
-
-
-
+        :param endpoints: The model endpoints to get the data from. The options are:
+
+            - a list of tuples of the model endpoints ``[(name, uid), ...]``
+            - a list of model endpoint names ``[name, ...]``
+            - ``"all"`` for all the project's model endpoints
+
+            Note: a model endpoint name retrieves all the active model endpoints using this
+            name, which may be more than one per name when the same name is used across
+            multiple serving functions.
+
             If provided, and ``sample_data`` is not ``None``, you have to provide also the
             ``start`` and ``end`` times of the data to analyze from the model endpoints.
         :param start: The start time of the endpoint's data, not included.
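Putting the new signature and docstring together, a minimal usage sketch of the `evaluate` classmethod follows. It assumes a user-defined application class `MyMonitoringApp` that subclasses `ModelMonitoringApplicationBase`; the endpoint names and uid are hypothetical, and other `evaluate` arguments (function path, image, requirements, and so on) are omitted:

```python
from datetime import datetime, timezone

from my_apps import MyMonitoringApp  # hypothetical module defining the application class

start = datetime(2025, 6, 1, tzinfo=timezone.utc)
end = datetime(2025, 6, 2, tzinfo=timezone.utc)

# Explicit (name, uid) pairs:
MyMonitoringApp.evaluate(endpoints=[("churn-model", "1234abcd")], start=start, end=end)

# Names only, resolved to the latest active endpoints with those names:
MyMonitoringApp.evaluate(endpoints=["churn-model", "fraud-model"], start=start, end=end)

# Every model endpoint in the project:
MyMonitoringApp.evaluate(endpoints="all", start=start, end=end)
```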
@@ -629,12 +651,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             project=project,
         )
 
-        params: dict[str, Union[list
+        params: dict[str, Union[list, str, int, None, ds_profile.DatastoreProfile]] = {}
         if endpoints:
-            endpoints = cls._handle_endpoints_type_evaluate(
-                project=project.name,
-                endpoints=endpoints,
-            )
             params["endpoints"] = endpoints
         if sample_data is None:
             if start is None or end is None:

mlrun/model_monitoring/db/tsdb/base.py

@@ -358,6 +358,58 @@ class TSDBConnector(ABC):
         }
         """
 
+    @abstractmethod
+    def count_processed_model_endpoints(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> dict[str, int]:
+        """
+        Count the number of processed model endpoints within a given time range for specific applications.
+        :param start: The start time of the query. Last 24 hours is used by default.
+        :param end: The end time of the query. The current time is used by default.
+        :param application_names: A list of application names to filter the results by. If not provided, all
+                                  applications are included.
+        :return: The count of processed model endpoints.
+        """
+
+    @abstractmethod
+    def calculate_latest_metrics(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> list[
+        Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
+    ]:
+        """
+        Calculate the latest metrics and results across applications.
+        :param start: The start time of the query. Last 24 hours is used by default.
+        :param end: The end time of the query. The current time is used by default.
+        :param application_names: A list of application names to filter the results by. If not provided, all
+                                  applications are included.
+        :return: A list containing the latest metrics and results for each application.
+                 example::
+                     [
+                         {
+                             "type": "metric",
+                             "time": "2025-06-29 13:36:37 +00:00",
+                             "metric_name": "hellinger_mean",
+                             "value": 0.123456,
+                         },
+                         {
+                             "type": "result",
+                             "time": "2025-06-29 13:36:37 +00:00",
+                             "result_name": "drift_status",
+                             "kind": "2",
+                             "status": 0,
+                             "value": 15.4,
+                         },
+                         ...
+                     ]
+        """
+
     async def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
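Because both methods are added as abstract methods on `TSDBConnector`, every concrete connector now has to provide them (in this release the TDEngine connector implements them, while the V3IO connector raises `NotImplementedError`, as shown further below). A minimal sketch of what a hypothetical custom connector subclass would now have to define, with made-up return values and the other abstract methods omitted:

```python
from datetime import datetime
from typing import Optional, Union

import mlrun.common.schemas.model_monitoring as mm_schemas
from mlrun.model_monitoring.db import TSDBConnector


class MyTSDBConnector(TSDBConnector):  # hypothetical custom connector
    # ... the remaining abstract methods of TSDBConnector are omitted here ...

    def count_processed_model_endpoints(
        self,
        start: Optional[Union[datetime, str]] = None,
        end: Optional[Union[datetime, str]] = None,
        application_names: Optional[Union[str, list[str]]] = None,
    ) -> dict[str, int]:
        # application name -> number of endpoints it processed in the given window
        return {"my-app": 3}  # made-up value

    def calculate_latest_metrics(
        self,
        start: Optional[Union[datetime, str]] = None,
        end: Optional[Union[datetime, str]] = None,
        application_names: Optional[Union[str, list[str]]] = None,
    ) -> list[
        Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
    ]:
        # latest metric/result records per application, matching the docstring example above
        return []
```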
@@ -651,22 +703,3 @@ class TSDBConnector(ABC):
                 )
             )
             return {dict_key: metrics}
-
-    @staticmethod
-    def _get_start_end(
-        start: Union[datetime, None],
-        end: Union[datetime, None],
-    ) -> tuple[datetime, datetime]:
-        """
-        static utils function for tsdb start end format
-        :param start: Either None or datetime, None is handled as datetime.min(tz=timezone.utc)
-        :param end: Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
-        :return: start datetime, end datetime
-        """
-        start = start or mlrun.utils.datetime_min()
-        end = end or mlrun.utils.datetime_now()
-        if not (isinstance(start, datetime) and isinstance(end, datetime)):
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Both start and end must be datetime objects"
-            )
-        return start, end

mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -29,7 +29,7 @@ from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
     Statement,
     TDEngineConnection,
 )
-from mlrun.model_monitoring.helpers import get_invocations_fqn
+from mlrun.model_monitoring.helpers import get_invocations_fqn, get_start_end
 from mlrun.utils import logger
 
 # Thread-local storage for connections
@@ -689,7 +689,7 @@ class TDEngineConnector(TSDBConnector):
             filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
             filter_values=endpoint_ids,
         )
-        start, end =
+        start, end = get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,
@@ -734,7 +734,7 @@ class TDEngineConnector(TSDBConnector):
             filter_values=endpoint_ids,
         )
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
-        start, end =
+        start, end = get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
             start=start,
@@ -768,9 +768,9 @@ class TDEngineConnector(TSDBConnector):
         result_status_list: Optional[list[int]] = None,
     ) -> dict[tuple[str, int], int]:
         filter_query = ""
-
-        start = start
-
+
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+
         if endpoint_ids:
             filter_query = self._generate_filter_query(
                 filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
@@ -825,13 +825,182 @@ class TDEngineConnector(TSDBConnector):
             for _, row in df.iterrows()
         }
 
+    def count_processed_model_endpoints(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> dict:
+        filter_query = ""
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+
+        if application_names:
+            filter_query = self._generate_filter_query(
+                filter_column=mm_schemas.WriterEvent.APPLICATION_NAME,
+                filter_values=application_names,
+            )
+
+        def get_application_endpoints_records(super_table: str) -> pd.DataFrame:
+            return self._get_records(
+                table=super_table,
+                start=start,
+                end=end,
+                timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+                columns=[
+                    mm_schemas.WriterEvent.APPLICATION_NAME,
+                    mm_schemas.EventFieldType.ENDPOINT_ID,
+                ],
+                filter_query=filter_query,
+                group_by=[
+                    mm_schemas.WriterEvent.APPLICATION_NAME,
+                    mm_schemas.EventFieldType.ENDPOINT_ID,
+                ],
+                preform_agg_columns=[mm_schemas.ResultData.RESULT_VALUE],
+                agg_funcs=["last"],
+            )
+
+        df_results = get_application_endpoints_records(
+            super_table=self.tables[
+                mm_schemas.TDEngineSuperTables.APP_RESULTS
+            ].super_table
+        )
+        df_metrics = get_application_endpoints_records(
+            super_table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
+        )
+
+        combined_df = pd.concat([df_results, df_metrics]).drop_duplicates()
+
+        if combined_df.empty:
+            return {}
+        grouped_df = combined_df.groupby(
+            mm_schemas.WriterEvent.APPLICATION_NAME
+        ).count()
+
+        # Convert DataFrame to a dictionary
+        return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
+
+    def calculate_latest_metrics(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> list[
+        Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
+    ]:
+        metric_list = []
+        filter_query = ""
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+
+        if application_names:
+            filter_query = self._generate_filter_query(
+                filter_column=mm_schemas.WriterEvent.APPLICATION_NAME,
+                filter_values=application_names,
+            )
+
+        def get_latest_metrics_records(
+            record_type: Literal["metrics", "results"],
+        ) -> pd.DataFrame:
+            columns = [
+                mm_schemas.WriterEvent.END_INFER_TIME,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+            ]
+            if record_type == "results":
+                table = self.tables[
+                    mm_schemas.TDEngineSuperTables.APP_RESULTS
+                ].super_table
+                columns += [
+                    mm_schemas.ResultData.RESULT_NAME,
+                    mm_schemas.ResultData.RESULT_VALUE,
+                    mm_schemas.ResultData.RESULT_STATUS,
+                    mm_schemas.ResultData.RESULT_KIND,
+                ]
+                agg_column = mm_schemas.ResultData.RESULT_VALUE
+            else:
+                table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
+                columns += [
+                    mm_schemas.MetricData.METRIC_NAME,
+                    mm_schemas.MetricData.METRIC_VALUE,
+                ]
+                agg_column = mm_schemas.MetricData.METRIC_VALUE
+
+            return self._get_records(
+                table=table,
+                start=start,
+                end=end,
+                columns=columns,
+                filter_query=filter_query,
+                timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+                # Aggregate per application/metric pair regardless of timestamp
+                group_by=columns[1:],
+                preform_agg_columns=[agg_column],
+                agg_funcs=["last"],
+            )
+
+        df_results = get_latest_metrics_records(record_type="results")
+        df_metrics = get_latest_metrics_records(record_type="metrics")
+
+        if df_results.empty and df_metrics.empty:
+            return metric_list
+
+        def build_metric_objects() -> (
+            list[
+                Union[
+                    mm_schemas.ApplicationResultRecord,
+                    mm_schemas.ApplicationMetricRecord,
+                ]
+            ]
+        ):
+            metric_objects = []
+
+            if not df_results.empty:
+                df_results.rename(
+                    columns={
+                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_results.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationResultRecord(
+                            time=datetime.fromisoformat(
+                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            ),
+                            result_name=row[mm_schemas.ResultData.RESULT_NAME],
+                            kind=row[mm_schemas.ResultData.RESULT_KIND],
+                            status=row[mm_schemas.ResultData.RESULT_STATUS],
+                            value=row[mm_schemas.ResultData.RESULT_VALUE],
+                        )
+                    )
+
+            if not df_metrics.empty:
+                df_metrics.rename(
+                    columns={
+                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_metrics.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationMetricRecord(
+                            time=datetime.fromisoformat(
+                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            ),
+                            metric_name=row[mm_schemas.MetricData.METRIC_NAME],
+                            value=row[mm_schemas.MetricData.METRIC_VALUE],
+                        )
+                    )
+
+            return metric_objects
+
+        return build_metric_objects()
+
     def get_metrics_metadata(
         self,
         endpoint_id: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-        start, end =
+        start, end = get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
             start=start,
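Under the assumptions above, the two new TDEngine methods can be queried directly once a connector is available. A rough usage sketch; `connector` is assumed to be an already-initialized `TDEngineConnector` (construction details are unchanged by this diff and omitted), and the application name is hypothetical:

```python
from datetime import datetime, timedelta, timezone

# Assumes `connector` is an initialized TDEngineConnector for the project.
end = datetime.now(tz=timezone.utc)
start = end - timedelta(hours=6)

# {application_name: number of endpoints that application processed in the window}
counts = connector.count_processed_model_endpoints(
    start=start, end=end, application_names=["my-drift-app"]  # hypothetical app name
)

# Latest ApplicationMetricRecord / ApplicationResultRecord objects per application
latest = connector.calculate_latest_metrics(start=start, end=end)
for record in latest:
    print(record)
```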
@@ -871,7 +1040,7 @@ class TDEngineConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-        start, end =
+        start, end = get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
             start=start,
@@ -919,7 +1088,7 @@ class TDEngineConnector(TSDBConnector):
             filter_values=endpoint_ids,
         )
         filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
-        start, end =
+        start, end = get_start_end(start, end)
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
             start=start,
@@ -951,8 +1120,7 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids = (
             endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
         )
-        start = start
-        start, end = self._get_start_end(start, end)
+        start, end = get_start_end(start, end, delta=timedelta(hours=24))
         df = self._get_records(
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,

mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py

@@ -26,7 +26,7 @@ import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
 from mlrun.common.schemas import EventFieldType
 from mlrun.model_monitoring.db import TSDBConnector
-from mlrun.model_monitoring.helpers import get_invocations_fqn
+from mlrun.model_monitoring.helpers import get_invocations_fqn, get_start_end
 from mlrun.utils import logger
 
 _TSDB_BE = "tsdb"
@@ -522,7 +522,7 @@ class V3IOTSDBConnector(TSDBConnector):
         try:
             self.v3io_client.kv.delete(
                 container=self.container,
-
+                table_path=self.last_request_table,
                 key=endpoint_id,
             )
         except Exception as e:
@@ -956,8 +956,7 @@ class V3IOTSDBConnector(TSDBConnector):
             filter_values=endpoint_ids,
         )
 
-        start = start
-        start, end = self._get_start_end(start, end)
+        start, end = get_start_end(start, end, delta=timedelta(hours=24))
         res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
@@ -984,7 +983,7 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-        start, end =
+        start, end = get_start_end(start, end)
         filter_query = self._generate_filter_query(
             filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
             filter_values=endpoint_id,
@@ -1009,7 +1008,7 @@ class V3IOTSDBConnector(TSDBConnector):
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-        start, end =
+        start, end = get_start_end(start, end)
         filter_query = self._generate_filter_query(
             filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
             filter_values=endpoint_id,
@@ -1048,7 +1047,7 @@ class V3IOTSDBConnector(TSDBConnector):
             filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
         else:
             filter_query = f"{mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}' z"
-        start, end =
+        start, end = get_start_end(start, end)
         res = self._get_records(
             table=mm_schemas.FileTargetKind.ERRORS,
             start=start,
@@ -1085,7 +1084,7 @@ class V3IOTSDBConnector(TSDBConnector):
             filter_values=endpoint_ids,
         )
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
-        start, end =
+        start, end = get_start_end(start, end)
         res = self._get_records(
             table=mm_schemas.V3IOTSDBTables.PREDICTIONS,
             start=start,
@@ -1207,9 +1206,7 @@ class V3IOTSDBConnector(TSDBConnector):
         application_names: Optional[Union[str, list[str]]] = None,
         result_status_list: Optional[list[int]] = None,
     ) -> dict[tuple[str, int], int]:
-
-        start = start or (now - timedelta(hours=24))
-        end = end or now
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
         filter_query = ""
         if endpoint_ids:
             filter_query = self._generate_filter_query(
@@ -1268,3 +1265,21 @@ class V3IOTSDBConnector(TSDBConnector):
         )
 
         return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
+
+    def count_processed_model_endpoints(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> dict[str, int]:
+        raise NotImplementedError
+
+    def calculate_latest_metrics(
+        self,
+        start: Optional[Union[datetime, str]] = None,
+        end: Optional[Union[datetime, str]] = None,
+        application_names: Optional[Union[str, list[str]]] = None,
+    ) -> list[
+        Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
+    ]:
+        raise NotImplementedError

mlrun/model_monitoring/helpers.py

@@ -589,3 +589,51 @@ def _get_monitoring_schedules_file_chief_path(
     return os.path.join(
         _get_monitoring_schedules_folder_path(project), f"{project}.json"
     )
+
+
+def get_start_end(
+    start: Union[datetime.datetime, None],
+    end: Union[datetime.datetime, None],
+    delta: Optional[datetime.timedelta] = None,
+) -> tuple[datetime.datetime, datetime.datetime]:
+    """
+    static utils function for tsdb start end format
+    :param start: Either None or datetime, None is handled as datetime.min(tz=timezone.utc) unless `delta`
+                  is provided.
+    :param end: Either None or datetime, None is handled as datetime.now(tz=timezone.utc)
+    :param delta: Optional timedelta to define a time span.
+                  - If both `start` and `end` are provided, `delta` is ignored.
+                  - If only one of `start` or `end` is provided, the other will be
+                    calculated using `delta`.
+                  - If neither `start` nor `end` is provided, `end` defaults to now,
+                    and `start` is calculated as `end - delta`.
+    :return: start datetime, end datetime
+    """
+
+    if delta and start and end:
+        # If both start and end are provided, delta is ignored
+        pass
+    elif delta:
+        if start and not end:
+            end = start + delta
+        else:
+            end = end or mlrun.utils.datetime_now()
+            start = end - delta
+    else:
+        start = start or mlrun.utils.datetime_min()
+        end = end or mlrun.utils.datetime_now()
+
+    if not (
+        isinstance(start, datetime.datetime) and isinstance(end, datetime.datetime)
+    ):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Both start and end must be datetime objects"
+        )
+
+    if start > end:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "The start time must be before the end time. Note that if end time is not provided, "
+            "the current time is used by default"
+        )
+
+    return start, end