mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/api/api/deps.py
CHANGED
|
@@ -70,7 +70,20 @@ def verify_api_state(request: Request):
|
|
|
70
70
|
"memory-reports",
|
|
71
71
|
]
|
|
72
72
|
if not any(enabled_endpoint in path for enabled_endpoint in enabled_endpoints):
|
|
73
|
-
message =
|
|
73
|
+
message = (
|
|
74
|
+
"API is waiting for migrations to be triggered. Send POST request to /api/operations/migrations to"
|
|
75
|
+
" trigger it"
|
|
76
|
+
)
|
|
77
|
+
if (
|
|
78
|
+
mlrun.mlconf.httpdb.state
|
|
79
|
+
== mlrun.api.schemas.APIStates.migrations_in_progress
|
|
80
|
+
):
|
|
81
|
+
message = "Migrations are in progress"
|
|
82
|
+
elif (
|
|
83
|
+
mlrun.mlconf.httpdb.state
|
|
84
|
+
== mlrun.api.schemas.APIStates.migrations_failed
|
|
85
|
+
):
|
|
86
|
+
message = "Migrations failed, API can't be started"
|
|
74
87
|
raise mlrun.errors.MLRunPreconditionFailedError(message)
|
|
75
88
|
|
|
76
89
|
|
|
@@ -24,7 +24,6 @@ import mlrun.builder
|
|
|
24
24
|
import mlrun.runtimes
|
|
25
25
|
import mlrun.runtimes.utils
|
|
26
26
|
import mlrun.utils.helpers
|
|
27
|
-
from mlrun.api.api.utils import get_allowed_path_prefixes_list
|
|
28
27
|
from mlrun.config import config
|
|
29
28
|
from mlrun.platforms import is_iguazio_session_cookie
|
|
30
29
|
|
|
@@ -83,7 +82,6 @@ def get_frontend_spec(
|
|
|
83
82
|
default_function_pod_resources=mlrun.mlconf.default_function_pod_resources.to_dict(),
|
|
84
83
|
default_function_preemption_mode=mlrun.mlconf.function_defaults.preemption_mode,
|
|
85
84
|
feature_store_data_prefixes=config.feature_store.data_prefixes.to_dict(),
|
|
86
|
-
allowed_artifact_path_prefixes_list=get_allowed_path_prefixes_list(),
|
|
87
85
|
# ce_mode is deprecated, we will use the full ce config instead and ce_mode will be removed in 1.6.0
|
|
88
86
|
ce_mode=config.ce.mode,
|
|
89
87
|
ce=config.ce.to_dict(),
|
|
@@ -40,7 +40,6 @@ import mlrun.api.utils.auth.verifier
|
|
|
40
40
|
import mlrun.api.utils.background_tasks
|
|
41
41
|
import mlrun.api.utils.clients.chief
|
|
42
42
|
import mlrun.api.utils.singletons.project_member
|
|
43
|
-
import mlrun.model_monitoring.constants
|
|
44
43
|
from mlrun.api.api import deps
|
|
45
44
|
from mlrun.api.api.utils import get_run_db_instance, log_and_raise, log_path
|
|
46
45
|
from mlrun.api.crud.secrets import Secrets, SecretsClientType
|
|
@@ -631,33 +630,25 @@ def _build_function(
|
|
|
631
630
|
try:
|
|
632
631
|
if fn.spec.track_models:
|
|
633
632
|
logger.info("Tracking enabled, initializing model monitoring")
|
|
634
|
-
|
|
635
|
-
#
|
|
636
|
-
model_monitoring_access_key =
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
if mlrun.utils.model_monitoring.get_stream_path(
|
|
644
|
-
project=fn.metadata.project
|
|
645
|
-
).startswith("v3io://"):
|
|
646
|
-
# Initialize model monitoring V3IO stream
|
|
647
|
-
_create_model_monitoring_stream(
|
|
648
|
-
project=fn.metadata.project,
|
|
649
|
-
function=fn,
|
|
650
|
-
)
|
|
633
|
+
_init_serving_function_stream_args(fn=fn)
|
|
634
|
+
# get model monitoring access key
|
|
635
|
+
model_monitoring_access_key = _process_model_monitoring_secret(
|
|
636
|
+
db_session,
|
|
637
|
+
fn.metadata.project,
|
|
638
|
+
"MODEL_MONITORING_ACCESS_KEY",
|
|
639
|
+
)
|
|
640
|
+
# initialize model monitoring stream
|
|
641
|
+
_create_model_monitoring_stream(project=fn.metadata.project)
|
|
651
642
|
|
|
652
643
|
if fn.spec.tracking_policy:
|
|
653
|
-
#
|
|
644
|
+
# convert to `TrackingPolicy` object as `fn.spec.tracking_policy` is provided as a dict
|
|
654
645
|
fn.spec.tracking_policy = (
|
|
655
646
|
mlrun.utils.model_monitoring.TrackingPolicy.from_dict(
|
|
656
647
|
fn.spec.tracking_policy
|
|
657
648
|
)
|
|
658
649
|
)
|
|
659
650
|
else:
|
|
660
|
-
#
|
|
651
|
+
# initialize tracking policy with default values
|
|
661
652
|
fn.spec.tracking_policy = (
|
|
662
653
|
mlrun.utils.model_monitoring.TrackingPolicy()
|
|
663
654
|
)
|
|
@@ -665,10 +656,10 @@ def _build_function(
|
|
|
665
656
|
# deploy both model monitoring stream and model monitoring batch job
|
|
666
657
|
mlrun.api.crud.ModelEndpoints().deploy_monitoring_functions(
|
|
667
658
|
project=fn.metadata.project,
|
|
659
|
+
model_monitoring_access_key=model_monitoring_access_key,
|
|
668
660
|
db_session=db_session,
|
|
669
661
|
auth_info=auth_info,
|
|
670
662
|
tracking_policy=fn.spec.tracking_policy,
|
|
671
|
-
model_monitoring_access_key=model_monitoring_access_key,
|
|
672
663
|
)
|
|
673
664
|
except Exception as exc:
|
|
674
665
|
logger.warning(
|
|
@@ -819,12 +810,9 @@ async def _get_function_status(data, auth_info: mlrun.api.schemas.AuthInfo):
|
|
|
819
810
|
)
|
|
820
811
|
|
|
821
812
|
|
|
822
|
-
def _create_model_monitoring_stream(project: str
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
stream_path = mlrun.mlconf.get_model_monitoring_file_target_path(
|
|
827
|
-
project=project, kind="events"
|
|
813
|
+
def _create_model_monitoring_stream(project: str):
|
|
814
|
+
stream_path = config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
815
|
+
project=project, kind="stream"
|
|
828
816
|
)
|
|
829
817
|
|
|
830
818
|
_, container, stream_path = parse_model_endpoint_store_prefix(stream_path)
|
|
@@ -13,37 +13,36 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
import asyncio
|
|
16
|
-
import
|
|
16
|
+
import json
|
|
17
17
|
from http import HTTPStatus
|
|
18
|
-
from typing import List, Union
|
|
18
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
|
19
19
|
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
20
22
|
from fastapi import APIRouter, Depends, Request, Response
|
|
21
23
|
from fastapi.concurrency import run_in_threadpool
|
|
22
24
|
from sqlalchemy.orm import Session
|
|
23
25
|
|
|
24
26
|
import mlrun.api.crud
|
|
25
|
-
import mlrun.api.crud.model_monitoring.grafana
|
|
26
27
|
import mlrun.api.schemas
|
|
27
28
|
import mlrun.api.utils.auth.verifier
|
|
28
|
-
import mlrun.model_monitoring
|
|
29
29
|
from mlrun.api.api import deps
|
|
30
|
-
from mlrun.api.schemas import
|
|
30
|
+
from mlrun.api.schemas import (
|
|
31
|
+
GrafanaColumn,
|
|
32
|
+
GrafanaDataPoint,
|
|
33
|
+
GrafanaNumberColumn,
|
|
34
|
+
GrafanaTable,
|
|
35
|
+
GrafanaTimeSeriesTarget,
|
|
36
|
+
ProjectsFormat,
|
|
37
|
+
)
|
|
38
|
+
from mlrun.api.utils.singletons.project_member import get_project_member
|
|
39
|
+
from mlrun.errors import MLRunBadRequestError
|
|
40
|
+
from mlrun.utils import config, logger
|
|
41
|
+
from mlrun.utils.model_monitoring import parse_model_endpoint_store_prefix
|
|
42
|
+
from mlrun.utils.v3io_clients import get_frames_client
|
|
31
43
|
|
|
32
44
|
router = APIRouter()
|
|
33
45
|
|
|
34
|
-
NAME_TO_SEARCH_FUNCTION_DICTIONARY = {
|
|
35
|
-
"list_projects": mlrun.api.crud.model_monitoring.grafana.grafana_list_projects,
|
|
36
|
-
}
|
|
37
|
-
NAME_TO_QUERY_FUNCTION_DICTIONARY = {
|
|
38
|
-
"list_endpoints": mlrun.api.crud.model_monitoring.grafana.grafana_list_endpoints,
|
|
39
|
-
"individual_feature_analysis": mlrun.api.crud.model_monitoring.grafana.grafana_individual_feature_analysis,
|
|
40
|
-
"overall_feature_analysis": mlrun.api.crud.model_monitoring.grafana.grafana_overall_feature_analysis,
|
|
41
|
-
"incoming_features": mlrun.api.crud.model_monitoring.grafana.grafana_incoming_features,
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
SUPPORTED_QUERY_FUNCTIONS = set(NAME_TO_QUERY_FUNCTION_DICTIONARY.keys())
|
|
45
|
-
SUPPORTED_SEARCH_FUNCTIONS = set(NAME_TO_SEARCH_FUNCTION_DICTIONARY)
|
|
46
|
-
|
|
47
46
|
|
|
48
47
|
@router.get("/grafana-proxy/model-endpoints", status_code=HTTPStatus.OK.value)
|
|
49
48
|
def grafana_proxy_model_endpoints_check_connection(
|
|
@@ -53,95 +52,457 @@ def grafana_proxy_model_endpoints_check_connection(
|
|
|
53
52
|
Root of grafana proxy for the model-endpoints API, used for validating the model-endpoints data source
|
|
54
53
|
connectivity.
|
|
55
54
|
"""
|
|
56
|
-
|
|
57
|
-
mlrun.api.crud.ModelEndpoints().get_access_key(auth_info)
|
|
55
|
+
mlrun.api.crud.ModelEndpoints().get_access_key(auth_info)
|
|
58
56
|
return Response(status_code=HTTPStatus.OK.value)
|
|
59
57
|
|
|
60
58
|
|
|
61
|
-
@router.post(
|
|
62
|
-
|
|
59
|
+
@router.post(
|
|
60
|
+
"/grafana-proxy/model-endpoints/query",
|
|
61
|
+
response_model=List[Union[GrafanaTable, GrafanaTimeSeriesTarget]],
|
|
62
|
+
)
|
|
63
|
+
async def grafana_proxy_model_endpoints_query(
|
|
63
64
|
request: Request,
|
|
64
65
|
auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
|
|
65
|
-
|
|
66
|
-
) -> List[str]:
|
|
66
|
+
) -> List[Union[GrafanaTable, GrafanaTimeSeriesTarget]]:
|
|
67
67
|
"""
|
|
68
|
-
|
|
68
|
+
Query route for model-endpoints grafana proxy API, used for creating an interface between grafana queries and
|
|
69
69
|
model-endpoints logic.
|
|
70
70
|
|
|
71
71
|
This implementation requires passing target_endpoint query parameter in order to dispatch different
|
|
72
72
|
model-endpoint monitoring functions.
|
|
73
|
-
|
|
74
|
-
:param request: An api request with the required target and parameters.
|
|
75
|
-
:param auth_info: The auth info of the request.
|
|
76
|
-
:param db_session: A session that manages the current dialog with the database.
|
|
77
|
-
|
|
78
|
-
:return: List of results. e.g. list of available project names.
|
|
79
73
|
"""
|
|
80
|
-
if not mlrun.mlconf.is_ce_mode():
|
|
81
|
-
mlrun.api.crud.ModelEndpoints().get_access_key(auth_info)
|
|
82
74
|
body = await request.json()
|
|
83
|
-
query_parameters =
|
|
84
|
-
|
|
85
|
-
)
|
|
86
|
-
mlrun.api.crud.model_monitoring.grafana.validate_query_parameters(
|
|
87
|
-
query_parameters, SUPPORTED_SEARCH_FUNCTIONS
|
|
88
|
-
)
|
|
75
|
+
query_parameters = _parse_query_parameters(body)
|
|
76
|
+
_validate_query_parameters(query_parameters, SUPPORTED_QUERY_FUNCTIONS)
|
|
77
|
+
query_parameters = _drop_grafana_escape_chars(query_parameters)
|
|
89
78
|
|
|
90
79
|
# At this point everything is validated and we can access everything that is needed without performing all previous
|
|
91
80
|
# checks again.
|
|
92
81
|
target_endpoint = query_parameters["target_endpoint"]
|
|
93
|
-
function =
|
|
94
|
-
|
|
82
|
+
function = NAME_TO_QUERY_FUNCTION_DICTIONARY[target_endpoint]
|
|
95
83
|
if asyncio.iscoroutinefunction(function):
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
result = await run_in_threadpool(
|
|
99
|
-
function, db_session, auth_info, query_parameters
|
|
100
|
-
)
|
|
84
|
+
return await function(body, query_parameters, auth_info)
|
|
85
|
+
result = await run_in_threadpool(function, body, query_parameters, auth_info)
|
|
101
86
|
return result
|
|
102
87
|
|
|
103
88
|
|
|
104
|
-
@router.post(
|
|
105
|
-
|
|
106
|
-
response_model=List[Union[GrafanaTable, GrafanaTimeSeriesTarget]],
|
|
107
|
-
)
|
|
108
|
-
async def grafana_proxy_model_endpoints_query(
|
|
89
|
+
@router.post("/grafana-proxy/model-endpoints/search", response_model=List[str])
|
|
90
|
+
async def grafana_proxy_model_endpoints_search(
|
|
109
91
|
request: Request,
|
|
110
92
|
auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
|
|
111
|
-
|
|
93
|
+
db_session: Session = Depends(deps.get_db_session),
|
|
94
|
+
) -> List[str]:
|
|
112
95
|
"""
|
|
113
|
-
|
|
96
|
+
Search route for model-endpoints grafana proxy API, used for creating an interface between grafana queries and
|
|
114
97
|
model-endpoints logic.
|
|
115
98
|
|
|
116
99
|
This implementation requires passing target_endpoint query parameter in order to dispatch different
|
|
117
100
|
model-endpoint monitoring functions.
|
|
118
101
|
"""
|
|
102
|
+
mlrun.api.crud.ModelEndpoints().get_access_key(auth_info)
|
|
103
|
+
body = await request.json()
|
|
104
|
+
query_parameters = _parse_search_parameters(body)
|
|
105
|
+
|
|
106
|
+
_validate_query_parameters(query_parameters, SUPPORTED_SEARCH_FUNCTIONS)
|
|
107
|
+
|
|
108
|
+
# At this point everything is validated and we can access everything that is needed without performing all previous
|
|
109
|
+
# checks again.
|
|
110
|
+
target_endpoint = query_parameters["target_endpoint"]
|
|
111
|
+
function = NAME_TO_SEARCH_FUNCTION_DICTIONARY[target_endpoint]
|
|
112
|
+
if asyncio.iscoroutinefunction(function):
|
|
113
|
+
return await function(db_session, auth_info)
|
|
114
|
+
result = await run_in_threadpool(function, db_session, auth_info)
|
|
115
|
+
return result
|
|
116
|
+
|
|
119
117
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
118
|
+
def grafana_list_projects(
|
|
119
|
+
db_session: Session, auth_info: mlrun.api.schemas.AuthInfo
|
|
120
|
+
) -> List[str]:
|
|
121
|
+
projects_output = get_project_member().list_projects(
|
|
122
|
+
db_session, format_=ProjectsFormat.name_only, leader_session=auth_info.session
|
|
125
123
|
)
|
|
124
|
+
return projects_output.projects
|
|
126
125
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
126
|
+
|
|
127
|
+
async def grafana_list_endpoints(
|
|
128
|
+
body: Dict[str, Any],
|
|
129
|
+
query_parameters: Dict[str, str],
|
|
130
|
+
auth_info: mlrun.api.schemas.AuthInfo,
|
|
131
|
+
) -> List[GrafanaTable]:
|
|
132
|
+
project = query_parameters.get("project")
|
|
133
|
+
|
|
134
|
+
# Filters
|
|
135
|
+
model = query_parameters.get("model", None)
|
|
136
|
+
function = query_parameters.get("function", None)
|
|
137
|
+
labels = query_parameters.get("labels", "")
|
|
138
|
+
labels = labels.split(",") if labels else []
|
|
139
|
+
|
|
140
|
+
# Metrics to include
|
|
141
|
+
metrics = query_parameters.get("metrics", "")
|
|
142
|
+
metrics = metrics.split(",") if metrics else []
|
|
143
|
+
|
|
144
|
+
# Time range for metrics
|
|
145
|
+
start = body.get("rangeRaw", {}).get("start", "now-1h")
|
|
146
|
+
end = body.get("rangeRaw", {}).get("end", "now")
|
|
147
|
+
|
|
148
|
+
if project:
|
|
149
|
+
await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_permissions(
|
|
150
|
+
project,
|
|
151
|
+
mlrun.api.schemas.AuthorizationAction.read,
|
|
152
|
+
auth_info,
|
|
153
|
+
)
|
|
154
|
+
endpoint_list = await run_in_threadpool(
|
|
155
|
+
mlrun.api.crud.ModelEndpoints().list_model_endpoints,
|
|
156
|
+
auth_info=auth_info,
|
|
157
|
+
project=project,
|
|
158
|
+
model=model,
|
|
159
|
+
function=function,
|
|
160
|
+
labels=labels,
|
|
161
|
+
metrics=metrics,
|
|
162
|
+
start=start,
|
|
163
|
+
end=end,
|
|
164
|
+
)
|
|
165
|
+
allowed_endpoints = await mlrun.api.utils.auth.verifier.AuthVerifier().filter_project_resources_by_permissions(
|
|
166
|
+
mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
|
|
167
|
+
endpoint_list.endpoints,
|
|
168
|
+
lambda _endpoint: (
|
|
169
|
+
_endpoint.metadata.project,
|
|
170
|
+
_endpoint.metadata.uid,
|
|
171
|
+
),
|
|
172
|
+
auth_info,
|
|
173
|
+
)
|
|
174
|
+
endpoint_list.endpoints = allowed_endpoints
|
|
175
|
+
|
|
176
|
+
columns = [
|
|
177
|
+
GrafanaColumn(text="endpoint_id", type="string"),
|
|
178
|
+
GrafanaColumn(text="endpoint_function", type="string"),
|
|
179
|
+
GrafanaColumn(text="endpoint_model", type="string"),
|
|
180
|
+
GrafanaColumn(text="endpoint_model_class", type="string"),
|
|
181
|
+
GrafanaColumn(text="first_request", type="time"),
|
|
182
|
+
GrafanaColumn(text="last_request", type="time"),
|
|
183
|
+
GrafanaColumn(text="accuracy", type="number"),
|
|
184
|
+
GrafanaColumn(text="error_count", type="number"),
|
|
185
|
+
GrafanaColumn(text="drift_status", type="number"),
|
|
186
|
+
]
|
|
187
|
+
|
|
188
|
+
metric_columns = []
|
|
189
|
+
|
|
190
|
+
found_metrics = set()
|
|
191
|
+
for endpoint in endpoint_list.endpoints:
|
|
192
|
+
if endpoint.status.metrics is not None:
|
|
193
|
+
for key in endpoint.status.metrics.keys():
|
|
194
|
+
if key not in found_metrics:
|
|
195
|
+
found_metrics.add(key)
|
|
196
|
+
metric_columns.append(GrafanaColumn(text=key, type="number"))
|
|
197
|
+
|
|
198
|
+
columns = columns + metric_columns
|
|
199
|
+
table = GrafanaTable(columns=columns)
|
|
200
|
+
|
|
201
|
+
for endpoint in endpoint_list.endpoints:
|
|
202
|
+
row = [
|
|
203
|
+
endpoint.metadata.uid,
|
|
204
|
+
endpoint.spec.function_uri,
|
|
205
|
+
endpoint.spec.model,
|
|
206
|
+
endpoint.spec.model_class,
|
|
207
|
+
endpoint.status.first_request,
|
|
208
|
+
endpoint.status.last_request,
|
|
209
|
+
endpoint.status.accuracy,
|
|
210
|
+
endpoint.status.error_count,
|
|
211
|
+
endpoint.status.drift_status,
|
|
212
|
+
]
|
|
213
|
+
|
|
214
|
+
if endpoint.status.metrics is not None and metric_columns:
|
|
215
|
+
for metric_column in metric_columns:
|
|
216
|
+
row.append(endpoint.status.metrics[metric_column.text])
|
|
217
|
+
|
|
218
|
+
table.add_row(*row)
|
|
219
|
+
|
|
220
|
+
return [table]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
async def grafana_individual_feature_analysis(
|
|
224
|
+
body: Dict[str, Any],
|
|
225
|
+
query_parameters: Dict[str, str],
|
|
226
|
+
auth_info: mlrun.api.schemas.AuthInfo,
|
|
227
|
+
):
|
|
228
|
+
endpoint_id = query_parameters.get("endpoint_id")
|
|
229
|
+
project = query_parameters.get("project")
|
|
230
|
+
await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
|
|
231
|
+
mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
|
|
232
|
+
project,
|
|
233
|
+
endpoint_id,
|
|
234
|
+
mlrun.api.schemas.AuthorizationAction.read,
|
|
235
|
+
auth_info,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
endpoint = await run_in_threadpool(
|
|
239
|
+
mlrun.api.crud.ModelEndpoints().get_model_endpoint,
|
|
240
|
+
auth_info=auth_info,
|
|
241
|
+
project=project,
|
|
242
|
+
endpoint_id=endpoint_id,
|
|
243
|
+
feature_analysis=True,
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Load JSON data from KV, make sure not to fail if a field is missing
|
|
247
|
+
feature_stats = endpoint.status.feature_stats or {}
|
|
248
|
+
current_stats = endpoint.status.current_stats or {}
|
|
249
|
+
drift_measures = endpoint.status.drift_measures or {}
|
|
250
|
+
|
|
251
|
+
table = GrafanaTable(
|
|
252
|
+
columns=[
|
|
253
|
+
GrafanaColumn(text="feature_name", type="string"),
|
|
254
|
+
GrafanaColumn(text="actual_min", type="number"),
|
|
255
|
+
GrafanaColumn(text="actual_mean", type="number"),
|
|
256
|
+
GrafanaColumn(text="actual_max", type="number"),
|
|
257
|
+
GrafanaColumn(text="expected_min", type="number"),
|
|
258
|
+
GrafanaColumn(text="expected_mean", type="number"),
|
|
259
|
+
GrafanaColumn(text="expected_max", type="number"),
|
|
260
|
+
GrafanaColumn(text="tvd", type="number"),
|
|
261
|
+
GrafanaColumn(text="hellinger", type="number"),
|
|
262
|
+
GrafanaColumn(text="kld", type="number"),
|
|
263
|
+
]
|
|
130
264
|
)
|
|
131
|
-
|
|
132
|
-
|
|
265
|
+
|
|
266
|
+
for feature, base_stat in feature_stats.items():
|
|
267
|
+
current_stat = current_stats.get(feature, {})
|
|
268
|
+
drift_measure = drift_measures.get(feature, {})
|
|
269
|
+
|
|
270
|
+
table.add_row(
|
|
271
|
+
feature,
|
|
272
|
+
current_stat.get("min"),
|
|
273
|
+
current_stat.get("mean"),
|
|
274
|
+
current_stat.get("max"),
|
|
275
|
+
base_stat.get("min"),
|
|
276
|
+
base_stat.get("mean"),
|
|
277
|
+
base_stat.get("max"),
|
|
278
|
+
drift_measure.get("tvd"),
|
|
279
|
+
drift_measure.get("hellinger"),
|
|
280
|
+
drift_measure.get("kld"),
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
return [table]
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
async def grafana_overall_feature_analysis(
|
|
287
|
+
body: Dict[str, Any],
|
|
288
|
+
query_parameters: Dict[str, str],
|
|
289
|
+
auth_info: mlrun.api.schemas.AuthInfo,
|
|
290
|
+
):
|
|
291
|
+
endpoint_id = query_parameters.get("endpoint_id")
|
|
292
|
+
project = query_parameters.get("project")
|
|
293
|
+
await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
|
|
294
|
+
mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
|
|
295
|
+
project,
|
|
296
|
+
endpoint_id,
|
|
297
|
+
mlrun.api.schemas.AuthorizationAction.read,
|
|
298
|
+
auth_info,
|
|
299
|
+
)
|
|
300
|
+
endpoint = await run_in_threadpool(
|
|
301
|
+
mlrun.api.crud.ModelEndpoints().get_model_endpoint,
|
|
302
|
+
auth_info=auth_info,
|
|
303
|
+
project=project,
|
|
304
|
+
endpoint_id=endpoint_id,
|
|
305
|
+
feature_analysis=True,
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
table = GrafanaTable(
|
|
309
|
+
columns=[
|
|
310
|
+
GrafanaNumberColumn(text="tvd_sum"),
|
|
311
|
+
GrafanaNumberColumn(text="tvd_mean"),
|
|
312
|
+
GrafanaNumberColumn(text="hellinger_sum"),
|
|
313
|
+
GrafanaNumberColumn(text="hellinger_mean"),
|
|
314
|
+
GrafanaNumberColumn(text="kld_sum"),
|
|
315
|
+
GrafanaNumberColumn(text="kld_mean"),
|
|
316
|
+
]
|
|
133
317
|
)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
318
|
+
|
|
319
|
+
if endpoint.status.drift_measures:
|
|
320
|
+
table.add_row(
|
|
321
|
+
endpoint.status.drift_measures.get("tvd_sum"),
|
|
322
|
+
endpoint.status.drift_measures.get("tvd_mean"),
|
|
323
|
+
endpoint.status.drift_measures.get("hellinger_sum"),
|
|
324
|
+
endpoint.status.drift_measures.get("hellinger_mean"),
|
|
325
|
+
endpoint.status.drift_measures.get("kld_sum"),
|
|
326
|
+
endpoint.status.drift_measures.get("kld_mean"),
|
|
137
327
|
)
|
|
328
|
+
|
|
329
|
+
return [table]
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
async def grafana_incoming_features(
|
|
333
|
+
body: Dict[str, Any],
|
|
334
|
+
query_parameters: Dict[str, str],
|
|
335
|
+
auth_info: mlrun.api.schemas.AuthInfo,
|
|
336
|
+
):
|
|
337
|
+
endpoint_id = query_parameters.get("endpoint_id")
|
|
338
|
+
project = query_parameters.get("project")
|
|
339
|
+
start = body.get("rangeRaw", {}).get("from", "now-1h")
|
|
340
|
+
end = body.get("rangeRaw", {}).get("to", "now")
|
|
341
|
+
|
|
342
|
+
await mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions(
|
|
343
|
+
mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
|
|
344
|
+
project,
|
|
345
|
+
endpoint_id,
|
|
346
|
+
mlrun.api.schemas.AuthorizationAction.read,
|
|
347
|
+
auth_info,
|
|
138
348
|
)
|
|
139
349
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
350
|
+
endpoint = await run_in_threadpool(
|
|
351
|
+
mlrun.api.crud.ModelEndpoints().get_model_endpoint,
|
|
352
|
+
auth_info=auth_info,
|
|
353
|
+
project=project,
|
|
354
|
+
endpoint_id=endpoint_id,
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
time_series = []
|
|
358
|
+
|
|
359
|
+
feature_names = endpoint.spec.feature_names
|
|
360
|
+
|
|
361
|
+
if not feature_names:
|
|
362
|
+
logger.warn(
|
|
363
|
+
"'feature_names' is either missing or not initialized in endpoint record",
|
|
364
|
+
endpoint_id=endpoint.metadata.uid,
|
|
365
|
+
)
|
|
366
|
+
return time_series
|
|
367
|
+
|
|
368
|
+
path = config.model_endpoint_monitoring.store_prefixes.default.format(
|
|
369
|
+
project=project, kind=mlrun.api.schemas.ModelMonitoringStoreKinds.EVENTS
|
|
370
|
+
)
|
|
371
|
+
_, container, path = parse_model_endpoint_store_prefix(path)
|
|
372
|
+
|
|
373
|
+
client = get_frames_client(
|
|
374
|
+
token=auth_info.data_session,
|
|
375
|
+
address=config.v3io_framesd,
|
|
376
|
+
container=container,
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
data: pd.DataFrame = await run_in_threadpool(
|
|
380
|
+
client.read,
|
|
381
|
+
backend="tsdb",
|
|
382
|
+
table=path,
|
|
383
|
+
columns=feature_names,
|
|
384
|
+
filter=f"endpoint_id=='{endpoint_id}'",
|
|
385
|
+
start=start,
|
|
386
|
+
end=end,
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
data.drop(["endpoint_id"], axis=1, inplace=True, errors="ignore")
|
|
390
|
+
data.index = data.index.astype(np.int64) // 10**6
|
|
391
|
+
|
|
392
|
+
for feature, indexed_values in data.to_dict().items():
|
|
393
|
+
target = GrafanaTimeSeriesTarget(target=feature)
|
|
394
|
+
for index, value in indexed_values.items():
|
|
395
|
+
data_point = GrafanaDataPoint(value=float(value), timestamp=index)
|
|
396
|
+
target.add_data_point(data_point)
|
|
397
|
+
time_series.append(target)
|
|
398
|
+
|
|
399
|
+
return time_series
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _parse_query_parameters(request_body: Dict[str, Any]) -> Dict[str, str]:
|
|
403
|
+
"""
|
|
404
|
+
This function searches for the target field in Grafana's SimpleJson json. Once located, the target string is
|
|
405
|
+
parsed by splitting on semi-colons (;). Each part in the resulting list is then split by an equal sign (=) to be
|
|
406
|
+
read as key-value pairs.
|
|
407
|
+
"""
|
|
408
|
+
|
|
409
|
+
# Try to get the target
|
|
410
|
+
targets = request_body.get("targets", [])
|
|
411
|
+
|
|
412
|
+
if len(targets) > 1:
|
|
413
|
+
logger.warn(
|
|
414
|
+
f"The 'targets' list contains more then one element ({len(targets)}), all targets except the first one are "
|
|
415
|
+
f"ignored."
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
target_obj = targets[0] if targets else {}
|
|
419
|
+
target_query = target_obj.get("target") if target_obj else ""
|
|
420
|
+
|
|
421
|
+
if not target_query:
|
|
422
|
+
raise MLRunBadRequestError(f"Target missing in request body:\n {request_body}")
|
|
423
|
+
|
|
424
|
+
parameters = _parse_parameters(target_query)
|
|
425
|
+
|
|
426
|
+
return parameters
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _parse_search_parameters(request_body: Dict[str, Any]) -> Dict[str, str]:
|
|
430
|
+
"""
|
|
431
|
+
This function searches for the target field in Grafana's SimpleJson json. Once located, the target string is
|
|
432
|
+
parsed by splitting on semi-colons (;). Each part in the resulting list is then split by an equal sign (=) to be
|
|
433
|
+
read as key-value pairs.
|
|
434
|
+
"""
|
|
435
|
+
|
|
436
|
+
# Try to get the target
|
|
437
|
+
target = request_body.get("target")
|
|
438
|
+
|
|
439
|
+
if not target:
|
|
440
|
+
raise MLRunBadRequestError(f"Target missing in request body:\n {request_body}")
|
|
441
|
+
|
|
442
|
+
parameters = _parse_parameters(target)
|
|
443
|
+
|
|
444
|
+
return parameters
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _parse_parameters(target_query):
|
|
448
|
+
parameters = {}
|
|
449
|
+
for query in filter(lambda q: q, target_query.split(";")):
|
|
450
|
+
query_parts = query.split("=")
|
|
451
|
+
if len(query_parts) < 2:
|
|
452
|
+
raise MLRunBadRequestError(
|
|
453
|
+
f"Query must contain both query key and query value. Expected query_key=query_value, found {query} "
|
|
454
|
+
f"instead."
|
|
455
|
+
)
|
|
456
|
+
parameters[query_parts[0]] = query_parts[1]
|
|
457
|
+
return parameters
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _drop_grafana_escape_chars(query_parameters: Dict[str, str]):
|
|
461
|
+
query_parameters = dict(query_parameters)
|
|
462
|
+
endpoint_id = query_parameters.get("endpoint_id")
|
|
463
|
+
if endpoint_id is not None:
|
|
464
|
+
query_parameters["endpoint_id"] = endpoint_id.replace("\\", "")
|
|
465
|
+
return query_parameters
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _validate_query_parameters(
|
|
469
|
+
query_parameters: Dict[str, str], supported_endpoints: Optional[Set[str]] = None
|
|
470
|
+
):
|
|
471
|
+
"""Validates the parameters sent via Grafana's SimpleJson query"""
|
|
472
|
+
if "target_endpoint" not in query_parameters:
|
|
473
|
+
raise MLRunBadRequestError(
|
|
474
|
+
f"Expected 'target_endpoint' field in query, found {query_parameters} instead"
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
if (
|
|
478
|
+
supported_endpoints is not None
|
|
479
|
+
and query_parameters["target_endpoint"] not in supported_endpoints
|
|
480
|
+
):
|
|
481
|
+
raise MLRunBadRequestError(
|
|
482
|
+
f"{query_parameters['target_endpoint']} unsupported in query parameters: {query_parameters}. "
|
|
483
|
+
f"Currently supports: {','.join(supported_endpoints)}"
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _json_loads_or_default(string: Optional[str], default: Any):
|
|
488
|
+
if string is None:
|
|
489
|
+
return default
|
|
490
|
+
obj = json.loads(string)
|
|
491
|
+
if not obj:
|
|
492
|
+
return default
|
|
493
|
+
return obj
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
NAME_TO_QUERY_FUNCTION_DICTIONARY = {
|
|
497
|
+
"list_endpoints": grafana_list_endpoints,
|
|
498
|
+
"individual_feature_analysis": grafana_individual_feature_analysis,
|
|
499
|
+
"overall_feature_analysis": grafana_overall_feature_analysis,
|
|
500
|
+
"incoming_features": grafana_incoming_features,
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
NAME_TO_SEARCH_FUNCTION_DICTIONARY = {
|
|
504
|
+
"list_projects": grafana_list_projects,
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
SUPPORTED_QUERY_FUNCTIONS = set(NAME_TO_QUERY_FUNCTION_DICTIONARY.keys())
|
|
508
|
+
SUPPORTED_SEARCH_FUNCTIONS = set(NAME_TO_SEARCH_FUNCTION_DICTIONARY)
|