mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -11
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/datastore/model_provider/huggingface_provider.py +109 -20
- mlrun/datastore/model_provider/model_provider.py +110 -32
- mlrun/datastore/model_provider/openai_provider.py +87 -31
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +10 -46
- mlrun/db/nopdb.py +0 -10
- mlrun/launcher/base.py +0 -6
- mlrun/model_monitoring/api.py +43 -22
- mlrun/model_monitoring/applications/base.py +1 -1
- mlrun/model_monitoring/controller.py +112 -38
- mlrun/model_monitoring/db/_schedules.py +13 -9
- mlrun/model_monitoring/stream_processing.py +16 -12
- mlrun/platforms/__init__.py +3 -2
- mlrun/projects/project.py +2 -2
- mlrun/run.py +38 -5
- mlrun/serving/server.py +23 -0
- mlrun/serving/states.py +76 -29
- mlrun/serving/system_steps.py +60 -36
- mlrun/utils/helpers.py +27 -13
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/METADATA +6 -5
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/RECORD +30 -31
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED
|
@@ -24,6 +24,7 @@ from datetime import datetime, timedelta
|
|
|
24
24
|
from os import environ, path, remove
|
|
25
25
|
from typing import Literal, Optional, Union
|
|
26
26
|
from urllib.parse import urlparse
|
|
27
|
+
from uuid import UUID
|
|
27
28
|
|
|
28
29
|
import pydantic.v1
|
|
29
30
|
import requests
|
|
@@ -2554,50 +2555,6 @@ class HTTPRunDB(RunDBInterface):
|
|
|
2554
2555
|
resp = self.api_call("GET", path, error_message)
|
|
2555
2556
|
return FeatureSet.from_dict(resp.json())
|
|
2556
2557
|
|
|
2557
|
-
def list_features(
|
|
2558
|
-
self,
|
|
2559
|
-
project: Optional[str] = None,
|
|
2560
|
-
name: Optional[str] = None,
|
|
2561
|
-
tag: Optional[str] = None,
|
|
2562
|
-
entities: Optional[list[str]] = None,
|
|
2563
|
-
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
2564
|
-
) -> list[dict]:
|
|
2565
|
-
"""List feature-sets which contain specific features. This function may return multiple versions of the same
|
|
2566
|
-
feature-set if a specific tag is not requested. Note that the various filters of this function actually
|
|
2567
|
-
refer to the feature-set object containing the features, not to the features themselves.
|
|
2568
|
-
|
|
2569
|
-
:param project: Project which contains these features.
|
|
2570
|
-
:param name: Name of the feature to look for. The name is used in a like query, and is not case-sensitive. For
|
|
2571
|
-
example, looking for ``feat`` will return features which are named ``MyFeature`` as well as ``defeat``.
|
|
2572
|
-
:param tag: Return feature-sets which contain the features looked for, and are tagged with the specific tag.
|
|
2573
|
-
:param entities: Return only feature-sets which contain an entity whose name is contained in this list.
|
|
2574
|
-
:param labels: Filter feature-sets by label key-value pairs or key existence. This can be provided as:
|
|
2575
|
-
- A dictionary in the format `{"label": "value"}` to match specific label key-value pairs,
|
|
2576
|
-
or `{"label": None}` to check for key existence.
|
|
2577
|
-
- A list of strings formatted as `"label=value"` to match specific label key-value pairs,
|
|
2578
|
-
or just `"label"` for key existence.
|
|
2579
|
-
- A comma-separated string formatted as `"label1=value1,label2"` to match entities with
|
|
2580
|
-
the specified key-value pairs or key existence.
|
|
2581
|
-
:returns: A list of mapping from feature to a digest of the feature-set, which contains the feature-set
|
|
2582
|
-
meta-data. Multiple entries may be returned for any specific feature due to multiple tags or versions
|
|
2583
|
-
of the feature-set.
|
|
2584
|
-
"""
|
|
2585
|
-
|
|
2586
|
-
project = project or config.active_project
|
|
2587
|
-
labels = self._parse_labels(labels)
|
|
2588
|
-
params = {
|
|
2589
|
-
"name": name,
|
|
2590
|
-
"tag": tag,
|
|
2591
|
-
"entity": entities or [],
|
|
2592
|
-
"label": labels,
|
|
2593
|
-
}
|
|
2594
|
-
|
|
2595
|
-
path = f"projects/{project}/features"
|
|
2596
|
-
|
|
2597
|
-
error_message = f"Failed listing features, project: {project}, query: {params}"
|
|
2598
|
-
resp = self.api_call("GET", path, error_message, params=params)
|
|
2599
|
-
return resp.json()["features"]
|
|
2600
|
-
|
|
2601
2558
|
def list_features_v2(
|
|
2602
2559
|
self,
|
|
2603
2560
|
project: Optional[str] = None,
|
|
@@ -3834,8 +3791,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3834
3791
|
If tsdb_metrics=False, this parameter will be ignored and no tsdb metrics
|
|
3835
3792
|
will be included.
|
|
3836
3793
|
:param top_level: Whether to return only top level model endpoints.
|
|
3837
|
-
:param mode: Specifies the mode of the model endpoint. Can be "real-time", "batch", or
|
|
3838
|
-
to None.
|
|
3794
|
+
:param mode: Specifies the mode of the model endpoint. Can be "real-time" (0), "batch" (1), or
|
|
3795
|
+
both if set to None.
|
|
3839
3796
|
:param uids: A list of unique ids to filter by.
|
|
3840
3797
|
:param latest_only: Whether to return only the latest model endpoint version.
|
|
3841
3798
|
:return: A list of model endpoints.
|
|
@@ -3968,6 +3925,13 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3968
3925
|
raise MLRunInvalidArgumentError(
|
|
3969
3926
|
"Either endpoint_uid or function_name and function_tag must be provided"
|
|
3970
3927
|
)
|
|
3928
|
+
if uid:
|
|
3929
|
+
try:
|
|
3930
|
+
UUID(uid)
|
|
3931
|
+
except (ValueError, TypeError):
|
|
3932
|
+
raise MLRunInvalidArgumentError(
|
|
3933
|
+
"endpoint_id must be a valid UUID string"
|
|
3934
|
+
)
|
|
3971
3935
|
|
|
3972
3936
|
def update_model_monitoring_controller(
|
|
3973
3937
|
self,
|
mlrun/db/nopdb.py
CHANGED
|
@@ -376,16 +376,6 @@ class NopDB(RunDBInterface):
|
|
|
376
376
|
) -> dict:
|
|
377
377
|
pass
|
|
378
378
|
|
|
379
|
-
def list_features(
|
|
380
|
-
self,
|
|
381
|
-
project: str,
|
|
382
|
-
name: Optional[str] = None,
|
|
383
|
-
tag: Optional[str] = None,
|
|
384
|
-
entities: Optional[list[str]] = None,
|
|
385
|
-
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
386
|
-
) -> mlrun.common.schemas.FeaturesOutput:
|
|
387
|
-
pass
|
|
388
|
-
|
|
389
379
|
def list_features_v2(
|
|
390
380
|
self,
|
|
391
381
|
project: str,
|
mlrun/launcher/base.py
CHANGED
|
@@ -281,12 +281,6 @@ class BaseLauncher(abc.ABC):
|
|
|
281
281
|
|
|
282
282
|
run.metadata.name = mlrun.utils.normalize_name(
|
|
283
283
|
name=name or run.metadata.name or def_name,
|
|
284
|
-
# if name or runspec.metadata.name are set then it means that is user defined name and we want to warn the
|
|
285
|
-
# user that the passed name needs to be set without underscore, if its not user defined but rather enriched
|
|
286
|
-
# from the handler(function) name then we replace the underscore without warning the user.
|
|
287
|
-
# most of the time handlers will have `_` in the handler name (python convention is to separate function
|
|
288
|
-
# words with `_`), therefore we don't want to be noisy when normalizing the run name
|
|
289
|
-
verbose=bool(name or run.metadata.name),
|
|
290
284
|
)
|
|
291
285
|
mlrun.utils.verify_field_regex(
|
|
292
286
|
"run.metadata.name", run.metadata.name, mlrun.utils.regex.run_name
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -18,6 +18,7 @@ from datetime import datetime
|
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import pandas as pd
|
|
21
|
+
from deprecated import deprecated
|
|
21
22
|
|
|
22
23
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
23
24
|
import mlrun.datastore.base
|
|
@@ -45,6 +46,14 @@ DatasetType = typing.Union[
|
|
|
45
46
|
]
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
# TODO: Remove this in 1.12.0
|
|
50
|
+
@deprecated(
|
|
51
|
+
version="1.10.0",
|
|
52
|
+
reason="This function is deprecated and will be removed in 1.12. You can generate a model endpoint by either "
|
|
53
|
+
"deploying a monitored serving function as a real-time service or running it as an offline job. "
|
|
54
|
+
"To retrieve model endpoints, use `project.list_model_endpoints()`",
|
|
55
|
+
category=FutureWarning,
|
|
56
|
+
)
|
|
48
57
|
def get_or_create_model_endpoint(
|
|
49
58
|
project: str,
|
|
50
59
|
model_endpoint_name: str,
|
|
@@ -67,8 +76,8 @@ def get_or_create_model_endpoint(
|
|
|
67
76
|
:param model_endpoint_name: If a new model endpoint is created, the model endpoint name will be presented
|
|
68
77
|
under this endpoint (applicable only to new endpoint_id).
|
|
69
78
|
:param model_path: The model store path (applicable only to new endpoint_id).
|
|
70
|
-
:param endpoint_id: Model endpoint unique ID. If not exist in DB, will generate a new record
|
|
71
|
-
|
|
79
|
+
:param endpoint_id: Model endpoint unique ID. If it does not exist in the DB, a new record is generated with a
|
|
80
|
+
newly generated ID.
|
|
72
81
|
:param function_name: If a new model endpoint is created, use this function name.
|
|
73
82
|
:param function_tag: If a new model endpoint is created, use this function tag.
|
|
74
83
|
:param context: MLRun context. If `function_name` not provided, use the context to generate the
|
|
@@ -91,25 +100,26 @@ def get_or_create_model_endpoint(
|
|
|
91
100
|
function_name = FunctionURI.from_string(
|
|
92
101
|
context.to_dict()["spec"]["function"]
|
|
93
102
|
).function
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
103
|
+
if endpoint_id or function_name:
|
|
104
|
+
try:
|
|
105
|
+
model_endpoint = db_session.get_model_endpoint(
|
|
106
|
+
project=project,
|
|
107
|
+
name=model_endpoint_name,
|
|
108
|
+
endpoint_id=endpoint_id,
|
|
109
|
+
function_name=function_name,
|
|
110
|
+
function_tag=function_tag or "latest",
|
|
111
|
+
feature_analysis=feature_analysis,
|
|
112
|
+
)
|
|
113
|
+
# If other fields are provided, validate that they correspond to the existing model endpoint data
|
|
114
|
+
_model_endpoint_validations(
|
|
115
|
+
model_endpoint=model_endpoint,
|
|
116
|
+
model_path=model_path,
|
|
117
|
+
sample_set_statistics=sample_set_statistics,
|
|
118
|
+
)
|
|
109
119
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
120
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
121
|
+
# Create a new model endpoint with the provided details
|
|
122
|
+
pass
|
|
113
123
|
if not model_endpoint:
|
|
114
124
|
model_endpoint = _generate_model_endpoint(
|
|
115
125
|
project=project,
|
|
@@ -123,6 +133,13 @@ def get_or_create_model_endpoint(
|
|
|
123
133
|
return model_endpoint
|
|
124
134
|
|
|
125
135
|
|
|
136
|
+
# TODO: Remove this in 1.12.0
|
|
137
|
+
@deprecated(
|
|
138
|
+
version="1.10.0",
|
|
139
|
+
reason="This function is deprecated and will be removed in 1.12. "
|
|
140
|
+
"Instead, run a monitored serving function as a job",
|
|
141
|
+
category=FutureWarning,
|
|
142
|
+
)
|
|
126
143
|
def record_results(
|
|
127
144
|
project: str,
|
|
128
145
|
model_path: str,
|
|
@@ -144,8 +161,8 @@ def record_results(
|
|
|
144
161
|
:param model_path: The model Store path.
|
|
145
162
|
:param model_endpoint_name: If a new model endpoint is generated, the model endpoint name will be presented
|
|
146
163
|
under this endpoint.
|
|
147
|
-
:param endpoint_id: Model endpoint unique ID. If not exist in DB, will generate a new record
|
|
148
|
-
|
|
164
|
+
:param endpoint_id: Model endpoint unique ID. If it does not exist in the DB, a new record is generated with a
|
|
165
|
+
newly generated ID.
|
|
149
166
|
:param function_name: If a new model endpoint is created, use this function name for generating the
|
|
150
167
|
function URI.
|
|
151
168
|
:param context: MLRun context. Note that the context is required generating the model endpoint.
|
|
@@ -236,6 +253,7 @@ def _model_endpoint_validations(
|
|
|
236
253
|
key=model_obj.key,
|
|
237
254
|
iter=model_obj.iter,
|
|
238
255
|
tree=model_obj.tree,
|
|
256
|
+
uid=model_obj.uid,
|
|
239
257
|
)
|
|
240
258
|
|
|
241
259
|
# Enrich the uri schema with the store prefix
|
|
@@ -325,12 +343,15 @@ def _generate_model_endpoint(
|
|
|
325
343
|
|
|
326
344
|
:return `mlrun.common.schemas.ModelEndpoint` object.
|
|
327
345
|
"""
|
|
346
|
+
|
|
328
347
|
current_time = datetime_now()
|
|
329
348
|
model_endpoint = mlrun.common.schemas.ModelEndpoint(
|
|
330
349
|
metadata=mlrun.common.schemas.ModelEndpointMetadata(
|
|
331
350
|
project=project,
|
|
332
351
|
name=model_endpoint_name,
|
|
333
352
|
endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.BATCH_EP,
|
|
353
|
+
# For backwards compatibility, old batch model endpoints are analyzed as real-time endpoints
|
|
354
|
+
mode=mlrun.common.schemas.model_monitoring.EndpointMode.REAL_TIME,
|
|
334
355
|
),
|
|
335
356
|
spec=mlrun.common.schemas.ModelEndpointSpec(
|
|
336
357
|
function_name=function_name or "function",
|
|
@@ -647,7 +647,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
647
647
|
else:
|
|
648
648
|
class_name = handler_to_class.split(".")[-1].split("::")[0]
|
|
649
649
|
|
|
650
|
-
job_name = mlrun.utils.normalize_name(class_name
|
|
650
|
+
job_name = mlrun.utils.normalize_name(class_name)
|
|
651
651
|
|
|
652
652
|
if not mm_constants.APP_NAME_REGEX.fullmatch(job_name):
|
|
653
653
|
raise mlrun.errors.MLRunValueError(
|
|
@@ -11,33 +11,37 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import collections
|
|
15
15
|
import concurrent.futures
|
|
16
16
|
import datetime
|
|
17
17
|
import json
|
|
18
18
|
import os
|
|
19
19
|
import traceback
|
|
20
|
+
import warnings
|
|
20
21
|
from collections.abc import Iterator
|
|
21
22
|
from contextlib import AbstractContextManager
|
|
22
23
|
from types import TracebackType
|
|
23
|
-
from typing import Any, NamedTuple, Optional, Union, cast
|
|
24
|
+
from typing import Any, Final, NamedTuple, Optional, Union, cast
|
|
24
25
|
|
|
25
26
|
import nuclio_sdk
|
|
27
|
+
import numpy as np
|
|
26
28
|
import pandas as pd
|
|
27
29
|
|
|
28
30
|
import mlrun
|
|
29
31
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
32
|
+
import mlrun.feature_store as fstore
|
|
30
33
|
import mlrun.model_monitoring
|
|
31
34
|
import mlrun.model_monitoring.db._schedules as schedules
|
|
32
35
|
import mlrun.model_monitoring.helpers
|
|
33
36
|
import mlrun.platforms.iguazio
|
|
37
|
+
from mlrun.common.schemas import EndpointType
|
|
34
38
|
from mlrun.common.schemas.model_monitoring.constants import (
|
|
35
39
|
ControllerEvent,
|
|
36
40
|
ControllerEventEndpointPolicy,
|
|
37
41
|
)
|
|
38
42
|
from mlrun.errors import err_to_str
|
|
39
43
|
from mlrun.model_monitoring.helpers import batch_dict2timedelta
|
|
40
|
-
from mlrun.utils import logger
|
|
44
|
+
from mlrun.utils import datetime_now, logger
|
|
41
45
|
|
|
42
46
|
_SECONDS_IN_DAY = int(datetime.timedelta(days=1).total_seconds())
|
|
43
47
|
_SECONDS_IN_MINUTE = 60
|
|
@@ -49,14 +53,16 @@ class _Interval(NamedTuple):
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
class _BatchWindow:
|
|
56
|
+
TIMESTAMP_RESOLUTION_MICRO: Final = 1e-6 # 0.000001 seconds or 1 microsecond
|
|
57
|
+
|
|
52
58
|
def __init__(
|
|
53
59
|
self,
|
|
54
60
|
*,
|
|
55
61
|
schedules_file: schedules.ModelMonitoringSchedulesFileEndpoint,
|
|
56
62
|
application: str,
|
|
57
63
|
timedelta_seconds: int,
|
|
58
|
-
last_updated:
|
|
59
|
-
first_request:
|
|
64
|
+
last_updated: float,
|
|
65
|
+
first_request: float,
|
|
60
66
|
endpoint_mode: mm_constants.EndpointMode = mm_constants.EndpointMode.REAL_TIME,
|
|
61
67
|
) -> None:
|
|
62
68
|
"""
|
|
@@ -73,15 +79,17 @@ class _BatchWindow:
|
|
|
73
79
|
self._endpoint_mode = endpoint_mode
|
|
74
80
|
self._start = self._get_last_analyzed()
|
|
75
81
|
|
|
76
|
-
def _get_saved_last_analyzed(
|
|
77
|
-
|
|
82
|
+
def _get_saved_last_analyzed(
|
|
83
|
+
self,
|
|
84
|
+
) -> Optional[float]:
|
|
85
|
+
return self._db.get_application_time(self._application)
|
|
78
86
|
|
|
79
|
-
def _update_last_analyzed(self, last_analyzed:
|
|
87
|
+
def _update_last_analyzed(self, last_analyzed: float) -> None:
|
|
80
88
|
self._db.update_application_time(
|
|
81
89
|
application=self._application, timestamp=last_analyzed
|
|
82
90
|
)
|
|
83
91
|
|
|
84
|
-
def _get_initial_last_analyzed(self) ->
|
|
92
|
+
def _get_initial_last_analyzed(self) -> float:
|
|
85
93
|
if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
|
|
86
94
|
logger.info(
|
|
87
95
|
"No last analyzed time was found for this endpoint and application, as this is "
|
|
@@ -107,7 +115,7 @@ class _BatchWindow:
|
|
|
107
115
|
self._stop - first_period_in_seconds,
|
|
108
116
|
)
|
|
109
117
|
|
|
110
|
-
def _get_last_analyzed(self) ->
|
|
118
|
+
def _get_last_analyzed(self) -> float:
|
|
111
119
|
saved_last_analyzed = self._get_saved_last_analyzed()
|
|
112
120
|
if saved_last_analyzed is not None:
|
|
113
121
|
if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
|
|
@@ -127,13 +135,16 @@ class _BatchWindow:
|
|
|
127
135
|
# Iterate timestamp from start until timestamp <= stop - step
|
|
128
136
|
# so that the last interval will end at (timestamp + step) <= stop.
|
|
129
137
|
# Add 1 to stop - step to get <= and not <.
|
|
130
|
-
for timestamp in
|
|
138
|
+
for timestamp in np.arange(
|
|
139
|
+
self._start, self._stop - self._step + 1, self._step
|
|
140
|
+
):
|
|
131
141
|
entered = True
|
|
132
142
|
start_time = datetime.datetime.fromtimestamp(
|
|
133
143
|
timestamp, tz=datetime.timezone.utc
|
|
134
144
|
)
|
|
135
145
|
end_time = datetime.datetime.fromtimestamp(
|
|
136
|
-
timestamp + self._step,
|
|
146
|
+
timestamp - self.TIMESTAMP_RESOLUTION_MICRO + self._step,
|
|
147
|
+
tz=datetime.timezone.utc,
|
|
137
148
|
)
|
|
138
149
|
yield _Interval(start_time, end_time)
|
|
139
150
|
|
|
@@ -149,7 +160,7 @@ class _BatchWindow:
|
|
|
149
160
|
# If the endpoint is a batch endpoint, we need to update the last analyzed time
|
|
150
161
|
# to the end of the batch time.
|
|
151
162
|
if last_analyzed:
|
|
152
|
-
if last_analyzed < self._stop:
|
|
163
|
+
if last_analyzed - self.TIMESTAMP_RESOLUTION_MICRO < self._stop:
|
|
153
164
|
# If the last analyzed time is earlier than the stop time,
|
|
154
165
|
# yield the final partial interval from last_analyzed to stop
|
|
155
166
|
yield _Interval(
|
|
@@ -223,7 +234,7 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
223
234
|
def get_application_list(self) -> set[str]:
|
|
224
235
|
return self._schedules_file.get_application_list()
|
|
225
236
|
|
|
226
|
-
def get_min_last_analyzed(self) -> Optional[
|
|
237
|
+
def get_min_last_analyzed(self) -> Optional[float]:
|
|
227
238
|
return self._schedules_file.get_min_timestamp()
|
|
228
239
|
|
|
229
240
|
@classmethod
|
|
@@ -231,22 +242,29 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
231
242
|
cls,
|
|
232
243
|
last_request: datetime.datetime,
|
|
233
244
|
endpoint_mode: mm_constants.EndpointMode,
|
|
234
|
-
|
|
245
|
+
not_old_batch_endpoint: bool,
|
|
246
|
+
) -> float:
|
|
235
247
|
"""
|
|
236
248
|
Get the last updated time of a model endpoint.
|
|
237
249
|
"""
|
|
238
250
|
|
|
239
251
|
if endpoint_mode == mm_constants.EndpointMode.REAL_TIME:
|
|
240
|
-
last_updated =
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
float,
|
|
244
|
-
mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
|
|
245
|
-
)
|
|
252
|
+
last_updated = last_request.timestamp() - cast(
|
|
253
|
+
float,
|
|
254
|
+
mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
|
|
246
255
|
)
|
|
256
|
+
if not not_old_batch_endpoint:
|
|
257
|
+
# If the endpoint does not have a stream, `last_updated` should be
|
|
258
|
+
# the minimum between the current time and the last updated time.
|
|
259
|
+
# This compensates for the bumping mechanism - see
|
|
260
|
+
# `update_model_endpoint_last_request`.
|
|
261
|
+
last_updated = min(datetime_now().timestamp(), last_updated)
|
|
262
|
+
logger.debug(
|
|
263
|
+
"The endpoint does not have a stream", last_updated=last_updated
|
|
264
|
+
)
|
|
247
265
|
|
|
248
266
|
return last_updated
|
|
249
|
-
return
|
|
267
|
+
return last_request.timestamp()
|
|
250
268
|
|
|
251
269
|
def get_intervals(
|
|
252
270
|
self,
|
|
@@ -255,6 +273,7 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
255
273
|
first_request: datetime.datetime,
|
|
256
274
|
last_request: datetime.datetime,
|
|
257
275
|
endpoint_mode: mm_constants.EndpointMode,
|
|
276
|
+
not_old_batch_endpoint: bool,
|
|
258
277
|
) -> Iterator[_Interval]:
|
|
259
278
|
"""
|
|
260
279
|
Get the batch window for a specific endpoint and application.
|
|
@@ -266,8 +285,10 @@ class _BatchWindowGenerator(AbstractContextManager):
|
|
|
266
285
|
schedules_file=self._schedules_file,
|
|
267
286
|
application=application,
|
|
268
287
|
timedelta_seconds=self._timedelta,
|
|
269
|
-
last_updated=self._get_last_updated_time(
|
|
270
|
-
|
|
288
|
+
last_updated=self._get_last_updated_time(
|
|
289
|
+
last_request, endpoint_mode, not_old_batch_endpoint
|
|
290
|
+
),
|
|
291
|
+
first_request=first_request.timestamp(),
|
|
271
292
|
endpoint_mode=endpoint_mode,
|
|
272
293
|
)
|
|
273
294
|
yield from self.batch_window.get_intervals()
|
|
@@ -291,6 +312,8 @@ class MonitoringApplicationController:
|
|
|
291
312
|
Note that the MonitoringApplicationController object requires access keys along with valid project configurations.
|
|
292
313
|
"""
|
|
293
314
|
|
|
315
|
+
_MAX_FEATURE_SET_PER_WORKER = 1000
|
|
316
|
+
|
|
294
317
|
def __init__(self) -> None:
|
|
295
318
|
"""Initialize Monitoring Application Controller"""
|
|
296
319
|
self.project = cast(str, mlrun.mlconf.active_project)
|
|
@@ -324,6 +347,9 @@ class MonitoringApplicationController:
|
|
|
324
347
|
mlrun.platforms.iguazio.KafkaOutputStream,
|
|
325
348
|
],
|
|
326
349
|
] = {}
|
|
350
|
+
self.feature_sets: collections.OrderedDict[
|
|
351
|
+
str, mlrun.feature_store.FeatureSet
|
|
352
|
+
] = collections.OrderedDict()
|
|
327
353
|
self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
328
354
|
project=self.project
|
|
329
355
|
)
|
|
@@ -433,15 +459,14 @@ class MonitoringApplicationController:
|
|
|
433
459
|
base_period_minutes, current_min_last_analyzed, current_time
|
|
434
460
|
)
|
|
435
461
|
and (
|
|
436
|
-
|
|
437
|
-
!= last_timestamp_sent
|
|
462
|
+
endpoint.status.last_request.timestamp() != last_timestamp_sent
|
|
438
463
|
or current_min_last_analyzed != last_analyzed_sent
|
|
439
464
|
)
|
|
440
465
|
):
|
|
441
466
|
# Write to the schedule chief file the last_request and min_last_analyzed for which we pushed an event to the stream
|
|
442
467
|
schedules_file.update_endpoint_timestamps(
|
|
443
468
|
endpoint_uid=endpoint.metadata.uid,
|
|
444
|
-
last_request=
|
|
469
|
+
last_request=endpoint.status.last_request.timestamp(),
|
|
445
470
|
last_analyzed=current_min_last_analyzed,
|
|
446
471
|
)
|
|
447
472
|
return True
|
|
@@ -460,13 +485,14 @@ class MonitoringApplicationController:
|
|
|
460
485
|
last_request=endpoint.status.last_request,
|
|
461
486
|
first_request=endpoint.status.first_request,
|
|
462
487
|
endpoint_type=endpoint.metadata.endpoint_type,
|
|
488
|
+
feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
|
|
463
489
|
)
|
|
464
490
|
return False
|
|
465
491
|
|
|
466
492
|
@staticmethod
|
|
467
493
|
def _should_send_nop_event(
|
|
468
494
|
base_period_minutes: int,
|
|
469
|
-
min_last_analyzed:
|
|
495
|
+
min_last_analyzed: float,
|
|
470
496
|
current_time: datetime.datetime,
|
|
471
497
|
):
|
|
472
498
|
if min_last_analyzed:
|
|
@@ -515,7 +541,7 @@ class MonitoringApplicationController:
|
|
|
515
541
|
try:
|
|
516
542
|
project_name = event[ControllerEvent.PROJECT]
|
|
517
543
|
endpoint_id = event[ControllerEvent.ENDPOINT_ID]
|
|
518
|
-
|
|
544
|
+
not_old_batch_endpoint = True
|
|
519
545
|
if (
|
|
520
546
|
event[ControllerEvent.KIND]
|
|
521
547
|
== mm_constants.ControllerEventKind.BATCH_COMPLETE
|
|
@@ -572,6 +598,10 @@ class MonitoringApplicationController:
|
|
|
572
598
|
|
|
573
599
|
endpoint_mode = mm_constants.EndpointMode.REAL_TIME
|
|
574
600
|
|
|
601
|
+
not_old_batch_endpoint = (
|
|
602
|
+
event[ControllerEvent.ENDPOINT_TYPE] != EndpointType.BATCH_EP
|
|
603
|
+
)
|
|
604
|
+
|
|
575
605
|
logger.info(
|
|
576
606
|
"Starting to analyze", timestamp=last_stream_timestamp.isoformat()
|
|
577
607
|
)
|
|
@@ -590,16 +620,49 @@ class MonitoringApplicationController:
|
|
|
590
620
|
first_request=first_request,
|
|
591
621
|
last_request=last_stream_timestamp,
|
|
592
622
|
endpoint_mode=endpoint_mode,
|
|
623
|
+
not_old_batch_endpoint=not_old_batch_endpoint,
|
|
593
624
|
):
|
|
594
625
|
data_in_window = False
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
626
|
+
if not_old_batch_endpoint:
|
|
627
|
+
# Serving endpoint - get the relevant window data from the TSDB
|
|
628
|
+
prediction_metric = self.tsdb_connector.read_predictions(
|
|
629
|
+
start=start_infer_time,
|
|
630
|
+
end=end_infer_time,
|
|
631
|
+
endpoint_id=endpoint_id,
|
|
632
|
+
)
|
|
633
|
+
if prediction_metric.data:
|
|
634
|
+
data_in_window = True
|
|
635
|
+
else:
|
|
636
|
+
# Old batch endpoint - get the relevant window data from the parquet target
|
|
637
|
+
warnings.warn(
|
|
638
|
+
"Analyzing batch model endpoints with real time processing events is "
|
|
639
|
+
"deprecated in 1.10.0 and will be removed in 1.12.0. "
|
|
640
|
+
"Instead, use job-based serving to invoke and analyze offline batch model"
|
|
641
|
+
"endpoints.",
|
|
642
|
+
# TODO: Remove this in 1.12.0
|
|
643
|
+
FutureWarning,
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
if endpoint_id not in self.feature_sets:
|
|
647
|
+
self.feature_sets[endpoint_id] = fstore.get_feature_set(
|
|
648
|
+
event[ControllerEvent.FEATURE_SET_URI]
|
|
649
|
+
)
|
|
650
|
+
self.feature_sets.move_to_end(endpoint_id, last=False)
|
|
651
|
+
if (
|
|
652
|
+
len(self.feature_sets)
|
|
653
|
+
> self._MAX_FEATURE_SET_PER_WORKER
|
|
654
|
+
):
|
|
655
|
+
self.feature_sets.popitem(last=True)
|
|
656
|
+
m_fs = self.feature_sets.get(endpoint_id)
|
|
657
|
+
|
|
658
|
+
df = m_fs.to_dataframe(
|
|
659
|
+
start_time=start_infer_time,
|
|
660
|
+
end_time=end_infer_time,
|
|
661
|
+
time_column=mm_constants.EventFieldType.TIMESTAMP,
|
|
662
|
+
storage_options=self.storage_options,
|
|
663
|
+
)
|
|
664
|
+
if len(df) > 0:
|
|
665
|
+
data_in_window = True
|
|
603
666
|
|
|
604
667
|
if not data_in_window:
|
|
605
668
|
logger.info(
|
|
@@ -616,7 +679,10 @@ class MonitoringApplicationController:
|
|
|
616
679
|
endpoint_id=endpoint_id,
|
|
617
680
|
)
|
|
618
681
|
self._push_to_applications(
|
|
619
|
-
start_infer_time=start_infer_time
|
|
682
|
+
start_infer_time=start_infer_time
|
|
683
|
+
- datetime.timedelta(
|
|
684
|
+
batch_window_generator.batch_window.TIMESTAMP_RESOLUTION_MICRO
|
|
685
|
+
), # We subtract a microsecond to ensure that the apps will retrieve start time data.
|
|
620
686
|
end_infer_time=end_infer_time,
|
|
621
687
|
endpoint_id=endpoint_id,
|
|
622
688
|
endpoint_name=endpoint_name,
|
|
@@ -653,6 +719,9 @@ class MonitoringApplicationController:
|
|
|
653
719
|
ControllerEvent.ENDPOINT_TYPE: event[
|
|
654
720
|
ControllerEvent.ENDPOINT_TYPE
|
|
655
721
|
],
|
|
722
|
+
ControllerEvent.FEATURE_SET_URI: event[
|
|
723
|
+
ControllerEvent.FEATURE_SET_URI
|
|
724
|
+
],
|
|
656
725
|
ControllerEvent.FIRST_REQUEST: event[
|
|
657
726
|
ControllerEvent.FIRST_REQUEST
|
|
658
727
|
],
|
|
@@ -842,6 +911,7 @@ class MonitoringApplicationController:
|
|
|
842
911
|
sep=" ", timespec="microseconds"
|
|
843
912
|
),
|
|
844
913
|
endpoint_type=endpoint.metadata.endpoint_type,
|
|
914
|
+
feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
|
|
845
915
|
endpoint_policy=json.dumps(policy),
|
|
846
916
|
)
|
|
847
917
|
policy[ControllerEventEndpointPolicy.ENDPOINT_UPDATED] = (
|
|
@@ -859,6 +929,7 @@ class MonitoringApplicationController:
|
|
|
859
929
|
sep=" ", timespec="microseconds"
|
|
860
930
|
),
|
|
861
931
|
endpoint_type=endpoint.metadata.endpoint_type.value,
|
|
932
|
+
feature_set_uri=endpoint.spec.monitoring_feature_set_uri,
|
|
862
933
|
endpoint_policy=policy,
|
|
863
934
|
)
|
|
864
935
|
|
|
@@ -871,6 +942,7 @@ class MonitoringApplicationController:
|
|
|
871
942
|
timestamp: str,
|
|
872
943
|
first_request: str,
|
|
873
944
|
endpoint_type: int,
|
|
945
|
+
feature_set_uri: str,
|
|
874
946
|
endpoint_policy: dict[str, Any],
|
|
875
947
|
) -> None:
|
|
876
948
|
"""
|
|
@@ -883,6 +955,7 @@ class MonitoringApplicationController:
|
|
|
883
955
|
:param endpoint_id: endpoint id string
|
|
884
956
|
:param endpoint_name: the endpoint name string
|
|
885
957
|
:param endpoint_type: Enum of the endpoint type
|
|
958
|
+
:param feature_set_uri: the feature set uri string
|
|
886
959
|
"""
|
|
887
960
|
event = {
|
|
888
961
|
ControllerEvent.KIND.value: kind,
|
|
@@ -892,6 +965,7 @@ class MonitoringApplicationController:
|
|
|
892
965
|
ControllerEvent.TIMESTAMP.value: timestamp,
|
|
893
966
|
ControllerEvent.FIRST_REQUEST.value: first_request,
|
|
894
967
|
ControllerEvent.ENDPOINT_TYPE.value: endpoint_type,
|
|
968
|
+
ControllerEvent.FEATURE_SET_URI.value: feature_set_uri,
|
|
895
969
|
ControllerEvent.ENDPOINT_POLICY.value: endpoint_policy,
|
|
896
970
|
}
|
|
897
971
|
logger.info(
|