mlrun 1.10.0rc16__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +9 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +32 -10
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +211 -74
- mlrun/datastore/model_provider/openai_provider.py +243 -71
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +27 -19
- mlrun/db/httpdb.py +57 -48
- mlrun/db/nopdb.py +25 -10
- mlrun/execution.py +55 -13
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +2 -0
- mlrun/model.py +9 -3
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +372 -136
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +36 -13
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
- mlrun/model_monitoring/helpers.py +28 -5
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +128 -45
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +147 -17
- mlrun/runtimes/nuclio/function.py +70 -27
- mlrun/runtimes/nuclio/serving.py +85 -4
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +54 -13
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +211 -40
- mlrun/serving/states.py +536 -156
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +136 -81
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +212 -82
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +44 -45
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +97 -92
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/__init__.py
CHANGED
|
@@ -31,11 +31,13 @@ from typing import Optional
|
|
|
31
31
|
|
|
32
32
|
import dotenv
|
|
33
33
|
|
|
34
|
+
from .common.constants import MLRUN_ACTIVE_PROJECT
|
|
34
35
|
from .config import config as mlconf
|
|
35
36
|
from .datastore import DataItem, ModelProvider, store_manager
|
|
36
37
|
from .db import get_run_db
|
|
37
38
|
from .errors import MLRunInvalidArgumentError, MLRunNotFoundError
|
|
38
39
|
from .execution import MLClientCtx
|
|
40
|
+
from .hub import get_hub_module, import_module
|
|
39
41
|
from .model import RunObject, RunTemplate, new_task
|
|
40
42
|
from .package import ArtifactType, DefaultPackager, Packager, handler
|
|
41
43
|
from .projects import (
|
|
@@ -167,11 +169,29 @@ def set_environment(
|
|
|
167
169
|
|
|
168
170
|
|
|
169
171
|
def get_current_project(silent: bool = False) -> Optional[MlrunProject]:
|
|
170
|
-
if
|
|
172
|
+
if pipeline_context.project:
|
|
173
|
+
return pipeline_context.project
|
|
174
|
+
|
|
175
|
+
project_name = environ.get(MLRUN_ACTIVE_PROJECT, None)
|
|
176
|
+
if not project_name:
|
|
177
|
+
if not silent:
|
|
178
|
+
raise MLRunInvalidArgumentError(
|
|
179
|
+
"No current project is initialized. Use new, get or load project functions first."
|
|
180
|
+
)
|
|
181
|
+
return None
|
|
182
|
+
|
|
183
|
+
project = load_project(
|
|
184
|
+
name=project_name,
|
|
185
|
+
url=project_name,
|
|
186
|
+
save=False,
|
|
187
|
+
sync_functions=False,
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
if not project and not silent:
|
|
171
191
|
raise MLRunInvalidArgumentError(
|
|
172
192
|
"No current project is initialized. Use new, get or load project functions first."
|
|
173
193
|
)
|
|
174
|
-
return
|
|
194
|
+
return project
|
|
175
195
|
|
|
176
196
|
|
|
177
197
|
def get_sample_path(subpath=""):
|
mlrun/artifacts/document.py
CHANGED
|
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
|
|
|
359
359
|
self,
|
|
360
360
|
splitter: Optional["TextSplitter"] = None, # noqa: F821
|
|
361
361
|
) -> list["Document"]: # noqa: F821
|
|
362
|
-
|
|
362
|
+
# Try new langchain 1.0+ import path first
|
|
363
|
+
try:
|
|
364
|
+
from langchain_core.documents import Document
|
|
365
|
+
except ImportError:
|
|
366
|
+
# Fall back to old langchain <1.0 import path
|
|
367
|
+
from langchain.schema import Document
|
|
363
368
|
|
|
364
369
|
"""
|
|
365
370
|
Create LC documents from the artifact
|
mlrun/artifacts/llm_prompt.py
CHANGED
|
@@ -29,7 +29,7 @@ class LLMPromptArtifactSpec(ArtifactSpec):
|
|
|
29
29
|
_dict_fields = ArtifactSpec._dict_fields + [
|
|
30
30
|
"prompt_template",
|
|
31
31
|
"prompt_legend",
|
|
32
|
-
"
|
|
32
|
+
"invocation_config",
|
|
33
33
|
"description",
|
|
34
34
|
]
|
|
35
35
|
PROMPT_TEMPLATE_KEYS = ("content", "role")
|
|
@@ -41,7 +41,7 @@ class LLMPromptArtifactSpec(ArtifactSpec):
|
|
|
41
41
|
prompt_template: Optional[list[dict]] = None,
|
|
42
42
|
prompt_path: Optional[str] = None,
|
|
43
43
|
prompt_legend: Optional[dict] = None,
|
|
44
|
-
|
|
44
|
+
invocation_config: Optional[dict] = None,
|
|
45
45
|
description: Optional[str] = None,
|
|
46
46
|
target_path: Optional[str] = None,
|
|
47
47
|
**kwargs,
|
|
@@ -62,12 +62,17 @@ class LLMPromptArtifactSpec(ArtifactSpec):
|
|
|
62
62
|
parent_uri=model_artifact.uri
|
|
63
63
|
if isinstance(model_artifact, model_art.ModelArtifact)
|
|
64
64
|
else model_artifact,
|
|
65
|
+
format=kwargs.pop("format", "") or "json",
|
|
65
66
|
**kwargs,
|
|
66
67
|
)
|
|
67
68
|
|
|
68
69
|
self.prompt_template = prompt_template
|
|
69
70
|
self.prompt_legend = prompt_legend
|
|
70
|
-
|
|
71
|
+
if invocation_config is not None and not isinstance(invocation_config, dict):
|
|
72
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
73
|
+
"LLMPromptArtifact invocation_config must be a dictionary or None"
|
|
74
|
+
)
|
|
75
|
+
self.invocation_config = invocation_config or {}
|
|
71
76
|
self.description = description
|
|
72
77
|
self._model_artifact = (
|
|
73
78
|
model_artifact
|
|
@@ -83,19 +88,20 @@ class LLMPromptArtifactSpec(ArtifactSpec):
|
|
|
83
88
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
84
89
|
"Expected prompt_template to be a list of dicts"
|
|
85
90
|
)
|
|
86
|
-
keys_to_pop = []
|
|
87
91
|
for message in prompt_template:
|
|
92
|
+
if set(key.lower() for key in message.keys()) != set(
|
|
93
|
+
self.PROMPT_TEMPLATE_KEYS
|
|
94
|
+
):
|
|
95
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
96
|
+
f"Expected prompt_template to contain dicts with keys "
|
|
97
|
+
f"{self.PROMPT_TEMPLATE_KEYS}, got {message.keys()}"
|
|
98
|
+
)
|
|
99
|
+
keys_to_pop = []
|
|
88
100
|
for key in message.keys():
|
|
89
101
|
if isinstance(key, str):
|
|
90
|
-
if key.
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
f"only has keys from {self.PROMPT_TEMPLATE_KEYS}"
|
|
94
|
-
)
|
|
95
|
-
else:
|
|
96
|
-
if not key.islower():
|
|
97
|
-
message[key.lower()] = message[key]
|
|
98
|
-
keys_to_pop.append(key)
|
|
102
|
+
if not key.islower():
|
|
103
|
+
message[key.lower()] = message[key]
|
|
104
|
+
keys_to_pop.append(key)
|
|
99
105
|
else:
|
|
100
106
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
101
107
|
f"Expected prompt_template to contain dict that only"
|
|
@@ -169,7 +175,7 @@ class LLMPromptArtifact(Artifact):
|
|
|
169
175
|
prompt_template: Optional[list[dict]] = None,
|
|
170
176
|
prompt_path: Optional[str] = None,
|
|
171
177
|
prompt_legend: Optional[dict] = None,
|
|
172
|
-
|
|
178
|
+
invocation_config: Optional[dict] = None,
|
|
173
179
|
description: Optional[str] = None,
|
|
174
180
|
target_path=None,
|
|
175
181
|
**kwargs,
|
|
@@ -179,7 +185,7 @@ class LLMPromptArtifact(Artifact):
|
|
|
179
185
|
prompt_path=prompt_path,
|
|
180
186
|
prompt_legend=prompt_legend,
|
|
181
187
|
model_artifact=model_artifact,
|
|
182
|
-
|
|
188
|
+
invocation_config=invocation_config,
|
|
183
189
|
target_path=target_path,
|
|
184
190
|
description=description,
|
|
185
191
|
)
|
mlrun/artifacts/model.py
CHANGED
|
@@ -190,10 +190,10 @@ class ModelArtifact(Artifact):
|
|
|
190
190
|
"""
|
|
191
191
|
super().__init__(key, body, format=format, target_path=target_path, **kwargs)
|
|
192
192
|
model_file = str(model_file or "")
|
|
193
|
-
if model_file and model_url:
|
|
193
|
+
if (model_file or model_dir or body) and model_url:
|
|
194
194
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
195
|
-
"Arguments 'model_file' and '
|
|
196
|
-
" used together with '
|
|
195
|
+
"Arguments 'model_file' and 'model_url' cannot be"
|
|
196
|
+
" used together with 'model_file', 'model_dir' or 'body'."
|
|
197
197
|
)
|
|
198
198
|
if model_file and "/" in model_file:
|
|
199
199
|
if model_dir:
|
mlrun/common/constants.py
CHANGED
|
@@ -27,9 +27,16 @@ DASK_LABEL_PREFIX = "dask.org/"
|
|
|
27
27
|
NUCLIO_LABEL_PREFIX = "nuclio.io/"
|
|
28
28
|
RESERVED_TAG_NAME_LATEST = "latest"
|
|
29
29
|
|
|
30
|
+
# Kubernetes DNS-1123 label name length limit
|
|
31
|
+
K8S_DNS_1123_LABEL_MAX_LENGTH = 63
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
RESERVED_BATCH_JOB_SUFFIX = "-batch"
|
|
35
|
+
|
|
30
36
|
JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
|
|
31
37
|
JOB_TYPE_PROJECT_LOADER = "project-loader"
|
|
32
38
|
JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
|
|
39
|
+
MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
|
|
33
40
|
|
|
34
41
|
|
|
35
42
|
class MLRunInternalLabels:
|
|
@@ -84,6 +91,8 @@ class MLRunInternalLabels:
|
|
|
84
91
|
original_workflow_id = "original-workflow-id"
|
|
85
92
|
workflow_id = "workflow-id"
|
|
86
93
|
retrying = "retrying"
|
|
94
|
+
rerun_counter = "rerun-counter"
|
|
95
|
+
rerun_index = "rerun-index"
|
|
87
96
|
|
|
88
97
|
owner = "owner"
|
|
89
98
|
v3io_user = "v3io_user"
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import sys
|
|
16
16
|
import typing
|
|
17
|
+
from datetime import datetime
|
|
17
18
|
|
|
18
19
|
import mlrun.common
|
|
19
20
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
|
|
|
24
25
|
BinEdges = typing.NewType("BinEdges", list[float])
|
|
25
26
|
|
|
26
27
|
_MAX_FLOAT = sys.float_info.max
|
|
28
|
+
logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
def parse_model_endpoint_project_prefix(path: str, project_name: str):
|
|
@@ -87,3 +89,87 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
|
|
|
87
89
|
for feature in feature_stats.values():
|
|
88
90
|
if hist_key in feature:
|
|
89
91
|
pad_hist(Histogram(feature[hist_key]))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_model_endpoints_creation_task_status(
|
|
95
|
+
server,
|
|
96
|
+
) -> tuple[
|
|
97
|
+
mlrun.common.schemas.BackgroundTaskState,
|
|
98
|
+
typing.Optional[datetime],
|
|
99
|
+
typing.Optional[set[str]],
|
|
100
|
+
]:
|
|
101
|
+
background_task = None
|
|
102
|
+
background_task_state = mlrun.common.schemas.BackgroundTaskState.running
|
|
103
|
+
background_task_check_timestamp = None
|
|
104
|
+
model_endpoint_uids = None
|
|
105
|
+
try:
|
|
106
|
+
background_task = mlrun.get_run_db().get_project_background_task(
|
|
107
|
+
server.project, server.model_endpoint_creation_task_name
|
|
108
|
+
)
|
|
109
|
+
background_task_check_timestamp = mlrun.utils.now_date()
|
|
110
|
+
log_background_task_state(
|
|
111
|
+
server, background_task.status.state, background_task_check_timestamp
|
|
112
|
+
)
|
|
113
|
+
background_task_state = background_task.status.state
|
|
114
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
115
|
+
logger.warning(
|
|
116
|
+
"Model endpoint creation task not found listing model endpoints",
|
|
117
|
+
project=server.project,
|
|
118
|
+
task_name=server.model_endpoint_creation_task_name,
|
|
119
|
+
)
|
|
120
|
+
if background_task is None:
|
|
121
|
+
model_endpoints = mlrun.get_run_db().list_model_endpoints(
|
|
122
|
+
project=server.project,
|
|
123
|
+
function_name=server.function_name,
|
|
124
|
+
function_tag=server.function_tag,
|
|
125
|
+
tsdb_metrics=False,
|
|
126
|
+
)
|
|
127
|
+
if model_endpoints:
|
|
128
|
+
model_endpoint_uids = {
|
|
129
|
+
endpoint.metadata.uid for endpoint in model_endpoints.endpoints
|
|
130
|
+
}
|
|
131
|
+
logger.info(
|
|
132
|
+
"Model endpoints found after background task not found, model monitoring will monitor "
|
|
133
|
+
"events",
|
|
134
|
+
project=server.project,
|
|
135
|
+
function_name=server.function_name,
|
|
136
|
+
function_tag=server.function_tag,
|
|
137
|
+
uids=model_endpoint_uids,
|
|
138
|
+
)
|
|
139
|
+
background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
|
|
140
|
+
else:
|
|
141
|
+
logger.warning(
|
|
142
|
+
"Model endpoints not found after background task not found, model monitoring will not "
|
|
143
|
+
"monitor events",
|
|
144
|
+
project=server.project,
|
|
145
|
+
function_name=server.function_name,
|
|
146
|
+
function_tag=server.function_tag,
|
|
147
|
+
)
|
|
148
|
+
background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
|
|
149
|
+
return background_task_state, background_task_check_timestamp, model_endpoint_uids
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def log_background_task_state(
|
|
153
|
+
server,
|
|
154
|
+
background_task_state: mlrun.common.schemas.BackgroundTaskState,
|
|
155
|
+
background_task_check_timestamp: typing.Optional[datetime],
|
|
156
|
+
):
|
|
157
|
+
logger.info(
|
|
158
|
+
"Checking model endpoint creation task status",
|
|
159
|
+
task_name=server.model_endpoint_creation_task_name,
|
|
160
|
+
)
|
|
161
|
+
if (
|
|
162
|
+
background_task_state
|
|
163
|
+
in mlrun.common.schemas.BackgroundTaskState.terminal_states()
|
|
164
|
+
):
|
|
165
|
+
logger.info(
|
|
166
|
+
f"Model endpoint creation task completed with state {background_task_state}"
|
|
167
|
+
)
|
|
168
|
+
else: # in progress
|
|
169
|
+
logger.info(
|
|
170
|
+
f"Model endpoint creation task is still in progress with the current state: "
|
|
171
|
+
f"{background_task_state}. Events will not be monitored for the next "
|
|
172
|
+
f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
|
|
173
|
+
function_name=server.function_name,
|
|
174
|
+
background_task_check_timestamp=background_task_check_timestamp.isoformat(),
|
|
175
|
+
)
|
mlrun/common/schemas/__init__.py
CHANGED
|
@@ -133,6 +133,7 @@ from .k8s import NodeSelectorOperator, Resources, ResourceSpec
|
|
|
133
133
|
from .memory_reports import MostCommonObjectTypesReport, ObjectTypeReport
|
|
134
134
|
from .model_monitoring import (
|
|
135
135
|
DriftStatus,
|
|
136
|
+
EndpointMode,
|
|
136
137
|
EndpointType,
|
|
137
138
|
EndpointUID,
|
|
138
139
|
EventFieldType,
|
|
@@ -153,6 +154,7 @@ from .model_monitoring import (
|
|
|
153
154
|
ModelEndpointSchema,
|
|
154
155
|
ModelEndpointSpec,
|
|
155
156
|
ModelEndpointStatus,
|
|
157
|
+
ModelMonitoringInfraLabel,
|
|
156
158
|
ModelMonitoringMode,
|
|
157
159
|
MonitoringFunctionNames,
|
|
158
160
|
TSDBTarget,
|
mlrun/common/schemas/auth.py
CHANGED
|
@@ -55,6 +55,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
|
|
|
55
55
|
secret = "secret"
|
|
56
56
|
run = "run"
|
|
57
57
|
model_endpoint = "model-endpoint"
|
|
58
|
+
model_monitoring = "model-monitoring"
|
|
58
59
|
pipeline = "pipeline"
|
|
59
60
|
hub_source = "hub-source"
|
|
60
61
|
workflow = "workflow"
|
|
@@ -96,6 +97,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
|
|
|
96
97
|
# runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
|
|
97
98
|
AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
|
|
98
99
|
AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
|
|
100
|
+
AuthorizationResourceTypes.model_monitoring: "/projects/{project_name}/model-monitoring/{resource_name}",
|
|
99
101
|
AuthorizationResourceTypes.pipeline: "/projects/{project_name}/pipelines/{resource_name}",
|
|
100
102
|
AuthorizationResourceTypes.datastore_profile: "/projects/{project_name}/datastore_profiles",
|
|
101
103
|
# Hub sources are not project-scoped, and auth is globally on the sources endpoint.
|
mlrun/common/schemas/function.py
CHANGED
|
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
|
|
|
114
114
|
default: typing.Optional[dict[str, str]]
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
class Backoff(pydantic.v1.BaseModel):
|
|
118
|
+
default_base_delay: typing.Optional[str]
|
|
119
|
+
min_base_delay: typing.Optional[str]
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class RetrySpec(pydantic.v1.BaseModel):
|
|
123
|
+
backoff: Backoff
|
|
124
|
+
|
|
125
|
+
|
|
117
126
|
class FunctionSpec(pydantic.v1.BaseModel):
|
|
118
127
|
image_pull_secret: typing.Optional[ImagePullSecret]
|
|
119
128
|
security_context: typing.Optional[SecurityContext]
|
|
120
129
|
service_account: typing.Optional[ServiceAccount]
|
|
121
130
|
state_thresholds: typing.Optional[StateThresholds]
|
|
131
|
+
retry: typing.Optional[RetrySpec]
|
|
122
132
|
|
|
123
133
|
class Config:
|
|
124
134
|
extra = pydantic.v1.Extra.allow
|
mlrun/common/schemas/hub.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from datetime import datetime, timezone
|
|
16
16
|
from typing import Optional
|
|
17
17
|
|
|
18
|
+
import deepdiff
|
|
18
19
|
from pydantic.v1 import BaseModel, Extra, Field
|
|
19
20
|
|
|
20
21
|
import mlrun.common.types
|
|
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
|
|
|
36
37
|
extra = Extra.allow
|
|
37
38
|
|
|
38
39
|
|
|
39
|
-
# Currently only functions are supported. Will add more in the future.
|
|
40
40
|
class HubSourceType(mlrun.common.types.StrEnum):
|
|
41
41
|
functions = "functions"
|
|
42
|
+
modules = "modules"
|
|
42
43
|
|
|
43
44
|
|
|
44
45
|
# Sources-related objects
|
|
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
|
|
|
46
47
|
path: str # URL to base directory, should include schema (s3://, etc...)
|
|
47
48
|
channel: str
|
|
48
49
|
credentials: Optional[dict] = {}
|
|
49
|
-
object_type: HubSourceType = Field(HubSourceType.functions, const=True)
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
class HubSource(BaseModel):
|
|
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
|
|
|
55
55
|
spec: HubSourceSpec
|
|
56
56
|
status: Optional[ObjectStatus] = ObjectStatus(state="created")
|
|
57
57
|
|
|
58
|
-
def get_full_uri(self, relative_path):
|
|
59
|
-
return f"{self.spec.path}/{
|
|
58
|
+
def get_full_uri(self, relative_path, object_type):
|
|
59
|
+
return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"
|
|
60
60
|
|
|
61
|
-
def get_catalog_uri(self):
|
|
62
|
-
return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
|
|
61
|
+
def get_catalog_uri(self, object_type):
|
|
62
|
+
return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)
|
|
63
63
|
|
|
64
64
|
@classmethod
|
|
65
65
|
def generate_default_source(cls):
|
|
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
|
|
|
78
78
|
spec=HubSourceSpec(
|
|
79
79
|
path=mlrun.mlconf.hub.default_source.url,
|
|
80
80
|
channel=mlrun.mlconf.hub.default_source.channel,
|
|
81
|
-
object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
|
|
82
81
|
),
|
|
83
82
|
status=ObjectStatus(state="created"),
|
|
84
83
|
)
|
|
85
84
|
|
|
85
|
+
def diff(self, another_source: "HubSource") -> dict:
|
|
86
|
+
"""
|
|
87
|
+
Compare this HubSource with another one.
|
|
88
|
+
Returns a dict of differences (metadata, spec, status).
|
|
89
|
+
"""
|
|
90
|
+
exclude_paths = [
|
|
91
|
+
"root['metadata']['updated']",
|
|
92
|
+
"root['metadata']['created']",
|
|
93
|
+
]
|
|
94
|
+
return deepdiff.DeepDiff(
|
|
95
|
+
self.dict(), another_source.dict(), exclude_paths=exclude_paths
|
|
96
|
+
)
|
|
97
|
+
|
|
86
98
|
|
|
87
99
|
last_source_index = -1
|
|
88
100
|
|
|
@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):
|
|
|
94
106
|
|
|
95
107
|
# Item-related objects
|
|
96
108
|
class HubItemMetadata(HubObjectMetadata):
|
|
97
|
-
source: HubSourceType =
|
|
109
|
+
source: HubSourceType = HubSourceType.functions
|
|
98
110
|
version: str
|
|
99
111
|
tag: Optional[str]
|
|
100
112
|
|
|
101
113
|
def get_relative_path(self) -> str:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
return f"{modified_name}/{version}/"
|
|
108
|
-
else:
|
|
109
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
110
|
-
f"Bad source for hub item - {self.source}"
|
|
111
|
-
)
|
|
114
|
+
# This is needed since the hub deployment script modifies the paths to use _ instead of -.
|
|
115
|
+
modified_name = self.name.replace("-", "_")
|
|
116
|
+
# Prefer using the tag if exists. Otherwise, use version.
|
|
117
|
+
version = self.tag or self.version
|
|
118
|
+
return f"{modified_name}/{version}/"
|
|
112
119
|
|
|
113
120
|
|
|
114
121
|
class HubItemSpec(ObjectSpec):
|
|
@@ -127,3 +134,8 @@ class HubCatalog(BaseModel):
|
|
|
127
134
|
kind: ObjectKind = Field(ObjectKind.hub_catalog, const=True)
|
|
128
135
|
channel: str
|
|
129
136
|
catalog: list[HubItem]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class HubModuleType(mlrun.common.types.StrEnum):
|
|
140
|
+
generic = "generic"
|
|
141
|
+
monitoring_app = "monitoring_application"
|
|
@@ -16,6 +16,7 @@ from .constants import (
|
|
|
16
16
|
INTERSECT_DICT_KEYS,
|
|
17
17
|
ApplicationEvent,
|
|
18
18
|
DriftStatus,
|
|
19
|
+
EndpointMode,
|
|
19
20
|
EndpointType,
|
|
20
21
|
EndpointUID,
|
|
21
22
|
EventFieldType,
|
|
@@ -29,6 +30,7 @@ from .constants import (
|
|
|
29
30
|
ModelEndpointMonitoringMetricType,
|
|
30
31
|
ModelEndpointSchema,
|
|
31
32
|
ModelMonitoringAppLabel,
|
|
33
|
+
ModelMonitoringInfraLabel,
|
|
32
34
|
ModelMonitoringMode,
|
|
33
35
|
MonitoringFunctionNames,
|
|
34
36
|
PredictionsQueryConstants,
|
|
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
|
|
|
34
34
|
UID = "uid"
|
|
35
35
|
PROJECT = "project"
|
|
36
36
|
ENDPOINT_TYPE = "endpoint_type"
|
|
37
|
+
MODE = "mode"
|
|
37
38
|
NAME = "name"
|
|
38
39
|
CREATED = "created"
|
|
39
40
|
UPDATED = "updated"
|
|
@@ -195,6 +196,10 @@ class WriterEventKind(MonitoringStrEnum):
|
|
|
195
196
|
RESULT = "result"
|
|
196
197
|
STATS = "stats"
|
|
197
198
|
|
|
199
|
+
@classmethod
|
|
200
|
+
def user_app_outputs(cls):
|
|
201
|
+
return [cls.METRIC, cls.RESULT]
|
|
202
|
+
|
|
198
203
|
|
|
199
204
|
class ControllerEvent(MonitoringStrEnum):
|
|
200
205
|
KIND = "kind"
|
|
@@ -205,6 +210,11 @@ class ControllerEvent(MonitoringStrEnum):
|
|
|
205
210
|
FIRST_REQUEST = "first_request"
|
|
206
211
|
FEATURE_SET_URI = "feature_set_uri"
|
|
207
212
|
ENDPOINT_TYPE = "endpoint_type"
|
|
213
|
+
|
|
214
|
+
# first_timestamp and last_timestamp are used to batch completed events
|
|
215
|
+
FIRST_TIMESTAMP = "first_timestamp"
|
|
216
|
+
LAST_TIMESTAMP = "last_timestamp"
|
|
217
|
+
|
|
208
218
|
ENDPOINT_POLICY = "endpoint_policy"
|
|
209
219
|
# Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
|
|
210
220
|
# "base_period", and "updated_endpoint" stand for when the MEP was updated
|
|
@@ -219,6 +229,7 @@ class ControllerEventEndpointPolicy(MonitoringStrEnum):
|
|
|
219
229
|
class ControllerEventKind(MonitoringStrEnum):
|
|
220
230
|
NOP_EVENT = "nop_event"
|
|
221
231
|
REGULAR_EVENT = "regular_event"
|
|
232
|
+
BATCH_COMPLETE = "batch_complete"
|
|
222
233
|
|
|
223
234
|
|
|
224
235
|
class MetricData(MonitoringStrEnum):
|
|
@@ -297,6 +308,7 @@ class FileTargetKind:
|
|
|
297
308
|
MONITORING_APPLICATION = "monitoring_application"
|
|
298
309
|
ERRORS = "errors"
|
|
299
310
|
STATS = "stats"
|
|
311
|
+
PARQUET_STATS = "parquet_stats"
|
|
300
312
|
LAST_REQUEST = "last_request"
|
|
301
313
|
|
|
302
314
|
|
|
@@ -321,6 +333,12 @@ class EndpointType(IntEnum):
|
|
|
321
333
|
return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
|
|
322
334
|
|
|
323
335
|
|
|
336
|
+
class EndpointMode(IntEnum):
|
|
337
|
+
REAL_TIME = 0
|
|
338
|
+
BATCH = 1
|
|
339
|
+
BATCH_LEGACY = 2 # legacy batch mode, used for endpoints created through the batch inference job
|
|
340
|
+
|
|
341
|
+
|
|
324
342
|
class MonitoringFunctionNames(MonitoringStrEnum):
|
|
325
343
|
STREAM = "model-monitoring-stream"
|
|
326
344
|
APPLICATION_CONTROLLER = "model-monitoring-controller"
|
|
@@ -474,19 +492,25 @@ class ModelEndpointMonitoringMetricType(StrEnum):
|
|
|
474
492
|
METRIC = "metric"
|
|
475
493
|
|
|
476
494
|
|
|
495
|
+
# refer to `mlrun.utils.regex.project_name`
|
|
496
|
+
_INNER_PROJECT_PATTERN = r"[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?"
|
|
497
|
+
PROJECT_PATTERN = rf"^{_INNER_PROJECT_PATTERN}$"
|
|
498
|
+
|
|
499
|
+
MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
500
|
+
|
|
477
501
|
_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
|
|
502
|
+
_RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
503
|
+
|
|
478
504
|
FQN_PATTERN = (
|
|
479
|
-
rf"^(?P<project>{
|
|
505
|
+
rf"^(?P<project>{_INNER_PROJECT_PATTERN})\."
|
|
480
506
|
rf"(?P<app>{_FQN_PART_PATTERN})\."
|
|
481
507
|
rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
|
|
482
|
-
rf"(?P<name>{
|
|
508
|
+
rf"(?P<name>{_RESULT_NAME_PATTERN})$"
|
|
483
509
|
)
|
|
484
510
|
FQN_REGEX = re.compile(FQN_PATTERN)
|
|
511
|
+
APP_NAME_REGEX = re.compile(_FQN_PART_PATTERN)
|
|
512
|
+
RESULT_NAME_REGEX = re.compile(_RESULT_NAME_PATTERN)
|
|
485
513
|
|
|
486
|
-
# refer to `mlrun.utils.regex.project_name`
|
|
487
|
-
PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
|
|
488
|
-
MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
489
|
-
RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
490
514
|
|
|
491
515
|
INTERSECT_DICT_KEYS = {
|
|
492
516
|
ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",
|
|
@@ -54,12 +54,21 @@ class FunctionSummary(BaseModel):
|
|
|
54
54
|
|
|
55
55
|
return cls(
|
|
56
56
|
type=func_type,
|
|
57
|
-
name=func_dict["metadata"]["name"]
|
|
57
|
+
name=func_dict["metadata"]["name"]
|
|
58
|
+
if func_type != FunctionsType.APPLICATION
|
|
59
|
+
else func_dict["spec"]
|
|
60
|
+
.get("graph", {})
|
|
61
|
+
.get("steps", {})
|
|
62
|
+
.get("PrepareMonitoringEvent", {})
|
|
63
|
+
.get("class_args", {})
|
|
64
|
+
.get("application_name"),
|
|
58
65
|
application_class=""
|
|
59
66
|
if func_type != FunctionsType.APPLICATION
|
|
60
|
-
else func_dict["spec"]
|
|
61
|
-
|
|
62
|
-
|
|
67
|
+
else func_dict["spec"]
|
|
68
|
+
.get("graph", {})
|
|
69
|
+
.get("steps", {})
|
|
70
|
+
.get("PushToMonitoringWriter", {})
|
|
71
|
+
.get("after", [None])[0],
|
|
63
72
|
project_name=func_dict["metadata"]["project"],
|
|
64
73
|
updated_time=func_dict["metadata"].get("updated"),
|
|
65
74
|
status=func_dict["status"].get("state"),
|
|
@@ -28,6 +28,7 @@ from .constants import (
|
|
|
28
28
|
FQN_REGEX,
|
|
29
29
|
MODEL_ENDPOINT_ID_PATTERN,
|
|
30
30
|
PROJECT_PATTERN,
|
|
31
|
+
EndpointMode,
|
|
31
32
|
EndpointType,
|
|
32
33
|
ModelEndpointMonitoringMetricType,
|
|
33
34
|
ModelMonitoringMode,
|
|
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
|
|
|
118
119
|
project: constr(regex=PROJECT_PATTERN)
|
|
119
120
|
endpoint_type: EndpointType = EndpointType.NODE_EP
|
|
120
121
|
uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
|
|
122
|
+
mode: Optional[EndpointMode] = None
|
|
121
123
|
|
|
122
124
|
@classmethod
|
|
123
125
|
def mutable_fields(cls):
|
|
@@ -129,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
|
|
|
129
131
|
return str(v)
|
|
130
132
|
return v
|
|
131
133
|
|
|
134
|
+
@validator("mode", pre=True, always=True)
|
|
135
|
+
def _set_mode_based_on_endpoint_type(cls, v, values): # noqa: N805
|
|
136
|
+
if v is None:
|
|
137
|
+
if values.get("endpoint_type") == EndpointType.BATCH_EP:
|
|
138
|
+
return EndpointMode.BATCH_LEGACY
|
|
139
|
+
else:
|
|
140
|
+
return EndpointMode.REAL_TIME
|
|
141
|
+
return v
|
|
142
|
+
|
|
132
143
|
|
|
133
144
|
class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
|
|
134
145
|
model_class: Optional[str] = ""
|
mlrun/common/schemas/pipeline.py
CHANGED
mlrun/common/schemas/serving.py
CHANGED
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -53,6 +53,7 @@ class RerunWorkflowRequest(pydantic.v1.BaseModel):
|
|
|
53
53
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
54
54
|
original_workflow_runner_uid: typing.Optional[str] = None
|
|
55
55
|
original_workflow_name: typing.Optional[str] = None
|
|
56
|
+
rerun_index: typing.Optional[int] = None
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
class WorkflowResponse(pydantic.v1.BaseModel):
|