mlrun: mlrun-1.10.0rc3-py3-none-any.whl → mlrun-1.10.0rc4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/__init__.py +1 -0
- mlrun/artifacts/base.py +14 -2
- mlrun/artifacts/helpers.py +40 -0
- mlrun/artifacts/llm_prompt.py +165 -0
- mlrun/artifacts/manager.py +13 -1
- mlrun/artifacts/model.py +91 -11
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/runtimes/constants.py +0 -14
- mlrun/common/schemas/artifact.py +12 -12
- mlrun/common/schemas/pipeline.py +0 -16
- mlrun/common/schemas/project.py +0 -17
- mlrun/common/schemas/runs.py +0 -17
- mlrun/config.py +1 -1
- mlrun/datastore/base.py +2 -2
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +1 -9
- mlrun/datastore/redis.py +2 -3
- mlrun/datastore/sources.py +0 -9
- mlrun/datastore/storeytargets.py +2 -5
- mlrun/datastore/targets.py +6 -56
- mlrun/datastore/utils.py +1 -11
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +6 -0
- mlrun/db/nopdb.py +1 -0
- mlrun/execution.py +87 -1
- mlrun/model.py +0 -5
- mlrun/projects/project.py +241 -4
- mlrun/run.py +0 -18
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/serving/states.py +67 -3
- mlrun/serving/v2_serving.py +1 -1
- mlrun/utils/helpers.py +58 -7
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +2 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/METADATA +5 -5
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/RECORD +42 -40
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/WHEEL +1 -1
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc3.dist-info → mlrun-1.10.0rc4.dist-info}/top_level.txt +0 -0
mlrun/artifacts/__init__.py
CHANGED
|
@@ -24,6 +24,7 @@ from .base import (
|
|
|
24
24
|
)
|
|
25
25
|
from .dataset import DatasetArtifact, TableArtifact, update_dataset_meta
|
|
26
26
|
from .document import DocumentArtifact, DocumentLoaderSpec, MLRunLoader
|
|
27
|
+
from .llm_prompt import LLMPromptArtifact, LLMPromptArtifactSpec
|
|
27
28
|
from .manager import (
|
|
28
29
|
ArtifactManager,
|
|
29
30
|
ArtifactProducer,
|
mlrun/artifacts/base.py
CHANGED
|
@@ -106,6 +106,8 @@ class ArtifactSpec(ModelObj):
|
|
|
106
106
|
"extra_data",
|
|
107
107
|
"unpackaging_instructions",
|
|
108
108
|
"producer",
|
|
109
|
+
"parent_uri",
|
|
110
|
+
"has_children",
|
|
109
111
|
]
|
|
110
112
|
|
|
111
113
|
_extra_fields = ["annotations", "sources", "license", "encoding"]
|
|
@@ -128,6 +130,7 @@ class ArtifactSpec(ModelObj):
|
|
|
128
130
|
extra_data=None,
|
|
129
131
|
body=None,
|
|
130
132
|
unpackaging_instructions: typing.Optional[dict] = None,
|
|
133
|
+
parent_uri: typing.Optional[str] = None,
|
|
131
134
|
):
|
|
132
135
|
self.src_path = src_path
|
|
133
136
|
self.target_path = target_path
|
|
@@ -138,6 +141,8 @@ class ArtifactSpec(ModelObj):
|
|
|
138
141
|
self.db_key = db_key
|
|
139
142
|
self.extra_data = extra_data or {}
|
|
140
143
|
self.unpackaging_instructions = unpackaging_instructions
|
|
144
|
+
self.parent_uri = parent_uri
|
|
145
|
+
self.has_children = False
|
|
141
146
|
|
|
142
147
|
self._body = body
|
|
143
148
|
self.encoding = None
|
|
@@ -253,7 +258,10 @@ class Artifact(ModelObj):
|
|
|
253
258
|
self.spec.target_path = target_path or self.spec.target_path
|
|
254
259
|
self.spec.format = format or self.spec.format
|
|
255
260
|
self.spec.viewer = viewer or self.spec.viewer
|
|
256
|
-
self.spec.src_path = src_path
|
|
261
|
+
self.spec.src_path = src_path or self.spec.src_path
|
|
262
|
+
|
|
263
|
+
# temp flag to indicate if the source path is a temporary file (if True it will be deleted after upload)
|
|
264
|
+
self._src_is_temp = False
|
|
257
265
|
|
|
258
266
|
if body:
|
|
259
267
|
self.spec._body = body
|
|
@@ -341,7 +349,7 @@ class Artifact(ModelObj):
|
|
|
341
349
|
|
|
342
350
|
def before_log(self):
|
|
343
351
|
for key, item in self.spec.extra_data.items():
|
|
344
|
-
if hasattr(item, "get_target_path"):
|
|
352
|
+
if hasattr(item, "get_target_path") and item.get_target_path():
|
|
345
353
|
self.spec.extra_data[key] = item.get_target_path()
|
|
346
354
|
|
|
347
355
|
@property
|
|
@@ -451,6 +459,10 @@ class Artifact(ModelObj):
|
|
|
451
459
|
url=target_path or self.spec.target_path
|
|
452
460
|
).upload(source_path)
|
|
453
461
|
|
|
462
|
+
if self._src_is_temp and os.path.exists(self.spec.src_path):
|
|
463
|
+
# delete the temporary file if it was created for the upload
|
|
464
|
+
os.remove(self.spec.src_path)
|
|
465
|
+
|
|
454
466
|
def resolve_body_target_hash_path(
|
|
455
467
|
self, body: typing.Union[bytes, str], artifact_path: str
|
|
456
468
|
) -> (str, str):
|
|
mlrun/artifacts/helpers.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import mlrun.datastore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def check_artifact_parent(
|
|
19
|
+
artifact_project: str,
|
|
20
|
+
expected_parent_uri: str,
|
|
21
|
+
) -> None:
|
|
22
|
+
"""
|
|
23
|
+
Check if the artifact's parent URI is valid and under the same project.
|
|
24
|
+
:param artifact_project: Artifact project name
|
|
25
|
+
:param expected_parent_uri: Expected parent URI of the artifact
|
|
26
|
+
:raise: MLRunInvalidArgumentError if the parent URI is invalid or not under the same project
|
|
27
|
+
"""
|
|
28
|
+
# check if the parent_uri is a valid artifact uri and it is under the same project
|
|
29
|
+
if mlrun.datastore.is_store_uri(expected_parent_uri):
|
|
30
|
+
project, _, _, _, _, _ = mlrun.utils.parse_artifact_uri(
|
|
31
|
+
mlrun.datastore.parse_store_uri(expected_parent_uri)[1]
|
|
32
|
+
)
|
|
33
|
+
if project != artifact_project:
|
|
34
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
35
|
+
f"parent_uri ({expected_parent_uri}) must be under the same project ({artifact_project})"
|
|
36
|
+
)
|
|
37
|
+
else:
|
|
38
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
39
|
+
f"parent_uri ({expected_parent_uri}) must be a valid artifact URI"
|
|
40
|
+
)
|
|
mlrun/artifacts/llm_prompt.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import tempfile
|
|
15
|
+
from typing import Optional, Union
|
|
16
|
+
|
|
17
|
+
import mlrun
|
|
18
|
+
import mlrun.artifacts.model as model_art
|
|
19
|
+
import mlrun.common
|
|
20
|
+
from mlrun.artifacts import Artifact, ArtifactMetadata, ArtifactSpec
|
|
21
|
+
from mlrun.utils import StorePrefix, logger
|
|
22
|
+
|
|
23
|
+
MAX_PROMPT_LENGTH = 1024
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LLMPromptArtifactSpec(ArtifactSpec):
|
|
27
|
+
_dict_fields = ArtifactSpec._dict_fields + [
|
|
28
|
+
"prompt_string",
|
|
29
|
+
"prompt_legend",
|
|
30
|
+
"model_configuration",
|
|
31
|
+
"description",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
model_artifact: Union[model_art.ModelArtifact, str] = None,
|
|
37
|
+
prompt_string: Optional[str] = None,
|
|
38
|
+
prompt_path: Optional[str] = None,
|
|
39
|
+
prompt_legend: Optional[dict] = None,
|
|
40
|
+
model_configuration: Optional[dict] = None,
|
|
41
|
+
description: Optional[str] = None,
|
|
42
|
+
target_path: Optional[str] = None,
|
|
43
|
+
**kwargs,
|
|
44
|
+
):
|
|
45
|
+
if prompt_string and prompt_path:
|
|
46
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
47
|
+
"Cannot specify both 'prompt_string' and 'prompt_path'"
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
super().__init__(
|
|
51
|
+
src_path=prompt_path,
|
|
52
|
+
target_path=target_path,
|
|
53
|
+
parent_uri=model_artifact.uri
|
|
54
|
+
if isinstance(model_artifact, model_art.ModelArtifact)
|
|
55
|
+
else model_artifact,
|
|
56
|
+
body=prompt_string,
|
|
57
|
+
**kwargs,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
self.prompt_string = prompt_string
|
|
61
|
+
self.prompt_legend = prompt_legend
|
|
62
|
+
self.model_configuration = model_configuration
|
|
63
|
+
self.description = description
|
|
64
|
+
self._model_artifact = None
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def model_uri(self):
|
|
68
|
+
return self.parent_uri
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class LLMPromptArtifact(Artifact):
|
|
72
|
+
"""
|
|
73
|
+
LLM Prompt Artifact
|
|
74
|
+
|
|
75
|
+
This artifact is used to store and manage LLM prompts.
|
|
76
|
+
Stores the prompt string/path and a link to the related model artifact.
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
kind = mlrun.common.schemas.ArtifactCategories.llm_prompt
|
|
80
|
+
_store_prefix = StorePrefix.LLMPrompt
|
|
81
|
+
|
|
82
|
+
def __init__(
|
|
83
|
+
self,
|
|
84
|
+
key: Optional[str] = None,
|
|
85
|
+
project: Optional[str] = None,
|
|
86
|
+
model_artifact: Union[
|
|
87
|
+
model_art.ModelArtifact, str
|
|
88
|
+
] = None, # TODO support partial model uri
|
|
89
|
+
prompt_string: Optional[str] = None,
|
|
90
|
+
prompt_path: Optional[str] = None,
|
|
91
|
+
prompt_legend: Optional[dict] = None,
|
|
92
|
+
model_configuration: Optional[dict] = None,
|
|
93
|
+
description: Optional[str] = None,
|
|
94
|
+
target_path=None,
|
|
95
|
+
**kwargs,
|
|
96
|
+
):
|
|
97
|
+
llm_prompt_spec = LLMPromptArtifactSpec(
|
|
98
|
+
prompt_string=prompt_string,
|
|
99
|
+
prompt_path=prompt_path,
|
|
100
|
+
prompt_legend=prompt_legend,
|
|
101
|
+
model_artifact=model_artifact,
|
|
102
|
+
model_configuration=model_configuration,
|
|
103
|
+
target_path=target_path,
|
|
104
|
+
description=description,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
llm_metadata = ArtifactMetadata(
|
|
108
|
+
key=key,
|
|
109
|
+
project=project or "",
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
super().__init__(spec=llm_prompt_spec, metadata=llm_metadata, **kwargs)
|
|
113
|
+
|
|
114
|
+
@property
|
|
115
|
+
def spec(self) -> LLMPromptArtifactSpec:
|
|
116
|
+
return self._spec
|
|
117
|
+
|
|
118
|
+
@spec.setter
|
|
119
|
+
def spec(self, spec: LLMPromptArtifactSpec):
|
|
120
|
+
self._spec = self._verify_dict(spec, "spec", LLMPromptArtifactSpec)
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def model_artifact(self) -> Optional[model_art.ModelArtifact]:
|
|
124
|
+
"""
|
|
125
|
+
Get the model artifact linked to this prompt artifact.
|
|
126
|
+
"""
|
|
127
|
+
if self.spec._model_artifact:
|
|
128
|
+
return self.spec._model_artifact
|
|
129
|
+
if self.spec.model_uri:
|
|
130
|
+
self.spec._model_artifact, target = (
|
|
131
|
+
mlrun.datastore.store_manager.get_store_artifact(self.spec.model_uri)
|
|
132
|
+
)
|
|
133
|
+
return self.spec._model_artifact
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
def read_prompt(self) -> Optional[str]:
|
|
137
|
+
"""
|
|
138
|
+
Read the prompt string from the artifact.
|
|
139
|
+
"""
|
|
140
|
+
if self.spec.prompt_string:
|
|
141
|
+
return self.spec.prompt_string
|
|
142
|
+
if self.spec.target_path:
|
|
143
|
+
with mlrun.datastore.store_manager.object(url=self.spec.target_path).open(
|
|
144
|
+
mode="r"
|
|
145
|
+
) as p_file:
|
|
146
|
+
return p_file.read()
|
|
147
|
+
|
|
148
|
+
def before_log(self):
|
|
149
|
+
"""
|
|
150
|
+
Prepare the artifact before logging.
|
|
151
|
+
This method is called before the artifact is logged.
|
|
152
|
+
"""
|
|
153
|
+
if self.spec.prompt_string and len(self.spec.prompt_string) > MAX_PROMPT_LENGTH:
|
|
154
|
+
logger.debug(
|
|
155
|
+
"Prompt string exceeds maximum length, saving to a temporary file."
|
|
156
|
+
)
|
|
157
|
+
with tempfile.NamedTemporaryFile(
|
|
158
|
+
delete=False, mode="w", suffix=".txt"
|
|
159
|
+
) as temp_file:
|
|
160
|
+
temp_file.write(self.spec.prompt_string)
|
|
161
|
+
self.spec.src_path = temp_file.name
|
|
162
|
+
self.spec.prompt_string = None
|
|
163
|
+
self._src_is_temp = True
|
|
164
|
+
|
|
165
|
+
super().before_log()
|
mlrun/artifacts/manager.py
CHANGED
|
@@ -17,6 +17,7 @@ import typing
|
|
|
17
17
|
from os.path import exists, isdir
|
|
18
18
|
from urllib.parse import urlparse
|
|
19
19
|
|
|
20
|
+
import mlrun.artifacts.helpers
|
|
20
21
|
import mlrun.common.schemas.artifact
|
|
21
22
|
import mlrun.config
|
|
22
23
|
import mlrun.utils.regex
|
|
@@ -42,6 +43,7 @@ from .dataset import (
|
|
|
42
43
|
TableArtifact,
|
|
43
44
|
)
|
|
44
45
|
from .document import DocumentArtifact
|
|
46
|
+
from .llm_prompt import LLMPromptArtifact
|
|
45
47
|
from .model import ModelArtifact
|
|
46
48
|
from .plots import (
|
|
47
49
|
PlotArtifact,
|
|
@@ -59,6 +61,7 @@ artifact_types = {
|
|
|
59
61
|
"dataset": DatasetArtifact,
|
|
60
62
|
"plotly": PlotlyArtifact,
|
|
61
63
|
"document": DocumentArtifact,
|
|
64
|
+
"llm-prompt": LLMPromptArtifact,
|
|
62
65
|
}
|
|
63
66
|
|
|
64
67
|
|
|
@@ -221,7 +224,12 @@ class ArtifactManager:
|
|
|
221
224
|
else:
|
|
222
225
|
key = item.key
|
|
223
226
|
target_path = target_path or item.target_path
|
|
224
|
-
|
|
227
|
+
if isinstance(item, ModelArtifact) and item.model_url:
|
|
228
|
+
if upload:
|
|
229
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
230
|
+
"log_artifact of ModelArtifact does not accept arguments for both upload and model_url parameters"
|
|
231
|
+
)
|
|
232
|
+
upload = False
|
|
225
233
|
validate_artifact_key_name(key, "artifact.key")
|
|
226
234
|
validate_inline_artifact_body_size(item.spec.inline)
|
|
227
235
|
src_path = local_path or item.src_path # TODO: remove src_path
|
|
@@ -262,6 +270,10 @@ class ArtifactManager:
|
|
|
262
270
|
item.iter = producer.iteration
|
|
263
271
|
project = project or producer.project
|
|
264
272
|
item.project = project
|
|
273
|
+
if item.spec.parent_uri:
|
|
274
|
+
mlrun.artifacts.helpers.check_artifact_parent(
|
|
275
|
+
artifact_project=item.project, expected_parent_uri=item.spec.parent_uri
|
|
276
|
+
)
|
|
265
277
|
if is_retained_producer:
|
|
266
278
|
# if the producer is retained, we want to use the original target path
|
|
267
279
|
target_path = target_path or item.target_path
|
mlrun/artifacts/model.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import tempfile
|
|
16
16
|
import warnings
|
|
17
17
|
from os import path
|
|
18
|
-
from typing import Any, Optional
|
|
18
|
+
from typing import Any, Optional, Union
|
|
19
19
|
|
|
20
20
|
import pandas as pd
|
|
21
21
|
import yaml
|
|
@@ -46,6 +46,7 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
46
46
|
"feature_weights",
|
|
47
47
|
"feature_stats",
|
|
48
48
|
"model_target_file",
|
|
49
|
+
"model_url",
|
|
49
50
|
]
|
|
50
51
|
_exclude_fields_from_uid_hash = ArtifactSpec._exclude_fields_from_uid_hash + [
|
|
51
52
|
"metrics",
|
|
@@ -79,6 +80,7 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
79
80
|
feature_weights=None,
|
|
80
81
|
feature_stats=None,
|
|
81
82
|
model_target_file=None,
|
|
83
|
+
model_url=None,
|
|
82
84
|
):
|
|
83
85
|
super().__init__(
|
|
84
86
|
src_path,
|
|
@@ -102,6 +104,7 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
102
104
|
self.feature_weights = feature_weights
|
|
103
105
|
self.feature_stats = feature_stats
|
|
104
106
|
self.model_target_file = model_target_file
|
|
107
|
+
self.model_url = model_url
|
|
105
108
|
|
|
106
109
|
@property
|
|
107
110
|
def inputs(self) -> ObjectList:
|
|
@@ -121,6 +124,18 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
121
124
|
def outputs(self, outputs: list[Feature]) -> None:
|
|
122
125
|
self._outputs = ObjectList.from_list(Feature, outputs)
|
|
123
126
|
|
|
127
|
+
@property
|
|
128
|
+
def default_config(self):
|
|
129
|
+
return self.parameters.get("default_config", {})
|
|
130
|
+
|
|
131
|
+
@default_config.setter
|
|
132
|
+
def default_config(self, default_config):
|
|
133
|
+
# skip storing 'default_config' if value is empty or unset
|
|
134
|
+
if default_config:
|
|
135
|
+
self.parameters["default_config"] = default_config
|
|
136
|
+
else:
|
|
137
|
+
self.parameters.pop("default_config", None)
|
|
138
|
+
|
|
124
139
|
|
|
125
140
|
class ModelArtifact(Artifact):
|
|
126
141
|
"""ML Model artifact
|
|
@@ -148,8 +163,32 @@ class ModelArtifact(Artifact):
|
|
|
148
163
|
feature_weights=None,
|
|
149
164
|
extra_data=None,
|
|
150
165
|
model_dir=None,
|
|
166
|
+
model_url: Optional[str] = None,
|
|
167
|
+
default_config: Optional[dict] = None,
|
|
151
168
|
**kwargs,
|
|
152
169
|
):
|
|
170
|
+
"""
|
|
171
|
+
:param key: Artifact key or artifact class ()
|
|
172
|
+
:param body: Will use the body as the artifact content
|
|
173
|
+
:param format: Optional, format to use (e.g. csv, parquet, ..)
|
|
174
|
+
:param model_file: Path to the local model file we upload (see also model_dir)
|
|
175
|
+
or to a model file data url (e.g. `http://host/path/model.pkl`)
|
|
176
|
+
:param metrics: The key/value dict of model metrics
|
|
177
|
+
:param target_path: Absolute target path (instead of using artifact_path + local_path)
|
|
178
|
+
:param parameters: Key/value dict of model parameters
|
|
179
|
+
:param inputs: Ordered list of model input features (name, type, ..)
|
|
180
|
+
:param outputs: Ordered list of model output/result elements (name, type, ..)
|
|
181
|
+
:param framework: Name of the ML framework
|
|
182
|
+
:param algorithm: Training algorithm name
|
|
183
|
+
:param feature_vector: Feature store feature vector uri (store://feature-vectors/<project>/<name>[:tag])
|
|
184
|
+
:param feature_weights: List of feature weights, one per input column
|
|
185
|
+
:param extra_data: Extra artifacts and files to log with the model.
|
|
186
|
+
:param model_dir: Path to the local dir holding the model file and extra files
|
|
187
|
+
:param model_url: Remote model url.
|
|
188
|
+
:param default_config: Default configuration for client building
|
|
189
|
+
Saved as a sub-dictionary under the parameter.
|
|
190
|
+
:param kwargs:
|
|
191
|
+
"""
|
|
153
192
|
if key or body or format or target_path:
|
|
154
193
|
warnings.warn(
|
|
155
194
|
"Artifact constructor parameters are deprecated in 1.7.0 and will be removed in 1.10.0. "
|
|
@@ -158,10 +197,18 @@ class ModelArtifact(Artifact):
|
|
|
158
197
|
)
|
|
159
198
|
super().__init__(key, body, format=format, target_path=target_path, **kwargs)
|
|
160
199
|
model_file = str(model_file or "")
|
|
200
|
+
if model_file and model_url:
|
|
201
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
202
|
+
"Arguments 'model_file' and 'model_dir' cannot be"
|
|
203
|
+
" used together with 'model_url'."
|
|
204
|
+
)
|
|
161
205
|
if model_file and "/" in model_file:
|
|
206
|
+
if model_dir:
|
|
207
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
208
|
+
"'model_file' cannot contain '/' (i.e., be a full path) when 'model_dir' is also specified"
|
|
209
|
+
)
|
|
162
210
|
model_dir = path.dirname(model_file)
|
|
163
211
|
model_file = path.basename(model_file)
|
|
164
|
-
|
|
165
212
|
self.spec.model_file = model_file
|
|
166
213
|
self.spec.src_path = model_dir
|
|
167
214
|
self.spec.parameters = parameters or {}
|
|
@@ -174,6 +221,8 @@ class ModelArtifact(Artifact):
|
|
|
174
221
|
self.spec.feature_vector = feature_vector
|
|
175
222
|
self.spec.feature_weights = feature_weights
|
|
176
223
|
self.spec.feature_stats = None
|
|
224
|
+
self.spec.model_url = model_url
|
|
225
|
+
self.default_config = default_config
|
|
177
226
|
|
|
178
227
|
@property
|
|
179
228
|
def spec(self) -> ModelArtifactSpec:
|
|
@@ -211,6 +260,14 @@ class ModelArtifact(Artifact):
|
|
|
211
260
|
def model_file(self, model_file):
|
|
212
261
|
self.spec.model_file = model_file
|
|
213
262
|
|
|
263
|
+
@property
|
|
264
|
+
def model_url(self):
|
|
265
|
+
return self.spec.model_url
|
|
266
|
+
|
|
267
|
+
@model_url.setter
|
|
268
|
+
def model_url(self, model_url):
|
|
269
|
+
self.spec.model_url = model_url
|
|
270
|
+
|
|
214
271
|
@property
|
|
215
272
|
def parameters(self):
|
|
216
273
|
return self.spec.parameters
|
|
@@ -219,6 +276,14 @@ class ModelArtifact(Artifact):
|
|
|
219
276
|
def parameters(self, parameters):
|
|
220
277
|
self.spec.parameters = parameters
|
|
221
278
|
|
|
279
|
+
@property
|
|
280
|
+
def default_config(self):
|
|
281
|
+
return self.spec.default_config
|
|
282
|
+
|
|
283
|
+
@default_config.setter
|
|
284
|
+
def default_config(self, default_config):
|
|
285
|
+
self.spec.default_config = default_config
|
|
286
|
+
|
|
222
287
|
@property
|
|
223
288
|
def metrics(self):
|
|
224
289
|
return self.spec.metrics
|
|
@@ -299,8 +364,10 @@ class ModelArtifact(Artifact):
|
|
|
299
364
|
return True
|
|
300
365
|
|
|
301
366
|
def before_log(self):
|
|
302
|
-
if not self.spec.model_file:
|
|
303
|
-
raise ValueError(
|
|
367
|
+
if not self.spec.model_file and not self.spec.model_url:
|
|
368
|
+
raise ValueError(
|
|
369
|
+
"ModelArtifact must have either model_file or model_url attributes"
|
|
370
|
+
)
|
|
304
371
|
|
|
305
372
|
super().before_log()
|
|
306
373
|
|
|
@@ -406,8 +473,13 @@ class ModelArtifact(Artifact):
|
|
|
406
473
|
return mlrun.get_dataitem(target_model_path).get()
|
|
407
474
|
|
|
408
475
|
|
|
409
|
-
def get_model(
|
|
410
|
-
|
|
476
|
+
def get_model(
|
|
477
|
+
model_dir: Optional[
|
|
478
|
+
Union[str, ModelArtifact, "mlrun.datastore.base.DataItem"]
|
|
479
|
+
] = None,
|
|
480
|
+
suffix="",
|
|
481
|
+
) -> (str, ModelArtifact, dict):
|
|
482
|
+
"""return model file, model spec object, and dictionary of extra data items
|
|
411
483
|
|
|
412
484
|
this function will get the model file, metadata, and extra data
|
|
413
485
|
the returned model file is always local, when using remote urls
|
|
@@ -428,6 +500,7 @@ def get_model(model_dir, suffix=""):
|
|
|
428
500
|
:returns: model filename, model artifact object, extra data dict
|
|
429
501
|
|
|
430
502
|
"""
|
|
503
|
+
# TODO support LLMPromptArtifact
|
|
431
504
|
model_file = ""
|
|
432
505
|
model_spec = None
|
|
433
506
|
extra_dataitems = {}
|
|
@@ -435,18 +508,25 @@ def get_model(model_dir, suffix=""):
|
|
|
435
508
|
|
|
436
509
|
if hasattr(model_dir, "artifact_url"):
|
|
437
510
|
model_dir = model_dir.artifact_url
|
|
438
|
-
|
|
439
511
|
alternative_suffix = next(
|
|
440
512
|
(
|
|
441
513
|
optional_suffix
|
|
442
514
|
for optional_suffix in MODEL_OPTIONAL_SUFFIXES
|
|
443
|
-
if model_dir
|
|
515
|
+
if isinstance(model_dir, str)
|
|
516
|
+
and model_dir.lower().endswith(optional_suffix)
|
|
444
517
|
),
|
|
445
518
|
None,
|
|
446
519
|
)
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
520
|
+
is_store_uri = isinstance(model_dir, str) and mlrun.datastore.is_store_uri(
|
|
521
|
+
model_dir
|
|
522
|
+
)
|
|
523
|
+
if is_store_uri or isinstance(model_dir, ModelArtifact):
|
|
524
|
+
if is_store_uri:
|
|
525
|
+
model_spec, target = mlrun.datastore.store_manager.get_store_artifact(
|
|
526
|
+
model_dir
|
|
527
|
+
)
|
|
528
|
+
else:
|
|
529
|
+
model_spec, target = model_dir, model_dir.get_target_path()
|
|
450
530
|
if not model_spec or model_spec.kind != "model":
|
|
451
531
|
raise ValueError(f"store artifact ({model_dir}) is not model kind")
|
|
452
532
|
# in case model_target_file is specified, use it, because that means that the actual model target path
|
|
mlrun/common/runtimes/constants.py
CHANGED
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import enum
|
|
16
15
|
import typing
|
|
17
16
|
|
|
18
17
|
import mlrun.common.constants as mlrun_constants
|
|
@@ -237,19 +236,6 @@ class RunStates:
|
|
|
237
236
|
}[pipeline_run_status]
|
|
238
237
|
|
|
239
238
|
|
|
240
|
-
# TODO: remove this class in 1.10.0 - use only MlrunInternalLabels
|
|
241
|
-
class RunLabels(enum.Enum):
|
|
242
|
-
owner = mlrun_constants.MLRunInternalLabels.owner
|
|
243
|
-
v3io_user = mlrun_constants.MLRunInternalLabels.v3io_user
|
|
244
|
-
|
|
245
|
-
@staticmethod
|
|
246
|
-
def all():
|
|
247
|
-
return [
|
|
248
|
-
RunLabels.owner,
|
|
249
|
-
RunLabels.v3io_user,
|
|
250
|
-
]
|
|
251
|
-
|
|
252
|
-
|
|
253
239
|
class SparkApplicationStates:
|
|
254
240
|
"""
|
|
255
241
|
https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go#L321
|
mlrun/common/schemas/artifact.py
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
17
|
import pydantic.v1
|
|
18
|
-
from deprecated import deprecated
|
|
19
18
|
|
|
20
19
|
import mlrun.common.types
|
|
21
20
|
|
|
@@ -26,6 +25,7 @@ class ArtifactCategories(mlrun.common.types.StrEnum):
|
|
|
26
25
|
model = "model"
|
|
27
26
|
dataset = "dataset"
|
|
28
27
|
document = "document"
|
|
28
|
+
llm_prompt = "llm-prompt"
|
|
29
29
|
other = "other"
|
|
30
30
|
|
|
31
31
|
# we define the link as a category to prevent import cycles, but it's not a real category
|
|
@@ -41,19 +41,27 @@ class ArtifactCategories(mlrun.common.types.StrEnum):
|
|
|
41
41
|
return [ArtifactCategories.dataset.value, link_kind], False
|
|
42
42
|
if self.value == ArtifactCategories.document.value:
|
|
43
43
|
return [ArtifactCategories.document.value, link_kind], False
|
|
44
|
+
if self.value == ArtifactCategories.llm_prompt.value:
|
|
45
|
+
return [ArtifactCategories.llm_prompt.value, link_kind], False
|
|
44
46
|
if self.value == ArtifactCategories.other.value:
|
|
45
47
|
return (
|
|
46
48
|
[
|
|
47
49
|
ArtifactCategories.model.value,
|
|
48
50
|
ArtifactCategories.dataset.value,
|
|
49
51
|
ArtifactCategories.document.value,
|
|
52
|
+
ArtifactCategories.llm_prompt.value,
|
|
50
53
|
],
|
|
51
54
|
True,
|
|
52
55
|
)
|
|
53
56
|
|
|
54
57
|
@classmethod
|
|
55
58
|
def from_kind(cls, kind: str) -> "ArtifactCategories":
|
|
56
|
-
if kind in [
|
|
59
|
+
if kind in [
|
|
60
|
+
cls.model.value,
|
|
61
|
+
cls.dataset.value,
|
|
62
|
+
cls.document.value,
|
|
63
|
+
cls.llm_prompt.value,
|
|
64
|
+
]:
|
|
57
65
|
return cls(kind)
|
|
58
66
|
return cls.other
|
|
59
67
|
|
|
@@ -64,6 +72,7 @@ class ArtifactCategories(mlrun.common.types.StrEnum):
|
|
|
64
72
|
ArtifactCategories.model,
|
|
65
73
|
ArtifactCategories.dataset,
|
|
66
74
|
ArtifactCategories.document,
|
|
75
|
+
ArtifactCategories.llm_prompt,
|
|
67
76
|
]
|
|
68
77
|
|
|
69
78
|
|
|
@@ -78,16 +87,6 @@ class ArtifactIdentifier(pydantic.v1.BaseModel):
|
|
|
78
87
|
# hash: typing.Optional[str]
|
|
79
88
|
|
|
80
89
|
|
|
81
|
-
@deprecated(
|
|
82
|
-
version="1.7.0",
|
|
83
|
-
reason="mlrun.common.schemas.ArtifactsFormat is deprecated and will be removed in 1.10.0. "
|
|
84
|
-
"Use mlrun.common.formatters.ArtifactFormat instead.",
|
|
85
|
-
category=FutureWarning,
|
|
86
|
-
)
|
|
87
|
-
class ArtifactsFormat(mlrun.common.types.StrEnum):
|
|
88
|
-
full = "full"
|
|
89
|
-
|
|
90
|
-
|
|
91
90
|
class ArtifactMetadata(pydantic.v1.BaseModel):
|
|
92
91
|
key: str
|
|
93
92
|
project: str
|
|
@@ -108,6 +107,7 @@ class ArtifactSpec(pydantic.v1.BaseModel):
|
|
|
108
107
|
db_key: typing.Optional[str]
|
|
109
108
|
extra_data: typing.Optional[dict[str, typing.Any]]
|
|
110
109
|
unpackaging_instructions: typing.Optional[dict[str, typing.Any]]
|
|
110
|
+
parent_uri: typing.Optional[str]
|
|
111
111
|
|
|
112
112
|
class Config:
|
|
113
113
|
extra = pydantic.v1.Extra.allow
|
mlrun/common/schemas/pipeline.py
CHANGED
|
@@ -15,22 +15,6 @@
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
17
|
import pydantic.v1
|
|
18
|
-
from deprecated import deprecated
|
|
19
|
-
|
|
20
|
-
import mlrun.common.types
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@deprecated(
|
|
24
|
-
version="1.7.0",
|
|
25
|
-
reason="mlrun.common.schemas.PipelinesFormat is deprecated and will be removed in 1.10.0. "
|
|
26
|
-
"Use mlrun.common.formatters.PipelineFormat instead.",
|
|
27
|
-
category=FutureWarning,
|
|
28
|
-
)
|
|
29
|
-
class PipelinesFormat(mlrun.common.types.StrEnum):
|
|
30
|
-
full = "full"
|
|
31
|
-
metadata_only = "metadata_only"
|
|
32
|
-
summary = "summary"
|
|
33
|
-
name_only = "name_only"
|
|
34
18
|
|
|
35
19
|
|
|
36
20
|
class PipelinesPagination(str):
|