mlrun 1.10.0rc12__py3-none-any.whl → 1.10.0rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/llm_prompt.py +111 -21
- mlrun/common/constants.py +0 -1
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/serving.py +7 -0
- mlrun/common/schemas/workflow.py +0 -1
- mlrun/config.py +2 -0
- mlrun/datastore/model_provider/model_provider.py +81 -3
- mlrun/datastore/model_provider/openai_provider.py +52 -28
- mlrun/datastore/remote_client.py +11 -0
- mlrun/execution.py +19 -5
- mlrun/model.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +175 -8
- mlrun/projects/pipelines.py +40 -18
- mlrun/projects/project.py +15 -4
- mlrun/run.py +25 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +1 -0
- mlrun/serving/states.py +521 -27
- mlrun/serving/system_steps.py +6 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/METADATA +2 -2
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/RECORD +27 -27
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc14.dist-info}/top_level.txt +0 -0
mlrun/artifacts/llm_prompt.py
CHANGED
@@ -11,12 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import tempfile
 from typing import Optional, Union

 import mlrun
 import mlrun.artifacts.model as model_art
-import mlrun.common
+import mlrun.common.schemas
 from mlrun.artifacts import Artifact, ArtifactMetadata, ArtifactSpec
 from mlrun.utils import StorePrefix, logger

@@ -25,16 +26,18 @@ MAX_PROMPT_LENGTH = 1024

 class LLMPromptArtifactSpec(ArtifactSpec):
     _dict_fields = ArtifactSpec._dict_fields + [
-        "
+        "prompt_template",
         "prompt_legend",
         "model_configuration",
         "description",
     ]
+    PROMPT_TEMPLATE_KEYS = ("content", "role")
+    PROMPT_LEGENDS_KEYS = ("field", "description")

     def __init__(
         self,
         model_artifact: Union[model_art.ModelArtifact, str] = None,
-
+        prompt_template: Optional[list[dict]] = None,
         prompt_path: Optional[str] = None,
         prompt_legend: Optional[dict] = None,
         model_configuration: Optional[dict] = None,
@@ -42,31 +45,107 @@ class LLMPromptArtifactSpec(ArtifactSpec):
         target_path: Optional[str] = None,
         **kwargs,
     ):
-        if
+        if prompt_template and prompt_path:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                "Cannot specify both '
+                "Cannot specify both 'prompt_template' and 'prompt_path'"
             )
-
+        if prompt_legend:
+            self._verify_prompt_legend(prompt_legend)
+        if prompt_path:
+            self._verify_prompt_path(prompt_path)
+        if prompt_template:
+            self._verify_prompt_template(prompt_template)
         super().__init__(
             src_path=prompt_path,
             target_path=target_path,
             parent_uri=model_artifact.uri
             if isinstance(model_artifact, model_art.ModelArtifact)
             else model_artifact,
-            body=prompt_string,
             **kwargs,
         )

-        self.
+        self.prompt_template = prompt_template
         self.prompt_legend = prompt_legend
         self.model_configuration = model_configuration
         self.description = description
-        self._model_artifact =
+        self._model_artifact = (
+            model_artifact
+            if isinstance(model_artifact, model_art.ModelArtifact)
+            else None
+        )
+
+    def _verify_prompt_template(self, prompt_template):
+        if not (
+            isinstance(prompt_template, list)
+            and all(isinstance(item, dict) for item in prompt_template)
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Expected prompt_template to be a list of dicts"
+            )
+        keys_to_pop = []
+        for message in prompt_template:
+            for key in message.keys():
+                if isinstance(key, str):
+                    if key.lower() not in self.PROMPT_TEMPLATE_KEYS:
+                        raise mlrun.errors.MLRunInvalidArgumentError(
+                            f"Expected prompt_template to contain dict that "
+                            f"only has keys from {self.PROMPT_TEMPLATE_KEYS}"
+                        )
+                    else:
+                        if not key.islower():
+                            message[key.lower()] = message[key]
+                            keys_to_pop.append(key)
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"Expected prompt_template to contain dict that only"
+                        f" has str keys got {key} of type {type(key)}"
+                    )
+        for key_to_pop in keys_to_pop:
+            message.pop(key_to_pop)

     @property
     def model_uri(self):
         return self.parent_uri

+    @staticmethod
+    def _verify_prompt_legend(prompt_legend: dict):
+        if prompt_legend is None:
+            return True
+        for place_holder, body_map in prompt_legend.items():
+            if isinstance(body_map, dict):
+                if body_map.get("field") is None:
+                    body_map["field"] = place_holder
+                body_map["description"] = body_map.get("description")
+                if diff := set(body_map.keys()) - set(
+                    LLMPromptArtifactSpec.PROMPT_LEGENDS_KEYS
+                ):
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        "prompt_legend values must contain only 'field' and "
+                        f"'description' keys, got extra fields: {diff}"
+                    )
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Wrong prompt_legend format, {place_holder} is not mapped to dict"
+                )
+
+    @staticmethod
+    def _verify_prompt_path(prompt_path: str):
+        with mlrun.datastore.store_manager.object(prompt_path).open(mode="r") as p_file:
+            try:
+                json.load(p_file)
+            except json.JSONDecodeError:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Failed on decoding str in path "
+                    f"{prompt_path} expected file to contain a "
+                    f"json format."
+                )
+
+    def get_body(self):
+        if self.prompt_template:
+            return json.dumps(self.prompt_template)
+        else:
+            return None
+

 class LLMPromptArtifact(Artifact):
     """
@@ -86,7 +165,7 @@ class LLMPromptArtifact(Artifact):
         model_artifact: Union[
             model_art.ModelArtifact, str
         ] = None,  # TODO support partial model uri
-
+        prompt_template: Optional[list[dict]] = None,
         prompt_path: Optional[str] = None,
         prompt_legend: Optional[dict] = None,
         model_configuration: Optional[dict] = None,
@@ -95,7 +174,7 @@ class LLMPromptArtifact(Artifact):
         **kwargs,
     ):
         llm_prompt_spec = LLMPromptArtifactSpec(
-
+            prompt_template=prompt_template,
             prompt_path=prompt_path,
             prompt_legend=prompt_legend,
             model_artifact=model_artifact,
@@ -133,33 +212,44 @@ class LLMPromptArtifact(Artifact):
             return self.spec._model_artifact
         return None

-    def read_prompt(self) -> Optional[str]:
+    def read_prompt(self) -> Optional[Union[str, list[dict]]]:
         """
-        Read the prompt
+        Read the prompt json from the artifact or if provided prompt template.
+        @:param as_str: True to return the prompt string or a list of dicts.
+        @:return prompt string or list of dicts
         """
-        if self.spec.
-            return self.spec.
+        if self.spec.prompt_template:
+            return self.spec.prompt_template
         if self.spec.target_path:
             with mlrun.datastore.store_manager.object(url=self.spec.target_path).open(
                 mode="r"
             ) as p_file:
-
+                try:
+                    return json.load(p_file)
+                except json.JSONDecodeError:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"Failed on decoding str in path "
+                        f"{self.spec.target_path} expected file to contain a "
+                        f"json format."
+                    )

     def before_log(self):
         """
         Prepare the artifact before logging.
         This method is called before the artifact is logged.
         """
-        if
+        if (
+            self.spec.prompt_template
+            and len(str(self.spec.prompt_template)) > MAX_PROMPT_LENGTH
+        ):
             logger.debug(
                 "Prompt string exceeds maximum length, saving to a temporary file."
             )
             with tempfile.NamedTemporaryFile(
-                delete=False, mode="w", suffix=".
+                delete=False, mode="w", suffix=".json"
             ) as temp_file:
-                temp_file.write(self.spec.
+                temp_file.write(json.dumps(self.spec.prompt_template))
             self.spec.src_path = temp_file.name
-            self.spec.
+            self.spec.prompt_template = None
             self._src_is_temp = True
-
         super().before_log()
mlrun/common/constants.py
CHANGED
mlrun/common/schemas/__init__.py
CHANGED
@@ -214,7 +214,7 @@ from .secret import (
     SecretsData,
     UserSecretCreationRequest,
 )
-from .serving import ModelRunnerStepData, MonitoringData
+from .serving import ModelRunnerStepData, ModelsData, MonitoringData
 from .tag import Tag, TagObjects
 from .workflow import (
     GetWorkflowResponse,
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -336,8 +336,8 @@ class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBa

 class ApplicationBaseRecord(BaseModel):
     type: Literal["metric", "result"]
-    time: datetime
     value: float
+    time: Optional[datetime] = None


 class ApplicationResultRecord(ApplicationBaseRecord):
mlrun/common/schemas/serving.py
CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import enum
+
 from pydantic.v1 import BaseModel

 from mlrun.common.types import StrEnum
@@ -40,3 +42,8 @@ class MonitoringData(StrEnum):
     MODEL_PATH = "model_path"
     MODEL_ENDPOINT_UID = "model_endpoint_uid"
     MODEL_CLASS = "model_class"
+
+
+class ModelsData(enum.Enum):
+    MODEL_CLASS = 0
+    MODEL_PARAMETERS = 1
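A short sketch (not from the diff) of how the new enum is exposed; the re-export through mlrun.common.schemas matches the __init__.py change above:

from mlrun.common.schemas import ModelsData

# ModelsData is a plain enum.Enum (unlike MonitoringData, which is a StrEnum),
# so its members carry integer values.
assert ModelsData.MODEL_CLASS.value == 0
assert ModelsData.MODEL_PARAMETERS.value == 1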
mlrun/common/schemas/workflow.py
CHANGED
@@ -49,7 +49,6 @@ class WorkflowRequest(pydantic.v1.BaseModel):
 class RerunWorkflowRequest(pydantic.v1.BaseModel):
     run_name: typing.Optional[str] = None
     run_id: typing.Optional[str] = None
-    original_workflow_id: typing.Optional[str] = None
     notifications: typing.Optional[list[Notification]] = None
     workflow_runner_node_selector: typing.Optional[dict[str, str]] = None

mlrun/config.py
CHANGED
mlrun/datastore/model_provider/model_provider.py
CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Awaitable
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union

 import mlrun.errors
 from mlrun.datastore.remote_client import (
@@ -23,6 +23,23 @@ T = TypeVar("T")


 class ModelProvider(BaseRemoteClient):
+    """
+    The ModelProvider class is an abstract base for integrating with external
+    model providers, primarily generative AI (GenAI) services.
+
+    Designed to be subclassed, it defines a consistent interface and shared
+    functionality for tasks such as text generation, embeddings, and invoking
+    fine-tuned models. Subclasses should implement provider-specific logic,
+    including SDK client initialization, model invocation, and custom operations.
+
+    Key Features:
+    - Establishes a consistent, reusable client management for model provider integrations.
+    - Simplifies GenAI service integration by abstracting common operations.
+    - Reduces duplication through shared components for common tasks.
+    - Holds default invocation parameters (e.g., temperature, max_tokens) to avoid boilerplate
+      code and promote consistency.
+    """
+
     support_async = False

     def __init__(
@@ -44,9 +61,65 @@ class ModelProvider(BaseRemoteClient):
         self._default_async_operation = None

     def load_client(self) -> None:
+        """
+        Initializes the SDK client for the model provider with the given keyword arguments
+        and assigns it to an instance attribute (e.g., self._client).
+
+        Subclasses should override this method to:
+        - Create and configure the provider-specific client instance.
+        - Assign the client instance to self._client.
+        - Define a default operation callable (e.g., a method to invoke model completions)
+          and assign it to self._default_operation.
+        """
+
         raise NotImplementedError("load_client method is not implemented")

-    def invoke(
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        Invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response. The
+        response can be returned as plain text or in its full structured format, depending
+        on the `as_str` parameter.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param as_str: A boolean flag indicating whether to return the response as a plain string.
+            - If True, the function extracts and returns the main content of the first
+              response.
+            - If False, the function returns the full response object,
+              which may include additional metadata or multiple response options.
+            Defaults to False.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return:
+            - If `as_str` is True: Returns the main content of the first response as a string.
+            - If `as_str` is False: Returns the full response object.
+
+        """
         raise NotImplementedError("invoke method is not implemented")

     def customized_invoke(
@@ -78,5 +151,10 @@ class ModelProvider(BaseRemoteClient):
     async def async_customized_invoke(self, **kwargs):
         raise NotImplementedError("async_customized_invoke is not implemented")

-    async def async_invoke(
+    async def async_invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Awaitable[str]:
         raise NotImplementedError("async_invoke is not implemented")
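A minimal subclass sketch (not part of the package) illustrating the contract the new docstrings describe: load_client sets self._client and self._default_operation, and invoke follows the messages/as_str convention. The provider name and the stand-in SDK call are hypothetical:

from typing import Optional, Union

from mlrun.datastore.model_provider.model_provider import ModelProvider


class EchoProvider(ModelProvider):
    """Toy provider, only to illustrate the subclassing contract."""

    def load_client(self) -> None:
        # A real subclass would build the vendor SDK client here and point
        # _default_operation at something like client.chat.completions.create.
        self._client = object()
        self._default_operation = self._echo

    def _echo(self, messages, **kwargs):
        # Hypothetical stand-in for a provider SDK call.
        return {"choices": [{"message": {"content": messages[-1]["content"]}}]}

    def invoke(
        self,
        messages: Optional[list[dict]] = None,
        as_str: bool = False,
        **invoke_kwargs,
    ) -> Optional[Union[str, dict]]:
        response = self._default_operation(messages, **invoke_kwargs)
        if as_str:
            return response["choices"][0]["message"]["content"]
        return response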
mlrun/datastore/model_provider/openai_provider.py
CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union

 import mlrun
 from mlrun.datastore.model_provider.model_provider import ModelProvider
@@ -21,6 +21,18 @@ T = TypeVar("T")


 class OpenAIProvider(ModelProvider):
+    """
+    OpenAIProvider is a wrapper around the OpenAI SDK that provides an interface
+    for interacting with OpenAI's generative AI services.
+
+    It supports both synchronous and asynchronous operations, allowing flexible
+    integration into various workflows.
+
+    This class extends the ModelProvider base class and implements OpenAI-specific
+    functionality, including client initialization, model invocation, and custom
+    operations tailored to the OpenAI API.
+    """
+
     def __init__(
         self,
         parent,
@@ -59,6 +71,19 @@ class OpenAIProvider(ModelProvider):
         return self.endpoint

     def load_client(self) -> None:
+        """
+        Initializes the OpenAI SDK client using the provided options.
+
+        This method imports the `OpenAI` class from the `openai` package, instantiates
+        a client with the given keyword arguments (`self.options`), and assigns it to
+        `self._client`.
+
+        It also sets the default operation to `self.client.chat.completions.create`, which is
+        typically used for invoking chat-based model completions.
+
+        Raises:
+            ImportError: If the `openai` package is not installed.
+        """
         try:
             from openai import OpenAI  # noqa

@@ -87,34 +112,33 @@
         else:
             return self._default_operation(**invoke_kwargs, model=self.model)

-    def
-        self,
-
-
-
-
-
-
-
-        elif prompt:
-            messages = [
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ]
-        else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "must provide 'messages' or 'prompt' to invoke"
-            )
-        return messages, invoke_kwargs
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        OpenAI-specific implementation of `ModelProvider.invoke`.
+        Invokes an OpenAI model operation using the sync client.
+        For full details, see `ModelProvider.invoke`.

-
-
-
-
+        :param messages: Same as ModelProvider.invoke.
+
+        :param as_str: bool
+            If `True`, returns only the main content of the first response
+            (`response.choices[0].message.content`).
+            If `False`, returns the full response object, whose type depends on
+            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+
+        :param invoke_kwargs:
+            Same as ModelProvider.invoke.
+
+        """
+        invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         response = self._default_operation(
             model=self.endpoint, messages=messages, **invoke_kwargs
         )
-
+        if as_str:
+            return response.choices[0].message.content
+        return response
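Assuming an already-initialized OpenAIProvider instance (how the provider is constructed or registered is outside this diff), the reworked invoke can be used roughly as follows:

# `provider` is assumed to be an initialized OpenAIProvider instance.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# as_str=True returns only response.choices[0].message.content.
answer = provider.invoke(messages=messages, as_str=True, temperature=0.2)

# The default (as_str=False) returns the full OpenAI SDK response object.
full_response = provider.invoke(messages=messages)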
mlrun/datastore/remote_client.py
CHANGED
@@ -18,6 +18,17 @@ import mlrun


 class BaseRemoteClient:
+    """
+    The BaseRemoteClient class serves as a foundational component for managing
+    secrets and configurations.
+    It is designed to be extended by subclasses that interact with external services,
+    such as file systems (e.g., Datastore) or model providers (e.g., ModelProvider).
+
+    This class is intended to provide shared functionality and should not be
+    used directly. Instead, create a subclass to implement logic specific to
+    your use case, such as interactions with S3 storage or invoking model providers like OpenAI.
+    """
+
     def __init__(self, parent, kind, name, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
mlrun/execution.py
CHANGED
@@ -94,6 +94,7 @@ class MLClientCtx:
         self._state_thresholds = {}
         self._retry_spec = {}
         self._retry_count = None
+        self._retries = []

         self._labels = {}
         self._annotations = {}
@@ -468,6 +469,7 @@ class MLClientCtx:
         for key, uri in status.get("artifact_uris", {}).items():
             self._artifacts_manager.artifact_uris[key] = uri
         self._retry_count = status.get("retry_count", self._retry_count)
+        self._retries = status.get("retries", self._retries)
         # if run is a retry, the state needs to move to running
         if include_status:
             self._state = status.get("state", self._state)
@@ -911,7 +913,7 @@ class MLClientCtx:
     def log_llm_prompt(
         self,
         key,
-
+        prompt_template: Optional[list[dict]] = None,
         prompt_path: Optional[str] = None,
         prompt_legend: Optional[dict] = None,
         model_artifact: Union[ModelArtifact, str] = None,
@@ -935,7 +937,7 @@ class MLClientCtx:
             # Log an inline prompt
             context.log_llm_prompt(
                 key="qa-prompt",
-
+                prompt_template=[{"role: "user", "content": "question with {place_holder}"}],
                 model_artifact=model,
                 prompt_legend={"question": "user_input"},
                 model_configuration={"temperature": 0.7, "max_tokens": 128},
@@ -943,10 +945,16 @@
             )

         :param key: Unique name of the artifact.
-        :param
+        :param prompt_template: Raw prompt list of dicts -
+            [{"role": "system", "content": "You are a {profession} advisor"},
+            "role": "user", "content": "I need your help with {profession}"]. only "role" and "content" keys allow in any
+            str format (upper/lower case), keys will be modified to lower case.
+            Cannot be used with `prompt_path`.
         :param prompt_path: Path to a file containing the prompt content. Cannot be used with `prompt_string`.
         :param prompt_legend: A dictionary where each key is a placeholder in the prompt (e.g., ``{user_name}``)
-            and the value is a description
+            and the value is a dictionary holding two keys, "field", "description". "field" points to the field in
+            the event where the value of the place-holder inside the event, if None or not exist will be replaced
+            with the place-holder name. "description" will point to explanation of what that placeholder represents.
             Useful for documenting and clarifying dynamic parts of the prompt.
         :param model_artifact: Reference to the parent model (either `ModelArtifact` or model URI string).
         :param model_configuration: Dictionary of generation parameters (e.g., temperature, max_tokens).
@@ -961,10 +969,15 @@
         :returns: The logged `LLMPromptArtifact` object.
         """

+        if not prompt_template and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_template' or 'prompt_path' must be provided"
+            )
+
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.project or "",
-
+            prompt_template=prompt_template,
             prompt_path=prompt_path,
             prompt_legend=prompt_legend,
             model_artifact=model_artifact,
@@ -1262,6 +1275,7 @@
                 "start_time": to_date_str(self._start_time),
                 "last_update": to_date_str(self._last_update),
                 "retry_count": self._retry_count,
+                "retries": self._retries,
             },
         }

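A small sketch (not from the diff) of calling the updated log_llm_prompt with the new prompt_template argument and the dict-valued prompt_legend described in the docstring; the handler, placeholder, and field names are illustrative:

def handler(context):
    # Inline template: only "role"/"content" keys are allowed; keys are lower-cased.
    context.log_llm_prompt(
        key="qa-prompt",
        prompt_template=[
            {"role": "system", "content": "You are a {profession} advisor"},
            {"role": "user", "content": "Answer this: {question}"},
        ],
        # Each legend value holds "field" (the event field to read the value from)
        # and "description"; a missing "field" falls back to the placeholder name.
        prompt_legend={
            "question": {"field": "user_input", "description": "the user's question"},
        },
        model_configuration={"temperature": 0.7, "max_tokens": 128},
    )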
mlrun/model.py
CHANGED
@@ -1375,6 +1375,7 @@ class RunStatus(ModelObj):
         notifications: Optional[dict[str, Notification]] = None,
         artifact_uris: Optional[dict[str, str]] = None,
         retry_count: Optional[int] = None,
+        retries: Optional[list[dict]] = None,
     ):
         self.state = state or "created"
         self.status_text = status_text
@@ -1393,6 +1394,7 @@
         # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
         self._artifact_uris = artifact_uris or {}
         self._retry_count = retry_count or None
+        self._retries = retries or []

     @classmethod
     def from_dict(
@@ -1461,6 +1463,19 @@
         """
         self._retry_count = retry_count

+    @property
+    def retries(self) -> list[dict]:
+        """List of metadata for each retry attempt."""
+        return self._retries
+
+    @retries.setter
+    def retries(self, retries: list[dict]):
+        """
+        Set the list of retry attempt metadata.
+        :param retries: A list of dictionaries, each representing a retry attempt.
+        """
+        self._retries = retries
+
     def is_failed(self) -> Optional[bool]:
         """
         This method returns whether a run has failed.
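Finally, a minimal sketch of the new retries accessor on RunStatus; the diff does not specify the per-attempt dict layout, so the keys below are placeholders:

from mlrun.model import RunStatus

status = RunStatus()
# The setter accepts a list of dicts, one per retry attempt ("attempt" and
# "start_time" are placeholder keys, not defined by this diff).
status.retries = [{"attempt": 1, "start_time": "2025-01-01T00:00:00Z"}]
print(status.retries)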
|