mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/datastore/model_provider/huggingface_provider.py (new file)
@@ -0,0 +1,363 @@
# Copyright 2025 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Any, Optional, Union

import mlrun
from mlrun.datastore.model_provider.model_provider import (
    InvokeResponseFormat,
    ModelProvider,
    UsageResponseKeys,
)

if TYPE_CHECKING:
    from transformers.pipelines.base import Pipeline
    from transformers.pipelines.text_generation import ChatType


class HuggingFaceProvider(ModelProvider):
    """
    HuggingFaceProvider is a wrapper around the Hugging Face Transformers pipeline
    that provides an interface for interacting with a wide range of Hugging Face models.

    It supports synchronous operations, enabling flexible integration into various workflows.

    This class extends the ModelProvider base class and implements Hugging Face-specific
    functionality, including pipeline initialization, default text generation operations,
    and custom operations tailored to the Hugging Face Transformers pipeline API.

    Note: The pipeline object will download the model (if not already cached) and load it
    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
    """

    def __init__(
        self,
        parent,
        schema,
        name,
        endpoint="",
        secrets: Optional[dict] = None,
        default_invoke_kwargs: Optional[dict] = None,
    ):
        endpoint = endpoint or mlrun.mlconf.model_providers.huggingface_default_model
        if schema != "huggingface":
            raise mlrun.errors.MLRunInvalidArgumentError(
                "HuggingFaceProvider supports only 'huggingface' as the provider kind."
            )
        super().__init__(
            parent=parent,
            kind=schema,
            name=name,
            endpoint=endpoint,
            secrets=secrets,
            default_invoke_kwargs=default_invoke_kwargs,
        )
        self.options = self.get_client_options()
        self._expected_operation_type = None
        self._download_model()

    @staticmethod
    def _extract_string_output(response: list[dict]) -> str:
        """
        Extracts the first generated string from Hugging Face pipeline output
        """
        if not isinstance(response, list) or len(response) == 0:
            raise ValueError("Empty or invalid pipeline output")
        if len(response) != 1:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "HuggingFaceProvider: extracting string from response is only supported for single-response outputs"
            )
        return response[0].get("generated_text")

    @classmethod
    def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
        if endpoint and subpath:
            endpoint = endpoint + subpath
            # In HuggingFace, "/" in a model name is part of the name — `subpath` is not used.
            subpath = ""
        return endpoint, subpath

    @property
    def client(self) -> Any:
        """
        Lazily return the HuggingFace-pipeline client.

        If the client has not been initialized yet, it will be created
        by calling `load_client`.
        """
        self.load_client()
        return self._client

    def _download_model(self):
        """
        Pre-downloads model files locally to prevent race conditions in multiprocessing.

        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
        file copying for safe concurrent access across multiple processes.

        :raises:
            ImportError: If huggingface_hub package is not installed.
        """
        try:
            from huggingface_hub import snapshot_download

            # Download the model and tokenizer files directly to the cache.
            snapshot_download(
                repo_id=self.model,
                local_dir_use_symlinks=False,
                token=self._get_secret_or_env("HF_TOKEN") or None,
            )
        except ImportError as exc:
            raise ImportError("huggingface_hub package is not installed") from exc

    def _response_handler(
        self,
        response: Union[str, list],
        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
        **kwargs,
    ) -> Union[str, list, dict[str, Any]]:
        """
        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.

        The response should exclude the user's input (no repetition in the output).
        This can be accomplished by invoking the pipeline with `return_full_text=False`.

        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
            containing generated text sequences.
        :param invoke_response_format: Determines how the response should be processed and returned. Options:

            - STRING: Return only the main generated content as a string,
              for single-answer responses.
            - USAGE: Return a dictionary combining the string response with
              token usage statistics:

              .. code-block:: json

                  {
                      "answer": "<generated_text>",
                      "usage": {
                          "prompt_tokens": <int>,
                          "completion_tokens": <int>,
                          "total_tokens": <int>
                      }
                  }

              Note: Token counts are estimated after answer generation and
              may differ from the actual tokens generated by the model due to
              internal decoding behavior and implementation details.

            - FULL: Return the full raw response object.

        :param messages: The original input messages used for token count estimation in USAGE mode.
            Can be a string, list of strings, or chat format messages.
        :param kwargs: Additional parameters for response processing.

        :return: The processed response in the format specified by `invoke_response_format`.
            Can be a string, dictionary, or the original response object.

        :raises MLRunInvalidArgumentError: If extracting the string response fails.
        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
        """
        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
            str_response = self._extract_string_output(response)
            if invoke_response_format == InvokeResponseFormat.STRING:
                return str_response
            if invoke_response_format == InvokeResponseFormat.USAGE:
                tokenizer = self.client.tokenizer
                if not isinstance(messages, str):
                    try:
                        messages = tokenizer.apply_chat_template(
                            messages, tokenize=False, add_generation_prompt=True
                        )
                    except Exception as e:
                        raise mlrun.errors.MLRunRuntimeError(
                            f"Failed to apply chat template using the tokenizer for model '{self.model}'. "
                            "This may indicate that the tokenizer does not support chat formatting, "
                            "or that the input format is invalid. "
                            f"Original error: {e}"
                        )
                prompt_tokens = len(tokenizer.encode(messages))
                completion_tokens = len(tokenizer.encode(str_response))
                total_tokens = prompt_tokens + completion_tokens
                usage = {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": total_tokens,
                }
                response = {
                    UsageResponseKeys.ANSWER: str_response,
                    UsageResponseKeys.USAGE: usage,
                }
        return response

    def load_client(self) -> None:
        """
        Initializes the Hugging Face pipeline using the provided options.

        This method imports the `pipeline` function from the `transformers` package,
        creates a pipeline instance with the specified task and model (from `self.options`),
        and assigns it to `self._client`.

        Note: Hugging Face pipelines are synchronous and do not support async invocation.

        :raises:
            ImportError: If the `transformers` package is not installed.
        """
        if self._client:
            return
        try:
            from transformers import pipeline, AutoModelForCausalLM  # noqa
            from transformers import AutoTokenizer  # noqa
            from transformers.pipelines.base import Pipeline  # noqa

            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
            self.options["model_kwargs"]["local_files_only"] = True
            self._client = pipeline(model=self.model, **self.options)
            self._expected_operation_type = Pipeline
        except ImportError as exc:
            raise ImportError("transformers package is not installed") from exc

    def get_client_options(self):
        res = dict(
            task=self._get_secret_or_env("HF_TASK") or "text-generation",
            token=self._get_secret_or_env("HF_TOKEN"),
            device=self._get_secret_or_env("HF_DEVICE"),
            device_map=self._get_secret_or_env("HF_DEVICE_MAP"),
            trust_remote_code=self._get_secret_or_env("HF_TRUST_REMOTE_CODE"),
            model_kwargs=self._get_secret_or_env("HF_MODEL_KWARGS"),
        )
        return self._sanitize_options(res)

    def custom_invoke(
        self, operation: Optional["Pipeline"] = None, **invoke_kwargs
    ) -> Union[list, dict, Any]:
        """
        Invokes a HuggingFace pipeline operation with the given keyword arguments.

        This method provides flexibility to use a custom pipeline object for specific tasks
        (e.g., image classification, sentiment analysis).

        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.

        Example:
        ```python
        from transformers import pipeline
        from PIL import Image

        # Using custom pipeline for image classification
        image = Image.open(image_path)
        pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
        result = hf_provider.custom_invoke(
            pipeline_object,
            inputs=image,
        )
        ```

        :param operation: A Pipeline object from the transformers library.
            If not provided, defaults to the provider's configured pipeline.
        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
            These are merged with `default_invoke_kwargs` and may include
            parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.

        :return: The full response returned by the pipeline operation.
            Format depends on the pipeline task (list for text generation,
            dict for classification, etc.).

        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.
        """
        invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
        if operation:
            if not isinstance(operation, self._expected_operation_type):
                raise mlrun.errors.MLRunInvalidArgumentError(
                    "Huggingface operation must inherit from 'Pipeline' object"
                )
            return operation(**invoke_kwargs)
        else:
            return self.client(**invoke_kwargs)

    def invoke(
        self,
        messages: Union[str, list[str], "ChatType", list["ChatType"]],
        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
        **invoke_kwargs,
    ) -> Union[str, list, dict[str, Any]]:
        """
        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
        Invokes a HuggingFace model operation for text generation tasks.

        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.

        :param messages:
            Input for the text generation model. Can be provided in multiple formats:

            - A single string: Direct text input for generation
            - A list of strings: Multiple text inputs for batch processing
            - Chat format: A list of dictionaries with "role" and "content" keys:

              .. code-block:: json

                  [
                      {"role": "system", "content": "You are a helpful assistant."},
                      {"role": "user", "content": "What is the capital of France?"}
                  ]

        :param invoke_response_format: InvokeResponseFormat
            Specifies the format of the returned response. Options:

            - "string": Returns only the generated text content, extracted from a single response.
            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:

              .. code-block:: json

                  {
                      "answer": "<generated_text>",
                      "usage": {
                          "prompt_tokens": <int>,
                          "completion_tokens": <int>,
                          "total_tokens": <int>
                      }
                  }

              Note: For usage mode, the model tokenizer should support apply_chat_template.

            - "full": Returns the raw response object from the HuggingFace model,
              typically a list of generated sequences (dictionaries).
              This format does not include token usage statistics.

        :param invoke_kwargs:
            Additional keyword arguments passed to the HuggingFace pipeline.

        :return:
            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.

        :raises MLRunInvalidArgumentError:
            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
            string content.
        :raises MLRunRuntimeError:
            If using "usage" response mode and the model tokenizer does not support chat template formatting.
        """
        if self.client.task != "text-generation":
            raise mlrun.errors.MLRunInvalidArgumentError(
                "HuggingFaceProvider.invoke supports text-generation task only"
            )
        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
            invoke_kwargs["return_full_text"] = False
        response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
        response = self._response_handler(
            messages=messages,
            response=response,
            invoke_response_format=invoke_response_format,
        )
        return response
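To illustrate the invoke flow documented above, here is a minimal usage sketch. It assumes `hf_provider` is an already-constructed HuggingFaceProvider (for example, obtained through an mlrun datastore profile; construction details are omitted), that the model's tokenizer supports `apply_chat_template` for the usage mode, and that `max_new_tokens` is an ordinary transformers pipeline keyword forwarded through `invoke_kwargs`.

```python
from mlrun.datastore.model_provider.model_provider import InvokeResponseFormat

# `hf_provider` is assumed to be an existing HuggingFaceProvider instance.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# "string": only the generated text (the pipeline is called with return_full_text=False)
answer = hf_provider.invoke(
    messages,
    invoke_response_format=InvokeResponseFormat.STRING,
    max_new_tokens=64,  # forwarded to the transformers pipeline
)

# "usage": dict with "answer" plus estimated prompt/completion/total token counts
answer_and_usage = hf_provider.invoke(
    messages, invoke_response_format=InvokeResponseFormat.USAGE
)

# "full" (default): the raw pipeline output, typically a list of generated sequences
raw_response = hf_provider.invoke(messages)
```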
mlrun/datastore/model_provider/mock_model_provider.py (new file)
@@ -0,0 +1,87 @@
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Optional, Union

import mlrun
from mlrun.datastore.model_provider.model_provider import (
    InvokeResponseFormat,
    ModelProvider,
    UsageResponseKeys,
)


class MockModelProvider(ModelProvider):
    support_async = False

    def __init__(
        self,
        parent,
        kind,
        name,
        endpoint="",
        secrets: Optional[dict] = None,
        default_invoke_kwargs: Optional[dict] = None,
    ):
        super().__init__(
            parent=parent, name=name, kind=kind, endpoint=endpoint, secrets=secrets
        )
        self.default_invoke_kwargs = default_invoke_kwargs or {}
        self._client = None
        self._async_client = None

    @staticmethod
    def _extract_string_output(response: Any) -> str:
        """
        Extracts string response from response object
        """
        pass

    def load_client(self) -> None:
        """
        Initializes the SDK client for the model provider with the given keyword arguments
        and assigns it to an instance attribute (e.g., self._client).

        Subclasses should override this method to:
        - Create and configure the provider-specific client instance.
        - Assign the client instance to self._client.
        """
        pass

    def invoke(
        self,
        messages: Union[list[dict], Any],
        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
        **invoke_kwargs,
    ) -> Union[str, dict[str, Any], Any]:
        if invoke_response_format == InvokeResponseFormat.STRING:
            return (
                "You are using a mock model provider, no actual inference is performed."
            )
        elif invoke_response_format == InvokeResponseFormat.FULL:
            return {
                UsageResponseKeys.USAGE: {"prompt_tokens": 0, "completion_tokens": 0},
                UsageResponseKeys.ANSWER: "You are using a mock model provider, no actual inference is performed.",
                "extra": {},
            }
        elif invoke_response_format == InvokeResponseFormat.USAGE:
            return {
                UsageResponseKeys.ANSWER: "You are using a mock model provider, no actual inference is performed.",
                UsageResponseKeys.USAGE: {"prompt_tokens": 0, "completion_tokens": 0},
            }
        else:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Unsupported invoke response format: {invoke_response_format}"
            )
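A hedged sketch of how the mock provider might be exercised in a test. It assumes the base ModelProvider constructor accepts these placeholder arguments; `parent`, `kind`, and `name` below are illustrative only, and real usage would go through mlrun's datastore/model-provider wiring.

```python
from mlrun.datastore.model_provider.mock_model_provider import MockModelProvider
from mlrun.datastore.model_provider.model_provider import InvokeResponseFormat

# Placeholder wiring for illustration; no client is created and no inference runs.
mock = MockModelProvider(parent=None, kind="mock", name="mock-provider")

# Fixed canned string response.
text = mock.invoke(
    [{"role": "user", "content": "hi"}],
    invoke_response_format=InvokeResponseFormat.STRING,
)

# Dict with "answer" and zeroed "usage" counters.
with_usage = mock.invoke(
    [{"role": "user", "content": "hi"}],
    invoke_response_format=InvokeResponseFormat.USAGE,
)
```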