mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/common/schemas/hub.py +14 -0
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +8 -2
- mlrun/datastore/model_provider/huggingface_provider.py +118 -30
- mlrun/datastore/model_provider/model_provider.py +61 -3
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +6 -4
- mlrun/db/nopdb.py +1 -0
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +22 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +10 -2
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +28 -24
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/server.py +25 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/METADATA +23 -22
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/RECORD +31 -31
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/hub.py
CHANGED
@@ -15,6 +15,7 @@
 from datetime import datetime, timezone
 from typing import Optional
 
+import deepdiff
 from pydantic.v1 import BaseModel, Extra, Field
 
 import mlrun.common.types
@@ -83,6 +84,19 @@ class HubSource(BaseModel):
             status=ObjectStatus(state="created"),
         )
 
+    def diff(self, another_source: "HubSource") -> dict:
+        """
+        Compare this HubSource with another one.
+        Returns a dict of differences (metadata, spec, status).
+        """
+        exclude_paths = [
+            "root['metadata']['updated']",
+            "root['metadata']['created']",
+        ]
+        return deepdiff.DeepDiff(
+            self.dict(), another_source.dict(), exclude_paths=exclude_paths
+        )
+
 
 last_source_index = -1
 
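For reference, a minimal sketch of the comparison that the new `HubSource.diff` method delegates to `deepdiff.DeepDiff`; the sample dictionaries below are illustrative placeholders, not real hub-source records:

```python
import deepdiff

old = {
    "metadata": {"name": "default", "updated": "2024-01-01T00:00:00Z"},
    "spec": {"path": "https://example.com/catalog/v1"},
}
new = {
    "metadata": {"name": "default", "updated": "2024-06-01T00:00:00Z"},
    "spec": {"path": "https://example.com/catalog/v2"},
}

# "created"/"updated" timestamps are excluded, so only the spec change is reported.
diff = deepdiff.DeepDiff(
    old,
    new,
    exclude_paths=["root['metadata']['updated']", "root['metadata']['created']"],
)
print(diff)  # {'values_changed': {"root['spec']['path']": {...}}}
```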
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -331,6 +331,7 @@ class EndpointType(IntEnum):
 class EndpointMode(IntEnum):
     REAL_TIME = 0
     BATCH = 1
+    BATCH_LEGACY = 2  # legacy batch mode, used for endpoints created through the batch inference job
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -119,7 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
-    mode: EndpointMode =
+    mode: Optional[EndpointMode] = None
 
     @classmethod
     def mutable_fields(cls):
@@ -131,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
             return str(v)
         return v
 
+    @validator("mode", pre=True, always=True)
+    def _set_mode_based_on_endpoint_type(cls, v, values):  # noqa: N805
+        if v is None:
+            if values.get("endpoint_type") == EndpointType.BATCH_EP:
+                return EndpointMode.BATCH_LEGACY
+            else:
+                return EndpointMode.REAL_TIME
+        return v
+
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
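A standalone sketch of the defaulting behavior the new validator introduces: when `mode` is not supplied, batch endpoints fall back to `BATCH_LEGACY` and everything else to `REAL_TIME`. The model below is deliberately simplified and the enum members/values are illustrative; only the validator logic mirrors the diff:

```python
from enum import IntEnum
from typing import Optional

from pydantic.v1 import BaseModel, validator


class EndpointType(IntEnum):  # illustrative values, not mlrun's definitions
    NODE_EP = 1
    BATCH_EP = 4


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1
    BATCH_LEGACY = 2


class Metadata(BaseModel):
    endpoint_type: EndpointType = EndpointType.NODE_EP
    mode: Optional[EndpointMode] = None

    @validator("mode", pre=True, always=True)
    def _default_mode(cls, v, values):  # noqa: N805
        # Mirrors the diff: only fill in a default when mode was not provided.
        if v is None:
            if values.get("endpoint_type") == EndpointType.BATCH_EP:
                return EndpointMode.BATCH_LEGACY
            return EndpointMode.REAL_TIME
        return v


assert Metadata().mode is EndpointMode.REAL_TIME
assert Metadata(endpoint_type=EndpointType.BATCH_EP).mode is EndpointMode.BATCH_LEGACY
```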
mlrun/datastore/azure_blob.py
CHANGED
@@ -229,18 +229,25 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-
+                connection_string, credential=None, service=service
             )
-
-
-
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
                     if key in st and st[key] != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}'
-
+                                f"Storage option for '{key}' is '{st[key]}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +256,7 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value
 
         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
@@ -258,48 +266,63 @@ class AzureBlobStore(DataStore):
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res
 
-
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
-
-
-
-
-
-        res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-            "client_id"
-        ]
-        if "client_secret" in st:
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                "client_secret"
-            ]
-        if "tenant_id" in st:
-            tenant_id = st["tenant_id"]
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-            )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
+            if container := getattr(self, "endpoint", None) or st.get("container"):
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
 
-
-
-
-
-        res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res
 
     @property
     def spark_url(self):
-
-
-
-
-
-
-
-
-
-
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = getattr(self, "endpoint", None) or st.get("container")
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container is required to build the WASB URL "
+                "(self.endpoint or storage_options['container'])."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            primary_url, _, _ = parse_connection_str(
+                connection_string, credential=None, service=service
+            )
+            if primary_url.startswith("http://"):
+                primary_url = primary_url[len("http://") :]
+            if primary_url.startswith("https://"):
+                primary_url = primary_url[len("https://") :]
+            host = primary_url.rstrip("/")
+
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
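To make the new WASB/SAS wiring concrete, here is a hedged sketch of the Hadoop configuration key and URL that the reworked Spark-options code and the `spark_url` property produce for a SAS-based setup; the account, container, and token values are placeholders:

```python
# Placeholder storage options; real values come from the datastore profile / secrets.
storage_options = {
    "account_name": "myaccount",
    "container": "mycontainer",
    "sas_token": "?sv=2024-01-01&ss=b&sig=...",
}

host = f"{storage_options['account_name']}.blob.core.windows.net"
sas = storage_options["sas_token"].lstrip("?")

# Container-scoped SAS key consumed by Hadoop's WASB connector.
spark_conf = {
    f"spark.hadoop.fs.azure.sas.{storage_options['container']}.{host}": sas,
}
spark_url = f"wasbs://{storage_options['container']}@{host}"

print(spark_conf)
print(spark_url)  # wasbs://mycontainer@myaccount.blob.core.windows.net
```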
mlrun/datastore/datastore_profile.py
CHANGED
@@ -333,7 +333,9 @@ class DatastoreProfileGCS(DatastoreProfile):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.bucket:
-            return
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"
 
@@ -370,7 +372,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.container:
-            return
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"
 
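With these branches the profiles now return an explicit URL whenever a bucket or container is configured. A tiny sketch of the resulting strings, with placeholder names:

```python
bucket, subpath = "my-bucket", "data/train.csv"
gcs_url = f"gcs://{bucket}/{subpath}" if subpath else f"gcs://{bucket}"

container = "my-container"
az_url = f"az://{container}/{subpath}" if subpath else f"az://{container}"

assert gcs_url == "gcs://my-bucket/data/train.csv"
assert az_url == "az://my-container/data/train.csv"
```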
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED
@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """
 
     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.
+        self._download_model()
 
     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,35 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the HuggingFace-pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(repo_id=self.model, local_dir_use_symlinks=False)
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
@@ -94,27 +125,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
 
-
+        The response should exclude the user's input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
 
-        :param
-
-        :param invoke_response_format:
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+                         containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
 
-
-
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:
 
-
-            may differ from the actual tokens generated by the model due to
-            internal decoding behavior and implementation details.
+              .. code-block:: json
 
-
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
 
-
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.
+
+            - FULL: Return the full raw response object.
+
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+                         Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+                 Can be a string, dictionary, or the original response object.
 
         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +211,15 @@ class HuggingFaceProvider(ModelProvider):
         :raises:
             ImportError: If the `transformers` package is not installed.
         """
+        if self._client:
+            return
         try:
             from transformers import pipeline, AutoModelForCausalLM  # noqa
             from transformers import AutoTokenizer  # noqa
             from transformers.pipelines.base import Pipeline  # noqa
 
+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
             self._client = pipeline(model=self.model, **self.options)
             self._expected_operation_type = Pipeline
         except ImportError as exc:
@@ -186,23 +240,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace
-
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.
 
         Example:
-
+            ```python
+            from transformers import pipeline
+            from PIL import Image
+
+            # Using custom pipeline for image classification
             image = Image.open(image_path)
-            pipeline_object =
+            pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
             result = hf_provider.custom_invoke(
                 pipeline_object,
                 inputs=image,
            )
-
+            ```
 
+        :param operation: A Pipeline object from the transformers library.
+                          If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+                              These are merged with `default_invoke_kwargs` and may include
+                              parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.
 
-        :
-
-
+        :return: The full response returned by the pipeline operation.
+                 Format depends on the pipeline task (list for text generation,
+                 dict for classification, etc.).
+
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.
 
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +291,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of
-        Invokes a HuggingFace model operation
-
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.
 
         :param messages:
-
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]
 
         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +326,24 @@ class HuggingFaceProvider(ModelProvider):
                     }
                 }
 
+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
             - "full": Returns the raw response object from the HuggingFace model,
               typically a list of generated sequences (dictionaries).
              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace
+            Additional keyword arguments passed to the HuggingFace pipeline.
 
         :return:
            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """
 
+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
+            string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
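A hedged sketch of the download-then-load-offline pattern that `_download_model` and the updated `load_client` implement: the snapshot is materialized in the local cache first, and the pipeline is then built with `local_files_only=True`, so worker processes never race on downloads. The model id below is a placeholder chosen only because it is tiny:

```python
from huggingface_hub import snapshot_download
from transformers import pipeline

model_id = "sshleifer/tiny-gpt2"  # placeholder; any text-generation model id works

# 1. Materialize model/tokenizer files in the local HF cache (one download, no races).
snapshot_download(repo_id=model_id)

# 2. Build the pipeline strictly from the local cache.
generator = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"local_files_only": True},
)

# 3. Generate without echoing the prompt back, as the response handler expects.
print(generator("Hello there", max_new_tokens=5, return_full_text=False))
```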
mlrun/datastore/model_provider/model_provider.py
CHANGED
@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
             additional metadata or token usage statistics, in this format:
             {"answer": <string>, "usage": <dict>}
 
-            - FULL: Return the full raw response object
+            - FULL: Return the full raw response object.
 
         :param kwargs: Additional parameters that may be required by specific implementations.
 
@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client
 
-    def custom_invoke(
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
 
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+                         Each dictionary should follow the format::
+                             {"role": "system"| "user" | "assistant" ..., "content":
+                             "Message content as a string"}
+
+                         Example:
+
+                         .. code-block:: json
+
+                             [
+                                 {"role": "system", "content": "You are a helpful assistant."},
+                                 {"role": "user", "content": "What is the capital of France?"}
+                             ]
+
+                         This format is consistent across all backends. Defaults to None if no messages
+                         are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+                 invoke_response_format parameter.
+
+        """
         raise NotImplementedError("async_invoke is not implemented")
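To illustrate the interface documented above, a small sketch of the standardized message list and of the "usage" response shape described in the docstrings; the answer text and token counts are placeholders:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Shape returned in "usage" mode: the string answer plus token statistics.
usage_response = {
    "answer": "Paris",
    "usage": {"prompt_tokens": 24, "completion_tokens": 3, "total_tokens": 27},
}

assert usage_response["usage"]["total_tokens"] == (
    usage_response["usage"]["prompt_tokens"]
    + usage_response["usage"]["completion_tokens"]
)
```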