mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/datastore/azure_blob.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2025 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import contextlib
|
|
15
16
|
import time
|
|
16
17
|
from pathlib import Path
|
|
17
18
|
from typing import Optional
|
|
@@ -30,6 +31,40 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
|
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
class AzureBlobStore(DataStore):
|
|
34
|
+
"""
|
|
35
|
+
Azure Blob Storage datastore implementation.
|
|
36
|
+
|
|
37
|
+
Supports multiple URL schemas: az://, wasbs://, wasb://
|
|
38
|
+
|
|
39
|
+
Supported Connection String Formats:
|
|
40
|
+
====================================
|
|
41
|
+
|
|
42
|
+
1. Account Key (Standard):
|
|
43
|
+
"DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net"
|
|
44
|
+
|
|
45
|
+
2. SAS Token:
|
|
46
|
+
"BlobEndpoint=https://<account>.blob.core.windows.net/;SharedAccessSignature=<sas_token>"
|
|
47
|
+
|
|
48
|
+
3. Minimal BlobEndpoint:
|
|
49
|
+
"BlobEndpoint=https://<account>.blob.core.windows.net/;AccountName=<account>;AccountKey=<key>"
|
|
50
|
+
|
|
51
|
+
4. Custom Domain:
|
|
52
|
+
"BlobEndpoint=https://<account>.mydomain.com/;AccountName=<account>;AccountKey=<key>"
|
|
53
|
+
|
|
54
|
+
5. China/Government Cloud:
|
|
55
|
+
"DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.chinacloudapi.cn"
|
|
56
|
+
|
|
57
|
+
6. Full Service Endpoints with SAS:
|
|
58
|
+
"BlobEndpoint=https://<account>.blob.core.windows.net/;QueueEndpoint=...;SharedAccessSignature=<sas>"
|
|
59
|
+
|
|
60
|
+
Authentication Methods:
|
|
61
|
+
=======================
|
|
62
|
+
- Account Key (connection_string or storage_options)
|
|
63
|
+
- SAS Token (connection_string or storage_options)
|
|
64
|
+
- OAuth/Azure AD (storage_options: client_id, client_secret, tenant_id)
|
|
65
|
+
|
|
66
|
+
"""
|
|
67
|
+
|
|
33
68
|
using_bucket = True
|
|
34
69
|
max_concurrency = 100
|
|
35
70
|
max_blocksize = 1024 * 1024 * 4
|
|
@@ -40,6 +75,12 @@ class AzureBlobStore(DataStore):
|
|
|
40
75
|
def __init__(
|
|
41
76
|
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
42
77
|
):
|
|
78
|
+
# Extract container from WASBS endpoint before calling super()
|
|
79
|
+
self._container_from_endpoint = None
|
|
80
|
+
if schema in ["wasbs", "wasb"] and endpoint and "@" in endpoint:
|
|
81
|
+
# Handle container@host format
|
|
82
|
+
self._container_from_endpoint, endpoint = endpoint.split("@", 1)
|
|
83
|
+
|
|
43
84
|
super().__init__(parent, name, schema, endpoint, secrets=secrets)
|
|
44
85
|
self._service_client = None
|
|
45
86
|
self._storage_options = None
|
|
@@ -67,6 +108,34 @@ class AzureBlobStore(DataStore):
|
|
|
67
108
|
or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
|
|
68
109
|
credential=self._get_secret_or_env("credential"),
|
|
69
110
|
)
|
|
111
|
+
# Use container extracted from WASBS endpoint during initialization
|
|
112
|
+
if self._container_from_endpoint:
|
|
113
|
+
res["container"] = self._container_from_endpoint
|
|
114
|
+
|
|
115
|
+
# For az:// URLs, endpoint contains the container name
|
|
116
|
+
if not res.get("container") and self.kind in ["az"]:
|
|
117
|
+
if container := getattr(self, "endpoint", None):
|
|
118
|
+
res["container"] = container
|
|
119
|
+
|
|
120
|
+
# Last resort: For wasbs:// without container, check if connection string has BlobEndpoint with container
|
|
121
|
+
if not res.get("container") and self.kind in ["wasbs", "wasb"]:
|
|
122
|
+
connection_string = res.get("connection_string")
|
|
123
|
+
if connection_string and "BlobEndpoint=" in connection_string:
|
|
124
|
+
# Try to extract container from BlobEndpoint URL
|
|
125
|
+
for part in connection_string.split(";"):
|
|
126
|
+
if part.startswith("BlobEndpoint="):
|
|
127
|
+
blob_endpoint = part.split("=", 1)[1]
|
|
128
|
+
# Parse URL to get path component
|
|
129
|
+
from urllib.parse import urlparse
|
|
130
|
+
|
|
131
|
+
parsed = urlparse(blob_endpoint)
|
|
132
|
+
if parsed.path and parsed.path.strip("/"):
|
|
133
|
+
# Extract first path segment as container
|
|
134
|
+
path_parts = parsed.path.strip("/").split("/")
|
|
135
|
+
if path_parts[0]:
|
|
136
|
+
res["container"] = path_parts[0]
|
|
137
|
+
break
|
|
138
|
+
|
|
70
139
|
self._storage_options = self._sanitize_options(res)
|
|
71
140
|
return self._storage_options
|
|
72
141
|
|
|
@@ -165,7 +234,18 @@ class AzureBlobStore(DataStore):
|
|
|
165
234
|
# if called without passing dataitem - like in fset.purge_targets,
|
|
166
235
|
# key will include schema.
|
|
167
236
|
if not schema:
|
|
168
|
-
|
|
237
|
+
# For wasbs/wasb, the filesystem is scoped to the container, so we need to use
|
|
238
|
+
# the container name as the base path, not the hostname endpoint.
|
|
239
|
+
# For az://, endpoint already contains the container name.
|
|
240
|
+
if self.kind in ["wasbs", "wasb"]:
|
|
241
|
+
container = self.storage_options.get("container")
|
|
242
|
+
if container:
|
|
243
|
+
key = Path(container, key).as_posix()
|
|
244
|
+
else:
|
|
245
|
+
# If no container found, use endpoint (might be hostname, but better than nothing)
|
|
246
|
+
key = Path(self.endpoint, key).as_posix()
|
|
247
|
+
else:
|
|
248
|
+
key = Path(self.endpoint, key).as_posix()
|
|
169
249
|
return key
|
|
170
250
|
|
|
171
251
|
def upload(self, key, src_path):
|
|
@@ -229,18 +309,27 @@ class AzureBlobStore(DataStore):
|
|
|
229
309
|
st = self.storage_options
|
|
230
310
|
service = "blob"
|
|
231
311
|
primary_url = None
|
|
232
|
-
|
|
312
|
+
|
|
313
|
+
# Parse connection string (fills account_name/account_key or SAS)
|
|
314
|
+
connection_string = st.get("connection_string")
|
|
315
|
+
if connection_string:
|
|
233
316
|
primary_url, _, parsed_credential = parse_connection_str(
|
|
234
|
-
|
|
317
|
+
connection_string, credential=None, service=service
|
|
235
318
|
)
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
319
|
+
|
|
320
|
+
if isinstance(parsed_credential, str):
|
|
321
|
+
# SharedAccessSignature as raw string
|
|
322
|
+
parsed_credential = {"sas_token": parsed_credential}
|
|
323
|
+
|
|
324
|
+
for key in ["account_name", "account_key", "sas_token"]:
|
|
325
|
+
if parsed_value := parsed_credential.get(key):
|
|
326
|
+
# Only check for conflicts if storage options has a non-empty value for this key
|
|
327
|
+
existing_value = st.get(key)
|
|
328
|
+
if existing_value and existing_value != parsed_value:
|
|
240
329
|
if key == "account_name":
|
|
241
330
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
242
|
-
f"Storage option for '{key}' is '{
|
|
243
|
-
|
|
331
|
+
f"Storage option for '{key}' is '{existing_value}', "
|
|
332
|
+
f"which does not match corresponding connection string '{parsed_value}'"
|
|
244
333
|
)
|
|
245
334
|
else:
|
|
246
335
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -249,57 +338,83 @@ class AzureBlobStore(DataStore):
|
|
|
249
338
|
st[key] = parsed_value
|
|
250
339
|
|
|
251
340
|
account_name = st.get("account_name")
|
|
341
|
+
# Derive host (prefer connection string primary URL)
|
|
252
342
|
if primary_url:
|
|
253
343
|
if primary_url.startswith("http://"):
|
|
254
344
|
primary_url = primary_url[len("http://") :]
|
|
255
345
|
if primary_url.startswith("https://"):
|
|
256
346
|
primary_url = primary_url[len("https://") :]
|
|
257
|
-
|
|
347
|
+
# Remove any path components from the host
|
|
348
|
+
host = primary_url.split("/")[0]
|
|
258
349
|
elif account_name:
|
|
259
350
|
host = f"{account_name}.{service}.core.windows.net"
|
|
260
351
|
else:
|
|
352
|
+
# nothing to configure yet
|
|
261
353
|
return res
|
|
262
354
|
|
|
263
|
-
|
|
355
|
+
host = host.rstrip("/")
|
|
356
|
+
|
|
357
|
+
# Account key (optional; WASB supports it)
|
|
358
|
+
if "account_key" in st and st["account_key"]:
|
|
264
359
|
res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
|
|
265
360
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
"org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
|
|
270
|
-
)
|
|
271
|
-
if "client_id" in st:
|
|
272
|
-
res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
|
|
273
|
-
"client_id"
|
|
274
|
-
]
|
|
275
|
-
if "client_secret" in st:
|
|
276
|
-
res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
|
|
277
|
-
"client_secret"
|
|
278
|
-
]
|
|
279
|
-
if "tenant_id" in st:
|
|
280
|
-
tenant_id = st["tenant_id"]
|
|
281
|
-
res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
|
|
282
|
-
f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
|
|
283
|
-
)
|
|
361
|
+
# --- WASB + SAS (container-scoped key; no provider classes needed) ---
|
|
362
|
+
if "sas_token" in st and st["sas_token"]:
|
|
363
|
+
sas = st["sas_token"].lstrip("?")
|
|
284
364
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
365
|
+
container = st.get("container")
|
|
366
|
+
|
|
367
|
+
if container:
|
|
368
|
+
# fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
|
|
369
|
+
res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
|
|
370
|
+
|
|
371
|
+
else:
|
|
372
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
373
|
+
"Container name is required for WASB SAS. "
|
|
374
|
+
"Set self.endpoint or storage_options['container']."
|
|
375
|
+
)
|
|
291
376
|
return res
|
|
292
377
|
|
|
293
378
|
@property
|
|
294
379
|
def spark_url(self):
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
380
|
+
# Build: wasbs://<container>@<host>
|
|
381
|
+
st = self.storage_options
|
|
382
|
+
service = "blob"
|
|
383
|
+
|
|
384
|
+
container = st.get("container")
|
|
385
|
+
|
|
386
|
+
if not container:
|
|
387
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
388
|
+
"Container name is required to build the WASB URL. "
|
|
389
|
+
"Set storage_options['container'] or use datastore profile with container specified."
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Prefer host from connection string; else synthesize from account_name
|
|
393
|
+
host = None
|
|
394
|
+
account_name = st.get("account_name")
|
|
395
|
+
connection_string = st.get("connection_string")
|
|
396
|
+
|
|
397
|
+
if connection_string:
|
|
398
|
+
with contextlib.suppress(Exception):
|
|
399
|
+
primary_url, _, _ = parse_connection_str(
|
|
400
|
+
connection_string, credential=None, service=service
|
|
401
|
+
)
|
|
402
|
+
if primary_url.startswith("http://"):
|
|
403
|
+
primary_url = primary_url[len("http://") :]
|
|
404
|
+
if primary_url.startswith("https://"):
|
|
405
|
+
primary_url = primary_url[len("https://") :]
|
|
406
|
+
# Remove any path components from the host
|
|
407
|
+
host = primary_url.split("/")[0].rstrip("/")
|
|
408
|
+
if not host and account_name:
|
|
409
|
+
host = f"{account_name}.{service}.core.windows.net"
|
|
410
|
+
|
|
411
|
+
# For wasbs:// URLs where endpoint is already the host
|
|
412
|
+
if not host and self.kind in ["wasbs", "wasb"] and hasattr(self, "endpoint"):
|
|
413
|
+
host = getattr(self, "endpoint", None)
|
|
414
|
+
|
|
415
|
+
if not host:
|
|
416
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
417
|
+
"account_name is required (or provide a connection_string) to build the WASB URL."
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
return f"wasbs://{container}@{host}"
|
mlrun/datastore/datastore.py
CHANGED
|
@@ -38,6 +38,8 @@ from ..utils import DB_SCHEMA, RunKeys
|
|
|
38
38
|
from .base import DataItem, DataStore, HttpStore
|
|
39
39
|
from .filestore import FileStore
|
|
40
40
|
from .inmem import InMemoryStore
|
|
41
|
+
from .model_provider.huggingface_provider import HuggingFaceProvider
|
|
42
|
+
from .model_provider.mock_model_provider import MockModelProvider
|
|
41
43
|
from .model_provider.openai_provider import OpenAIProvider
|
|
42
44
|
from .store_resources import get_store_resource, is_store_uri
|
|
43
45
|
from .v3io import V3ioStore
|
|
@@ -102,8 +104,11 @@ def schema_to_store(schema) -> DataStore.__subclasses__():
|
|
|
102
104
|
def schema_to_model_provider(
|
|
103
105
|
schema: str, raise_missing_schema_exception=True
|
|
104
106
|
) -> type[ModelProvider]:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
+
schema_dict = {
|
|
108
|
+
"openai": OpenAIProvider,
|
|
109
|
+
"huggingface": HuggingFaceProvider,
|
|
110
|
+
"mock": MockModelProvider,
|
|
111
|
+
}
|
|
107
112
|
provider_class = schema_dict.get(schema, None)
|
|
108
113
|
if not provider_class:
|
|
109
114
|
if raise_missing_schema_exception:
|
|
@@ -247,7 +252,7 @@ class StoreManager:
|
|
|
247
252
|
|
|
248
253
|
if schema == "ds":
|
|
249
254
|
datastore_profile = datastore_profile_read(url, project_name, secrets)
|
|
250
|
-
secrets = merge(secrets or {}, datastore_profile.secrets() or {})
|
|
255
|
+
secrets = merge({}, secrets or {}, datastore_profile.secrets() or {})
|
|
251
256
|
url = datastore_profile.url(subpath)
|
|
252
257
|
schema, endpoint, parsed_url = parse_url(url)
|
|
253
258
|
subpath = parsed_url.path
|
|
@@ -281,7 +286,7 @@ class StoreManager:
|
|
|
281
286
|
endpoint, subpath
|
|
282
287
|
)
|
|
283
288
|
remote_client = remote_client_class(
|
|
284
|
-
self, schema, cache_key,
|
|
289
|
+
self, schema, cache_key, endpoint, secrets=secrets, **kwargs
|
|
285
290
|
)
|
|
286
291
|
if not secrets and not mlrun.config.is_running_as_api():
|
|
287
292
|
cache[cache_key] = remote_client
|
|
@@ -19,6 +19,7 @@ import typing
|
|
|
19
19
|
from urllib.parse import ParseResult, urlparse
|
|
20
20
|
|
|
21
21
|
import pydantic.v1
|
|
22
|
+
from deprecated import deprecated
|
|
22
23
|
from mergedeep import merge
|
|
23
24
|
|
|
24
25
|
import mlrun
|
|
@@ -138,6 +139,15 @@ class ConfigProfile(DatastoreProfile):
|
|
|
138
139
|
return res
|
|
139
140
|
|
|
140
141
|
|
|
142
|
+
# TODO: Remove in 1.12.0
|
|
143
|
+
@deprecated(
|
|
144
|
+
version="1.10.0",
|
|
145
|
+
reason=(
|
|
146
|
+
"This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
|
|
147
|
+
"Use `DatastoreProfileKafkaStream` instead."
|
|
148
|
+
),
|
|
149
|
+
category=FutureWarning,
|
|
150
|
+
)
|
|
141
151
|
class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
142
152
|
type: str = pydantic.v1.Field("kafka_target")
|
|
143
153
|
_private_attributes = "kwargs_private"
|
|
@@ -158,8 +168,8 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
|
158
168
|
return attributes
|
|
159
169
|
|
|
160
170
|
|
|
161
|
-
class
|
|
162
|
-
type: str = pydantic.v1.Field("
|
|
171
|
+
class DatastoreProfileKafkaStream(DatastoreProfile):
|
|
172
|
+
type: str = pydantic.v1.Field("kafka_stream")
|
|
163
173
|
_private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
|
|
164
174
|
brokers: typing.Union[str, list[str]]
|
|
165
175
|
topics: typing.Union[str, list[str]]
|
|
@@ -198,6 +208,19 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
|
198
208
|
return attributes
|
|
199
209
|
|
|
200
210
|
|
|
211
|
+
# TODO: Remove in 1.12.0
|
|
212
|
+
@deprecated(
|
|
213
|
+
version="1.10.0",
|
|
214
|
+
reason=(
|
|
215
|
+
"This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
|
|
216
|
+
"Use `DatastoreProfileKafkaStream` instead."
|
|
217
|
+
),
|
|
218
|
+
category=FutureWarning,
|
|
219
|
+
)
|
|
220
|
+
class DatastoreProfileKafkaSource(DatastoreProfileKafkaStream):
|
|
221
|
+
type: str = pydantic.v1.Field("kafka_source")
|
|
222
|
+
|
|
223
|
+
|
|
201
224
|
class DatastoreProfileV3io(DatastoreProfile):
|
|
202
225
|
type: str = pydantic.v1.Field("v3io")
|
|
203
226
|
v3io_access_key: typing.Optional[str] = None
|
|
@@ -232,7 +255,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
232
255
|
if self.secret_key:
|
|
233
256
|
res["AWS_SECRET_ACCESS_KEY"] = self.secret_key
|
|
234
257
|
if self.endpoint_url:
|
|
235
|
-
res["
|
|
258
|
+
res["AWS_ENDPOINT_URL_S3"] = self.endpoint_url
|
|
236
259
|
if self.force_non_anonymous:
|
|
237
260
|
res["S3_NON_ANONYMOUS"] = self.force_non_anonymous
|
|
238
261
|
if self.profile_name:
|
|
@@ -333,7 +356,9 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
333
356
|
# In GCS the path after the schema starts with the bucket, so it must not start with "/".
|
|
334
357
|
subpath = subpath[1:]
|
|
335
358
|
if self.bucket:
|
|
336
|
-
return
|
|
359
|
+
return (
|
|
360
|
+
f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
|
|
361
|
+
)
|
|
337
362
|
else:
|
|
338
363
|
return f"gcs://{subpath}"
|
|
339
364
|
|
|
@@ -370,7 +395,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
370
395
|
# In Azure the path after the schema starts with the container, so it must not start with "/".
|
|
371
396
|
subpath = subpath[1:]
|
|
372
397
|
if self.container:
|
|
373
|
-
return
|
|
398
|
+
return (
|
|
399
|
+
f"az://{self.container}/{subpath}"
|
|
400
|
+
if subpath
|
|
401
|
+
else f"az://{self.container}"
|
|
402
|
+
)
|
|
374
403
|
else:
|
|
375
404
|
return f"az://{subpath}"
|
|
376
405
|
|
|
@@ -486,6 +515,31 @@ class OpenAIProfile(DatastoreProfile):
|
|
|
486
515
|
return f"{self.type}://{subpath.lstrip('/')}"
|
|
487
516
|
|
|
488
517
|
|
|
518
|
+
class HuggingFaceProfile(DatastoreProfile):
|
|
519
|
+
type: str = pydantic.v1.Field("huggingface")
|
|
520
|
+
_private_attributes = ("token", "model_kwargs")
|
|
521
|
+
task: typing.Optional[str] = None
|
|
522
|
+
token: typing.Optional[str] = None
|
|
523
|
+
device: typing.Optional[typing.Union[int, str]] = None
|
|
524
|
+
device_map: typing.Union[str, dict[str, typing.Union[int, str]], None] = None
|
|
525
|
+
trust_remote_code: bool = None
|
|
526
|
+
model_kwargs: typing.Optional[dict[str, typing.Any]] = None
|
|
527
|
+
|
|
528
|
+
def secrets(self) -> dict:
|
|
529
|
+
keys = {
|
|
530
|
+
"HF_TASK": self.task,
|
|
531
|
+
"HF_TOKEN": self.token,
|
|
532
|
+
"HF_DEVICE": self.device,
|
|
533
|
+
"HF_DEVICE_MAP": self.device_map,
|
|
534
|
+
"HF_TRUST_REMOTE_CODE": self.trust_remote_code,
|
|
535
|
+
"HF_MODEL_KWARGS": self.model_kwargs,
|
|
536
|
+
}
|
|
537
|
+
return {k: v for k, v in keys.items() if v}
|
|
538
|
+
|
|
539
|
+
def url(self, subpath):
|
|
540
|
+
return f"{self.type}://{subpath.lstrip('/')}"
|
|
541
|
+
|
|
542
|
+
|
|
489
543
|
_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
|
|
490
544
|
"v3io": DatastoreProfileV3io,
|
|
491
545
|
"s3": DatastoreProfileS3,
|
|
@@ -493,6 +547,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
|
|
|
493
547
|
"basic": DatastoreProfileBasic,
|
|
494
548
|
"kafka_target": DatastoreProfileKafkaTarget,
|
|
495
549
|
"kafka_source": DatastoreProfileKafkaSource,
|
|
550
|
+
"kafka_stream": DatastoreProfileKafkaStream,
|
|
496
551
|
"dbfs": DatastoreProfileDBFS,
|
|
497
552
|
"gcs": DatastoreProfileGCS,
|
|
498
553
|
"az": DatastoreProfileAzureBlob,
|
|
@@ -500,6 +555,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
|
|
|
500
555
|
"taosws": DatastoreProfileTDEngine,
|
|
501
556
|
"config": ConfigProfile,
|
|
502
557
|
"openai": OpenAIProfile,
|
|
558
|
+
"huggingface": HuggingFaceProfile,
|
|
503
559
|
}
|
|
504
560
|
|
|
505
561
|
|