mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun may be problematic.
- mlrun/artifacts/llm_prompt.py +8 -1
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/hub.py +25 -18
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
- mlrun/config.py +2 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/azure_blob.py +66 -43
- mlrun/datastore/datastore_profile.py +35 -5
- mlrun/datastore/model_provider/huggingface_provider.py +122 -30
- mlrun/datastore/model_provider/model_provider.py +62 -4
- mlrun/datastore/model_provider/openai_provider.py +114 -43
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/db/base.py +15 -1
- mlrun/db/httpdb.py +17 -6
- mlrun/db/nopdb.py +14 -0
- mlrun/k8s_utils.py +0 -14
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +37 -10
- mlrun/model_monitoring/applications/context.py +1 -4
- mlrun/model_monitoring/controller.py +15 -5
- mlrun/model_monitoring/db/_schedules.py +2 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +3 -1
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -0
- mlrun/model_monitoring/helpers.py +5 -5
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/project.py +33 -29
- mlrun/runtimes/base.py +0 -3
- mlrun/runtimes/mounts.py +15 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +11 -2
- mlrun/runtimes/nuclio/function.py +10 -0
- mlrun/runtimes/nuclio/serving.py +4 -0
- mlrun/runtimes/pod.py +153 -11
- mlrun/runtimes/utils.py +22 -5
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +26 -14
- mlrun/serving/states.py +3 -3
- mlrun/serving/system_steps.py +52 -29
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/METADATA +24 -23
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/RECORD +50 -50
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc26.dist-info}/top_level.txt +0 -0
mlrun/artifacts/llm_prompt.py
CHANGED
@@ -62,12 +62,19 @@ class LLMPromptArtifactSpec(ArtifactSpec):
             parent_uri=model_artifact.uri
             if isinstance(model_artifact, model_art.ModelArtifact)
             else model_artifact,
+            format=kwargs.pop("format", "") or "json",
             **kwargs,
         )

         self.prompt_template = prompt_template
         self.prompt_legend = prompt_legend
-
+        if model_configuration is not None and not isinstance(
+            model_configuration, dict
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "LLMPromptArtifact model_configuration must be a dictionary or None"
+            )
+        self.model_configuration = model_configuration or {}
         self.description = description
         self._model_artifact = (
             model_artifact
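The new guard narrows `model_configuration` to a dict or `None`. A minimal standalone sketch of the same check (the helper name `validate_model_configuration` is ours, not mlrun's):

import mlrun.errors

def validate_model_configuration(model_configuration):
    # Mirrors the guard added above: reject anything that is neither None nor a dict,
    # then normalize None to an empty dict.
    if model_configuration is not None and not isinstance(model_configuration, dict):
        raise mlrun.errors.MLRunInvalidArgumentError(
            "LLMPromptArtifact model_configuration must be a dictionary or None"
        )
    return model_configuration or {}

validate_model_configuration({"temperature": 0.2})  # -> {"temperature": 0.2}
validate_model_configuration(None)                  # -> {}
# validate_model_configuration("gpt-4o")            # raises MLRunInvalidArgumentError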
mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -14,6 +14,7 @@

 import sys
 import typing
+from datetime import datetime

 import mlrun.common
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
 BinEdges = typing.NewType("BinEdges", list[float])

 _MAX_FLOAT = sys.float_info.max
+logger = mlrun.utils.create_logger(level="info", name="mm_helpers")


 def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -87,3 +89,87 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
     for feature in feature_stats.values():
         if hist_key in feature:
             pad_hist(Histogram(feature[hist_key]))
+
+
+def get_model_endpoints_creation_task_status(
+    server,
+) -> tuple[
+    mlrun.common.schemas.BackgroundTaskState,
+    typing.Optional[datetime],
+    typing.Optional[set[str]],
+]:
+    background_task = None
+    background_task_state = mlrun.common.schemas.BackgroundTaskState.running
+    background_task_check_timestamp = None
+    model_endpoint_uids = None
+    try:
+        background_task = mlrun.get_run_db().get_project_background_task(
+            server.project, server.model_endpoint_creation_task_name
+        )
+        background_task_check_timestamp = mlrun.utils.now_date()
+        log_background_task_state(
+            server, background_task.status.state, background_task_check_timestamp
+        )
+        background_task_state = background_task.status.state
+    except mlrun.errors.MLRunNotFoundError:
+        logger.warning(
+            "Model endpoint creation task not found listing model endpoints",
+            project=server.project,
+            task_name=server.model_endpoint_creation_task_name,
+        )
+    if background_task is None:
+        model_endpoints = mlrun.get_run_db().list_model_endpoints(
+            project=server.project,
+            function_name=server.function_name,
+            function_tag=server.function_tag,
+            tsdb_metrics=False,
+        )
+        if model_endpoints:
+            model_endpoint_uids = {
+                endpoint.metadata.uid for endpoint in model_endpoints.endpoints
+            }
+            logger.info(
+                "Model endpoints found after background task not found, model monitoring will monitor "
+                "events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+                uids=model_endpoint_uids,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.succeeded
+        else:
+            logger.warning(
+                "Model endpoints not found after background task not found, model monitoring will not "
+                "monitor events",
+                project=server.project,
+                function_name=server.function_name,
+                function_tag=server.function_tag,
+            )
+            background_task_state = mlrun.common.schemas.BackgroundTaskState.failed
+    return background_task_state, background_task_check_timestamp, model_endpoint_uids
+
+
+def log_background_task_state(
+    server,
+    background_task_state: mlrun.common.schemas.BackgroundTaskState,
+    background_task_check_timestamp: typing.Optional[datetime],
+):
+    logger.info(
+        "Checking model endpoint creation task status",
+        task_name=server.model_endpoint_creation_task_name,
+    )
+    if (
+        background_task_state
+        in mlrun.common.schemas.BackgroundTaskState.terminal_states()
+    ):
+        logger.info(
+            f"Model endpoint creation task completed with state {background_task_state}"
+        )
+    else:  # in progress
+        logger.info(
+            f"Model endpoint creation task is still in progress with the current state: "
+            f"{background_task_state}. Events will not be monitored for the next "
+            f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
+            function_name=server.function.name,
+            background_task_check_timestamp=background_task_check_timestamp.isoformat(),
+        )
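A rough sketch of how a caller might consume the new helper. The `server` argument only needs `project`, `function_name`, `function_tag`, and `model_endpoint_creation_task_name` attributes; the `FakeServer` stub below is hypothetical, and a reachable MLRun API is assumed:

import mlrun
import mlrun.common.schemas
from mlrun.common.model_monitoring.helpers import (
    get_model_endpoints_creation_task_status,
)


class FakeServer:
    # Hypothetical stand-in exposing the attributes the helper reads.
    project = "my-project"
    function_name = "serving"
    function_tag = "latest"
    model_endpoint_creation_task_name = "create-model-endpoints"


state, checked_at, uids = get_model_endpoints_creation_task_status(FakeServer())
if state == mlrun.common.schemas.BackgroundTaskState.succeeded:
    print(f"monitoring is active for endpoints {uids} (checked at {checked_at})")
elif state == mlrun.common.schemas.BackgroundTaskState.failed:
    print("no endpoints found; events will not be monitored")
else:
    print("endpoint creation still in progress; events are not monitored yet")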
mlrun/common/schemas/hub.py
CHANGED
@@ -15,6 +15,7 @@
 from datetime import datetime, timezone
 from typing import Optional

+import deepdiff
 from pydantic.v1 import BaseModel, Extra, Field

 import mlrun.common.types
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
         extra = Extra.allow


-# Currently only functions are supported. Will add more in the future.
 class HubSourceType(mlrun.common.types.StrEnum):
     functions = "functions"
+    modules = "modules"


 # Sources-related objects
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
     path: str  # URL to base directory, should include schema (s3://, etc...)
     channel: str
     credentials: Optional[dict] = {}
-    object_type: HubSourceType = Field(HubSourceType.functions, const=True)


 class HubSource(BaseModel):
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
     spec: HubSourceSpec
     status: Optional[ObjectStatus] = ObjectStatus(state="created")

-    def get_full_uri(self, relative_path):
-        return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
+    def get_full_uri(self, relative_path, object_type):
+        return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"

-    def get_catalog_uri(self):
-        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
+    def get_catalog_uri(self, object_type):
+        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)

     @classmethod
     def generate_default_source(cls):
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
             spec=HubSourceSpec(
                 path=mlrun.mlconf.hub.default_source.url,
                 channel=mlrun.mlconf.hub.default_source.channel,
-                object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
             ),
             status=ObjectStatus(state="created"),
         )

+    def diff(self, another_source: "HubSource") -> dict:
+        """
+        Compare this HubSource with another one.
+        Returns a dict of differences (metadata, spec, status).
+        """
+        exclude_paths = [
+            "root['metadata']['updated']",
+            "root['metadata']['created']",
+        ]
+        return deepdiff.DeepDiff(
+            self.dict(), another_source.dict(), exclude_paths=exclude_paths
+        )
+

 last_source_index = -1

@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):

 # Item-related objects
 class HubItemMetadata(HubObjectMetadata):
-    source: HubSourceType = Field(HubSourceType.functions, const=True)
+    source: HubSourceType = HubSourceType.functions
     version: str
     tag: Optional[str]

     def get_relative_path(self) -> str:
-        if self.source == HubSourceType.functions:
-            # This is needed since the hub deployment script modifies the paths to use _ instead of -.
-            modified_name = self.name.replace("-", "_")
-            # Prefer using the tag if exists. Otherwise, use version.
-            version = self.tag or self.version
-            return f"{modified_name}/{version}/"
-        else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Bad source for hub item - {self.source}"
-            )
+        # This is needed since the hub deployment script modifies the paths to use _ instead of -.
+        modified_name = self.name.replace("-", "_")
+        # Prefer using the tag if exists. Otherwise, use version.
+        version = self.tag or self.version
+        return f"{modified_name}/{version}/"


 class HubItemSpec(ObjectSpec):
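The new `diff` method delegates to `deepdiff.DeepDiff` with `exclude_paths`, so timestamp churn in `metadata.created`/`metadata.updated` never registers as a difference. The same pattern in isolation, using plain dicts instead of `HubSource` objects:

import deepdiff

old = {"metadata": {"name": "hub", "updated": "2024-01-01"}, "spec": {"channel": "master"}}
new = {"metadata": {"name": "hub", "updated": "2024-06-01"}, "spec": {"channel": "development"}}

diff = deepdiff.DeepDiff(
    old,
    new,
    exclude_paths=["root['metadata']['updated']", "root['metadata']['created']"],
)
print(diff)
# {'values_changed': {"root['spec']['channel']": {'new_value': 'development',
#                                                 'old_value': 'master'}}}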
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -331,6 +331,7 @@ class EndpointType(IntEnum):
 class EndpointMode(IntEnum):
     REAL_TIME = 0
     BATCH = 1
+    BATCH_LEGACY = 2  # legacy batch mode, used for endpoints created through the batch inference job


 class MonitoringFunctionNames(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -119,7 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
-    mode: EndpointMode = EndpointMode.REAL_TIME
+    mode: Optional[EndpointMode] = None

     @classmethod
     def mutable_fields(cls):
@@ -131,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
             return str(v)
         return v

+    @validator("mode", pre=True, always=True)
+    def _set_mode_based_on_endpoint_type(cls, v, values):  # noqa: N805
+        if v is None:
+            if values.get("endpoint_type") == EndpointType.BATCH_EP:
+                return EndpointMode.BATCH_LEGACY
+            else:
+                return EndpointMode.REAL_TIME
+        return v
+

 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
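Because the validator runs with `pre=True, always=True`, pydantic v1 invokes it even when `mode` is omitted, and `values` already holds the previously validated `endpoint_type`; that is what turns the `None` default into a computed one. A self-contained sketch of the pattern (enum values here are illustrative, not mlrun's):

from enum import IntEnum
from typing import Optional

from pydantic.v1 import BaseModel, validator


class EndpointType(IntEnum):
    NODE_EP = 1
    BATCH_EP = 2  # illustrative value


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH_LEGACY = 2


class Metadata(BaseModel):
    endpoint_type: EndpointType = EndpointType.NODE_EP
    mode: Optional[EndpointMode] = None

    @validator("mode", pre=True, always=True)
    def _default_mode(cls, v, values):  # noqa: N805
        if v is None:
            if values.get("endpoint_type") == EndpointType.BATCH_EP:
                return EndpointMode.BATCH_LEGACY
            return EndpointMode.REAL_TIME
        return v


print(Metadata().mode)                                     # EndpointMode.REAL_TIME
print(Metadata(endpoint_type=EndpointType.BATCH_EP).mode)  # EndpointMode.BATCH_LEGACY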
mlrun/config.py
CHANGED
@@ -718,7 +718,6 @@ default_config = {
             "name": "default",
             "description": "MLRun global function hub",
             "url": "https://mlrun.github.io/marketplace",
-            "object_type": "functions",
             "channel": "master",
         },
     },
@@ -1000,9 +999,9 @@ class Config:
         )

     @staticmethod
-    def get_default_hub_source_url_prefix() -> str:
+    def get_default_hub_source_url_prefix(object_type) -> str:
         default_source = config.hub.default_source
-        return f"{default_source.url}/{default_source.object_type}/{default_source.channel}/"
+        return f"{default_source.url}/{object_type}/{default_source.channel}/"

     @staticmethod
     def decode_base64_config_and_load_to_object(
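With `object_type` now passed at call time, one default source serves both catalogs. Assuming the shipped defaults above (`url=https://mlrun.github.io/marketplace`, `channel=master`):

from mlrun import mlconf

mlconf.get_default_hub_source_url_prefix("functions")
# -> "https://mlrun.github.io/marketplace/functions/master/"
mlconf.get_default_hub_source_url_prefix("modules")
# -> "https://mlrun.github.io/marketplace/modules/master/"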
mlrun/datastore/__init__.py
CHANGED
@@ -43,7 +43,7 @@ import storey

 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
-    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaStream,
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
 )
@@ -123,7 +123,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
     )
     if isinstance(
         datastore_profile,
-        (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+        (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
     ):
         attributes = datastore_profile.attributes()
         brokers = attributes.pop("brokers", None)
mlrun/datastore/azure_blob.py
CHANGED
@@ -229,18 +229,25 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-
+                connection_string, credential=None, service=service
             )
-
-
-
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
                     if key in st and st[key] != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}'
-
+                                f"Storage option for '{key}' is '{st[key]}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +256,7 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value

         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
@@ -258,48 +266,63 @@ class AzureBlobStore(DataStore):
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res

-
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]

-
-
-
-
-
-
-            res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-                "client_id"
-            ]
-            if "client_secret" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                    "client_secret"
-                ]
-            if "tenant_id" in st:
-                tenant_id = st["tenant_id"]
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-                )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
+            if container := getattr(self, "endpoint", None) or st.get("container"):
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas

-
-
-
-
-
-            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res

     @property
     def spark_url(self):
-
-
-
-
-
-
-
-
-
-
-
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = getattr(self, "endpoint", None) or st.get("container")
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container is required to build the WASB URL "
+                "(self.endpoint or storage_options['container'])."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            primary_url, _, _ = parse_connection_str(
+                connection_string, credential=None, service=service
+            )
+            if primary_url.startswith("http://"):
+                primary_url = primary_url[len("http://") :]
+            if primary_url.startswith("https://"):
+                primary_url = primary_url[len("https://") :]
+            host = primary_url.rstrip("/")
+
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
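The net effect of the WASB + SAS branch is a container-scoped Hadoop key instead of an account-wide one. A sketch of the computed options for a hypothetical account `myacct` and container `data` (values are placeholders; key names follow the code above):

# Hypothetical inputs
storage_options = {
    "account_name": "myacct",
    "container": "data",
    "sas_token": "?sv=2023-01-03&ss=b&sig=REDACTED",
}

host = f"{storage_options['account_name']}.blob.core.windows.net"
sas = storage_options["sas_token"].lstrip("?")

spark_options = {
    # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
    f"spark.hadoop.fs.azure.sas.{storage_options['container']}.{host}": sas,
}
spark_url = f"wasbs://{storage_options['container']}@{host}"
print(spark_options)
print(spark_url)  # wasbs://data@myacct.blob.core.windows.net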
mlrun/datastore/datastore_profile.py
CHANGED
@@ -19,6 +19,7 @@ import typing
 from urllib.parse import ParseResult, urlparse

 import pydantic.v1
+from deprecated import deprecated
 from mergedeep import merge

 import mlrun
@@ -138,6 +139,15 @@ class ConfigProfile(DatastoreProfile):
         return res


+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.v1.Field("kafka_target")
     _private_attributes = "kwargs_private"
@@ -158,8 +168,8 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
         return attributes


-class DatastoreProfileKafkaSource(DatastoreProfile):
-    type: str = pydantic.v1.Field("kafka_source")
+class DatastoreProfileKafkaStream(DatastoreProfile):
+    type: str = pydantic.v1.Field("kafka_stream")
     _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
     brokers: typing.Union[str, list[str]]
     topics: typing.Union[str, list[str]]
@@ -198,6 +208,19 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
         return attributes


+# TODO: Remove in 1.12.0
+@deprecated(
+    version="1.10.0",
+    reason=(
+        "This class is deprecated from mlrun 1.10.0, and will be removed in 1.12.0. "
+        "Use `DatastoreProfileKafkaStream` instead."
+    ),
+    category=FutureWarning,
+)
+class DatastoreProfileKafkaSource(DatastoreProfileKafkaStream):
+    type: str = pydantic.v1.Field("kafka_source")
+
+
 class DatastoreProfileV3io(DatastoreProfile):
     type: str = pydantic.v1.Field("v3io")
     v3io_access_key: typing.Optional[str] = None
@@ -232,7 +255,7 @@ class DatastoreProfileS3(DatastoreProfile):
         if self.secret_key:
             res["AWS_SECRET_ACCESS_KEY"] = self.secret_key
         if self.endpoint_url:
-            res["S3_ENDPOINT_URL"] = self.endpoint_url
+            res["AWS_ENDPOINT_URL_S3"] = self.endpoint_url
         if self.force_non_anonymous:
             res["S3_NON_ANONYMOUS"] = self.force_non_anonymous
         if self.profile_name:
@@ -333,7 +356,9 @@ class DatastoreProfileGCS(DatastoreProfile):
         # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
         subpath = subpath[1:]
         if self.bucket:
-            return f"gcs://{self.bucket}/{subpath}"
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"

@@ -370,7 +395,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
         # in azure the path after schema is starts with container, wherefore it should not start with "/".
         subpath = subpath[1:]
         if self.container:
-            return f"az://{self.container}/{subpath}"
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"

@@ -518,6 +547,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "basic": DatastoreProfileBasic,
     "kafka_target": DatastoreProfileKafkaTarget,
     "kafka_source": DatastoreProfileKafkaSource,
+    "kafka_stream": DatastoreProfileKafkaStream,
     "dbfs": DatastoreProfileDBFS,
     "gcs": DatastoreProfileGCS,
     "az": DatastoreProfileAzureBlob,
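Migration sketch for the Kafka profile rename. Because `DatastoreProfileKafkaSource` is now a subclass of `DatastoreProfileKafkaStream`, isinstance-based dispatch (as in `get_stream_pusher` above) keeps matching the old class while it emits a `FutureWarning`; field values below are placeholders:

import warnings

from mlrun.datastore.datastore_profile import (
    DatastoreProfileKafkaSource,
    DatastoreProfileKafkaStream,
)

# New, preferred profile type
stream = DatastoreProfileKafkaStream(
    name="my-kafka",
    brokers=["broker-1:9092"],
    topics=["model-events"],
)

# Deprecated alias: still constructible, but warns until its removal in 1.12.0
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    source = DatastoreProfileKafkaSource(
        name="legacy-kafka", brokers="broker-1:9092", topics="model-events"
    )

print(isinstance(source, DatastoreProfileKafkaStream))  # True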