mlrun 1.6.2rc5__py3-none-any.whl → 1.6.2rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/db/sql_session.py +0 -3
- mlrun/common/model_monitoring/helpers.py +2 -4
- mlrun/common/schemas/__init__.py +0 -1
- mlrun/common/schemas/project.py +0 -2
- mlrun/config.py +11 -30
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +44 -22
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +46 -70
- mlrun/db/base.py +0 -18
- mlrun/db/httpdb.py +25 -28
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +5 -10
- mlrun/kfpops.py +10 -19
- mlrun/model.py +0 -5
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +1 -0
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +1 -3
- mlrun/platforms/iguazio.py +65 -6
- mlrun/projects/pipelines.py +11 -21
- mlrun/projects/project.py +46 -65
- mlrun/runtimes/base.py +1 -24
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +3 -5
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +2 -4
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +0 -8
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/METADATA +16 -14
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/RECORD +47 -48
- mlrun/common/schemas/common.py +0 -40
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc5.dist-info → mlrun-1.6.2rc6.dist-info}/top_level.txt +0 -0
mlrun/common/db/sql_session.py
CHANGED
@@ -63,12 +63,9 @@ def _init_engine(dsn=None):
     max_overflow = config.httpdb.db.connections_pool_max_overflow
     if max_overflow is None:
         max_overflow = config.httpdb.max_workers
-
     kwargs = {
         "pool_size": pool_size,
         "max_overflow": max_overflow,
-        "pool_pre_ping": config.httpdb.db.connections_pool_pre_ping,
-        "pool_recycle": config.httpdb.db.connections_pool_recycle,
     }
     engine = create_engine(dsn, **kwargs)
     _engines[dsn] = engine
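Reviewer note: the two dropped keyword arguments are standard SQLAlchemy pool options. A minimal sketch of what they control (the DSN and pool sizes below are illustrative, not mlrun defaults):

from sqlalchemy import create_engine

# pool_pre_ping tests each pooled connection with a lightweight ping on checkout
# and replaces stale ones; pool_recycle closes connections older than the given
# number of seconds. rc6 no longer passes either option to create_engine.
engine = create_engine(
    "mysql+pymysql://user:password@db-host:3306/mlrun",  # illustrative DSN
    pool_size=8,
    max_overflow=8,
    pool_pre_ping=True,
    pool_recycle=60 * 60,
)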
mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -82,15 +82,13 @@ def parse_monitoring_stream_path(
     if application_name is None:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=project
+                project=project
             )
         )
     else:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                project=project,
-                application_name=application_name,
-                namespace=mlrun.mlconf.namespace,
+                project=project, application_name=application_name
             )
         )
     return stream_uri
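Reviewer note: the namespace argument disappears here because the rc6 default sink URLs (see mlrun/config.py below) hard-code the mlrun namespace. A minimal sketch of the simplified call, with made-up project and application names:

# The template matches the rc6 default_http_sink_app value; the names are illustrative.
template = "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080"
stream_uri = template.format(project="fraud-demo", application_name="drift-app")
print(stream_uri)
# http://nuclio-fraud-demo-drift-app.mlrun.svc.cluster.local:8080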
mlrun/common/schemas/__init__.py
CHANGED
mlrun/common/schemas/project.py
CHANGED
@@ -19,7 +19,6 @@ import pydantic
 
 import mlrun.common.types
 
-from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
@@ -86,7 +85,6 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
-    build: typing.Optional[ImageBuilder] = None
 
     class Config:
         extra = pydantic.Extra.allow
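Reviewer note: ProjectSpec keeps extra = pydantic.Extra.allow, so payloads that still carry a build section are accepted, just no longer validated as an ImageBuilder. A minimal pydantic sketch of that behavior (a simplified stand-in, not the real ProjectSpec):

import typing

import pydantic


class ProjectSpecSketch(pydantic.BaseModel):
    # simplified stand-in for ProjectSpec
    default_image: typing.Optional[str] = None

    class Config:
        extra = pydantic.Extra.allow


# "build" is kept as an untyped extra field instead of being parsed into ImageBuilder.
spec = ProjectSpecSketch(default_image="mlrun/mlrun", build={"image": "x"})
print(spec.dict())  # {'default_image': 'mlrun/mlrun', 'build': {'image': 'x'}}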
mlrun/config.py
CHANGED
@@ -288,12 +288,6 @@ default_config = {
         "state": "online",
         "retry_api_call_on_exception": "enabled",
         "http_connection_timeout_keep_alive": 11,
-        # http client used by httpdb
-        "http": {
-            # when True, the client will verify the server's TLS
-            # set to False for backwards compatibility.
-            "verify": False,
-        },
         "db": {
             "commit_retry_timeout": 30,
             "commit_retry_interval": 3,
@@ -312,11 +306,7 @@ default_config = {
                 # default is 16MB, max 1G, for more info https://dev.mysql.com/doc/refman/8.0/en/packet-too-large.html
                 "max_allowed_packet": 64000000,  # 64MB
             },
-            #
-            "connections_pool_pre_ping": True,
-            # this setting causes the pool to recycle connections after the given number of seconds has passed
-            "connections_pool_recycle": 60 * 60,
-            # None defaults to httpdb.max_workers
+            # None will set this to be equal to the httpdb.max_workers
             "connections_pool_size": None,
             "connections_pool_max_overflow": None,
             # below is a db-specific configuration
@@ -444,7 +434,7 @@ default_config = {
            # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
            # git+https://github.com/mlrun/mlrun@development. by default uses the version
            "mlrun_version_specifier": "",
-            "kaniko_image": "gcr.io/kaniko-project/executor:v1.
+            "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0",  # kaniko builder image
            "kaniko_init_container_image": "alpine:3.18",
            # image for kaniko init container when docker registry is ECR
            "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -491,8 +481,8 @@ default_config = {
        "offline_storage_path": "model-endpoints/{kind}",
        # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
        # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.
-        "default_http_sink_app": "http://nuclio-{project}-{application_name}.
+        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
        "batch_processing_function_branch": "master",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
@@ -612,7 +602,7 @@ default_config = {
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote":
+        "timeouts": {"local": 120, "kfp": 30, "remote": 30},
    },
    "log_collector": {
        "address": "localhost:8282",
@@ -964,10 +954,10 @@ class Config:
            with_gpu = (
                with_gpu_requests if requirement == "requests" else with_gpu_limits
            )
-            resources[
-
-
-
+            resources[
+                requirement
+            ] = self.get_default_function_pod_requirement_resources(
+                requirement, with_gpu
            )
        return resources

@@ -1350,21 +1340,12 @@ def read_env(env=None, prefix=env_prefix):
    if igz_domain:
        config["ui_url"] = f"https://mlrun-ui.{igz_domain}"

-    if
+    if config.get("log_level"):
        import mlrun.utils.logger

        # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
        # level manually
-        mlrun.utils.logger.set_logger_level(log_level)
-
-    if log_formatter_name := config.get("log_formatter"):
-        import mlrun.utils.logger
-
-        log_formatter = mlrun.utils.create_formatter_instance(
-            mlrun.utils.FormatterKinds(log_formatter_name)
-        )
-        mlrun.utils.logger.get_handler("default").setFormatter(log_formatter)
-
+        mlrun.utils.logger.set_logger_level(config["log_level"])
    # The default function pod resource values are of type str; however, when reading from environment variable numbers,
    # it converts them to type int if contains only number, so we want to convert them to str.
    _convert_resources_to_str(config)
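Reviewer note, stated as an assumption rather than something this diff shows: mlrun's read_env() conventionally maps MLRUN_-prefixed environment variables onto the nested default_config with a double-underscore separator, and the builder settings live under httpdb.builder. If both hold for this release, the new kaniko default could be overridden without code changes:

import os

# Assumption: "MLRUN_" prefix plus "__" nesting maps onto default_config; the
# variable name and executor tag below are illustrative only.
os.environ["MLRUN_HTTPDB__BUILDER__KANIKO_IMAGE"] = (
    "gcr.io/kaniko-project/executor:v1.9.1"
)

import mlrun

print(mlrun.mlconf.httpdb.builder.kaniko_image)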
mlrun/datastore/azure_blob.py
CHANGED
@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):
 
         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-                "
-
+            res[
+                f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
+            ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"
@@ -188,14 +188,14 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"
-
+                res[
+                    f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"
+                ] = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
 
         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-                "
-
+            res[
+                f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"
+            ] = "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
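Reviewer note: this hunk only reformats how the per-account Spark/Hadoop ABFS options are assembled; the keys and values are unchanged. For orientation, a hedged sketch of how such a dict is typically applied to a Spark session (the account host and token are placeholders, and this is not mlrun code):

from pyspark.sql import SparkSession

spark_options = {
    "spark.hadoop.fs.azure.account.auth.type.myaccount.dfs.core.windows.net": "SAS",
    "spark.hadoop.fs.azure.sas.token.provider.type.myaccount.dfs.core.windows.net": "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider",
    "spark.hadoop.fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net": "<sas-token>",
}

builder = SparkSession.builder.appName("abfs-example")
for key, value in spark_options.items():
    builder = builder.config(key, value)
spark = builder.getOrCreate()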
mlrun/datastore/base.py
CHANGED
@@ -27,7 +27,6 @@ import requests
 import urllib3
 from deprecated import deprecated
 
-import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -35,6 +34,10 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
+verify_ssl = False
+if not verify_ssl:
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -640,6 +643,45 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
+def http_get(url, headers=None, auth=None):
+    try:
+        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+    mlrun.errors.raise_for_status(response)
+
+    return response.content
+
+
+def http_head(url, headers=None, auth=None):
+    try:
+        response = requests.head(url, headers=headers, auth=auth, verify=verify_ssl)
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+    mlrun.errors.raise_for_status(response)
+
+    return response.headers
+
+
+def http_put(url, data, headers=None, auth=None, session=None):
+    try:
+        put_api = session.put if session else requests.put
+        response = put_api(
+            url, data=data, headers=headers, auth=auth, verify=verify_ssl
+        )
+    except OSError as exc:
+        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}") from exc
+
+    mlrun.errors.raise_for_status(response)
+
+
+def http_upload(url, file_path, headers=None, auth=None):
+    with open(file_path, "rb") as data:
+        http_put(url, data, headers, auth)
+
+
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -667,7 +709,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data =
+        data = http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -687,26 +729,6 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
-    def _http_get(
-        self,
-        url,
-        headers=None,
-        auth=None,
-    ):
-        # import here to prevent import cycle
-        from mlrun.config import config as mlconf
-
-        verify_ssl = mlconf.httpdb.http.verify
-        try:
-            if not verify_ssl:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-        except OSError as exc:
-            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-        mlrun.errors.raise_for_status(response)
-        return response.content
-
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
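Reviewer note: the net effect of this change is that the per-instance HttpStore._http_get helper becomes a set of module-level functions (http_get, http_head, http_put, http_upload), with TLS verification controlled by the module-level verify_ssl flag, which rc6 hard-codes to False. A minimal usage sketch based on the signatures shown above (the URL and credentials are illustrative):

from mlrun.datastore.base import basic_auth_header, http_get, http_head, http_put

headers = basic_auth_header("user", "password")
url = "https://example.com/artifacts/data.csv"  # illustrative endpoint

meta = http_head(url, headers=headers)    # returns the response headers
size = int(meta.get("Content-Length", "0"))

content = http_get(url, headers=headers)  # returns the response body as bytes
http_put("https://example.com/artifacts/copy.csv", content, headers=headers)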
mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -147,13 +147,13 @@ class GoogleCloudStorageStore(DataStore):
         if "project_id" in credentials:
             res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
         if "private_key_id" in credentials:
-            res[
-
-
+            res[
+                "spark.hadoop.fs.gs.auth.service.account.private.key.id"
+            ] = credentials["private_key_id"]
         if "private_key" in credentials:
-            res[
-
-
+            res[
+                "spark.hadoop.fs.gs.auth.service.account.private.key"
+            ] = credentials["private_key"]
         if "client_email" in credentials:
             res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials[
                 "client_email"
mlrun/datastore/v3io.py
CHANGED
@@ -15,11 +15,12 @@
 import mmap
 import os
 import time
+from copy import deepcopy
 from datetime import datetime
 
 import fsspec
-import
-
+import requests
+import v3io.dataplane
 
 import mlrun
 from mlrun.datastore.helpers import ONE_GB, ONE_MB
@@ -29,6 +30,11 @@ from .base import (
     DataStore,
     FileStats,
     basic_auth_header,
+    get_range,
+    http_get,
+    http_head,
+    http_put,
+    http_upload,
 )
 
 V3IO_LOCAL_ROOT = "v3io"
@@ -41,18 +47,17 @@ class V3ioStore(DataStore):
 
         self.headers = None
         self.secure = self.kind == "v3ios"
-
-        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
-        username = self._get_secret_or_env("V3IO_USERNAME")
-        password = self._get_secret_or_env("V3IO_PASSWORD")
         if self.endpoint.startswith("https://"):
             self.endpoint = self.endpoint[len("https://") :]
             self.secure = True
         elif self.endpoint.startswith("http://"):
             self.endpoint = self.endpoint[len("http://") :]
             self.secure = False
-
-
+
+        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
+        username = self._get_secret_or_env("V3IO_USERNAME")
+        password = self._get_secret_or_env("V3IO_PASSWORD")
+
         self.auth = None
         self.token = token
         if token:
@@ -60,16 +65,6 @@ class V3ioStore(DataStore):
         elif username and password:
             self.headers = basic_auth_header(username, password)
 
-    @staticmethod
-    def _do_object_request(function: callable, *args, **kwargs):
-        try:
-            return function(*args, **kwargs)
-        except HttpResponseError as http_response_error:
-            raise mlrun.errors.err_for_status_code(
-                status_code=http_response_error.status_code,
-                message=mlrun.errors.err_to_str(http_response_error),
-            )
-
     @staticmethod
     def uri_to_ipython(endpoint, subpath):
         return V3IO_LOCAL_ROOT + subpath
@@ -96,19 +91,13 @@ class V3ioStore(DataStore):
 
     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
-        container, path = split_path(self._join(key))
         file_size = os.path.getsize(src_path)  # in bytes
         if file_size <= ONE_MB:
-
-            data = source_file.read()
-            self._do_object_request(
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=False,
-            )
+            http_upload(self.url + self._join(key), src_path, self.headers, None)
             return
+        append_header = deepcopy(self.headers)
+        append_header["Range"] = "-1"
+
         # chunk must be a multiple of the ALLOCATIONGRANULARITY
         # https://docs.python.org/3/library/mmap.html
         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
@@ -125,13 +114,11 @@ class V3ioStore(DataStore):
                 access=mmap.ACCESS_READ,
                 offset=file_offset,
             ) as mmap_obj:
-
-
-
-
-
-                    body=mmap_obj,
-                    append=append,
+                http_put(
+                    self.url + self._join(key),
+                    mmap_obj,
+                    append_header if file_offset else self.headers,
+                    None,
                 )
                 file_offset += chunk_size
 
@@ -139,55 +126,43 @@ class V3ioStore(DataStore):
         return self._upload(key, src_path)
 
     def get(self, key, size=None, offset=0):
-
-
-
-
-
-            offset=offset,
-            num_bytes=size,
-        ).body
+        headers = self.headers
+        if size or offset:
+            headers = deepcopy(headers)
+            headers["Range"] = get_range(size, offset)
+        return http_get(self.url + self._join(key), headers)
 
-    def _put(self, key, data,
+    def _put(self, key, data, max_chunk_size: int = ONE_GB):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
-        container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         if buffer_size <= ONE_MB:
-            self.
-                self.object.put,
-                container=container,
-                path=path,
-                body=data,
-                append=append,
-            )
+            http_put(self.url + self._join(key), data, self.headers, None)
             return
+        append_header = deepcopy(self.headers)
+        append_header["Range"] = "-1"
         buffer_offset = 0
         try:
             data = memoryview(data)
         except TypeError:
             pass
 
-
-
-
-
-
-
-
-
-
-
-
+        with requests.Session() as requests_session:
+            while buffer_offset < buffer_size:
+                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+                http_put(
+                    self.url + self._join(key),
+                    data[buffer_offset : buffer_offset + chunk_size],
+                    append_header if buffer_offset else self.headers,
+                    None,
+                    requests_session,
+                )
+                buffer_offset += chunk_size
 
     def put(self, key, data, append=False):
-        return self._put(key, data
+        return self._put(key, data)
 
     def stat(self, key):
-
-        response = self._do_object_request(
-            function=self.object.head, container=container, path=path
-        )
-        head = dict(response.headers)
+        head = http_head(self.url + self._join(key), self.headers)
         size = int(head.get("Content-Length", "0"))
         datestr = head.get("Last-Modified", "0")
         modified = time.mktime(
@@ -196,6 +171,7 @@ class V3ioStore(DataStore):
         return FileStats(size, modified)
 
     def listdir(self, key):
+        v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
         container, subpath = split_path(self._join(key))
         if not subpath.endswith("/"):
             subpath += "/"
@@ -204,7 +180,7 @@ class V3ioStore(DataStore):
         subpath_length = len(subpath) - 1
 
         try:
-            response =
+            response = v3io_client.container.list(
                 container=container,
                 path=subpath,
                 get_all_attributes=False,
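Reviewer note: the rewritten V3ioStore now talks to the v3io web API through the shared HTTP helpers. Payloads up to ONE_MB go out as a single PUT; larger buffers are sent in chunks where every chunk after the first carries a "Range: -1" header, which the code relies on to mean append. A standalone sketch of that pattern (it assumes the http_put helper added to mlrun/datastore/base.py above and an object URL that honors Range-based appends):

from mlrun.datastore.base import http_put

ONE_MB = 1024 * 1024


def put_in_chunks(url, data, headers=None, max_chunk_size=ONE_MB):
    # The first chunk overwrites the object; later chunks append via "Range: -1".
    append_headers = dict(headers or {})
    append_headers["Range"] = "-1"
    offset = 0
    while offset < len(data):
        chunk = data[offset : offset + max_chunk_size]
        http_put(url, chunk, append_headers if offset else headers)
        offset += len(chunk)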
mlrun/db/base.py
CHANGED
@@ -677,21 +677,3 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
-
-    def submit_workflow(
-        self,
-        project: str,
-        name: str,
-        workflow_spec: Union[
-            "mlrun.projects.pipelines.WorkflowSpec",
-            "mlrun.common.schemas.WorkflowSpec",
-            dict,
-        ],
-        arguments: Optional[dict] = None,
-        artifact_path: Optional[str] = None,
-        source: Optional[str] = None,
-        run_name: Optional[str] = None,
-        namespace: Optional[str] = None,
-        notifications: list["mlrun.model.Notification"] = None,
-    ) -> "mlrun.common.schemas.WorkflowResponse":
-        pass
|