arvados-python-client 3.0.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arvados-python-client-3.0.0/arvados_python_client.egg-info → arvados-python-client-3.1.0}/PKG-INFO +1 -1
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/__init__.py +31 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/basedirs.py +6 -4
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/http_to_keep.py +2 -4
- arvados-python-client-3.1.0/arvados/_version.py +1 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/arvfile.py +11 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/collection.py +73 -51
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/_util.py +39 -8
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/arv_copy.py +107 -43
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/put.py +5 -7
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/keep.py +10 -6
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/util.py +20 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados-v1-discovery.json +21 -1
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0/arvados_python_client.egg-info}/PKG-INFO +1 -1
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_version.py +1 -1
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/run_test_server.py +98 -50
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_copy.py +19 -8
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_put.py +5 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_basedirs.py +8 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_cmd_util.py +40 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_internal.py +41 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_keep_client.py +5 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_util.py +34 -0
- arvados-python-client-3.0.0/arvados/_version.py +0 -1
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/LICENSE-2.0.txt +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/MANIFEST.in +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/README.rst +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/__init__.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/diskcache.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/pycurl.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/report_template.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/streams.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/api.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/cache.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/__init__.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/federation_migrate.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/get.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/keepdocker.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/ls.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/run.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/ws.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/config.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/errors.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/events.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/logging.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/retry.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/safeapi.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/vocabulary.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/SOURCES.txt +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/dependency_links.txt +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/not-zip-safe +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/requires.txt +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/top_level.txt +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-copy +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-federation-migrate +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-get +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-keepdocker +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-ls +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-normalize +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-put +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-ws +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/discovery2pydoc.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/setup.cfg +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/setup.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/__init__.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/arvados_testutil.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/keepstub.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/manifest_examples.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/__init__.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/performance_profiler.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/test_a_sample.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_api.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_get.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_keepdocker.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_ls.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_normalize.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_ws.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arvfile.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_benchmark_collections.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_collections.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_computed_permissions.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_config.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_errors.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_events.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_http_cache.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_http_to_keep.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_keep_locator.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_retry.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_retry_job_helpers.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_storage_classes.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_stream.py +0 -0
- {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_vocabulary.py +0 -0
|
@@ -10,10 +10,15 @@ time.
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import functools
|
|
13
|
+
import operator
|
|
13
14
|
import re
|
|
14
15
|
import time
|
|
15
16
|
import warnings
|
|
16
17
|
|
|
18
|
+
import typing as t
|
|
19
|
+
|
|
20
|
+
HT = t.TypeVar('HT', bound=t.Hashable)
|
|
21
|
+
|
|
17
22
|
class Timer:
|
|
18
23
|
def __init__(self, verbose=False):
|
|
19
24
|
self.verbose = verbose
|
|
@@ -81,3 +86,29 @@ def deprecated(version=None, preferred=None):
|
|
|
81
86
|
deprecated_wrapper.__doc__ = docstring
|
|
82
87
|
return deprecated_wrapper
|
|
83
88
|
return deprecated_decorator
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_seq(
|
|
92
|
+
s: str,
|
|
93
|
+
split: t.Callable[[str], t.Iterable[str]]=operator.methodcaller('split', ','),
|
|
94
|
+
clean: t.Callable[[str], str]=operator.methodcaller('strip'),
|
|
95
|
+
check: t.Callable[[str], bool]=bool,
|
|
96
|
+
) -> t.Iterator[str]:
|
|
97
|
+
"""Split, clean, and filter a string into multiple items
|
|
98
|
+
|
|
99
|
+
The default arguments split on commas, strip substrings, and skip empty
|
|
100
|
+
items.
|
|
101
|
+
"""
|
|
102
|
+
return (word for substr in split(s) if check(word := clean(substr)))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def uniq(it: t.Iterable[HT]) -> t.Iterator[HT]:
|
|
106
|
+
"""Yield only unique items from an iterable
|
|
107
|
+
|
|
108
|
+
The items must be hashable.
|
|
109
|
+
"""
|
|
110
|
+
seen = set()
|
|
111
|
+
for item in it:
|
|
112
|
+
if item not in seen:
|
|
113
|
+
seen.add(item)
|
|
114
|
+
yield item
|
|
@@ -126,12 +126,14 @@ class BaseDirectories:
|
|
|
126
126
|
self._env = env
|
|
127
127
|
self._xdg_subdir = PurePath(xdg_subdir)
|
|
128
128
|
|
|
129
|
+
def search_paths(self) -> Iterator[Path]:
|
|
130
|
+
return itertools.chain(
|
|
131
|
+
self._spec.iter_systemd(self._env),
|
|
132
|
+
self._spec.iter_xdg(self._env, self._xdg_subdir))
|
|
133
|
+
|
|
129
134
|
def search(self, name: str) -> Iterator[Path]:
|
|
130
135
|
any_found = False
|
|
131
|
-
for search_path in
|
|
132
|
-
self._spec.iter_systemd(self._env),
|
|
133
|
-
self._spec.iter_xdg(self._env, self._xdg_subdir),
|
|
134
|
-
):
|
|
136
|
+
for search_path in self.search_paths():
|
|
135
137
|
path = search_path / name
|
|
136
138
|
if path.exists():
|
|
137
139
|
yield path
|
{arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/http_to_keep.py
RENAMED
|
@@ -16,6 +16,7 @@ import pycurl
|
|
|
16
16
|
|
|
17
17
|
import arvados
|
|
18
18
|
import arvados.collection
|
|
19
|
+
import arvados._internal
|
|
19
20
|
from .pycurl import PyCurlHelper
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger('arvados.http_import')
|
|
@@ -250,11 +251,8 @@ def check_cached_url(api, project_uuid, url, etags,
|
|
|
250
251
|
utcnow=datetime.datetime.utcnow,
|
|
251
252
|
varying_url_params="",
|
|
252
253
|
prefer_cached_downloads=False):
|
|
253
|
-
|
|
254
254
|
logger.info("Checking Keep for %s", url)
|
|
255
|
-
|
|
256
|
-
varying_params = [s.strip() for s in varying_url_params.split(",")]
|
|
257
|
-
|
|
255
|
+
varying_params = set(arvados._internal.parse_seq(varying_url_params))
|
|
258
256
|
parsed = urllib.parse.urlparse(url)
|
|
259
257
|
query = [q for q in urllib.parse.parse_qsl(parsed.query)
|
|
260
258
|
if q[0] not in varying_params]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '3.1.0'
|
|
@@ -23,8 +23,16 @@ from .errors import KeepWriteError, AssertionError, ArgumentError
|
|
|
23
23
|
from .keep import KeepLocator
|
|
24
24
|
from .retry import retry_method
|
|
25
25
|
|
|
26
|
+
ADD = "add"
|
|
27
|
+
"""Argument value for `Collection` methods to represent an added item"""
|
|
28
|
+
DEL = "del"
|
|
29
|
+
"""Argument value for `Collection` methods to represent a removed item"""
|
|
26
30
|
MOD = "mod"
|
|
31
|
+
"""Argument value for `Collection` methods to represent a modified item"""
|
|
32
|
+
TOK = "tok"
|
|
33
|
+
"""Argument value for `Collection` methods to represent an item with token differences"""
|
|
27
34
|
WRITE = "write"
|
|
35
|
+
"""Argument value for `Collection` methods to represent that a file was written to"""
|
|
28
36
|
|
|
29
37
|
_logger = logging.getLogger('arvados.arvfile')
|
|
30
38
|
|
|
@@ -841,6 +849,8 @@ class ArvadosFile(object):
|
|
|
841
849
|
def replace_contents(self, other):
|
|
842
850
|
"""Replace segments of this file with segments from another `ArvadosFile` object."""
|
|
843
851
|
|
|
852
|
+
eventtype = TOK if self == other else MOD
|
|
853
|
+
|
|
844
854
|
map_loc = {}
|
|
845
855
|
self._segments = []
|
|
846
856
|
for other_segment in other.segments():
|
|
@@ -857,6 +867,7 @@ class ArvadosFile(object):
|
|
|
857
867
|
self._segments.append(streams.Range(new_loc, other_segment.range_start, other_segment.range_size, other_segment.segment_offset))
|
|
858
868
|
|
|
859
869
|
self.set_committed(False)
|
|
870
|
+
self.parent.notify(eventtype, self.parent, self.name, (self, self))
|
|
860
871
|
|
|
861
872
|
def __eq__(self, other):
|
|
862
873
|
if other is self:
|
|
@@ -30,7 +30,7 @@ from stat import *
|
|
|
30
30
|
|
|
31
31
|
from ._internal import streams
|
|
32
32
|
from .api import ThreadSafeAPIClient
|
|
33
|
-
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
|
|
33
|
+
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock, ADD, DEL, MOD, TOK, WRITE
|
|
34
34
|
from .keep import KeepLocator, KeepClient
|
|
35
35
|
import arvados.config as config
|
|
36
36
|
import arvados.errors as errors
|
|
@@ -58,14 +58,7 @@ else:
|
|
|
58
58
|
|
|
59
59
|
_logger = logging.getLogger('arvados.collection')
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
"""Argument value for `Collection` methods to represent an added item"""
|
|
63
|
-
DEL = "del"
|
|
64
|
-
"""Argument value for `Collection` methods to represent a removed item"""
|
|
65
|
-
MOD = "mod"
|
|
66
|
-
"""Argument value for `Collection` methods to represent a modified item"""
|
|
67
|
-
TOK = "tok"
|
|
68
|
-
"""Argument value for `Collection` methods to represent an item with token differences"""
|
|
61
|
+
|
|
69
62
|
FILE = "file"
|
|
70
63
|
"""`create_type` value for `Collection.find_or_create`"""
|
|
71
64
|
COLLECTION = "collection"
|
|
@@ -922,9 +915,12 @@ class RichCollectionBase(CollectionBase):
|
|
|
922
915
|
# Overwrite path with new item; this can happen if
|
|
923
916
|
# path was a file and is now a collection or vice versa
|
|
924
917
|
self.copy(final, path, overwrite=True)
|
|
925
|
-
|
|
926
|
-
# Local
|
|
927
|
-
#
|
|
918
|
+
elif event_type == MOD:
|
|
919
|
+
# Local doesn't match the "start" value or local
|
|
920
|
+
# is missing (presumably deleted) so save change
|
|
921
|
+
# to conflict file. Don't do this for TOK events
|
|
922
|
+
# which means the file didn't change but only had
|
|
923
|
+
# tokens updated.
|
|
928
924
|
self.copy(final, conflictpath)
|
|
929
925
|
elif event_type == DEL:
|
|
930
926
|
if local == initial:
|
|
@@ -992,8 +988,13 @@ class RichCollectionBase(CollectionBase):
|
|
|
992
988
|
was modified.
|
|
993
989
|
|
|
994
990
|
* item: arvados.arvfile.ArvadosFile |
|
|
995
|
-
arvados.collection.Subcollection ---
|
|
996
|
-
within `collection
|
|
991
|
+
arvados.collection.Subcollection --- For ADD events, the new
|
|
992
|
+
contents at `name` within `collection`; for DEL events, the
|
|
993
|
+
item that was removed. For MOD and TOK events, a 2-tuple of
|
|
994
|
+
the previous item and the new item (may be the same object
|
|
995
|
+
or different, depending on whether the action involved it
|
|
996
|
+
being modified in place or replaced).
|
|
997
|
+
|
|
997
998
|
"""
|
|
998
999
|
if self._callback:
|
|
999
1000
|
self._callback(event, collection, name, item)
|
|
@@ -1134,7 +1135,7 @@ class Collection(RichCollectionBase):
|
|
|
1134
1135
|
self._manifest_text = None
|
|
1135
1136
|
self._portable_data_hash = None
|
|
1136
1137
|
self._api_response = None
|
|
1137
|
-
self.
|
|
1138
|
+
self._token_refresh_timestamp = 0
|
|
1138
1139
|
|
|
1139
1140
|
self.lock = threading.RLock()
|
|
1140
1141
|
self.events = None
|
|
@@ -1200,20 +1201,6 @@ class Collection(RichCollectionBase):
|
|
|
1200
1201
|
def writable(self) -> bool:
|
|
1201
1202
|
return True
|
|
1202
1203
|
|
|
1203
|
-
@synchronized
|
|
1204
|
-
def known_past_version(
|
|
1205
|
-
self,
|
|
1206
|
-
modified_at_and_portable_data_hash: Tuple[Optional[str], Optional[str]]
|
|
1207
|
-
) -> bool:
|
|
1208
|
-
"""Indicate whether an API record for this collection has been seen before
|
|
1209
|
-
|
|
1210
|
-
As this collection object loads records from the API server, it records
|
|
1211
|
-
their `modified_at` and `portable_data_hash` fields. This method accepts
|
|
1212
|
-
a 2-tuple with values for those fields, and returns `True` if the
|
|
1213
|
-
combination was previously loaded.
|
|
1214
|
-
"""
|
|
1215
|
-
return modified_at_and_portable_data_hash in self._past_versions
|
|
1216
|
-
|
|
1217
1204
|
@synchronized
|
|
1218
1205
|
@retry_method
|
|
1219
1206
|
def update(
|
|
@@ -1245,23 +1232,61 @@ class Collection(RichCollectionBase):
|
|
|
1245
1232
|
the collection's API record from the API server. If not specified,
|
|
1246
1233
|
uses the `num_retries` provided when this instance was constructed.
|
|
1247
1234
|
"""
|
|
1235
|
+
|
|
1236
|
+
token_refresh_period = 60*60
|
|
1237
|
+
time_since_last_token_refresh = (time.time() - self._token_refresh_timestamp)
|
|
1238
|
+
upstream_response = None
|
|
1239
|
+
|
|
1248
1240
|
if other is None:
|
|
1249
1241
|
if self._manifest_locator is None:
|
|
1250
1242
|
raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
|
|
1251
|
-
|
|
1252
|
-
if
|
|
1253
|
-
response.get("portable_data_hash") != self.portable_data_hash()):
|
|
1254
|
-
# The record on the server is different from our current one, but we've seen it before,
|
|
1255
|
-
# so ignore it because it's already been merged.
|
|
1256
|
-
# However, if it's the same as our current record, proceed with the update, because we want to update
|
|
1257
|
-
# our tokens.
|
|
1243
|
+
|
|
1244
|
+
if re.match(arvados.util.portable_data_hash_pattern, self._manifest_locator) and time_since_last_token_refresh < token_refresh_period:
|
|
1258
1245
|
return
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
other = CollectionReader(
|
|
1262
|
-
|
|
1246
|
+
|
|
1247
|
+
upstream_response = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
|
|
1248
|
+
other = CollectionReader(upstream_response["manifest_text"])
|
|
1249
|
+
|
|
1250
|
+
if self.committed():
|
|
1251
|
+
# 1st case, no local changes, content is the same
|
|
1252
|
+
if self.portable_data_hash() == other.portable_data_hash() and time_since_last_token_refresh < token_refresh_period:
|
|
1253
|
+
# No difference in content. Remember the API record
|
|
1254
|
+
# (metadata such as name or properties may have changed)
|
|
1255
|
+
# but don't update the token refresh timestamp.
|
|
1256
|
+
if upstream_response is not None:
|
|
1257
|
+
self._remember_api_response(upstream_response)
|
|
1258
|
+
return
|
|
1259
|
+
|
|
1260
|
+
# 2nd case, no local changes, but either upstream changed
|
|
1261
|
+
# or we want to refresh tokens.
|
|
1262
|
+
|
|
1263
|
+
self.apply(self.diff(other))
|
|
1264
|
+
if upstream_response is not None:
|
|
1265
|
+
self._remember_api_response(upstream_response)
|
|
1266
|
+
self._update_token_timestamp()
|
|
1267
|
+
self.set_committed(True)
|
|
1268
|
+
return
|
|
1269
|
+
|
|
1270
|
+
# 3rd case, upstream changed, but we also have uncommitted
|
|
1271
|
+
# changes that we want to incorporate so they don't get lost.
|
|
1272
|
+
|
|
1273
|
+
# _manifest_text stores the text from last time we received a
|
|
1274
|
+
# record from the API server. This is the state of the
|
|
1275
|
+
# collection before our uncommitted changes.
|
|
1276
|
+
baseline = Collection(self._manifest_text)
|
|
1277
|
+
|
|
1278
|
+
# Get the set of changes between our baseline and the other
|
|
1279
|
+
# collection and apply them to self.
|
|
1280
|
+
#
|
|
1281
|
+
# If a file was modified in both 'self' and 'other', the
|
|
1282
|
+
# 'apply' method keeps the contents of 'self' and creates a
|
|
1283
|
+
# conflict file with the contents of 'other'.
|
|
1263
1284
|
self.apply(baseline.diff(other))
|
|
1264
|
-
|
|
1285
|
+
|
|
1286
|
+
# Remember the new baseline, changes to a file
|
|
1287
|
+
if upstream_response is not None:
|
|
1288
|
+
self._remember_api_response(upstream_response)
|
|
1289
|
+
|
|
1265
1290
|
|
|
1266
1291
|
@synchronized
|
|
1267
1292
|
def _my_api(self):
|
|
@@ -1295,7 +1320,11 @@ class Collection(RichCollectionBase):
|
|
|
1295
1320
|
|
|
1296
1321
|
def _remember_api_response(self, response):
|
|
1297
1322
|
self._api_response = response
|
|
1298
|
-
self.
|
|
1323
|
+
self._manifest_text = self._api_response['manifest_text']
|
|
1324
|
+
self._portable_data_hash = self._api_response['portable_data_hash']
|
|
1325
|
+
|
|
1326
|
+
def _update_token_timestamp(self):
|
|
1327
|
+
self._token_refresh_timestamp = time.time()
|
|
1299
1328
|
|
|
1300
1329
|
def _populate_from_api_server(self):
|
|
1301
1330
|
# As in KeepClient itself, we must wait until the last
|
|
@@ -1308,8 +1337,7 @@ class Collection(RichCollectionBase):
|
|
|
1308
1337
|
self._remember_api_response(self._my_api().collections().get(
|
|
1309
1338
|
uuid=self._manifest_locator).execute(
|
|
1310
1339
|
num_retries=self.num_retries))
|
|
1311
|
-
|
|
1312
|
-
self._portable_data_hash = self._api_response['portable_data_hash']
|
|
1340
|
+
|
|
1313
1341
|
# If not overriden via kwargs, we should try to load the
|
|
1314
1342
|
# replication_desired and storage_classes_desired from the API server
|
|
1315
1343
|
if self.replication_desired is None:
|
|
@@ -1534,8 +1562,6 @@ class Collection(RichCollectionBase):
|
|
|
1534
1562
|
uuid=self._manifest_locator,
|
|
1535
1563
|
body=body
|
|
1536
1564
|
).execute(num_retries=num_retries))
|
|
1537
|
-
self._manifest_text = self._api_response["manifest_text"]
|
|
1538
|
-
self._portable_data_hash = self._api_response["portable_data_hash"]
|
|
1539
1565
|
self.set_committed(True)
|
|
1540
1566
|
elif body:
|
|
1541
1567
|
self._remember_api_response(self._my_api().collections().update(
|
|
@@ -1654,12 +1680,7 @@ class Collection(RichCollectionBase):
|
|
|
1654
1680
|
body["preserve_version"] = preserve_version
|
|
1655
1681
|
|
|
1656
1682
|
self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
|
|
1657
|
-
text = self._api_response["manifest_text"]
|
|
1658
|
-
|
|
1659
1683
|
self._manifest_locator = self._api_response["uuid"]
|
|
1660
|
-
self._portable_data_hash = self._api_response["portable_data_hash"]
|
|
1661
|
-
|
|
1662
|
-
self._manifest_text = text
|
|
1663
1684
|
self.set_committed(True)
|
|
1664
1685
|
|
|
1665
1686
|
return text
|
|
@@ -1743,6 +1764,7 @@ class Collection(RichCollectionBase):
|
|
|
1743
1764
|
stream_name = None
|
|
1744
1765
|
state = STREAM_NAME
|
|
1745
1766
|
|
|
1767
|
+
self._update_token_timestamp()
|
|
1746
1768
|
self.set_committed(True)
|
|
1747
1769
|
|
|
1748
1770
|
@synchronized
|
|
@@ -3,14 +3,20 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
|
+
import dataclasses
|
|
6
7
|
import errno
|
|
7
8
|
import json
|
|
8
9
|
import logging
|
|
10
|
+
import operator
|
|
9
11
|
import os
|
|
10
12
|
import re
|
|
11
13
|
import signal
|
|
12
14
|
import sys
|
|
13
15
|
|
|
16
|
+
import typing as t
|
|
17
|
+
|
|
18
|
+
from .. import _internal
|
|
19
|
+
|
|
14
20
|
FILTER_STR_RE = re.compile(r'''
|
|
15
21
|
^\(
|
|
16
22
|
\ *(\w+)
|
|
@@ -19,16 +25,41 @@ FILTER_STR_RE = re.compile(r'''
|
|
|
19
25
|
\ *\)$
|
|
20
26
|
''', re.ASCII | re.VERBOSE)
|
|
21
27
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
28
|
+
T = t.TypeVar('T')
|
|
29
|
+
|
|
30
|
+
@dataclasses.dataclass(unsafe_hash=True)
|
|
31
|
+
class RangedValue(t.Generic[T]):
|
|
32
|
+
"""Validate that an argument string is within a valid range of values"""
|
|
33
|
+
parse_func: t.Callable[[str], T]
|
|
34
|
+
valid_range: t.Container[T]
|
|
35
|
+
|
|
36
|
+
def __call__(self, s: str) -> T:
|
|
37
|
+
value = self.parse_func(s)
|
|
38
|
+
if value in self.valid_range:
|
|
39
|
+
return value
|
|
40
|
+
else:
|
|
41
|
+
raise ValueError(f"{value!r} is not a valid value")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclasses.dataclass(unsafe_hash=True)
|
|
45
|
+
class UniqueSplit(t.Generic[T]):
|
|
46
|
+
"""Parse a string into a list of unique values"""
|
|
47
|
+
split: t.Callable[[str], t.Iterable[str]]=operator.methodcaller('split', ',')
|
|
48
|
+
clean: t.Callable[[str], str]=operator.methodcaller('strip')
|
|
49
|
+
check: t.Callable[[str], bool]=bool
|
|
50
|
+
|
|
51
|
+
def __call__(self, s: str) -> T:
|
|
52
|
+
return list(_internal.uniq(_internal.parse_seq(s, self.split, self.clean, self.check)))
|
|
53
|
+
|
|
27
54
|
|
|
28
55
|
retry_opt = argparse.ArgumentParser(add_help=False)
|
|
29
|
-
retry_opt.add_argument(
|
|
30
|
-
|
|
31
|
-
|
|
56
|
+
retry_opt.add_argument(
|
|
57
|
+
'--retries',
|
|
58
|
+
type=RangedValue(int, range(0, sys.maxsize)),
|
|
59
|
+
default=10,
|
|
60
|
+
help="""Maximum number of times to retry server requests that encounter
|
|
61
|
+
temporary failures (e.g., server down). Default %(default)r.
|
|
62
|
+
""")
|
|
32
63
|
|
|
33
64
|
def _ignore_error(error):
|
|
34
65
|
return None
|
|
@@ -33,6 +33,10 @@ import io
|
|
|
33
33
|
import json
|
|
34
34
|
import queue
|
|
35
35
|
import threading
|
|
36
|
+
import errno
|
|
37
|
+
|
|
38
|
+
import httplib2.error
|
|
39
|
+
import googleapiclient
|
|
36
40
|
|
|
37
41
|
import arvados
|
|
38
42
|
import arvados.config
|
|
@@ -40,6 +44,7 @@ import arvados.keep
|
|
|
40
44
|
import arvados.util
|
|
41
45
|
import arvados.commands._util as arv_cmd
|
|
42
46
|
import arvados.commands.keepdocker
|
|
47
|
+
from arvados.logging import log_handler
|
|
43
48
|
|
|
44
49
|
from arvados._internal import basedirs, http_to_keep
|
|
45
50
|
from arvados._version import __version__
|
|
@@ -48,6 +53,9 @@ COMMIT_HASH_RE = re.compile(r'^[0-9a-f]{1,40}$')
|
|
|
48
53
|
|
|
49
54
|
logger = logging.getLogger('arvados.arv-copy')
|
|
50
55
|
|
|
56
|
+
# Set this up so connection errors get logged.
|
|
57
|
+
googleapi_logger = logging.getLogger('googleapiclient.http')
|
|
58
|
+
|
|
51
59
|
# local_repo_dir records which git repositories from the Arvados source
|
|
52
60
|
# instance have been checked out locally during this run, and to which
|
|
53
61
|
# directories.
|
|
@@ -112,7 +120,18 @@ If not provided, will use the default client configuration from the environment
|
|
|
112
120
|
'--project-uuid', dest='project_uuid',
|
|
113
121
|
help='The UUID of the project at the destination to which the collection or workflow should be copied.')
|
|
114
122
|
copy_opts.add_argument(
|
|
115
|
-
'--
|
|
123
|
+
'--replication',
|
|
124
|
+
type=arv_cmd.RangedValue(int, range(1, sys.maxsize)),
|
|
125
|
+
metavar='N',
|
|
126
|
+
help="""
|
|
127
|
+
Number of replicas per storage class for the copied collections at the destination.
|
|
128
|
+
If not provided (or if provided with invalid value),
|
|
129
|
+
use the destination's default replication-level setting (if found),
|
|
130
|
+
or the fallback value 2.
|
|
131
|
+
""")
|
|
132
|
+
copy_opts.add_argument(
|
|
133
|
+
'--storage-classes',
|
|
134
|
+
type=arv_cmd.UniqueSplit(),
|
|
116
135
|
help='Comma separated list of storage classes to be used when saving data to the destinaton Arvados instance.')
|
|
117
136
|
copy_opts.add_argument("--varying-url-params", type=str, default="",
|
|
118
137
|
help="A comma separated list of URL query parameters that should be ignored when storing HTTP URLs in Keep.")
|
|
@@ -131,9 +150,6 @@ If not provided, will use the default client configuration from the environment
|
|
|
131
150
|
parents=[copy_opts, arv_cmd.retry_opt])
|
|
132
151
|
args = parser.parse_args()
|
|
133
152
|
|
|
134
|
-
if args.storage_classes:
|
|
135
|
-
args.storage_classes = [x for x in args.storage_classes.strip().replace(' ', '').split(',') if x]
|
|
136
|
-
|
|
137
153
|
if args.verbose:
|
|
138
154
|
logger.setLevel(logging.DEBUG)
|
|
139
155
|
else:
|
|
@@ -142,10 +158,29 @@ If not provided, will use the default client configuration from the environment
|
|
|
142
158
|
if not args.source_arvados and arvados.util.uuid_pattern.match(args.object_uuid):
|
|
143
159
|
args.source_arvados = args.object_uuid[:5]
|
|
144
160
|
|
|
161
|
+
if not args.destination_arvados and args.project_uuid:
|
|
162
|
+
args.destination_arvados = args.project_uuid[:5]
|
|
163
|
+
|
|
164
|
+
# Make sure errors trying to connect to clusters get logged.
|
|
165
|
+
googleapi_logger.setLevel(logging.WARN)
|
|
166
|
+
googleapi_logger.addHandler(log_handler)
|
|
167
|
+
|
|
145
168
|
# Create API clients for the source and destination instances
|
|
146
169
|
src_arv = api_for_instance(args.source_arvados, args.retries)
|
|
147
170
|
dst_arv = api_for_instance(args.destination_arvados, args.retries)
|
|
148
171
|
|
|
172
|
+
# Once we've successfully contacted the clusters, we probably
|
|
173
|
+
# don't want to see logging about retries (unless the user asked
|
|
174
|
+
# for verbose output).
|
|
175
|
+
if not args.verbose:
|
|
176
|
+
googleapi_logger.setLevel(logging.ERROR)
|
|
177
|
+
|
|
178
|
+
if src_arv.config()["ClusterID"] == dst_arv.config()["ClusterID"]:
|
|
179
|
+
logger.info("Copying within cluster %s", src_arv.config()["ClusterID"])
|
|
180
|
+
else:
|
|
181
|
+
logger.info("Source cluster is %s", src_arv.config()["ClusterID"])
|
|
182
|
+
logger.info("Destination cluster is %s", dst_arv.config()["ClusterID"])
|
|
183
|
+
|
|
149
184
|
if not args.project_uuid:
|
|
150
185
|
args.project_uuid = dst_arv.users().current().execute(num_retries=args.retries)["uuid"]
|
|
151
186
|
|
|
@@ -213,43 +248,64 @@ def set_src_owner_uuid(resource, uuid, args):
|
|
|
213
248
|
# configuration directory.
|
|
214
249
|
#
|
|
215
250
|
def api_for_instance(instance_name, num_retries):
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
dirs = basedirs.BaseDirectories('CONFIG')
|
|
224
|
-
config_file = next(dirs.search(f'{instance_name}.conf'), '')
|
|
251
|
+
msg = []
|
|
252
|
+
if instance_name:
|
|
253
|
+
if '/' in instance_name:
|
|
254
|
+
config_file = instance_name
|
|
255
|
+
else:
|
|
256
|
+
dirs = basedirs.BaseDirectories('CONFIG')
|
|
257
|
+
config_file = next(dirs.search(f'{instance_name}.conf'), '')
|
|
225
258
|
|
|
259
|
+
try:
|
|
260
|
+
cfg = arvados.config.load(config_file)
|
|
261
|
+
|
|
262
|
+
if 'ARVADOS_API_HOST' in cfg and 'ARVADOS_API_TOKEN' in cfg:
|
|
263
|
+
api_is_insecure = (
|
|
264
|
+
cfg.get('ARVADOS_API_HOST_INSECURE', '').lower() in set(
|
|
265
|
+
['1', 't', 'true', 'y', 'yes']))
|
|
266
|
+
return arvados.api('v1',
|
|
267
|
+
host=cfg['ARVADOS_API_HOST'],
|
|
268
|
+
token=cfg['ARVADOS_API_TOKEN'],
|
|
269
|
+
insecure=api_is_insecure,
|
|
270
|
+
num_retries=num_retries,
|
|
271
|
+
)
|
|
272
|
+
else:
|
|
273
|
+
msg.append('missing ARVADOS_API_HOST or ARVADOS_API_TOKEN for {} in config file {}'.format(instance_name, config_file))
|
|
274
|
+
except OSError as e:
|
|
275
|
+
if e.errno in (errno.EHOSTUNREACH, errno.ECONNREFUSED, errno.ECONNRESET, errno.ENETUNREACH):
|
|
276
|
+
verb = 'connect to instance from'
|
|
277
|
+
elif config_file:
|
|
278
|
+
verb = 'open'
|
|
279
|
+
else:
|
|
280
|
+
verb = 'find'
|
|
281
|
+
searchlist = ":".join(str(p) for p in dirs.search_paths())
|
|
282
|
+
config_file = f'{instance_name}.conf in path {searchlist}'
|
|
283
|
+
msg.append(("Could not {} config file {}: {}").format(
|
|
284
|
+
verb, config_file, e.strerror))
|
|
285
|
+
except (httplib2.error.HttpLib2Error, googleapiclient.errors.Error) as e:
|
|
286
|
+
msg.append("Failed to connect to instance {} at {}, error was {}".format(instance_name, cfg['ARVADOS_API_HOST'], e))
|
|
287
|
+
|
|
288
|
+
default_api = None
|
|
289
|
+
default_instance = None
|
|
226
290
|
try:
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
host=cfg['ARVADOS_API_HOST'],
|
|
246
|
-
token=cfg['ARVADOS_API_TOKEN'],
|
|
247
|
-
insecure=api_is_insecure,
|
|
248
|
-
num_retries=num_retries,
|
|
249
|
-
)
|
|
250
|
-
else:
|
|
251
|
-
abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
|
|
252
|
-
return client
|
|
291
|
+
default_api = arvados.api('v1', num_retries=num_retries)
|
|
292
|
+
default_instance = default_api.config()["ClusterID"]
|
|
293
|
+
except ValueError:
|
|
294
|
+
pass
|
|
295
|
+
except (httplib2.error.HttpLib2Error, googleapiclient.errors.Error, OSError) as e:
|
|
296
|
+
msg.append("Failed to connect to default instance, error was {}".format(e))
|
|
297
|
+
|
|
298
|
+
if default_api is not None and (not instance_name or instance_name == default_instance):
|
|
299
|
+
# Use default settings
|
|
300
|
+
return default_api
|
|
301
|
+
|
|
302
|
+
if instance_name and default_instance and instance_name != default_instance:
|
|
303
|
+
msg.append("Default credentials are for {} but need to connect to {}".format(default_instance, instance_name))
|
|
304
|
+
|
|
305
|
+
for m in msg:
|
|
306
|
+
logger.error(m)
|
|
307
|
+
|
|
308
|
+
abort('Unable to find usable ARVADOS_API_HOST and ARVADOS_API_TOKEN')
|
|
253
309
|
|
|
254
310
|
# Check if git is available
|
|
255
311
|
def check_git_availability():
|
|
@@ -587,6 +643,14 @@ def copy_collection(obj_uuid, src, dst, args):
|
|
|
587
643
|
).execute(num_retries=args.retries)['manifest_text']
|
|
588
644
|
return create_collection_from(c, src, dst, args)
|
|
589
645
|
|
|
646
|
+
if args.replication is None:
|
|
647
|
+
# Obtain default or fallback collection replication setting on the
|
|
648
|
+
# destination
|
|
649
|
+
try:
|
|
650
|
+
args.replication = int(dst.config()["Collections"]["DefaultReplication"])
|
|
651
|
+
except (KeyError, TypeError, ValueError):
|
|
652
|
+
args.replication = 2
|
|
653
|
+
|
|
590
654
|
# Fetch the collection's manifest.
|
|
591
655
|
manifest = c['manifest_text']
|
|
592
656
|
logger.debug("Copying collection %s with manifest: <%s>", obj_uuid, manifest)
|
|
@@ -678,7 +742,7 @@ def copy_collection(obj_uuid, src, dst, args):
|
|
|
678
742
|
|
|
679
743
|
try:
|
|
680
744
|
logger.debug("Putting block %s (%s bytes)", blockhash, loc.size)
|
|
681
|
-
dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
|
|
745
|
+
dst_locator = dst_keep.put(data, copies=args.replication, classes=(args.storage_classes or []))
|
|
682
746
|
with lock:
|
|
683
747
|
dst_locators[blockhash] = dst_locator
|
|
684
748
|
bytes_written += loc.size
|
|
@@ -870,17 +934,17 @@ def uuid_type(api, object_uuid):
|
|
|
870
934
|
def copy_from_http(url, src, dst, args):
|
|
871
935
|
|
|
872
936
|
project_uuid = args.project_uuid
|
|
873
|
-
|
|
937
|
+
# Ensure string of varying parameters is well-formed
|
|
874
938
|
prefer_cached_downloads = args.prefer_cached_downloads
|
|
875
939
|
|
|
876
940
|
cached = http_to_keep.check_cached_url(src, project_uuid, url, {},
|
|
877
|
-
varying_url_params=varying_url_params,
|
|
941
|
+
varying_url_params=args.varying_url_params,
|
|
878
942
|
prefer_cached_downloads=prefer_cached_downloads)
|
|
879
943
|
if cached[2] is not None:
|
|
880
944
|
return copy_collection(cached[2], src, dst, args)
|
|
881
945
|
|
|
882
946
|
cached = http_to_keep.http_to_keep(dst, project_uuid, url,
|
|
883
|
-
varying_url_params=varying_url_params,
|
|
947
|
+
varying_url_params=args.varying_url_params,
|
|
884
948
|
prefer_cached_downloads=prefer_cached_downloads)
|
|
885
949
|
|
|
886
950
|
if cached is not None:
|