arvados-python-client 2.7.1__tar.gz → 2.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arvados-python-client-2.7.1/arvados_python_client.egg-info → arvados-python-client-2.7.3}/PKG-INFO +1 -1
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/__init__.py +14 -5
- arvados-python-client-2.7.3/arvados/_version.py +1 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/arvfile.py +75 -26
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/collection.py +2 -1
- arvados-python-client-2.7.3/arvados/commands/_util.py +158 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/keepdocker.py +26 -24
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/diskcache.py +68 -61
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/keep.py +44 -46
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3/arvados_python_client.egg-info}/PKG-INFO +1 -1
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/SOURCES.txt +2 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_keepdocker.py +32 -4
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arvfile.py +1 -0
- arvados-python-client-2.7.3/tests/test_cmd_util.py +194 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_keep_client.py +36 -130
- arvados-python-client-2.7.3/tests/test_storage_classes.py +128 -0
- arvados-python-client-2.7.1/arvados/_version.py +0 -1
- arvados-python-client-2.7.1/arvados/commands/_util.py +0 -65
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/LICENSE-2.0.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/MANIFEST.in +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/README.rst +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_normalize_stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_pycurlhelper.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_ranges.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/api.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/cache.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/arv_copy.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/federation_migrate.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/get.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/ls.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/migrate19.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/put.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/run.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/ws.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/config.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/crunch.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/errors.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/events.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/http_to_keep.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/logging.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/retry.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/safeapi.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/timer.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/util.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/vocabulary.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados-v1-discovery.json +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/dependency_links.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/not-zip-safe +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/requires.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/top_level.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_version.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-copy +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-federation-migrate +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-get +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-keepdocker +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-ls +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-migrate-docker19 +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-normalize +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-put +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-ws +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/discovery2pydoc.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/setup.cfg +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/setup.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/arvados_testutil.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/keepstub.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/manifest_examples.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/performance_profiler.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/test_a_sample.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/run_test_server.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/slow_test.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_api.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_copy.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_get.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_ls.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_normalize.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_put.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_ws.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_benchmark_collections.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_cache.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_collections.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_crunch.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_errors.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_events.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_http.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_keep_locator.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_retry.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_retry_job_helpers.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_safeapi.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_sdk.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_util.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_vocabulary.py +0 -0
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
This module provides the entire Python SDK for Arvados. The most useful modules
|
|
7
7
|
include:
|
|
8
8
|
|
|
9
|
-
* arvados.api - After you `import arvados`, you can call `arvados.api
|
|
10
|
-
|
|
9
|
+
* arvados.api - After you `import arvados`, you can call `arvados.api` as a
|
|
10
|
+
shortcut to the client constructor function `arvados.api.api`.
|
|
11
11
|
|
|
12
12
|
* arvados.collection - The `arvados.collection.Collection` class provides a
|
|
13
13
|
high-level interface to read and write collections. It coordinates sending
|
|
@@ -26,15 +26,24 @@ import types
|
|
|
26
26
|
|
|
27
27
|
from collections import UserDict
|
|
28
28
|
|
|
29
|
-
from .
|
|
29
|
+
from . import api, errors, util
|
|
30
|
+
from .api import api_from_config, http_cache
|
|
30
31
|
from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
|
|
31
32
|
from arvados.keep import *
|
|
32
33
|
from arvados.stream import *
|
|
33
34
|
from .arvfile import StreamFileReader
|
|
34
35
|
from .logging import log_format, log_date_format, log_handler
|
|
35
36
|
from .retry import RetryLoop
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
|
|
38
|
+
# Previous versions of the PySDK used to say `from .api import api`. This
|
|
39
|
+
# made it convenient to call the API client constructor, but difficult to
|
|
40
|
+
# access the rest of the `arvados.api` module. The magic below fixes that
|
|
41
|
+
# bug while retaining backwards compatibility: `arvados.api` is now the
|
|
42
|
+
# module and you can import it normally, but we make that module callable so
|
|
43
|
+
# all the existing code that says `arvados.api('v1', ...)` still works.
|
|
44
|
+
class _CallableAPIModule(api.__class__):
|
|
45
|
+
__call__ = staticmethod(api.api)
|
|
46
|
+
api.__class__ = _CallableAPIModule
|
|
38
47
|
|
|
39
48
|
# Override logging module pulled in via `from ... import *`
|
|
40
49
|
# so users can `import arvados.logging`.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.7.3'
|
|
@@ -491,7 +491,7 @@ class _BlockManager(object):
|
|
|
491
491
|
self._put_queue = None
|
|
492
492
|
self._put_threads = None
|
|
493
493
|
self.lock = threading.Lock()
|
|
494
|
-
self.
|
|
494
|
+
self.prefetch_lookahead = self._keep.num_prefetch_threads
|
|
495
495
|
self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
|
|
496
496
|
self.copies = copies
|
|
497
497
|
self.storage_classes = storage_classes_func or (lambda: [])
|
|
@@ -803,7 +803,7 @@ class _BlockManager(object):
|
|
|
803
803
|
"""Initiate a background download of a block.
|
|
804
804
|
"""
|
|
805
805
|
|
|
806
|
-
if not self.
|
|
806
|
+
if not self.prefetch_lookahead:
|
|
807
807
|
return
|
|
808
808
|
|
|
809
809
|
with self.lock:
|
|
@@ -825,7 +825,7 @@ class ArvadosFile(object):
|
|
|
825
825
|
"""
|
|
826
826
|
|
|
827
827
|
__slots__ = ('parent', 'name', '_writers', '_committed',
|
|
828
|
-
'_segments', 'lock', '_current_bblock', 'fuse_entry')
|
|
828
|
+
'_segments', 'lock', '_current_bblock', 'fuse_entry', '_read_counter')
|
|
829
829
|
|
|
830
830
|
def __init__(self, parent, name, stream=[], segments=[]):
|
|
831
831
|
"""
|
|
@@ -846,6 +846,7 @@ class ArvadosFile(object):
|
|
|
846
846
|
for s in segments:
|
|
847
847
|
self._add_segment(stream, s.locator, s.range_size)
|
|
848
848
|
self._current_bblock = None
|
|
849
|
+
self._read_counter = 0
|
|
849
850
|
|
|
850
851
|
def writable(self):
|
|
851
852
|
return self.parent.writable()
|
|
@@ -1047,20 +1048,47 @@ class ArvadosFile(object):
|
|
|
1047
1048
|
# size == self.size()
|
|
1048
1049
|
pass
|
|
1049
1050
|
|
|
1050
|
-
def readfrom(self, offset, size, num_retries, exact=False):
|
|
1051
|
+
def readfrom(self, offset, size, num_retries, exact=False, return_memoryview=False):
|
|
1051
1052
|
"""Read up to `size` bytes from the file starting at `offset`.
|
|
1052
1053
|
|
|
1053
|
-
:
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1054
|
+
Arguments:
|
|
1055
|
+
|
|
1056
|
+
* exact: bool --- If False (default), return less data than
|
|
1057
|
+
requested if the read crosses a block boundary and the next
|
|
1058
|
+
block isn't cached. If True, only return less data than
|
|
1059
|
+
requested when hitting EOF.
|
|
1060
|
+
|
|
1061
|
+
* return_memoryview: bool --- If False (default) return a
|
|
1062
|
+
`bytes` object, which may entail making a copy in some
|
|
1063
|
+
situations. If True, return a `memoryview` object which may
|
|
1064
|
+
avoid making a copy, but may be incompatible with code
|
|
1065
|
+
expecting a `bytes` object.
|
|
1066
|
+
|
|
1057
1067
|
"""
|
|
1058
1068
|
|
|
1059
1069
|
with self.lock:
|
|
1060
1070
|
if size == 0 or offset >= self.size():
|
|
1061
|
-
return b''
|
|
1071
|
+
return memoryview(b'') if return_memoryview else b''
|
|
1062
1072
|
readsegs = locators_and_ranges(self._segments, offset, size)
|
|
1063
|
-
|
|
1073
|
+
|
|
1074
|
+
prefetch = None
|
|
1075
|
+
prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
|
|
1076
|
+
if prefetch_lookahead:
|
|
1077
|
+
# Doing prefetch on every read() call is surprisingly expensive
|
|
1078
|
+
# when we're trying to deliver data at 600+ MiBps and want
|
|
1079
|
+
# the read() fast path to be as lightweight as possible.
|
|
1080
|
+
#
|
|
1081
|
+
# Only prefetching every 128 read operations
|
|
1082
|
+
# dramatically reduces the overhead while still
|
|
1083
|
+
# getting the benefit of prefetching (e.g. when
|
|
1084
|
+
# reading 128 KiB at a time, it checks for prefetch
|
|
1085
|
+
# every 16 MiB).
|
|
1086
|
+
self._read_counter = (self._read_counter+1) % 128
|
|
1087
|
+
if self._read_counter == 1:
|
|
1088
|
+
prefetch = locators_and_ranges(self._segments,
|
|
1089
|
+
offset + size,
|
|
1090
|
+
config.KEEP_BLOCK_SIZE * prefetch_lookahead,
|
|
1091
|
+
limit=(1+prefetch_lookahead))
|
|
1064
1092
|
|
|
1065
1093
|
locs = set()
|
|
1066
1094
|
data = []
|
|
@@ -1068,17 +1096,22 @@ class ArvadosFile(object):
|
|
|
1068
1096
|
block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
|
|
1069
1097
|
if block:
|
|
1070
1098
|
blockview = memoryview(block)
|
|
1071
|
-
data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size]
|
|
1099
|
+
data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size])
|
|
1072
1100
|
locs.add(lr.locator)
|
|
1073
1101
|
else:
|
|
1074
1102
|
break
|
|
1075
1103
|
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1104
|
+
if prefetch:
|
|
1105
|
+
for lr in prefetch:
|
|
1106
|
+
if lr.locator not in locs:
|
|
1107
|
+
self.parent._my_block_manager().block_prefetch(lr.locator)
|
|
1108
|
+
locs.add(lr.locator)
|
|
1109
|
+
|
|
1110
|
+
if len(data) == 1:
|
|
1111
|
+
return data[0] if return_memoryview else data[0].tobytes()
|
|
1112
|
+
else:
|
|
1113
|
+
return memoryview(b''.join(data)) if return_memoryview else b''.join(data)
|
|
1080
1114
|
|
|
1081
|
-
return b''.join(data)
|
|
1082
1115
|
|
|
1083
1116
|
@must_be_writable
|
|
1084
1117
|
@synchronized
|
|
@@ -1243,33 +1276,49 @@ class ArvadosFileReader(ArvadosFileReaderBase):
|
|
|
1243
1276
|
|
|
1244
1277
|
@_FileLikeObjectBase._before_close
|
|
1245
1278
|
@retry_method
|
|
1246
|
-
def read(self, size=None,
|
|
1279
|
+
def read(self, size=-1, num_retries=None, return_memoryview=False):
    """Read up to `size` bytes from the file and return the result.

    Starts at the current file position and advances it by the number of
    bytes returned. If `size` is negative or None, read the entire
    remainder of the file.

    When reading the remainder of a file whose pointer is already at the
    end, the result is empty (`b''`, or an empty memory view), not None.

    Returns a `bytes` object, unless `return_memoryview` is True,
    in which case it returns a memory view, which may avoid an
    unnecessary data copy in some situations.

    """
    # Test for None first: `None < 0` raises TypeError on Python 3, so the
    # documented `size=None` spelling must short-circuit before comparing.
    if size is None or size < 0:
        data = []
        #
        # specify exact=False, return_memoryview=True here so that we
        # only copy data once into the final buffer.
        #
        rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
        while rd:
            data.append(rd)
            self._filepos += len(rd)
            rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
        joined = b''.join(data)
        return memoryview(joined) if return_memoryview else joined
    else:
        data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True, return_memoryview=return_memoryview)
        self._filepos += len(data)
        return data
|
|
1264
1308
|
|
|
1265
1309
|
@_FileLikeObjectBase._before_close
|
|
1266
1310
|
@retry_method
|
|
1267
|
-
def readfrom(self, offset, size, num_retries=None):
|
|
1311
|
+
def readfrom(self, offset, size, num_retries=None, return_memoryview=False):
    """Read up to `size` bytes starting at file offset `offset`.

    The current file position is neither consulted nor modified. The
    request is forwarded to the underlying `arvadosfile` with `exact=True`.

    The result is a `bytes` object by default. Pass
    `return_memoryview=True` to receive a memory view instead, which may
    avoid an unnecessary data copy in some situations.

    """
    backing_file = self.arvadosfile
    return backing_file.readfrom(
        offset,
        size,
        num_retries,
        exact=True,
        return_memoryview=return_memoryview,
    )
|
|
1273
1322
|
|
|
1274
1323
|
def flush(self):
|
|
1275
1324
|
pass
|
|
@@ -341,7 +341,7 @@ class RichCollectionBase(CollectionBase):
|
|
|
341
341
|
self,
|
|
342
342
|
path: str,
|
|
343
343
|
mode: str="r",
|
|
344
|
-
encoding: Optional[str]=None
|
|
344
|
+
encoding: Optional[str]=None
|
|
345
345
|
) -> IO:
|
|
346
346
|
"""Open a file-like object within the collection
|
|
347
347
|
|
|
@@ -361,6 +361,7 @@ class RichCollectionBase(CollectionBase):
|
|
|
361
361
|
* encoding: str | None --- The text encoding of the file. Only used
|
|
362
362
|
when the file is opened in text mode. The default is
|
|
363
363
|
platform-dependent.
|
|
364
|
+
|
|
364
365
|
"""
|
|
365
366
|
if not re.search(r'^[rwa][bt]?\+?$', mode):
|
|
366
367
|
raise errors.ArgumentError("Invalid mode {!r}".format(mode))
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Copyright (C) The Arvados Authors. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import errno
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import signal
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
FILTER_STR_RE = re.compile(r'''
|
|
15
|
+
^\(
|
|
16
|
+
\ *(\w+)
|
|
17
|
+
\ *(<|<=|=|>=|>)
|
|
18
|
+
\ *(\w+)
|
|
19
|
+
\ *\)$
|
|
20
|
+
''', re.ASCII | re.VERBOSE)
|
|
21
|
+
|
|
22
|
+
def _pos_int(s):
    """Convert `s` to an int for argparse, refusing negative numbers.

    Zero is accepted. Raises ValueError if `s` is not an integer or if the
    integer is below zero.
    """
    value = int(s)
    if value >= 0:
        return value
    raise ValueError("can't accept negative value: %s" % (value,))

# Parent parser providing the shared `--retries` option.
retry_opt = argparse.ArgumentParser(add_help=False)
retry_opt.add_argument(
    '--retries',
    type=_pos_int,
    default=10,
    help="""
Maximum number of times to retry server requests that encounter temporary
failures (e.g., server down).  Default 10.""",
)
|
|
32
|
+
|
|
33
|
+
def _ignore_error(error):
    """Error policy for `errors='ignore'`: discard `error`, yield None."""
    return None

def _raise_error(error):
    """Error policy for every other `errors` value: raise `error`."""
    raise error

def make_home_conf_dir(path, mode=None, errors='ignore'):
    """Create a directory under the user's home directory.

    Arguments:

    * path: str --- Directory path, relative to the user's home directory.
      Parent directories are created as needed.

    * mode: int | None --- If given and the directory is newly created,
      it is chmod'ed to these permissions. A directory that already
      exists keeps its current permissions.

    * errors: str --- If `'ignore'` (default), return None when there is
      an error. Any other value raises the exception instead.

    Returns the absolute path of the directory, or None on an ignored
    error.
    """
    error_handler = _ignore_error if (errors == 'ignore') else _raise_error
    tilde_path = os.path.join('~', path)
    abs_path = os.path.expanduser(tilde_path)
    if abs_path == tilde_path:
        # expanduser() left "~" in place, so no home directory was found.
        return error_handler(ValueError("no home directory available"))
    try:
        os.makedirs(abs_path)
    except OSError as error:
        if error.errno != errno.EEXIST:
            return error_handler(error)
        # Already present: not newly created, so leave permissions alone.
        return abs_path
    if mode is not None:
        try:
            os.chmod(abs_path, mode)
        except OSError as error:
            # Honor the caller's error policy here too; previously a chmod
            # failure raised even when errors='ignore' was requested.
            return error_handler(error)
    return abs_path
|
|
60
|
+
|
|
61
|
+
# Signals that install_signal_handlers() routes to exit_signal_handler().
CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]

# Handlers that were in place before install_signal_handlers() ran, keyed by
# signal number. Defined up front so restore_signal_handlers() is a no-op
# (rather than a NameError) when nothing has been installed.
orig_signal_handlers = {}

def exit_signal_handler(sigcode, frame):
    """Log the caught signal to the 'arvados' logger and exit with `-sigcode`."""
    logging.getLogger('arvados').error("Caught signal {}, exiting.".format(sigcode))
    sys.exit(-sigcode)

def install_signal_handlers():
    """Install exit_signal_handler for every signal in CAUGHT_SIGNALS.

    The handlers being replaced are remembered in `orig_signal_handlers`
    so that restore_signal_handlers() can put them back.
    """
    global orig_signal_handlers
    orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
                            for sigcode in CAUGHT_SIGNALS}

def restore_signal_handlers():
    """Reinstall the handlers saved by install_signal_handlers().

    Does nothing if install_signal_handlers() has not been called.
    """
    for sigcode, orig_handler in orig_signal_handlers.items():
        if orig_handler is None:
            # signal.signal() reports None for a handler that was not
            # installed from Python; it cannot be passed back to
            # signal.signal(), so leave that signal untouched.
            continue
        signal.signal(sigcode, orig_handler)
|
|
75
|
+
|
|
76
|
+
def validate_filters(filters):
    """Check that `filters` has the shape of an Arvados filter list.

    `filters` must be a list. Each item must be either:

    * a list of exactly three items (field name, operator, operand) whose
      field name and operator are strings; or

    * a string matching `FILTER_STR_RE`, i.e. `(name op name)`, where
      neither name starts with a digit.

    Raises ValueError describing the first problem found, including the
    index of the offending filter.

    Returns the validated filters. Currently this is the same object that
    was passed in, unmodified. Future versions may normalize the filters,
    so callers SHOULD use the returned value for Arvados API calls.
    """
    if not isinstance(filters, list):
        raise ValueError(f"filters are not a list: {filters!r}")
    for index, f in enumerate(filters):
        if isinstance(f, list):
            try:
                s, op, o = f
            except ValueError:
                raise ValueError(
                    f"filter at index {index} does not have three items (field name, operator, operand): {f!r}",
                ) from None
            if not isinstance(s, str):
                raise ValueError(f"filter at index {index} field name is not a string: {s!r}")
            if not isinstance(op, str):
                raise ValueError(f"filter at index {index} operator is not a string: {op!r}")
        elif isinstance(f, str):
            match = FILTER_STR_RE.fullmatch(f)
            if match is None:
                raise ValueError(f"filter at index {index} has invalid syntax: {f!r}")
            s, op, o = match.groups()
            # Both sides of a string filter are field names.
            if s[0].isdigit():
                raise ValueError(f"filter at index {index} has invalid syntax: bad field name {s!r}")
            if o[0].isdigit():
                raise ValueError(f"filter at index {index} has invalid syntax: bad field name {o!r}")
        else:
            raise ValueError(f"filter at index {index} is not a string or list: {f!r}")
    return filters
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class JSONArgument:
    """argparse argument type that accepts inline JSON or a JSON file path

    Calling a JSONArgument with a string first tries to decode the string
    itself as JSON. If that fails, the string is treated as a path: the
    file is opened and its contents decoded as JSON. If that fails too,
    ValueError is raised with more detail.

    Typical usage looks like:

        parser = argparse.ArgumentParser()
        parser.add_argument('--object', type=JSONArgument(), ...)

    An optional validation function may be given to the constructor. It is
    called with the decoded object and its return value replaces that
    object. It should raise ValueError if there is a problem with the
    input. (argparse turns ValueError into a useful error message.)

        filters_type = JSONArgument(validate_filters)
        parser.add_argument('--filters', type=filters_type, ...)
    """
    def __init__(self, validator=None):
        self.validator = validator

    def __call__(self, value):
        try:
            retval = json.loads(value)
        except json.JSONDecodeError:
            # Not inline JSON; fall back to reading it as a file path.
            retval = self._load_path(value)
        if self.validator is None:
            return retval
        return self.validator(retval)

    def _load_path(self, value):
        """Decode the JSON file at path `value`, mapping failures to ValueError."""
        try:
            with open(value, 'rb') as json_file:
                return json.load(json_file)
        except json.JSONDecodeError as error:
            # Must precede the ValueError clause: JSONDecodeError subclasses it.
            raise ValueError(f"error decoding JSON from file {value!r}: {error}") from None
        except (FileNotFoundError, ValueError):
            raise ValueError(f"not a valid JSON string or file path: {value!r}") from None
        except OSError as error:
            raise ValueError(f"error reading JSON file path {value!r}: {error.strerror}") from None
|
|
@@ -2,34 +2,29 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from builtins import next
|
|
6
5
|
import argparse
|
|
7
6
|
import collections
|
|
8
7
|
import datetime
|
|
9
8
|
import errno
|
|
9
|
+
import fcntl
|
|
10
10
|
import json
|
|
11
|
+
import logging
|
|
11
12
|
import os
|
|
12
13
|
import re
|
|
14
|
+
import subprocess
|
|
13
15
|
import sys
|
|
14
16
|
import tarfile
|
|
15
17
|
import tempfile
|
|
16
|
-
|
|
17
|
-
import
|
|
18
|
-
import fcntl
|
|
18
|
+
|
|
19
|
+
import ciso8601
|
|
19
20
|
from operator import itemgetter
|
|
20
21
|
from stat import *
|
|
21
22
|
|
|
22
|
-
import subprocess
|
|
23
|
-
|
|
24
23
|
import arvados
|
|
24
|
+
import arvados.config
|
|
25
25
|
import arvados.util
|
|
26
26
|
import arvados.commands._util as arv_cmd
|
|
27
27
|
import arvados.commands.put as arv_put
|
|
28
|
-
from arvados.collection import CollectionReader
|
|
29
|
-
import ciso8601
|
|
30
|
-
import logging
|
|
31
|
-
import arvados.config
|
|
32
|
-
|
|
33
28
|
from arvados._version import __version__
|
|
34
29
|
|
|
35
30
|
logger = logging.getLogger('arvados.keepdocker')
|
|
@@ -356,6 +351,25 @@ def _uuid2pdh(api, uuid):
|
|
|
356
351
|
select=['portable_data_hash'],
|
|
357
352
|
).execute()['items'][0]['portable_data_hash']
|
|
358
353
|
|
|
354
|
+
def load_image_metadata(image_file):
    """Read the image manifest and image config out of an image archive

    `image_file` is an open binary file object holding a tar archive. It is
    rewound to the start before reading. The archive's `manifest.json` must
    be a JSON list with exactly one entry; that entry's `Config` value
    names the archive member holding the image configuration.

    Returns a 2-tuple `(image_manifest, image_config)` with both values
    deserialized from JSON. Raises ValueError if the manifest list does not
    contain exactly one manifest.
    """
    image_file.seek(0)
    with tarfile.open(fileobj=image_file) as archive:
        with archive.extractfile('manifest.json') as member:
            manifests = json.load(member)
        # arv-keepdocker only saves one image, so exactly one manifest is
        # expected. Single-target unpacking raises ValueError otherwise.
        [manifest] = manifests
        config_name = manifest['Config']
        with archive.extractfile(config_name) as member:
            config = json.load(member)
    return manifest, config
|
|
372
|
+
|
|
359
373
|
def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
|
|
360
374
|
args = arg_parser.parse_args(arguments)
|
|
361
375
|
if api is None:
|
|
@@ -532,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
|
|
|
532
546
|
# Managed properties could be already set
|
|
533
547
|
coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
|
|
534
548
|
coll_properties.update({"docker-image-repo-tag": image_repo_tag})
|
|
535
|
-
|
|
536
549
|
api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
|
|
537
550
|
|
|
538
|
-
|
|
539
|
-
image_file.seek(0)
|
|
540
|
-
image_tar = tarfile.open(fileobj=image_file)
|
|
541
|
-
image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
|
|
542
|
-
if image_hash_type:
|
|
543
|
-
json_filename = raw_image_hash + '.json'
|
|
544
|
-
else:
|
|
545
|
-
json_filename = raw_image_hash + '/json'
|
|
546
|
-
json_file = image_tar.extractfile(image_tar.getmember(json_filename))
|
|
547
|
-
image_metadata = json.loads(json_file.read().decode('utf-8'))
|
|
548
|
-
json_file.close()
|
|
549
|
-
image_tar.close()
|
|
551
|
+
_, image_metadata = load_image_metadata(image_file)
|
|
550
552
|
link_base = {'head_uuid': coll_uuid, 'properties': {}}
|
|
551
553
|
if 'created' in image_metadata:
|
|
552
554
|
link_base['properties']['image_timestamp'] = image_metadata['created']
|