arvados-python-client 2.7.1__tar.gz → 2.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arvados-python-client-2.7.1/arvados_python_client.egg-info → arvados-python-client-2.7.2}/PKG-INFO +1 -1
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/__init__.py +14 -5
- arvados-python-client-2.7.2/arvados/_version.py +1 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/arvfile.py +33 -10
- arvados-python-client-2.7.2/arvados/commands/_util.py +158 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/keepdocker.py +26 -24
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/diskcache.py +68 -61
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/keep.py +44 -46
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2/arvados_python_client.egg-info}/PKG-INFO +1 -1
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/SOURCES.txt +2 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_keepdocker.py +32 -4
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arvfile.py +1 -0
- arvados-python-client-2.7.2/tests/test_cmd_util.py +194 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_keep_client.py +36 -130
- arvados-python-client-2.7.2/tests/test_storage_classes.py +128 -0
- arvados-python-client-2.7.1/arvados/_version.py +0 -1
- arvados-python-client-2.7.1/arvados/commands/_util.py +0 -65
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/LICENSE-2.0.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/MANIFEST.in +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/README.rst +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_normalize_stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_pycurlhelper.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_ranges.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/api.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/cache.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/collection.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/arv_copy.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/federation_migrate.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/get.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/ls.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/migrate19.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/put.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/run.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/ws.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/config.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/crunch.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/errors.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/events.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/http_to_keep.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/logging.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/retry.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/safeapi.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/timer.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/util.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/vocabulary.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados-v1-discovery.json +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/dependency_links.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/not-zip-safe +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/requires.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/top_level.txt +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_version.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-copy +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-federation-migrate +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-get +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-keepdocker +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-ls +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-migrate-docker19 +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-normalize +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-put +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-ws +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/discovery2pydoc.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/setup.cfg +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/setup.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/arvados_testutil.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/keepstub.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/manifest_examples.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/__init__.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/performance_profiler.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/test_a_sample.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/run_test_server.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/slow_test.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_api.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_copy.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_get.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_ls.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_normalize.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_put.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_ws.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_benchmark_collections.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_cache.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_collections.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_crunch.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_errors.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_events.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_http.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_keep_locator.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_retry.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_retry_job_helpers.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_safeapi.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_sdk.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_stream.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_util.py +0 -0
- {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_vocabulary.py +0 -0
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
This module provides the entire Python SDK for Arvados. The most useful modules
|
|
7
7
|
include:
|
|
8
8
|
|
|
9
|
-
* arvados.api - After you `import arvados`, you can call `arvados.api
|
|
10
|
-
|
|
9
|
+
* arvados.api - After you `import arvados`, you can call `arvados.api` as a
|
|
10
|
+
shortcut to the client constructor function `arvados.api.api`.
|
|
11
11
|
|
|
12
12
|
* arvados.collection - The `arvados.collection.Collection` class provides a
|
|
13
13
|
high-level interface to read and write collections. It coordinates sending
|
|
@@ -26,15 +26,24 @@ import types
|
|
|
26
26
|
|
|
27
27
|
from collections import UserDict
|
|
28
28
|
|
|
29
|
-
from .
|
|
29
|
+
from . import api, errors, util
|
|
30
|
+
from .api import api_from_config, http_cache
|
|
30
31
|
from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
|
|
31
32
|
from arvados.keep import *
|
|
32
33
|
from arvados.stream import *
|
|
33
34
|
from .arvfile import StreamFileReader
|
|
34
35
|
from .logging import log_format, log_date_format, log_handler
|
|
35
36
|
from .retry import RetryLoop
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
|
|
38
|
+
# Previous versions of the PySDK used to say `from .api import api`. This
|
|
39
|
+
# made it convenient to call the API client constructor, but difficult to
|
|
40
|
+
# access the rest of the `arvados.api` module. The magic below fixes that
|
|
41
|
+
# bug while retaining backwards compatibility: `arvados.api` is now the
|
|
42
|
+
# module and you can import it normally, but we make that module callable so
|
|
43
|
+
# all the existing code that says `arvados.api('v1', ...)` still works.
|
|
44
|
+
class _CallableAPIModule(api.__class__):
|
|
45
|
+
__call__ = staticmethod(api.api)
|
|
46
|
+
api.__class__ = _CallableAPIModule
|
|
38
47
|
|
|
39
48
|
# Override logging module pulled in via `from ... import *`
|
|
40
49
|
# so users can `import arvados.logging`.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.7.2'
|
|
@@ -491,7 +491,7 @@ class _BlockManager(object):
|
|
|
491
491
|
self._put_queue = None
|
|
492
492
|
self._put_threads = None
|
|
493
493
|
self.lock = threading.Lock()
|
|
494
|
-
self.
|
|
494
|
+
self.prefetch_lookahead = self._keep.num_prefetch_threads
|
|
495
495
|
self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
|
|
496
496
|
self.copies = copies
|
|
497
497
|
self.storage_classes = storage_classes_func or (lambda: [])
|
|
@@ -803,7 +803,7 @@ class _BlockManager(object):
|
|
|
803
803
|
"""Initiate a background download of a block.
|
|
804
804
|
"""
|
|
805
805
|
|
|
806
|
-
if not self.
|
|
806
|
+
if not self.prefetch_lookahead:
|
|
807
807
|
return
|
|
808
808
|
|
|
809
809
|
with self.lock:
|
|
@@ -825,7 +825,7 @@ class ArvadosFile(object):
|
|
|
825
825
|
"""
|
|
826
826
|
|
|
827
827
|
__slots__ = ('parent', 'name', '_writers', '_committed',
|
|
828
|
-
'_segments', 'lock', '_current_bblock', 'fuse_entry')
|
|
828
|
+
'_segments', 'lock', '_current_bblock', 'fuse_entry', '_read_counter')
|
|
829
829
|
|
|
830
830
|
def __init__(self, parent, name, stream=[], segments=[]):
|
|
831
831
|
"""
|
|
@@ -846,6 +846,7 @@ class ArvadosFile(object):
|
|
|
846
846
|
for s in segments:
|
|
847
847
|
self._add_segment(stream, s.locator, s.range_size)
|
|
848
848
|
self._current_bblock = None
|
|
849
|
+
self._read_counter = 0
|
|
849
850
|
|
|
850
851
|
def writable(self):
|
|
851
852
|
return self.parent.writable()
|
|
@@ -1060,7 +1061,25 @@ class ArvadosFile(object):
|
|
|
1060
1061
|
if size == 0 or offset >= self.size():
|
|
1061
1062
|
return b''
|
|
1062
1063
|
readsegs = locators_and_ranges(self._segments, offset, size)
|
|
1063
|
-
|
|
1064
|
+
|
|
1065
|
+
prefetch = None
|
|
1066
|
+
prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
|
|
1067
|
+
if prefetch_lookahead:
|
|
1068
|
+
# Doing prefetch on every read() call is surprisingly expensive
|
|
1069
|
+
# when we're trying to deliver data at 600+ MiBps and want
|
|
1070
|
+
# the read() fast path to be as lightweight as possible.
|
|
1071
|
+
#
|
|
1072
|
+
# Only prefetching every 128 read operations
|
|
1073
|
+
# dramatically reduces the overhead while still
|
|
1074
|
+
# getting the benefit of prefetching (e.g. when
|
|
1075
|
+
# reading 128 KiB at a time, it checks for prefetch
|
|
1076
|
+
# every 16 MiB).
|
|
1077
|
+
self._read_counter = (self._read_counter+1) % 128
|
|
1078
|
+
if self._read_counter == 1:
|
|
1079
|
+
prefetch = locators_and_ranges(self._segments,
|
|
1080
|
+
offset + size,
|
|
1081
|
+
config.KEEP_BLOCK_SIZE * prefetch_lookahead,
|
|
1082
|
+
limit=(1+prefetch_lookahead))
|
|
1064
1083
|
|
|
1065
1084
|
locs = set()
|
|
1066
1085
|
data = []
|
|
@@ -1068,17 +1087,21 @@ class ArvadosFile(object):
|
|
|
1068
1087
|
block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
|
|
1069
1088
|
if block:
|
|
1070
1089
|
blockview = memoryview(block)
|
|
1071
|
-
data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size]
|
|
1090
|
+
data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size])
|
|
1072
1091
|
locs.add(lr.locator)
|
|
1073
1092
|
else:
|
|
1074
1093
|
break
|
|
1075
1094
|
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1095
|
+
if prefetch:
|
|
1096
|
+
for lr in prefetch:
|
|
1097
|
+
if lr.locator not in locs:
|
|
1098
|
+
self.parent._my_block_manager().block_prefetch(lr.locator)
|
|
1099
|
+
locs.add(lr.locator)
|
|
1080
1100
|
|
|
1081
|
-
|
|
1101
|
+
if len(data) == 1:
|
|
1102
|
+
return data[0]
|
|
1103
|
+
else:
|
|
1104
|
+
return b''.join(data)
|
|
1082
1105
|
|
|
1083
1106
|
@must_be_writable
|
|
1084
1107
|
@synchronized
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Copyright (C) The Arvados Authors. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import errno
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import signal
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
FILTER_STR_RE = re.compile(r'''
|
|
15
|
+
^\(
|
|
16
|
+
\ *(\w+)
|
|
17
|
+
\ *(<|<=|=|>=|>)
|
|
18
|
+
\ *(\w+)
|
|
19
|
+
\ *\)$
|
|
20
|
+
''', re.ASCII | re.VERBOSE)
|
|
21
|
+
|
|
22
|
+
def _pos_int(s):
|
|
23
|
+
num = int(s)
|
|
24
|
+
if num < 0:
|
|
25
|
+
raise ValueError("can't accept negative value: %s" % (num,))
|
|
26
|
+
return num
|
|
27
|
+
|
|
28
|
+
retry_opt = argparse.ArgumentParser(add_help=False)
|
|
29
|
+
retry_opt.add_argument('--retries', type=_pos_int, default=10, help="""
|
|
30
|
+
Maximum number of times to retry server requests that encounter temporary
|
|
31
|
+
failures (e.g., server down). Default 10.""")
|
|
32
|
+
|
|
33
|
+
def _ignore_error(error):
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
def _raise_error(error):
|
|
37
|
+
raise error
|
|
38
|
+
|
|
39
|
+
def make_home_conf_dir(path, mode=None, errors='ignore'):
|
|
40
|
+
# Make the directory path under the user's home directory, making parent
|
|
41
|
+
# directories as needed.
|
|
42
|
+
# If the directory is newly created, and a mode is specified, chmod it
|
|
43
|
+
# with those permissions.
|
|
44
|
+
# If there's an error, return None if errors is 'ignore', else raise an
|
|
45
|
+
# exception.
|
|
46
|
+
error_handler = _ignore_error if (errors == 'ignore') else _raise_error
|
|
47
|
+
tilde_path = os.path.join('~', path)
|
|
48
|
+
abs_path = os.path.expanduser(tilde_path)
|
|
49
|
+
if abs_path == tilde_path:
|
|
50
|
+
return error_handler(ValueError("no home directory available"))
|
|
51
|
+
try:
|
|
52
|
+
os.makedirs(abs_path)
|
|
53
|
+
except OSError as error:
|
|
54
|
+
if error.errno != errno.EEXIST:
|
|
55
|
+
return error_handler(error)
|
|
56
|
+
else:
|
|
57
|
+
if mode is not None:
|
|
58
|
+
os.chmod(abs_path, mode)
|
|
59
|
+
return abs_path
|
|
60
|
+
|
|
61
|
+
CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
|
|
62
|
+
|
|
63
|
+
def exit_signal_handler(sigcode, frame):
|
|
64
|
+
logging.getLogger('arvados').error("Caught signal {}, exiting.".format(sigcode))
|
|
65
|
+
sys.exit(-sigcode)
|
|
66
|
+
|
|
67
|
+
def install_signal_handlers():
|
|
68
|
+
global orig_signal_handlers
|
|
69
|
+
orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
|
|
70
|
+
for sigcode in CAUGHT_SIGNALS}
|
|
71
|
+
|
|
72
|
+
def restore_signal_handlers():
|
|
73
|
+
for sigcode, orig_handler in orig_signal_handlers.items():
|
|
74
|
+
signal.signal(sigcode, orig_handler)
|
|
75
|
+
|
|
76
|
+
def validate_filters(filters):
|
|
77
|
+
"""Validate user-provided filters
|
|
78
|
+
|
|
79
|
+
This function validates that a user-defined object represents valid
|
|
80
|
+
Arvados filters that can be passed to an API client: that it's a list of
|
|
81
|
+
3-element lists with the field name and operator given as strings. If any
|
|
82
|
+
of these conditions are not true, it raises a ValueError with details about
|
|
83
|
+
the problem.
|
|
84
|
+
|
|
85
|
+
It returns validated filters. Currently the provided filters are returned
|
|
86
|
+
unmodified. Future versions of this function may clean up the filters with
|
|
87
|
+
"obvious" type conversions, so callers SHOULD use the returned value for
|
|
88
|
+
Arvados API calls.
|
|
89
|
+
"""
|
|
90
|
+
if not isinstance(filters, list):
|
|
91
|
+
raise ValueError(f"filters are not a list: {filters!r}")
|
|
92
|
+
for index, f in enumerate(filters):
|
|
93
|
+
if isinstance(f, str):
|
|
94
|
+
match = FILTER_STR_RE.fullmatch(f)
|
|
95
|
+
if match is None:
|
|
96
|
+
raise ValueError(f"filter at index {index} has invalid syntax: {f!r}")
|
|
97
|
+
s, op, o = match.groups()
|
|
98
|
+
if s[0].isdigit():
|
|
99
|
+
raise ValueError(f"filter at index {index} has invalid syntax: bad field name {s!r}")
|
|
100
|
+
if o[0].isdigit():
|
|
101
|
+
raise ValueError(f"filter at index {index} has invalid syntax: bad field name {o!r}")
|
|
102
|
+
continue
|
|
103
|
+
elif not isinstance(f, list):
|
|
104
|
+
raise ValueError(f"filter at index {index} is not a string or list: {f!r}")
|
|
105
|
+
try:
|
|
106
|
+
s, op, o = f
|
|
107
|
+
except ValueError:
|
|
108
|
+
raise ValueError(
|
|
109
|
+
f"filter at index {index} does not have three items (field name, operator, operand): {f!r}",
|
|
110
|
+
) from None
|
|
111
|
+
if not isinstance(s, str):
|
|
112
|
+
raise ValueError(f"filter at index {index} field name is not a string: {s!r}")
|
|
113
|
+
if not isinstance(op, str):
|
|
114
|
+
raise ValueError(f"filter at index {index} operator is not a string: {op!r}")
|
|
115
|
+
return filters
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class JSONArgument:
|
|
119
|
+
"""Parse a JSON file from a command line argument string or path
|
|
120
|
+
|
|
121
|
+
JSONArgument objects can be called with a string and return an arbitrary
|
|
122
|
+
object. First it will try to decode the string as JSON. If that fails, it
|
|
123
|
+
will try to open a file at the path named by the string, and decode it as
|
|
124
|
+
JSON. If that fails, it raises ValueError with more detail.
|
|
125
|
+
|
|
126
|
+
This is designed to be used as an argparse argument type.
|
|
127
|
+
Typical usage looks like:
|
|
128
|
+
|
|
129
|
+
parser = argparse.ArgumentParser()
|
|
130
|
+
parser.add_argument('--object', type=JSONArgument(), ...)
|
|
131
|
+
|
|
132
|
+
You can construct JSONArgument with an optional validation function. If
|
|
133
|
+
given, it is called with the object decoded from user input, and its
|
|
134
|
+
return value replaces it. It should raise ValueError if there is a problem
|
|
135
|
+
with the input. (argparse turns ValueError into a useful error message.)
|
|
136
|
+
|
|
137
|
+
filters_type = JSONArgument(validate_filters)
|
|
138
|
+
parser.add_argument('--filters', type=filters_type, ...)
|
|
139
|
+
"""
|
|
140
|
+
def __init__(self, validator=None):
|
|
141
|
+
self.validator = validator
|
|
142
|
+
|
|
143
|
+
def __call__(self, value):
|
|
144
|
+
try:
|
|
145
|
+
retval = json.loads(value)
|
|
146
|
+
except json.JSONDecodeError:
|
|
147
|
+
try:
|
|
148
|
+
with open(value, 'rb') as json_file:
|
|
149
|
+
retval = json.load(json_file)
|
|
150
|
+
except json.JSONDecodeError as error:
|
|
151
|
+
raise ValueError(f"error decoding JSON from file {value!r}: {error}") from None
|
|
152
|
+
except (FileNotFoundError, ValueError):
|
|
153
|
+
raise ValueError(f"not a valid JSON string or file path: {value!r}") from None
|
|
154
|
+
except OSError as error:
|
|
155
|
+
raise ValueError(f"error reading JSON file path {value!r}: {error.strerror}") from None
|
|
156
|
+
if self.validator is not None:
|
|
157
|
+
retval = self.validator(retval)
|
|
158
|
+
return retval
|
|
@@ -2,34 +2,29 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from builtins import next
|
|
6
5
|
import argparse
|
|
7
6
|
import collections
|
|
8
7
|
import datetime
|
|
9
8
|
import errno
|
|
9
|
+
import fcntl
|
|
10
10
|
import json
|
|
11
|
+
import logging
|
|
11
12
|
import os
|
|
12
13
|
import re
|
|
14
|
+
import subprocess
|
|
13
15
|
import sys
|
|
14
16
|
import tarfile
|
|
15
17
|
import tempfile
|
|
16
|
-
|
|
17
|
-
import
|
|
18
|
-
import fcntl
|
|
18
|
+
|
|
19
|
+
import ciso8601
|
|
19
20
|
from operator import itemgetter
|
|
20
21
|
from stat import *
|
|
21
22
|
|
|
22
|
-
import subprocess
|
|
23
|
-
|
|
24
23
|
import arvados
|
|
24
|
+
import arvados.config
|
|
25
25
|
import arvados.util
|
|
26
26
|
import arvados.commands._util as arv_cmd
|
|
27
27
|
import arvados.commands.put as arv_put
|
|
28
|
-
from arvados.collection import CollectionReader
|
|
29
|
-
import ciso8601
|
|
30
|
-
import logging
|
|
31
|
-
import arvados.config
|
|
32
|
-
|
|
33
28
|
from arvados._version import __version__
|
|
34
29
|
|
|
35
30
|
logger = logging.getLogger('arvados.keepdocker')
|
|
@@ -356,6 +351,25 @@ def _uuid2pdh(api, uuid):
|
|
|
356
351
|
select=['portable_data_hash'],
|
|
357
352
|
).execute()['items'][0]['portable_data_hash']
|
|
358
353
|
|
|
354
|
+
def load_image_metadata(image_file):
|
|
355
|
+
"""Load an image manifest and config from an archive
|
|
356
|
+
|
|
357
|
+
Given an image archive as an open binary file object, this function loads
|
|
358
|
+
the image manifest and configuration, deserializing each from JSON and
|
|
359
|
+
returning them in a 2-tuple of dicts.
|
|
360
|
+
"""
|
|
361
|
+
image_file.seek(0)
|
|
362
|
+
with tarfile.open(fileobj=image_file) as image_tar:
|
|
363
|
+
with image_tar.extractfile('manifest.json') as manifest_file:
|
|
364
|
+
image_manifest_list = json.load(manifest_file)
|
|
365
|
+
# Because arv-keepdocker only saves one image, there should only be
|
|
366
|
+
# one manifest. This extracts that from the list and raises
|
|
367
|
+
# ValueError if there's not exactly one.
|
|
368
|
+
image_manifest, = image_manifest_list
|
|
369
|
+
with image_tar.extractfile(image_manifest['Config']) as config_file:
|
|
370
|
+
image_config = json.load(config_file)
|
|
371
|
+
return image_manifest, image_config
|
|
372
|
+
|
|
359
373
|
def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
|
|
360
374
|
args = arg_parser.parse_args(arguments)
|
|
361
375
|
if api is None:
|
|
@@ -532,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
|
|
|
532
546
|
# Managed properties could be already set
|
|
533
547
|
coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
|
|
534
548
|
coll_properties.update({"docker-image-repo-tag": image_repo_tag})
|
|
535
|
-
|
|
536
549
|
api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
|
|
537
550
|
|
|
538
|
-
|
|
539
|
-
image_file.seek(0)
|
|
540
|
-
image_tar = tarfile.open(fileobj=image_file)
|
|
541
|
-
image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
|
|
542
|
-
if image_hash_type:
|
|
543
|
-
json_filename = raw_image_hash + '.json'
|
|
544
|
-
else:
|
|
545
|
-
json_filename = raw_image_hash + '/json'
|
|
546
|
-
json_file = image_tar.extractfile(image_tar.getmember(json_filename))
|
|
547
|
-
image_metadata = json.loads(json_file.read().decode('utf-8'))
|
|
548
|
-
json_file.close()
|
|
549
|
-
image_tar.close()
|
|
551
|
+
_, image_metadata = load_image_metadata(image_file)
|
|
550
552
|
link_base = {'head_uuid': coll_uuid, 'properties': {}}
|
|
551
553
|
if 'created' in image_metadata:
|
|
552
554
|
link_base['properties']['image_timestamp'] = image_metadata['created']
|
|
@@ -13,6 +13,7 @@ import time
|
|
|
13
13
|
import errno
|
|
14
14
|
import logging
|
|
15
15
|
import weakref
|
|
16
|
+
import collections
|
|
16
17
|
|
|
17
18
|
_logger = logging.getLogger('arvados.keep')
|
|
18
19
|
|
|
@@ -31,6 +32,15 @@ class DiskCacheSlot(object):
|
|
|
31
32
|
|
|
32
33
|
def get(self):
|
|
33
34
|
self.ready.wait()
|
|
35
|
+
# 'content' can be None, an empty byte string, or a nonempty mmap
|
|
36
|
+
# region. If it is an mmap region, we want to advise the
|
|
37
|
+
# kernel we're going to use it. This nudges the kernel to
|
|
38
|
+
# re-read most or all of the block if necessary (instead of
|
|
39
|
+
# just a few pages at a time), reducing the number of page
|
|
40
|
+
# faults and improving performance by 4x compared to not
|
|
41
|
+
# calling madvise.
|
|
42
|
+
if self.content:
|
|
43
|
+
self.content.madvise(mmap.MADV_WILLNEED)
|
|
34
44
|
return self.content
|
|
35
45
|
|
|
36
46
|
def set(self, value):
|
|
@@ -39,18 +49,18 @@ class DiskCacheSlot(object):
|
|
|
39
49
|
if value is None:
|
|
40
50
|
self.content = None
|
|
41
51
|
self.ready.set()
|
|
42
|
-
return
|
|
52
|
+
return False
|
|
43
53
|
|
|
44
54
|
if len(value) == 0:
|
|
45
55
|
# Can't mmap a 0 length file
|
|
46
56
|
self.content = b''
|
|
47
57
|
self.ready.set()
|
|
48
|
-
return
|
|
58
|
+
return True
|
|
49
59
|
|
|
50
60
|
if self.content is not None:
|
|
51
61
|
# Has been set already
|
|
52
62
|
self.ready.set()
|
|
53
|
-
return
|
|
63
|
+
return False
|
|
54
64
|
|
|
55
65
|
blockdir = os.path.join(self.cachedir, self.locator[0:3])
|
|
56
66
|
os.makedirs(blockdir, mode=0o700, exist_ok=True)
|
|
@@ -73,6 +83,7 @@ class DiskCacheSlot(object):
|
|
|
73
83
|
self.content = mmap.mmap(self.filehandle.fileno(), 0, access=mmap.ACCESS_READ)
|
|
74
84
|
# only set the event when mmap is successful
|
|
75
85
|
self.ready.set()
|
|
86
|
+
return True
|
|
76
87
|
finally:
|
|
77
88
|
if tmpfile is not None:
|
|
78
89
|
# If the tempfile hasn't been renamed on disk yet, try to delete it.
|
|
@@ -95,65 +106,61 @@ class DiskCacheSlot(object):
|
|
|
95
106
|
return len(self.content)
|
|
96
107
|
|
|
97
108
|
def evict(self):
|
|
98
|
-
if
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
+
if not self.content:
|
|
110
|
+
return
|
|
111
|
+
|
|
112
|
+
# The mmap region might be in use when we decided to evict
|
|
113
|
+
# it. This can happen if the cache is too small.
|
|
114
|
+
#
|
|
115
|
+
# If we call close() now, it'll throw an error if
|
|
116
|
+
# something tries to access it.
|
|
117
|
+
#
|
|
118
|
+
# However, we don't need to explicitly call mmap.close()
|
|
119
|
+
#
|
|
120
|
+
# I confirmed in mmapmodule.c that both close
|
|
121
|
+
# and deallocate do the same thing:
|
|
122
|
+
#
|
|
123
|
+
# a) close the file descriptor
|
|
124
|
+
# b) unmap the memory range
|
|
125
|
+
#
|
|
126
|
+
# So we can forget it in the cache and delete the file on
|
|
127
|
+
# disk, and it will tear it down after any other
|
|
128
|
+
# lingering Python references to the mapped memory are
|
|
129
|
+
# gone.
|
|
130
|
+
|
|
131
|
+
blockdir = os.path.join(self.cachedir, self.locator[0:3])
|
|
132
|
+
final = os.path.join(blockdir, self.locator) + cacheblock_suffix
|
|
133
|
+
try:
|
|
134
|
+
fcntl.flock(self.filehandle, fcntl.LOCK_UN)
|
|
135
|
+
|
|
136
|
+
# try to get an exclusive lock, this ensures other
|
|
137
|
+
# processes are not using the block. It is
|
|
138
|
+
# nonblocking and will throw an exception if we
|
|
139
|
+
# can't get it, which is fine because that means
|
|
140
|
+
# we just won't try to delete it.
|
|
109
141
|
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
142
|
+
# I should note here, the file locking is not
|
|
143
|
+
# strictly necessary, we could just remove it and
|
|
144
|
+
# the kernel would ensure that the underlying
|
|
145
|
+
# inode remains available as long as other
|
|
146
|
+
# processes still have the file open. However, if
|
|
147
|
+
# you have multiple processes sharing the cache
|
|
148
|
+
# and deleting each other's files, you'll end up
|
|
149
|
+
# with a bunch of ghost files that don't show up
|
|
150
|
+
# in the file system but are still taking up
|
|
151
|
+
# space, which isn't particularly user friendly.
|
|
152
|
+
# The locking strategy ensures that cache blocks
|
|
153
|
+
# in use remain visible.
|
|
112
154
|
#
|
|
113
|
-
|
|
114
|
-
# disk, and it will tear it down after any other
|
|
115
|
-
# lingering Python references to the mapped memory are
|
|
116
|
-
# gone.
|
|
117
|
-
|
|
118
|
-
blockdir = os.path.join(self.cachedir, self.locator[0:3])
|
|
119
|
-
final = os.path.join(blockdir, self.locator) + cacheblock_suffix
|
|
120
|
-
try:
|
|
121
|
-
fcntl.flock(self.filehandle, fcntl.LOCK_UN)
|
|
122
|
-
|
|
123
|
-
# try to get an exclusive lock, this ensures other
|
|
124
|
-
# processes are not using the block. It is
|
|
125
|
-
# nonblocking and will throw an exception if we
|
|
126
|
-
# can't get it, which is fine because that means
|
|
127
|
-
# we just won't try to delete it.
|
|
128
|
-
#
|
|
129
|
-
# I should note here, the file locking is not
|
|
130
|
-
# strictly necessary, we could just remove it and
|
|
131
|
-
# the kernel would ensure that the underlying
|
|
132
|
-
# inode remains available as long as other
|
|
133
|
-
# processes still have the file open. However, if
|
|
134
|
-
# you have multiple processes sharing the cache
|
|
135
|
-
# and deleting each other's files, you'll end up
|
|
136
|
-
# with a bunch of ghost files that don't show up
|
|
137
|
-
# in the file system but are still taking up
|
|
138
|
-
# space, which isn't particularly user friendly.
|
|
139
|
-
# The locking strategy ensures that cache blocks
|
|
140
|
-
# in use remain visible.
|
|
141
|
-
#
|
|
142
|
-
fcntl.flock(self.filehandle, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
143
|
-
|
|
144
|
-
os.remove(final)
|
|
145
|
-
return True
|
|
146
|
-
except OSError:
|
|
147
|
-
pass
|
|
148
|
-
finally:
|
|
149
|
-
self.filehandle = None
|
|
150
|
-
self.linger = weakref.ref(self.content)
|
|
151
|
-
self.content = None
|
|
152
|
-
return False
|
|
155
|
+
fcntl.flock(self.filehandle, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
153
156
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
+
os.remove(final)
|
|
158
|
+
return True
|
|
159
|
+
except OSError:
|
|
160
|
+
pass
|
|
161
|
+
finally:
|
|
162
|
+
self.filehandle = None
|
|
163
|
+
self.content = None
|
|
157
164
|
|
|
158
165
|
@staticmethod
|
|
159
166
|
def get_from_disk(locator, cachedir):
|
|
@@ -237,13 +244,13 @@ class DiskCacheSlot(object):
|
|
|
237
244
|
|
|
238
245
|
# Map in all the files we found, up to maxslots, if we exceed
|
|
239
246
|
# maxslots, start throwing things out.
|
|
240
|
-
cachelist =
|
|
247
|
+
cachelist: collections.OrderedDict = collections.OrderedDict()
|
|
241
248
|
for b in blocks:
|
|
242
249
|
got = DiskCacheSlot.get_from_disk(b[0], cachedir)
|
|
243
250
|
if got is None:
|
|
244
251
|
continue
|
|
245
252
|
if len(cachelist) < maxslots:
|
|
246
|
-
cachelist.
|
|
253
|
+
cachelist[got.locator] = got
|
|
247
254
|
else:
|
|
248
255
|
# we found more blocks than maxslots, try to
|
|
249
256
|
# throw it out of the cache.
|