arvados-python-client 2.7.1__tar.gz → 2.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {arvados-python-client-2.7.1/arvados_python_client.egg-info → arvados-python-client-2.7.2}/PKG-INFO +1 -1
  2. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/__init__.py +14 -5
  3. arvados-python-client-2.7.2/arvados/_version.py +1 -0
  4. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/arvfile.py +33 -10
  5. arvados-python-client-2.7.2/arvados/commands/_util.py +158 -0
  6. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/keepdocker.py +26 -24
  7. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/diskcache.py +68 -61
  8. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/keep.py +44 -46
  9. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2/arvados_python_client.egg-info}/PKG-INFO +1 -1
  10. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/SOURCES.txt +2 -0
  11. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_keepdocker.py +32 -4
  12. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arvfile.py +1 -0
  13. arvados-python-client-2.7.2/tests/test_cmd_util.py +194 -0
  14. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_keep_client.py +36 -130
  15. arvados-python-client-2.7.2/tests/test_storage_classes.py +128 -0
  16. arvados-python-client-2.7.1/arvados/_version.py +0 -1
  17. arvados-python-client-2.7.1/arvados/commands/_util.py +0 -65
  18. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/LICENSE-2.0.txt +0 -0
  19. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/MANIFEST.in +0 -0
  20. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/README.rst +0 -0
  21. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_normalize_stream.py +0 -0
  22. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_pycurlhelper.py +0 -0
  23. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/_ranges.py +0 -0
  24. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/api.py +0 -0
  25. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/cache.py +0 -0
  26. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/collection.py +0 -0
  27. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/__init__.py +0 -0
  28. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/arv_copy.py +0 -0
  29. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/federation_migrate.py +0 -0
  30. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/get.py +0 -0
  31. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/ls.py +0 -0
  32. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/migrate19.py +0 -0
  33. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/put.py +0 -0
  34. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/run.py +0 -0
  35. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/commands/ws.py +0 -0
  36. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/config.py +0 -0
  37. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/crunch.py +0 -0
  38. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/errors.py +0 -0
  39. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/events.py +0 -0
  40. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/http_to_keep.py +0 -0
  41. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/logging.py +0 -0
  42. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/retry.py +0 -0
  43. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/safeapi.py +0 -0
  44. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/stream.py +0 -0
  45. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/timer.py +0 -0
  46. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/util.py +0 -0
  47. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados/vocabulary.py +0 -0
  48. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados-v1-discovery.json +0 -0
  49. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/dependency_links.txt +0 -0
  50. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/not-zip-safe +0 -0
  51. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/requires.txt +0 -0
  52. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_python_client.egg-info/top_level.txt +0 -0
  53. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/arvados_version.py +0 -0
  54. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-copy +0 -0
  55. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-federation-migrate +0 -0
  56. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-get +0 -0
  57. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-keepdocker +0 -0
  58. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-ls +0 -0
  59. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-migrate-docker19 +0 -0
  60. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-normalize +0 -0
  61. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-put +0 -0
  62. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/bin/arv-ws +0 -0
  63. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/discovery2pydoc.py +0 -0
  64. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/setup.cfg +0 -0
  65. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/setup.py +0 -0
  66. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/__init__.py +0 -0
  67. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/arvados_testutil.py +0 -0
  68. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/keepstub.py +0 -0
  69. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/manifest_examples.py +0 -0
  70. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/__init__.py +0 -0
  71. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/performance_profiler.py +0 -0
  72. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/performance/test_a_sample.py +0 -0
  73. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/run_test_server.py +0 -0
  74. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/slow_test.py +0 -0
  75. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_api.py +0 -0
  76. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_copy.py +0 -0
  77. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_get.py +0 -0
  78. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_ls.py +0 -0
  79. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_normalize.py +0 -0
  80. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_put.py +0 -0
  81. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_arv_ws.py +0 -0
  82. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_benchmark_collections.py +0 -0
  83. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_cache.py +0 -0
  84. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_collections.py +0 -0
  85. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_crunch.py +0 -0
  86. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_errors.py +0 -0
  87. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_events.py +0 -0
  88. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_http.py +0 -0
  89. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_keep_locator.py +0 -0
  90. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_retry.py +0 -0
  91. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_retry_job_helpers.py +0 -0
  92. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_safeapi.py +0 -0
  93. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_sdk.py +0 -0
  94. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_stream.py +0 -0
  95. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_util.py +0 -0
  96. {arvados-python-client-2.7.1 → arvados-python-client-2.7.2}/tests/test_vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.1
2
2
  Name: arvados-python-client
3
- Version: 2.7.1
3
+ Version: 2.7.2
4
4
  Summary: Arvados client library
5
5
  Home-page: https://arvados.org
6
6
  Author: Arvados
@@ -6,8 +6,8 @@
6
6
  This module provides the entire Python SDK for Arvados. The most useful modules
7
7
  include:
8
8
 
9
- * arvados.api - After you `import arvados`, you can call `arvados.api.api` as
10
- `arvados.api` to construct a client object.
9
+ * arvados.api - After you `import arvados`, you can call `arvados.api` as a
10
+ shortcut to the client constructor function `arvados.api.api`.
11
11
 
12
12
  * arvados.collection - The `arvados.collection.Collection` class provides a
13
13
  high-level interface to read and write collections. It coordinates sending
@@ -26,15 +26,24 @@ import types
26
26
 
27
27
  from collections import UserDict
28
28
 
29
- from .api import api, api_from_config, http_cache
29
+ from . import api, errors, util
30
+ from .api import api_from_config, http_cache
30
31
  from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
31
32
  from arvados.keep import *
32
33
  from arvados.stream import *
33
34
  from .arvfile import StreamFileReader
34
35
  from .logging import log_format, log_date_format, log_handler
35
36
  from .retry import RetryLoop
36
- import arvados.errors as errors
37
- import arvados.util as util
37
+
38
+ # Previous versions of the PySDK used to say `from .api import api`. This
39
+ # made it convenient to call the API client constructor, but difficult to
40
+ # access the rest of the `arvados.api` module. The magic below fixes that
41
+ # bug while retaining backwards compatibility: `arvados.api` is now the
42
+ # module and you can import it normally, but we make that module callable so
43
+ # all the existing code that says `arvados.api('v1', ...)` still works.
44
+ class _CallableAPIModule(api.__class__):
45
+ __call__ = staticmethod(api.api)
46
+ api.__class__ = _CallableAPIModule
38
47
 
39
48
  # Override logging module pulled in via `from ... import *`
40
49
  # so users can `import arvados.logging`.
@@ -0,0 +1 @@
1
+ __version__ = '2.7.2'
@@ -491,7 +491,7 @@ class _BlockManager(object):
491
491
  self._put_queue = None
492
492
  self._put_threads = None
493
493
  self.lock = threading.Lock()
494
- self.prefetch_enabled = True
494
+ self.prefetch_lookahead = self._keep.num_prefetch_threads
495
495
  self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
496
496
  self.copies = copies
497
497
  self.storage_classes = storage_classes_func or (lambda: [])
@@ -803,7 +803,7 @@ class _BlockManager(object):
803
803
  """Initiate a background download of a block.
804
804
  """
805
805
 
806
- if not self.prefetch_enabled:
806
+ if not self.prefetch_lookahead:
807
807
  return
808
808
 
809
809
  with self.lock:
@@ -825,7 +825,7 @@ class ArvadosFile(object):
825
825
  """
826
826
 
827
827
  __slots__ = ('parent', 'name', '_writers', '_committed',
828
- '_segments', 'lock', '_current_bblock', 'fuse_entry')
828
+ '_segments', 'lock', '_current_bblock', 'fuse_entry', '_read_counter')
829
829
 
830
830
  def __init__(self, parent, name, stream=[], segments=[]):
831
831
  """
@@ -846,6 +846,7 @@ class ArvadosFile(object):
846
846
  for s in segments:
847
847
  self._add_segment(stream, s.locator, s.range_size)
848
848
  self._current_bblock = None
849
+ self._read_counter = 0
849
850
 
850
851
  def writable(self):
851
852
  return self.parent.writable()
@@ -1060,7 +1061,25 @@ class ArvadosFile(object):
1060
1061
  if size == 0 or offset >= self.size():
1061
1062
  return b''
1062
1063
  readsegs = locators_and_ranges(self._segments, offset, size)
1063
- prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads, limit=32)
1064
+
1065
+ prefetch = None
1066
+ prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
1067
+ if prefetch_lookahead:
1068
+ # Doing prefetch on every read() call is surprisingly expensive
1069
+ # when we're trying to deliver data at 600+ MiBps and want
1070
+ # the read() fast path to be as lightweight as possible.
1071
+ #
1072
+ # Only prefetching every 128 read operations
1073
+ # dramatically reduces the overhead while still
1074
+ # getting the benefit of prefetching (e.g. when
1075
+ # reading 128 KiB at a time, it checks for prefetch
1076
+ # every 16 MiB).
1077
+ self._read_counter = (self._read_counter+1) % 128
1078
+ if self._read_counter == 1:
1079
+ prefetch = locators_and_ranges(self._segments,
1080
+ offset + size,
1081
+ config.KEEP_BLOCK_SIZE * prefetch_lookahead,
1082
+ limit=(1+prefetch_lookahead))
1064
1083
 
1065
1084
  locs = set()
1066
1085
  data = []
@@ -1068,17 +1087,21 @@ class ArvadosFile(object):
1068
1087
  block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
1069
1088
  if block:
1070
1089
  blockview = memoryview(block)
1071
- data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
1090
+ data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size])
1072
1091
  locs.add(lr.locator)
1073
1092
  else:
1074
1093
  break
1075
1094
 
1076
- for lr in prefetch:
1077
- if lr.locator not in locs:
1078
- self.parent._my_block_manager().block_prefetch(lr.locator)
1079
- locs.add(lr.locator)
1095
+ if prefetch:
1096
+ for lr in prefetch:
1097
+ if lr.locator not in locs:
1098
+ self.parent._my_block_manager().block_prefetch(lr.locator)
1099
+ locs.add(lr.locator)
1080
1100
 
1081
- return b''.join(data)
1101
+ if len(data) == 1:
1102
+ return data[0]
1103
+ else:
1104
+ return b''.join(data)
1082
1105
 
1083
1106
  @must_be_writable
1084
1107
  @synchronized
@@ -0,0 +1,158 @@
1
+ # Copyright (C) The Arvados Authors. All rights reserved.
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import argparse
6
+ import errno
7
+ import json
8
+ import logging
9
+ import os
10
+ import re
11
+ import signal
12
+ import sys
13
+
14
+ FILTER_STR_RE = re.compile(r'''
15
+ ^\(
16
+ \ *(\w+)
17
+ \ *(<|<=|=|>=|>)
18
+ \ *(\w+)
19
+ \ *\)$
20
+ ''', re.ASCII | re.VERBOSE)
21
+
22
+ def _pos_int(s):
23
+ num = int(s)
24
+ if num < 0:
25
+ raise ValueError("can't accept negative value: %s" % (num,))
26
+ return num
27
+
28
+ retry_opt = argparse.ArgumentParser(add_help=False)
29
+ retry_opt.add_argument('--retries', type=_pos_int, default=10, help="""
30
+ Maximum number of times to retry server requests that encounter temporary
31
+ failures (e.g., server down). Default 10.""")
32
+
33
+ def _ignore_error(error):
34
+ return None
35
+
36
+ def _raise_error(error):
37
+ raise error
38
+
39
+ def make_home_conf_dir(path, mode=None, errors='ignore'):
40
+ # Make the directory path under the user's home directory, making parent
41
+ # directories as needed.
42
+ # If the directory is newly created, and a mode is specified, chmod it
43
+ # with those permissions.
44
+ # If there's an error, return None if errors is 'ignore', else raise an
45
+ # exception.
46
+ error_handler = _ignore_error if (errors == 'ignore') else _raise_error
47
+ tilde_path = os.path.join('~', path)
48
+ abs_path = os.path.expanduser(tilde_path)
49
+ if abs_path == tilde_path:
50
+ return error_handler(ValueError("no home directory available"))
51
+ try:
52
+ os.makedirs(abs_path)
53
+ except OSError as error:
54
+ if error.errno != errno.EEXIST:
55
+ return error_handler(error)
56
+ else:
57
+ if mode is not None:
58
+ os.chmod(abs_path, mode)
59
+ return abs_path
60
+
61
+ CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
62
+
63
+ def exit_signal_handler(sigcode, frame):
64
+ logging.getLogger('arvados').error("Caught signal {}, exiting.".format(sigcode))
65
+ sys.exit(-sigcode)
66
+
67
+ def install_signal_handlers():
68
+ global orig_signal_handlers
69
+ orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
70
+ for sigcode in CAUGHT_SIGNALS}
71
+
72
+ def restore_signal_handlers():
73
+ for sigcode, orig_handler in orig_signal_handlers.items():
74
+ signal.signal(sigcode, orig_handler)
75
+
76
+ def validate_filters(filters):
77
+ """Validate user-provided filters
78
+
79
+ This function validates that a user-defined object represents valid
80
+ Arvados filters that can be passed to an API client: that it's a list of
81
+ 3-element lists with the field name and operator given as strings. If any
82
+ of these conditions are not true, it raises a ValueError with details about
83
+ the problem.
84
+
85
+ It returns validated filters. Currently the provided filters are returned
86
+ unmodified. Future versions of this function may clean up the filters with
87
+ "obvious" type conversions, so callers SHOULD use the returned value for
88
+ Arvados API calls.
89
+ """
90
+ if not isinstance(filters, list):
91
+ raise ValueError(f"filters are not a list: {filters!r}")
92
+ for index, f in enumerate(filters):
93
+ if isinstance(f, str):
94
+ match = FILTER_STR_RE.fullmatch(f)
95
+ if match is None:
96
+ raise ValueError(f"filter at index {index} has invalid syntax: {f!r}")
97
+ s, op, o = match.groups()
98
+ if s[0].isdigit():
99
+ raise ValueError(f"filter at index {index} has invalid syntax: bad field name {s!r}")
100
+ if o[0].isdigit():
101
+ raise ValueError(f"filter at index {index} has invalid syntax: bad field name {o!r}")
102
+ continue
103
+ elif not isinstance(f, list):
104
+ raise ValueError(f"filter at index {index} is not a string or list: {f!r}")
105
+ try:
106
+ s, op, o = f
107
+ except ValueError:
108
+ raise ValueError(
109
+ f"filter at index {index} does not have three items (field name, operator, operand): {f!r}",
110
+ ) from None
111
+ if not isinstance(s, str):
112
+ raise ValueError(f"filter at index {index} field name is not a string: {s!r}")
113
+ if not isinstance(op, str):
114
+ raise ValueError(f"filter at index {index} operator is not a string: {op!r}")
115
+ return filters
116
+
117
+
118
+ class JSONArgument:
119
+ """Parse a JSON file from a command line argument string or path
120
+
121
+ JSONArgument objects can be called with a string and return an arbitrary
122
+ object. First it will try to decode the string as JSON. If that fails, it
123
+ will try to open a file at the path named by the string, and decode it as
124
+ JSON. If that fails, it raises ValueError with more detail.
125
+
126
+ This is designed to be used as an argparse argument type.
127
+ Typical usage looks like:
128
+
129
+ parser = argparse.ArgumentParser()
130
+ parser.add_argument('--object', type=JSONArgument(), ...)
131
+
132
+ You can construct JSONArgument with an optional validation function. If
133
+ given, it is called with the object decoded from user input, and its
134
+ return value replaces it. It should raise ValueError if there is a problem
135
+ with the input. (argparse turns ValueError into a useful error message.)
136
+
137
+ filters_type = JSONArgument(validate_filters)
138
+ parser.add_argument('--filters', type=filters_type, ...)
139
+ """
140
+ def __init__(self, validator=None):
141
+ self.validator = validator
142
+
143
+ def __call__(self, value):
144
+ try:
145
+ retval = json.loads(value)
146
+ except json.JSONDecodeError:
147
+ try:
148
+ with open(value, 'rb') as json_file:
149
+ retval = json.load(json_file)
150
+ except json.JSONDecodeError as error:
151
+ raise ValueError(f"error decoding JSON from file {value!r}: {error}") from None
152
+ except (FileNotFoundError, ValueError):
153
+ raise ValueError(f"not a valid JSON string or file path: {value!r}") from None
154
+ except OSError as error:
155
+ raise ValueError(f"error reading JSON file path {value!r}: {error.strerror}") from None
156
+ if self.validator is not None:
157
+ retval = self.validator(retval)
158
+ return retval
@@ -2,34 +2,29 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- from builtins import next
6
5
  import argparse
7
6
  import collections
8
7
  import datetime
9
8
  import errno
9
+ import fcntl
10
10
  import json
11
+ import logging
11
12
  import os
12
13
  import re
14
+ import subprocess
13
15
  import sys
14
16
  import tarfile
15
17
  import tempfile
16
- import shutil
17
- import _strptime
18
- import fcntl
18
+
19
+ import ciso8601
19
20
  from operator import itemgetter
20
21
  from stat import *
21
22
 
22
- import subprocess
23
-
24
23
  import arvados
24
+ import arvados.config
25
25
  import arvados.util
26
26
  import arvados.commands._util as arv_cmd
27
27
  import arvados.commands.put as arv_put
28
- from arvados.collection import CollectionReader
29
- import ciso8601
30
- import logging
31
- import arvados.config
32
-
33
28
  from arvados._version import __version__
34
29
 
35
30
  logger = logging.getLogger('arvados.keepdocker')
@@ -356,6 +351,25 @@ def _uuid2pdh(api, uuid):
356
351
  select=['portable_data_hash'],
357
352
  ).execute()['items'][0]['portable_data_hash']
358
353
 
354
+ def load_image_metadata(image_file):
355
+ """Load an image manifest and config from an archive
356
+
357
+ Given an image archive as an open binary file object, this function loads
358
+ the image manifest and configuration, deserializing each from JSON and
359
+ returning them in a 2-tuple of dicts.
360
+ """
361
+ image_file.seek(0)
362
+ with tarfile.open(fileobj=image_file) as image_tar:
363
+ with image_tar.extractfile('manifest.json') as manifest_file:
364
+ image_manifest_list = json.load(manifest_file)
365
+ # Because arv-keepdocker only saves one image, there should only be
366
+ # one manifest. This extracts that from the list and raises
367
+ # ValueError if there's not exactly one.
368
+ image_manifest, = image_manifest_list
369
+ with image_tar.extractfile(image_manifest['Config']) as config_file:
370
+ image_config = json.load(config_file)
371
+ return image_manifest, image_config
372
+
359
373
  def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
360
374
  args = arg_parser.parse_args(arguments)
361
375
  if api is None:
@@ -532,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
532
546
  # Managed properties could be already set
533
547
  coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
534
548
  coll_properties.update({"docker-image-repo-tag": image_repo_tag})
535
-
536
549
  api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
537
550
 
538
- # Read the image metadata and make Arvados links from it.
539
- image_file.seek(0)
540
- image_tar = tarfile.open(fileobj=image_file)
541
- image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
542
- if image_hash_type:
543
- json_filename = raw_image_hash + '.json'
544
- else:
545
- json_filename = raw_image_hash + '/json'
546
- json_file = image_tar.extractfile(image_tar.getmember(json_filename))
547
- image_metadata = json.loads(json_file.read().decode('utf-8'))
548
- json_file.close()
549
- image_tar.close()
551
+ _, image_metadata = load_image_metadata(image_file)
550
552
  link_base = {'head_uuid': coll_uuid, 'properties': {}}
551
553
  if 'created' in image_metadata:
552
554
  link_base['properties']['image_timestamp'] = image_metadata['created']
@@ -13,6 +13,7 @@ import time
13
13
  import errno
14
14
  import logging
15
15
  import weakref
16
+ import collections
16
17
 
17
18
  _logger = logging.getLogger('arvados.keep')
18
19
 
@@ -31,6 +32,15 @@ class DiskCacheSlot(object):
31
32
 
32
33
  def get(self):
33
34
  self.ready.wait()
35
+ # 'content' can be None, an empty byte string, or a nonempty mmap
36
+ # region. If it is an mmap region, we want to advise the
37
+ # kernel we're going to use it. This nudges the kernel to
38
+ # re-read most or all of the block if necessary (instead of
39
+ # just a few pages at a time), reducing the number of page
40
+ # faults and improving performance by 4x compared to not
41
+ # calling madvise.
42
+ if self.content:
43
+ self.content.madvise(mmap.MADV_WILLNEED)
34
44
  return self.content
35
45
 
36
46
  def set(self, value):
@@ -39,18 +49,18 @@ class DiskCacheSlot(object):
39
49
  if value is None:
40
50
  self.content = None
41
51
  self.ready.set()
42
- return
52
+ return False
43
53
 
44
54
  if len(value) == 0:
45
55
  # Can't mmap a 0 length file
46
56
  self.content = b''
47
57
  self.ready.set()
48
- return
58
+ return True
49
59
 
50
60
  if self.content is not None:
51
61
  # Has been set already
52
62
  self.ready.set()
53
- return
63
+ return False
54
64
 
55
65
  blockdir = os.path.join(self.cachedir, self.locator[0:3])
56
66
  os.makedirs(blockdir, mode=0o700, exist_ok=True)
@@ -73,6 +83,7 @@ class DiskCacheSlot(object):
73
83
  self.content = mmap.mmap(self.filehandle.fileno(), 0, access=mmap.ACCESS_READ)
74
84
  # only set the event when mmap is successful
75
85
  self.ready.set()
86
+ return True
76
87
  finally:
77
88
  if tmpfile is not None:
78
89
  # If the tempfile hasn't been renamed on disk yet, try to delete it.
@@ -95,65 +106,61 @@ class DiskCacheSlot(object):
95
106
  return len(self.content)
96
107
 
97
108
  def evict(self):
98
- if self.content is not None and len(self.content) > 0:
99
- # The mmap region might be in use when we decided to evict
100
- # it. This can happen if the cache is too small.
101
- #
102
- # If we call close() now, it'll throw an error if
103
- # something tries to access it.
104
- #
105
- # However, we don't need to explicitly call mmap.close()
106
- #
107
- # I confirmed in mmapmodule.c that both close
108
- # and deallocate do the same thing:
109
+ if not self.content:
110
+ return
111
+
112
+ # The mmap region might be in use when we decided to evict
113
+ # it. This can happen if the cache is too small.
114
+ #
115
+ # If we call close() now, it'll throw an error if
116
+ # something tries to access it.
117
+ #
118
+ # However, we don't need to explicitly call mmap.close()
119
+ #
120
+ # I confirmed in mmapmodule.c that both close
121
+ # and deallocate do the same thing:
122
+ #
123
+ # a) close the file descriptor
124
+ # b) unmap the memory range
125
+ #
126
+ # So we can forget it in the cache and delete the file on
127
+ # disk, and it will tear it down after any other
128
+ # lingering Python references to the mapped memory are
129
+ # gone.
130
+
131
+ blockdir = os.path.join(self.cachedir, self.locator[0:3])
132
+ final = os.path.join(blockdir, self.locator) + cacheblock_suffix
133
+ try:
134
+ fcntl.flock(self.filehandle, fcntl.LOCK_UN)
135
+
136
+ # try to get an exclusive lock, this ensures other
137
+ # processes are not using the block. It is
138
+ # nonblocking and will throw an exception if we
139
+ # can't get it, which is fine because that means
140
+ # we just won't try to delete it.
109
141
  #
110
- # a) close the file descriptor
111
- # b) unmap the memory range
142
+ # I should note here, the file locking is not
143
+ # strictly necessary, we could just remove it and
144
+ # the kernel would ensure that the underlying
145
+ # inode remains available as long as other
146
+ # processes still have the file open. However, if
147
+ # you have multiple processes sharing the cache
148
+ # and deleting each other's files, you'll end up
149
+ # with a bunch of ghost files that don't show up
150
+ # in the file system but are still taking up
151
+ # space, which isn't particularly user friendly.
152
+ # The locking strategy ensures that cache blocks
153
+ # in use remain visible.
112
154
  #
113
- # So we can forget it in the cache and delete the file on
114
- # disk, and it will tear it down after any other
115
- # lingering Python references to the mapped memory are
116
- # gone.
117
-
118
- blockdir = os.path.join(self.cachedir, self.locator[0:3])
119
- final = os.path.join(blockdir, self.locator) + cacheblock_suffix
120
- try:
121
- fcntl.flock(self.filehandle, fcntl.LOCK_UN)
122
-
123
- # try to get an exclusive lock, this ensures other
124
- # processes are not using the block. It is
125
- # nonblocking and will throw an exception if we
126
- # can't get it, which is fine because that means
127
- # we just won't try to delete it.
128
- #
129
- # I should note here, the file locking is not
130
- # strictly necessary, we could just remove it and
131
- # the kernel would ensure that the underlying
132
- # inode remains available as long as other
133
- # processes still have the file open. However, if
134
- # you have multiple processes sharing the cache
135
- # and deleting each other's files, you'll end up
136
- # with a bunch of ghost files that don't show up
137
- # in the file system but are still taking up
138
- # space, which isn't particularly user friendly.
139
- # The locking strategy ensures that cache blocks
140
- # in use remain visible.
141
- #
142
- fcntl.flock(self.filehandle, fcntl.LOCK_EX | fcntl.LOCK_NB)
143
-
144
- os.remove(final)
145
- return True
146
- except OSError:
147
- pass
148
- finally:
149
- self.filehandle = None
150
- self.linger = weakref.ref(self.content)
151
- self.content = None
152
- return False
155
+ fcntl.flock(self.filehandle, fcntl.LOCK_EX | fcntl.LOCK_NB)
153
156
 
154
- def gone(self):
155
- # Test if an evicted object is lingering
156
- return self.content is None and (self.linger is None or self.linger() is None)
157
+ os.remove(final)
158
+ return True
159
+ except OSError:
160
+ pass
161
+ finally:
162
+ self.filehandle = None
163
+ self.content = None
157
164
 
158
165
  @staticmethod
159
166
  def get_from_disk(locator, cachedir):
@@ -237,13 +244,13 @@ class DiskCacheSlot(object):
237
244
 
238
245
  # Map in all the files we found, up to maxslots, if we exceed
239
246
  # maxslots, start throwing things out.
240
- cachelist = []
247
+ cachelist: collections.OrderedDict = collections.OrderedDict()
241
248
  for b in blocks:
242
249
  got = DiskCacheSlot.get_from_disk(b[0], cachedir)
243
250
  if got is None:
244
251
  continue
245
252
  if len(cachelist) < maxslots:
246
- cachelist.append(got)
253
+ cachelist[got.locator] = got
247
254
  else:
248
255
  # we found more blocks than maxslots, try to
249
256
  # throw it out of the cache.