arvados-python-client 2.7.1__tar.gz → 2.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {arvados-python-client-2.7.1/arvados_python_client.egg-info → arvados-python-client-2.7.3}/PKG-INFO +1 -1
  2. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/__init__.py +14 -5
  3. arvados-python-client-2.7.3/arvados/_version.py +1 -0
  4. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/arvfile.py +75 -26
  5. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/collection.py +2 -1
  6. arvados-python-client-2.7.3/arvados/commands/_util.py +158 -0
  7. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/keepdocker.py +26 -24
  8. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/diskcache.py +68 -61
  9. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/keep.py +44 -46
  10. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3/arvados_python_client.egg-info}/PKG-INFO +1 -1
  11. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/SOURCES.txt +2 -0
  12. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_keepdocker.py +32 -4
  13. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arvfile.py +1 -0
  14. arvados-python-client-2.7.3/tests/test_cmd_util.py +194 -0
  15. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_keep_client.py +36 -130
  16. arvados-python-client-2.7.3/tests/test_storage_classes.py +128 -0
  17. arvados-python-client-2.7.1/arvados/_version.py +0 -1
  18. arvados-python-client-2.7.1/arvados/commands/_util.py +0 -65
  19. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/LICENSE-2.0.txt +0 -0
  20. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/MANIFEST.in +0 -0
  21. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/README.rst +0 -0
  22. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_normalize_stream.py +0 -0
  23. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_pycurlhelper.py +0 -0
  24. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/_ranges.py +0 -0
  25. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/api.py +0 -0
  26. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/cache.py +0 -0
  27. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/__init__.py +0 -0
  28. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/arv_copy.py +0 -0
  29. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/federation_migrate.py +0 -0
  30. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/get.py +0 -0
  31. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/ls.py +0 -0
  32. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/migrate19.py +0 -0
  33. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/put.py +0 -0
  34. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/run.py +0 -0
  35. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/commands/ws.py +0 -0
  36. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/config.py +0 -0
  37. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/crunch.py +0 -0
  38. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/errors.py +0 -0
  39. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/events.py +0 -0
  40. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/http_to_keep.py +0 -0
  41. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/logging.py +0 -0
  42. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/retry.py +0 -0
  43. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/safeapi.py +0 -0
  44. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/stream.py +0 -0
  45. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/timer.py +0 -0
  46. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/util.py +0 -0
  47. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados/vocabulary.py +0 -0
  48. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados-v1-discovery.json +0 -0
  49. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/dependency_links.txt +0 -0
  50. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/not-zip-safe +0 -0
  51. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/requires.txt +0 -0
  52. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_python_client.egg-info/top_level.txt +0 -0
  53. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/arvados_version.py +0 -0
  54. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-copy +0 -0
  55. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-federation-migrate +0 -0
  56. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-get +0 -0
  57. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-keepdocker +0 -0
  58. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-ls +0 -0
  59. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-migrate-docker19 +0 -0
  60. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-normalize +0 -0
  61. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-put +0 -0
  62. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/bin/arv-ws +0 -0
  63. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/discovery2pydoc.py +0 -0
  64. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/setup.cfg +0 -0
  65. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/setup.py +0 -0
  66. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/__init__.py +0 -0
  67. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/arvados_testutil.py +0 -0
  68. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/keepstub.py +0 -0
  69. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/manifest_examples.py +0 -0
  70. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/__init__.py +0 -0
  71. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/performance_profiler.py +0 -0
  72. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/performance/test_a_sample.py +0 -0
  73. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/run_test_server.py +0 -0
  74. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/slow_test.py +0 -0
  75. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_api.py +0 -0
  76. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_copy.py +0 -0
  77. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_get.py +0 -0
  78. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_ls.py +0 -0
  79. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_normalize.py +0 -0
  80. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_put.py +0 -0
  81. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_arv_ws.py +0 -0
  82. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_benchmark_collections.py +0 -0
  83. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_cache.py +0 -0
  84. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_collections.py +0 -0
  85. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_crunch.py +0 -0
  86. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_errors.py +0 -0
  87. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_events.py +0 -0
  88. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_http.py +0 -0
  89. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_keep_locator.py +0 -0
  90. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_retry.py +0 -0
  91. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_retry_job_helpers.py +0 -0
  92. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_safeapi.py +0 -0
  93. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_sdk.py +0 -0
  94. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_stream.py +0 -0
  95. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_util.py +0 -0
  96. {arvados-python-client-2.7.1 → arvados-python-client-2.7.3}/tests/test_vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.1
2
2
  Name: arvados-python-client
3
- Version: 2.7.1
3
+ Version: 2.7.3
4
4
  Summary: Arvados client library
5
5
  Home-page: https://arvados.org
6
6
  Author: Arvados
@@ -6,8 +6,8 @@
6
6
  This module provides the entire Python SDK for Arvados. The most useful modules
7
7
  include:
8
8
 
9
- * arvados.api - After you `import arvados`, you can call `arvados.api.api` as
10
- `arvados.api` to construct a client object.
9
+ * arvados.api - After you `import arvados`, you can call `arvados.api` as a
10
+ shortcut to the client constructor function `arvados.api.api`.
11
11
 
12
12
  * arvados.collection - The `arvados.collection.Collection` class provides a
13
13
  high-level interface to read and write collections. It coordinates sending
@@ -26,15 +26,24 @@ import types
26
26
 
27
27
  from collections import UserDict
28
28
 
29
- from .api import api, api_from_config, http_cache
29
+ from . import api, errors, util
30
+ from .api import api_from_config, http_cache
30
31
  from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
31
32
  from arvados.keep import *
32
33
  from arvados.stream import *
33
34
  from .arvfile import StreamFileReader
34
35
  from .logging import log_format, log_date_format, log_handler
35
36
  from .retry import RetryLoop
36
- import arvados.errors as errors
37
- import arvados.util as util
37
+
38
+ # Previous versions of the PySDK used to say `from .api import api`. This
39
+ # made it convenient to call the API client constructor, but difficult to
40
+ # access the rest of the `arvados.api` module. The magic below fixes that
41
+ # bug while retaining backwards compatibility: `arvados.api` is now the
42
+ # module and you can import it normally, but we make that module callable so
43
+ # all the existing code that says `arvados.api('v1', ...)` still works.
44
+ class _CallableAPIModule(api.__class__):
45
+ __call__ = staticmethod(api.api)
46
+ api.__class__ = _CallableAPIModule
38
47
 
39
48
  # Override logging module pulled in via `from ... import *`
40
49
  # so users can `import arvados.logging`.
@@ -0,0 +1 @@
1
+ __version__ = '2.7.3'
@@ -491,7 +491,7 @@ class _BlockManager(object):
491
491
  self._put_queue = None
492
492
  self._put_threads = None
493
493
  self.lock = threading.Lock()
494
- self.prefetch_enabled = True
494
+ self.prefetch_lookahead = self._keep.num_prefetch_threads
495
495
  self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
496
496
  self.copies = copies
497
497
  self.storage_classes = storage_classes_func or (lambda: [])
@@ -803,7 +803,7 @@ class _BlockManager(object):
803
803
  """Initiate a background download of a block.
804
804
  """
805
805
 
806
- if not self.prefetch_enabled:
806
+ if not self.prefetch_lookahead:
807
807
  return
808
808
 
809
809
  with self.lock:
@@ -825,7 +825,7 @@ class ArvadosFile(object):
825
825
  """
826
826
 
827
827
  __slots__ = ('parent', 'name', '_writers', '_committed',
828
- '_segments', 'lock', '_current_bblock', 'fuse_entry')
828
+ '_segments', 'lock', '_current_bblock', 'fuse_entry', '_read_counter')
829
829
 
830
830
  def __init__(self, parent, name, stream=[], segments=[]):
831
831
  """
@@ -846,6 +846,7 @@ class ArvadosFile(object):
846
846
  for s in segments:
847
847
  self._add_segment(stream, s.locator, s.range_size)
848
848
  self._current_bblock = None
849
+ self._read_counter = 0
849
850
 
850
851
  def writable(self):
851
852
  return self.parent.writable()
@@ -1047,20 +1048,47 @@ class ArvadosFile(object):
1047
1048
  # size == self.size()
1048
1049
  pass
1049
1050
 
1050
- def readfrom(self, offset, size, num_retries, exact=False):
1051
+ def readfrom(self, offset, size, num_retries, exact=False, return_memoryview=False):
1051
1052
  """Read up to `size` bytes from the file starting at `offset`.
1052
1053
 
1053
- :exact:
1054
- If False (default), return less data than requested if the read
1055
- crosses a block boundary and the next block isn't cached. If True,
1056
- only return less data than requested when hitting EOF.
1054
+ Arguments:
1055
+
1056
+ * exact: bool --- If False (default), return less data than
1057
+ requested if the read crosses a block boundary and the next
1058
+ block isn't cached. If True, only return less data than
1059
+ requested when hitting EOF.
1060
+
1061
+ * return_memoryview: bool --- If False (default) return a
1062
+ `bytes` object, which may entail making a copy in some
1063
+ situations. If True, return a `memoryview` object which may
1064
+ avoid making a copy, but may be incompatible with code
1065
+ expecting a `bytes` object.
1066
+
1057
1067
  """
1058
1068
 
1059
1069
  with self.lock:
1060
1070
  if size == 0 or offset >= self.size():
1061
- return b''
1071
+ return memoryview(b'') if return_memoryview else b''
1062
1072
  readsegs = locators_and_ranges(self._segments, offset, size)
1063
- prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager()._keep.num_prefetch_threads, limit=32)
1073
+
1074
+ prefetch = None
1075
+ prefetch_lookahead = self.parent._my_block_manager().prefetch_lookahead
1076
+ if prefetch_lookahead:
1077
+ # Doing prefetch on every read() call is surprisingly expensive
1078
+ # when we're trying to deliver data at 600+ MiBps and want
1079
+ # the read() fast path to be as lightweight as possible.
1080
+ #
1081
+ # Only prefetching every 128 read operations
1082
+ # dramatically reduces the overhead while still
1083
+ # getting the benefit of prefetching (e.g. when
1084
+ # reading 128 KiB at a time, it checks for prefetch
1085
+ # every 16 MiB).
1086
+ self._read_counter = (self._read_counter+1) % 128
1087
+ if self._read_counter == 1:
1088
+ prefetch = locators_and_ranges(self._segments,
1089
+ offset + size,
1090
+ config.KEEP_BLOCK_SIZE * prefetch_lookahead,
1091
+ limit=(1+prefetch_lookahead))
1064
1092
 
1065
1093
  locs = set()
1066
1094
  data = []
@@ -1068,17 +1096,22 @@ class ArvadosFile(object):
1068
1096
  block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
1069
1097
  if block:
1070
1098
  blockview = memoryview(block)
1071
- data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
1099
+ data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size])
1072
1100
  locs.add(lr.locator)
1073
1101
  else:
1074
1102
  break
1075
1103
 
1076
- for lr in prefetch:
1077
- if lr.locator not in locs:
1078
- self.parent._my_block_manager().block_prefetch(lr.locator)
1079
- locs.add(lr.locator)
1104
+ if prefetch:
1105
+ for lr in prefetch:
1106
+ if lr.locator not in locs:
1107
+ self.parent._my_block_manager().block_prefetch(lr.locator)
1108
+ locs.add(lr.locator)
1109
+
1110
+ if len(data) == 1:
1111
+ return data[0] if return_memoryview else data[0].tobytes()
1112
+ else:
1113
+ return memoryview(b''.join(data)) if return_memoryview else b''.join(data)
1080
1114
 
1081
- return b''.join(data)
1082
1115
 
1083
1116
  @must_be_writable
1084
1117
  @synchronized
@@ -1243,33 +1276,49 @@ class ArvadosFileReader(ArvadosFileReaderBase):
1243
1276
 
1244
1277
  @_FileLikeObjectBase._before_close
1245
1278
  @retry_method
1246
- def read(self, size=None, num_retries=None):
1279
+ def read(self, size=-1, num_retries=None, return_memoryview=False):
1247
1280
  """Read up to `size` bytes from the file and return the result.
1248
1281
 
1249
- Starts at the current file position. If `size` is None, read the
1250
- entire remainder of the file.
1282
+ Starts at the current file position. If `size` is negative or None,
1283
+ read the entire remainder of the file.
1284
+
1285
+ Returns None if the file pointer is at the end of the file.
1286
+
1287
+ Returns a `bytes` object, unless `return_memoryview` is True,
1288
+ in which case it returns a memory view, which may avoid an
1289
+ unnecessary data copy in some situations.
1290
+
1251
1291
  """
1252
- if size is None:
1292
+ if size < 0 or size is None:
1253
1293
  data = []
1254
- rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
1294
+ #
1295
+ # specify exact=False, return_memoryview=True here so that we
1296
+ # only copy data once into the final buffer.
1297
+ #
1298
+ rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
1255
1299
  while rd:
1256
1300
  data.append(rd)
1257
1301
  self._filepos += len(rd)
1258
- rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
1259
- return b''.join(data)
1302
+ rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries, exact=False, return_memoryview=True)
1303
+ return memoryview(b''.join(data)) if return_memoryview else b''.join(data)
1260
1304
  else:
1261
- data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
1305
+ data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True, return_memoryview=return_memoryview)
1262
1306
  self._filepos += len(data)
1263
1307
  return data
1264
1308
 
1265
1309
  @_FileLikeObjectBase._before_close
1266
1310
  @retry_method
1267
- def readfrom(self, offset, size, num_retries=None):
1311
+ def readfrom(self, offset, size, num_retries=None, return_memoryview=False):
1268
1312
  """Read up to `size` bytes from the stream, starting at the specified file offset.
1269
1313
 
1270
1314
  This method does not change the file position.
1315
+
1316
+ Returns a `bytes` object, unless `return_memoryview` is True,
1317
+ in which case it returns a memory view, which may avoid an
1318
+ unnecessary data copy in some situations.
1319
+
1271
1320
  """
1272
- return self.arvadosfile.readfrom(offset, size, num_retries)
1321
+ return self.arvadosfile.readfrom(offset, size, num_retries, exact=True, return_memoryview=return_memoryview)
1273
1322
 
1274
1323
  def flush(self):
1275
1324
  pass
@@ -341,7 +341,7 @@ class RichCollectionBase(CollectionBase):
341
341
  self,
342
342
  path: str,
343
343
  mode: str="r",
344
- encoding: Optional[str]=None,
344
+ encoding: Optional[str]=None
345
345
  ) -> IO:
346
346
  """Open a file-like object within the collection
347
347
 
@@ -361,6 +361,7 @@ class RichCollectionBase(CollectionBase):
361
361
  * encoding: str | None --- The text encoding of the file. Only used
362
362
  when the file is opened in text mode. The default is
363
363
  platform-dependent.
364
+
364
365
  """
365
366
  if not re.search(r'^[rwa][bt]?\+?$', mode):
366
367
  raise errors.ArgumentError("Invalid mode {!r}".format(mode))
@@ -0,0 +1,158 @@
1
+ # Copyright (C) The Arvados Authors. All rights reserved.
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import argparse
6
+ import errno
7
+ import json
8
+ import logging
9
+ import os
10
+ import re
11
+ import signal
12
+ import sys
13
+
14
+ FILTER_STR_RE = re.compile(r'''
15
+ ^\(
16
+ \ *(\w+)
17
+ \ *(<|<=|=|>=|>)
18
+ \ *(\w+)
19
+ \ *\)$
20
+ ''', re.ASCII | re.VERBOSE)
21
+
22
+ def _pos_int(s):
23
+ num = int(s)
24
+ if num < 0:
25
+ raise ValueError("can't accept negative value: %s" % (num,))
26
+ return num
27
+
28
+ retry_opt = argparse.ArgumentParser(add_help=False)
29
+ retry_opt.add_argument('--retries', type=_pos_int, default=10, help="""
30
+ Maximum number of times to retry server requests that encounter temporary
31
+ failures (e.g., server down). Default 10.""")
32
+
33
+ def _ignore_error(error):
34
+ return None
35
+
36
+ def _raise_error(error):
37
+ raise error
38
+
39
+ def make_home_conf_dir(path, mode=None, errors='ignore'):
40
+ # Make the directory path under the user's home directory, making parent
41
+ # directories as needed.
42
+ # If the directory is newly created, and a mode is specified, chmod it
43
+ # with those permissions.
44
+ # If there's an error, return None if errors is 'ignore', else raise an
45
+ # exception.
46
+ error_handler = _ignore_error if (errors == 'ignore') else _raise_error
47
+ tilde_path = os.path.join('~', path)
48
+ abs_path = os.path.expanduser(tilde_path)
49
+ if abs_path == tilde_path:
50
+ return error_handler(ValueError("no home directory available"))
51
+ try:
52
+ os.makedirs(abs_path)
53
+ except OSError as error:
54
+ if error.errno != errno.EEXIST:
55
+ return error_handler(error)
56
+ else:
57
+ if mode is not None:
58
+ os.chmod(abs_path, mode)
59
+ return abs_path
60
+
61
+ CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
62
+
63
+ def exit_signal_handler(sigcode, frame):
64
+ logging.getLogger('arvados').error("Caught signal {}, exiting.".format(sigcode))
65
+ sys.exit(-sigcode)
66
+
67
+ def install_signal_handlers():
68
+ global orig_signal_handlers
69
+ orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
70
+ for sigcode in CAUGHT_SIGNALS}
71
+
72
+ def restore_signal_handlers():
73
+ for sigcode, orig_handler in orig_signal_handlers.items():
74
+ signal.signal(sigcode, orig_handler)
75
+
76
+ def validate_filters(filters):
77
+ """Validate user-provided filters
78
+
79
+ This function validates that a user-defined object represents valid
80
+ Arvados filters that can be passed to an API client: that it's a list of
81
+ 3-element lists with the field name and operator given as strings. If any
82
+ of these conditions are not true, it raises a ValueError with details about
83
+ the problem.
84
+
85
+ It returns validated filters. Currently the provided filters are returned
86
+ unmodified. Future versions of this function may clean up the filters with
87
+ "obvious" type conversions, so callers SHOULD use the returned value for
88
+ Arvados API calls.
89
+ """
90
+ if not isinstance(filters, list):
91
+ raise ValueError(f"filters are not a list: {filters!r}")
92
+ for index, f in enumerate(filters):
93
+ if isinstance(f, str):
94
+ match = FILTER_STR_RE.fullmatch(f)
95
+ if match is None:
96
+ raise ValueError(f"filter at index {index} has invalid syntax: {f!r}")
97
+ s, op, o = match.groups()
98
+ if s[0].isdigit():
99
+ raise ValueError(f"filter at index {index} has invalid syntax: bad field name {s!r}")
100
+ if o[0].isdigit():
101
+ raise ValueError(f"filter at index {index} has invalid syntax: bad field name {o!r}")
102
+ continue
103
+ elif not isinstance(f, list):
104
+ raise ValueError(f"filter at index {index} is not a string or list: {f!r}")
105
+ try:
106
+ s, op, o = f
107
+ except ValueError:
108
+ raise ValueError(
109
+ f"filter at index {index} does not have three items (field name, operator, operand): {f!r}",
110
+ ) from None
111
+ if not isinstance(s, str):
112
+ raise ValueError(f"filter at index {index} field name is not a string: {s!r}")
113
+ if not isinstance(op, str):
114
+ raise ValueError(f"filter at index {index} operator is not a string: {op!r}")
115
+ return filters
116
+
117
+
118
+ class JSONArgument:
119
+ """Parse a JSON file from a command line argument string or path
120
+
121
+ JSONArgument objects can be called with a string and return an arbitrary
122
+ object. First it will try to decode the string as JSON. If that fails, it
123
+ will try to open a file at the path named by the string, and decode it as
124
+ JSON. If that fails, it raises ValueError with more detail.
125
+
126
+ This is designed to be used as an argparse argument type.
127
+ Typical usage looks like:
128
+
129
+ parser = argparse.ArgumentParser()
130
+ parser.add_argument('--object', type=JSONArgument(), ...)
131
+
132
+ You can construct JSONArgument with an optional validation function. If
133
+ given, it is called with the object decoded from user input, and its
134
+ return value replaces it. It should raise ValueError if there is a problem
135
+ with the input. (argparse turns ValueError into a useful error message.)
136
+
137
+ filters_type = JSONArgument(validate_filters)
138
+ parser.add_argument('--filters', type=filters_type, ...)
139
+ """
140
+ def __init__(self, validator=None):
141
+ self.validator = validator
142
+
143
+ def __call__(self, value):
144
+ try:
145
+ retval = json.loads(value)
146
+ except json.JSONDecodeError:
147
+ try:
148
+ with open(value, 'rb') as json_file:
149
+ retval = json.load(json_file)
150
+ except json.JSONDecodeError as error:
151
+ raise ValueError(f"error decoding JSON from file {value!r}: {error}") from None
152
+ except (FileNotFoundError, ValueError):
153
+ raise ValueError(f"not a valid JSON string or file path: {value!r}") from None
154
+ except OSError as error:
155
+ raise ValueError(f"error reading JSON file path {value!r}: {error.strerror}") from None
156
+ if self.validator is not None:
157
+ retval = self.validator(retval)
158
+ return retval
@@ -2,34 +2,29 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- from builtins import next
6
5
  import argparse
7
6
  import collections
8
7
  import datetime
9
8
  import errno
9
+ import fcntl
10
10
  import json
11
+ import logging
11
12
  import os
12
13
  import re
14
+ import subprocess
13
15
  import sys
14
16
  import tarfile
15
17
  import tempfile
16
- import shutil
17
- import _strptime
18
- import fcntl
18
+
19
+ import ciso8601
19
20
  from operator import itemgetter
20
21
  from stat import *
21
22
 
22
- import subprocess
23
-
24
23
  import arvados
24
+ import arvados.config
25
25
  import arvados.util
26
26
  import arvados.commands._util as arv_cmd
27
27
  import arvados.commands.put as arv_put
28
- from arvados.collection import CollectionReader
29
- import ciso8601
30
- import logging
31
- import arvados.config
32
-
33
28
  from arvados._version import __version__
34
29
 
35
30
  logger = logging.getLogger('arvados.keepdocker')
@@ -356,6 +351,25 @@ def _uuid2pdh(api, uuid):
356
351
  select=['portable_data_hash'],
357
352
  ).execute()['items'][0]['portable_data_hash']
358
353
 
354
+ def load_image_metadata(image_file):
355
+ """Load an image manifest and config from an archive
356
+
357
+ Given an image archive as an open binary file object, this function loads
358
+ the image manifest and configuration, deserializing each from JSON and
359
+ returning them in a 2-tuple of dicts.
360
+ """
361
+ image_file.seek(0)
362
+ with tarfile.open(fileobj=image_file) as image_tar:
363
+ with image_tar.extractfile('manifest.json') as manifest_file:
364
+ image_manifest_list = json.load(manifest_file)
365
+ # Because arv-keepdocker only saves one image, there should only be
366
+ # one manifest. This extracts that from the list and raises
367
+ # ValueError if there's not exactly one.
368
+ image_manifest, = image_manifest_list
369
+ with image_tar.extractfile(image_manifest['Config']) as config_file:
370
+ image_config = json.load(config_file)
371
+ return image_manifest, image_config
372
+
359
373
  def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None):
360
374
  args = arg_parser.parse_args(arguments)
361
375
  if api is None:
@@ -532,21 +546,9 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
532
546
  # Managed properties could be already set
533
547
  coll_properties = api.collections().get(uuid=coll_uuid).execute(num_retries=args.retries).get('properties', {})
534
548
  coll_properties.update({"docker-image-repo-tag": image_repo_tag})
535
-
536
549
  api.collections().update(uuid=coll_uuid, body={"properties": coll_properties}).execute(num_retries=args.retries)
537
550
 
538
- # Read the image metadata and make Arvados links from it.
539
- image_file.seek(0)
540
- image_tar = tarfile.open(fileobj=image_file)
541
- image_hash_type, _, raw_image_hash = image_hash.rpartition(':')
542
- if image_hash_type:
543
- json_filename = raw_image_hash + '.json'
544
- else:
545
- json_filename = raw_image_hash + '/json'
546
- json_file = image_tar.extractfile(image_tar.getmember(json_filename))
547
- image_metadata = json.loads(json_file.read().decode('utf-8'))
548
- json_file.close()
549
- image_tar.close()
551
+ _, image_metadata = load_image_metadata(image_file)
550
552
  link_base = {'head_uuid': coll_uuid, 'properties': {}}
551
553
  if 'created' in image_metadata:
552
554
  link_base['properties']['image_timestamp'] = image_metadata['created']