arvados-python-client 3.0.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {arvados-python-client-3.0.0/arvados_python_client.egg-info → arvados-python-client-3.1.0}/PKG-INFO +1 -1
  2. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/__init__.py +31 -0
  3. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/basedirs.py +6 -4
  4. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/http_to_keep.py +2 -4
  5. arvados-python-client-3.1.0/arvados/_version.py +1 -0
  6. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/arvfile.py +11 -0
  7. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/collection.py +73 -51
  8. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/_util.py +39 -8
  9. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/arv_copy.py +107 -43
  10. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/put.py +5 -7
  11. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/keep.py +10 -6
  12. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/util.py +20 -0
  13. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados-v1-discovery.json +21 -1
  14. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0/arvados_python_client.egg-info}/PKG-INFO +1 -1
  15. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_version.py +1 -1
  16. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/run_test_server.py +98 -50
  17. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_copy.py +19 -8
  18. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_put.py +5 -0
  19. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_basedirs.py +8 -0
  20. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_cmd_util.py +40 -0
  21. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_internal.py +41 -0
  22. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_keep_client.py +5 -0
  23. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_util.py +34 -0
  24. arvados-python-client-3.0.0/arvados/_version.py +0 -1
  25. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/LICENSE-2.0.txt +0 -0
  26. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/MANIFEST.in +0 -0
  27. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/README.rst +0 -0
  28. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/__init__.py +0 -0
  29. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/diskcache.py +0 -0
  30. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/pycurl.py +0 -0
  31. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/report_template.py +0 -0
  32. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/_internal/streams.py +0 -0
  33. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/api.py +0 -0
  34. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/cache.py +0 -0
  35. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/__init__.py +0 -0
  36. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/federation_migrate.py +0 -0
  37. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/get.py +0 -0
  38. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/keepdocker.py +0 -0
  39. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/ls.py +0 -0
  40. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/run.py +0 -0
  41. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/commands/ws.py +0 -0
  42. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/config.py +0 -0
  43. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/errors.py +0 -0
  44. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/events.py +0 -0
  45. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/logging.py +0 -0
  46. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/retry.py +0 -0
  47. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/safeapi.py +0 -0
  48. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados/vocabulary.py +0 -0
  49. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/SOURCES.txt +0 -0
  50. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/dependency_links.txt +0 -0
  51. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/not-zip-safe +0 -0
  52. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/requires.txt +0 -0
  53. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/arvados_python_client.egg-info/top_level.txt +0 -0
  54. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-copy +0 -0
  55. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-federation-migrate +0 -0
  56. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-get +0 -0
  57. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-keepdocker +0 -0
  58. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-ls +0 -0
  59. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-normalize +0 -0
  60. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-put +0 -0
  61. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/bin/arv-ws +0 -0
  62. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/discovery2pydoc.py +0 -0
  63. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/setup.cfg +0 -0
  64. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/setup.py +0 -0
  65. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/__init__.py +0 -0
  66. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/arvados_testutil.py +0 -0
  67. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/keepstub.py +0 -0
  68. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/manifest_examples.py +0 -0
  69. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/__init__.py +0 -0
  70. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/performance_profiler.py +0 -0
  71. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/performance/test_a_sample.py +0 -0
  72. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_api.py +0 -0
  73. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_get.py +0 -0
  74. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_keepdocker.py +0 -0
  75. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_ls.py +0 -0
  76. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_normalize.py +0 -0
  77. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arv_ws.py +0 -0
  78. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_arvfile.py +0 -0
  79. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_benchmark_collections.py +0 -0
  80. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_collections.py +0 -0
  81. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_computed_permissions.py +0 -0
  82. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_config.py +0 -0
  83. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_errors.py +0 -0
  84. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_events.py +0 -0
  85. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_http_cache.py +0 -0
  86. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_http_to_keep.py +0 -0
  87. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_keep_locator.py +0 -0
  88. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_retry.py +0 -0
  89. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_retry_job_helpers.py +0 -0
  90. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_storage_classes.py +0 -0
  91. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_stream.py +0 -0
  92. {arvados-python-client-3.0.0 → arvados-python-client-3.1.0}/tests/test_vocabulary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.2
2
2
  Name: arvados-python-client
3
- Version: 3.0.0
3
+ Version: 3.1.0
4
4
  Summary: Arvados client library
5
5
  Home-page: https://arvados.org
6
6
  Author: Arvados
@@ -10,10 +10,15 @@ time.
10
10
  """
11
11
 
12
12
  import functools
13
+ import operator
13
14
  import re
14
15
  import time
15
16
  import warnings
16
17
 
18
+ import typing as t
19
+
20
+ HT = t.TypeVar('HT', bound=t.Hashable)
21
+
17
22
  class Timer:
18
23
  def __init__(self, verbose=False):
19
24
  self.verbose = verbose
@@ -81,3 +86,29 @@ def deprecated(version=None, preferred=None):
81
86
  deprecated_wrapper.__doc__ = docstring
82
87
  return deprecated_wrapper
83
88
  return deprecated_decorator
89
+
90
+
91
+ def parse_seq(
92
+ s: str,
93
+ split: t.Callable[[str], t.Iterable[str]]=operator.methodcaller('split', ','),
94
+ clean: t.Callable[[str], str]=operator.methodcaller('strip'),
95
+ check: t.Callable[[str], bool]=bool,
96
+ ) -> t.Iterator[str]:
97
+ """Split, clean, and filter a string into multiple items
98
+
99
+ The default arguments split on commas, strip substrings, and skip empty
100
+ items.
101
+ """
102
+ return (word for substr in split(s) if check(word := clean(substr)))
103
+
104
+
105
+ def uniq(it: t.Iterable[HT]) -> t.Iterator[HT]:
106
+ """Yield only unique items from an iterable
107
+
108
+ The items must be hashable.
109
+ """
110
+ seen = set()
111
+ for item in it:
112
+ if item not in seen:
113
+ seen.add(item)
114
+ yield item
@@ -126,12 +126,14 @@ class BaseDirectories:
126
126
  self._env = env
127
127
  self._xdg_subdir = PurePath(xdg_subdir)
128
128
 
129
+ def search_paths(self) -> Iterator[Path]:
130
+ return itertools.chain(
131
+ self._spec.iter_systemd(self._env),
132
+ self._spec.iter_xdg(self._env, self._xdg_subdir))
133
+
129
134
  def search(self, name: str) -> Iterator[Path]:
130
135
  any_found = False
131
- for search_path in itertools.chain(
132
- self._spec.iter_systemd(self._env),
133
- self._spec.iter_xdg(self._env, self._xdg_subdir),
134
- ):
136
+ for search_path in self.search_paths():
135
137
  path = search_path / name
136
138
  if path.exists():
137
139
  yield path
@@ -16,6 +16,7 @@ import pycurl
16
16
 
17
17
  import arvados
18
18
  import arvados.collection
19
+ import arvados._internal
19
20
  from .pycurl import PyCurlHelper
20
21
 
21
22
  logger = logging.getLogger('arvados.http_import')
@@ -250,11 +251,8 @@ def check_cached_url(api, project_uuid, url, etags,
250
251
  utcnow=datetime.datetime.utcnow,
251
252
  varying_url_params="",
252
253
  prefer_cached_downloads=False):
253
-
254
254
  logger.info("Checking Keep for %s", url)
255
-
256
- varying_params = [s.strip() for s in varying_url_params.split(",")]
257
-
255
+ varying_params = set(arvados._internal.parse_seq(varying_url_params))
258
256
  parsed = urllib.parse.urlparse(url)
259
257
  query = [q for q in urllib.parse.parse_qsl(parsed.query)
260
258
  if q[0] not in varying_params]
@@ -0,0 +1 @@
1
+ __version__ = '3.1.0'
@@ -23,8 +23,16 @@ from .errors import KeepWriteError, AssertionError, ArgumentError
23
23
  from .keep import KeepLocator
24
24
  from .retry import retry_method
25
25
 
26
+ ADD = "add"
27
+ """Argument value for `Collection` methods to represent an added item"""
28
+ DEL = "del"
29
+ """Argument value for `Collection` methods to represent a removed item"""
26
30
  MOD = "mod"
31
+ """Argument value for `Collection` methods to represent a modified item"""
32
+ TOK = "tok"
33
+ """Argument value for `Collection` methods to represent an item with token differences"""
27
34
  WRITE = "write"
35
+ """Argument value for `Collection` methods to represent that a file was written to"""
28
36
 
29
37
  _logger = logging.getLogger('arvados.arvfile')
30
38
 
@@ -841,6 +849,8 @@ class ArvadosFile(object):
841
849
  def replace_contents(self, other):
842
850
  """Replace segments of this file with segments from another `ArvadosFile` object."""
843
851
 
852
+ eventtype = TOK if self == other else MOD
853
+
844
854
  map_loc = {}
845
855
  self._segments = []
846
856
  for other_segment in other.segments():
@@ -857,6 +867,7 @@ class ArvadosFile(object):
857
867
  self._segments.append(streams.Range(new_loc, other_segment.range_start, other_segment.range_size, other_segment.segment_offset))
858
868
 
859
869
  self.set_committed(False)
870
+ self.parent.notify(eventtype, self.parent, self.name, (self, self))
860
871
 
861
872
  def __eq__(self, other):
862
873
  if other is self:
@@ -30,7 +30,7 @@ from stat import *
30
30
 
31
31
  from ._internal import streams
32
32
  from .api import ThreadSafeAPIClient
33
- from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
33
+ from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock, ADD, DEL, MOD, TOK, WRITE
34
34
  from .keep import KeepLocator, KeepClient
35
35
  import arvados.config as config
36
36
  import arvados.errors as errors
@@ -58,14 +58,7 @@ else:
58
58
 
59
59
  _logger = logging.getLogger('arvados.collection')
60
60
 
61
- ADD = "add"
62
- """Argument value for `Collection` methods to represent an added item"""
63
- DEL = "del"
64
- """Argument value for `Collection` methods to represent a removed item"""
65
- MOD = "mod"
66
- """Argument value for `Collection` methods to represent a modified item"""
67
- TOK = "tok"
68
- """Argument value for `Collection` methods to represent an item with token differences"""
61
+
69
62
  FILE = "file"
70
63
  """`create_type` value for `Collection.find_or_create`"""
71
64
  COLLECTION = "collection"
@@ -922,9 +915,12 @@ class RichCollectionBase(CollectionBase):
922
915
  # Overwrite path with new item; this can happen if
923
916
  # path was a file and is now a collection or vice versa
924
917
  self.copy(final, path, overwrite=True)
925
- else:
926
- # Local is missing (presumably deleted) or local doesn't
927
- # match the "start" value, so save change to conflict file
918
+ elif event_type == MOD:
919
+ # Local doesn't match the "start" value or local
920
+ # is missing (presumably deleted) so save change
921
+ # to conflict file. Don't do this for TOK events
922
+ # which means the file didn't change but only had
923
+ # tokens updated.
928
924
  self.copy(final, conflictpath)
929
925
  elif event_type == DEL:
930
926
  if local == initial:
@@ -992,8 +988,13 @@ class RichCollectionBase(CollectionBase):
992
988
  was modified.
993
989
 
994
990
  * item: arvados.arvfile.ArvadosFile |
995
- arvados.collection.Subcollection --- The new contents at `name`
996
- within `collection`.
991
+ arvados.collection.Subcollection --- For ADD events, the new
992
+ contents at `name` within `collection`; for DEL events, the
993
+ item that was removed. For MOD and TOK events, a 2-tuple of
994
+ the previous item and the new item (may be the same object
995
+ or different, depending on whether the action involved it
996
+ being modified in place or replaced).
997
+
997
998
  """
998
999
  if self._callback:
999
1000
  self._callback(event, collection, name, item)
@@ -1134,7 +1135,7 @@ class Collection(RichCollectionBase):
1134
1135
  self._manifest_text = None
1135
1136
  self._portable_data_hash = None
1136
1137
  self._api_response = None
1137
- self._past_versions = set()
1138
+ self._token_refresh_timestamp = 0
1138
1139
 
1139
1140
  self.lock = threading.RLock()
1140
1141
  self.events = None
@@ -1200,20 +1201,6 @@ class Collection(RichCollectionBase):
1200
1201
  def writable(self) -> bool:
1201
1202
  return True
1202
1203
 
1203
- @synchronized
1204
- def known_past_version(
1205
- self,
1206
- modified_at_and_portable_data_hash: Tuple[Optional[str], Optional[str]]
1207
- ) -> bool:
1208
- """Indicate whether an API record for this collection has been seen before
1209
-
1210
- As this collection object loads records from the API server, it records
1211
- their `modified_at` and `portable_data_hash` fields. This method accepts
1212
- a 2-tuple with values for those fields, and returns `True` if the
1213
- combination was previously loaded.
1214
- """
1215
- return modified_at_and_portable_data_hash in self._past_versions
1216
-
1217
1204
  @synchronized
1218
1205
  @retry_method
1219
1206
  def update(
@@ -1245,23 +1232,61 @@ class Collection(RichCollectionBase):
1245
1232
  the collection's API record from the API server. If not specified,
1246
1233
  uses the `num_retries` provided when this instance was constructed.
1247
1234
  """
1235
+
1236
+ token_refresh_period = 60*60
1237
+ time_since_last_token_refresh = (time.time() - self._token_refresh_timestamp)
1238
+ upstream_response = None
1239
+
1248
1240
  if other is None:
1249
1241
  if self._manifest_locator is None:
1250
1242
  raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
1251
- response = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
1252
- if (self.known_past_version((response.get("modified_at"), response.get("portable_data_hash"))) and
1253
- response.get("portable_data_hash") != self.portable_data_hash()):
1254
- # The record on the server is different from our current one, but we've seen it before,
1255
- # so ignore it because it's already been merged.
1256
- # However, if it's the same as our current record, proceed with the update, because we want to update
1257
- # our tokens.
1243
+
1244
+ if re.match(arvados.util.portable_data_hash_pattern, self._manifest_locator) and time_since_last_token_refresh < token_refresh_period:
1258
1245
  return
1259
- else:
1260
- self._remember_api_response(response)
1261
- other = CollectionReader(response["manifest_text"])
1262
- baseline = CollectionReader(self._manifest_text)
1246
+
1247
+ upstream_response = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
1248
+ other = CollectionReader(upstream_response["manifest_text"])
1249
+
1250
+ if self.committed():
1251
+ # 1st case, no local changes, content is the same
1252
+ if self.portable_data_hash() == other.portable_data_hash() and time_since_last_token_refresh < token_refresh_period:
1253
+ # No difference in content. Remember the API record
1254
+ # (metadata such as name or properties may have changed)
1255
+ # but don't update the token refresh timestamp.
1256
+ if upstream_response is not None:
1257
+ self._remember_api_response(upstream_response)
1258
+ return
1259
+
1260
+ # 2nd case, no local changes, but either upstream changed
1261
+ # or we want to refresh tokens.
1262
+
1263
+ self.apply(self.diff(other))
1264
+ if upstream_response is not None:
1265
+ self._remember_api_response(upstream_response)
1266
+ self._update_token_timestamp()
1267
+ self.set_committed(True)
1268
+ return
1269
+
1270
+ # 3rd case, upstream changed, but we also have uncommitted
1271
+ # changes that we want to incorporate so they don't get lost.
1272
+
1273
+ # _manifest_text stores the text from last time we received a
1274
+ # record from the API server. This is the state of the
1275
+ # collection before our uncommitted changes.
1276
+ baseline = Collection(self._manifest_text)
1277
+
1278
+ # Get the set of changes between our baseline and the other
1279
+ # collection and apply them to self.
1280
+ #
1281
+ # If a file was modified in both 'self' and 'other', the
1282
+ # 'apply' method keeps the contents of 'self' and creates a
1283
+ # conflict file with the contents of 'other'.
1263
1284
  self.apply(baseline.diff(other))
1264
- self._manifest_text = self.manifest_text()
1285
+
1286
+ # Remember the new baseline, changes to a file
1287
+ if upstream_response is not None:
1288
+ self._remember_api_response(upstream_response)
1289
+
1265
1290
 
1266
1291
  @synchronized
1267
1292
  def _my_api(self):
@@ -1295,7 +1320,11 @@ class Collection(RichCollectionBase):
1295
1320
 
1296
1321
  def _remember_api_response(self, response):
1297
1322
  self._api_response = response
1298
- self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash")))
1323
+ self._manifest_text = self._api_response['manifest_text']
1324
+ self._portable_data_hash = self._api_response['portable_data_hash']
1325
+
1326
+ def _update_token_timestamp(self):
1327
+ self._token_refresh_timestamp = time.time()
1299
1328
 
1300
1329
  def _populate_from_api_server(self):
1301
1330
  # As in KeepClient itself, we must wait until the last
@@ -1308,8 +1337,7 @@ class Collection(RichCollectionBase):
1308
1337
  self._remember_api_response(self._my_api().collections().get(
1309
1338
  uuid=self._manifest_locator).execute(
1310
1339
  num_retries=self.num_retries))
1311
- self._manifest_text = self._api_response['manifest_text']
1312
- self._portable_data_hash = self._api_response['portable_data_hash']
1340
+
1313
1341
  # If not overriden via kwargs, we should try to load the
1314
1342
  # replication_desired and storage_classes_desired from the API server
1315
1343
  if self.replication_desired is None:
@@ -1534,8 +1562,6 @@ class Collection(RichCollectionBase):
1534
1562
  uuid=self._manifest_locator,
1535
1563
  body=body
1536
1564
  ).execute(num_retries=num_retries))
1537
- self._manifest_text = self._api_response["manifest_text"]
1538
- self._portable_data_hash = self._api_response["portable_data_hash"]
1539
1565
  self.set_committed(True)
1540
1566
  elif body:
1541
1567
  self._remember_api_response(self._my_api().collections().update(
@@ -1654,12 +1680,7 @@ class Collection(RichCollectionBase):
1654
1680
  body["preserve_version"] = preserve_version
1655
1681
 
1656
1682
  self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
1657
- text = self._api_response["manifest_text"]
1658
-
1659
1683
  self._manifest_locator = self._api_response["uuid"]
1660
- self._portable_data_hash = self._api_response["portable_data_hash"]
1661
-
1662
- self._manifest_text = text
1663
1684
  self.set_committed(True)
1664
1685
 
1665
1686
  return text
@@ -1743,6 +1764,7 @@ class Collection(RichCollectionBase):
1743
1764
  stream_name = None
1744
1765
  state = STREAM_NAME
1745
1766
 
1767
+ self._update_token_timestamp()
1746
1768
  self.set_committed(True)
1747
1769
 
1748
1770
  @synchronized
@@ -3,14 +3,20 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import argparse
6
+ import dataclasses
6
7
  import errno
7
8
  import json
8
9
  import logging
10
+ import operator
9
11
  import os
10
12
  import re
11
13
  import signal
12
14
  import sys
13
15
 
16
+ import typing as t
17
+
18
+ from .. import _internal
19
+
14
20
  FILTER_STR_RE = re.compile(r'''
15
21
  ^\(
16
22
  \ *(\w+)
@@ -19,16 +25,41 @@ FILTER_STR_RE = re.compile(r'''
19
25
  \ *\)$
20
26
  ''', re.ASCII | re.VERBOSE)
21
27
 
22
- def _pos_int(s):
23
- num = int(s)
24
- if num < 0:
25
- raise ValueError("can't accept negative value: %s" % (num,))
26
- return num
28
+ T = t.TypeVar('T')
29
+
30
+ @dataclasses.dataclass(unsafe_hash=True)
31
+ class RangedValue(t.Generic[T]):
32
+ """Validate that an argument string is within a valid range of values"""
33
+ parse_func: t.Callable[[str], T]
34
+ valid_range: t.Container[T]
35
+
36
+ def __call__(self, s: str) -> T:
37
+ value = self.parse_func(s)
38
+ if value in self.valid_range:
39
+ return value
40
+ else:
41
+ raise ValueError(f"{value!r} is not a valid value")
42
+
43
+
44
+ @dataclasses.dataclass(unsafe_hash=True)
45
+ class UniqueSplit(t.Generic[T]):
46
+ """Parse a string into a list of unique values"""
47
+ split: t.Callable[[str], t.Iterable[str]]=operator.methodcaller('split', ',')
48
+ clean: t.Callable[[str], str]=operator.methodcaller('strip')
49
+ check: t.Callable[[str], bool]=bool
50
+
51
+ def __call__(self, s: str) -> T:
52
+ return list(_internal.uniq(_internal.parse_seq(s, self.split, self.clean, self.check)))
53
+
27
54
 
28
55
  retry_opt = argparse.ArgumentParser(add_help=False)
29
- retry_opt.add_argument('--retries', type=_pos_int, default=10, help="""
30
- Maximum number of times to retry server requests that encounter temporary
31
- failures (e.g., server down). Default 10.""")
56
+ retry_opt.add_argument(
57
+ '--retries',
58
+ type=RangedValue(int, range(0, sys.maxsize)),
59
+ default=10,
60
+ help="""Maximum number of times to retry server requests that encounter
61
+ temporary failures (e.g., server down). Default %(default)r.
62
+ """)
32
63
 
33
64
  def _ignore_error(error):
34
65
  return None
@@ -33,6 +33,10 @@ import io
33
33
  import json
34
34
  import queue
35
35
  import threading
36
+ import errno
37
+
38
+ import httplib2.error
39
+ import googleapiclient
36
40
 
37
41
  import arvados
38
42
  import arvados.config
@@ -40,6 +44,7 @@ import arvados.keep
40
44
  import arvados.util
41
45
  import arvados.commands._util as arv_cmd
42
46
  import arvados.commands.keepdocker
47
+ from arvados.logging import log_handler
43
48
 
44
49
  from arvados._internal import basedirs, http_to_keep
45
50
  from arvados._version import __version__
@@ -48,6 +53,9 @@ COMMIT_HASH_RE = re.compile(r'^[0-9a-f]{1,40}$')
48
53
 
49
54
  logger = logging.getLogger('arvados.arv-copy')
50
55
 
56
+ # Set this up so connection errors get logged.
57
+ googleapi_logger = logging.getLogger('googleapiclient.http')
58
+
51
59
  # local_repo_dir records which git repositories from the Arvados source
52
60
  # instance have been checked out locally during this run, and to which
53
61
  # directories.
@@ -112,7 +120,18 @@ If not provided, will use the default client configuration from the environment
112
120
  '--project-uuid', dest='project_uuid',
113
121
  help='The UUID of the project at the destination to which the collection or workflow should be copied.')
114
122
  copy_opts.add_argument(
115
- '--storage-classes', dest='storage_classes',
123
+ '--replication',
124
+ type=arv_cmd.RangedValue(int, range(1, sys.maxsize)),
125
+ metavar='N',
126
+ help="""
127
+ Number of replicas per storage class for the copied collections at the destination.
128
+ If not provided (or if provided with invalid value),
129
+ use the destination's default replication-level setting (if found),
130
+ or the fallback value 2.
131
+ """)
132
+ copy_opts.add_argument(
133
+ '--storage-classes',
134
+ type=arv_cmd.UniqueSplit(),
116
135
  help='Comma separated list of storage classes to be used when saving data to the destinaton Arvados instance.')
117
136
  copy_opts.add_argument("--varying-url-params", type=str, default="",
118
137
  help="A comma separated list of URL query parameters that should be ignored when storing HTTP URLs in Keep.")
@@ -131,9 +150,6 @@ If not provided, will use the default client configuration from the environment
131
150
  parents=[copy_opts, arv_cmd.retry_opt])
132
151
  args = parser.parse_args()
133
152
 
134
- if args.storage_classes:
135
- args.storage_classes = [x for x in args.storage_classes.strip().replace(' ', '').split(',') if x]
136
-
137
153
  if args.verbose:
138
154
  logger.setLevel(logging.DEBUG)
139
155
  else:
@@ -142,10 +158,29 @@ If not provided, will use the default client configuration from the environment
142
158
  if not args.source_arvados and arvados.util.uuid_pattern.match(args.object_uuid):
143
159
  args.source_arvados = args.object_uuid[:5]
144
160
 
161
+ if not args.destination_arvados and args.project_uuid:
162
+ args.destination_arvados = args.project_uuid[:5]
163
+
164
+ # Make sure errors trying to connect to clusters get logged.
165
+ googleapi_logger.setLevel(logging.WARN)
166
+ googleapi_logger.addHandler(log_handler)
167
+
145
168
  # Create API clients for the source and destination instances
146
169
  src_arv = api_for_instance(args.source_arvados, args.retries)
147
170
  dst_arv = api_for_instance(args.destination_arvados, args.retries)
148
171
 
172
+ # Once we've successfully contacted the clusters, we probably
173
+ # don't want to see logging about retries (unless the user asked
174
+ # for verbose output).
175
+ if not args.verbose:
176
+ googleapi_logger.setLevel(logging.ERROR)
177
+
178
+ if src_arv.config()["ClusterID"] == dst_arv.config()["ClusterID"]:
179
+ logger.info("Copying within cluster %s", src_arv.config()["ClusterID"])
180
+ else:
181
+ logger.info("Source cluster is %s", src_arv.config()["ClusterID"])
182
+ logger.info("Destination cluster is %s", dst_arv.config()["ClusterID"])
183
+
149
184
  if not args.project_uuid:
150
185
  args.project_uuid = dst_arv.users().current().execute(num_retries=args.retries)["uuid"]
151
186
 
@@ -213,43 +248,64 @@ def set_src_owner_uuid(resource, uuid, args):
213
248
  # configuration directory.
214
249
  #
215
250
  def api_for_instance(instance_name, num_retries):
216
- if not instance_name:
217
- # Use environment
218
- return arvados.api('v1')
219
-
220
- if '/' in instance_name:
221
- config_file = instance_name
222
- else:
223
- dirs = basedirs.BaseDirectories('CONFIG')
224
- config_file = next(dirs.search(f'{instance_name}.conf'), '')
251
+ msg = []
252
+ if instance_name:
253
+ if '/' in instance_name:
254
+ config_file = instance_name
255
+ else:
256
+ dirs = basedirs.BaseDirectories('CONFIG')
257
+ config_file = next(dirs.search(f'{instance_name}.conf'), '')
225
258
 
259
+ try:
260
+ cfg = arvados.config.load(config_file)
261
+
262
+ if 'ARVADOS_API_HOST' in cfg and 'ARVADOS_API_TOKEN' in cfg:
263
+ api_is_insecure = (
264
+ cfg.get('ARVADOS_API_HOST_INSECURE', '').lower() in set(
265
+ ['1', 't', 'true', 'y', 'yes']))
266
+ return arvados.api('v1',
267
+ host=cfg['ARVADOS_API_HOST'],
268
+ token=cfg['ARVADOS_API_TOKEN'],
269
+ insecure=api_is_insecure,
270
+ num_retries=num_retries,
271
+ )
272
+ else:
273
+ msg.append('missing ARVADOS_API_HOST or ARVADOS_API_TOKEN for {} in config file {}'.format(instance_name, config_file))
274
+ except OSError as e:
275
+ if e.errno in (errno.EHOSTUNREACH, errno.ECONNREFUSED, errno.ECONNRESET, errno.ENETUNREACH):
276
+ verb = 'connect to instance from'
277
+ elif config_file:
278
+ verb = 'open'
279
+ else:
280
+ verb = 'find'
281
+ searchlist = ":".join(str(p) for p in dirs.search_paths())
282
+ config_file = f'{instance_name}.conf in path {searchlist}'
283
+ msg.append(("Could not {} config file {}: {}").format(
284
+ verb, config_file, e.strerror))
285
+ except (httplib2.error.HttpLib2Error, googleapiclient.errors.Error) as e:
286
+ msg.append("Failed to connect to instance {} at {}, error was {}".format(instance_name, cfg['ARVADOS_API_HOST'], e))
287
+
288
+ default_api = None
289
+ default_instance = None
226
290
  try:
227
- cfg = arvados.config.load(config_file)
228
- except OSError as e:
229
- if config_file:
230
- verb = 'open'
231
- else:
232
- verb = 'find'
233
- config_file = f'{instance_name}.conf'
234
- abort(("Could not {} config file {}: {}\n" +
235
- "You must make sure that your configuration tokens\n" +
236
- "for Arvados instance {} are in {} and that this\n" +
237
- "file is readable.").format(
238
- verb, config_file, e.strerror, instance_name, config_file))
239
-
240
- if 'ARVADOS_API_HOST' in cfg and 'ARVADOS_API_TOKEN' in cfg:
241
- api_is_insecure = (
242
- cfg.get('ARVADOS_API_HOST_INSECURE', '').lower() in set(
243
- ['1', 't', 'true', 'y', 'yes']))
244
- client = arvados.api('v1',
245
- host=cfg['ARVADOS_API_HOST'],
246
- token=cfg['ARVADOS_API_TOKEN'],
247
- insecure=api_is_insecure,
248
- num_retries=num_retries,
249
- )
250
- else:
251
- abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
252
- return client
291
+ default_api = arvados.api('v1', num_retries=num_retries)
292
+ default_instance = default_api.config()["ClusterID"]
293
+ except ValueError:
294
+ pass
295
+ except (httplib2.error.HttpLib2Error, googleapiclient.errors.Error, OSError) as e:
296
+ msg.append("Failed to connect to default instance, error was {}".format(e))
297
+
298
+ if default_api is not None and (not instance_name or instance_name == default_instance):
299
+ # Use default settings
300
+ return default_api
301
+
302
+ if instance_name and default_instance and instance_name != default_instance:
303
+ msg.append("Default credentials are for {} but need to connect to {}".format(default_instance, instance_name))
304
+
305
+ for m in msg:
306
+ logger.error(m)
307
+
308
+ abort('Unable to find usable ARVADOS_API_HOST and ARVADOS_API_TOKEN')
253
309
 
254
310
  # Check if git is available
255
311
  def check_git_availability():
@@ -587,6 +643,14 @@ def copy_collection(obj_uuid, src, dst, args):
587
643
  ).execute(num_retries=args.retries)['manifest_text']
588
644
  return create_collection_from(c, src, dst, args)
589
645
 
646
+ if args.replication is None:
647
+ # Obtain default or fallback collection replication setting on the
648
+ # destination
649
+ try:
650
+ args.replication = int(dst.config()["Collections"]["DefaultReplication"])
651
+ except (KeyError, TypeError, ValueError):
652
+ args.replication = 2
653
+
590
654
  # Fetch the collection's manifest.
591
655
  manifest = c['manifest_text']
592
656
  logger.debug("Copying collection %s with manifest: <%s>", obj_uuid, manifest)
@@ -678,7 +742,7 @@ def copy_collection(obj_uuid, src, dst, args):
678
742
 
679
743
  try:
680
744
  logger.debug("Putting block %s (%s bytes)", blockhash, loc.size)
681
- dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
745
+ dst_locator = dst_keep.put(data, copies=args.replication, classes=(args.storage_classes or []))
682
746
  with lock:
683
747
  dst_locators[blockhash] = dst_locator
684
748
  bytes_written += loc.size
@@ -870,17 +934,17 @@ def uuid_type(api, object_uuid):
870
934
  def copy_from_http(url, src, dst, args):
871
935
 
872
936
  project_uuid = args.project_uuid
873
- varying_url_params = args.varying_url_params
937
+ # Ensure string of varying parameters is well-formed
874
938
  prefer_cached_downloads = args.prefer_cached_downloads
875
939
 
876
940
  cached = http_to_keep.check_cached_url(src, project_uuid, url, {},
877
- varying_url_params=varying_url_params,
941
+ varying_url_params=args.varying_url_params,
878
942
  prefer_cached_downloads=prefer_cached_downloads)
879
943
  if cached[2] is not None:
880
944
  return copy_collection(cached[2], src, dst, args)
881
945
 
882
946
  cached = http_to_keep.http_to_keep(dst, project_uuid, url,
883
- varying_url_params=varying_url_params,
947
+ varying_url_params=args.varying_url_params,
884
948
  prefer_cached_downloads=prefer_cached_downloads)
885
949
 
886
950
  if cached is not None: