dcicutils 8.8.4.1b15__tar.gz → 8.8.4.1b18__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/PKG-INFO +1 -1
  2. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/file_utils.py +122 -37
  3. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/http_utils.py +13 -2
  4. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/pyproject.toml +1 -1
  5. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/LICENSE.txt +0 -0
  6. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/README.rst +0 -0
  7. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/__init__.py +0 -0
  8. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/base.py +0 -0
  9. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/beanstalk_utils.py +0 -0
  10. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/bundle_utils.py +0 -0
  11. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/captured_output.py +0 -0
  12. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/cloudformation_utils.py +0 -0
  13. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/codebuild_utils.py +0 -0
  14. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/command_utils.py +0 -0
  15. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/common.py +0 -0
  16. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/contribution_scripts.py +0 -0
  17. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/contribution_utils.py +0 -0
  18. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/creds_utils.py +0 -0
  19. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/data_readers.py +0 -0
  20. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/data_utils.py +0 -0
  21. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/datetime_utils.py +0 -0
  22. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/deployment_utils.py +0 -0
  23. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/diff_utils.py +0 -0
  24. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/docker_utils.py +0 -0
  25. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecr_scripts.py +0 -0
  26. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecr_utils.py +0 -0
  27. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecs_utils.py +0 -0
  28. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_base.py +0 -0
  29. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_manager.py +0 -0
  30. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_scripts.py +0 -0
  31. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_utils.py +0 -0
  32. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_utils_legacy.py +0 -0
  33. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/es_utils.py +0 -0
  34. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/exceptions.py +0 -0
  35. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ff_mocks.py +0 -0
  36. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ff_utils.py +0 -0
  37. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/function_cache_decorator.py +0 -0
  38. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/glacier_utils.py +0 -0
  39. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/jh_utils.py +0 -0
  40. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/kibana/dashboards.json +0 -0
  41. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/kibana/readme.md +0 -0
  42. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/lang_utils.py +0 -0
  43. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  44. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  45. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  46. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  47. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  48. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  49. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_utils.py +0 -0
  50. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/log_utils.py +0 -0
  51. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/misc_utils.py +0 -0
  52. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/obfuscation_utils.py +0 -0
  53. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/opensearch_utils.py +0 -0
  54. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/portal_object_utils.py +0 -0
  55. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/portal_utils.py +0 -0
  56. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/progress_bar.py +0 -0
  57. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/project_utils.py +0 -0
  58. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/qa_checkers.py +0 -0
  59. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/qa_utils.py +0 -0
  60. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/redis_tools.py +0 -0
  61. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/redis_utils.py +0 -0
  62. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/s3_utils.py +0 -0
  63. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/schema_utils.py +0 -0
  64. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/publish_to_pypi.py +0 -0
  65. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/run_license_checker.py +0 -0
  66. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/view_portal_object.py +0 -0
  67. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/secrets_utils.py +0 -0
  68. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/sheet_utils.py +0 -0
  69. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/snapshot_utils.py +0 -0
  70. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ssl_certificate_utils.py +0 -0
  71. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/structured_data.py +0 -0
  72. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/submitr/progress_constants.py +0 -0
  73. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/submitr/ref_lookup_strategy.py +0 -0
  74. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/task_utils.py +0 -0
  75. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/tmpfile_utils.py +0 -0
  76. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/trace_utils.py +0 -0
  77. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/validation_utils.py +0 -0
  78. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/variant_utils.py +0 -0
  79. {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.4.1b15
3
+ Version: 8.8.4.1b18
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -1,4 +1,6 @@
1
1
  import glob
2
+ import hashlib
3
+ import io
2
4
  import os
3
5
  import pathlib
4
6
  from datetime import datetime
@@ -21,46 +23,59 @@ def search_for_file(file: str,
21
23
  first file which is found is returned (as a string), or None if none; if the single flag
22
24
  is False, then all matched files are returned in a list, or an empty list if none.
23
25
  """
24
- if file and isinstance(file, (str, pathlib.PosixPath)):
25
- if os.path.isabs(file):
26
- if os.path.exists(file):
27
- return file if single else [file]
28
- return None if single else []
29
- files_found = []
30
- if not location:
31
- location = ["."]
32
- elif isinstance(location, (str, pathlib.PosixPath)):
33
- location = [location]
34
- elif not isinstance(location, list):
35
- location = []
26
+ if not (file and isinstance(file, (str, pathlib.PosixPath))):
27
+ return None if single is True else []
28
+ if os.path.isabs(file):
29
+ if os.path.exists(file):
30
+ return file if single is True else [file]
31
+ return None if single is True else []
32
+ files_found = []
33
+ if not location:
34
+ location = ["."]
35
+ elif isinstance(location, (str, pathlib.PosixPath)):
36
+ location = [location]
37
+ elif not isinstance(location, list):
38
+ location = []
39
+ location_pruned = []
40
+ for directory in location:
41
+ if not isinstance(directory, str):
42
+ if not isinstance(directory, pathlib.PosixPath):
43
+ continue
44
+ directory = str(directory)
45
+ if not (directory := directory.strip()):
46
+ continue
47
+ if os.path.isfile(directory):
48
+ # Allow a file; assume its parent directory was intended.
49
+ if not (directory := os.path.dirname(directory)):
50
+ continue
51
+ location_pruned.append(directory)
52
+ location = location_pruned
53
+ for directory in location:
54
+ if os.path.exists(os.path.join(directory, file)):
55
+ file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
56
+ if single is True:
57
+ return file_found
58
+ if file_found not in files_found:
59
+ files_found.append(file_found)
60
+ if recursive is True:
36
61
  for directory in location:
37
62
  if not directory:
38
63
  continue
39
- if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
40
- file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
41
- if single:
42
- return file_found
43
- if file_found not in files_found:
44
- files_found.append(file_found)
45
- if recursive:
46
- for directory in location:
47
- if not directory:
48
- continue
49
- if not directory.endswith("/**") and not file.startswith("**/"):
50
- path = f"{directory}/**/{file}"
51
- else:
52
- path = f"{directory}/{file}"
53
- files = glob.glob(path, recursive=recursive)
54
- if files:
55
- for file_found in files:
56
- file_found = os.path.abspath(file_found)
57
- if single:
58
- return file_found
59
- if file_found not in files_found:
60
- files_found.append(file_found)
61
- if files_found:
62
- return files_found[0] if single else files_found
63
- return None if single else []
64
+ if not directory.endswith("/**") and not file.startswith("**/"):
65
+ path = f"{directory}/**/{file}"
66
+ else:
67
+ path = f"{directory}/{file}"
68
+ files = glob.glob(path, recursive=True if recursive is True else False)
69
+ if files:
70
+ for file_found in files:
71
+ file_found = os.path.abspath(file_found)
72
+ if single is True:
73
+ return file_found
74
+ if file_found not in files_found:
75
+ files_found.append(file_found)
76
+ if files_found:
77
+ return files_found[0] if single is True else files_found
78
+ return None if single is True else []
64
79
 
65
80
 
66
81
  def normalize_file_path(path: str, home_directory: bool = True) -> str:
@@ -103,6 +118,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
103
118
  return False
104
119
 
105
120
 
121
+ def compute_file_md5(file: str) -> str:
122
+ """
123
+ Returns the md5 checksum for the given file.
124
+ """
125
+ if not isinstance(file, str):
126
+ return ""
127
+ try:
128
+ md5 = hashlib.md5()
129
+ with open(file, "rb") as file:
130
+ for chunk in iter(lambda: file.read(4096), b""):
131
+ md5.update(chunk)
132
+ return md5.hexdigest()
133
+ except Exception:
134
+ return ""
135
+
136
+
137
+ def compute_file_etag(file: str) -> Optional[str]:
138
+ """
139
+ Returns the AWS S3 "etag" for the given file; this value is md5-like but
140
+ not the same as a normal md5. We use this to compare that a file in S3
141
+ appears to be the exact same file as a local file.
142
+ """
143
+ try:
144
+ with io.open(file, "rb") as f:
145
+ return _compute_file_etag(f)
146
+ except Exception:
147
+ return None
148
+
149
+
150
+ def _compute_file_etag(f: io.BufferedReader) -> str:
151
+ # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
152
+ MULTIPART_THRESHOLD = 8388608
153
+ MULTIPART_CHUNKSIZE = 8388608
154
+ # BUFFER_SIZE = 1048576
155
+ # Verify some assumptions are correct
156
+ # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
157
+ # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
158
+ # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
159
+ hash = hashlib.md5()
160
+ read = 0
161
+ chunks = None
162
+ while True:
163
+ # Read some from the file; if we're at the end, stop reading
164
+ bits = f.read(1048576)
165
+ if len(bits) == 0:
166
+ break
167
+ read += len(bits)
168
+ hash.update(bits)
169
+ if chunks is None:
170
+ # We're handling a multi-part upload, so switch to calculating
171
+ # hashes of each chunk
172
+ if read >= MULTIPART_THRESHOLD:
173
+ chunks = b''
174
+ if chunks is not None:
175
+ if (read % MULTIPART_CHUNKSIZE) == 0:
176
+ # Done with a chunk; add its digest to the list of hashes to hash later
177
+ chunks += hash.digest()
178
+ hash = hashlib.md5()
179
+ if chunks is None:
180
+ # Normal upload, just output the MD5 hash
181
+ etag = hash.hexdigest()
182
+ else:
183
+ # Multipart upload, need to output the hash of the hashes
184
+ if (read % MULTIPART_CHUNKSIZE) != 0:
185
+ # Add the last part if we have a partial chunk
186
+ chunks += hash.digest()
187
+ etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
188
+ return etag
189
+
190
+
106
191
  def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
107
192
  nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
108
193
  """
@@ -1,20 +1,31 @@
1
1
  from contextlib import contextmanager
2
2
  import requests
3
- from typing import Optional
3
+ from typing import Callable, Optional
4
4
  from dcicutils.tmpfile_utils import temporary_file
5
5
 
6
6
 
7
7
  @contextmanager
8
- def download(url: str, suffix: Optional[str] = None, binary: bool = True) -> Optional[str]:
8
+ def download(url: str, suffix: Optional[str] = None, binary: bool = True,
9
+ progress: Optional[Callable] = None) -> Optional[str]:
9
10
  """
10
11
  Context manager to download the given URL into a temporary file and yield the file
11
12
  path to it. An optional file suffix may be specified. Defaults to binary file mode;
12
13
  if this is not desired then pass False as the binary argument.
13
14
  """
15
+ if not callable(progress):
16
+ progress = None
14
17
  with temporary_file(suffix=suffix) as file:
15
18
  response = requests.get(url, stream=True)
19
+ if progress:
20
+ nbytes = 0
21
+ nbytes_total = None
22
+ if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
23
+ nbytes_total = int(content_length)
16
24
  with open(file, "wb" if binary is True else "w") as f:
17
25
  for chunk in response.iter_content(chunk_size=8192):
18
26
  if chunk:
19
27
  f.write(chunk)
28
+ if progress:
29
+ nbytes += len(chunk)
30
+ progress(nbytes, nbytes_total)
20
31
  yield file
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.4.1b15" # TODO: To become 8.8.5
3
+ version = "8.8.4.1b18" # TODO: To become 8.8.5
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"