dcicutils 8.9.0.0b0__tar.gz → 8.9.0.1b2__tar.gz

Files changed (82)
  1. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/PKG-INFO +6 -4
  2. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/command_utils.py +69 -1
  3. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/creds_utils.py +1 -1
  4. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ff_utils.py +4 -1
  5. dcicutils-8.9.0.1b2/dcicutils/file_utils.py +267 -0
  6. dcicutils-8.9.0.1b2/dcicutils/http_utils.py +39 -0
  7. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/misc_utils.py +82 -5
  8. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/portal_object_utils.py +24 -89
  9. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/portal_utils.py +249 -37
  10. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/schema_utils.py +1 -1
  11. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/view_portal_object.py +87 -5
  12. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/structured_data.py +59 -17
  13. dcicutils-8.9.0.1b2/dcicutils/submitr/ref_lookup_strategy.py +73 -0
  14. dcicutils-8.9.0.1b2/dcicutils/tmpfile_utils.py +76 -0
  15. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/zip_utils.py +27 -0
  16. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/pyproject.toml +8 -6
  17. dcicutils-8.9.0.0b0/dcicutils/file_utils.py +0 -58
  18. dcicutils-8.9.0.0b0/dcicutils/submitr/ref_lookup_strategy.py +0 -67
  19. dcicutils-8.9.0.0b0/dcicutils/tmpfile_utils.py +0 -36
  20. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/LICENSE.txt +0 -0
  21. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/README.rst +0 -0
  22. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/__init__.py +0 -0
  23. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/base.py +0 -0
  24. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/beanstalk_utils.py +0 -0
  25. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/bundle_utils.py +0 -0
  26. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/captured_output.py +0 -0
  27. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/cloudformation_utils.py +0 -0
  28. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/codebuild_utils.py +0 -0
  29. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/common.py +0 -0
  30. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/contribution_scripts.py +0 -0
  31. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/contribution_utils.py +0 -0
  32. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/data_readers.py +0 -0
  33. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/data_utils.py +0 -0
  34. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/datetime_utils.py +0 -0
  35. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/deployment_utils.py +0 -0
  36. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/diff_utils.py +0 -0
  37. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/docker_utils.py +0 -0
  38. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecr_scripts.py +0 -0
  39. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecr_utils.py +0 -0
  40. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecs_utils.py +0 -0
  41. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_base.py +0 -0
  42. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_manager.py +0 -0
  43. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_scripts.py +0 -0
  44. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_utils.py +0 -0
  45. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_utils_legacy.py +0 -0
  46. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/es_utils.py +0 -0
  47. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/exceptions.py +0 -0
  48. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ff_mocks.py +0 -0
  49. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/function_cache_decorator.py +0 -0
  50. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/glacier_utils.py +0 -0
  51. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/jh_utils.py +0 -0
  52. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/kibana/dashboards.json +0 -0
  53. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/kibana/readme.md +0 -0
  54. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/lang_utils.py +0 -0
  55. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  56. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  57. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  58. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  59. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  60. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  61. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_utils.py +0 -0
  62. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/log_utils.py +0 -0
  63. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/obfuscation_utils.py +0 -0
  64. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/opensearch_utils.py +0 -0
  65. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/progress_bar.py +0 -0
  66. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/project_utils.py +0 -0
  67. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/qa_checkers.py +0 -0
  68. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/qa_utils.py +0 -0
  69. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/redis_tools.py +0 -0
  70. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/redis_utils.py +0 -0
  71. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/s3_utils.py +0 -0
  72. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/publish_to_pypi.py +0 -0
  73. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/run_license_checker.py +0 -0
  74. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/secrets_utils.py +0 -0
  75. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/sheet_utils.py +0 -0
  76. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/snapshot_utils.py +0 -0
  77. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ssl_certificate_utils.py +0 -0
  78. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/submitr/progress_constants.py +0 -0
  79. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/task_utils.py +0 -0
  80. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/trace_utils.py +0 -0
  81. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/validation_utils.py +0 -0
  82. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/variant_utils.py +0 -0

PKG-INFO

@@ -1,12 +1,12 @@
  Metadata-Version: 2.1
  Name: dcicutils
- Version: 8.9.0.0b0
+ Version: 8.9.0.1b2
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
  Home-page: https://github.com/4dn-dcic/utils
  License: MIT
  Author: 4DN-DCIC Team
  Author-email: support@4dnucleome.org
- Requires-Python: >=3.8,<3.12
+ Requires-Python: >=3.8,<3.13
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Topic :: Database :: Database Engines/Servers
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
- Requires-Dist: boto3 (>=1.28.57,<2.0.0)
- Requires-Dist: botocore (>=1.31.57,<2.0.0)
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
  Requires-Dist: docker (>=4.4.4,<5.0.0)
  Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
  Requires-Dist: redis (>=4.5.1,<5.0.0)
  Requires-Dist: requests (>=2.21.0,<3.0.0)
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
  Requires-Dist: toml (>=0.10.1,<1)
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)

dcicutils/command_utils.py

@@ -1,3 +1,4 @@
+ from __future__ import annotations
  import contextlib
  import functools
  import glob
@@ -7,7 +8,7 @@ import re
  import requests
  import subprocess

- from typing import Optional
+ from typing import Callable, Optional
  from .exceptions import InvalidParameterError
  from .lang_utils import there_are
  from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
          message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
          PRINT(message)
          exit(1)
+
+
+ class Question:
+     """
+     Supports asking the user (via stdin) a yes/no question, possibly repeatedly; after
+     some maximum number of consecutive identical answers, asks them if they want to
+     automatically give that same answer to any/all subsequent questions.
+     Supports a static/global list of such Question instances, hashed (only) by the question text.
+     """
+     _static_instances = {}
+
+     @staticmethod
+     def instance(question: Optional[str] = None,
+                  max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
+         question = question if isinstance(question, str) else ""
+         if not (instance := Question._static_instances.get(question)):
+             Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
+         return instance
+
+     @staticmethod
+     def yes(question: Optional[str] = None,
+             max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
+         return Question.instance(question, max=max, printf=printf).ask()
+
+     def __init__(self, question: Optional[str] = None,
+                  max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
+         self._question = question if isinstance(question, str) else ""
+         self._max = max if isinstance(max, int) and max > 0 else None
+         self._print = printf if callable(printf) else print
+         self._yes_consecutive_count = 0
+         self._no_consecutive_count = 0
+         self._yes_automatic = False
+         self._no_automatic = False
+
+     def ask(self, question: Optional[str] = None) -> bool:
+
+         def question_automatic(value: str) -> bool:
+             nonlocal self
+             RARROW = "▶"
+             LARROW = "◀"
+             if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
+                          f" Do you want to answer {value} to all such questions?"
+                          f" {LARROW}{LARROW}{LARROW}"):
+                 return True
+             self._yes_consecutive_count = 0
+             self._no_consecutive_count = 0
+
+         if self._yes_automatic:
+             return True
+         elif self._no_automatic:
+             return False
+         elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
+             self._yes_consecutive_count += 1
+             self._no_consecutive_count = 0
+             if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
+                 # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
+                 if question_automatic("YES"):
+                     self._yes_automatic = True
+             return True
+         else:
+             self._no_consecutive_count += 1
+             self._yes_consecutive_count = 0
+             if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
+                 # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
+                 if question_automatic("NO"):
+                     self._no_automatic = True
+             return False
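
For orientation, a minimal usage sketch of the new Question helper (the prompt text, file names, and max value here are hypothetical; Question instances are keyed by the question text, so repeating the same text accumulates the consecutive-answer count):

    from dcicutils.command_utils import Question

    # Hypothetical loop: the same yes/no question is asked once per file; after 3
    # consecutive identical answers the user is offered the option to apply that
    # answer automatically to all remaining occurrences of this question.
    for path in ["a.fastq", "b.fastq", "c.fastq", "d.fastq"]:
        if Question.yes("Overwrite the existing copy of this file?", max=3):
            print(f"Overwriting {path} ...")
        else:
            print(f"Skipping {path}.")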

dcicutils/creds_utils.py

@@ -170,7 +170,7 @@ class KeyManager:
                  raise ValueError(f"A KeyManager named {name!r} has already been defined.")
              key_manager_class._init_class_variables()
              key_manager_class._REGISTERED = True
-             _KEY_MANAGERS[name] = cls
+             _KEY_MANAGERS[name] = key_manager_class
              return key_manager_class
          return _register_class


dcicutils/ff_utils.py

@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
      used to create the generator.
      Should NOT be used directly
      """
+     def get_es_host_local() -> Optional[str]:
+         return os.environ.get("ES_HOST_LOCAL", None)
      health = get_health_page(key=auth)
      if es_client is None:
-         es_url = health['elasticsearch']
+         if not (es_url := get_es_host_local()):
+             es_url = health['elasticsearch']
          es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
      namespace_star = health.get('namespace', '') + '*'
      # match all given uuids to _id fields
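
The practical effect of this change: if an ES_HOST_LOCAL environment variable is set, _get_es_metadata uses it instead of the ElasticSearch URL advertised by the portal health page, for example to point at a locally tunneled cluster. A small sketch (the URL is purely illustrative):

    import os

    # Hypothetical local tunnel to the ElasticSearch cluster; when this is set,
    # the code above skips health['elasticsearch'] and uses this value instead.
    os.environ["ES_HOST_LOCAL"] = "https://localhost:9200"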

dcicutils/file_utils.py (new file)

@@ -0,0 +1,267 @@
+ import glob
+ import hashlib
+ import io
+ import os
+ import pathlib
+ from datetime import datetime
+ import random
+ import string
+ from tempfile import gettempdir as get_temporary_directory
+ from typing import List, Optional, Union
+ from uuid import uuid4 as uuid
+
+ HOME_DIRECTORY = str(pathlib.Path().home())
+
+
+ def search_for_file(file: str,
+                     location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
+                     recursive: bool = False,
+                     single: bool = False,
+                     order: bool = True) -> Union[List[str], Optional[str]]:
+     """
+     Searches for the existence of the given file name, first directly in the given directory or list
+     of directories, if specified, and if not then just in the current (working) directory; if the
+     given recursive flag is True then also searches all sub-directories of these directories;
+     returns the full path name to the file if found. If the single flag is True then just the
+     first file which is found is returned (as a string), or None if none; if the single flag
+     is False, then all matched files are returned in a list, or an empty list if none.
+     """
+     def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+         def order_by(path: str):
+             return len(path.split(os.path.sep)), path
+         return sorted(paths, key=order_by)
+
+     if not (file and isinstance(file, (str, pathlib.PosixPath))):
+         return None if single is True else []
+     if os.path.isabs(file):
+         if os.path.exists(file):
+             return str(file) if single is True else [str(file)]
+         return None if single is True else []
+     files_found = []
+     if not location:
+         location = ["."]
+     elif isinstance(location, (str, pathlib.PosixPath)):
+         location = [location]
+     elif not isinstance(location, list):
+         location = []
+     location_pruned = []
+     for directory in location:
+         if not isinstance(directory, str):
+             if not isinstance(directory, pathlib.PosixPath):
+                 continue
+             directory = str(directory)
+         if not (directory := directory.strip()):
+             continue
+         if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+             # Actually, allow a file rather than a directory; assume its parent directory was intended.
+             if not (directory := os.path.dirname(directory)):
+                 continue
+         if directory not in location_pruned:
+             location_pruned.append(directory)
+     location = location_pruned
+     for directory in location:
+         if os.path.exists(os.path.join(directory, file)):
+             file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+             if single is True:
+                 return file_found
+             if file_found not in files_found:
+                 files_found.append(file_found)
+     if recursive is True:
+         for directory in location:
+             if not directory.endswith("/**") and not file.startswith("**/"):
+                 path = f"{directory}/**/{file}"
+             else:
+                 path = f"{directory}/{file}"
+             files = glob.glob(path, recursive=True if recursive is True else False)
+             if files:
+                 for file_found in files:
+                     file_found = os.path.abspath(file_found)
+                     if single is True:
+                         return file_found
+                     if file_found not in files_found:
+                         files_found.append(file_found)
+     if single is True:
+         return files_found[0] if files_found else None
+     elif order is True:
+         return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+     else:
+         return files_found
+
+
+ def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+     """
+     Normalizes the given path value and returns the result; does things like remove redundant
+     consecutive directory separators and redundant parent paths. If the given absolute argument
+     is True then converts the path to an absolute path. If the given expand_home argument is False
+     and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+     converts it to such. If the expand_home argument is True and the path starts with the home directory
+     indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+     given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+     """
+     if isinstance(value, pathlib.Path):
+         value = str(value)
+     elif not isinstance(value, str):
+         return ""
+     if not (value := value.strip()) or not (value := os.path.normpath(value)):
+         return ""
+     if expand_home is True:
+         value = os.path.expanduser(value)
+     elif (expand_home is False) and (os.name == "posix"):
+         if value.startswith(home := HOME_DIRECTORY + os.sep):
+             value = "~/" + value[len(home):]
+         elif value == HOME_DIRECTORY:
+             value = "~"
+     if absolute is True:
+         value = os.path.abspath(value)
+     return value
+
+
+ def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+     try:
+         return os.path.getsize(file) if isinstance(file, str) else None
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+     try:
+         return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+     """
+     Returns True iff the contents of the two given files are exactly the same.
+     """
+     try:
+         with open(filea, "rb") as fa:
+             with open(fileb, "rb") as fb:
+                 chunk_size = 4096
+                 while True:
+                     chunka = fa.read(chunk_size)
+                     chunkb = fb.read(chunk_size)
+                     if chunka != chunkb:
+                         return False
+                     if not chunka:
+                         break
+         return True
+     except Exception:
+         if raise_exception is True:
+             raise
+         return False
+
+
+ def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+     """
+     Returns the md5 checksum for the given file.
+     """
+     if not isinstance(file, str):
+         return ""
+     try:
+         md5 = hashlib.md5()
+         with open(file, "rb") as file:
+             for chunk in iter(lambda: file.read(4096), b""):
+                 md5.update(chunk)
+         return md5.hexdigest()
+     except Exception:
+         if raise_exception is True:
+             raise
+         return ""
+
+
+ def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+     """
+     Returns the AWS S3 "etag" for the given file; this value is md5-like but
+     not the same as a normal md5. We use this to check that a file in S3
+     appears to be exactly the same file as a local file.
+     """
+     try:
+         with io.open(file, "rb") as f:
+             return _compute_file_etag(f)
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def _compute_file_etag(f: io.BufferedReader) -> str:
+     # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+     MULTIPART_THRESHOLD = 8388608
+     MULTIPART_CHUNKSIZE = 8388608
+     # BUFFER_SIZE = 1048576
+     # Verify some assumptions are correct
+     # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+     # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+     # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+     hash = hashlib.md5()
+     read = 0
+     chunks = None
+     while True:
+         # Read some more; if we are at the end, stop reading.
+         bits = f.read(1048576)
+         if len(bits) == 0:
+             break
+         read += len(bits)
+         hash.update(bits)
+         if chunks is None:
+             # We're handling a multi-part upload, so switch to calculating
+             # hashes of each chunk.
+             if read >= MULTIPART_THRESHOLD:
+                 chunks = b''
+         if chunks is not None:
+             if (read % MULTIPART_CHUNKSIZE) == 0:
+                 # Done with a chunk; add it to the list of hashes to hash later.
+                 chunks += hash.digest()
+                 hash = hashlib.md5()
+     if chunks is None:
+         # Normal upload, just output the MD5 hash.
+         etag = hash.hexdigest()
+     else:
+         # Multipart upload, need to output the hash of the hashes.
+         if (read % MULTIPART_CHUNKSIZE) != 0:
+             # Add the last part if we have a partial chunk.
+             chunks += hash.digest()
+         etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+     return etag
+
+
+ def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                        nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+     """
+     Writes some random content to the given file (name/path). If the given file is None then writes
+     to a temporary file. In either case, returns the file written to. The number of bytes written is
+     1024 by default but can be specified with the nbytes argument; defaults to writing ASCII text but
+     if the binary argument is True then writes binary data instead; if not binary the content is in
+     lines of 80 characters each; use the line_length argument in this case to change the line length.
+     """
+     if not isinstance(nbytes, int) or nbytes < 0:
+         nbytes = 0
+     if not isinstance(file, str) or not file:
+         if not isinstance(prefix, str):
+             prefix = ""
+         if not isinstance(suffix, str):
+             suffix = ""
+         file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+         file = os.path.join(get_temporary_directory(), file)
+     with open(file, "wb" if binary is True else "w") as f:
+         if binary is True:
+             f.write(os.urandom(nbytes))
+         else:
+             if (not isinstance(line_length, int)) or (line_length < 1):
+                 line_length = 80
+             line_length += 1
+             nlines = nbytes // line_length
+             nremainder = nbytes % line_length
+             for n in range(nlines):
+                 f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                 f.write("\n")
+             if nremainder > 1:
+                 f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+             if nremainder > 0:
+                 f.write("\n")
+     return file
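
A brief usage sketch for the new file_utils module (the file names and directories are hypothetical; behavior follows the docstrings above):

    from dcicutils.file_utils import (
        are_files_equal, compute_file_md5, create_random_file, normalize_path, search_for_file
    )

    # Find the first config.json in the current directory or any sub-directory (or None).
    config = search_for_file("config.json", location=".", recursive=True, single=True)

    # Collapse a path under the caller's home directory to its "~" form (result depends on $HOME).
    print(normalize_path("/home/someuser/work/../data", expand_home=False))

    # Create a 1 KB temporary file of random ASCII text and fingerprint it.
    path = create_random_file(nbytes=1024)
    print(compute_file_md5(path))
    print(are_files_equal(path, path))  # True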

dcicutils/http_utils.py (new file)

@@ -0,0 +1,39 @@
+ from contextlib import contextmanager
+ import requests
+ from typing import Callable, Optional
+ from dcicutils.tmpfile_utils import temporary_file
+
+
+ @contextmanager
+ def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+              progress: Optional[Callable] = None) -> Optional[str]:
+     """
+     Context manager to download the given URL into a temporary file and yield the file
+     path to it. An optional file suffix may be specified for this temporary file name.
+     Defaults to binary file mode; if not desired then pass False as the binary argument.
+     """
+     with temporary_file(suffix=suffix) as file:
+         download_to(url, file, binary=binary, progress=progress)
+         yield file
+
+
+ def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+     """
+     Download the given URL into the given file. Defaults to binary
+     file mode; if not desired then pass False as the binary argument.
+     """
+     if not callable(progress):
+         progress = None
+     response = requests.get(url, stream=True)
+     if progress:
+         nbytes = 0
+         nbytes_total = None
+         if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+             nbytes_total = int(content_length)
+     with open(file, "wb" if binary is True else "w") as f:
+         for chunk in response.iter_content(chunk_size=8192):
+             if chunk:
+                 f.write(chunk)
+                 if progress:
+                     nbytes += len(chunk)
+                     progress(nbytes, nbytes_total)
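
A usage sketch for the new download context manager (the URL is illustrative; the temporary file exists only inside the with block):

    from dcicutils.http_utils import download

    def progress(nbytes, nbytes_total):
        # nbytes_total is None when the server sends no Content-Length header.
        if nbytes_total:
            print(f"\rDownloaded {nbytes} of {nbytes_total} bytes", end="")

    with download("https://example.com/somefile.tar.gz", suffix=".tar.gz", progress=progress) as path:
        print(f"\nDownloaded to temporary file: {path}")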

dcicutils/misc_utils.py

@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
  """

  from collections import namedtuple
+ import appdirs
  import contextlib
  import datetime
  import functools
@@ -13,10 +14,12 @@ import json
  import logging
  import math
  import os
+ import platform
  import pytz
  import re
  import rfc3986.validators
  import rfc3986.exceptions
+ import shortuuid
  import time
  import uuid
  import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):

  def remove_empty_properties(data: Optional[Union[list, dict]],
                              isempty: Optional[Callable] = None,
-                             isempty_array_element: Optional[Callable] = None) -> None:
+                             isempty_array_element: Optional[Callable] = None,
+                             raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
      def _isempty(value: Any) -> bool:  # noqa
          return isempty(value) if callable(isempty) else value in [None, "", {}, []]
      if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
              if _isempty(value := data[key]):
                  del data[key]
              else:
-                 remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
+                 remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                         raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                         raise_exception_on_nonempty_array_element_after_empty)
      elif isinstance(data, list):
          for item in data:
-             remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
+             remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                     raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                     raise_exception_on_nonempty_array_element_after_empty)
          if callable(isempty_array_element):
+             if raise_exception_on_nonempty_array_element_after_empty is True:
+                 empty_element_seen = False
+                 for item in data:
+                     if not empty_element_seen and isempty_array_element(item):
+                         empty_element_seen = True
+                     elif empty_element_seen and not isempty_array_element(item):
+                         raise Exception("Non-empty element found after empty element.")
              data[:] = [item for item in data if not isempty_array_element(item)]
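
To illustrate the new raise_exception_on_nonempty_array_element_after_empty flag (made-up data): by default empty array elements are silently removed; with the flag set, a non-empty element occurring after an empty one raises instead.

    from dcicutils.misc_utils import remove_empty_properties

    data = {"items": ["a", "", "b"], "note": ""}
    remove_empty_properties(data, isempty_array_element=lambda item: item == "")
    print(data)  # {'items': ['a', 'b']}

    data = {"items": ["a", "", "b"]}
    try:
        remove_empty_properties(data, isempty_array_element=lambda item: item == "",
                                raise_exception_on_nonempty_array_element_after_empty=True)
    except Exception as e:
        print(e)  # Non-empty element found after empty element.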

@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
  def create_dict(**kwargs) -> dict:
      result = {}
      for name in kwargs:
-         if kwargs[name]:
+         if not (kwargs[name] is None):
              result[name] = kwargs[name]
      return result
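
The create_dict change is behavioral: previously any falsy value (0, False, "", empty list) was dropped from the result; now only None is dropped. A small sketch:

    from dcicutils.misc_utils import create_dict

    # 0 and False are now kept; only comment (None) is omitted.
    print(create_dict(count=0, verbose=False, name="abc", comment=None))
    # {'count': 0, 'verbose': False, 'name': 'abc'}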

@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
      return re.sub(r"\s+", " ", value).strip()


+ def normalize_string(value: Optional[str]) -> Optional[str]:
+     """
+     Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
+     in the given string value and returns the result. If the given value is None returns an
+     empty string. If the given value is not actually even a string then returns None.
+     """
+     if value is None:
+         return ""
+     elif isinstance(value, str):
+         return re.sub(r"\s+", " ", value).strip()
+     return None
+
+
  def find_nth_from_end(string: str, substring: str, nth: int) -> int:
      """
      Returns the index of the nth occurrence of the given substring within
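
normalize_string differs from the existing normalize_spaces only in how it treats non-string input: None becomes an empty string and any other non-string becomes None. For example:

    from dcicutils.misc_utils import normalize_string

    print(normalize_string("  hello   world  "))  # hello world
    print(normalize_string(None))                 # (empty string)
    print(normalize_string(123))                  # None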

@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
          nbytes = int(nbytes)
          return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
      unit = (UNITS_TERSE if terse else UNITS)[index]
-     return f"{nbytes:.{precision}f}{'' if nospace else ' '}{unit}"
+     size = f"{nbytes:.{precision}f}"
+     if size.endswith(f".{'0' * precision}"):
+         # Tidy up extraneous zeros.
+         size = size[:-(precision - 1)]
+     return f"{size}{'' if nospace else ' '}{unit}"


  def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
              yield line
          else:
              raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+
+
+ def get_app_specific_directory() -> str:
+     """
+     Returns the standard system application specific directory:
+     - On MacOS this directory is: ~/Library/Application Support
+     - On Linux this directory is: ~/.local/share
+     - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+     N.B. This has been tested on MacOS and Linux but not on Windows.
+     """
+     return appdirs.user_data_dir()
+
+
+ def get_os_name() -> str:
+     if os_name := platform.system():
+         if os_name == "Darwin": return "osx"  # noqa
+         elif os_name == "Linux": return "linux"  # noqa
+         elif os_name == "Windows": return "windows"  # noqa
+     return ""
+
+
+ def get_cpu_architecture_name() -> str:
+     if os_architecture_name := platform.machine():
+         if os_architecture_name == "x86_64": return "amd64"  # noqa
+         return os_architecture_name
+     return ""
+
+
+ def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+     value = str(uuid.uuid4())
+     if nodash is True:
+         value = value.replace("-", "")
+     if upper is True:
+         value = value.upper()
+     return value
+
+
+ def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+     # Not really technically a uuid of course.
+     if (length is None) or (not isinstance(length, int)) or (length < 1):
+         length = 16
+     value = shortuuid.ShortUUID().random(length=length)
+     if upper is True:
+         value = value.upper()
+     return value
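
Finally, a brief sketch of the new identifier and platform helpers (the printed values are of course illustrative and machine-dependent):

    from dcicutils.misc_utils import (
        create_short_uuid, create_uuid, get_cpu_architecture_name, get_os_name
    )

    print(create_uuid())                          # standard dashed UUID string
    print(create_uuid(nodash=True, upper=True))   # 32 hex characters, upper-cased, no dashes
    print(create_short_uuid(length=8))            # 8 characters from shortuuid's alphabet
    print(get_os_name(), get_cpu_architecture_name())  # e.g. "linux amd64" or "osx arm64"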