dcicutils 8.8.4.1b30__tar.gz → 8.9.0.0b0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/PKG-INFO +4 -6
  2. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ff_utils.py +1 -4
  3. dcicutils-8.9.0.0b0/dcicutils/file_utils.py +58 -0
  4. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/misc_utils.py +1 -62
  5. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/schema_utils.py +16 -0
  6. dcicutils-8.9.0.0b0/dcicutils/tmpfile_utils.py +36 -0
  7. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/zip_utils.py +0 -27
  8. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/pyproject.toml +6 -8
  9. dcicutils-8.8.4.1b30/dcicutils/file_utils.py +0 -267
  10. dcicutils-8.8.4.1b30/dcicutils/http_utils.py +0 -39
  11. dcicutils-8.8.4.1b30/dcicutils/tmpfile_utils.py +0 -74
  12. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/LICENSE.txt +0 -0
  13. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/README.rst +0 -0
  14. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/__init__.py +0 -0
  15. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/base.py +0 -0
  16. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/beanstalk_utils.py +0 -0
  17. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/bundle_utils.py +0 -0
  18. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/captured_output.py +0 -0
  19. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/cloudformation_utils.py +0 -0
  20. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/codebuild_utils.py +0 -0
  21. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/command_utils.py +0 -0
  22. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/common.py +0 -0
  23. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/contribution_scripts.py +0 -0
  24. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/contribution_utils.py +0 -0
  25. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/creds_utils.py +0 -0
  26. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/data_readers.py +0 -0
  27. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/data_utils.py +0 -0
  28. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/datetime_utils.py +0 -0
  29. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/deployment_utils.py +0 -0
  30. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/diff_utils.py +0 -0
  31. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/docker_utils.py +0 -0
  32. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ecr_scripts.py +0 -0
  33. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ecr_utils.py +0 -0
  34. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ecs_utils.py +0 -0
  35. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/env_base.py +0 -0
  36. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/env_manager.py +0 -0
  37. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/env_scripts.py +0 -0
  38. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/env_utils.py +0 -0
  39. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/env_utils_legacy.py +0 -0
  40. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/es_utils.py +0 -0
  41. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/exceptions.py +0 -0
  42. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ff_mocks.py +0 -0
  43. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/function_cache_decorator.py +0 -0
  44. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/glacier_utils.py +0 -0
  45. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/jh_utils.py +0 -0
  46. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/kibana/dashboards.json +0 -0
  47. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/kibana/readme.md +0 -0
  48. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/lang_utils.py +0 -0
  49. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  50. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  51. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  52. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  53. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  54. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  55. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/license_utils.py +0 -0
  56. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/log_utils.py +0 -0
  57. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/obfuscation_utils.py +0 -0
  58. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/opensearch_utils.py +0 -0
  59. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/portal_object_utils.py +0 -0
  60. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/portal_utils.py +0 -0
  61. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/progress_bar.py +0 -0
  62. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/project_utils.py +0 -0
  63. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/qa_checkers.py +0 -0
  64. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/qa_utils.py +0 -0
  65. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/redis_tools.py +0 -0
  66. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/redis_utils.py +0 -0
  67. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/s3_utils.py +0 -0
  68. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/scripts/publish_to_pypi.py +0 -0
  69. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/scripts/run_license_checker.py +0 -0
  70. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/scripts/view_portal_object.py +0 -0
  71. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/secrets_utils.py +0 -0
  72. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/sheet_utils.py +0 -0
  73. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/snapshot_utils.py +0 -0
  74. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/ssl_certificate_utils.py +0 -0
  75. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/structured_data.py +0 -0
  76. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/submitr/progress_constants.py +0 -0
  77. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/submitr/ref_lookup_strategy.py +0 -0
  78. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/task_utils.py +0 -0
  79. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/trace_utils.py +0 -0
  80. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/validation_utils.py +0 -0
  81. {dcicutils-8.8.4.1b30 → dcicutils-8.9.0.0b0}/dcicutils/variant_utils.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.4.1b30
3
+ Version: 8.9.0.0b0
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
7
7
  Author: 4DN-DCIC Team
8
8
  Author-email: support@4dnucleome.org
9
- Requires-Python: >=3.8,<3.13
9
+ Requires-Python: >=3.8,<3.12
10
10
  Classifier: Development Status :: 4 - Beta
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -24,10 +24,9 @@ Classifier: Programming Language :: Python :: 3.9
24
24
  Classifier: Topic :: Database :: Database Engines/Servers
25
25
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
26
26
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
27
- Requires-Dist: appdirs (>=1.4.4,<2.0.0)
28
27
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
29
- Requires-Dist: boto3 (>=1.34.93,<2.0.0)
30
- Requires-Dist: botocore (>=1.34.93,<2.0.0)
28
+ Requires-Dist: boto3 (>=1.28.57,<2.0.0)
29
+ Requires-Dist: botocore (>=1.31.57,<2.0.0)
31
30
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
32
31
  Requires-Dist: docker (>=4.4.4,<5.0.0)
33
32
  Requires-Dist: elasticsearch (==7.13.4)
@@ -43,7 +42,6 @@ Requires-Dist: pytz (>=2020.4)
43
42
  Requires-Dist: redis (>=4.5.1,<5.0.0)
44
43
  Requires-Dist: requests (>=2.21.0,<3.0.0)
45
44
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
46
- Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
47
45
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
48
46
  Requires-Dist: toml (>=0.10.1,<1)
49
47
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)
@@ -895,12 +895,9 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
895
895
  used to create the generator.
896
896
  Should NOT be used directly
897
897
  """
898
- def get_es_host_local() -> Optional[str]:
899
- return os.environ.get("ES_HOST_LOCAL", None)
900
898
  health = get_health_page(key=auth)
901
899
  if es_client is None:
902
- if not (es_url := get_es_host_local()):
903
- es_url = health['elasticsearch']
900
+ es_url = health['elasticsearch']
904
901
  es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
905
902
  namespace_star = health.get('namespace', '') + '*'
906
903
  # match all given uuids to _id fields
@@ -0,0 +1,58 @@
1
+ import glob
2
+ import os
3
+ import pathlib
4
+ from typing import List, Optional, Union
5
+
6
+
7
+ def search_for_file(file: str,
8
+ location: Union[str, Optional[List[str]]] = None,
9
+ recursive: bool = False,
10
+ single: bool = False) -> Union[List[str], Optional[str]]:
11
+ """
12
+ Searches for the existence of the given file name, first directly in the given directory or list
13
+ of directories, if specified, and if not then just in the current (working) directory; if the
14
+ given recursive flag is True then also searches all sub-directories of these directories;
15
+ returns the full path name to the file if found. If the single flag is True then just the
16
+ first file which is found is returns (as a string), or None if none; if the single flag
17
+ is False, then all matched files are returned in a list, or and empty list if none.
18
+ """
19
+ if file and isinstance(file, (str, pathlib.PosixPath)):
20
+ if os.path.isabs(file):
21
+ if os.path.exists(file):
22
+ return file if single else [file]
23
+ return None if single else []
24
+ files_found = []
25
+ if not location:
26
+ location = ["."]
27
+ elif isinstance(location, (str, pathlib.PosixPath)):
28
+ location = [location]
29
+ elif not isinstance(location, list):
30
+ location = []
31
+ for directory in location:
32
+ if not directory:
33
+ continue
34
+ if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
35
+ file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
36
+ if single:
37
+ return file_found
38
+ if file_found not in files_found:
39
+ files_found.append(file_found)
40
+ if recursive:
41
+ for directory in location:
42
+ if not directory:
43
+ continue
44
+ if not directory.endswith("/**") and not file.startswith("**/"):
45
+ path = f"{directory}/**/{file}"
46
+ else:
47
+ path = f"{directory}/{file}"
48
+ files = glob.glob(path, recursive=recursive)
49
+ if files:
50
+ for file_found in files:
51
+ file_found = os.path.abspath(file_found)
52
+ if single:
53
+ return file_found
54
+ if file_found not in files_found:
55
+ files_found.append(file_found)
56
+ if files_found:
57
+ return files_found[0] if single else files_found
58
+ return None if single else []
@@ -3,7 +3,6 @@ This file contains functions that might be generally useful.
3
3
  """
4
4
 
5
5
  from collections import namedtuple
6
- import appdirs
7
6
  import contextlib
8
7
  import datetime
9
8
  import functools
@@ -14,12 +13,10 @@ import json
14
13
  import logging
15
14
  import math
16
15
  import os
17
- import platform
18
16
  import pytz
19
17
  import re
20
18
  import rfc3986.validators
21
19
  import rfc3986.exceptions
22
- import shortuuid
23
20
  import time
24
21
  import uuid
25
22
  import warnings
@@ -1525,7 +1522,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
1525
1522
  def create_dict(**kwargs) -> dict:
1526
1523
  result = {}
1527
1524
  for name in kwargs:
1528
- if not (kwargs[name] is None):
1525
+ if kwargs[name]:
1529
1526
  result[name] = kwargs[name]
1530
1527
  return result
1531
1528
 
@@ -2551,19 +2548,6 @@ def normalize_spaces(value: str) -> str:
2551
2548
  return re.sub(r"\s+", " ", value).strip()
2552
2549
 
2553
2550
 
2554
- def normalize_string(value: Optional[str]) -> Optional[str]:
2555
- """
2556
- Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
2557
- in the given string value and returns the result. If the given value is None returns an
2558
- empty string. If the given value is not actually even a string then return None.
2559
- """
2560
- if value is None:
2561
- return ""
2562
- elif isinstance(value, str):
2563
- return re.sub(r"\s+", " ", value).strip()
2564
- return None
2565
-
2566
-
2567
2551
  def find_nth_from_end(string: str, substring: str, nth: int) -> int:
2568
2552
  """
2569
2553
  Returns the index of the nth occurrence of the given substring within
@@ -2686,48 +2670,3 @@ class JsonLinesReader:
2686
2670
  yield line
2687
2671
  else:
2688
2672
  raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
2689
-
2690
-
2691
- def get_app_specific_directory() -> str:
2692
- """
2693
- Returns the standard system application specific directory:
2694
- - On MacOS this directory: is: ~/Library/Application Support
2695
- - On Linux this directory is: ~/.local/share
2696
- - On Windows this directory is: %USERPROFILE%\AppData\Local # noqa
2697
- N.B. This is has been tested on MacOS and Linux but not on Windows.
2698
- """
2699
- return appdirs.user_data_dir()
2700
-
2701
-
2702
- def get_os_name() -> str:
2703
- if os_name := platform.system():
2704
- if os_name == "Darwin": return "osx" # noqa
2705
- elif os_name == "Linux": return "linux" # noqa
2706
- elif os_name == "Windows": return "windows" # noqa
2707
- return ""
2708
-
2709
-
2710
- def get_cpu_architecture_name() -> str:
2711
- if os_architecture_name := platform.machine():
2712
- if os_architecture_name == "x86_64": return "amd64" # noqa
2713
- return os_architecture_name
2714
- return ""
2715
-
2716
-
2717
- def create_uuid(nodash: bool = False, upper: bool = False) -> str:
2718
- value = str(uuid.uuid4())
2719
- if nodash is True:
2720
- value = value.replace("-", "")
2721
- if upper is True:
2722
- value = value.upper()
2723
- return value
2724
-
2725
-
2726
- def create_short_uuid(length: Optional[int] = None, upper: bool = False):
2727
- # Not really techincally a uuid of course.
2728
- if (length is None) or (not isinstance(length, int)) or (length < 1):
2729
- length = 16
2730
- value = shortuuid.ShortUUID().random(length=length)
2731
- if upper is True:
2732
- value = value.upper()
2733
- return value
@@ -24,6 +24,7 @@ class JsonSchemaConstants:
24
24
 
25
25
 
26
26
  class EncodedSchemaConstants:
27
+ DESCRIPTION = "description"
27
28
  IDENTIFYING_PROPERTIES = "identifyingProperties"
28
29
  LINK_TO = "linkTo"
29
30
  MERGE_REF = "$merge"
@@ -187,6 +188,21 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
187
188
  ]
188
189
 
189
190
 
191
+ def is_link(property_schema: Dict[str, Any]) -> bool:
192
+ """Is property schema a link?"""
193
+ return property_schema.get(SchemaConstants.LINK_TO, False)
194
+
195
+
196
+ def get_enum(property_schema: Dict[str, Any]) -> List[str]:
197
+ """Return the enum of a property schema."""
198
+ return property_schema.get(SchemaConstants.ENUM, [])
199
+
200
+
201
+ def get_description(schema: Dict[str, Any]) -> str:
202
+ """Return the description of a schema."""
203
+ return schema.get(SchemaConstants.DESCRIPTION, "")
204
+
205
+
190
206
  class Schema:
191
207
 
192
208
  def __init__(self, schema: dict, type: Optional[str] = None) -> None:
@@ -0,0 +1,36 @@
1
+ from contextlib import contextmanager
2
+ import os
3
+ import shutil
4
+ import tempfile
5
+ from typing import List, Optional, Union
6
+
7
+
8
+ @contextmanager
9
+ def temporary_directory() -> str:
10
+ try:
11
+ with tempfile.TemporaryDirectory() as tmp_directory_name:
12
+ yield tmp_directory_name
13
+ finally:
14
+ remove_temporary_directory(tmp_directory_name)
15
+
16
+
17
+ @contextmanager
18
+ def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
19
+ content: Optional[Union[str, bytes, List[str]]] = None) -> str:
20
+ with temporary_directory() as tmp_directory_name:
21
+ tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
22
+ with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
23
+ if content is not None:
24
+ tmp_file.write("\n".join(content) if isinstance(content, list) else content)
25
+ yield tmp_file_name
26
+
27
+
28
+ def remove_temporary_directory(tmp_directory_name: str) -> None:
29
+ def is_temporary_directory(path: str) -> bool:
30
+ try:
31
+ tmpdir = tempfile.gettempdir()
32
+ return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
33
+ except Exception:
34
+ return False
35
+ if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
36
+ shutil.rmtree(tmp_directory_name)
@@ -2,9 +2,7 @@ from contextlib import contextmanager
2
2
  from dcicutils.tmpfile_utils import temporary_directory, temporary_file
3
3
  import gzip
4
4
  import os
5
- import shutil
6
5
  import tarfile
7
- import tempfile
8
6
  from typing import List, Optional
9
7
  import zipfile
10
8
 
@@ -47,28 +45,3 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
47
45
  outputf.write(inputf.read())
48
46
  outputf.close()
49
47
  yield tmp_file_name
50
-
51
-
52
- def extract_file_from_zip(zip_file: str, file_to_extract: str,
53
- destination_file: str, raise_exception: bool = True) -> bool:
54
- """
55
- Extracts from the given zip file, the given file to extract, writing it to the
56
- given destination file. Returns True if all is well, otherwise False, or if the
57
- raise_exception argument is True (the default), then raises and exception on error.
58
- """
59
- try:
60
- if not (destination_directory := os.path.dirname(destination_file)):
61
- destination_directory = os.getcwd()
62
- destination_file = os.path.join(destination_directory, destination_file)
63
- with tempfile.TemporaryDirectory() as tmp_directory_name:
64
- with zipfile.ZipFile(zip_file, "r") as zipf:
65
- if file_to_extract not in zipf.namelist():
66
- return False
67
- zipf.extract(file_to_extract, path=tmp_directory_name)
68
- os.makedirs(destination_directory, exist_ok=True)
69
- shutil.move(os.path.join(tmp_directory_name, file_to_extract), destination_file)
70
- return True
71
- except Exception as e:
72
- if raise_exception:
73
- raise e
74
- return False
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.4.1b30" # TODO: To become 8.8.5
3
+ version = "8.9.0.0b0"
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -37,13 +37,12 @@ classifiers = [
37
37
 
38
38
 
39
39
  [tool.poetry.dependencies]
40
- python = ">=3.8,<3.13"
41
- boto3 = "^1.34.93"
42
- botocore = "^1.34.93"
40
+ python = ">=3.8,<3.12"
41
+ boto3 = "^1.28.57"
42
+ botocore = "^1.31.57"
43
43
  # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version.
44
44
  # This value is intentionally pinned and must not be changed casually.
45
45
  elasticsearch = "7.13.4"
46
- appdirs = "^1.4.4"
47
46
  aws-requests-auth = ">=0.4.2,<1"
48
47
  chardet = "^5.2.0"
49
48
  docker = "^4.4.4"
@@ -61,7 +60,6 @@ pyperclip = "^1.8.2"
61
60
  PyYAML = "^6.0.1"
62
61
  requests = "^2.21.0"
63
62
  rfc3986 = "^1.4.0"
64
- shortuuid = "^1.0.13"
65
63
  structlog = "^19.2.0"
66
64
  toml = ">=0.10.1,<1"
67
65
  tqdm = "^4.66.2"
@@ -71,8 +69,8 @@ webtest = "^2.0.34"
71
69
 
72
70
 
73
71
  [tool.poetry.dev-dependencies]
74
- boto3-stubs = "^1.34.93"
75
- botocore-stubs = "^1.34.93"
72
+ boto3-stubs = "^1.28.57"
73
+ botocore-stubs = "^1.31.57"
76
74
  coverage = ">=7.2.3"
77
75
  # Loaded manually in GA workflow for coverage because a dependency on 2to3
78
76
  # in its docopts dependency makes a problem for laoding it here in poetry. -kmp 7-Apr-2023
@@ -1,267 +0,0 @@
1
- import glob
2
- import hashlib
3
- import io
4
- import os
5
- import pathlib
6
- from datetime import datetime
7
- import random
8
- import string
9
- from tempfile import gettempdir as get_temporary_directory
10
- from typing import List, Optional, Union
11
- from uuid import uuid4 as uuid
12
-
13
- HOME_DIRECTORY = str(pathlib.Path().home())
14
-
15
-
16
- def search_for_file(file: str,
17
- location: Union[str, Optional[List[str]]] = None,
18
- recursive: bool = False,
19
- single: bool = False,
20
- order: bool = True) -> Union[List[str], Optional[str]]:
21
- """
22
- Searches for the existence of the given file name, first directly in the given directory or list
23
- of directories, if specified, and if not then just in the current (working) directory; if the
24
- given recursive flag is True then also searches all sub-directories of these directories;
25
- returns the full path name to the file if found. If the single flag is True then just the
26
- first file which is found is returns (as a string), or None if none; if the single flag
27
- is False, then all matched files are returned in a list, or and empty list if none.
28
- """
29
- def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
30
- def order_by(path: str):
31
- return len(path.split(os.path.sep)), path
32
- return sorted(paths, key=order_by)
33
-
34
- if not (file and isinstance(file, (str, pathlib.PosixPath))):
35
- return None if single is True else []
36
- if os.path.isabs(file):
37
- if os.path.exists(file):
38
- return file if single is True else [file]
39
- return None if single is True else []
40
- files_found = []
41
- if not location:
42
- location = ["."]
43
- elif isinstance(location, (str, pathlib.PosixPath)):
44
- location = [location]
45
- elif not isinstance(location, list):
46
- location = []
47
- location_pruned = []
48
- for directory in location:
49
- if not isinstance(directory, str):
50
- if not isinstance(directory, pathlib.PosixPath):
51
- continue
52
- directory = str(directory)
53
- if not (directory := directory.strip()):
54
- continue
55
- if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
56
- # Allow a file; assume its parent directory was intended.
57
- if not (directory := os.path.dirname(directory)):
58
- continue
59
- if directory not in location_pruned:
60
- location_pruned.append(directory)
61
- location = location_pruned
62
- for directory in location:
63
- if os.path.exists(os.path.join(directory, file)):
64
- file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
65
- if single is True:
66
- return file_found
67
- if file_found not in files_found:
68
- files_found.append(file_found)
69
- if recursive is True:
70
- for directory in location:
71
- if not directory.endswith("/**") and not file.startswith("**/"):
72
- path = f"{directory}/**/{file}"
73
- else:
74
- path = f"{directory}/{file}"
75
- files = glob.glob(path, recursive=True if recursive is True else False)
76
- if files:
77
- for file_found in files:
78
- file_found = os.path.abspath(file_found)
79
- if single is True:
80
- return file_found
81
- if file_found not in files_found:
82
- files_found.append(file_found)
83
- if single is True:
84
- return files_found[0] if files_found else None
85
- elif order is True:
86
- return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
87
- else:
88
- return files_found
89
-
90
-
91
- def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
92
- """
93
- Normalizes the given path value and returns the result; does things like remove redundant
94
- consecutive directory separators and redundant parent paths. If the given absolute argument
95
- is True than converts the path to an absolute path. If the given expand_home argument is False
96
- and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
97
- converts it to such. If the expand_home argument is True and path starts with the home directory
98
- indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
99
- given path value is not actually even a string (or pathlib.Path) then returns an empty string.
100
- """
101
- if isinstance(value, pathlib.Path):
102
- value = str(value)
103
- elif not isinstance(value, str):
104
- return ""
105
- if not (value := value.strip()) or not (value := os.path.normpath(value)):
106
- return ""
107
- if expand_home is True:
108
- value = os.path.expanduser(value)
109
- elif (expand_home is False) and (os.name == "posix"):
110
- if value.startswith(home := HOME_DIRECTORY + os.sep):
111
- value = "~/" + value[len(home):]
112
- elif value == HOME_DIRECTORY:
113
- value = "~"
114
- if absolute is True:
115
- value = os.path.abspath(value)
116
- return value
117
-
118
-
119
- def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
120
- try:
121
- return os.path.getsize(file) if isinstance(file, str) else None
122
- except Exception:
123
- if raise_exception is True:
124
- raise
125
- return None
126
-
127
-
128
- def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
129
- try:
130
- return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
131
- except Exception:
132
- if raise_exception is True:
133
- raise
134
- return None
135
-
136
-
137
- def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
138
- """
139
- Returns True iff the contents of the two given files are exactly the same.
140
- """
141
- try:
142
- with open(filea, "rb") as fa:
143
- with open(fileb, "rb") as fb:
144
- chunk_size = 4096
145
- while True:
146
- chunka = fa.read(chunk_size)
147
- chunkb = fb.read(chunk_size)
148
- if chunka != chunkb:
149
- return False
150
- if not chunka:
151
- break
152
- return True
153
- except Exception:
154
- if raise_exception is True:
155
- raise
156
- return False
157
-
158
-
159
- def compute_file_md5(file: str, raise_exception: bool = True) -> str:
160
- """
161
- Returns the md5 checksum for the given file.
162
- """
163
- if not isinstance(file, str):
164
- return ""
165
- try:
166
- md5 = hashlib.md5()
167
- with open(file, "rb") as file:
168
- for chunk in iter(lambda: file.read(4096), b""):
169
- md5.update(chunk)
170
- return md5.hexdigest()
171
- except Exception:
172
- if raise_exception is True:
173
- raise
174
- return ""
175
-
176
-
177
- def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
178
- """
179
- Returns the AWS S3 "etag" for the given file; this value is md5-like but
180
- not the same as a normal md5. We use this to compare that a file in S3
181
- appears to be the exact the same file as a local file.
182
- """
183
- try:
184
- with io.open(file, "rb") as f:
185
- return _compute_file_etag(f)
186
- except Exception:
187
- if raise_exception is True:
188
- raise
189
- return None
190
-
191
-
192
- def _compute_file_etag(f: io.BufferedReader) -> str:
193
- # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
194
- MULTIPART_THRESHOLD = 8388608
195
- MULTIPART_CHUNKSIZE = 8388608
196
- # BUFFER_SIZE = 1048576
197
- # Verify some assumptions are correct
198
- # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
199
- # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
200
- # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
201
- hash = hashlib.md5()
202
- read = 0
203
- chunks = None
204
- while True:
205
- # Read some from stdin, if we're at the end, stop reading
206
- bits = f.read(1048576)
207
- if len(bits) == 0:
208
- break
209
- read += len(bits)
210
- hash.update(bits)
211
- if chunks is None:
212
- # We're handling a multi-part upload, so switch to calculating
213
- # hashes of each chunk
214
- if read >= MULTIPART_THRESHOLD:
215
- chunks = b''
216
- if chunks is not None:
217
- if (read % MULTIPART_CHUNKSIZE) == 0:
218
- # Dont with a chunk, add it to the list of hashes to hash later
219
- chunks += hash.digest()
220
- hash = hashlib.md5()
221
- if chunks is None:
222
- # Normal upload, just output the MD5 hash
223
- etag = hash.hexdigest()
224
- else:
225
- # Multipart upload, need to output the hash of the hashes
226
- if (read % MULTIPART_CHUNKSIZE) != 0:
227
- # Add the last part if we have a partial chunk
228
- chunks += hash.digest()
229
- etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
230
- return etag
231
-
232
-
233
- def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
234
- nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
235
- """
236
- Write to the given file (name/path) some random content. If the given file is None then writes
237
- to a temporary file. In either case, returns the file written to. The of bytes written is 1024
238
- by default be can be specified with the nbytes argument; default to writing ASCII text but if
239
- the binary argument is True then writes binary data as well; if not binary the content is in
240
- lines of 80 characters each; use the line_length argumetn in this case to change the line length.
241
- """
242
- if not isinstance(nbytes, int) or nbytes < 0:
243
- nbytes = 0
244
- if not isinstance(file, str) or not file:
245
- if not isinstance(prefix, str):
246
- prefix = ""
247
- if not isinstance(suffix, str):
248
- suffix = ""
249
- file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
250
- file = os.path.join(get_temporary_directory(), file)
251
- with open(file, "wb" if binary is True else "w") as f:
252
- if binary is True:
253
- f.write(os.urandom(nbytes))
254
- else:
255
- if (not isinstance(line_length, int)) or (line_length < 1):
256
- line_length = 80
257
- line_length += 1
258
- nlines = nbytes // line_length
259
- nremainder = nbytes % line_length
260
- for n in range(nlines):
261
- f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
262
- f.write("\n")
263
- if nremainder > 1:
264
- f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
265
- if nremainder > 0:
266
- f.write("\n")
267
- return file
@@ -1,39 +0,0 @@
1
- from contextlib import contextmanager
2
- import requests
3
- from typing import Callable, Optional
4
- from dcicutils.tmpfile_utils import temporary_file
5
-
6
-
7
- @contextmanager
8
- def download(url: str, suffix: Optional[str] = None, binary: bool = True,
9
- progress: Optional[Callable] = None) -> Optional[str]:
10
- """
11
- Context manager to download the given URL into a temporary file and yields the file
12
- path to it. An optional file suffix may be specified for this temporary file name.
13
- Defaults to binary file mode; if not desired then pass False as the binary argument.
14
- """
15
- with temporary_file(suffix=suffix) as file:
16
- download_to(url, file, binary=binary, progress=progress)
17
- yield file
18
-
19
-
20
- def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
21
- """
22
- Download the given URL into the given file. Defaults to binary
23
- file mode; if not desired then pass False as the binary argument.
24
- """
25
- if not callable(progress):
26
- progress = None
27
- response = requests.get(url, stream=True)
28
- if progress:
29
- nbytes = 0
30
- nbytes_total = None
31
- if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
32
- nbytes_total = int(content_length)
33
- with open(file, "wb" if binary is True else "w") as f:
34
- for chunk in response.iter_content(chunk_size=8192):
35
- if chunk:
36
- f.write(chunk)
37
- if progress:
38
- nbytes += len(chunk)
39
- progress(nbytes, nbytes_total)
@@ -1,74 +0,0 @@
1
- from contextlib import contextmanager
2
- from datetime import datetime
3
- import os
4
- import shutil
5
- import tempfile
6
- from uuid import uuid4 as uuid
7
- from typing import List, Optional, Union
8
- from dcicutils.file_utils import create_random_file
9
-
10
-
11
- @contextmanager
12
- def temporary_directory() -> str:
13
- try:
14
- with tempfile.TemporaryDirectory() as tmp_directory_name:
15
- yield tmp_directory_name
16
- finally:
17
- remove_temporary_directory(tmp_directory_name)
18
-
19
-
20
- @contextmanager
21
- def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
22
- content: Optional[Union[str, bytes, List[str]]] = None) -> str:
23
- with temporary_directory() as tmp_directory_name:
24
- tmp_file_name = f"{prefix or ''}{name or tempfile.mktemp(dir='')}{suffix or ''}"
25
- tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
26
- with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
27
- if content is not None:
28
- tmp_file.write("\n".join(content) if isinstance(content, list) else content)
29
- yield tmp_file_path
30
-
31
-
32
- def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
33
- """
34
- Generates and returns the full path to file within the system temporary directory.
35
- """
36
- random_string = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
37
- tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
38
- return os.path.join(tempfile.gettempdir(), tmp_file_name)
39
-
40
-
41
- @contextmanager
42
- def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
43
- nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
44
- with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
45
- create_random_file(tmp_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
46
- yield tmp_file_path
47
-
48
-
49
- def remove_temporary_directory(tmp_directory_name: str) -> None:
50
- """
51
- Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
52
- """
53
- def is_temporary_directory(path: str) -> bool:
54
- try:
55
- tmpdir = tempfile.gettempdir()
56
- return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
57
- except Exception:
58
- return False
59
- if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
60
- shutil.rmtree(tmp_directory_name)
61
-
62
-
63
- def remove_temporary_file(tmp_file_name: str) -> bool:
64
- """
65
- Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
66
- """
67
- try:
68
- tmpdir = tempfile.gettempdir()
69
- if (os.path.commonpath([tmpdir, tmp_file_name]) == tmpdir) and os.path.isfile(tmp_file_name):
70
- os.remove(tmp_file_name)
71
- return True
72
- return False
73
- except Exception:
74
- return False
File without changes