dcicutils 7.10.0.2b13__py3-none-any.whl → 7.11.0.1b9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
dcicutils/sheet_utils.py CHANGED
@@ -8,18 +8,19 @@ import json
8
8
  import openpyxl
9
9
  import os
10
10
  import re
11
+ import subprocess
11
12
  import uuid
12
13
  import yaml
13
14
 
14
15
  from openpyxl.worksheet.worksheet import Worksheet
15
16
  from openpyxl.workbook.workbook import Workbook
16
- from tempfile import TemporaryFile
17
+ from tempfile import TemporaryFile, TemporaryDirectory
17
18
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
18
19
  from .common import AnyJsonData
19
20
  from .env_utils import public_env_name, EnvUtils
20
21
  from .ff_utils import get_schema
21
22
  from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize, there_are
22
- from .misc_utils import ignored, PRINT, pad_to, JsonLinesReader, AbstractVirtualApp
23
+ from .misc_utils import ignored, PRINT, pad_to, JsonLinesReader, AbstractVirtualApp, remove_suffix
23
24
  from .task_utils import pmap
24
25
 
25
26
 
@@ -1011,6 +1012,53 @@ class TsvItemManager(ItemManagerMixin, TsvManager):
1011
1012
  pass
1012
1013
 
1013
1014
 
1015
+ def _do_shell_command(command, cwd=None):
1016
+ # This might need to be more elaborate, but hopefully it will do for now. -kmp 11-Sep-2023
1017
+ subprocess.check_output(command, cwd=cwd)
1018
+
1019
+
1020
+ @contextlib.contextmanager
1021
+ def maybe_unpack(filename): # Maybe move to another module
1022
+ """
1023
+ If necessary, unpack a file that is zipped and/or tarred, yielding the name of the file (unpacked or not).
1024
+ """
1025
+ unpackables = ['.tar.gz', '.tar', '.tgz', '.gz', '.zip']
1026
+ ext = None
1027
+ for unpackable in unpackables:
1028
+ if filename.endswith(unpackable):
1029
+ ext = unpackable
1030
+ break
1031
+ if not ext:
1032
+ yield filename
1033
+ return
1034
+ if not os.path.exists(filename):
1035
+ # We don't bother to raise this error if we're not planning to do any unpacking.
1036
+ # The caller can decide if/when such errors are needed in that case.
1037
+ # But if we are going to have to move bits around, they'll need to actually be there.
1038
+ # -kmp 12-Sep-2023
1039
+ raise ValueError(f"The file {filename!r} does not exist.")
1040
+ target_base_part = remove_suffix(ext, os.path.basename(filename), required=True)
1041
+ target_ext = '.tar.gz' if ext == '.tgz' else ext
1042
+ with TemporaryDirectory() as temp_dir:
1043
+ temp_base = os.path.join(temp_dir, target_base_part)
1044
+ temp_filename = temp_base + target_ext
1045
+ _do_shell_command(['cp', filename, temp_filename])
1046
+ if temp_filename.endswith('.gz'):
1047
+ _do_shell_command(['gunzip', temp_filename], cwd=temp_dir)
1048
+ temp_filename = remove_suffix('.gz', temp_filename)
1049
+ elif temp_filename.endswith(".zip"):
1050
+ _do_shell_command(['unzip', temp_filename], cwd=temp_dir)
1051
+ temp_filename = remove_suffix('.zip', temp_filename)
1052
+ if temp_filename.endswith(".tar"):
1053
+ _do_shell_command(['tar', '-xf', temp_filename], cwd=temp_dir)
1054
+ tar_file = temp_filename
1055
+ temp_filename = remove_suffix(".tar", temp_filename, required=True)
1056
+ if not os.path.isdir(temp_filename):
1057
+ raise Exception(f"{tar_file} didn't unpack to a dir: {temp_filename}")
1058
+ # print(f"Unpacked {filename} to {temp_filename}")
1059
+ yield temp_filename
1060
+
1061
+
1014
1062
  class TableSetManager(AbstractTableSetManager):
1015
1063
  """
1016
1064
  This class will open a .xlsx or .csv file and load its content in our standard format.
@@ -1031,8 +1079,10 @@ class TableSetManager(AbstractTableSetManager):
1031
1079
  """
1032
1080
  Given a filename and various options
1033
1081
  """
1034
- manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping, **kwargs)
1035
- return manager.load_content()
1082
+ with maybe_unpack(filename) as filename:
1083
+ manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
1084
+ **kwargs)
1085
+ return manager.load_content()
1036
1086
 
1037
1087
 
1038
1088
  class ItemManager(AbstractTableSetManager):
@@ -1067,11 +1117,14 @@ class ItemManager(AbstractTableSetManager):
1067
1117
  :param portal_env: A portal to consult to find schemas (usually if calling from the outside of a portal).
1068
1118
  :param portal_vapp: A vapp to use (usually if calling from within a portal).
1069
1119
  """
1070
- manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
1071
- schemas=schemas, autoload_schemas=autoload_schemas,
1072
- portal_env=portal_env, portal_vapp=portal_vapp,
1073
- **kwargs)
1074
- return manager.load_content()
1120
+
1121
+ with maybe_unpack(filename) as filename:
1122
+
1123
+ manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
1124
+ schemas=schemas, autoload_schemas=autoload_schemas,
1125
+ portal_env=portal_env, portal_vapp=portal_vapp,
1126
+ **kwargs)
1127
+ return manager.load_content()
1075
1128
 
1076
1129
 
1077
1130
  load_table_set = TableSetManager.load
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 7.10.0.2b13
3
+ Version: 7.11.0.1b9
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -21,7 +21,7 @@ Classifier: Programming Language :: Python :: 3.8
21
21
  Classifier: Programming Language :: Python :: 3.9
22
22
  Classifier: Topic :: Database :: Database Engines/Servers
23
23
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
24
- Requires-Dist: PyYAML (==5.3.1)
24
+ Requires-Dist: PyYAML (>=5.1,<5.5)
25
25
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
26
26
  Requires-Dist: boto3 (>=1.17.39,<2.0.0)
27
27
  Requires-Dist: botocore (>=1.20.39,<2.0.0)
@@ -43,14 +43,14 @@ dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
43
43
  dcicutils/s3_utils.py,sha256=a9eU3Flh8Asc8xPWLGP16A6UQ_FVwhoFQNqm4ZYgSQ4,28852
44
44
  dcicutils/scripts/publish_to_pypi.py,sha256=qmWyjrg5bNQNfpNKFTZdyMXpRmrECnRV9VmNQddUPQA,13576
45
45
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
46
- dcicutils/sheet_utils.py,sha256=YUJ6AVr3OlWGxo6dEpwHq4bCJSdA0SU_nfFVfDCQZhs,44594
46
+ dcicutils/sheet_utils.py,sha256=bnnefjeTUL4ES7gtqThISXJKeli1AIFryu4h7Dt9dxw,47040
47
47
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
48
48
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
49
49
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
50
50
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
51
51
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
52
- dcicutils-7.10.0.2b13.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
53
- dcicutils-7.10.0.2b13.dist-info/METADATA,sha256=ilmBL4J-rU8A74M_a8lmXduk84O4grnkg3U13DV93qU,3082
54
- dcicutils-7.10.0.2b13.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
- dcicutils-7.10.0.2b13.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
56
- dcicutils-7.10.0.2b13.dist-info/RECORD,,
52
+ dcicutils-7.11.0.1b9.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
53
+ dcicutils-7.11.0.1b9.dist-info/METADATA,sha256=MER7N-gDAB5nz6YT51jT7aIu8_rHT2x65FBF5x3DN70,3084
54
+ dcicutils-7.11.0.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
+ dcicutils-7.11.0.1b9.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
56
+ dcicutils-7.11.0.1b9.dist-info/RECORD,,