dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,7 @@
57
57
 
58
58
  import argparse
59
59
  from functools import lru_cache
60
+ import io
60
61
  import json
61
62
  import pyperclip
62
63
  import os
@@ -97,11 +98,18 @@ def main():
97
98
  help="Include all properties for schema usage.")
98
99
  parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
99
100
  parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
101
+ parser.add_argument("--post", type=str, required=False, default=None,
102
+ help="POST data of the main arg type with data from file specified with this option.")
103
+ parser.add_argument("--patch", type=str, required=False, default=None,
104
+ help="PATCH data of the main arg type with data from file specified with this option.")
100
105
  parser.add_argument("--database", action="store_true", required=False, default=False,
101
106
  help="Read from database output.")
107
+ parser.add_argument("--bool", action="store_true", required=False,
108
+ default=False, help="Only return whether found or not.")
102
109
  parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
103
110
  parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
104
111
  help="Copy object data to clipboard.")
112
+ parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
105
113
  parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
106
114
  parser.add_argument("--more-details", action="store_true", required=False, default=False,
107
115
  help="More detailed output.")
@@ -151,6 +159,18 @@ def main():
151
159
  args.schema = True
152
160
 
153
161
  if args.schema:
162
+ if args.post:
163
+ if post_data := _read_json_from_file(args.post):
164
+ if args.verbose:
165
+ _print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
166
+ if isinstance(post_data, dict):
167
+ post_data = [post_data]
168
+ elif not isinstance(post_data, list):
169
+ _print(f"POST data neither list nor dictionary: {args.post}")
170
+ for item in post_data:
171
+ portal.post_metadata(args.uuid, item)
172
+ if args.verbose:
173
+ _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
154
174
  schema, schema_name = _get_schema(portal, args.uuid)
155
175
  if schema:
156
176
  if args.copy:
@@ -166,14 +186,50 @@ def main():
166
186
  _print_schema(schema, details=args.details, more_details=args.details,
167
187
  all=args.all, raw=args.raw, raw_yaml=args.yaml)
168
188
  return
169
-
170
- data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose)
189
+ elif args.patch:
190
+ if patch_data := _read_json_from_file(args.patch):
191
+ if args.verbose:
192
+ _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
193
+ if isinstance(patch_data, dict):
194
+ patch_data = [patch_data]
195
+ elif not isinstance(patch_data, list):
196
+ _print(f"PATCH data neither list nor dictionary: {args.patch}")
197
+ for item in patch_data:
198
+ portal.patch_metadata(args.uuid, item)
199
+ if args.verbose:
200
+ _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
201
+ return
202
+ else:
203
+ _print(f"No PATCH data found in file: {args.patch}")
204
+ exit(1)
205
+
206
+ data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
207
+ database=args.database, check=args.bool, verbose=args.verbose)
208
+ if args.bool:
209
+ if data:
210
+ _print(f"{args.uuid}: found")
211
+ exit(0)
212
+ else:
213
+ _print(f"{args.uuid}: not found")
214
+ exit(1)
171
215
  if args.copy:
172
216
  pyperclip.copy(json.dumps(data, indent=4))
173
217
  if args.yaml:
174
218
  _print(yaml.dump(data))
175
219
  else:
176
- _print(json.dumps(data, default=str, indent=4))
220
+ if args.indent > 0:
221
+ _print(_format_json_with_indent(data, indent=args.indent))
222
+ else:
223
+ _print(json.dumps(data, default=str, indent=4))
224
+
225
+
226
+ def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
227
+ if isinstance(value, dict):
228
+ result = json.dumps(value, indent=4)
229
+ if indent > 0:
230
+ result = f"{indent * ' '}{result}"
231
+ result = result.replace("\n", f"\n{indent * ' '}")
232
+ return result
177
233
 
178
234
 
179
235
  def _create_portal(ini: str, env: Optional[str] = None,
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
198
254
 
199
255
 
200
256
  def _get_portal_object(portal: Portal, uuid: str,
201
- raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
257
+ raw: bool = False, database: bool = False,
258
+ check: bool = False, verbose: bool = False) -> dict:
202
259
  response = None
203
260
  try:
204
261
  if not uuid.startswith("/"):
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
212
269
  _exit()
213
270
  _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
214
271
  if not response:
272
+ if check:
273
+ return None
215
274
  _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
216
275
  if response.status_code not in [200, 307]:
217
276
  # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
218
277
  _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
219
278
  if not response.json:
220
279
  _exit(f"Invalid JSON getting Portal object: {uuid}")
221
- return response.json()
280
+ response = response.json()
281
+ if raw:
282
+ response.pop("schema_version", None)
283
+ return response
222
284
 
223
285
 
224
286
  @lru_cache(maxsize=1)
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
257
319
  required: Optional[List[str]] = None) -> None:
258
320
  if not schema or not isinstance(schema, dict):
259
321
  return
322
+ identifying_properties = schema.get("identifyingProperties")
260
323
  if level == 0:
261
324
  if required_properties := schema.get("required"):
262
325
  _print("- required properties:")
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
383
446
  suffix += f" | enum"
384
447
  if property_required:
385
448
  suffix += f" | required"
449
+ if property_name in (identifying_properties or []):
450
+ suffix += f" | identifying"
386
451
  if property.get("uniqueKey"):
387
452
  suffix += f" | unique"
388
453
  if pattern := property.get("pattern"):
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
529
594
  print(line)
530
595
 
531
596
 
597
def _read_json_from_file(file: str) -> Optional[dict]:
    """
    Reads and returns the parsed JSON content of the given file. Whatever JSON value the
    file contains is returned as-is, so the result may be a list (or other JSON value)
    rather than a dictionary. If the file does not exist, cannot be opened, or does not
    contain valid JSON, then prints a message and exits the process with status 1.
    """
    if not os.path.exists(file):
        _print(f"Cannot find file: {file}")
        exit(1)
    try:
        f = io.open(file, "r")
    except Exception as e:
        # Route the exception text through _print, like every other message here,
        # rather than through a bare print which bypasses that output wrapper.
        _print(e)
        _print(f"Cannot open file: {file}")
        exit(1)
    with f:
        try:
            return json.load(f)
        except Exception:
            _print(f"Cannot parse JSON in file: {file}")
            exit(1)
612
+
613
+
532
614
  def _print(*args, **kwargs):
533
615
  with uncaptured_output():
534
616
  PRINT(*args, **kwargs)
@@ -11,7 +11,6 @@ from webtest.app import TestApp
11
11
  from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
- from dcicutils.file_utils import search_for_file
15
14
  from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
16
15
  merge_objects, remove_empty_properties, right_trim, split_string,
17
16
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
@@ -53,9 +52,10 @@ class StructuredDataSet:
53
52
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
54
53
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
55
54
  order: Optional[List[str]] = None, prune: bool = True,
55
+ remove_empty_objects_from_lists: bool = True,
56
56
  ref_lookup_strategy: Optional[Callable] = None,
57
57
  ref_lookup_nocache: bool = False,
58
- norefs: bool = False,
58
+ norefs: bool = False, merge: bool = False,
59
59
  progress: Optional[Callable] = None,
60
60
  debug_sleep: Optional[str] = None) -> None:
61
61
  self._progress = progress if callable(progress) else None
@@ -65,7 +65,8 @@ class StructuredDataSet:
65
65
  ref_lookup_nocache=ref_lookup_nocache) if portal else None
66
66
  self._ref_lookup_strategy = ref_lookup_strategy
67
67
  self._order = order
68
- self._prune = prune
68
+ self._prune = prune is True
69
+ self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
69
70
  self._warnings = {}
70
71
  self._errors = {}
71
72
  self._resolved_refs = set()
@@ -73,6 +74,7 @@ class StructuredDataSet:
73
74
  self._nrows = 0
74
75
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
75
76
  self._norefs = True if norefs is True else False
77
+ self._merge = True if merge is True else False
76
78
  self._debug_sleep = None
77
79
  if debug_sleep:
78
80
  try:
@@ -93,14 +95,16 @@ class StructuredDataSet:
93
95
  def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
94
96
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
95
97
  order: Optional[List[str]] = None, prune: bool = True,
98
+ remove_empty_objects_from_lists: bool = True,
96
99
  ref_lookup_strategy: Optional[Callable] = None,
97
100
  ref_lookup_nocache: bool = False,
98
- norefs: bool = False,
101
+ norefs: bool = False, merge: bool = False,
99
102
  progress: Optional[Callable] = None,
100
103
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
101
104
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
105
+ remove_empty_objects_from_lists=remove_empty_objects_from_lists,
102
106
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
103
- norefs=norefs, progress=progress, debug_sleep=debug_sleep)
107
+ norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
104
108
 
105
109
  def validate(self, force: bool = False) -> None:
106
110
  def data_without_deleted_properties(data: dict) -> dict:
@@ -204,14 +208,6 @@ class StructuredDataSet:
204
208
  result.append({"type": type_name, "file": file_name})
205
209
  return result
206
210
 
207
- def upload_files_located(self,
208
- location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
209
- upload_files = copy.deepcopy(self.upload_files)
210
- for upload_file in upload_files:
211
- if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
212
- upload_file["path"] = file_path
213
- return upload_files
214
-
215
211
  @property
216
212
  def nrows(self) -> int:
217
213
  return self._nrows
@@ -346,7 +342,23 @@ class StructuredDataSet:
346
342
 
347
343
def _load_json_file(self, file: str) -> None:
    """
    Loads the given JSON file into this data set. If the file name looks like a schema
    name then the file is assumed to contain an object, or a list of objects, of that
    type; otherwise, if the file contains a dictionary, each of its properties is taken
    to be a schema name whose value is the object(s) of that type. If merging is enabled
    the loaded data is first merged with any existing Portal object.
    """
    with open(file) as f:
        data = json.load(f)
    schema_name_inferred_from_file_name = Schema.type_name(file)
    # NOTE(review): the portal argument to the constructor is Optional, so self._portal is
    # presumably None when no portal was given; confirm. In that case the schema name cannot
    # be verified, so fall back to the prior behavior of trusting the name inferred from the
    # file name, rather than failing on an attribute access of None.
    can_merge = bool(self._merge and self._portal)
    if schema_name_inferred_from_file_name and (
            (not self._portal) or
            (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)):
        # If the JSON file name looks like a schema name then assume it
        # contains an object or an array of object of that schema type.
        if can_merge:
            data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
        self._add(schema_name_inferred_from_file_name, data)
    elif isinstance(data, dict):
        # Otherwise if the JSON file name does not look like a schema name then
        # assume it a dictionary where each property is the name of a schema, and
        # which (each property) contains a list of object of that schema type.
        for schema_name, item in data.items():
            if can_merge:
                item = self._merge_with_existing_portal_object(item, schema_name)
            self._add(schema_name, item)
350
362
 
351
363
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
352
364
  schema = None
@@ -368,7 +380,14 @@ class StructuredDataSet:
368
380
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
369
381
  if self._autoadd_properties:
370
382
  self._add_properties(structured_row, self._autoadd_properties, schema)
371
- self._add(type_name, structured_row)
383
+ # New merge functionality (2024-05-25).
384
+ if self._merge:
385
+ structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
386
+ if (prune_error := self._prune_structured_row(structured_row)) is not None:
387
+ self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
388
+ "error": prune_error}, "validation")
389
+ else:
390
+ self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
372
391
  if self._progress:
373
392
  self._progress({
374
393
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -385,9 +404,20 @@ class StructuredDataSet:
385
404
  self._note_error(schema._unresolved_refs, "ref")
386
405
  self._resolved_refs.update(schema._resolved_refs)
387
406
 
388
- def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
389
- if self._prune:
407
+ def _prune_structured_row(self, data: dict) -> Optional[str]:
408
+ if not self._prune:
409
+ return None
410
+ if not self._remove_empty_objects_from_lists:
390
411
  remove_empty_properties(data)
412
+ return None
413
+ try:
414
+ remove_empty_properties(data, isempty_array_element=lambda element: element == {},
415
+ raise_exception_on_nonempty_array_element_after_empty=True)
416
+ except Exception as e:
417
+ return str(e)
418
+ return None
419
+
420
+ def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
391
421
  if type_name in self._data:
392
422
  self._data[type_name].extend([data] if isinstance(data, dict) else data)
393
423
  else:
@@ -398,6 +428,18 @@ class StructuredDataSet:
398
428
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
399
429
  structured_row[name] = properties[name]
400
430
 
431
+ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
432
+ """
433
+ Given a Portal object (presumably/in-practice from the given metadata), if there is
434
+ an existing Portal item, identified by the identifying properties for the given object,
435
+ then merges the given object into the existing one and returns the result; otherwise
436
+ just returns the given object. Note that the given object may be CHANGED in place.
437
+ """
438
+ for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
439
+ if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
440
+ return merge_objects(existing_portal_object, portal_object)
441
+ return portal_object
442
+
401
443
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
402
444
  return (ref_lookup_flags &
403
445
  Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
@@ -2,39 +2,45 @@ import re
2
2
  from typing import Optional
3
3
  from dcicutils.structured_data import Portal
4
4
 
5
+ # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
6
+ # before it was fully developed, we had differing behaviors; but this has been unified; so this
7
+ # could now be internalized to structured_data, and portal_object_utils (TODO).
8
+
5
9
 
6
10
  def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
7
11
  #
8
- # FYI: Note this situation WRT object lookups ...
9
- #
10
- # /{submitted_id} # NOT FOUND
11
- # /UnalignedReads/{submitted_id} # OK
12
- # /SubmittedFile/{submitted_id} # OK
13
- # /File/{submitted_id} # NOT FOUND
14
- #
15
- # /{accession} # OK
16
- # /UnalignedReads/{accession} # NOT FOUND
17
- # /SubmittedFile/{accession} # NOT FOUND
18
- # /File/{accession} # OK
12
+ # Note this slightly odd situation WRT object lookups by submitted_id and accession:
13
+ # -----------------------------+-----------------------------------------------+---------------+
14
+ # PATH | EXAMPLE | LOOKUP RESULT |
15
+ # -----------------------------+-----------------------------------------------+---------------+
16
+ # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
17
+ # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
18
+ # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
19
+ # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
20
+ # -----------------------------+-----------------------------------------------+---------------+
21
+ # /accession | /SMAFSFXF1RO4 | FOUND |
22
+ # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
23
+ # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
24
+ # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
25
+ # -----------------------------+-----------------------------------------------+---------------+
19
26
  #
20
27
  def ref_validator(schema: Optional[dict],
21
28
  property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
22
29
  """
23
- Returns False iff the type represented by the given schema, can NOT be referenced by
24
- the given property name with the given property value, otherwise returns None.
30
+ Returns False iff objects of type represented by the given schema, CANNOT be referenced with
31
+ a Portal path using the given property name and its given property value, otherwise returns None.
25
32
 
26
- For example, if the schema is for the UnalignedReads type and the property name
27
- is accession, then we will return False iff the given property value is NOT a properly
28
- formatted accession ID. Otherwise, we will return None, which indicates that the
29
- caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
30
- its default behavior, which is to check other ways in which the given type can NOT
31
- be referenced by the given value, i.e. it checks other identifying properties for
32
- the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
33
+ For example, if the schema is for UnalignedReads and the property name is accession, then we will
34
+ return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
35
+ will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
36
+ will continue executing its default behavior, which is to check other ways in which the given type
37
+ CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
38
+ and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
33
39
 
34
- The goal (in structured_data) being to detect if a type is being referenced in such
35
- a way that cannot possibly be allowed, i.e. because none of its identifying types
36
- are in the required form (if indeed there any requirements). Note that it is guaranteed
37
- that the given property name is indeed an identifying property for the given type.
40
+ The goal (in structured_data) being to detect if a type is being referenced in such a way that
41
+ CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
42
+ if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
43
+ identifying property for the given type.
38
44
  """
39
45
  if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
40
46
  if (property_format == "accession") and (property_name == "accession"):
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
62
68
 
63
69
 
64
70
  # This is here for now because of problems with circular dependencies.
65
- # See: smaht-portal/.../schema_formats.py
71
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
66
72
  def _is_accession_id(value: str) -> bool:
67
73
  return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
@@ -1,8 +1,11 @@
1
1
  from contextlib import contextmanager
2
+ from datetime import datetime
2
3
  import os
3
4
  import shutil
4
5
  import tempfile
6
+ from uuid import uuid4 as uuid
5
7
  from typing import List, Optional, Union
8
+ from dcicutils.file_utils import create_random_file
6
9
 
7
10
 
8
11
  @contextmanager
@@ -15,22 +18,59 @@ def temporary_directory() -> str:
15
18
 
16
19
 
17
20
@contextmanager
def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                   content: Optional[Union[str, bytes, List[str]]] = None) -> str:
    """
    Context manager which creates a file within a newly created temporary directory, and
    yields the full path to that file. The file name is the given name (or a generated one
    if not given) with the given prefix and suffix, if any, attached. If content is given it
    is written to the file; a list is written as its items joined by newlines, and bytes
    are written in binary mode.
    """
    with temporary_directory() as tmp_directory_name:
        base_name = name or tempfile.mktemp(dir="")  # Used only to generate a name.
        tmp_file_name = f"{prefix or ''}{base_name}{suffix or ''}"
        tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
        mode = "wb" if isinstance(content, bytes) else "w"
        with open(tmp_file_path, mode) as tmp_file:
            if isinstance(content, list):
                tmp_file.write("\n".join(content))
            elif content is not None:
                tmp_file.write(content)
        # Yield only after the file is closed so the caller sees the complete content.
        yield tmp_file_path
30
+
31
+
32
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
    """
    Generates and returns the full path to file within the system temporary directory.
    The file name is the given prefix (if any), followed by the current UTC time as
    YYYYMMDDhhmmss, followed by a random UUID (as 32 hex digits), followed by the given
    suffix (if any). The file itself is NOT created.
    """
    from datetime import timezone  # Local import; only needed here.
    # Use an aware UTC time; datetime.utcnow() is deprecated as of Python 3.12.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    random_string = f"{timestamp}{uuid().hex}"
    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
    return os.path.join(tempfile.gettempdir(), tmp_file_name)
39
+
40
+
41
@contextmanager
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
    """
    Context manager which creates a temporary file, with the given prefix and suffix (if any),
    populates it via create_random_file using the given size/binary/line-length arguments,
    and yields the full path to that file.
    """
    random_file_options = dict(nbytes=nbytes, binary=binary, line_length=line_length)
    with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
        create_random_file(tmp_file_path, **random_file_options)
        yield tmp_file_path
26
47
 
27
48
 
28
49
def remove_temporary_directory(tmp_directory_name: str) -> None:
    """
    Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
    """
    if not is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
        return
    shutil.rmtree(tmp_directory_name)
55
+
56
+
57
def remove_temporary_file(tmp_file_name: str) -> bool:
    """
    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
    Returns True if the file was removed, otherwise (including on any error) returns False.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        # os.path.commonpath does not collapse ".." components, so a path such as
        # <tmpdir>/../other/file would otherwise pass the check below. Resolve the parent
        # directory first, but keep the final component as given, so that a symbolic link
        # which itself lives in the temporary directory can still be removed.
        resolved_file = os.path.join(os.path.realpath(os.path.dirname(tmp_file_name)),
                                     os.path.basename(tmp_file_name))
        if (os.path.commonpath([tmpdir, resolved_file]) == tmpdir) and os.path.isfile(resolved_file):
            os.remove(resolved_file)
            return True
        return False
    except Exception:
        # Deliberately best-effort: any problem simply means nothing was removed.
        return False
69
+
70
+
71
def is_temporary_directory(path: str) -> bool:
    """
    Returns True if the given path is an existing directory which is (somewhere)
    within the system temporary directory; otherwise (including on any error) False.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        # Resolve ".." components and symbolic links before comparing; os.path.commonpath
        # does not collapse "..", so <tmpdir>/.. would otherwise be regarded as being within
        # the temporary directory, defeating the guard this check is used for.
        resolved_path = os.path.realpath(path)
        return os.path.commonpath([resolved_path, tmpdir]) == tmpdir and os.path.isdir(resolved_path)
    except Exception:
        return False
dcicutils/zip_utils.py CHANGED
@@ -2,7 +2,9 @@ from contextlib import contextmanager
2
2
  from dcicutils.tmpfile_utils import temporary_directory, temporary_file
3
3
  import gzip
4
4
  import os
5
+ import shutil
5
6
  import tarfile
7
+ import tempfile
6
8
  from typing import List, Optional
7
9
  import zipfile
8
10
 
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
45
47
  outputf.write(inputf.read())
46
48
  outputf.close()
47
49
  yield tmp_file_name
50
+
51
+
52
def extract_file_from_zip(zip_file: str, file_to_extract: str,
                          destination_file: str, raise_exception: bool = True) -> bool:
    """
    Extracts from the given zip file, the given file to extract, writing it to the
    given destination file. Returns True if all is well, otherwise False, or if the
    raise_exception argument is True (the default), then raises an exception on error.
    Note that False is returned, without any exception, if the given file to extract
    is not present in the given zip file.
    """
    try:
        destination_directory = os.path.dirname(destination_file)
        if not destination_directory:
            # A bare file name means the current directory.
            destination_directory = os.getcwd()
            destination_file = os.path.join(destination_directory, destination_file)
        with tempfile.TemporaryDirectory() as tmp_directory_name:
            with zipfile.ZipFile(zip_file, "r") as zipf:
                if file_to_extract not in zipf.namelist():
                    return False
                # Use the path which extract reports it actually wrote to; the member name is
                # sanitized on extraction (e.g. ".." components are dropped), so re-deriving
                # the path from the raw member name can point at the wrong place.
                extracted_file = zipf.extract(file_to_extract, path=tmp_directory_name)
            os.makedirs(destination_directory, exist_ok=True)
            shutil.move(extracted_file, destination_file)
        return True
    except Exception:
        if raise_exception:
            raise  # Re-raise as-is, preserving the original traceback.
        return False
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.9.0.0b0
3
+ Version: 8.9.0.1b1
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
7
7
  Author: 4DN-DCIC Team
8
8
  Author-email: support@4dnucleome.org
9
- Requires-Python: >=3.8,<3.12
9
+ Requires-Python: >=3.8,<3.13
10
10
  Classifier: Development Status :: 4 - Beta
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
24
24
  Classifier: Topic :: Database :: Database Engines/Servers
25
25
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
26
26
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
27
+ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
27
28
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
28
- Requires-Dist: boto3 (>=1.28.57,<2.0.0)
29
- Requires-Dist: botocore (>=1.31.57,<2.0.0)
29
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
30
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
30
31
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
31
32
  Requires-Dist: docker (>=4.4.4,<5.0.0)
32
33
  Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
42
43
  Requires-Dist: redis (>=4.5.1,<5.0.0)
43
44
  Requires-Dist: requests (>=2.21.0,<3.0.0)
44
45
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
46
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
45
47
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
46
48
  Requires-Dist: toml (>=0.10.1,<1)
47
49
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)
@@ -5,11 +5,11 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
5
5
  dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
6
6
  dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
7
7
  dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
8
- dcicutils/command_utils.py,sha256=JExll5TMqIcmuiGvuS8q4XDUvoEfi2oSH0E2FVF6suU,15285
8
+ dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
9
9
  dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
10
10
  dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
11
11
  dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
12
- dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
12
+ dcicutils/creds_utils.py,sha256=64BbIfS90T1eJmmQJrDyfrRa3V2F1x7T8lOrEeFfqJY,11127
13
13
  dcicutils/data_readers.py,sha256=6EMrY7TjDE8H7bA_TCWtpLQP7slJ0YTL77_dNh6e7sg,7626
14
14
  dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
15
15
  dcicutils/datetime_utils.py,sha256=sM653aw_1zy1qBmfAH-WetCi2Fw9cnFK7FZN_Tg4onI,13499
@@ -27,10 +27,11 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
27
27
  dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
28
28
  dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
29
29
  dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
30
- dcicutils/ff_utils.py,sha256=Yf-fET5gdpjrH0gikpOCIJdY2Dv3obzUpR31ur816mU,72972
31
- dcicutils/file_utils.py,sha256=098rXvLeIh8n69EGW7DpOS227ef3BPgwhRAktoU6mhE,2663
30
+ dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
31
+ dcicutils/file_utils.py,sha256=zyNdRl1Fu3SrQwjJWaIMvQpi4DRaodNZCX7oTkiPJ-A,10916
32
32
  dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
33
33
  dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
34
+ dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
34
35
  dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
35
36
  dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
36
37
  dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
@@ -43,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
43
44
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
44
45
  dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
45
46
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
46
- dcicutils/misc_utils.py,sha256=YH_TTmv6ABWeMERwVvA2-rIfdS-CoPYLXJru9TvWxgM,104610
47
+ dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
47
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
48
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
- dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
50
- dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
50
+ dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
51
+ dcicutils/portal_utils.py,sha256=cDMaqEW3aSDwhjJlsaVS4yEpYsmWo6yVXnLA9f4J_JY,44621
51
52
  dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
52
53
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
53
54
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -55,25 +56,25 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
55
56
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
56
57
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
57
58
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
58
- dcicutils/schema_utils.py,sha256=2hOzuGK7F8xZ7JyS7_Lan2wXOlNZezzT2lqgEs3QOe4,10605
59
+ dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
59
60
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
60
61
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
61
- dcicutils/scripts/view_portal_object.py,sha256=Cy-8GwGJS9EX-5RxE8mjsqNlDT0N6OCpkNffPVkTFQc,26262
62
+ dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
62
63
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
63
64
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
64
65
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
65
66
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
66
- dcicutils/structured_data.py,sha256=BQuIMv6OPySsn6YxtXE2Er-zLE2QJuCYhEQ3V0u_UXY,61238
67
+ dcicutils/structured_data.py,sha256=GfG96VyFwQIkmUax-ZdEzbWnfEiJxmeZEaUWz78IQZY,64030
67
68
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
68
- dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
69
+ dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
69
70
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
70
- dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
71
+ dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
71
72
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
72
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
73
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
74
- dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
75
- dcicutils-8.9.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
76
- dcicutils-8.9.0.0b0.dist-info/METADATA,sha256=sHJ_jTCTbZwTy6AoI9BSixIfwZDxdntJvQmTy5keWnI,3356
77
- dcicutils-8.9.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
78
- dcicutils-8.9.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
79
- dcicutils-8.9.0.0b0.dist-info/RECORD,,
75
+ dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
+ dcicutils-8.9.0.1b1.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.9.0.1b1.dist-info/METADATA,sha256=ZOYWkIhpaTpYiARRhbr5PWlK4Qxi9wnsoQibYG2qGjQ,3439
78
+ dcicutils-8.9.0.1b1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.9.0.1b1.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.9.0.1b1.dist-info/RECORD,,