dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -57,6 +57,7 @@
57
57
 
58
58
  import argparse
59
59
  from functools import lru_cache
60
+ import io
60
61
  import json
61
62
  import pyperclip
62
63
  import os
@@ -97,11 +98,18 @@ def main():
97
98
  help="Include all properties for schema usage.")
98
99
  parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
99
100
  parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
101
+ parser.add_argument("--post", type=str, required=False, default=None,
102
+ help="POST data of the main arg type with data from file specified with this option.")
103
+ parser.add_argument("--patch", type=str, required=False, default=None,
104
+ help="PATCH data of the main arg type with data from file specified with this option.")
100
105
  parser.add_argument("--database", action="store_true", required=False, default=False,
101
106
  help="Read from database output.")
107
+ parser.add_argument("--bool", action="store_true", required=False,
108
+ default=False, help="Only return whether found or not.")
102
109
  parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
103
110
  parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
104
111
  help="Copy object data to clipboard.")
112
+ parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
105
113
  parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
106
114
  parser.add_argument("--more-details", action="store_true", required=False, default=False,
107
115
  help="More detailed output.")
@@ -151,6 +159,18 @@ def main():
151
159
  args.schema = True
152
160
 
153
161
  if args.schema:
162
+ if args.post:
163
+ if post_data := _read_json_from_file(args.post):
164
+ if args.verbose:
165
+ _print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
166
+ if isinstance(post_data, dict):
167
+ post_data = [post_data]
168
+ elif not isinstance(post_data, list):
169
+ _print(f"POST data neither list nor dictionary: {args.post}")
170
+ for item in post_data:
171
+ portal.post_metadata(args.uuid, item)
172
+ if args.verbose:
173
+ _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
154
174
  schema, schema_name = _get_schema(portal, args.uuid)
155
175
  if schema:
156
176
  if args.copy:
@@ -166,14 +186,50 @@ def main():
166
186
  _print_schema(schema, details=args.details, more_details=args.details,
167
187
  all=args.all, raw=args.raw, raw_yaml=args.yaml)
168
188
  return
169
-
170
- data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose)
189
+ elif args.patch:
190
+ if patch_data := _read_json_from_file(args.patch):
191
+ if args.verbose:
192
+ _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
193
+ if isinstance(patch_data, dict):
194
+ patch_data = [patch_data]
195
+ elif not isinstance(patch_data, list):
196
+ _print(f"PATCH data neither list nor dictionary: {args.patch}")
197
+ for item in patch_data:
198
+ portal.patch_metadata(args.uuid, item)
199
+ if args.verbose:
200
+ _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
201
+ return
202
+ else:
203
+ _print(f"No PATCH data found in file: {args.patch}")
204
+ exit(1)
205
+
206
+ data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
207
+ database=args.database, check=args.bool, verbose=args.verbose)
208
+ if args.bool:
209
+ if data:
210
+ _print(f"{args.uuid}: found")
211
+ exit(0)
212
+ else:
213
+ _print(f"{args.uuid}: not found")
214
+ exit(1)
171
215
  if args.copy:
172
216
  pyperclip.copy(json.dumps(data, indent=4))
173
217
  if args.yaml:
174
218
  _print(yaml.dump(data))
175
219
  else:
176
- _print(json.dumps(data, default=str, indent=4))
220
+ if args.indent > 0:
221
+ _print(_format_json_with_indent(data, indent=args.indent))
222
+ else:
223
+ _print(json.dumps(data, default=str, indent=4))
224
+
225
+
226
def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
    """
    Returns the given dictionary (or list) formatted as JSON, nested with four spaces,
    and with every line of the output shifted right by the given number of spaces.
    Returns None if the given value is neither a dictionary nor a list.
    """
    if not isinstance(value, (dict, list)):
        # Previously any non-dictionary fell through to a reference to an unassigned local.
        return None
    # Using default=str keeps this in line with the non-indented json.dumps call in main().
    result = json.dumps(value, default=str, indent=4)
    if indent and indent > 0:
        margin = indent * " "
        # Shift the first line, and every line following a newline, by the margin.
        result = margin + result.replace("\n", f"\n{margin}")
    return result
177
233
 
178
234
 
179
235
  def _create_portal(ini: str, env: Optional[str] = None,
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
198
254
 
199
255
 
200
256
  def _get_portal_object(portal: Portal, uuid: str,
201
- raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
257
+ raw: bool = False, database: bool = False,
258
+ check: bool = False, verbose: bool = False) -> dict:
202
259
  response = None
203
260
  try:
204
261
  if not uuid.startswith("/"):
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
212
269
  _exit()
213
270
  _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
214
271
  if not response:
272
+ if check:
273
+ return None
215
274
  _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
216
275
  if response.status_code not in [200, 307]:
217
276
  # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
218
277
  _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
219
278
  if not response.json:
220
279
  _exit(f"Invalid JSON getting Portal object: {uuid}")
221
- return response.json()
280
+ response = response.json()
281
+ if raw:
282
+ response.pop("schema_version", None)
283
+ return response
222
284
 
223
285
 
224
286
  @lru_cache(maxsize=1)
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
257
319
  required: Optional[List[str]] = None) -> None:
258
320
  if not schema or not isinstance(schema, dict):
259
321
  return
322
+ identifying_properties = schema.get("identifyingProperties")
260
323
  if level == 0:
261
324
  if required_properties := schema.get("required"):
262
325
  _print("- required properties:")
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
383
446
  suffix += f" | enum"
384
447
  if property_required:
385
448
  suffix += f" | required"
449
+ if property_name in (identifying_properties or []):
450
+ suffix += f" | identifying"
386
451
  if property.get("uniqueKey"):
387
452
  suffix += f" | unique"
388
453
  if pattern := property.get("pattern"):
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
529
594
  print(line)
530
595
 
531
596
 
597
def _read_json_from_file(file: str) -> Optional[dict]:
    """
    Reads the given file and returns its parsed JSON content, i.e. whatever the top-level
    JSON value in the file is (the callers in this file handle a dictionary or a list).
    Prints a message and exits with status 1 if the file does not exist, cannot be
    opened or read, or does not contain valid JSON.
    """
    if not os.path.exists(file):
        _print(f"Cannot find file: {file}")
        exit(1)
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with io.open(file, "r", encoding="utf-8") as f:
            try:
                return json.load(f)
            except ValueError:
                # Both json.JSONDecodeError and UnicodeDecodeError derive from ValueError.
                _print(f"Cannot parse JSON in file: {file}")
                exit(1)
    except OSError as e:
        # Report the underlying error via _print, like the other messages in this function.
        _print(e)
        _print(f"Cannot open file: {file}")
        exit(1)
612
+
613
+
532
614
  def _print(*args, **kwargs):
533
615
  with uncaptured_output():
534
616
  PRINT(*args, **kwargs)
@@ -11,7 +11,6 @@ from webtest.app import TestApp
11
11
  from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
- from dcicutils.file_utils import search_for_file
15
14
  from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
16
15
  merge_objects, remove_empty_properties, right_trim, split_string,
17
16
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
@@ -53,9 +52,10 @@ class StructuredDataSet:
53
52
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
54
53
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
55
54
  order: Optional[List[str]] = None, prune: bool = True,
55
+ remove_empty_objects_from_lists: bool = True,
56
56
  ref_lookup_strategy: Optional[Callable] = None,
57
57
  ref_lookup_nocache: bool = False,
58
- norefs: bool = False,
58
+ norefs: bool = False, merge: bool = False,
59
59
  progress: Optional[Callable] = None,
60
60
  debug_sleep: Optional[str] = None) -> None:
61
61
  self._progress = progress if callable(progress) else None
@@ -65,7 +65,8 @@ class StructuredDataSet:
65
65
  ref_lookup_nocache=ref_lookup_nocache) if portal else None
66
66
  self._ref_lookup_strategy = ref_lookup_strategy
67
67
  self._order = order
68
- self._prune = prune
68
+ self._prune = prune is True
69
+ self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
69
70
  self._warnings = {}
70
71
  self._errors = {}
71
72
  self._resolved_refs = set()
@@ -73,6 +74,7 @@ class StructuredDataSet:
73
74
  self._nrows = 0
74
75
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
75
76
  self._norefs = True if norefs is True else False
77
+ self._merge = True if merge is True else False
76
78
  self._debug_sleep = None
77
79
  if debug_sleep:
78
80
  try:
@@ -93,14 +95,16 @@ class StructuredDataSet:
93
95
  def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
94
96
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
95
97
  order: Optional[List[str]] = None, prune: bool = True,
98
+ remove_empty_objects_from_lists: bool = True,
96
99
  ref_lookup_strategy: Optional[Callable] = None,
97
100
  ref_lookup_nocache: bool = False,
98
- norefs: bool = False,
101
+ norefs: bool = False, merge: bool = False,
99
102
  progress: Optional[Callable] = None,
100
103
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
101
104
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
105
+ remove_empty_objects_from_lists=remove_empty_objects_from_lists,
102
106
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
103
- norefs=norefs, progress=progress, debug_sleep=debug_sleep)
107
+ norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
104
108
 
105
109
  def validate(self, force: bool = False) -> None:
106
110
  def data_without_deleted_properties(data: dict) -> dict:
@@ -204,14 +208,6 @@ class StructuredDataSet:
204
208
  result.append({"type": type_name, "file": file_name})
205
209
  return result
206
210
 
207
- def upload_files_located(self,
208
- location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
209
- upload_files = copy.deepcopy(self.upload_files)
210
- for upload_file in upload_files:
211
- if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
212
- upload_file["path"] = file_path
213
- return upload_files
214
-
215
211
  @property
216
212
  def nrows(self) -> int:
217
213
  return self._nrows
@@ -346,7 +342,23 @@ class StructuredDataSet:
346
342
 
347
343
  def _load_json_file(self, file: str) -> None:
348
344
  with open(file) as f:
349
- self._add(Schema.type_name(file), json.load(f))
345
+ data = json.load(f)
346
+ if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
347
+ (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
348
+ # If the JSON file name looks like a schema name then assume it
349
+ # contains an object or an array of objects of that schema type.
350
+ if self._merge:
351
+ data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
352
+ self._add(Schema.type_name(file), data)
353
+ elif isinstance(data, dict):
354
+ # Otherwise if the JSON file name does not look like a schema name then
355
+ # assume it is a dictionary where each property is the name of a schema, and
356
+ # which (each property) contains a list of objects of that schema type.
357
+ for schema_name in data:
358
+ item = data[schema_name]
359
+ if self._merge:
360
+ item = self._merge_with_existing_portal_object(item, schema_name)
361
+ self._add(schema_name, item)
350
362
 
351
363
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
352
364
  schema = None
@@ -368,7 +380,14 @@ class StructuredDataSet:
368
380
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
369
381
  if self._autoadd_properties:
370
382
  self._add_properties(structured_row, self._autoadd_properties, schema)
371
- self._add(type_name, structured_row)
383
+ # New merge functionality (2024-05-25).
384
+ if self._merge:
385
+ structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
386
+ if (prune_error := self._prune_structured_row(structured_row)) is not None:
387
+ self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
388
+ "error": prune_error}, "validation")
389
+ else:
390
+ self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
372
391
  if self._progress:
373
392
  self._progress({
374
393
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -385,9 +404,20 @@ class StructuredDataSet:
385
404
  self._note_error(schema._unresolved_refs, "ref")
386
405
  self._resolved_refs.update(schema._resolved_refs)
387
406
 
388
- def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
389
- if self._prune:
407
def _prune_structured_row(self, data: dict) -> Optional[str]:
    """
    Removes empty properties from the given structured row (in place), provided pruning
    is enabled for this object; does nothing otherwise. When removal of empty objects
    from lists is also enabled, remove_empty_properties is asked to treat {} array
    elements as empty and to raise if a non-empty array element follows an empty one;
    in that case the exception text is returned as the error. Returns None if no error.
    """
    if self._prune:
        if self._remove_empty_objects_from_lists:
            try:
                remove_empty_properties(data, isempty_array_element=lambda element: element == {},
                                        raise_exception_on_nonempty_array_element_after_empty=True)
            except Exception as error:
                # Hand the problem back to the caller as a message rather than raising.
                return str(error)
        else:
            remove_empty_properties(data)
    return None
419
+
420
+ def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
391
421
  if type_name in self._data:
392
422
  self._data[type_name].extend([data] if isinstance(data, dict) else data)
393
423
  else:
@@ -398,6 +428,18 @@ class StructuredDataSet:
398
428
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
399
429
  structured_row[name] = properties[name]
400
430
 
431
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
    """
    Looks up, via each identifying path the Portal reports for the given object and type,
    an already existing Portal item; for the first one found, returns the result of merging
    the given object into it (via merge_objects). If none is found, returns the given object
    as-is. Note that the given object may be CHANGED in place.
    """
    identifying_paths = self._portal.get_identifying_paths(portal_object, portal_type)
    for identifying_path in identifying_paths:
        # raise_exception=False: a failed lookup is not treated as an error here.
        existing_portal_object = self._portal.get_metadata(identifying_path, raw=True, raise_exception=False)
        if existing_portal_object:
            return merge_objects(existing_portal_object, portal_object)
    return portal_object
442
+
401
443
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
402
444
  return (ref_lookup_flags &
403
445
  Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
@@ -2,39 +2,45 @@ import re
2
2
  from typing import Optional
3
3
  from dcicutils.structured_data import Portal
4
4
 
5
+ # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
6
+ # before it was fully developed, we had differing behaviors; but this has been unified; so this
7
+ # could now be internalized to structured_data, and portal_object_utils (TODO).
8
+
5
9
 
6
10
  def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
7
11
  #
8
- # FYI: Note this situation WRT object lookups ...
9
- #
10
- # /{submitted_id} # NOT FOUND
11
- # /UnalignedReads/{submitted_id} # OK
12
- # /SubmittedFile/{submitted_id} # OK
13
- # /File/{submitted_id} # NOT FOUND
14
- #
15
- # /{accession} # OK
16
- # /UnalignedReads/{accession} # NOT FOUND
17
- # /SubmittedFile/{accession} # NOT FOUND
18
- # /File/{accession} # OK
12
+ # Note this slightly odd situation WRT object lookups by submitted_id and accession:
13
+ # -----------------------------+-----------------------------------------------+---------------+
14
+ # PATH | EXAMPLE | LOOKUP RESULT |
15
+ # -----------------------------+-----------------------------------------------+---------------+
16
+ # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
17
+ # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
18
+ # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
19
+ # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
20
+ # -----------------------------+-----------------------------------------------+---------------+
21
+ # /accession | /SMAFSFXF1RO4 | FOUND |
22
+ # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
23
+ # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
24
+ # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
25
+ # -----------------------------+-----------------------------------------------+---------------+
19
26
  #
20
27
  def ref_validator(schema: Optional[dict],
21
28
  property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
22
29
  """
23
- Returns False iff the type represented by the given schema, can NOT be referenced by
24
- the given property name with the given property value, otherwise returns None.
30
+ Returns False iff objects of type represented by the given schema, CANNOT be referenced with
31
+ a Portal path using the given property name and its given property value, otherwise returns None.
25
32
 
26
- For example, if the schema is for the UnalignedReads type and the property name
27
- is accession, then we will return False iff the given property value is NOT a properly
28
- formatted accession ID. Otherwise, we will return None, which indicates that the
29
- caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
30
- its default behavior, which is to check other ways in which the given type can NOT
31
- be referenced by the given value, i.e. it checks other identifying properties for
32
- the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
33
+ For example, if the schema is for UnalignedReads and the property name is accession, then we will
34
+ return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
35
+ will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
36
+ will continue executing its default behavior, which is to check other ways in which the given type
37
+ CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
38
+ and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
33
39
 
34
- The goal (in structured_data) being to detect if a type is being referenced in such
35
- a way that cannot possibly be allowed, i.e. because none of its identifying types
36
- are in the required form (if indeed there any requirements). Note that it is guaranteed
37
- that the given property name is indeed an identifying property for the given type.
40
+ The goal (in structured_data) being to detect if a type is being referenced in such a way that
41
+ CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
42
+ if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
43
+ identifying property for the given type.
38
44
  """
39
45
  if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
40
46
  if (property_format == "accession") and (property_name == "accession"):
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
62
68
 
63
69
 
64
70
  # This is here for now because of problems with circular dependencies.
65
- # See: smaht-portal/.../schema_formats.py
71
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
66
72
def _is_accession_id(value: str) -> bool:
    """
    Returns True iff the given value is a string consisting of exactly "SMA" followed
    by nine characters, each an uppercase letter or a digit from 1 through 9.
    """
    # fullmatch rather than match with "$": the latter also accepts a trailing newline.
    return isinstance(value, str) and re.fullmatch(r"SMA[1-9A-Z]{9}", value) is not None
@@ -1,8 +1,11 @@
1
1
  from contextlib import contextmanager
2
+ from datetime import datetime
2
3
  import os
3
4
  import shutil
4
5
  import tempfile
6
+ from uuid import uuid4 as uuid
5
7
  from typing import List, Optional, Union
8
+ from dcicutils.file_utils import create_random_file
6
9
 
7
10
 
8
11
  @contextmanager
@@ -15,22 +18,59 @@ def temporary_directory() -> str:
15
18
 
16
19
 
17
20
  @contextmanager
18
- def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
21
+ def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
19
22
  content: Optional[Union[str, bytes, List[str]]] = None) -> str:
20
23
  with temporary_directory() as tmp_directory_name:
21
- tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
22
- with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
24
+ tmp_file_name = f"{prefix or ''}{name or tempfile.mktemp(dir='')}{suffix or ''}"
25
+ tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
26
+ with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
23
27
  if content is not None:
24
28
  tmp_file.write("\n".join(content) if isinstance(content, list) else content)
25
- yield tmp_file_name
29
+ yield tmp_file_path
30
+
31
+
32
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
    """
    Generates and returns the full path to a file within the system temporary directory.
    The file name is the given prefix (if any), then a UTC timestamp (YYYYMMDDHHMMSS)
    followed by 32 random hexadecimal characters, then the given suffix (if any).
    Only the name is generated; the file itself is not created.
    """
    # Local import because the module-level import is only "from datetime import datetime".
    from datetime import timezone
    # datetime.utcnow() is deprecated as of Python 3.12; this yields the same UTC timestamp.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    random_string = f"{timestamp}{uuid().hex}"
    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
    return os.path.join(tempfile.gettempdir(), tmp_file_name)
39
+
40
+
41
@contextmanager
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
                          nbytes: int = 1024, binary: bool = False,
                          line_length: Optional[int] = None) -> str:
    """
    Context manager which obtains a temporary file (via temporary_file, with the given
    prefix and suffix), has create_random_file fill it according to the given nbytes,
    binary and line_length arguments, and yields the path to that file.
    """
    with temporary_file(prefix=prefix, suffix=suffix) as random_file_path:
        create_random_file(random_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
        yield random_file_path
26
47
 
27
48
 
28
49
  def remove_temporary_directory(tmp_directory_name: str) -> None:
29
- def is_temporary_directory(path: str) -> bool:
30
- try:
31
- tmpdir = tempfile.gettempdir()
32
- return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
33
- except Exception:
34
- return False
50
+ """
51
+ Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
52
+ """
35
53
  if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
36
54
  shutil.rmtree(tmp_directory_name)
55
+
56
+
57
def remove_temporary_file(tmp_file_name: str) -> bool:
    """
    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
    Returns True if the file was removed; otherwise (including on any error) returns False.
    """
    try:
        # Compare resolved locations: os.path.commonpath is purely textual, so without this a
        # path like <tmpdir>/../elsewhere/file would pass the check while lying outside of the
        # temporary directory. Only the containing directory is resolved, so that if the given
        # path is itself a symbolic link it is the link which gets removed, not its target.
        tmpdir = os.path.realpath(tempfile.gettempdir())
        directory, name = os.path.split(os.path.abspath(tmp_file_name))
        tmp_file_path = os.path.join(os.path.realpath(directory), name)
        if (os.path.commonpath([tmpdir, tmp_file_path]) == tmpdir) and os.path.isfile(tmp_file_path):
            os.remove(tmp_file_path)
            return True
        return False
    except Exception:
        return False
69
+
70
+
71
def is_temporary_directory(path: str) -> bool:
    """
    Returns True iff the given path is an existing directory which is the system
    temporary directory or is (somewhere) within it; otherwise (including on any
    error) returns False.
    """
    try:
        # Resolve symbolic links and ".." first: os.path.commonpath compares path components
        # textually, so <tmpdir>/../elsewhere would otherwise be accepted as being within.
        tmpdir = os.path.realpath(tempfile.gettempdir())
        resolved_path = os.path.realpath(path)
        return os.path.commonpath([resolved_path, tmpdir]) == tmpdir and os.path.isdir(resolved_path)
    except Exception:
        return False
dcicutils/zip_utils.py CHANGED
@@ -2,7 +2,9 @@ from contextlib import contextmanager
2
2
  from dcicutils.tmpfile_utils import temporary_directory, temporary_file
3
3
  import gzip
4
4
  import os
5
+ import shutil
5
6
  import tarfile
7
+ import tempfile
6
8
  from typing import List, Optional
7
9
  import zipfile
8
10
 
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
45
47
  outputf.write(inputf.read())
46
48
  outputf.close()
47
49
  yield tmp_file_name
50
+
51
+
52
def extract_file_from_zip(zip_file: str, file_to_extract: str,
                          destination_file: str, raise_exception: bool = True) -> bool:
    """
    Extracts from the given zip file, the given file to extract, writing it to the
    given destination file (relative to the current directory if it has no directory
    part; its directory is created if necessary). Returns True if all is well. Returns
    False if the file to extract is not in the zip file. On any other error, raises an
    exception if the raise_exception argument is True (the default), else returns False.
    """
    try:
        if not (destination_directory := os.path.dirname(destination_file)):
            destination_directory = os.getcwd()
            destination_file = os.path.join(destination_directory, destination_file)
        with tempfile.TemporaryDirectory() as tmp_directory_name:
            with zipfile.ZipFile(zip_file, "r") as zipf:
                if file_to_extract not in zipf.namelist():
                    return False
                # Use the path which extract() reports it wrote: zipfile sanitizes member names
                # (e.g. drops a leading slash or ".." components), so re-joining the raw member
                # name onto the temporary directory can point at a file that does not exist.
                extracted_file = zipf.extract(file_to_extract, path=tmp_directory_name)
            os.makedirs(destination_directory, exist_ok=True)
            shutil.move(extracted_file, destination_file)
            return True
    except Exception:
        if raise_exception:
            raise
        return False
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.9.0.0b0
3
+ Version: 8.9.0.1b2
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
7
7
  Author: 4DN-DCIC Team
8
8
  Author-email: support@4dnucleome.org
9
- Requires-Python: >=3.8,<3.12
9
+ Requires-Python: >=3.8,<3.13
10
10
  Classifier: Development Status :: 4 - Beta
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
24
24
  Classifier: Topic :: Database :: Database Engines/Servers
25
25
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
26
26
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
27
+ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
27
28
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
28
- Requires-Dist: boto3 (>=1.28.57,<2.0.0)
29
- Requires-Dist: botocore (>=1.31.57,<2.0.0)
29
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
30
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
30
31
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
31
32
  Requires-Dist: docker (>=4.4.4,<5.0.0)
32
33
  Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
42
43
  Requires-Dist: redis (>=4.5.1,<5.0.0)
43
44
  Requires-Dist: requests (>=2.21.0,<3.0.0)
44
45
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
46
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
45
47
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
46
48
  Requires-Dist: toml (>=0.10.1,<1)
47
49
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)
@@ -5,11 +5,11 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
5
5
  dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
6
6
  dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
7
7
  dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
8
- dcicutils/command_utils.py,sha256=JExll5TMqIcmuiGvuS8q4XDUvoEfi2oSH0E2FVF6suU,15285
8
+ dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
9
9
  dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
10
10
  dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
11
11
  dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
12
- dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
12
+ dcicutils/creds_utils.py,sha256=64BbIfS90T1eJmmQJrDyfrRa3V2F1x7T8lOrEeFfqJY,11127
13
13
  dcicutils/data_readers.py,sha256=6EMrY7TjDE8H7bA_TCWtpLQP7slJ0YTL77_dNh6e7sg,7626
14
14
  dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
15
15
  dcicutils/datetime_utils.py,sha256=sM653aw_1zy1qBmfAH-WetCi2Fw9cnFK7FZN_Tg4onI,13499
@@ -27,10 +27,11 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
27
27
  dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
28
28
  dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
29
29
  dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
30
- dcicutils/ff_utils.py,sha256=Yf-fET5gdpjrH0gikpOCIJdY2Dv3obzUpR31ur816mU,72972
31
- dcicutils/file_utils.py,sha256=098rXvLeIh8n69EGW7DpOS227ef3BPgwhRAktoU6mhE,2663
30
+ dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
31
+ dcicutils/file_utils.py,sha256=zyNdRl1Fu3SrQwjJWaIMvQpi4DRaodNZCX7oTkiPJ-A,10916
32
32
  dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
33
33
  dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
34
+ dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
34
35
  dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
35
36
  dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
36
37
  dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
@@ -43,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
43
44
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
44
45
  dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
45
46
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
46
- dcicutils/misc_utils.py,sha256=YH_TTmv6ABWeMERwVvA2-rIfdS-CoPYLXJru9TvWxgM,104610
47
+ dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
47
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
48
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
- dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
50
- dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
50
+ dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
51
+ dcicutils/portal_utils.py,sha256=TDGmJqxqWfuqdJZ-ARqbOxPXNuzlqNxPD49jMEY4VQA,45217
51
52
  dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
52
53
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
53
54
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -55,25 +56,25 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
55
56
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
56
57
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
57
58
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
58
- dcicutils/schema_utils.py,sha256=2hOzuGK7F8xZ7JyS7_Lan2wXOlNZezzT2lqgEs3QOe4,10605
59
+ dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
59
60
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
60
61
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
61
- dcicutils/scripts/view_portal_object.py,sha256=Cy-8GwGJS9EX-5RxE8mjsqNlDT0N6OCpkNffPVkTFQc,26262
62
+ dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
62
63
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
63
64
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
64
65
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
65
66
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
66
- dcicutils/structured_data.py,sha256=BQuIMv6OPySsn6YxtXE2Er-zLE2QJuCYhEQ3V0u_UXY,61238
67
+ dcicutils/structured_data.py,sha256=GfG96VyFwQIkmUax-ZdEzbWnfEiJxmeZEaUWz78IQZY,64030
67
68
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
68
- dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
69
+ dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
69
70
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
70
- dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
71
+ dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
71
72
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
72
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
73
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
74
- dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
75
- dcicutils-8.9.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
76
- dcicutils-8.9.0.0b0.dist-info/METADATA,sha256=sHJ_jTCTbZwTy6AoI9BSixIfwZDxdntJvQmTy5keWnI,3356
77
- dcicutils-8.9.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
78
- dcicutils-8.9.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
79
- dcicutils-8.9.0.0b0.dist-info/RECORD,,
75
+ dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
+ dcicutils-8.9.0.1b2.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.9.0.1b2.dist-info/METADATA,sha256=Mh-FpaQpv_ipkozMQ_Ul_vezgpxdzX4lvp38jaDD8rc,3439
78
+ dcicutils-8.9.0.1b2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.9.0.1b2.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.9.0.1b2.dist-info/RECORD,,