dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +249 -37
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0
@@ -57,6 +57,7 @@
|
|
57
57
|
|
58
58
|
import argparse
|
59
59
|
from functools import lru_cache
|
60
|
+
import io
|
60
61
|
import json
|
61
62
|
import pyperclip
|
62
63
|
import os
|
@@ -97,11 +98,18 @@ def main():
|
|
97
98
|
help="Include all properties for schema usage.")
|
98
99
|
parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
|
99
100
|
parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
|
101
|
+
parser.add_argument("--post", type=str, required=False, default=None,
|
102
|
+
help="POST data of the main arg type with data from file specified with this option.")
|
103
|
+
parser.add_argument("--patch", type=str, required=False, default=None,
|
104
|
+
help="PATCH data of the main arg type with data from file specified with this option.")
|
100
105
|
parser.add_argument("--database", action="store_true", required=False, default=False,
|
101
106
|
help="Read from database output.")
|
107
|
+
parser.add_argument("--bool", action="store_true", required=False,
|
108
|
+
default=False, help="Only return whether found or not.")
|
102
109
|
parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
|
103
110
|
parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
|
104
111
|
help="Copy object data to clipboard.")
|
112
|
+
parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
|
105
113
|
parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
|
106
114
|
parser.add_argument("--more-details", action="store_true", required=False, default=False,
|
107
115
|
help="More detailed output.")
|
@@ -151,6 +159,18 @@ def main():
|
|
151
159
|
args.schema = True
|
152
160
|
|
153
161
|
if args.schema:
|
162
|
+
if args.post:
|
163
|
+
if post_data := _read_json_from_file(args.post):
|
164
|
+
if args.verbose:
|
165
|
+
_print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
|
166
|
+
if isinstance(post_data, dict):
|
167
|
+
post_data = [post_data]
|
168
|
+
elif not isinstance(post_data, list):
|
169
|
+
_print(f"POST data neither list nor dictionary: {args.post}")
|
170
|
+
for item in post_data:
|
171
|
+
portal.post_metadata(args.uuid, item)
|
172
|
+
if args.verbose:
|
173
|
+
_print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
|
154
174
|
schema, schema_name = _get_schema(portal, args.uuid)
|
155
175
|
if schema:
|
156
176
|
if args.copy:
|
@@ -166,14 +186,50 @@ def main():
|
|
166
186
|
_print_schema(schema, details=args.details, more_details=args.details,
|
167
187
|
all=args.all, raw=args.raw, raw_yaml=args.yaml)
|
168
188
|
return
|
169
|
-
|
170
|
-
|
189
|
+
elif args.patch:
|
190
|
+
if patch_data := _read_json_from_file(args.patch):
|
191
|
+
if args.verbose:
|
192
|
+
_print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
|
193
|
+
if isinstance(patch_data, dict):
|
194
|
+
patch_data = [patch_data]
|
195
|
+
elif not isinstance(patch_data, list):
|
196
|
+
_print(f"PATCH data neither list nor dictionary: {args.patch}")
|
197
|
+
for item in patch_data:
|
198
|
+
portal.patch_metadata(args.uuid, item)
|
199
|
+
if args.verbose:
|
200
|
+
_print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
|
201
|
+
return
|
202
|
+
else:
|
203
|
+
_print(f"No PATCH data found in file: {args.patch}")
|
204
|
+
exit(1)
|
205
|
+
|
206
|
+
data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
|
207
|
+
database=args.database, check=args.bool, verbose=args.verbose)
|
208
|
+
if args.bool:
|
209
|
+
if data:
|
210
|
+
_print(f"{args.uuid}: found")
|
211
|
+
exit(0)
|
212
|
+
else:
|
213
|
+
_print(f"{args.uuid}: not found")
|
214
|
+
exit(1)
|
171
215
|
if args.copy:
|
172
216
|
pyperclip.copy(json.dumps(data, indent=4))
|
173
217
|
if args.yaml:
|
174
218
|
_print(yaml.dump(data))
|
175
219
|
else:
|
176
|
-
|
220
|
+
if args.indent > 0:
|
221
|
+
_print(_format_json_with_indent(data, indent=args.indent))
|
222
|
+
else:
|
223
|
+
_print(json.dumps(data, default=str, indent=4))
|
224
|
+
|
225
|
+
|
226
|
+
def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
|
227
|
+
if isinstance(value, dict):
|
228
|
+
result = json.dumps(value, indent=4)
|
229
|
+
if indent > 0:
|
230
|
+
result = f"{indent * ' '}{result}"
|
231
|
+
result = result.replace("\n", f"\n{indent * ' '}")
|
232
|
+
return result
|
177
233
|
|
178
234
|
|
179
235
|
def _create_portal(ini: str, env: Optional[str] = None,
|
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
|
|
198
254
|
|
199
255
|
|
200
256
|
def _get_portal_object(portal: Portal, uuid: str,
|
201
|
-
raw: bool = False, database: bool = False,
|
257
|
+
raw: bool = False, database: bool = False,
|
258
|
+
check: bool = False, verbose: bool = False) -> dict:
|
202
259
|
response = None
|
203
260
|
try:
|
204
261
|
if not uuid.startswith("/"):
|
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
|
|
212
269
|
_exit()
|
213
270
|
_exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
|
214
271
|
if not response:
|
272
|
+
if check:
|
273
|
+
return None
|
215
274
|
_exit(f"Null response getting Portal object from {portal.server}: {uuid}")
|
216
275
|
if response.status_code not in [200, 307]:
|
217
276
|
# TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
|
218
277
|
_exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
|
219
278
|
if not response.json:
|
220
279
|
_exit(f"Invalid JSON getting Portal object: {uuid}")
|
221
|
-
|
280
|
+
response = response.json()
|
281
|
+
if raw:
|
282
|
+
response.pop("schema_version", None)
|
283
|
+
return response
|
222
284
|
|
223
285
|
|
224
286
|
@lru_cache(maxsize=1)
|
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
|
|
257
319
|
required: Optional[List[str]] = None) -> None:
|
258
320
|
if not schema or not isinstance(schema, dict):
|
259
321
|
return
|
322
|
+
identifying_properties = schema.get("identifyingProperties")
|
260
323
|
if level == 0:
|
261
324
|
if required_properties := schema.get("required"):
|
262
325
|
_print("- required properties:")
|
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
|
|
383
446
|
suffix += f" | enum"
|
384
447
|
if property_required:
|
385
448
|
suffix += f" | required"
|
449
|
+
if property_name in (identifying_properties or []):
|
450
|
+
suffix += f" | identifying"
|
386
451
|
if property.get("uniqueKey"):
|
387
452
|
suffix += f" | unique"
|
388
453
|
if pattern := property.get("pattern"):
|
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
|
|
529
594
|
print(line)
|
530
595
|
|
531
596
|
|
597
|
+
def _read_json_from_file(file: str) -> Optional[dict]:
|
598
|
+
if not os.path.exists(file):
|
599
|
+
_print(f"Cannot find file: {file}")
|
600
|
+
exit(1)
|
601
|
+
try:
|
602
|
+
with io.open(file, "r") as f:
|
603
|
+
try:
|
604
|
+
return json.load(f)
|
605
|
+
except Exception:
|
606
|
+
_print(f"Cannot parse JSON in file: {file}")
|
607
|
+
exit(1)
|
608
|
+
except Exception as e:
|
609
|
+
print(e)
|
610
|
+
_print(f"Cannot open file: {file}")
|
611
|
+
exit(1)
|
612
|
+
|
613
|
+
|
532
614
|
def _print(*args, **kwargs):
|
533
615
|
with uncaptured_output():
|
534
616
|
PRINT(*args, **kwargs)
|
dcicutils/structured_data.py
CHANGED
@@ -11,7 +11,6 @@ from webtest.app import TestApp
|
|
11
11
|
from dcicutils.common import OrchestratedApp
|
12
12
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
13
13
|
from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
|
14
|
-
from dcicutils.file_utils import search_for_file
|
15
14
|
from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
|
16
15
|
merge_objects, remove_empty_properties, right_trim, split_string,
|
17
16
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
@@ -53,9 +52,10 @@ class StructuredDataSet:
|
|
53
52
|
def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
|
54
53
|
schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
|
55
54
|
order: Optional[List[str]] = None, prune: bool = True,
|
55
|
+
remove_empty_objects_from_lists: bool = True,
|
56
56
|
ref_lookup_strategy: Optional[Callable] = None,
|
57
57
|
ref_lookup_nocache: bool = False,
|
58
|
-
norefs: bool = False,
|
58
|
+
norefs: bool = False, merge: bool = False,
|
59
59
|
progress: Optional[Callable] = None,
|
60
60
|
debug_sleep: Optional[str] = None) -> None:
|
61
61
|
self._progress = progress if callable(progress) else None
|
@@ -65,7 +65,8 @@ class StructuredDataSet:
|
|
65
65
|
ref_lookup_nocache=ref_lookup_nocache) if portal else None
|
66
66
|
self._ref_lookup_strategy = ref_lookup_strategy
|
67
67
|
self._order = order
|
68
|
-
self._prune = prune
|
68
|
+
self._prune = prune is True
|
69
|
+
self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
|
69
70
|
self._warnings = {}
|
70
71
|
self._errors = {}
|
71
72
|
self._resolved_refs = set()
|
@@ -73,6 +74,7 @@ class StructuredDataSet:
|
|
73
74
|
self._nrows = 0
|
74
75
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
75
76
|
self._norefs = True if norefs is True else False
|
77
|
+
self._merge = True if merge is True else False
|
76
78
|
self._debug_sleep = None
|
77
79
|
if debug_sleep:
|
78
80
|
try:
|
@@ -93,14 +95,16 @@ class StructuredDataSet:
|
|
93
95
|
def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
|
94
96
|
schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
|
95
97
|
order: Optional[List[str]] = None, prune: bool = True,
|
98
|
+
remove_empty_objects_from_lists: bool = True,
|
96
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
97
100
|
ref_lookup_nocache: bool = False,
|
98
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False, merge: bool = False,
|
99
102
|
progress: Optional[Callable] = None,
|
100
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
101
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
105
|
+
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
102
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
103
|
-
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
107
|
+
norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
|
104
108
|
|
105
109
|
def validate(self, force: bool = False) -> None:
|
106
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -204,14 +208,6 @@ class StructuredDataSet:
|
|
204
208
|
result.append({"type": type_name, "file": file_name})
|
205
209
|
return result
|
206
210
|
|
207
|
-
def upload_files_located(self,
|
208
|
-
location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
|
209
|
-
upload_files = copy.deepcopy(self.upload_files)
|
210
|
-
for upload_file in upload_files:
|
211
|
-
if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
|
212
|
-
upload_file["path"] = file_path
|
213
|
-
return upload_files
|
214
|
-
|
215
211
|
@property
|
216
212
|
def nrows(self) -> int:
|
217
213
|
return self._nrows
|
@@ -346,7 +342,23 @@ class StructuredDataSet:
|
|
346
342
|
|
347
343
|
def _load_json_file(self, file: str) -> None:
|
348
344
|
with open(file) as f:
|
349
|
-
|
345
|
+
data = json.load(f)
|
346
|
+
if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
|
347
|
+
(self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
|
348
|
+
# If the JSON file name looks like a schema name then assume it
|
349
|
+
# contains an object or an array of object of that schema type.
|
350
|
+
if self._merge:
|
351
|
+
data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
|
352
|
+
self._add(Schema.type_name(file), data)
|
353
|
+
elif isinstance(data, dict):
|
354
|
+
# Otherwise if the JSON file name does not look like a schema name then
|
355
|
+
# assume it a dictionary where each property is the name of a schema, and
|
356
|
+
# which (each property) contains a list of object of that schema type.
|
357
|
+
for schema_name in data:
|
358
|
+
item = data[schema_name]
|
359
|
+
if self._merge:
|
360
|
+
item = self._merge_with_existing_portal_object(item, schema_name)
|
361
|
+
self._add(schema_name, item)
|
350
362
|
|
351
363
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
352
364
|
schema = None
|
@@ -368,7 +380,14 @@ class StructuredDataSet:
|
|
368
380
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
369
381
|
if self._autoadd_properties:
|
370
382
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
371
|
-
|
383
|
+
# New merge functionality (2024-05-25).
|
384
|
+
if self._merge:
|
385
|
+
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
386
|
+
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
387
|
+
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
388
|
+
"error": prune_error}, "validation")
|
389
|
+
else:
|
390
|
+
self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
|
372
391
|
if self._progress:
|
373
392
|
self._progress({
|
374
393
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -385,9 +404,20 @@ class StructuredDataSet:
|
|
385
404
|
self._note_error(schema._unresolved_refs, "ref")
|
386
405
|
self._resolved_refs.update(schema._resolved_refs)
|
387
406
|
|
388
|
-
def
|
389
|
-
if self._prune:
|
407
|
+
def _prune_structured_row(self, data: dict) -> Optional[str]:
|
408
|
+
if not self._prune:
|
409
|
+
return None
|
410
|
+
if not self._remove_empty_objects_from_lists:
|
390
411
|
remove_empty_properties(data)
|
412
|
+
return None
|
413
|
+
try:
|
414
|
+
remove_empty_properties(data, isempty_array_element=lambda element: element == {},
|
415
|
+
raise_exception_on_nonempty_array_element_after_empty=True)
|
416
|
+
except Exception as e:
|
417
|
+
return str(e)
|
418
|
+
return None
|
419
|
+
|
420
|
+
def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
|
391
421
|
if type_name in self._data:
|
392
422
|
self._data[type_name].extend([data] if isinstance(data, dict) else data)
|
393
423
|
else:
|
@@ -398,6 +428,18 @@ class StructuredDataSet:
|
|
398
428
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
399
429
|
structured_row[name] = properties[name]
|
400
430
|
|
431
|
+
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
|
432
|
+
"""
|
433
|
+
Given a Portal object (presumably/in-practice from the given metadata), if there is
|
434
|
+
an existing Portal item, identified by the identifying properties for the given object,
|
435
|
+
then merges the given object into the existing one and returns the result; otherwise
|
436
|
+
just returns the given object. Note that the given object may be CHANGED in place.
|
437
|
+
"""
|
438
|
+
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
439
|
+
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
440
|
+
return merge_objects(existing_portal_object, portal_object)
|
441
|
+
return portal_object
|
442
|
+
|
401
443
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
|
402
444
|
return (ref_lookup_flags &
|
403
445
|
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
@@ -2,39 +2,45 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
5
9
|
|
6
10
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
11
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# /
|
13
|
-
# /
|
14
|
-
#
|
15
|
-
# /
|
16
|
-
#
|
17
|
-
# /
|
18
|
-
# /
|
12
|
+
# Note this slight odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
19
26
|
#
|
20
27
|
def ref_validator(schema: Optional[dict],
|
21
28
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
29
|
"""
|
23
|
-
Returns False iff
|
24
|
-
the given property name
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
25
32
|
|
26
|
-
For example, if the schema is for
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
33
39
|
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
|
36
|
-
|
37
|
-
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
38
44
|
"""
|
39
45
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
46
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
62
68
|
|
63
69
|
|
64
70
|
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
66
72
|
def _is_accession_id(value: str) -> bool:
|
67
73
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
dcicutils/tmpfile_utils.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
|
+
from datetime import datetime
|
2
3
|
import os
|
3
4
|
import shutil
|
4
5
|
import tempfile
|
6
|
+
from uuid import uuid4 as uuid
|
5
7
|
from typing import List, Optional, Union
|
8
|
+
from dcicutils.file_utils import create_random_file
|
6
9
|
|
7
10
|
|
8
11
|
@contextmanager
|
@@ -15,22 +18,59 @@ def temporary_directory() -> str:
|
|
15
18
|
|
16
19
|
|
17
20
|
@contextmanager
|
18
|
-
def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
|
21
|
+
def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
19
22
|
content: Optional[Union[str, bytes, List[str]]] = None) -> str:
|
20
23
|
with temporary_directory() as tmp_directory_name:
|
21
|
-
tmp_file_name =
|
22
|
-
|
24
|
+
tmp_file_name = f"{prefix or ''}{name or tempfile.mktemp(dir='')}{suffix or ''}"
|
25
|
+
tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
|
26
|
+
with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
|
23
27
|
if content is not None:
|
24
28
|
tmp_file.write("\n".join(content) if isinstance(content, list) else content)
|
25
|
-
yield
|
29
|
+
yield tmp_file_path
|
30
|
+
|
31
|
+
|
32
|
+
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
    """
    Generates and returns the full path for a uniquely named file within the system temporary
    directory. Only the name is generated; the file itself is NOT created. The file name consists
    of the given prefix (if any), a UTC timestamp (YYYYMMDDhhmmss), a random 32-character
    hexadecimal string, and the given suffix (if any).
    """
    from datetime import timezone  # Local import; only datetime itself is imported at module level.
    # Note that datetime.utcnow() is deprecated as of Python 3.12; use a timezone-aware UTC time.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    random_string = f"{timestamp}{uuid().hex}"
    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
    return os.path.join(tempfile.gettempdir(), tmp_file_name)
|
39
|
+
|
40
|
+
|
41
|
+
@contextmanager
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
    """
    Context manager which yields the full path to a temporary file, obtained from the
    temporary_file context manager using the given prefix and suffix, after having
    populated it via create_random_file per the given nbytes, binary, and line_length.
    """
    random_file_options = dict(nbytes=nbytes, binary=binary, line_length=line_length)
    with temporary_file(prefix=prefix, suffix=suffix) as random_file_path:
        create_random_file(random_file_path, **random_file_options)
        yield random_file_path
|
26
47
|
|
27
48
|
|
28
49
|
def remove_temporary_directory(tmp_directory_name: str) -> None:
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
|
33
|
-
except Exception:
|
34
|
-
return False
|
50
|
+
"""
|
51
|
+
Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
|
52
|
+
"""
|
35
53
|
if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
|
36
54
|
shutil.rmtree(tmp_directory_name)
|
55
|
+
|
56
|
+
|
57
|
+
def remove_temporary_file(tmp_file_name: str) -> bool:
    """
    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
    Returns True if the file was removed, otherwise False; does not raise an exception on error.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        # Resolve the directory containing the file (i.e. any ".." components and symbolic links)
        # BEFORE the containment check; os.path.commonpath compares path components textually, so
        # an unresolved path like TMPDIR/../x would otherwise be regarded as within the temporary
        # directory. The final path component is deliberately left unresolved so that a symbolic
        # link which itself resides within the temporary directory can still be removed.
        directory, name = os.path.split(tmp_file_name)
        tmp_file_path = os.path.join(os.path.realpath(directory), name)
        if (os.path.commonpath([tmpdir, tmp_file_path]) == tmpdir) and os.path.isfile(tmp_file_path):
            os.remove(tmp_file_path)
            return True
        return False
    except Exception:
        return False
|
69
|
+
|
70
|
+
|
71
|
+
def is_temporary_directory(path: str) -> bool:
    """
    Returns True iff the given path is an existing directory which is, or is (somewhere) within,
    the system temporary directory; otherwise, including on any error, returns False.
    """
    try:
        # Resolve both paths (i.e. any ".." components and symbolic links) BEFORE comparing them;
        # os.path.commonpath compares path components textually, so an unresolved path like
        # TMPDIR/../x would otherwise be regarded as within the temporary directory.
        tmpdir = os.path.realpath(tempfile.gettempdir())
        resolved_path = os.path.realpath(path)
        return os.path.commonpath([resolved_path, tmpdir]) == tmpdir and os.path.isdir(resolved_path)
    except Exception:
        return False
|
dcicutils/zip_utils.py
CHANGED
@@ -2,7 +2,9 @@ from contextlib import contextmanager
|
|
2
2
|
from dcicutils.tmpfile_utils import temporary_directory, temporary_file
|
3
3
|
import gzip
|
4
4
|
import os
|
5
|
+
import shutil
|
5
6
|
import tarfile
|
7
|
+
import tempfile
|
6
8
|
from typing import List, Optional
|
7
9
|
import zipfile
|
8
10
|
|
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
|
|
45
47
|
outputf.write(inputf.read())
|
46
48
|
outputf.close()
|
47
49
|
yield tmp_file_name
|
50
|
+
|
51
|
+
|
52
|
+
def extract_file_from_zip(zip_file: str, file_to_extract: str,
                          destination_file: str, raise_exception: bool = True) -> bool:
    """
    Extracts from the given zip file, the given file to extract, writing it to the
    given destination file. Returns True if all is well, otherwise False, or if the
    raise_exception argument is True (the default), then raises an exception on error.
    Note that if the zip file simply does not contain the given file to extract,
    then False is returned, without an exception, regardless of raise_exception.
    """
    try:
        if not (destination_directory := os.path.dirname(destination_file)):
            destination_directory = os.getcwd()
            destination_file = os.path.join(destination_directory, destination_file)
        with tempfile.TemporaryDirectory() as tmp_directory_name:
            with zipfile.ZipFile(zip_file, "r") as zipf:
                if file_to_extract not in zipf.namelist():
                    return False
                # ZipFile.extract sanitizes the member name (e.g. drops any leading slash and
                # ".." components), so the file is not necessarily written to the temporary
                # directory joined with the member name; use the actual path which it returns.
                extracted_file = zipf.extract(file_to_extract, path=tmp_directory_name)
            os.makedirs(destination_directory, exist_ok=True)
            shutil.move(extracted_file, destination_file)
        return True
    except Exception:
        if raise_exception:
            raise  # Bare raise, to preserve the original traceback.
        return False
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dcicutils
|
3
|
-
Version: 8.9.0.
|
3
|
+
Version: 8.9.0.1b2
|
4
4
|
Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
|
5
5
|
Home-page: https://github.com/4dn-dcic/utils
|
6
6
|
License: MIT
|
7
7
|
Author: 4DN-DCIC Team
|
8
8
|
Author-email: support@4dnucleome.org
|
9
|
-
Requires-Python: >=3.8,<3.
|
9
|
+
Requires-Python: >=3.8,<3.13
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
|
|
24
24
|
Classifier: Topic :: Database :: Database Engines/Servers
|
25
25
|
Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
|
+
Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
27
28
|
Requires-Dist: aws-requests-auth (>=0.4.2,<1)
|
28
|
-
Requires-Dist: boto3 (>=1.
|
29
|
-
Requires-Dist: botocore (>=1.
|
29
|
+
Requires-Dist: boto3 (>=1.34.93,<2.0.0)
|
30
|
+
Requires-Dist: botocore (>=1.34.93,<2.0.0)
|
30
31
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
31
32
|
Requires-Dist: docker (>=4.4.4,<5.0.0)
|
32
33
|
Requires-Dist: elasticsearch (==7.13.4)
|
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
|
|
42
43
|
Requires-Dist: redis (>=4.5.1,<5.0.0)
|
43
44
|
Requires-Dist: requests (>=2.21.0,<3.0.0)
|
44
45
|
Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
|
46
|
+
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
45
47
|
Requires-Dist: structlog (>=19.2.0,<20.0.0)
|
46
48
|
Requires-Dist: toml (>=0.10.1,<1)
|
47
49
|
Requires-Dist: tqdm (>=4.66.2,<5.0.0)
|
@@ -5,11 +5,11 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
|
|
5
5
|
dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
|
6
6
|
dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
|
7
7
|
dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
|
8
|
-
dcicutils/command_utils.py,sha256=
|
8
|
+
dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
|
9
9
|
dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
12
|
-
dcicutils/creds_utils.py,sha256=
|
12
|
+
dcicutils/creds_utils.py,sha256=64BbIfS90T1eJmmQJrDyfrRa3V2F1x7T8lOrEeFfqJY,11127
|
13
13
|
dcicutils/data_readers.py,sha256=6EMrY7TjDE8H7bA_TCWtpLQP7slJ0YTL77_dNh6e7sg,7626
|
14
14
|
dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
|
15
15
|
dcicutils/datetime_utils.py,sha256=sM653aw_1zy1qBmfAH-WetCi2Fw9cnFK7FZN_Tg4onI,13499
|
@@ -27,10 +27,11 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
|
|
27
27
|
dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
|
-
dcicutils/ff_utils.py,sha256=
|
31
|
-
dcicutils/file_utils.py,sha256=
|
30
|
+
dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
|
31
|
+
dcicutils/file_utils.py,sha256=zyNdRl1Fu3SrQwjJWaIMvQpi4DRaodNZCX7oTkiPJ-A,10916
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
|
+
dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
|
34
35
|
dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
|
35
36
|
dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
|
36
37
|
dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
|
@@ -43,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
43
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
44
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
45
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
46
|
-
dcicutils/misc_utils.py,sha256=
|
47
|
+
dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
|
47
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
48
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
49
|
-
dcicutils/portal_object_utils.py,sha256=
|
50
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
|
51
|
+
dcicutils/portal_utils.py,sha256=TDGmJqxqWfuqdJZ-ARqbOxPXNuzlqNxPD49jMEY4VQA,45217
|
51
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
52
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
53
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -55,25 +56,25 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
|
55
56
|
dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
|
56
57
|
dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
|
57
58
|
dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
|
58
|
-
dcicutils/schema_utils.py,sha256=
|
59
|
+
dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
|
59
60
|
dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
|
60
61
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
61
|
-
dcicutils/scripts/view_portal_object.py,sha256=
|
62
|
+
dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
|
62
63
|
dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
|
63
64
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
64
65
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
65
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
66
|
-
dcicutils/structured_data.py,sha256=
|
67
|
+
dcicutils/structured_data.py,sha256=GfG96VyFwQIkmUax-ZdEzbWnfEiJxmeZEaUWz78IQZY,64030
|
67
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
68
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
69
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
70
|
-
dcicutils/tmpfile_utils.py,sha256=
|
71
|
+
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
71
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
72
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
73
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
74
|
-
dcicutils/zip_utils.py,sha256=
|
75
|
-
dcicutils-8.9.0.
|
76
|
-
dcicutils-8.9.0.
|
77
|
-
dcicutils-8.9.0.
|
78
|
-
dcicutils-8.9.0.
|
79
|
-
dcicutils-8.9.0.
|
75
|
+
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
+
dcicutils-8.9.0.1b2.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.9.0.1b2.dist-info/METADATA,sha256=Mh-FpaQpv_ipkozMQ_Ul_vezgpxdzX4lvp38jaDD8rc,3439
|
78
|
+
dcicutils-8.9.0.1b2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.9.0.1b2.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.9.0.1b2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|