dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +249 -37
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0
@@ -57,6 +57,7 @@
|
|
57
57
|
|
58
58
|
import argparse
|
59
59
|
from functools import lru_cache
|
60
|
+
import io
|
60
61
|
import json
|
61
62
|
import pyperclip
|
62
63
|
import os
|
@@ -97,11 +98,18 @@ def main():
|
|
97
98
|
help="Include all properties for schema usage.")
|
98
99
|
parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
|
99
100
|
parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
|
101
|
+
parser.add_argument("--post", type=str, required=False, default=None,
|
102
|
+
help="POST data of the main arg type with data from file specified with this option.")
|
103
|
+
parser.add_argument("--patch", type=str, required=False, default=None,
|
104
|
+
help="PATCH data of the main arg type with data from file specified with this option.")
|
100
105
|
parser.add_argument("--database", action="store_true", required=False, default=False,
|
101
106
|
help="Read from database output.")
|
107
|
+
parser.add_argument("--bool", action="store_true", required=False,
|
108
|
+
default=False, help="Only return whether found or not.")
|
102
109
|
parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
|
103
110
|
parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
|
104
111
|
help="Copy object data to clipboard.")
|
112
|
+
parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
|
105
113
|
parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
|
106
114
|
parser.add_argument("--more-details", action="store_true", required=False, default=False,
|
107
115
|
help="More detailed output.")
|
@@ -151,6 +159,18 @@ def main():
|
|
151
159
|
args.schema = True
|
152
160
|
|
153
161
|
if args.schema:
|
162
|
+
if args.post:
|
163
|
+
if post_data := _read_json_from_file(args.post):
|
164
|
+
if args.verbose:
|
165
|
+
_print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
|
166
|
+
if isinstance(post_data, dict):
|
167
|
+
post_data = [post_data]
|
168
|
+
elif not isinstance(post_data, list):
|
169
|
+
_print(f"POST data neither list nor dictionary: {args.post}")
|
170
|
+
for item in post_data:
|
171
|
+
portal.post_metadata(args.uuid, item)
|
172
|
+
if args.verbose:
|
173
|
+
_print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
|
154
174
|
schema, schema_name = _get_schema(portal, args.uuid)
|
155
175
|
if schema:
|
156
176
|
if args.copy:
|
@@ -166,14 +186,50 @@ def main():
|
|
166
186
|
_print_schema(schema, details=args.details, more_details=args.details,
|
167
187
|
all=args.all, raw=args.raw, raw_yaml=args.yaml)
|
168
188
|
return
|
169
|
-
|
170
|
-
|
189
|
+
elif args.patch:
|
190
|
+
if patch_data := _read_json_from_file(args.patch):
|
191
|
+
if args.verbose:
|
192
|
+
_print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
|
193
|
+
if isinstance(patch_data, dict):
|
194
|
+
patch_data = [patch_data]
|
195
|
+
elif not isinstance(patch_data, list):
|
196
|
+
_print(f"PATCH data neither list nor dictionary: {args.patch}")
|
197
|
+
for item in patch_data:
|
198
|
+
portal.patch_metadata(args.uuid, item)
|
199
|
+
if args.verbose:
|
200
|
+
_print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
|
201
|
+
return
|
202
|
+
else:
|
203
|
+
_print(f"No PATCH data found in file: {args.patch}")
|
204
|
+
exit(1)
|
205
|
+
|
206
|
+
data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
|
207
|
+
database=args.database, check=args.bool, verbose=args.verbose)
|
208
|
+
if args.bool:
|
209
|
+
if data:
|
210
|
+
_print(f"{args.uuid}: found")
|
211
|
+
exit(0)
|
212
|
+
else:
|
213
|
+
_print(f"{args.uuid}: not found")
|
214
|
+
exit(1)
|
171
215
|
if args.copy:
|
172
216
|
pyperclip.copy(json.dumps(data, indent=4))
|
173
217
|
if args.yaml:
|
174
218
|
_print(yaml.dump(data))
|
175
219
|
else:
|
176
|
-
|
220
|
+
if args.indent > 0:
|
221
|
+
_print(_format_json_with_indent(data, indent=args.indent))
|
222
|
+
else:
|
223
|
+
_print(json.dumps(data, default=str, indent=4))
|
224
|
+
|
225
|
+
|
226
|
+
def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
|
227
|
+
if isinstance(value, dict):
|
228
|
+
result = json.dumps(value, indent=4)
|
229
|
+
if indent > 0:
|
230
|
+
result = f"{indent * ' '}{result}"
|
231
|
+
result = result.replace("\n", f"\n{indent * ' '}")
|
232
|
+
return result
|
177
233
|
|
178
234
|
|
179
235
|
def _create_portal(ini: str, env: Optional[str] = None,
|
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
|
|
198
254
|
|
199
255
|
|
200
256
|
def _get_portal_object(portal: Portal, uuid: str,
|
201
|
-
raw: bool = False, database: bool = False,
|
257
|
+
raw: bool = False, database: bool = False,
|
258
|
+
check: bool = False, verbose: bool = False) -> dict:
|
202
259
|
response = None
|
203
260
|
try:
|
204
261
|
if not uuid.startswith("/"):
|
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
|
|
212
269
|
_exit()
|
213
270
|
_exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
|
214
271
|
if not response:
|
272
|
+
if check:
|
273
|
+
return None
|
215
274
|
_exit(f"Null response getting Portal object from {portal.server}: {uuid}")
|
216
275
|
if response.status_code not in [200, 307]:
|
217
276
|
# TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
|
218
277
|
_exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
|
219
278
|
if not response.json:
|
220
279
|
_exit(f"Invalid JSON getting Portal object: {uuid}")
|
221
|
-
|
280
|
+
response = response.json()
|
281
|
+
if raw:
|
282
|
+
response.pop("schema_version", None)
|
283
|
+
return response
|
222
284
|
|
223
285
|
|
224
286
|
@lru_cache(maxsize=1)
|
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
|
|
257
319
|
required: Optional[List[str]] = None) -> None:
|
258
320
|
if not schema or not isinstance(schema, dict):
|
259
321
|
return
|
322
|
+
identifying_properties = schema.get("identifyingProperties")
|
260
323
|
if level == 0:
|
261
324
|
if required_properties := schema.get("required"):
|
262
325
|
_print("- required properties:")
|
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
|
|
383
446
|
suffix += f" | enum"
|
384
447
|
if property_required:
|
385
448
|
suffix += f" | required"
|
449
|
+
if property_name in (identifying_properties or []):
|
450
|
+
suffix += f" | identifying"
|
386
451
|
if property.get("uniqueKey"):
|
387
452
|
suffix += f" | unique"
|
388
453
|
if pattern := property.get("pattern"):
|
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
|
|
529
594
|
print(line)
|
530
595
|
|
531
596
|
|
597
|
+
def _read_json_from_file(file: str) -> Optional[dict]:
|
598
|
+
if not os.path.exists(file):
|
599
|
+
_print(f"Cannot find file: {file}")
|
600
|
+
exit(1)
|
601
|
+
try:
|
602
|
+
with io.open(file, "r") as f:
|
603
|
+
try:
|
604
|
+
return json.load(f)
|
605
|
+
except Exception:
|
606
|
+
_print(f"Cannot parse JSON in file: {file}")
|
607
|
+
exit(1)
|
608
|
+
except Exception as e:
|
609
|
+
print(e)
|
610
|
+
_print(f"Cannot open file: {file}")
|
611
|
+
exit(1)
|
612
|
+
|
613
|
+
|
532
614
|
def _print(*args, **kwargs):
|
533
615
|
with uncaptured_output():
|
534
616
|
PRINT(*args, **kwargs)
|
dcicutils/structured_data.py
CHANGED
@@ -11,7 +11,6 @@ from webtest.app import TestApp
|
|
11
11
|
from dcicutils.common import OrchestratedApp
|
12
12
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
13
13
|
from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
|
14
|
-
from dcicutils.file_utils import search_for_file
|
15
14
|
from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
|
16
15
|
merge_objects, remove_empty_properties, right_trim, split_string,
|
17
16
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
@@ -53,9 +52,10 @@ class StructuredDataSet:
|
|
53
52
|
def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
|
54
53
|
schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
|
55
54
|
order: Optional[List[str]] = None, prune: bool = True,
|
55
|
+
remove_empty_objects_from_lists: bool = True,
|
56
56
|
ref_lookup_strategy: Optional[Callable] = None,
|
57
57
|
ref_lookup_nocache: bool = False,
|
58
|
-
norefs: bool = False,
|
58
|
+
norefs: bool = False, merge: bool = False,
|
59
59
|
progress: Optional[Callable] = None,
|
60
60
|
debug_sleep: Optional[str] = None) -> None:
|
61
61
|
self._progress = progress if callable(progress) else None
|
@@ -65,7 +65,8 @@ class StructuredDataSet:
|
|
65
65
|
ref_lookup_nocache=ref_lookup_nocache) if portal else None
|
66
66
|
self._ref_lookup_strategy = ref_lookup_strategy
|
67
67
|
self._order = order
|
68
|
-
self._prune = prune
|
68
|
+
self._prune = prune is True
|
69
|
+
self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
|
69
70
|
self._warnings = {}
|
70
71
|
self._errors = {}
|
71
72
|
self._resolved_refs = set()
|
@@ -73,6 +74,7 @@ class StructuredDataSet:
|
|
73
74
|
self._nrows = 0
|
74
75
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
75
76
|
self._norefs = True if norefs is True else False
|
77
|
+
self._merge = True if merge is True else False
|
76
78
|
self._debug_sleep = None
|
77
79
|
if debug_sleep:
|
78
80
|
try:
|
@@ -93,14 +95,16 @@ class StructuredDataSet:
|
|
93
95
|
def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
|
94
96
|
schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
|
95
97
|
order: Optional[List[str]] = None, prune: bool = True,
|
98
|
+
remove_empty_objects_from_lists: bool = True,
|
96
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
97
100
|
ref_lookup_nocache: bool = False,
|
98
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False, merge: bool = False,
|
99
102
|
progress: Optional[Callable] = None,
|
100
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
101
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
105
|
+
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
102
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
103
|
-
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
107
|
+
norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
|
104
108
|
|
105
109
|
def validate(self, force: bool = False) -> None:
|
106
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -204,14 +208,6 @@ class StructuredDataSet:
|
|
204
208
|
result.append({"type": type_name, "file": file_name})
|
205
209
|
return result
|
206
210
|
|
207
|
-
def upload_files_located(self,
|
208
|
-
location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
|
209
|
-
upload_files = copy.deepcopy(self.upload_files)
|
210
|
-
for upload_file in upload_files:
|
211
|
-
if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
|
212
|
-
upload_file["path"] = file_path
|
213
|
-
return upload_files
|
214
|
-
|
215
211
|
@property
|
216
212
|
def nrows(self) -> int:
|
217
213
|
return self._nrows
|
@@ -346,7 +342,23 @@ class StructuredDataSet:
|
|
346
342
|
|
347
343
|
def _load_json_file(self, file: str) -> None:
    """
    Loads the given JSON file into this data set. If the file name looks like the name
    of a schema known to the Portal then the file is assumed to contain an object, or
    a list of objects, of that type; otherwise, if the file contains a dictionary, each
    of its properties is assumed to be a schema name whose value is a list of objects
    of that type. If merging was requested then each loaded object is first merged
    with any corresponding object which already exists in the Portal.
    """
    def merge_if_requested(value, schema_name: str):
        # The merge helper takes ONE object (dictionary) at a time, so a
        # list of objects is merged per element rather than passed whole.
        if not self._merge:
            return value
        if isinstance(value, list):
            return [self._merge_with_existing_portal_object(element, schema_name)
                    if isinstance(element, dict) else element for element in value]
        if isinstance(value, dict):
            return self._merge_with_existing_portal_object(value, schema_name)
        return value

    with open(file) as f:
        data = json.load(f)
    # NOTE(review): self._portal is dereferenced unconditionally here, though the constructor
    # allows it to be None; the intended behavior without a Portal needs to be confirmed.
    if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
            (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)):  # noqa
        # If the JSON file name looks like a schema name then assume it
        # contains an object or an array of objects of that schema type.
        self._add(schema_name_inferred_from_file_name,
                  merge_if_requested(data, schema_name_inferred_from_file_name))
    elif isinstance(data, dict):
        # Otherwise if the JSON file name does not look like a schema name then
        # assume it is a dictionary where each property is the name of a schema, and
        # which (each property) contains a list of objects of that schema type.
        for schema_name in data:
            self._add(schema_name, merge_if_requested(data[schema_name], schema_name))
|
350
362
|
|
351
363
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
352
364
|
schema = None
|
@@ -368,7 +380,14 @@ class StructuredDataSet:
|
|
368
380
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
369
381
|
if self._autoadd_properties:
|
370
382
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
371
|
-
|
383
|
+
# New merge functionality (2024-05-25).
|
384
|
+
if self._merge:
|
385
|
+
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
386
|
+
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
387
|
+
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
388
|
+
"error": prune_error}, "validation")
|
389
|
+
else:
|
390
|
+
self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
|
372
391
|
if self._progress:
|
373
392
|
self._progress({
|
374
393
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -385,9 +404,20 @@ class StructuredDataSet:
|
|
385
404
|
self._note_error(schema._unresolved_refs, "ref")
|
386
405
|
self._resolved_refs.update(schema._resolved_refs)
|
387
406
|
|
388
|
-
def
|
389
|
-
if self._prune:
|
407
|
+
def _prune_structured_row(self, data: dict) -> Optional[str]:
    """
    Removes (in place) empty properties from the given structured row, if pruning is
    enabled for this data set. When removal of empty objects from lists is also enabled,
    empty ({}) list elements are removed as well, and any exception raised by that
    stricter removal is returned as an error message string. Returns None otherwise.
    """
    if self._prune:
        if self._remove_empty_objects_from_lists:
            try:
                remove_empty_properties(data, isempty_array_element=lambda element: element == {},
                                        raise_exception_on_nonempty_array_element_after_empty=True)
            except Exception as e:
                return str(e)
        else:
            remove_empty_properties(data)
    return None
|
419
|
+
|
420
|
+
def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
|
391
421
|
if type_name in self._data:
|
392
422
|
self._data[type_name].extend([data] if isinstance(data, dict) else data)
|
393
423
|
else:
|
@@ -398,6 +428,18 @@ class StructuredDataSet:
|
|
398
428
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
399
429
|
structured_row[name] = properties[name]
|
400
430
|
|
431
|
+
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
    """
    Given a Portal object (presumably/in-practice from the given metadata), if there is
    an existing Portal item, identified by the identifying properties for the given object,
    then merges the given object into the existing one and returns the result; otherwise
    just returns the given object. Note that the given object may be CHANGED in place.
    If this data set has no Portal then there is nothing to look up or merge with,
    and the given object is returned as-is.
    """
    if self._portal is None:
        # A data set may be created without a Portal (even with merge requested).
        return portal_object
    for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type) or []:
        existing_portal_object = self._portal.get_metadata(identifying_path, raw=True, raise_exception=False)
        if existing_portal_object:
            # The first identifying path which resolves to an existing item wins.
            return merge_objects(existing_portal_object, portal_object)
    return portal_object
|
442
|
+
|
401
443
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
    # True iff every bit of Portal.LOOKUP_SPECIFIED_TYPE is set in the given flags.
    specified_type_flag = Portal.LOOKUP_SPECIFIED_TYPE
    return (ref_lookup_flags & specified_type_flag) == specified_type_flag
|
@@ -2,39 +2,45 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
5
9
|
|
6
10
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
11
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# /
|
13
|
-
# /
|
14
|
-
#
|
15
|
-
# /
|
16
|
-
#
|
17
|
-
# /
|
18
|
-
# /
|
12
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
19
26
|
#
|
20
27
|
def ref_validator(schema: Optional[dict],
|
21
28
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
29
|
"""
|
23
|
-
Returns False iff
|
24
|
-
the given property name
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
25
32
|
|
26
|
-
For example, if the schema is for
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
33
39
|
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
|
36
|
-
|
37
|
-
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
38
44
|
"""
|
39
45
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
46
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
62
68
|
|
63
69
|
|
64
70
|
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
66
72
|
def _is_accession_id(value: str) -> bool:
|
67
73
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
dcicutils/tmpfile_utils.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
|
+
from datetime import datetime
|
2
3
|
import os
|
3
4
|
import shutil
|
4
5
|
import tempfile
|
6
|
+
from uuid import uuid4 as uuid
|
5
7
|
from typing import List, Optional, Union
|
8
|
+
from dcicutils.file_utils import create_random_file
|
6
9
|
|
7
10
|
|
8
11
|
@contextmanager
|
@@ -15,22 +18,59 @@ def temporary_directory() -> str:
|
|
15
18
|
|
16
19
|
|
17
20
|
@contextmanager
def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                   content: Optional[Union[str, bytes, List[str]]] = None) -> str:
    """
    Context manager which creates a file within a newly created temporary directory and
    yields its full path. The file name is the given name, or a generated one if no name
    is given, with the given prefix and suffix (if any) around it. If content is given
    it is written to the file; a list of strings is written joined by newlines, and
    bytes content causes the file to be written in binary mode.
    """
    with temporary_directory() as tmp_directory_name:
        base_name = name or tempfile.mktemp(dir='')
        tmp_file_path = os.path.join(tmp_directory_name, f"{prefix or ''}{base_name}{suffix or ''}")
        mode = "wb" if isinstance(content, bytes) else "w"
        with open(tmp_file_path, mode) as tmp_file:
            if content is not None:
                if isinstance(content, list):
                    tmp_file.write("\n".join(content))
                else:
                    tmp_file.write(content)
        # The file is closed (and so fully written) by the time its path is handed out.
        yield tmp_file_path
|
30
|
+
|
31
|
+
|
32
|
+
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
    """
    Generates and returns the full path to a file within the system temporary directory.
    The file itself is NOT created. The file name is the given prefix (if any), followed
    by the current UTC date/time (YYYYMMDDhhmmss), followed by the 32 hex digits of a
    random UUID, followed by the given suffix (if any).
    """
    from datetime import timezone  # Local import; only datetime itself is imported at file level.
    # Use an aware "now" in UTC, as datetime.utcnow is deprecated as-of Python 3.12.
    timestamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
    random_string = f"{timestamp}{uuid().hex}"
    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
    return os.path.join(tempfile.gettempdir(), tmp_file_name)
|
39
|
+
|
40
|
+
|
41
|
+
@contextmanager
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
    """
    Context manager which creates a temporary file, via temporary_file using the given
    prefix and suffix, populates it via create_random_file using the given nbytes, binary,
    and line_length arguments, and yields the full path to the file.
    """
    with temporary_file(prefix=prefix, suffix=suffix) as random_file_path:
        create_random_file(random_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
        yield random_file_path
|
26
47
|
|
27
48
|
|
28
49
|
def remove_temporary_directory(tmp_directory_name: str) -> None:
    """
    Recursively removes the given directory; but ONLY if is_temporary_directory says
    it is a temporary directory, otherwise does nothing.
    """
    if not is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
        return
    shutil.rmtree(tmp_directory_name)
|
55
|
+
|
56
|
+
|
57
|
+
def remove_temporary_file(tmp_file_name: str) -> bool:
    """
    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
    Returns True if the file was removed; otherwise (including on any error) returns False.

    The directory part of the given path is fully resolved (".." components and symbolic
    links) before it is compared to the temporary directory, so a path which merely STARTS
    with the temporary directory, e.g. <tmpdir>/../some/file, is not removed.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        directory, base_name = os.path.split(tmp_file_name)
        # Resolve only the directory, so that a symbolic link which itself lives
        # in the temporary directory is what gets removed, rather than its target.
        file_path = os.path.join(os.path.realpath(directory), base_name)
        if (os.path.commonpath([tmpdir, file_path]) == tmpdir) and os.path.isfile(file_path):
            os.remove(file_path)
            return True
        return False
    except Exception:
        # Deliberately best-effort; any problem whatsoever just means "not removed".
        return False
|
69
|
+
|
70
|
+
|
71
|
+
def is_temporary_directory(path: str) -> bool:
    """
    Returns True iff the given path is an existing directory which is (somewhere) within,
    or is, the system temporary directory; otherwise (including on any error) returns False.

    Both paths are fully resolved (".." components and symbolic links) before being
    compared, so a path which merely STARTS with the temporary directory,
    e.g. <tmpdir>/../some/directory, is not regarded as temporary.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        resolved_path = os.path.realpath(path)
        return os.path.commonpath([resolved_path, tmpdir]) == tmpdir and os.path.isdir(resolved_path)
    except Exception:
        # Deliberately best-effort; any problem whatsoever just means "not a temporary directory".
        return False
|
dcicutils/zip_utils.py
CHANGED
@@ -2,7 +2,9 @@ from contextlib import contextmanager
|
|
2
2
|
from dcicutils.tmpfile_utils import temporary_directory, temporary_file
|
3
3
|
import gzip
|
4
4
|
import os
|
5
|
+
import shutil
|
5
6
|
import tarfile
|
7
|
+
import tempfile
|
6
8
|
from typing import List, Optional
|
7
9
|
import zipfile
|
8
10
|
|
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
|
|
45
47
|
outputf.write(inputf.read())
|
46
48
|
outputf.close()
|
47
49
|
yield tmp_file_name
|
50
|
+
|
51
|
+
|
52
|
+
def extract_file_from_zip(zip_file: str, file_to_extract: str,
                          destination_file: str, raise_exception: bool = True) -> bool:
    """
    Extracts from the given zip file, the given file to extract, writing it to the
    given destination file. Returns True if all is well. Returns False if the file
    to extract is not in the zip file. On any error, returns False, or if the
    raise_exception argument is True (the default), then raises an exception.
    """
    try:
        if not (destination_directory := os.path.dirname(destination_file)):
            destination_directory = os.getcwd()
            destination_file = os.path.join(destination_directory, destination_file)
        with tempfile.TemporaryDirectory() as tmp_directory_name:
            with zipfile.ZipFile(zip_file, "r") as zipf:
                if file_to_extract not in zipf.namelist():
                    return False
                # Use the path actually written by extract; it sanitizes the member name (e.g.
                # strips a leading slash and ".." parts), so re-joining the raw member name
                # onto the temporary directory can yield a different (even outside) path.
                extracted_file = zipf.extract(file_to_extract, path=tmp_directory_name)
            os.makedirs(destination_directory, exist_ok=True)
            shutil.move(extracted_file, destination_file)
            return True
    except Exception:
        if raise_exception:
            raise  # Bare raise preserves the original traceback.
        return False
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dcicutils
|
3
|
-
Version: 8.9.0.
|
3
|
+
Version: 8.9.0.1b2
|
4
4
|
Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
|
5
5
|
Home-page: https://github.com/4dn-dcic/utils
|
6
6
|
License: MIT
|
7
7
|
Author: 4DN-DCIC Team
|
8
8
|
Author-email: support@4dnucleome.org
|
9
|
-
Requires-Python: >=3.8,<3.
|
9
|
+
Requires-Python: >=3.8,<3.13
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
|
|
24
24
|
Classifier: Topic :: Database :: Database Engines/Servers
|
25
25
|
Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
|
+
Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
27
28
|
Requires-Dist: aws-requests-auth (>=0.4.2,<1)
|
28
|
-
Requires-Dist: boto3 (>=1.
|
29
|
-
Requires-Dist: botocore (>=1.
|
29
|
+
Requires-Dist: boto3 (>=1.34.93,<2.0.0)
|
30
|
+
Requires-Dist: botocore (>=1.34.93,<2.0.0)
|
30
31
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
31
32
|
Requires-Dist: docker (>=4.4.4,<5.0.0)
|
32
33
|
Requires-Dist: elasticsearch (==7.13.4)
|
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
|
|
42
43
|
Requires-Dist: redis (>=4.5.1,<5.0.0)
|
43
44
|
Requires-Dist: requests (>=2.21.0,<3.0.0)
|
44
45
|
Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
|
46
|
+
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
45
47
|
Requires-Dist: structlog (>=19.2.0,<20.0.0)
|
46
48
|
Requires-Dist: toml (>=0.10.1,<1)
|
47
49
|
Requires-Dist: tqdm (>=4.66.2,<5.0.0)
|
@@ -5,11 +5,11 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
|
|
5
5
|
dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
|
6
6
|
dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
|
7
7
|
dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
|
8
|
-
dcicutils/command_utils.py,sha256=
|
8
|
+
dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
|
9
9
|
dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
12
|
-
dcicutils/creds_utils.py,sha256=
|
12
|
+
dcicutils/creds_utils.py,sha256=64BbIfS90T1eJmmQJrDyfrRa3V2F1x7T8lOrEeFfqJY,11127
|
13
13
|
dcicutils/data_readers.py,sha256=6EMrY7TjDE8H7bA_TCWtpLQP7slJ0YTL77_dNh6e7sg,7626
|
14
14
|
dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
|
15
15
|
dcicutils/datetime_utils.py,sha256=sM653aw_1zy1qBmfAH-WetCi2Fw9cnFK7FZN_Tg4onI,13499
|
@@ -27,10 +27,11 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
|
|
27
27
|
dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
|
-
dcicutils/ff_utils.py,sha256=
|
31
|
-
dcicutils/file_utils.py,sha256=
|
30
|
+
dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
|
31
|
+
dcicutils/file_utils.py,sha256=zyNdRl1Fu3SrQwjJWaIMvQpi4DRaodNZCX7oTkiPJ-A,10916
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
|
+
dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
|
34
35
|
dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
|
35
36
|
dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
|
36
37
|
dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
|
@@ -43,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
43
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
44
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
45
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
46
|
-
dcicutils/misc_utils.py,sha256=
|
47
|
+
dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
|
47
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
48
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
49
|
-
dcicutils/portal_object_utils.py,sha256=
|
50
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
|
51
|
+
dcicutils/portal_utils.py,sha256=TDGmJqxqWfuqdJZ-ARqbOxPXNuzlqNxPD49jMEY4VQA,45217
|
51
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
52
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
53
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -55,25 +56,25 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
|
55
56
|
dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
|
56
57
|
dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
|
57
58
|
dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
|
58
|
-
dcicutils/schema_utils.py,sha256=
|
59
|
+
dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
|
59
60
|
dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
|
60
61
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
61
|
-
dcicutils/scripts/view_portal_object.py,sha256=
|
62
|
+
dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
|
62
63
|
dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
|
63
64
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
64
65
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
65
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
66
|
-
dcicutils/structured_data.py,sha256=
|
67
|
+
dcicutils/structured_data.py,sha256=GfG96VyFwQIkmUax-ZdEzbWnfEiJxmeZEaUWz78IQZY,64030
|
67
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
68
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
69
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
70
|
-
dcicutils/tmpfile_utils.py,sha256=
|
71
|
+
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
71
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
72
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
73
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
74
|
-
dcicutils/zip_utils.py,sha256=
|
75
|
-
dcicutils-8.9.0.
|
76
|
-
dcicutils-8.9.0.
|
77
|
-
dcicutils-8.9.0.
|
78
|
-
dcicutils-8.9.0.
|
79
|
-
dcicutils-8.9.0.
|
75
|
+
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
+
dcicutils-8.9.0.1b2.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.9.0.1b2.dist-info/METADATA,sha256=Mh-FpaQpv_ipkozMQ_Ul_vezgpxdzX4lvp38jaDD8rc,3439
|
78
|
+
dcicutils-8.9.0.1b2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.9.0.1b2.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.9.0.1b2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|