dcicutils 8.14.0.1b9__py3-none-any.whl → 8.14.0.1b10__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/scripts/update_portal_object.py +147 -40
- {dcicutils-8.14.0.1b9.dist-info → dcicutils-8.14.0.1b10.dist-info}/METADATA +1 -1
- {dcicutils-8.14.0.1b9.dist-info → dcicutils-8.14.0.1b10.dist-info}/RECORD +6 -6
- {dcicutils-8.14.0.1b9.dist-info → dcicutils-8.14.0.1b10.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.14.0.1b9.dist-info → dcicutils-8.14.0.1b10.dist-info}/WHEEL +0 -0
- {dcicutils-8.14.0.1b9.dist-info → dcicutils-8.14.0.1b10.dist-info}/entry_points.txt +0 -0
@@ -14,13 +14,15 @@ import io
|
|
14
14
|
import json
|
15
15
|
import os
|
16
16
|
import re
|
17
|
+
import shutil
|
17
18
|
import sys
|
18
19
|
from typing import Callable, List, Optional, Tuple, Union
|
19
20
|
from dcicutils.command_utils import yes_or_no
|
20
21
|
from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
|
21
22
|
from dcicutils.ff_utils import delete_metadata, purge_metadata
|
22
|
-
from dcicutils.misc_utils import get_error_message, ignored, PRINT
|
23
|
+
from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case
|
23
24
|
from dcicutils.portal_utils import Portal as PortalFromUtils
|
25
|
+
from dcicutils.tmpfile_utils import temporary_directory
|
24
26
|
|
25
27
|
|
26
28
|
class Portal(PortalFromUtils):
|
@@ -131,6 +133,8 @@ def main():
|
|
131
133
|
parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.")
|
132
134
|
parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
|
133
135
|
parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.")
|
136
|
+
parser.add_argument("--noprogress", action="store_true", required=False, default=False,
|
137
|
+
help="No progress bar output for --load.")
|
134
138
|
parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
|
135
139
|
args = parser.parse_args()
|
136
140
|
|
@@ -158,27 +162,8 @@ def main():
|
|
158
162
|
_print("The --env is not used for the --load option (to load data via snovault.loadxl).")
|
159
163
|
if args.schema:
|
160
164
|
_print("The --schema is not used for the --load option (to load data via snovault.loadxl).")
|
161
|
-
|
162
|
-
|
163
|
-
if args.ini:
|
164
|
-
ini_file = args.ini
|
165
|
-
else:
|
166
|
-
ini_file = _DEFAULT_INI_FILE_FOR_LOAD
|
167
|
-
if not os.path.exists(ini_file):
|
168
|
-
_print(f"The INI file required for --load is not found: {ini_file}")
|
169
|
-
exit(1)
|
170
|
-
if not os.path.isdir(args.load):
|
171
|
-
_print(f"Load directory does not exist: {args.load}")
|
172
|
-
exit(1)
|
173
|
-
portal = None
|
174
|
-
with captured_output(not args.debug):
|
175
|
-
portal = Portal(ini_file)
|
176
|
-
if args.verbose:
|
177
|
-
_print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}")
|
178
|
-
_print(f"Portal INI file for load is: {ini_file}")
|
179
|
-
load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False)
|
180
|
-
if args.verbose:
|
181
|
-
_print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}")
|
165
|
+
_load_data(inserts_directory=args.load, ini_file=args.ini,
|
166
|
+
verbose=args.verbose, debug=args.debug, noprogress=args.noprogress)
|
182
167
|
exit(0)
|
183
168
|
|
184
169
|
portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
|
@@ -192,7 +177,7 @@ def main():
|
|
192
177
|
_post_or_patch_or_upsert(portal=portal,
|
193
178
|
file_or_directory=args.post,
|
194
179
|
explicit_schema_name=explicit_schema_name,
|
195
|
-
update_function=
|
180
|
+
update_function=_post_data,
|
196
181
|
update_action_name="POST",
|
197
182
|
noignore=args.noignore, ignore=args.ignore,
|
198
183
|
confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
|
@@ -200,7 +185,7 @@ def main():
|
|
200
185
|
_post_or_patch_or_upsert(portal=portal,
|
201
186
|
file_or_directory=args.patch,
|
202
187
|
explicit_schema_name=explicit_schema_name,
|
203
|
-
update_function=
|
188
|
+
update_function=_patch_data,
|
204
189
|
update_action_name="PATCH",
|
205
190
|
patch_delete_fields=args.delete,
|
206
191
|
noignore=args.noignore, ignore=args.ignore,
|
@@ -210,7 +195,7 @@ def main():
|
|
210
195
|
_post_or_patch_or_upsert(portal=portal,
|
211
196
|
file_or_directory=args.upsert,
|
212
197
|
explicit_schema_name=explicit_schema_name,
|
213
|
-
update_function=
|
198
|
+
update_function=_upsert_data,
|
214
199
|
update_action_name="UPSERT",
|
215
200
|
patch_delete_fields=args.delete,
|
216
201
|
noignore=args.noignore, ignore=args.ignore,
|
@@ -329,11 +314,11 @@ def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]:
|
|
329
314
|
return data
|
330
315
|
|
331
316
|
|
332
|
-
def
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
317
|
+
def _post_data(portal: Portal, data: dict, schema_name: str,
|
318
|
+
file: Optional[str] = None, index: int = 0,
|
319
|
+
patch_delete_fields: Optional[str] = None,
|
320
|
+
noignore: bool = False, ignore: Optional[List[str]] = None,
|
321
|
+
confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
|
337
322
|
ignored(patch_delete_fields)
|
338
323
|
if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
|
339
324
|
if isinstance(file, str) and isinstance(index, int):
|
@@ -359,11 +344,11 @@ def post_data(portal: Portal, data: dict, schema_name: str,
|
|
359
344
|
return
|
360
345
|
|
361
346
|
|
362
|
-
def
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
347
|
+
def _patch_data(portal: Portal, data: dict, schema_name: str,
|
348
|
+
file: Optional[str] = None, index: int = 0,
|
349
|
+
patch_delete_fields: Optional[str] = None,
|
350
|
+
noignore: bool = False, ignore: Optional[List[str]] = None,
|
351
|
+
confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
|
367
352
|
if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
|
368
353
|
if isinstance(file, str) and isinstance(index, int):
|
369
354
|
_print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})")
|
@@ -390,11 +375,11 @@ def patch_data(portal: Portal, data: dict, schema_name: str,
|
|
390
375
|
return
|
391
376
|
|
392
377
|
|
393
|
-
def
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
378
|
+
def _upsert_data(portal: Portal, data: dict, schema_name: str,
|
379
|
+
file: Optional[str] = None, index: int = 0,
|
380
|
+
patch_delete_fields: Optional[str] = None,
|
381
|
+
noignore: bool = False, ignore: Optional[List[str]] = None,
|
382
|
+
confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
|
398
383
|
if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
|
399
384
|
if isinstance(file, str) and isinstance(index, int):
|
400
385
|
_print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
|
@@ -423,6 +408,126 @@ def upsert_data(portal: Portal, data: dict, schema_name: str,
|
|
423
408
|
return
|
424
409
|
|
425
410
|
|
411
|
+
def _load_data(inserts_directory: str, ini_file: Optional[str],
               verbose: bool = False, debug: bool = False, noprogress: bool = False) -> None:
    """
    Loads the JSON data files in the given directory into the Portal via snovault.loadxl.

    Only files named for a known schema (snake-case or camel-case type name), which contain
    a non-empty JSON array, are loaded. If any other *.json file is present in the directory
    then just the loadable files are copied to a temporary directory which is loaded instead,
    so that files reported as ignored really are ignored.

    :param inserts_directory: Directory containing the JSON data files; may be relative or use "~".
    :param ini_file: Portal INI file; if falsy then _DEFAULT_INI_FILE_FOR_LOAD is used.
    :param verbose: If True then prints messages before and after the load.
    :param debug: If True then does not capture output while creating the Portal, and prints
                  per-item progress lines when the progress bar is disabled.
    :param noprogress: If True then no progress bar is displayed.

    Exits (with status 1) if the INI file or the directory does not exist.
    """

    from snovault.loadxl import load_all_gen, LoadGenWrapper
    from dcicutils.captured_output import captured_output
    from dcicutils.progress_bar import ProgressBar

    def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict) -> None:
        # Runs snovault.loadxl on the given directory, reporting progress per response line;
        # schema_names_to_load maps each schema (file) name to its number of items.

        progress_total = sum(schema_names_to_load.values()) * 2  # loadxl does two passes
        progress_bar = ProgressBar(progress_total) if not noprogress else None

        def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str:
            if not isinstance(encoding, str):
                encoding = "utf-8"
            if isinstance(str_or_bytes, bytes):
                return str_or_bytes.decode(encoding).strip()
            elif isinstance(str_or_bytes, str):
                return str_or_bytes.strip()
            return ""

        LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$")
        LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Skip",
                              "CHECK": "Validate", "ERROR": "Error"}
        current_item_type = None
        current_item_count = 0
        current_item_total = 0
        total_item_count = 0
        for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory,
                                                docsdir=None, overwrite=True, verbose=True)):
            total_item_count += 1
            item = decode_bytes(item)
            if not (match := LOADXL_RESPONSE_PATTERN.match(item)):
                continue
            # Fall back to the raw action code rather than failing on one not in the table.
            action_code = match.group(1).upper()
            action = LOADXL_ACTION_NAME.get(action_code, action_code)
            # match.group(2) is the identifying value; not currently used.
            item_type = match.group(3)
            if current_item_type != item_type:
                if noprogress and debug and current_item_type is not None:
                    print()
                current_item_type = item_type
                current_item_count = 0
                # The reported type may not be one of our file-derived names; do not fail on that.
                current_item_total = schema_names_to_load.get(item_type, 0)
                if progress_bar:
                    progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}")
            current_item_count += 1
            if progress_bar:
                progress_bar.set_progress(total_item_count)
            elif debug:
                print(f"{current_item_type}: {current_item_count} of {current_item_total} ({action})")
        if progress_bar:
            progress_bar.set_description("▶ Load Complete")
            print()

    if not ini_file:
        ini_file = _DEFAULT_INI_FILE_FOR_LOAD
    if not os.path.isabs(ini_file := os.path.expanduser(ini_file)):
        ini_file = os.path.join(os.getcwd(), ini_file)
    if not os.path.exists(ini_file):
        _print(f"The INI file required for --load is not found: {ini_file}")
        exit(1)
    if not os.path.isabs(inserts_directory := os.path.expanduser(inserts_directory)):
        inserts_directory = os.path.join(os.getcwd(), inserts_directory)
    if not os.path.isdir(inserts_directory):
        _print(f"Load directory does not exist: {inserts_directory}")
        exit(1)
    portal = None
    with captured_output(not debug):
        portal = Portal(ini_file)
    if verbose:
        _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}")
        _print(f"Portal INI file for load is: {ini_file}")

    schema_names = list(_get_schemas(portal).keys())
    schema_snake_case_names = [to_snake_case(item) for item in schema_names]
    schema_names_to_load = {}  # schema (file) name -> number of items in the file
    files_to_load = []  # paths of the files which passed all of the checks below

    # If anything in the directory has to be ignored then load from a temporary
    # directory containing only the loadable files, rather than from the directory itself.
    copy_to_temporary_directory = False
    for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")):
        json_file_name = os.path.basename(json_file_path)
        schema_name = json_file_name[:-len(".json")]
        if (schema_name not in schema_snake_case_names) and (schema_name not in schema_names):
            _print(f"File is not named for a known schema: {json_file_name} ▶ ignoring")
            copy_to_temporary_directory = True
            continue
        try:
            with io.open(json_file_path, "r") as f:
                data = json.load(f)
        except Exception:
            # Deliberately best-effort: an unreadable or malformed file is reported and skipped.
            _print(f"Cannot load JSON data from file: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
            continue
        if not isinstance(data, list):
            _print(f"Data JSON file does not contain an array: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
        elif (nobjects := len(data)) < 1:
            _print(f"Data JSON file contains no items: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
        else:
            schema_names_to_load[schema_name] = nobjects
            files_to_load.append(json_file_path)
    if not schema_names_to_load:
        _print(f"Directory contains no valid data: {inserts_directory}")
        return
    if copy_to_temporary_directory:
        with temporary_directory() as tmpdir:
            if debug:
                _print(f"Using temporary directory: {tmpdir}")
            for json_file_path in files_to_load:
                shutil.copy(json_file_path, tmpdir)
            loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load)
    else:
        loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load)
    if verbose:
        _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}")
|
529
|
+
|
530
|
+
|
426
531
|
def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict:
|
427
532
|
ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE
|
428
533
|
if isinstance(ignore, list):
|
@@ -509,6 +614,8 @@ def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str
|
|
509
614
|
|
510
615
|
@lru_cache(maxsize=1)
def _get_schemas(portal: Portal) -> Optional[dict]:
    """
    Returns the schemas for the given Portal. If the Portal has a (truthy) vapp then the
    schemas are fetched through it from /profiles (raw frame), otherwise they are obtained
    from portal.get_schemas(). The result is cached for the most recently used Portal only.
    """
    return portal.vapp.get("/profiles/?frame=raw").json if portal.vapp else portal.get_schemas()
|
513
620
|
|
514
621
|
|
@@ -60,7 +60,7 @@ dcicutils/s3_utils.py,sha256=h2B9ftOo-kxqfiKth5ZDC_cAUFy1Pbu7BrVanFnE5Iw,28839
|
|
60
60
|
dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
|
61
61
|
dcicutils/scripts/publish_to_pypi.py,sha256=sMd4WASQGlxlh7uLrt2eGkFRXYgONVmvIg8mClMS5RQ,13903
|
62
62
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
63
|
-
dcicutils/scripts/update_portal_object.py,sha256=
|
63
|
+
dcicutils/scripts/update_portal_object.py,sha256=OQ1v6QRJdVVUik_4RtNbMK2w7l4t-Htrm46pgj4kTNo,30063
|
64
64
|
dcicutils/scripts/view_portal_object.py,sha256=lcgXWH9ooVf7tJDIRnoFGOgT0wYLGhiJlJW3a9w6A_c,36983
|
65
65
|
dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
|
66
66
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
75
75
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
76
76
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
77
77
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
78
|
-
dcicutils-8.14.0.
|
79
|
-
dcicutils-8.14.0.
|
80
|
-
dcicutils-8.14.0.
|
81
|
-
dcicutils-8.14.0.
|
82
|
-
dcicutils-8.14.0.
|
78
|
+
dcicutils-8.14.0.1b10.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
79
|
+
dcicutils-8.14.0.1b10.dist-info/METADATA,sha256=A0R06QINC7rpiYDmPXmX_lGCeK6VN7KmdgP8do7ss54,3440
|
80
|
+
dcicutils-8.14.0.1b10.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
81
|
+
dcicutils-8.14.0.1b10.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
|
82
|
+
dcicutils-8.14.0.1b10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|