dcicutils 8.14.0.1b15__py3-none-any.whl → 8.14.0.1b17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,10 +17,11 @@ import re
17
17
  import shutil
18
18
  import sys
19
19
  from typing import Callable, List, Optional, Tuple, Union
20
+ from dcicutils.captured_output import captured_output
20
21
  from dcicutils.command_utils import yes_or_no
21
- from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
22
+ from dcicutils.common import ORCHESTRATED_APPS, APP_CGAP, APP_FOURFRONT, APP_SMAHT
22
23
  from dcicutils.ff_utils import delete_metadata, purge_metadata
23
- from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case
24
+ from dcicutils.misc_utils import get_error_message, ignored, normalize_string, PRINT, to_camel_case, to_snake_case
24
25
  from dcicutils.portal_utils import Portal as PortalFromUtils
25
26
  from dcicutils.tmpfile_utils import temporary_directory
26
27
 
@@ -145,30 +146,16 @@ def main():
145
146
  parser.print_help()
146
147
  sys.exit(1)
147
148
 
148
- if app := args.app:
149
- if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS):
150
- usage(f"ERROR: Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}")
151
- else:
152
- app = APP_SMAHT
153
-
154
149
  if not (args.post or args.patch or args.upsert or args.delete or args.purge or args.load):
155
150
  usage()
156
151
 
152
+ if not (portal := _create_portal(env=args.env, ini=args.ini, app=args.app, load=args.load,
153
+ verbose=args.verbose, debug=args.debug, quiet=args.quiet)):
154
+ exit(1)
155
+
157
156
  if args.load:
158
- if args.post or args.patch or args.upsert or args.delete or args.purge:
159
- _print("Cannot use any other update option"
160
- "when using the --load option (to load data via snovault.loadxl).")
161
- exit(1)
162
- if args.env:
163
- if args.ini:
164
- _print("The --env is not used for the --load option (to load data via snovault.loadxl).")
165
- args.ini = args.env
166
- if not _load_data(load=args.load, ini_file=args.ini, explicit_schema_name=args.schema,
167
- verbose=args.verbose, debug=args.debug, noprogress=args.noprogress):
168
- exit(1)
169
- exit(0)
170
-
171
- portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
157
+ _load_data(portal=portal, load=args.load, ini_file=args.ini, explicit_schema_name=args.schema,
158
+ verbose=args.verbose, debug=args.debug, noprogress=args.noprogress)
172
159
 
173
160
  if explicit_schema_name := args.schema:
174
161
  schema, explicit_schema_name = _get_schema(portal, explicit_schema_name)
@@ -402,17 +389,24 @@ def _upsert_data(portal: Portal, data: dict, schema_name: str,
402
389
  return
403
390
 
404
391
 
405
- def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None,
392
+ def _load_data(portal: Portal, load: str, ini_file: str, explicit_schema_name: Optional[str] = None,
406
393
  verbose: bool = False, debug: bool = False, noprogress: bool = False,
407
- _portal: Optional[Portal] = None, _single_insert_file: Optional[str] = None) -> bool:
394
+ _single_insert_file: Optional[str] = None) -> bool:
408
395
 
396
+ import snovault.loadxl
409
397
  from snovault.loadxl import load_all_gen, LoadGenWrapper
410
- from dcicutils.captured_output import captured_output
411
398
  from dcicutils.progress_bar import ProgressBar
412
399
 
400
+ loadxl_summary = {}
401
+ loadxl_unresolved = {}
402
+ loadxl_output = []
403
+ loadxl_total_item_count = 0
404
+ loadxl_total_error_count = 0
405
+
413
406
  def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict):
414
407
 
415
- nonlocal LoadGenWrapper, load_all_gen, verbose, debug
408
+ nonlocal LoadGenWrapper, load_all_gen, loadxl_summary, verbose, debug
409
+ nonlocal loadxl_total_item_count, loadxl_total_error_count
416
410
  progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes
417
411
  progress_bar = ProgressBar(progress_total, interrupt_exit=True) if not noprogress else None
418
412
 
@@ -425,49 +419,75 @@ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = N
425
419
  return str_or_bytes.strip()
426
420
  return ""
427
421
 
422
+ def loadxl_print(arg):
423
+ if arg:
424
+ loadxl_output.append(normalize_string(str(arg)))
425
+
426
+ snovault.loadxl.print = loadxl_print
427
+
428
428
  LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$")
429
429
  LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Check",
430
430
  "CHECK": "Validate", "ERROR": "Error"}
431
431
  current_item_type = None
432
432
  current_item_count = 0
433
433
  current_item_total = 0
434
- total_item_count = 0
434
+
435
435
  for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory,
436
436
  docsdir=None, overwrite=True, verbose=True)):
437
- total_item_count += 1
437
+ loadxl_total_item_count += 1
438
438
  item = decode_bytes(item)
439
439
  match = LOADXL_RESPONSE_PATTERN.match(item)
440
440
  if not match or match.re.groups < 3:
441
441
  continue
442
- action = LOADXL_ACTION_NAME[match.group(1).upper()]
443
- # response_value = match.group(0)
444
- # identifying_value = match.group(2)
442
+ if (action := LOADXL_ACTION_NAME[match.group(1).upper()]) == "Error":
443
+ identifying_value = match.group(2)
444
+ # Example message for unresolved link:
445
+ # ERROR: /22813a02-906b-4b60-b2b2-4afaea24aa28 Bad response: 422 Unprocessable Entity
446
+ # (not 200 OK or 3xx redirect for http://localhost/file_set?skip_indexing=true)b\'{"@type":
447
+ # ["ValidationFailure", "Error"], "status": "error", "code": # 422, "title": "Unprocessable Entity",
448
+ # "description": "Failed validation", "errors": [{"location": "body", "name": # "Schema: ",
449
+ # "description": "Unable to resolve link: /Library/a4e8f79f-4d47-4e85-9707-c343c940a315"},
450
+ # {"location": "body", "name": "Schema: libraries.0",
451
+ # "description": "\\\'a4e8f79f-4d47-4e85-9707-c343c940a315\\\' not found"}]}\'
452
+ unresolved_link_error_message_prefix = "Unable to resolve link:"
453
+ if (i := item.find(unresolved_link_error_message_prefix)) > 0:
454
+ unresolved_link = item[i + len(unresolved_link_error_message_prefix):].strip()
455
+ if (i := unresolved_link.find("\"")) > 0:
456
+ if (unresolved_link := unresolved_link[0:i]):
457
+ if ((error_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and
458
+ (len(error_type.groups()) == 1)): # noqa
459
+ error_type = to_camel_case(error_type.group(1))
460
+ identifying_value = f"/{error_type}{identifying_value}"
461
+ if not loadxl_unresolved.get(identifying_value):
462
+ loadxl_unresolved[identifying_value] = []
463
+ loadxl_unresolved[identifying_value].append(unresolved_link)
464
+ loadxl_total_error_count += 1
465
+ continue
445
466
  item_type = match.group(3)
446
467
  if current_item_type != item_type:
447
468
  if noprogress and debug and current_item_type is not None:
448
- print()
469
+ _print()
449
470
  current_item_type = item_type
450
471
  current_item_count = 0
451
472
  current_item_total = schema_names_to_load[item_type]
452
473
  if progress_bar:
453
474
  progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}")
454
475
  current_item_count += 1
476
+ if loadxl_summary.get(current_item_type, None) is None:
477
+ loadxl_summary[current_item_type] = 0
478
+ loadxl_summary[current_item_type] += 1
455
479
  if progress_bar:
456
- progress_bar.set_progress(total_item_count)
480
+ progress_bar.set_progress(loadxl_total_item_count)
457
481
  elif debug:
458
- print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})")
482
+ _print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})")
459
483
  if progress_bar:
460
484
  progress_bar.set_description("▶ Load Complete")
461
- print()
462
-
463
- if not ini_file:
464
- ini_file = _DEFAULT_INI_FILE_FOR_LOAD
465
- if not os.path.isabs(ini_file := os.path.normpath(os.path.expanduser(ini_file))):
466
- ini_file = os.path.normpath(os.path.join(os.getcwd(), ini_file))
467
- if not os.path.exists(ini_file):
468
- _print(f"The INI file required for --load is not found: {ini_file}")
469
- exit(1)
485
+ if loadxl_total_item_count > loadxl_total_error_count:
486
+ _print()
470
487
 
488
+ if not portal.vapp:
489
+ _print("Must using INI based Portal object with --load (use --ini option to specify an INI file).")
490
+ return False
471
491
  if not os.path.isabs(load := os.path.normpath(os.path.expanduser(load))):
472
492
  load = os.path.normpath(os.path.join(os.getcwd(), load))
473
493
  if not os.path.exists(load):
@@ -480,10 +500,6 @@ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = N
480
500
  inserts_directory = None
481
501
  inserts_file = load
482
502
 
483
- if not (portal := _portal):
484
- with captured_output(not debug):
485
- portal = Portal(ini_file)
486
-
487
503
  if inserts_file:
488
504
  with io.open(inserts_file, "r") as f:
489
505
  try:
@@ -503,9 +519,9 @@ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = N
503
519
  file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json")
504
520
  with io.open(file_name, "w") as f:
505
521
  json.dump(data, f)
506
- return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name,
522
+ return _load_data(portal=portal, load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name,
507
523
  verbose=verbose, debug=debug, noprogress=noprogress,
508
- _portal=portal, _single_insert_file=inserts_file)
524
+ _single_insert_file=inserts_file)
509
525
  elif isinstance(data, dict):
510
526
  if schema_name := explicit_schema_name:
511
527
  if _is_schema_name_list(portal, schema_names := list(data.keys())):
@@ -530,20 +546,20 @@ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = N
530
546
  json.dump(schema_data, f)
531
547
  nfiles += 1
532
548
  if nfiles > 0:
533
- return _load_data(load=tmpdir, ini_file=ini_file,
549
+ return _load_data(portal=portal, load=tmpdir, ini_file=ini_file,
534
550
  verbose=verbose, debug=debug, noprogress=noprogress,
535
- _portal=portal, _single_insert_file=inserts_file)
551
+ _single_insert_file=inserts_file)
536
552
  return True
537
553
  else:
538
554
  _print(f"Unrecognized JSON data in file: {inserts_file}")
539
555
  return False
540
556
  return True
557
+
541
558
  if verbose:
542
559
  if _single_insert_file:
543
560
  _print(f"Loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}")
544
561
  else:
545
562
  _print(f"Loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}")
546
- _print(f"Portal INI file for load is: {ini_file}")
547
563
 
548
564
  schema_names = list(_get_schemas(portal).keys())
549
565
  schema_snake_case_names = [to_snake_case(item) for item in schema_names]
@@ -585,11 +601,27 @@ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = N
585
601
  loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load)
586
602
  else:
587
603
  loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load)
604
+
588
605
  if verbose:
589
606
  if _single_insert_file:
590
607
  _print(f"Done loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}")
591
608
  else:
592
609
  _print(f"Done loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}")
610
+ _print(f"Total items loaded: {loadxl_total_item_count}"
611
+ f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}")
612
+ for item in sorted(loadxl_summary.keys()):
613
+ _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}")
614
+ if loadxl_unresolved:
615
+ _print("✗ Unresolved references:")
616
+ for item in loadxl_unresolved:
617
+ _print(f" ▶ {item}: {len(loadxl_unresolved[item])}")
618
+ for subitem in loadxl_unresolved[item]:
619
+ _print(f" ▷ {subitem}")
620
+ if debug and loadxl_output:
621
+ _print("✗ Output from loadxl:")
622
+ for item in loadxl_output:
623
+ _print(f" ▶ {item}")
624
+
593
625
  return True
594
626
 
595
627
 
@@ -611,25 +643,67 @@ def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[
611
643
  return {key: value for key, value in data.items() if key not in ignore_these_properties}
612
644
 
613
645
 
614
- def _create_portal(env: Optional[str] = None, app: Optional[str] = None,
615
- verbose: bool = False, debug: bool = False) -> Optional[Portal]:
646
+ def _create_portal(env: Optional[str] = None, ini: Optional[str] = None, app: Optional[str] = None,
647
+ load: Optional[str] = None, verbose: bool = False, debug: bool = False,
648
+ quiet: bool = False) -> Optional[Portal]:
649
+
650
+ if app:
651
+ if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS):
652
+ _print(f"Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}")
653
+ return None
654
+ elif APP_SMAHT in (env or os.environ.get(_SMAHT_ENV_ENVIRON_NAME) or ""):
655
+ app = APP_SMAHT
656
+ elif APP_CGAP in (env or ""):
657
+ app = APP_CGAP
658
+ elif APP_FOURFRONT in (env or ""):
659
+ app = APP_FOURFRONT
660
+
661
+ if ini:
662
+ if env:
663
+ if not quiet:
664
+ _print("Ignoring --env option when --ini option is given.")
665
+ elif (app == _SMAHT_ENV_ENVIRON_NAME) and (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)):
666
+ if not quiet:
667
+ _print(f"Ignoring SMAHT_ENV environment variable ({env}) when --ini option is given.")
668
+ if not os.path.isabs(ini_file := os.path.normpath(os.path.expanduser(ini))):
669
+ ini_file = os.path.normpath(os.path.join(os.getcwd(), ini_file))
670
+ if not os.path.exists(ini_file):
671
+ _print(f"Specified Portal INI file not found: {ini_file}")
672
+ return None
673
+ with captured_output(not debug):
674
+ if not (portal := Portal(ini_file, app=app)):
675
+ _print(f"Cannot create INI based Portal object: {env} ({app})")
676
+ return None
677
+ else:
678
+ env_from_environ = False
679
+ if not env and app:
680
+ # If the --load option is specified, and no --ini option is specified, then do NOT default
681
+ # to using the SMAHT_ENV environment variable (if set) for an access-key based Portal
682
+ # object; rather default to the default INI file (i.e. development.ini).
683
+ if (not load) and (app == APP_SMAHT) and (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)):
684
+ env_from_environ = True
685
+ if not env:
686
+ if os.path.exists(ini_file := os.path.normpath(os.path.join(os.getcwd(), _DEFAULT_INI_FILE_FOR_LOAD))):
687
+ return _create_portal(ini=ini_file, app=app, verbose=verbose, debug=debug)
688
+ return None
689
+ if not (portal := Portal(env, app=app) if env or app else None):
690
+ _print(f"Cannot create access-key based Portal object: {env}{f' ({app})' if app else ''}")
691
+ return None
692
+
693
+ if (ini_file := portal.ini_file):
694
+ if not quiet:
695
+ _print(f"Portal environment: {ini_file}")
696
+ elif (env := portal.env) or (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)):
697
+ _print(f"Portal environment"
698
+ f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}")
699
+ if verbose:
700
+ if portal.keys_file:
701
+ _print(f"Portal keys file: {portal.keys_file}")
702
+ if portal.key_id:
703
+ _print(f"Portal key prefix: {portal.key_id[0:2]}******")
704
+ if portal.server:
705
+ _print(f"Portal server: {portal.server}")
616
706
 
617
- env_from_environ = None
618
- if not env and (app == APP_SMAHT):
619
- if env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME):
620
- env_from_environ = True
621
- if not (portal := Portal(env, app=app) if env or app else None):
622
- return None
623
- if verbose:
624
- if (env := portal.env) or (env := os.environ(_SMAHT_ENV_ENVIRON_NAME)):
625
- _print(f"Portal environment"
626
- f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}")
627
- if portal.keys_file:
628
- _print(f"Portal keys file: {portal.keys_file}")
629
- if portal.key_id:
630
- _print(f"Portal key prefix: {portal.key_id[0:2]}******")
631
- if portal.server:
632
- _print(f"Portal server: {portal.server}")
633
707
  return portal
634
708
 
635
709
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.14.0.1b15
3
+ Version: 8.14.0.1b17
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -60,7 +60,7 @@ dcicutils/s3_utils.py,sha256=h2B9ftOo-kxqfiKth5ZDC_cAUFy1Pbu7BrVanFnE5Iw,28839
60
60
  dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
61
61
  dcicutils/scripts/publish_to_pypi.py,sha256=sMd4WASQGlxlh7uLrt2eGkFRXYgONVmvIg8mClMS5RQ,13903
62
62
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
63
- dcicutils/scripts/update_portal_object.py,sha256=LwqGSiR7Q0_8GP0ToNfaeVQ1hZtQ69EOLW21PyKBC7c,33950
63
+ dcicutils/scripts/update_portal_object.py,sha256=MsNpF6eHGVxr7dUg8QrwZo9XfIjPjAavOWqyI_RrfpA,38397
64
64
  dcicutils/scripts/view_portal_object.py,sha256=lcgXWH9ooVf7tJDIRnoFGOgT0wYLGhiJlJW3a9w6A_c,36983
65
65
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.14.0.1b15.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.14.0.1b15.dist-info/METADATA,sha256=h4m3Yg-a4hgUo52m_3UmmBaMXsbAYRP1A-HRrhuutec,3440
80
- dcicutils-8.14.0.1b15.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.14.0.1b15.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.14.0.1b15.dist-info/RECORD,,
78
+ dcicutils-8.14.0.1b17.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.14.0.1b17.dist-info/METADATA,sha256=WxgzreGqLqRZT1QRIzME9quTpxihdyduDQKkxX9kdS0,3440
80
+ dcicutils-8.14.0.1b17.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.14.0.1b17.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.14.0.1b17.dist-info/RECORD,,