dcicutils 8.14.0.1b9__py3-none-any.whl → 8.14.0.1b10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,13 +14,15 @@ import io
14
14
  import json
15
15
  import os
16
16
  import re
17
+ import shutil
17
18
  import sys
18
19
  from typing import Callable, List, Optional, Tuple, Union
19
20
  from dcicutils.command_utils import yes_or_no
20
21
  from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
21
22
  from dcicutils.ff_utils import delete_metadata, purge_metadata
22
- from dcicutils.misc_utils import get_error_message, ignored, PRINT
23
+ from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case
23
24
  from dcicutils.portal_utils import Portal as PortalFromUtils
25
+ from dcicutils.tmpfile_utils import temporary_directory
24
26
 
25
27
 
26
28
  class Portal(PortalFromUtils):
@@ -131,6 +133,8 @@ def main():
131
133
  parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.")
132
134
  parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
133
135
  parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.")
136
+ parser.add_argument("--noprogress", action="store_true", required=False, default=False,
137
+ help="No progress bar output for --load.")
134
138
  parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
135
139
  args = parser.parse_args()
136
140
 
@@ -158,27 +162,8 @@ def main():
158
162
  _print("The --env is not used for the --load option (to load data via snovault.loadxl).")
159
163
  if args.schema:
160
164
  _print("The --schema is not used for the --load option (to load data via snovault.loadxl).")
161
- from snovault.loadxl import load_data
162
- from dcicutils.captured_output import captured_output
163
- if args.ini:
164
- ini_file = args.ini
165
- else:
166
- ini_file = _DEFAULT_INI_FILE_FOR_LOAD
167
- if not os.path.exists(ini_file):
168
- _print(f"The INI file required for --load is not found: {ini_file}")
169
- exit(1)
170
- if not os.path.isdir(args.load):
171
- _print(f"Load directory does not exist: {args.load}")
172
- exit(1)
173
- portal = None
174
- with captured_output(not args.debug):
175
- portal = Portal(ini_file)
176
- if args.verbose:
177
- _print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}")
178
- _print(f"Portal INI file for load is: {ini_file}")
179
- load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False)
180
- if args.verbose:
181
- _print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}")
165
+ _load_data(inserts_directory=args.load, ini_file=args.ini,
166
+ verbose=args.verbose, debug=args.debug, noprogress=args.noprogress)
182
167
  exit(0)
183
168
 
184
169
  portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
@@ -192,7 +177,7 @@ def main():
192
177
  _post_or_patch_or_upsert(portal=portal,
193
178
  file_or_directory=args.post,
194
179
  explicit_schema_name=explicit_schema_name,
195
- update_function=post_data,
180
+ update_function=_post_data,
196
181
  update_action_name="POST",
197
182
  noignore=args.noignore, ignore=args.ignore,
198
183
  confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
@@ -200,7 +185,7 @@ def main():
200
185
  _post_or_patch_or_upsert(portal=portal,
201
186
  file_or_directory=args.patch,
202
187
  explicit_schema_name=explicit_schema_name,
203
- update_function=patch_data,
188
+ update_function=_patch_data,
204
189
  update_action_name="PATCH",
205
190
  patch_delete_fields=args.delete,
206
191
  noignore=args.noignore, ignore=args.ignore,
@@ -210,7 +195,7 @@ def main():
210
195
  _post_or_patch_or_upsert(portal=portal,
211
196
  file_or_directory=args.upsert,
212
197
  explicit_schema_name=explicit_schema_name,
213
- update_function=upsert_data,
198
+ update_function=_upsert_data,
214
199
  update_action_name="UPSERT",
215
200
  patch_delete_fields=args.delete,
216
201
  noignore=args.noignore, ignore=args.ignore,
@@ -329,11 +314,11 @@ def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]:
329
314
  return data
330
315
 
331
316
 
332
- def post_data(portal: Portal, data: dict, schema_name: str,
333
- file: Optional[str] = None, index: int = 0,
334
- patch_delete_fields: Optional[str] = None,
335
- noignore: bool = False, ignore: Optional[List[str]] = None,
336
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
317
+ def _post_data(portal: Portal, data: dict, schema_name: str,
318
+ file: Optional[str] = None, index: int = 0,
319
+ patch_delete_fields: Optional[str] = None,
320
+ noignore: bool = False, ignore: Optional[List[str]] = None,
321
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
337
322
  ignored(patch_delete_fields)
338
323
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
339
324
  if isinstance(file, str) and isinstance(index, int):
@@ -359,11 +344,11 @@ def post_data(portal: Portal, data: dict, schema_name: str,
359
344
  return
360
345
 
361
346
 
362
- def patch_data(portal: Portal, data: dict, schema_name: str,
363
- file: Optional[str] = None, index: int = 0,
364
- patch_delete_fields: Optional[str] = None,
365
- noignore: bool = False, ignore: Optional[List[str]] = None,
366
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
347
+ def _patch_data(portal: Portal, data: dict, schema_name: str,
348
+ file: Optional[str] = None, index: int = 0,
349
+ patch_delete_fields: Optional[str] = None,
350
+ noignore: bool = False, ignore: Optional[List[str]] = None,
351
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
367
352
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
368
353
  if isinstance(file, str) and isinstance(index, int):
369
354
  _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})")
@@ -390,11 +375,11 @@ def patch_data(portal: Portal, data: dict, schema_name: str,
390
375
  return
391
376
 
392
377
 
393
- def upsert_data(portal: Portal, data: dict, schema_name: str,
394
- file: Optional[str] = None, index: int = 0,
395
- patch_delete_fields: Optional[str] = None,
396
- noignore: bool = False, ignore: Optional[List[str]] = None,
397
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
378
+ def _upsert_data(portal: Portal, data: dict, schema_name: str,
379
+ file: Optional[str] = None, index: int = 0,
380
+ patch_delete_fields: Optional[str] = None,
381
+ noignore: bool = False, ignore: Optional[List[str]] = None,
382
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
398
383
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
399
384
  if isinstance(file, str) and isinstance(index, int):
400
385
  _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
@@ -423,6 +408,126 @@ def upsert_data(portal: Portal, data: dict, schema_name: str,
423
408
  return
424
409
 
425
410
 
411
def _load_data(inserts_directory: str, ini_file: str,
               verbose: bool = False, debug: bool = False, noprogress: bool = False) -> None:
    """
    Load the JSON data files in the given directory into the Portal via snovault.loadxl.

    Only `*.json` files directly within `inserts_directory` are considered. A file is
    loaded only if its base name (sans `.json`) is a known schema name, in either its
    original or its snake-case form, and it contains a non-empty JSON array. If any file
    is rejected, the accepted files are copied to a temporary directory and loaded from
    there, so that loadxl never sees the rejected ones.

    :param inserts_directory: Directory containing the JSON data files; `~` is expanded
        and a relative path is resolved against the current working directory.
    :param ini_file: Portal INI file; if falsy then `_DEFAULT_INI_FILE_FOR_LOAD` is used;
        `~` and relative paths are handled as for `inserts_directory`.
    :param verbose: If true, print messages before and after the load.
    :param debug: If true, do not capture output while creating the Portal, print the
        temporary directory name (if used), and, when there is no progress bar, print
        one line per recognized loadxl progress item.
    :param noprogress: If true, do not create a progress bar.

    Calls `exit(1)` if the INI file or the inserts directory does not exist; returns
    without loading anything if the directory contains no loadable data file.
    """

    from snovault.loadxl import load_all_gen, LoadGenWrapper
    from dcicutils.captured_output import captured_output
    from dcicutils.progress_bar import ProgressBar

    def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict) -> None:
        # Runs the load proper, translating each progress item yielded by loadxl into a
        # progress bar update (or into a debug line if there is no progress bar).

        progress_total = sum(schema_names_to_load.values()) * 2  # loadxl does two passes
        progress_bar = ProgressBar(progress_total) if not noprogress else None

        def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str:
            # Returns the given value as a stripped string; empty string for any other type.
            if not isinstance(encoding, str):
                encoding = "utf-8"
            if isinstance(str_or_bytes, bytes):
                return str_or_bytes.decode(encoding).strip()
            elif isinstance(str_or_bytes, str):
                return str_or_bytes.strip()
            return ""

        # Groups: (1) action, (2) identifying value, (3) item type, (4) and (5) the rest.
        LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$")
        LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Skip",
                              "CHECK": "Validate", "ERROR": "Error"}
        current_item_type = None
        current_item_count = 0
        current_item_total = 0
        total_item_count = 0
        for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory,
                                                docsdir=None, overwrite=True, verbose=True)):
            total_item_count += 1
            item = decode_bytes(item)
            if not (match := LOADXL_RESPONSE_PATTERN.match(item)):
                continue
            # Fall back to the raw action/zero count rather than raising KeyError, so that
            # an unexpected progress line cannot abort a load which is otherwise working.
            action_code = match.group(1).upper()
            action = LOADXL_ACTION_NAME.get(action_code, action_code)
            item_type = match.group(3)
            if current_item_type != item_type:
                if noprogress and debug and current_item_type is not None:
                    print()
                current_item_type = item_type
                current_item_count = 0
                current_item_total = schema_names_to_load.get(item_type, 0)
                if progress_bar:
                    progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}")
            current_item_count += 1
            if progress_bar:
                progress_bar.set_progress(total_item_count)
            elif debug:
                print(f"{current_item_type}: {current_item_count} of {current_item_total} ({action})")
        if progress_bar:
            progress_bar.set_description("▶ Load Complete")
            print()

    if not ini_file:
        ini_file = _DEFAULT_INI_FILE_FOR_LOAD
    if not os.path.isabs(ini_file := os.path.expanduser(ini_file)):
        ini_file = os.path.join(os.getcwd(), ini_file)
    if not os.path.exists(ini_file):
        _print(f"The INI file required for --load is not found: {ini_file}")
        exit(1)
    if not os.path.isabs(inserts_directory := os.path.expanduser(inserts_directory)):
        inserts_directory = os.path.join(os.getcwd(), inserts_directory)
    if not os.path.isdir(inserts_directory):
        _print(f"Load directory does not exist: {inserts_directory}")
        exit(1)
    portal = None
    with captured_output(not debug):
        portal = Portal(ini_file)
    if verbose:
        _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}")
        _print(f"Portal INI file for load is: {ini_file}")

    # _get_schemas is declared as returning an Optional; treat None as no known schemas.
    schema_names = list((_get_schemas(portal) or {}).keys())
    known_schema_names = set(schema_names) | {to_snake_case(item) for item in schema_names}
    schema_names_to_load = {}  # file base name (schema name) -> number of items in the file
    files_to_load = []  # paths of the files accepted for loading

    copy_to_temporary_directory = False
    for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")):
        json_file_name = os.path.basename(json_file_path)
        schema_name = json_file_name[:-len(".json")]
        if schema_name not in known_schema_names:
            _print(f"File is not named for a known schema: {json_file_name} ▶ ignoring")
            copy_to_temporary_directory = True
            continue
        try:
            with io.open(json_file_path, "r") as f:
                data = json.load(f)
        except Exception:
            # Deliberately best-effort: an unreadable or unparseable file is just skipped.
            _print(f"Cannot load JSON data from file: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
            continue
        if not isinstance(data, list):
            _print(f"Data JSON file does not contain an array: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
        elif (nobjects := len(data)) < 1:
            _print(f"Data JSON file contains no items: {json_file_path} ▶ ignoring")
            copy_to_temporary_directory = True
        else:
            schema_names_to_load[schema_name] = nobjects
            files_to_load.append(json_file_path)
    if not schema_names_to_load:
        _print(f"Directory contains no valid data: {inserts_directory}")
        return
    if copy_to_temporary_directory:
        with temporary_directory() as tmpdir:
            if debug:
                _print(f"Using temporary directory: {tmpdir}")
            # Copy only the accepted files, so that files reported above as ignored
            # (including unparseable ones named for a known schema) never reach loadxl.
            for json_file_path in files_to_load:
                shutil.copy(json_file_path, tmpdir)
            loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load)
    else:
        loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load)
    if verbose:
        _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}")
529
+
530
+
426
531
  def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict:
427
532
  ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE
428
533
  if isinstance(ignore, list):
@@ -509,6 +614,8 @@ def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str
509
614
 
510
615
  @lru_cache(maxsize=1)
511
616
  def _get_schemas(portal: Portal) -> Optional[dict]:
617
+ if portal.vapp:
618
+ return portal.vapp.get("/profiles/?frame=raw").json
512
619
  return portal.get_schemas()
513
620
 
514
621
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.14.0.1b9
3
+ Version: 8.14.0.1b10
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -60,7 +60,7 @@ dcicutils/s3_utils.py,sha256=h2B9ftOo-kxqfiKth5ZDC_cAUFy1Pbu7BrVanFnE5Iw,28839
60
60
  dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
61
61
  dcicutils/scripts/publish_to_pypi.py,sha256=sMd4WASQGlxlh7uLrt2eGkFRXYgONVmvIg8mClMS5RQ,13903
62
62
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
63
- dcicutils/scripts/update_portal_object.py,sha256=9_ZpfKwIJUDbyEI0Xqu_9keMxTIVZ_CyxX8WeGrFI14,24376
63
+ dcicutils/scripts/update_portal_object.py,sha256=OQ1v6QRJdVVUik_4RtNbMK2w7l4t-Htrm46pgj4kTNo,30063
64
64
  dcicutils/scripts/view_portal_object.py,sha256=lcgXWH9ooVf7tJDIRnoFGOgT0wYLGhiJlJW3a9w6A_c,36983
65
65
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.14.0.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.14.0.1b9.dist-info/METADATA,sha256=6dzBhVwY1DNx4-lCS_3FSaztYt-JC8bNFU9o8VoyycA,3439
80
- dcicutils-8.14.0.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.14.0.1b9.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.14.0.1b9.dist-info/RECORD,,
78
+ dcicutils-8.14.0.1b10.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.14.0.1b10.dist-info/METADATA,sha256=A0R06QINC7rpiYDmPXmX_lGCeK6VN7KmdgP8do7ss54,3440
80
+ dcicutils-8.14.0.1b10.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.14.0.1b10.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.14.0.1b10.dist-info/RECORD,,