dcicutils 8.14.0.1b9__py3-none-any.whl → 8.14.0.1b11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,13 +14,15 @@ import io
14
14
  import json
15
15
  import os
16
16
  import re
17
+ import shutil
17
18
  import sys
18
19
  from typing import Callable, List, Optional, Tuple, Union
19
20
  from dcicutils.command_utils import yes_or_no
20
21
  from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
21
22
  from dcicutils.ff_utils import delete_metadata, purge_metadata
22
- from dcicutils.misc_utils import get_error_message, ignored, PRINT
23
+ from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case
23
24
  from dcicutils.portal_utils import Portal as PortalFromUtils
25
+ from dcicutils.tmpfile_utils import temporary_directory
24
26
 
25
27
 
26
28
  class Portal(PortalFromUtils):
@@ -131,6 +133,8 @@ def main():
131
133
  parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.")
132
134
  parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
133
135
  parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.")
136
+ parser.add_argument("--noprogress", action="store_true", required=False, default=False,
137
+ help="No progress bar output for --load.")
134
138
  parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
135
139
  args = parser.parse_args()
136
140
 
@@ -158,27 +162,8 @@ def main():
158
162
  _print("The --env is not used for the --load option (to load data via snovault.loadxl).")
159
163
  if args.schema:
160
164
  _print("The --schema is not used for the --load option (to load data via snovault.loadxl).")
161
- from snovault.loadxl import load_data
162
- from dcicutils.captured_output import captured_output
163
- if args.ini:
164
- ini_file = args.ini
165
- else:
166
- ini_file = _DEFAULT_INI_FILE_FOR_LOAD
167
- if not os.path.exists(ini_file):
168
- _print(f"The INI file required for --load is not found: {ini_file}")
169
- exit(1)
170
- if not os.path.isdir(args.load):
171
- _print(f"Load directory does not exist: {args.load}")
172
- exit(1)
173
- portal = None
174
- with captured_output(not args.debug):
175
- portal = Portal(ini_file)
176
- if args.verbose:
177
- _print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}")
178
- _print(f"Portal INI file for load is: {ini_file}")
179
- load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False)
180
- if args.verbose:
181
- _print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}")
165
+ _load_data(load=args.load, ini_file=args.ini,
166
+ verbose=args.verbose, debug=args.debug, noprogress=args.noprogress)
182
167
  exit(0)
183
168
 
184
169
  portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
@@ -192,7 +177,7 @@ def main():
192
177
  _post_or_patch_or_upsert(portal=portal,
193
178
  file_or_directory=args.post,
194
179
  explicit_schema_name=explicit_schema_name,
195
- update_function=post_data,
180
+ update_function=_post_data,
196
181
  update_action_name="POST",
197
182
  noignore=args.noignore, ignore=args.ignore,
198
183
  confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
@@ -200,7 +185,7 @@ def main():
200
185
  _post_or_patch_or_upsert(portal=portal,
201
186
  file_or_directory=args.patch,
202
187
  explicit_schema_name=explicit_schema_name,
203
- update_function=patch_data,
188
+ update_function=_patch_data,
204
189
  update_action_name="PATCH",
205
190
  patch_delete_fields=args.delete,
206
191
  noignore=args.noignore, ignore=args.ignore,
@@ -210,7 +195,7 @@ def main():
210
195
  _post_or_patch_or_upsert(portal=portal,
211
196
  file_or_directory=args.upsert,
212
197
  explicit_schema_name=explicit_schema_name,
213
- update_function=upsert_data,
198
+ update_function=_upsert_data,
214
199
  update_action_name="UPSERT",
215
200
  patch_delete_fields=args.delete,
216
201
  noignore=args.noignore, ignore=args.ignore,
@@ -241,14 +226,6 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str,
241
226
  confirm: bool = False, verbose: bool = False,
242
227
  quiet: bool = False, debug: bool = False) -> None:
243
228
 
244
- def is_schema_name_list(portal: Portal, keys: list) -> bool:
245
- if isinstance(keys, list):
246
- for key in keys:
247
- if portal.get_schema(key) is None:
248
- return False
249
- return True
250
- return False
251
-
252
229
  def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str],
253
230
  patch_delete_fields: Optional[str] = None,
254
231
  confirm: bool = False, verbose: bool = False,
@@ -266,7 +243,7 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str,
266
243
  patch_delete_fields=patch_delete_fields,
267
244
  noignore=noignore, ignore=ignore,
268
245
  confirm=confirm, verbose=verbose, debug=debug)
269
- elif is_schema_name_list(portal, list(data.keys())):
246
+ elif _is_schema_name_list(portal, list(data.keys())):
270
247
  if debug:
271
248
  _print(f"DEBUG: File ({file}) contains a dictionary of schema names.")
272
249
  for schema_name in data:
@@ -329,11 +306,11 @@ def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]:
329
306
  return data
330
307
 
331
308
 
332
- def post_data(portal: Portal, data: dict, schema_name: str,
333
- file: Optional[str] = None, index: int = 0,
334
- patch_delete_fields: Optional[str] = None,
335
- noignore: bool = False, ignore: Optional[List[str]] = None,
336
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
309
+ def _post_data(portal: Portal, data: dict, schema_name: str,
310
+ file: Optional[str] = None, index: int = 0,
311
+ patch_delete_fields: Optional[str] = None,
312
+ noignore: bool = False, ignore: Optional[List[str]] = None,
313
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
337
314
  ignored(patch_delete_fields)
338
315
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
339
316
  if isinstance(file, str) and isinstance(index, int):
@@ -359,11 +336,11 @@ def post_data(portal: Portal, data: dict, schema_name: str,
359
336
  return
360
337
 
361
338
 
362
- def patch_data(portal: Portal, data: dict, schema_name: str,
363
- file: Optional[str] = None, index: int = 0,
364
- patch_delete_fields: Optional[str] = None,
365
- noignore: bool = False, ignore: Optional[List[str]] = None,
366
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
339
+ def _patch_data(portal: Portal, data: dict, schema_name: str,
340
+ file: Optional[str] = None, index: int = 0,
341
+ patch_delete_fields: Optional[str] = None,
342
+ noignore: bool = False, ignore: Optional[List[str]] = None,
343
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
367
344
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
368
345
  if isinstance(file, str) and isinstance(index, int):
369
346
  _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})")
@@ -390,11 +367,11 @@ def patch_data(portal: Portal, data: dict, schema_name: str,
390
367
  return
391
368
 
392
369
 
393
- def upsert_data(portal: Portal, data: dict, schema_name: str,
394
- file: Optional[str] = None, index: int = 0,
395
- patch_delete_fields: Optional[str] = None,
396
- noignore: bool = False, ignore: Optional[List[str]] = None,
397
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
370
+ def _upsert_data(portal: Portal, data: dict, schema_name: str,
371
+ file: Optional[str] = None, index: int = 0,
372
+ patch_delete_fields: Optional[str] = None,
373
+ noignore: bool = False, ignore: Optional[List[str]] = None,
374
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
398
375
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
399
376
  if isinstance(file, str) and isinstance(index, int):
400
377
  _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
@@ -423,6 +400,188 @@ def upsert_data(portal: Portal, data: dict, schema_name: str,
423
400
  return
424
401
 
425
402
 
403
+ def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None,
404
+ verbose: bool = False, debug: bool = False, noprogress: bool = False) -> bool:
405
+
406
+ from snovault.loadxl import load_all_gen, LoadGenWrapper
407
+ from dcicutils.captured_output import captured_output
408
+ from dcicutils.progress_bar import ProgressBar
409
+
410
+ def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict):
411
+
412
+ nonlocal LoadGenWrapper, load_all_gen, verbose, debug
413
+ progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes
414
+ progress_bar = ProgressBar(progress_total) if not noprogress else None
415
+
416
+ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str:
417
+ if not isinstance(encoding, str):
418
+ encoding = "utf-8"
419
+ if isinstance(str_or_bytes, bytes):
420
+ return str_or_bytes.decode(encoding).strip()
421
+ elif isinstance(str_or_bytes, str):
422
+ return str_or_bytes.strip()
423
+ return ""
424
+
425
+ LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$")
426
+ LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Check",
427
+ "CHECK": "Validate", "ERROR": "Error"}
428
+ current_item_type = None
429
+ current_item_count = 0
430
+ current_item_total = 0
431
+ total_item_count = 0
432
+ for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory,
433
+ docsdir=None, overwrite=True, verbose=True)):
434
+ total_item_count += 1
435
+ item = decode_bytes(item)
436
+ match = LOADXL_RESPONSE_PATTERN.match(item)
437
+ if not match or match.re.groups < 3:
438
+ continue
439
+ action = LOADXL_ACTION_NAME[match.group(1).upper()]
440
+ # response_value = match.group(0)
441
+ # identifying_value = match.group(2)
442
+ item_type = match.group(3)
443
+ if current_item_type != item_type:
444
+ if noprogress and debug and current_item_type is not None:
445
+ print()
446
+ current_item_type = item_type
447
+ current_item_count = 0
448
+ current_item_total = schema_names_to_load[item_type]
449
+ if progress_bar:
450
+ progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}")
451
+ current_item_count += 1
452
+ if progress_bar:
453
+ progress_bar.set_progress(total_item_count)
454
+ elif debug:
455
+ print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})")
456
+ if progress_bar:
457
+ progress_bar.set_description("▶ Load Complete")
458
+ print()
459
+
460
+ if not ini_file:
461
+ ini_file = _DEFAULT_INI_FILE_FOR_LOAD
462
+ if not os.path.isabs(ini_file := os.path.expanduser(ini_file)):
463
+ ini_file = os.path.join(os.getcwd(), ini_file)
464
+ if not os.path.exists(ini_file):
465
+ _print(f"The INI file required for --load is not found: {ini_file}")
466
+ exit(1)
467
+
468
+ if not os.path.isabs(load := os.path.expanduser(load)):
469
+ load = os.path.join(os.getcwd(), load)
470
+ if not os.path.exists(load):
471
+ return False
472
+
473
+ if os.path.isdir(load):
474
+ inserts_directory = load
475
+ inserts_file = None
476
+ else:
477
+ inserts_directory = None
478
+ inserts_file = load
479
+
480
+ portal = None
481
+ with captured_output(not debug):
482
+ portal = Portal(ini_file)
483
+
484
+ if inserts_file:
485
+ with io.open(inserts_file, "r") as f:
486
+ try:
487
+ data = json.load(f)
488
+ except Exception:
489
+ _print(f"Cannot load JSON data from file: {inserts_file}")
490
+ return False
491
+ if isinstance(data, list):
492
+ if not (schema_name := explicit_schema_name):
493
+ if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)):
494
+ _print("Unable to determine schema name for JSON data file: {inserts_file}")
495
+ return False
496
+ with temporary_directory() as tmpdir:
497
+ file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json")
498
+ with io.open(file_name, "w") as f:
499
+ json.dump(data, f)
500
+ return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=explicit_schema_name,
501
+ verbose=verbose, debug=debug, noprogress=noprogress)
502
+ elif isinstance(data, dict):
503
+ _print("DICT IN FILE FOR LOAD NOT YET SUPPPORTED")
504
+ if not _is_schema_name_list(portal, schema_names := list(data.keys())):
505
+ _print(f"Unrecognized types in JSON data file: {inserts_file}")
506
+ return False
507
+ with temporary_directory() as tmpdir:
508
+ nfiles = 0
509
+ for schema_name in schema_names:
510
+ if not isinstance(schema_data := data[schema_name], list):
511
+ _print(f"Unexpected value for data type ({schema_name})"
512
+ f" in JSON data file: {inserts_file} ▶ ignoring")
513
+ continue
514
+ file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json")
515
+ with io.open(file_name, "w") as f:
516
+ json.dump(schema_data, f)
517
+ nfiles += 1
518
+ if nfiles > 0:
519
+ return _load_data(load=tmpdir, ini_file=ini_file,
520
+ verbose=verbose, debug=debug, noprogress=noprogress)
521
+ # TODO
522
+ return True
523
+ else:
524
+ _print(f"Unrecognized JSON data in file: {inserts_file}")
525
+ return False
526
+ return True
527
+ if verbose:
528
+ _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}")
529
+ _print(f"Portal INI file for load is: {ini_file}")
530
+
531
+ schema_names = list(_get_schemas(portal).keys())
532
+ schema_snake_case_names = [to_snake_case(item) for item in schema_names]
533
+ schema_names_to_load = {}
534
+
535
+ copy_to_temporary_directory = False
536
+ for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")):
537
+ json_file_name = os.path.basename(json_file_path)
538
+ schema_name = os.path.basename(json_file_name)[:-len(".json")]
539
+ if (schema_name not in schema_snake_case_names) and (schema_name not in schema_names):
540
+ _print(f"File is not named for a known schema: {json_file_name} ▶ ignoring")
541
+ copy_to_temporary_directory = True
542
+ else:
543
+ try:
544
+ with io.open(json_file_path, "r") as f:
545
+ if not isinstance(data := json.load(f), list):
546
+ _print("Data JSON file does not contain an array: {json_file_path} ▶ ignoring")
547
+ copy_to_temporary_directory = True
548
+ elif (nobjects := len(data)) < 1:
549
+ _print("Data JSON file contains no items: {json_file_path} ▶ ignoring")
550
+ copy_to_temporary_directory = True
551
+ else:
552
+ schema_names_to_load[schema_name] = nobjects
553
+ except Exception:
554
+ _print("Cannot load JSON data from file: {json_file_path} ▶ ignoring")
555
+ copy_to_temporary_directory = True
556
+ if not schema_names_to_load:
557
+ _print("Directory contains no valid data: {inserts_directory}")
558
+ return False
559
+ if copy_to_temporary_directory:
560
+ with temporary_directory() as tmpdir:
561
+ if debug:
562
+ _print(f"Using temporary directory: {tmpdir}")
563
+ for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")):
564
+ json_file_name = os.path.basename(json_file_path)
565
+ schema_name = os.path.basename(json_file_name)[:-len(".json")]
566
+ if (schema_name in schema_snake_case_names) or (schema_name in schema_names):
567
+ shutil.copy(json_file_path, tmpdir)
568
+ loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load)
569
+ else:
570
+ loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load)
571
+ if verbose:
572
+ _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}")
573
+ return True
574
+
575
+
576
+ def _is_schema_name_list(portal: Portal, keys: list) -> bool:
577
+ if isinstance(keys, list):
578
+ for key in keys:
579
+ if portal.get_schema(key) is None:
580
+ return False
581
+ return True
582
+ return False
583
+
584
+
426
585
  def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict:
427
586
  ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE
428
587
  if isinstance(ignore, list):
@@ -498,17 +657,21 @@ def _parse_delete_fields(value: str) -> str:
498
657
 
499
658
 
500
659
  def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
501
- try:
502
- if not value.endswith(".json"):
503
- return None
504
- _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
505
- return schema_name
506
- except Exception:
507
- return False
660
+ if isinstance(value, str) and value:
661
+ try:
662
+ if value.endswith(".json"):
663
+ value = value[:-5]
664
+ _, schema_name = _get_schema(portal, os.path.basename(value))
665
+ return schema_name
666
+ except Exception:
667
+ pass
668
+ return False
508
669
 
509
670
 
510
671
  @lru_cache(maxsize=1)
511
672
  def _get_schemas(portal: Portal) -> Optional[dict]:
673
+ if portal.vapp:
674
+ return portal.vapp.get("/profiles/?frame=raw").json
512
675
  return portal.get_schemas()
513
676
 
514
677
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.14.0.1b9
3
+ Version: 8.14.0.1b11
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -60,7 +60,7 @@ dcicutils/s3_utils.py,sha256=h2B9ftOo-kxqfiKth5ZDC_cAUFy1Pbu7BrVanFnE5Iw,28839
60
60
  dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
61
61
  dcicutils/scripts/publish_to_pypi.py,sha256=sMd4WASQGlxlh7uLrt2eGkFRXYgONVmvIg8mClMS5RQ,13903
62
62
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
63
- dcicutils/scripts/update_portal_object.py,sha256=9_ZpfKwIJUDbyEI0Xqu_9keMxTIVZ_CyxX8WeGrFI14,24376
63
+ dcicutils/scripts/update_portal_object.py,sha256=fLgsPgnugKLS18A8JwP8O_UdmeFqNIjUIhrEFlIhfgE,32589
64
64
  dcicutils/scripts/view_portal_object.py,sha256=lcgXWH9ooVf7tJDIRnoFGOgT0wYLGhiJlJW3a9w6A_c,36983
65
65
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.14.0.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.14.0.1b9.dist-info/METADATA,sha256=6dzBhVwY1DNx4-lCS_3FSaztYt-JC8bNFU9o8VoyycA,3439
80
- dcicutils-8.14.0.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.14.0.1b9.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.14.0.1b9.dist-info/RECORD,,
78
+ dcicutils-8.14.0.1b11.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.14.0.1b11.dist-info/METADATA,sha256=HFNj87yKbAwTA98cVDJm_0r9aVJssg7zo5dIsqngVrg,3440
80
+ dcicutils-8.14.0.1b11.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.14.0.1b11.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.14.0.1b11.dist-info/RECORD,,