dcicutils 8.14.0.1b9__py3-none-any.whl → 8.14.0.1b11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,13 +14,15 @@ import io
14
14
  import json
15
15
  import os
16
16
  import re
17
+ import shutil
17
18
  import sys
18
19
  from typing import Callable, List, Optional, Tuple, Union
19
20
  from dcicutils.command_utils import yes_or_no
20
21
  from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
21
22
  from dcicutils.ff_utils import delete_metadata, purge_metadata
22
- from dcicutils.misc_utils import get_error_message, ignored, PRINT
23
+ from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case
23
24
  from dcicutils.portal_utils import Portal as PortalFromUtils
25
+ from dcicutils.tmpfile_utils import temporary_directory
24
26
 
25
27
 
26
28
  class Portal(PortalFromUtils):
@@ -131,6 +133,8 @@ def main():
131
133
  parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.")
132
134
  parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
133
135
  parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.")
136
+ parser.add_argument("--noprogress", action="store_true", required=False, default=False,
137
+ help="No progress bar output for --load.")
134
138
  parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
135
139
  args = parser.parse_args()
136
140
 
@@ -158,27 +162,8 @@ def main():
158
162
  _print("The --env is not used for the --load option (to load data via snovault.loadxl).")
159
163
  if args.schema:
160
164
  _print("The --schema is not used for the --load option (to load data via snovault.loadxl).")
161
- from snovault.loadxl import load_data
162
- from dcicutils.captured_output import captured_output
163
- if args.ini:
164
- ini_file = args.ini
165
- else:
166
- ini_file = _DEFAULT_INI_FILE_FOR_LOAD
167
- if not os.path.exists(ini_file):
168
- _print(f"The INI file required for --load is not found: {ini_file}")
169
- exit(1)
170
- if not os.path.isdir(args.load):
171
- _print(f"Load directory does not exist: {args.load}")
172
- exit(1)
173
- portal = None
174
- with captured_output(not args.debug):
175
- portal = Portal(ini_file)
176
- if args.verbose:
177
- _print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}")
178
- _print(f"Portal INI file for load is: {ini_file}")
179
- load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False)
180
- if args.verbose:
181
- _print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}")
165
+ _load_data(load=args.load, ini_file=args.ini,
166
+ verbose=args.verbose, debug=args.debug, noprogress=args.noprogress)
182
167
  exit(0)
183
168
 
184
169
  portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
@@ -192,7 +177,7 @@ def main():
192
177
  _post_or_patch_or_upsert(portal=portal,
193
178
  file_or_directory=args.post,
194
179
  explicit_schema_name=explicit_schema_name,
195
- update_function=post_data,
180
+ update_function=_post_data,
196
181
  update_action_name="POST",
197
182
  noignore=args.noignore, ignore=args.ignore,
198
183
  confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
@@ -200,7 +185,7 @@ def main():
200
185
  _post_or_patch_or_upsert(portal=portal,
201
186
  file_or_directory=args.patch,
202
187
  explicit_schema_name=explicit_schema_name,
203
- update_function=patch_data,
188
+ update_function=_patch_data,
204
189
  update_action_name="PATCH",
205
190
  patch_delete_fields=args.delete,
206
191
  noignore=args.noignore, ignore=args.ignore,
@@ -210,7 +195,7 @@ def main():
210
195
  _post_or_patch_or_upsert(portal=portal,
211
196
  file_or_directory=args.upsert,
212
197
  explicit_schema_name=explicit_schema_name,
213
- update_function=upsert_data,
198
+ update_function=_upsert_data,
214
199
  update_action_name="UPSERT",
215
200
  patch_delete_fields=args.delete,
216
201
  noignore=args.noignore, ignore=args.ignore,
@@ -241,14 +226,6 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str,
241
226
  confirm: bool = False, verbose: bool = False,
242
227
  quiet: bool = False, debug: bool = False) -> None:
243
228
 
244
- def is_schema_name_list(portal: Portal, keys: list) -> bool:
245
- if isinstance(keys, list):
246
- for key in keys:
247
- if portal.get_schema(key) is None:
248
- return False
249
- return True
250
- return False
251
-
252
229
  def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str],
253
230
  patch_delete_fields: Optional[str] = None,
254
231
  confirm: bool = False, verbose: bool = False,
@@ -266,7 +243,7 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str,
266
243
  patch_delete_fields=patch_delete_fields,
267
244
  noignore=noignore, ignore=ignore,
268
245
  confirm=confirm, verbose=verbose, debug=debug)
269
- elif is_schema_name_list(portal, list(data.keys())):
246
+ elif _is_schema_name_list(portal, list(data.keys())):
270
247
  if debug:
271
248
  _print(f"DEBUG: File ({file}) contains a dictionary of schema names.")
272
249
  for schema_name in data:
@@ -329,11 +306,11 @@ def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]:
329
306
  return data
330
307
 
331
308
 
332
- def post_data(portal: Portal, data: dict, schema_name: str,
333
- file: Optional[str] = None, index: int = 0,
334
- patch_delete_fields: Optional[str] = None,
335
- noignore: bool = False, ignore: Optional[List[str]] = None,
336
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
309
+ def _post_data(portal: Portal, data: dict, schema_name: str,
310
+ file: Optional[str] = None, index: int = 0,
311
+ patch_delete_fields: Optional[str] = None,
312
+ noignore: bool = False, ignore: Optional[List[str]] = None,
313
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
337
314
  ignored(patch_delete_fields)
338
315
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
339
316
  if isinstance(file, str) and isinstance(index, int):
@@ -359,11 +336,11 @@ def post_data(portal: Portal, data: dict, schema_name: str,
359
336
  return
360
337
 
361
338
 
362
- def patch_data(portal: Portal, data: dict, schema_name: str,
363
- file: Optional[str] = None, index: int = 0,
364
- patch_delete_fields: Optional[str] = None,
365
- noignore: bool = False, ignore: Optional[List[str]] = None,
366
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
339
+ def _patch_data(portal: Portal, data: dict, schema_name: str,
340
+ file: Optional[str] = None, index: int = 0,
341
+ patch_delete_fields: Optional[str] = None,
342
+ noignore: bool = False, ignore: Optional[List[str]] = None,
343
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
367
344
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
368
345
  if isinstance(file, str) and isinstance(index, int):
369
346
  _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})")
@@ -390,11 +367,11 @@ def patch_data(portal: Portal, data: dict, schema_name: str,
390
367
  return
391
368
 
392
369
 
393
- def upsert_data(portal: Portal, data: dict, schema_name: str,
394
- file: Optional[str] = None, index: int = 0,
395
- patch_delete_fields: Optional[str] = None,
396
- noignore: bool = False, ignore: Optional[List[str]] = None,
397
- confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
370
+ def _upsert_data(portal: Portal, data: dict, schema_name: str,
371
+ file: Optional[str] = None, index: int = 0,
372
+ patch_delete_fields: Optional[str] = None,
373
+ noignore: bool = False, ignore: Optional[List[str]] = None,
374
+ confirm: bool = False, verbose: bool = False, debug: bool = False) -> None:
398
375
  if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
399
376
  if isinstance(file, str) and isinstance(index, int):
400
377
  _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
@@ -423,6 +400,188 @@ def upsert_data(portal: Portal, data: dict, schema_name: str,
423
400
  return
424
401
 
425
402
 
403
def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None,
               verbose: bool = False, debug: bool = False, noprogress: bool = False) -> bool:
    """
    Loads JSON data into the Portal via snovault.loadxl, using a Portal created from an INI file.

    :param load: Path to a directory of JSON files, each named for a schema (snake-case or not),
        or to a single JSON file containing either a list of items or a dictionary mapping
        schema names to lists of items. A leading "~" is expanded; a relative path is
        resolved against the current directory.
    :param ini_file: Path to the Portal INI file; if falsy then _DEFAULT_INI_FILE_FOR_LOAD is used.
    :param explicit_schema_name: Schema name to use for a single JSON file containing a list,
        rather than deriving it from the file name.
    :param verbose: If True then prints what is being loaded and from where.
    :param debug: If True then Portal creation output is not captured, and (with noprogress)
        a line per loaded item is printed.
    :param noprogress: If True then no progress bar is displayed.
    :return: True if a load was performed, otherwise False (after printing the reason).

    Note that this exits the process (status 1) if the INI file does not exist.
    """

    from snovault.loadxl import load_all_gen, LoadGenWrapper
    from dcicutils.captured_output import captured_output
    from dcicutils.progress_bar import ProgressBar

    def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict) -> None:
        """
        Runs snovault.loadxl on the given directory, reporting progress as items are processed.
        The schema_names_to_load dictionary maps each file base name to its number of items.
        """

        progress_total = sum(schema_names_to_load.values()) * 2  # loadxl does two passes
        progress_bar = ProgressBar(progress_total) if not noprogress else None

        def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str:
            if not isinstance(encoding, str):
                encoding = "utf-8"
            if isinstance(str_or_bytes, bytes):
                return str_or_bytes.decode(encoding).strip()
            elif isinstance(str_or_bytes, str):
                return str_or_bytes.strip()
            return ""

        LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$")
        LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Check",
                              "CHECK": "Validate", "ERROR": "Error"}
        current_item_type = None
        current_item_count = 0
        current_item_total = 0
        total_item_count = 0
        for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory,
                                                docsdir=None, overwrite=True, verbose=True)):
            total_item_count += 1
            match = LOADXL_RESPONSE_PATTERN.match(decode_bytes(item))
            if not match:
                continue
            # Skip (rather than fail with a KeyError on) any line which happens
            # to match the pattern but does not start with a known loadxl action.
            if (action := LOADXL_ACTION_NAME.get(match.group(1).upper())) is None:
                continue
            # Group 2 is the identifying value; group 3 is the item type.
            item_type = match.group(3)
            if current_item_type != item_type:
                if noprogress and debug and current_item_type is not None:
                    print()
                current_item_type = item_type
                current_item_count = 0
                # The type name reported by loadxl may not be a key here (e.g. if the file
                # was not snake-case named); this total is only for display so default to zero.
                current_item_total = schema_names_to_load.get(item_type, 0)
                if progress_bar:
                    progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}")
            current_item_count += 1
            if progress_bar:
                progress_bar.set_progress(total_item_count)
            elif debug:
                print(f"{current_item_type}: {current_item_count} of {current_item_total} ({action})")
        if progress_bar:
            progress_bar.set_description("▶ Load Complete")
            print()

    if not ini_file:
        ini_file = _DEFAULT_INI_FILE_FOR_LOAD
    if not os.path.isabs(ini_file := os.path.expanduser(ini_file)):
        ini_file = os.path.join(os.getcwd(), ini_file)
    if not os.path.exists(ini_file):
        _print(f"The INI file required for --load is not found: {ini_file}")
        exit(1)

    if not os.path.isabs(load := os.path.expanduser(load)):
        load = os.path.join(os.getcwd(), load)
    if not os.path.exists(load):
        # Say why nothing was loaded, as the caller may ignore the return value.
        _print(f"Load file or directory does not exist: {load}")
        return False

    if os.path.isdir(load):
        inserts_directory = load
        inserts_file = None
    else:
        inserts_directory = None
        inserts_file = load

    portal = None
    with captured_output(not debug):
        portal = Portal(ini_file)

    if inserts_file:
        # A single JSON file: write its contents to schema-named file(s) in a temporary
        # directory, and then load that directory via a recursive call.
        try:
            with io.open(inserts_file, "r") as f:
                data = json.load(f)
        except (OSError, ValueError):
            _print(f"Cannot load JSON data from file: {inserts_file}")
            return False
        if isinstance(data, list):
            if not (schema_name := explicit_schema_name):
                if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)):
                    _print(f"Unable to determine schema name for JSON data file: {inserts_file}")
                    return False
            with temporary_directory() as tmpdir:
                file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json")
                with io.open(file_name, "w") as f:
                    json.dump(data, f)
                return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=explicit_schema_name,
                                  verbose=verbose, debug=debug, noprogress=noprogress)
        elif isinstance(data, dict):
            if not _is_schema_name_list(portal, schema_names := list(data.keys())):
                _print(f"Unrecognized types in JSON data file: {inserts_file}")
                return False
            with temporary_directory() as tmpdir:
                nfiles = 0
                for schema_name in schema_names:
                    if not isinstance(schema_data := data[schema_name], list):
                        _print(f"Unexpected value for data type ({schema_name})"
                               f" in JSON data file: {inserts_file} ▶ ignoring")
                        continue
                    file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json")
                    with io.open(file_name, "w") as f:
                        json.dump(schema_data, f)
                    nfiles += 1
                if nfiles > 0:
                    return _load_data(load=tmpdir, ini_file=ini_file,
                                      verbose=verbose, debug=debug, noprogress=noprogress)
            # Nothing usable in the file; do not report this as a successful load.
            _print(f"JSON data file contains no valid data: {inserts_file}")
            return False
        else:
            _print(f"Unrecognized JSON data in file: {inserts_file}")
            return False

    if verbose:
        _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}")
        _print(f"Portal INI file for load is: {ini_file}")

    schema_names = list(_get_schemas(portal).keys())
    known_schema_names = set(schema_names) | {to_snake_case(item) for item in schema_names}
    schema_names_to_load = {}  # file base name -> number of items in the file
    json_files_to_load = {}  # file base name -> full path of the file
    some_files_ignored = False

    for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")):
        json_file_name = os.path.basename(json_file_path)
        schema_name = json_file_name[:-len(".json")]
        if schema_name not in known_schema_names:
            _print(f"File is not named for a known schema: {json_file_name} ▶ ignoring")
            some_files_ignored = True
            continue
        try:
            with io.open(json_file_path, "r") as f:
                data = json.load(f)
        except (OSError, ValueError):
            _print(f"Cannot load JSON data from file: {json_file_path} ▶ ignoring")
            some_files_ignored = True
            continue
        if not isinstance(data, list):
            _print(f"Data JSON file does not contain an array: {json_file_path} ▶ ignoring")
            some_files_ignored = True
        elif not data:
            _print(f"Data JSON file contains no items: {json_file_path} ▶ ignoring")
            some_files_ignored = True
        else:
            schema_names_to_load[schema_name] = len(data)
            json_files_to_load[schema_name] = json_file_path

    if not schema_names_to_load:
        _print(f"Directory contains no valid data: {inserts_directory}")
        return False

    if some_files_ignored:
        # The loader is handed a whole directory, so to actually ignore the files reported
        # above, copy just the valid files to a temporary directory and load from there.
        with temporary_directory() as tmpdir:
            if debug:
                _print(f"Using temporary directory: {tmpdir}")
            for json_file_path in json_files_to_load.values():
                shutil.copy(json_file_path, tmpdir)
            loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load)
    else:
        loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load)
    if verbose:
        _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}")
    return True
574
+
575
+
576
+ def _is_schema_name_list(portal: Portal, keys: list) -> bool:
577
+ if isinstance(keys, list):
578
+ for key in keys:
579
+ if portal.get_schema(key) is None:
580
+ return False
581
+ return True
582
+ return False
583
+
584
+
426
585
  def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict:
427
586
  ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE
428
587
  if isinstance(ignore, list):
@@ -498,17 +657,21 @@ def _parse_delete_fields(value: str) -> str:
498
657
 
499
658
 
500
659
  def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
501
- try:
502
- if not value.endswith(".json"):
503
- return None
504
- _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
505
- return schema_name
506
- except Exception:
507
- return False
660
+ if isinstance(value, str) and value:
661
+ try:
662
+ if value.endswith(".json"):
663
+ value = value[:-5]
664
+ _, schema_name = _get_schema(portal, os.path.basename(value))
665
+ return schema_name
666
+ except Exception:
667
+ pass
668
+ return False
508
669
 
509
670
 
510
671
  @lru_cache(maxsize=1)
511
672
  def _get_schemas(portal: Portal) -> Optional[dict]:
673
+ if portal.vapp:
674
+ return portal.vapp.get("/profiles/?frame=raw").json
512
675
  return portal.get_schemas()
513
676
 
514
677
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.14.0.1b9
3
+ Version: 8.14.0.1b11
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -60,7 +60,7 @@ dcicutils/s3_utils.py,sha256=h2B9ftOo-kxqfiKth5ZDC_cAUFy1Pbu7BrVanFnE5Iw,28839
60
60
  dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
61
61
  dcicutils/scripts/publish_to_pypi.py,sha256=sMd4WASQGlxlh7uLrt2eGkFRXYgONVmvIg8mClMS5RQ,13903
62
62
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
63
- dcicutils/scripts/update_portal_object.py,sha256=9_ZpfKwIJUDbyEI0Xqu_9keMxTIVZ_CyxX8WeGrFI14,24376
63
+ dcicutils/scripts/update_portal_object.py,sha256=fLgsPgnugKLS18A8JwP8O_UdmeFqNIjUIhrEFlIhfgE,32589
64
64
  dcicutils/scripts/view_portal_object.py,sha256=lcgXWH9ooVf7tJDIRnoFGOgT0wYLGhiJlJW3a9w6A_c,36983
65
65
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.14.0.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.14.0.1b9.dist-info/METADATA,sha256=6dzBhVwY1DNx4-lCS_3FSaztYt-JC8bNFU9o8VoyycA,3439
80
- dcicutils-8.14.0.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.14.0.1b9.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.14.0.1b9.dist-info/RECORD,,
78
+ dcicutils-8.14.0.1b11.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.14.0.1b11.dist-info/METADATA,sha256=HFNj87yKbAwTA98cVDJm_0r9aVJssg7zo5dIsqngVrg,3440
80
+ dcicutils-8.14.0.1b11.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.14.0.1b11.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.14.0.1b11.dist-info/RECORD,,