dcicutils 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/data_readers.py +5 -1
- dcicutils/structured_data.py +41 -17
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/METADATA +1 -1
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/RECORD +7 -7
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/entry_points.txt +0 -0
dcicutils/data_readers.py
CHANGED
@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
|
|
77
77
|
def warnings(self) -> List[str]:
|
78
78
|
warnings = []
|
79
79
|
if self._warning_empty_headers:
|
80
|
-
|
80
|
+
if hasattr(self, "sheet_name") and self.sheet_name:
|
81
|
+
src = {"sheet": self.sheet_name}
|
82
|
+
else:
|
83
|
+
src = {"file": self.file}
|
84
|
+
warnings.append({"src": src,
|
81
85
|
"warning": "Empty header column encountered; ignoring it and all subsequent columns."})
|
82
86
|
if self._warning_extra_values:
|
83
87
|
for row_number in self._warning_extra_values:
|
dcicutils/structured_data.py
CHANGED
@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
|
|
44
44
|
EXTRA_FILE_TYPE_NAME = "ExtraFile"
|
45
45
|
EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
|
46
46
|
|
47
|
-
ENABLE_ARRAY_SHEET_REFS =
|
47
|
+
ENABLE_ARRAY_SHEET_REFS = False
|
48
48
|
|
49
49
|
# The ExtraFile pseudo-type schema.
|
50
50
|
EXTRA_FILE_SCHEMA = {
|
@@ -91,6 +91,7 @@ class StructuredDataSet:
|
|
91
91
|
self._errors = {}
|
92
92
|
self._resolved_refs = set()
|
93
93
|
self._validated = False
|
94
|
+
self._nrows = 0
|
94
95
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
95
96
|
self._norefs = True if norefs is True else False
|
96
97
|
self._debug_sleep = None
|
@@ -194,6 +195,10 @@ class StructuredDataSet:
|
|
194
195
|
upload_file["path"] = file_path
|
195
196
|
return upload_files
|
196
197
|
|
198
|
+
@property
|
199
|
+
def nrows(self) -> int:
|
200
|
+
return self._nrows
|
201
|
+
|
197
202
|
def compare(self, progress: Optional[Callable] = None) -> dict:
|
198
203
|
def get_counts() -> int:
|
199
204
|
ntypes = 0
|
@@ -288,8 +293,6 @@ class StructuredDataSet:
|
|
288
293
|
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
|
289
294
|
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
|
290
295
|
self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
|
291
|
-
if self._progress:
|
292
|
-
self._progress({"finish": True})
|
293
296
|
# TODO: Do we really need progress reporting for the below?
|
294
297
|
# Check for unresolved reference errors which really are not because of ordering.
|
295
298
|
# Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
|
@@ -301,11 +304,25 @@ class StructuredDataSet:
|
|
301
304
|
# if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
|
302
305
|
ref_errors_actual.append(ref_error)
|
303
306
|
else:
|
307
|
+
# Now found so subtract off from ref_total_notfound_count.
|
308
|
+
self.portal._ref_total_notfound_count -= 1
|
304
309
|
self._resolved_refs.add((ref, resolved.get("uuid")))
|
305
310
|
if ref_errors_actual:
|
306
311
|
self._errors["ref"] = ref_errors_actual
|
307
312
|
else:
|
308
313
|
del self._errors["ref"]
|
314
|
+
if self._progress:
|
315
|
+
# TODO: Refactor with same thing below in _load_reader.
|
316
|
+
self._progress({
|
317
|
+
"finish": True,
|
318
|
+
"refs": self.ref_total_count,
|
319
|
+
"refs_found": self.ref_total_found_count,
|
320
|
+
"refs_not_found": self.ref_total_notfound_count,
|
321
|
+
"refs_lookup": self.ref_lookup_count,
|
322
|
+
"refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
|
323
|
+
"refs_exists_cache_hit": self.ref_exists_cache_hit_count,
|
324
|
+
"refs_invalid": self.ref_invalid_identifying_property_count
|
325
|
+
})
|
309
326
|
|
310
327
|
def _load_json_file(self, file: str) -> None:
|
311
328
|
with open(file) as f:
|
@@ -316,6 +333,7 @@ class StructuredDataSet:
|
|
316
333
|
noschema = False
|
317
334
|
structured_row_template = None
|
318
335
|
for row in reader:
|
336
|
+
self._nrows += 1
|
319
337
|
if self._debug_sleep:
|
320
338
|
time.sleep(float(self._debug_sleep))
|
321
339
|
if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
|
@@ -338,7 +356,8 @@ class StructuredDataSet:
|
|
338
356
|
"refs_found": self.ref_total_found_count,
|
339
357
|
"refs_not_found": self.ref_total_notfound_count,
|
340
358
|
"refs_lookup": self.ref_lookup_count,
|
341
|
-
"
|
359
|
+
"refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
|
360
|
+
"refs_exists_cache_hit": self.ref_exists_cache_hit_count,
|
342
361
|
"refs_invalid": self.ref_invalid_identifying_property_count
|
343
362
|
})
|
344
363
|
self._note_warning(reader.warnings, "reader")
|
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
|
|
510
529
|
set_value_backtrack_object(i, p)
|
511
530
|
data = data[p]
|
512
531
|
if (p := path[-1]) == -1 and isinstance(value, str):
|
513
|
-
if ENABLE_ARRAY_SHEET_REFS and
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
532
|
+
if ENABLE_ARRAY_SHEET_REFS and False:
|
533
|
+
# TODO: IN PROGRESS. DISABLED FOR NOW.
|
534
|
+
if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
|
535
|
+
if self._obtain_array_values:
|
536
|
+
values = self._obtain_array_values(value)
|
537
|
+
if sheet_name_containing_array := value[5:].strip():
|
538
|
+
if dot := sheet_name_containing_array.find(".") > 0:
|
539
|
+
if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
|
540
|
+
pass
|
541
|
+
# sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
|
522
542
|
values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
|
523
543
|
if mapv:
|
524
544
|
values = [mapv(value, src) for value in values]
|
@@ -891,7 +911,6 @@ class Portal(PortalBase):
|
|
891
911
|
|
892
912
|
def ref_exists(self, type_name: str, value: Optional[str] = None,
|
893
913
|
called_from_map_ref: bool = False) -> Optional[dict]:
|
894
|
-
# print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
|
895
914
|
if not value:
|
896
915
|
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
897
916
|
if not type_name or not value:
|
@@ -919,7 +938,9 @@ class Portal(PortalBase):
|
|
919
938
|
# self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
|
920
939
|
return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
|
921
940
|
# Reference is NOT cached here; lookup INTERNALLY first.
|
922
|
-
if
|
941
|
+
# Skip updating _ref_total_notfound_count here as if not found we look in portal below.
|
942
|
+
if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
|
943
|
+
skip_total_notfound_count=True):
|
923
944
|
# Reference was resolved internally (note: here only if resolved is not an empty dictionary).
|
924
945
|
if called_from_map_ref:
|
925
946
|
self._ref_total_found_count += 1
|
@@ -965,13 +986,13 @@ class Portal(PortalBase):
|
|
965
986
|
return None
|
966
987
|
|
967
988
|
def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
|
968
|
-
update_counts: bool = False
|
989
|
+
update_counts: bool = False,
|
990
|
+
skip_total_notfound_count: bool = False) -> Optional[dict]:
|
969
991
|
"""
|
970
992
|
Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
|
971
993
|
If found then returns a dictionary containing the (given) type name and the uuid (if any)
|
972
994
|
of the resolved item.
|
973
995
|
"""
|
974
|
-
# print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
|
975
996
|
if not value:
|
976
997
|
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
977
998
|
if not type_name or not value:
|
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
|
|
990
1011
|
resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
|
991
1012
|
self._cache_ref(type_name, value, resolved)
|
992
1013
|
return resolved
|
1014
|
+
if update_counts:
|
1015
|
+
if not skip_total_notfound_count:
|
1016
|
+
self._ref_total_notfound_count += 1
|
993
1017
|
return {} # Empty return means not resolved internally.
|
994
1018
|
|
995
1019
|
def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
|
@@ -10,7 +10,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
12
12
|
dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
|
13
|
-
dcicutils/data_readers.py,sha256=
|
13
|
+
dcicutils/data_readers.py,sha256=WWH_VDz2KnNv_FoTjfFwrg6zh9asl8Q-uEV2V3XuyUg,7414
|
14
14
|
dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
|
15
15
|
dcicutils/datetime_utils.py,sha256=EODDGAngp1yh2ZlDIuI7tB74JBJucw2DljqfPknzK0Y,4666
|
16
16
|
dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
|
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
62
62
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
63
63
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
64
64
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
65
|
-
dcicutils/structured_data.py,sha256=
|
65
|
+
dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
|
66
66
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
67
67
|
dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
|
68
68
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
69
69
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
70
70
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
71
71
|
dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
|
72
|
-
dcicutils-8.8.1.
|
73
|
-
dcicutils-8.8.1.
|
74
|
-
dcicutils-8.8.1.
|
75
|
-
dcicutils-8.8.1.
|
76
|
-
dcicutils-8.8.1.
|
72
|
+
dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
73
|
+
dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
|
74
|
+
dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
75
|
+
dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
76
|
+
dcicutils-8.8.1.1b6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|