dcicutils 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- dcicutils/data_readers.py +5 -1
- dcicutils/structured_data.py +41 -17
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/METADATA +1 -1
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/RECORD +7 -7
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/entry_points.txt +0 -0
dcicutils/data_readers.py
CHANGED
@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
|
|
77
77
|
def warnings(self) -> List[str]:
|
78
78
|
warnings = []
|
79
79
|
if self._warning_empty_headers:
|
80
|
-
|
80
|
+
if hasattr(self, "sheet_name") and self.sheet_name:
|
81
|
+
src = {"sheet": self.sheet_name}
|
82
|
+
else:
|
83
|
+
src = {"file": self.file}
|
84
|
+
warnings.append({"src": src,
|
81
85
|
"warning": "Empty header column encountered; ignoring it and all subsequent columns."})
|
82
86
|
if self._warning_extra_values:
|
83
87
|
for row_number in self._warning_extra_values:
|
dcicutils/structured_data.py
CHANGED
@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
|
|
44
44
|
EXTRA_FILE_TYPE_NAME = "ExtraFile"
|
45
45
|
EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
|
46
46
|
|
47
|
-
ENABLE_ARRAY_SHEET_REFS =
|
47
|
+
ENABLE_ARRAY_SHEET_REFS = False
|
48
48
|
|
49
49
|
# The ExtraFile pseudo-type schema.
|
50
50
|
EXTRA_FILE_SCHEMA = {
|
@@ -91,6 +91,7 @@ class StructuredDataSet:
|
|
91
91
|
self._errors = {}
|
92
92
|
self._resolved_refs = set()
|
93
93
|
self._validated = False
|
94
|
+
self._nrows = 0
|
94
95
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
95
96
|
self._norefs = True if norefs is True else False
|
96
97
|
self._debug_sleep = None
|
@@ -194,6 +195,10 @@ class StructuredDataSet:
|
|
194
195
|
upload_file["path"] = file_path
|
195
196
|
return upload_files
|
196
197
|
|
198
|
+
@property
|
199
|
+
def nrows(self) -> int:
|
200
|
+
return self._nrows
|
201
|
+
|
197
202
|
def compare(self, progress: Optional[Callable] = None) -> dict:
|
198
203
|
def get_counts() -> int:
|
199
204
|
ntypes = 0
|
@@ -288,8 +293,6 @@ class StructuredDataSet:
|
|
288
293
|
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
|
289
294
|
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
|
290
295
|
self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
|
291
|
-
if self._progress:
|
292
|
-
self._progress({"finish": True})
|
293
296
|
# TODO: Do we really need progress reporting for the below?
|
294
297
|
# Check for unresolved reference errors which really are not because of ordering.
|
295
298
|
# Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
|
@@ -301,11 +304,25 @@ class StructuredDataSet:
|
|
301
304
|
# if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
|
302
305
|
ref_errors_actual.append(ref_error)
|
303
306
|
else:
|
307
|
+
# Now found so subtract off from ref_total_notfound_count.
|
308
|
+
self.portal._ref_total_notfound_count -= 1
|
304
309
|
self._resolved_refs.add((ref, resolved.get("uuid")))
|
305
310
|
if ref_errors_actual:
|
306
311
|
self._errors["ref"] = ref_errors_actual
|
307
312
|
else:
|
308
313
|
del self._errors["ref"]
|
314
|
+
if self._progress:
|
315
|
+
# TODO: Refactor with same thing below in _load_reader.
|
316
|
+
self._progress({
|
317
|
+
"finish": True,
|
318
|
+
"refs": self.ref_total_count,
|
319
|
+
"refs_found": self.ref_total_found_count,
|
320
|
+
"refs_not_found": self.ref_total_notfound_count,
|
321
|
+
"refs_lookup": self.ref_lookup_count,
|
322
|
+
"refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
|
323
|
+
"refs_exists_cache_hit": self.ref_exists_cache_hit_count,
|
324
|
+
"refs_invalid": self.ref_invalid_identifying_property_count
|
325
|
+
})
|
309
326
|
|
310
327
|
def _load_json_file(self, file: str) -> None:
|
311
328
|
with open(file) as f:
|
@@ -316,6 +333,7 @@ class StructuredDataSet:
|
|
316
333
|
noschema = False
|
317
334
|
structured_row_template = None
|
318
335
|
for row in reader:
|
336
|
+
self._nrows += 1
|
319
337
|
if self._debug_sleep:
|
320
338
|
time.sleep(float(self._debug_sleep))
|
321
339
|
if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
|
@@ -338,7 +356,8 @@ class StructuredDataSet:
|
|
338
356
|
"refs_found": self.ref_total_found_count,
|
339
357
|
"refs_not_found": self.ref_total_notfound_count,
|
340
358
|
"refs_lookup": self.ref_lookup_count,
|
341
|
-
"
|
359
|
+
"refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
|
360
|
+
"refs_exists_cache_hit": self.ref_exists_cache_hit_count,
|
342
361
|
"refs_invalid": self.ref_invalid_identifying_property_count
|
343
362
|
})
|
344
363
|
self._note_warning(reader.warnings, "reader")
|
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
|
|
510
529
|
set_value_backtrack_object(i, p)
|
511
530
|
data = data[p]
|
512
531
|
if (p := path[-1]) == -1 and isinstance(value, str):
|
513
|
-
if ENABLE_ARRAY_SHEET_REFS and
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
532
|
+
if ENABLE_ARRAY_SHEET_REFS and False:
|
533
|
+
# TODO: IN PROGRESS. DISABLED FOR NOW.
|
534
|
+
if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
|
535
|
+
if self._obtain_array_values:
|
536
|
+
values = self._obtain_array_values(value)
|
537
|
+
if sheet_name_containing_array := value[5:].strip():
|
538
|
+
if dot := sheet_name_containing_array.find(".") > 0:
|
539
|
+
if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
|
540
|
+
pass
|
541
|
+
# sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
|
522
542
|
values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
|
523
543
|
if mapv:
|
524
544
|
values = [mapv(value, src) for value in values]
|
@@ -891,7 +911,6 @@ class Portal(PortalBase):
|
|
891
911
|
|
892
912
|
def ref_exists(self, type_name: str, value: Optional[str] = None,
|
893
913
|
called_from_map_ref: bool = False) -> Optional[dict]:
|
894
|
-
# print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
|
895
914
|
if not value:
|
896
915
|
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
897
916
|
if not type_name or not value:
|
@@ -919,7 +938,9 @@ class Portal(PortalBase):
|
|
919
938
|
# self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
|
920
939
|
return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
|
921
940
|
# Reference is NOT cached here; lookup INTERNALLY first.
|
922
|
-
if
|
941
|
+
# Skip updating _ref_total_notfound_count here as if not found we look in portal below.
|
942
|
+
if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
|
943
|
+
skip_total_notfound_count=True):
|
923
944
|
# Reference was resolved internally (note: here only if resolved is not an empty dictionary).
|
924
945
|
if called_from_map_ref:
|
925
946
|
self._ref_total_found_count += 1
|
@@ -965,13 +986,13 @@ class Portal(PortalBase):
|
|
965
986
|
return None
|
966
987
|
|
967
988
|
def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
|
968
|
-
update_counts: bool = False) -> Optional[dict]:
|
989
|
+
update_counts: bool = False,
|
990
|
+
skip_total_notfound_count: bool = False) -> Optional[dict]:
|
969
991
|
"""
|
970
992
|
Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
|
971
993
|
If found then returns a dictionary containing the (given) type name and the uuid (if any)
|
972
994
|
of the resolved item.
|
973
995
|
"""
|
974
|
-
# print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
|
975
996
|
if not value:
|
976
997
|
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
977
998
|
if not type_name or not value:
|
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
|
|
990
1011
|
resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
|
991
1012
|
self._cache_ref(type_name, value, resolved)
|
992
1013
|
return resolved
|
1014
|
+
if update_counts:
|
1015
|
+
if not skip_total_notfound_count:
|
1016
|
+
self._ref_total_notfound_count += 1
|
993
1017
|
return {} # Empty return means not resolved internally.
|
994
1018
|
|
995
1019
|
def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
|
@@ -10,7 +10,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
12
12
|
dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
|
13
|
-
dcicutils/data_readers.py,sha256=
|
13
|
+
dcicutils/data_readers.py,sha256=WWH_VDz2KnNv_FoTjfFwrg6zh9asl8Q-uEV2V3XuyUg,7414
|
14
14
|
dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
|
15
15
|
dcicutils/datetime_utils.py,sha256=EODDGAngp1yh2ZlDIuI7tB74JBJucw2DljqfPknzK0Y,4666
|
16
16
|
dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
|
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
62
62
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
63
63
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
64
64
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
65
|
-
dcicutils/structured_data.py,sha256=
|
65
|
+
dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
|
66
66
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
67
67
|
dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
|
68
68
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
69
69
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
70
70
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
71
71
|
dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
|
72
|
-
dcicutils-8.8.1.
|
73
|
-
dcicutils-8.8.1.
|
74
|
-
dcicutils-8.8.1.
|
75
|
-
dcicutils-8.8.1.
|
76
|
-
dcicutils-8.8.1.
|
72
|
+
dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
73
|
+
dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
|
74
|
+
dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
75
|
+
dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
76
|
+
dcicutils-8.8.1.1b6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|