dcicutils 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dcicutils/data_readers.py CHANGED
@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
77
77
  def warnings(self) -> List[str]:
78
78
  warnings = []
79
79
  if self._warning_empty_headers:
80
- warnings.append({"src": create_dict(file=self.file),
80
+ if hasattr(self, "sheet_name") and self.sheet_name:
81
+ src = {"sheet": self.sheet_name}
82
+ else:
83
+ src = {"file": self.file}
84
+ warnings.append({"src": src,
81
85
  "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
82
86
  if self._warning_extra_values:
83
87
  for row_number in self._warning_extra_values:
@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
44
44
  EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
45
  EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
46
 
47
- ENABLE_ARRAY_SHEET_REFS = True
47
+ ENABLE_ARRAY_SHEET_REFS = False
48
48
 
49
49
  # The ExtraFile pseudo-type schema.
50
50
  EXTRA_FILE_SCHEMA = {
@@ -91,6 +91,7 @@ class StructuredDataSet:
91
91
  self._errors = {}
92
92
  self._resolved_refs = set()
93
93
  self._validated = False
94
+ self._nrows = 0
94
95
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
95
96
  self._norefs = True if norefs is True else False
96
97
  self._debug_sleep = None
@@ -194,6 +195,10 @@ class StructuredDataSet:
194
195
  upload_file["path"] = file_path
195
196
  return upload_files
196
197
 
198
+ @property
199
+ def nrows(self) -> int:
200
+ return self._nrows
201
+
197
202
  def compare(self, progress: Optional[Callable] = None) -> dict:
198
203
  def get_counts() -> int:
199
204
  ntypes = 0
@@ -288,8 +293,6 @@ class StructuredDataSet:
288
293
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
289
294
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
290
295
  self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
291
- if self._progress:
292
- self._progress({"finish": True})
293
296
  # TODO: Do we really need progress reporting for the below?
294
297
  # Check for unresolved reference errors which really are not because of ordering.
295
298
  # Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
@@ -301,11 +304,25 @@ class StructuredDataSet:
301
304
  # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
302
305
  ref_errors_actual.append(ref_error)
303
306
  else:
307
+ # Now found so subtract off from ref_total_notfound_count.
308
+ self.portal._ref_total_notfound_count -= 1
304
309
  self._resolved_refs.add((ref, resolved.get("uuid")))
305
310
  if ref_errors_actual:
306
311
  self._errors["ref"] = ref_errors_actual
307
312
  else:
308
313
  del self._errors["ref"]
314
+ if self._progress:
315
+ # TODO: Refactor with same thing below in _load_reader.
316
+ self._progress({
317
+ "finish": True,
318
+ "refs": self.ref_total_count,
319
+ "refs_found": self.ref_total_found_count,
320
+ "refs_not_found": self.ref_total_notfound_count,
321
+ "refs_lookup": self.ref_lookup_count,
322
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
323
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
324
+ "refs_invalid": self.ref_invalid_identifying_property_count
325
+ })
309
326
 
310
327
  def _load_json_file(self, file: str) -> None:
311
328
  with open(file) as f:
@@ -316,6 +333,7 @@ class StructuredDataSet:
316
333
  noschema = False
317
334
  structured_row_template = None
318
335
  for row in reader:
336
+ self._nrows += 1
319
337
  if self._debug_sleep:
320
338
  time.sleep(float(self._debug_sleep))
321
339
  if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
@@ -338,7 +356,8 @@ class StructuredDataSet:
338
356
  "refs_found": self.ref_total_found_count,
339
357
  "refs_not_found": self.ref_total_notfound_count,
340
358
  "refs_lookup": self.ref_lookup_count,
341
- "refs_cache_hit": self.ref_exists_cache_hit_count,
359
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
360
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
342
361
  "refs_invalid": self.ref_invalid_identifying_property_count
343
362
  })
344
363
  self._note_warning(reader.warnings, "reader")
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
510
529
  set_value_backtrack_object(i, p)
511
530
  data = data[p]
512
531
  if (p := path[-1]) == -1 and isinstance(value, str):
513
- if ENABLE_ARRAY_SHEET_REFS and value.lower().startswith("[ref:") and value.endswith("]"):
514
- if self._obtain_array_values:
515
- values = self._obtain_array_values(value)
516
- if sheet_name_containing_array := value[5:].strip():
517
- if dot := sheet_name_containing_array.find(".") > 0:
518
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
519
- pass
520
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
521
- pass
532
+ if ENABLE_ARRAY_SHEET_REFS and False:
533
+ # TODO: IN PROGRESS. DISABLED FOR NOW.
534
+ if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
535
+ if self._obtain_array_values:
536
+ values = self._obtain_array_values(value)
537
+ if sheet_name_containing_array := value[5:].strip():
538
+ if dot := sheet_name_containing_array.find(".") > 0:
539
+ if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
540
+ pass
541
+ # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
522
542
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
523
543
  if mapv:
524
544
  values = [mapv(value, src) for value in values]
@@ -891,7 +911,6 @@ class Portal(PortalBase):
891
911
 
892
912
  def ref_exists(self, type_name: str, value: Optional[str] = None,
893
913
  called_from_map_ref: bool = False) -> Optional[dict]:
894
- # print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
895
914
  if not value:
896
915
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
897
916
  if not type_name or not value:
@@ -919,7 +938,9 @@ class Portal(PortalBase):
919
938
  # self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
920
939
  return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
921
940
  # Reference is NOT cached here; lookup INTERNALLY first.
922
- if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref):
941
+ # Skip updating _ref_total_notfound_count here as if not found we look in portal below.
942
+ if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
943
+ skip_total_notfound_count=True):
923
944
  # Reference was resolved internally (note: here only if resolved is not an empty dictionary).
924
945
  if called_from_map_ref:
925
946
  self._ref_total_found_count += 1
@@ -965,13 +986,13 @@ class Portal(PortalBase):
965
986
  return None
966
987
 
967
988
  def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
968
- update_counts: bool = False) -> Optional[dict]:
989
+ update_counts: bool = False,
990
+ skip_total_notfound_count: bool = False) -> Optional[dict]:
969
991
  """
970
992
  Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
971
993
  If found then returns a dictionary containing the (given) type name and the uuid (if any)
972
994
  of the resolved item.
973
995
  """
974
- # print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
975
996
  if not value:
976
997
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
977
998
  if not type_name or not value:
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
990
1011
  resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
991
1012
  self._cache_ref(type_name, value, resolved)
992
1013
  return resolved
1014
+ if update_counts:
1015
+ if not skip_total_notfound_count:
1016
+ self._ref_total_notfound_count += 1
993
1017
  return {} # Empty return means not resolved internally.
994
1018
 
995
1019
  def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b4
3
+ Version: 8.8.1.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -10,7 +10,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
10
10
  dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
11
11
  dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
12
12
  dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
13
- dcicutils/data_readers.py,sha256=wNRNlCUpsrvFHUKXYhE9fMZ392NWH4-KDqwW5f6avGc,7265
13
+ dcicutils/data_readers.py,sha256=WWH_VDz2KnNv_FoTjfFwrg6zh9asl8Q-uEV2V3XuyUg,7414
14
14
  dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
15
15
  dcicutils/datetime_utils.py,sha256=EODDGAngp1yh2ZlDIuI7tB74JBJucw2DljqfPknzK0Y,4666
16
16
  dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
62
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
63
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
64
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=bc0sTFQQTldLM0HIV9-o_DHIY4kjEdHBPybcrfiuowM,58004
65
+ dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
66
66
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
67
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
68
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
69
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
70
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
71
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.1.1b4.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.1.1b4.dist-info/METADATA,sha256=e5E4t8Ati-ECkMU0nNomHXag-7iXXz7G7DBJqfz0MSE,3356
74
- dcicutils-8.8.1.1b4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.1.1b4.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.1.1b4.dist-info/RECORD,,
72
+ dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
+ dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
74
+ dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
+ dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
+ dcicutils-8.8.1.1b6.dist-info/RECORD,,