dcicutils 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
dcicutils/data_readers.py CHANGED
@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
77
77
  def warnings(self) -> List[str]:
78
78
  warnings = []
79
79
  if self._warning_empty_headers:
80
- warnings.append({"src": create_dict(file=self.file),
80
+ if hasattr(self, "sheet_name") and self.sheet_name:
81
+ src = {"sheet": self.sheet_name}
82
+ else:
83
+ src = {"file": self.file}
84
+ warnings.append({"src": src,
81
85
  "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
82
86
  if self._warning_extra_values:
83
87
  for row_number in self._warning_extra_values:
dcicutils/structured_data.py CHANGED
@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
44
44
  EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
45
  EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
46
 
47
- ENABLE_ARRAY_SHEET_REFS = True
47
+ ENABLE_ARRAY_SHEET_REFS = False
48
48
 
49
49
  # The ExtraFile pseudo-type schema.
50
50
  EXTRA_FILE_SCHEMA = {
@@ -91,6 +91,7 @@ class StructuredDataSet:
91
91
  self._errors = {}
92
92
  self._resolved_refs = set()
93
93
  self._validated = False
94
+ self._nrows = 0
94
95
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
95
96
  self._norefs = True if norefs is True else False
96
97
  self._debug_sleep = None
@@ -194,6 +195,10 @@ class StructuredDataSet:
194
195
  upload_file["path"] = file_path
195
196
  return upload_files
196
197
 
198
+ @property
199
+ def nrows(self) -> int:
200
+ return self._nrows
201
+
197
202
  def compare(self, progress: Optional[Callable] = None) -> dict:
198
203
  def get_counts() -> int:
199
204
  ntypes = 0
@@ -288,8 +293,6 @@ class StructuredDataSet:
288
293
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
289
294
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
290
295
  self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
291
- if self._progress:
292
- self._progress({"finish": True})
293
296
  # TODO: Do we really need progress reporting for the below?
294
297
  # Check for unresolved reference errors which really are not because of ordering.
295
298
  # Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
@@ -301,11 +304,25 @@ class StructuredDataSet:
301
304
  # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
302
305
  ref_errors_actual.append(ref_error)
303
306
  else:
307
+ # Now found so subtract off from ref_total_notfound_count.
308
+ self.portal._ref_total_notfound_count -= 1
304
309
  self._resolved_refs.add((ref, resolved.get("uuid")))
305
310
  if ref_errors_actual:
306
311
  self._errors["ref"] = ref_errors_actual
307
312
  else:
308
313
  del self._errors["ref"]
314
+ if self._progress:
315
+ # TODO: Refactor with same thing below in _load_reader.
316
+ self._progress({
317
+ "finish": True,
318
+ "refs": self.ref_total_count,
319
+ "refs_found": self.ref_total_found_count,
320
+ "refs_not_found": self.ref_total_notfound_count,
321
+ "refs_lookup": self.ref_lookup_count,
322
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
323
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
324
+ "refs_invalid": self.ref_invalid_identifying_property_count
325
+ })
309
326
 
310
327
  def _load_json_file(self, file: str) -> None:
311
328
  with open(file) as f:
@@ -316,6 +333,7 @@ class StructuredDataSet:
316
333
  noschema = False
317
334
  structured_row_template = None
318
335
  for row in reader:
336
+ self._nrows += 1
319
337
  if self._debug_sleep:
320
338
  time.sleep(float(self._debug_sleep))
321
339
  if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
@@ -338,7 +356,8 @@ class StructuredDataSet:
338
356
  "refs_found": self.ref_total_found_count,
339
357
  "refs_not_found": self.ref_total_notfound_count,
340
358
  "refs_lookup": self.ref_lookup_count,
341
- "refs_cache_hit": self.ref_exists_cache_hit_count,
359
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
360
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
342
361
  "refs_invalid": self.ref_invalid_identifying_property_count
343
362
  })
344
363
  self._note_warning(reader.warnings, "reader")
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
510
529
  set_value_backtrack_object(i, p)
511
530
  data = data[p]
512
531
  if (p := path[-1]) == -1 and isinstance(value, str):
513
- if ENABLE_ARRAY_SHEET_REFS and value.lower().startswith("[ref:") and value.endswith("]"):
514
- if self._obtain_array_values:
515
- values = self._obtain_array_values(value)
516
- if sheet_name_containing_array := value[5:].strip():
517
- if dot := sheet_name_containing_array.find(".") > 0:
518
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
519
- pass
520
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
521
- pass
532
+ if ENABLE_ARRAY_SHEET_REFS and False:
533
+ # TODO: IN PROGRESS. DISABLED FOR NOW.
534
+ if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
535
+ if self._obtain_array_values:
536
+ values = self._obtain_array_values(value)
537
+ if sheet_name_containing_array := value[5:].strip():
538
+ if dot := sheet_name_containing_array.find(".") > 0:
539
+ if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
540
+ pass
541
+ # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
522
542
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
523
543
  if mapv:
524
544
  values = [mapv(value, src) for value in values]
@@ -891,7 +911,6 @@ class Portal(PortalBase):
891
911
 
892
912
  def ref_exists(self, type_name: str, value: Optional[str] = None,
893
913
  called_from_map_ref: bool = False) -> Optional[dict]:
894
- # print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
895
914
  if not value:
896
915
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
897
916
  if not type_name or not value:
@@ -919,7 +938,9 @@ class Portal(PortalBase):
919
938
  # self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
920
939
  return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
921
940
  # Reference is NOT cached here; lookup INTERNALLY first.
922
- if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref):
941
+ # Skip updating _ref_total_notfound_count here as if not found we look in portal below.
942
+ if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
943
+ skip_total_notfound_count=True):
923
944
  # Reference was resolved internally (note: here only if resolved is not an empty dictionary).
924
945
  if called_from_map_ref:
925
946
  self._ref_total_found_count += 1
@@ -965,13 +986,13 @@ class Portal(PortalBase):
965
986
  return None
966
987
 
967
988
  def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
968
- update_counts: bool = False) -> Optional[dict]:
989
+ update_counts: bool = False,
990
+ skip_total_notfound_count: bool = False) -> Optional[dict]:
969
991
  """
970
992
  Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
971
993
  If found then returns a dictionary containing the (given) type name and the uuid (if any)
972
994
  of the resolved item.
973
995
  """
974
- # print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
975
996
  if not value:
976
997
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
977
998
  if not type_name or not value:
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
990
1011
  resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
991
1012
  self._cache_ref(type_name, value, resolved)
992
1013
  return resolved
1014
+ if update_counts:
1015
+ if not skip_total_notfound_count:
1016
+ self._ref_total_notfound_count += 1
993
1017
  return {} # Empty return means not resolved internally.
994
1018
 
995
1019
  def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
dcicutils-8.8.1.1b6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b4
3
+ Version: 8.8.1.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
dcicutils-8.8.1.1b6.dist-info/RECORD CHANGED
@@ -10,7 +10,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
10
10
  dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
11
11
  dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
12
12
  dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
13
- dcicutils/data_readers.py,sha256=wNRNlCUpsrvFHUKXYhE9fMZ392NWH4-KDqwW5f6avGc,7265
13
+ dcicutils/data_readers.py,sha256=WWH_VDz2KnNv_FoTjfFwrg6zh9asl8Q-uEV2V3XuyUg,7414
14
14
  dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
15
15
  dcicutils/datetime_utils.py,sha256=EODDGAngp1yh2ZlDIuI7tB74JBJucw2DljqfPknzK0Y,4666
16
16
  dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
62
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
63
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
64
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=bc0sTFQQTldLM0HIV9-o_DHIY4kjEdHBPybcrfiuowM,58004
65
+ dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
66
66
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
67
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
68
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
69
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
70
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
71
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.1.1b4.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.1.1b4.dist-info/METADATA,sha256=e5E4t8Ati-ECkMU0nNomHXag-7iXXz7G7DBJqfz0MSE,3356
74
- dcicutils-8.8.1.1b4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.1.1b4.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.1.1b4.dist-info/RECORD,,
72
+ dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
+ dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
74
+ dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
+ dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
+ dcicutils-8.8.1.1b6.dist-info/RECORD,,