dcicutils 8.8.1.1b4__tar.gz → 8.8.1.1b6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75)
  1. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/PKG-INFO +1 -1
  2. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/data_readers.py +5 -1
  3. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/structured_data.py +41 -17
  4. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/pyproject.toml +1 -1
  5. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/LICENSE.txt +0 -0
  6. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/README.rst +0 -0
  7. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/__init__.py +0 -0
  8. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/base.py +0 -0
  9. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/beanstalk_utils.py +0 -0
  10. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/bundle_utils.py +0 -0
  11. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/captured_output.py +0 -0
  12. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/cloudformation_utils.py +0 -0
  13. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/codebuild_utils.py +0 -0
  14. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/command_utils.py +0 -0
  15. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/common.py +0 -0
  16. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/contribution_scripts.py +0 -0
  17. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/contribution_utils.py +0 -0
  18. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/creds_utils.py +0 -0
  19. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/data_utils.py +0 -0
  20. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/datetime_utils.py +0 -0
  21. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/deployment_utils.py +0 -0
  22. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/diff_utils.py +0 -0
  23. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/docker_utils.py +0 -0
  24. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ecr_scripts.py +0 -0
  25. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ecr_utils.py +0 -0
  26. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ecs_utils.py +0 -0
  27. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/env_base.py +0 -0
  28. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/env_manager.py +0 -0
  29. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/env_scripts.py +0 -0
  30. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/env_utils.py +0 -0
  31. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/env_utils_legacy.py +0 -0
  32. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/es_utils.py +0 -0
  33. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/exceptions.py +0 -0
  34. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ff_mocks.py +0 -0
  35. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ff_utils.py +0 -0
  36. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/file_utils.py +0 -0
  37. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/function_cache_decorator.py +0 -0
  38. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/glacier_utils.py +0 -0
  39. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/jh_utils.py +0 -0
  40. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/kibana/dashboards.json +0 -0
  41. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/kibana/readme.md +0 -0
  42. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/lang_utils.py +0 -0
  43. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  44. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  45. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  46. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  47. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  48. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  49. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/license_utils.py +0 -0
  50. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/log_utils.py +0 -0
  51. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/misc_utils.py +0 -0
  52. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/obfuscation_utils.py +0 -0
  53. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/opensearch_utils.py +0 -0
  54. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/portal_object_utils.py +0 -0
  55. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/portal_utils.py +0 -0
  56. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/project_utils.py +0 -0
  57. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/qa_checkers.py +0 -0
  58. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/qa_utils.py +0 -0
  59. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/redis_tools.py +0 -0
  60. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/redis_utils.py +0 -0
  61. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/s3_utils.py +0 -0
  62. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/schema_utils.py +0 -0
  63. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/scripts/publish_to_pypi.py +0 -0
  64. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/scripts/run_license_checker.py +0 -0
  65. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/scripts/view_portal_object.py +0 -0
  66. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/secrets_utils.py +0 -0
  67. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/sheet_utils.py +0 -0
  68. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/snapshot_utils.py +0 -0
  69. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/ssl_certificate_utils.py +0 -0
  70. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/task_utils.py +0 -0
  71. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/tmpfile_utils.py +0 -0
  72. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/trace_utils.py +0 -0
  73. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/validation_utils.py +0 -0
  74. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/variant_utils.py +0 -0
  75. {dcicutils-8.8.1.1b4 → dcicutils-8.8.1.1b6}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b4
3
+ Version: 8.8.1.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
77
77
  def warnings(self) -> List[str]:
78
78
  warnings = []
79
79
  if self._warning_empty_headers:
80
- warnings.append({"src": create_dict(file=self.file),
80
+ if hasattr(self, "sheet_name") and self.sheet_name:
81
+ src = {"sheet": self.sheet_name}
82
+ else:
83
+ src = {"file": self.file}
84
+ warnings.append({"src": src,
81
85
  "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
82
86
  if self._warning_extra_values:
83
87
  for row_number in self._warning_extra_values:
@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
44
44
  EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
45
  EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
46
 
47
- ENABLE_ARRAY_SHEET_REFS = True
47
+ ENABLE_ARRAY_SHEET_REFS = False
48
48
 
49
49
  # The ExtraFile pseudo-type schema.
50
50
  EXTRA_FILE_SCHEMA = {
@@ -91,6 +91,7 @@ class StructuredDataSet:
91
91
  self._errors = {}
92
92
  self._resolved_refs = set()
93
93
  self._validated = False
94
+ self._nrows = 0
94
95
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
95
96
  self._norefs = True if norefs is True else False
96
97
  self._debug_sleep = None
@@ -194,6 +195,10 @@ class StructuredDataSet:
194
195
  upload_file["path"] = file_path
195
196
  return upload_files
196
197
 
198
+ @property
199
+ def nrows(self) -> int:
200
+ return self._nrows
201
+
197
202
  def compare(self, progress: Optional[Callable] = None) -> dict:
198
203
  def get_counts() -> int:
199
204
  ntypes = 0
@@ -288,8 +293,6 @@ class StructuredDataSet:
288
293
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
289
294
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
290
295
  self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
291
- if self._progress:
292
- self._progress({"finish": True})
293
296
  # TODO: Do we really need progress reporting for the below?
294
297
  # Check for unresolved reference errors which really are not because of ordering.
295
298
  # Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
@@ -301,11 +304,25 @@ class StructuredDataSet:
301
304
  # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
302
305
  ref_errors_actual.append(ref_error)
303
306
  else:
307
+ # Now found so subtract off from ref_total_notfound_count.
308
+ self.portal._ref_total_notfound_count -= 1
304
309
  self._resolved_refs.add((ref, resolved.get("uuid")))
305
310
  if ref_errors_actual:
306
311
  self._errors["ref"] = ref_errors_actual
307
312
  else:
308
313
  del self._errors["ref"]
314
+ if self._progress:
315
+ # TODO: Refactor with same thing below in _load_reader.
316
+ self._progress({
317
+ "finish": True,
318
+ "refs": self.ref_total_count,
319
+ "refs_found": self.ref_total_found_count,
320
+ "refs_not_found": self.ref_total_notfound_count,
321
+ "refs_lookup": self.ref_lookup_count,
322
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
323
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
324
+ "refs_invalid": self.ref_invalid_identifying_property_count
325
+ })
309
326
 
310
327
  def _load_json_file(self, file: str) -> None:
311
328
  with open(file) as f:
@@ -316,6 +333,7 @@ class StructuredDataSet:
316
333
  noschema = False
317
334
  structured_row_template = None
318
335
  for row in reader:
336
+ self._nrows += 1
319
337
  if self._debug_sleep:
320
338
  time.sleep(float(self._debug_sleep))
321
339
  if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
@@ -338,7 +356,8 @@ class StructuredDataSet:
338
356
  "refs_found": self.ref_total_found_count,
339
357
  "refs_not_found": self.ref_total_notfound_count,
340
358
  "refs_lookup": self.ref_lookup_count,
341
- "refs_cache_hit": self.ref_exists_cache_hit_count,
359
+ "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
360
+ "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
342
361
  "refs_invalid": self.ref_invalid_identifying_property_count
343
362
  })
344
363
  self._note_warning(reader.warnings, "reader")
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
510
529
  set_value_backtrack_object(i, p)
511
530
  data = data[p]
512
531
  if (p := path[-1]) == -1 and isinstance(value, str):
513
- if ENABLE_ARRAY_SHEET_REFS and value.lower().startswith("[ref:") and value.endswith("]"):
514
- if self._obtain_array_values:
515
- values = self._obtain_array_values(value)
516
- if sheet_name_containing_array := value[5:].strip():
517
- if dot := sheet_name_containing_array.find(".") > 0:
518
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
519
- pass
520
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
521
- pass
532
+ if ENABLE_ARRAY_SHEET_REFS and False:
533
+ # TODO: IN PROGRESS. DISABLED FOR NOW.
534
+ if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
535
+ if self._obtain_array_values:
536
+ values = self._obtain_array_values(value)
537
+ if sheet_name_containing_array := value[5:].strip():
538
+ if dot := sheet_name_containing_array.find(".") > 0:
539
+ if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
540
+ pass
541
+ # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
522
542
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
523
543
  if mapv:
524
544
  values = [mapv(value, src) for value in values]
@@ -891,7 +911,6 @@ class Portal(PortalBase):
891
911
 
892
912
  def ref_exists(self, type_name: str, value: Optional[str] = None,
893
913
  called_from_map_ref: bool = False) -> Optional[dict]:
894
- # print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
895
914
  if not value:
896
915
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
897
916
  if not type_name or not value:
@@ -919,7 +938,9 @@ class Portal(PortalBase):
919
938
  # self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
920
939
  return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
921
940
  # Reference is NOT cached here; lookup INTERNALLY first.
922
- if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref):
941
+ # Skip updating _ref_total_notfound_count here because, if not found internally, we look in the portal below.
942
+ if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
943
+ skip_total_notfound_count=True):
923
944
  # Reference was resolved internally (note: here only if resolved is not an empty dictionary).
924
945
  if called_from_map_ref:
925
946
  self._ref_total_found_count += 1
@@ -965,13 +986,13 @@ class Portal(PortalBase):
965
986
  return None
966
987
 
967
988
  def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
968
- update_counts: bool = False) -> Optional[dict]:
989
+ update_counts: bool = False,
990
+ skip_total_notfound_count: bool = False) -> Optional[dict]:
969
991
  """
970
992
  Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
971
993
  If found then returns a dictionary containing the (given) type name and the uuid (if any)
972
994
  of the resolved item.
973
995
  """
974
- # print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
975
996
  if not value:
976
997
  type_name, value = Portal._get_type_name_and_value_from_path(type_name)
977
998
  if not type_name or not value:
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
990
1011
  resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
991
1012
  self._cache_ref(type_name, value, resolved)
992
1013
  return resolved
1014
+ if update_counts:
1015
+ if not skip_total_notfound_count:
1016
+ self._ref_total_notfound_count += 1
993
1017
  return {} # Empty return means not resolved internally.
994
1018
 
995
1019
  def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.1.1b4" # TODO: To become 8.8.2
3
+ version = "8.8.1.1b6" # TODO: To become 8.8.2
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
File without changes
File without changes