dcicutils 8.8.1.1b7__tar.gz → 8.8.1.1b10__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/PKG-INFO +1 -1
  2. dcicutils-8.8.1.1b10/dcicutils/progress_constants.py +53 -0
  3. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/structured_data.py +30 -60
  4. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/pyproject.toml +1 -1
  5. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/LICENSE.txt +0 -0
  6. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/README.rst +0 -0
  7. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/__init__.py +0 -0
  8. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/base.py +0 -0
  9. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/beanstalk_utils.py +0 -0
  10. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/bundle_utils.py +0 -0
  11. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/captured_output.py +0 -0
  12. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/cloudformation_utils.py +0 -0
  13. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/codebuild_utils.py +0 -0
  14. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/command_utils.py +0 -0
  15. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/common.py +0 -0
  16. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/contribution_scripts.py +0 -0
  17. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/contribution_utils.py +0 -0
  18. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/creds_utils.py +0 -0
  19. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/data_readers.py +0 -0
  20. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/data_utils.py +0 -0
  21. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/datetime_utils.py +0 -0
  22. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/deployment_utils.py +0 -0
  23. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/diff_utils.py +0 -0
  24. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/docker_utils.py +0 -0
  25. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ecr_scripts.py +0 -0
  26. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ecr_utils.py +0 -0
  27. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ecs_utils.py +0 -0
  28. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/env_base.py +0 -0
  29. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/env_manager.py +0 -0
  30. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/env_scripts.py +0 -0
  31. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/env_utils.py +0 -0
  32. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/env_utils_legacy.py +0 -0
  33. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/es_utils.py +0 -0
  34. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/exceptions.py +0 -0
  35. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ff_mocks.py +0 -0
  36. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ff_utils.py +0 -0
  37. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/file_utils.py +0 -0
  38. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/function_cache_decorator.py +0 -0
  39. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/glacier_utils.py +0 -0
  40. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/jh_utils.py +0 -0
  41. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/kibana/dashboards.json +0 -0
  42. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/kibana/readme.md +0 -0
  43. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/lang_utils.py +0 -0
  44. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  45. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  46. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  47. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  48. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  49. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  50. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/license_utils.py +0 -0
  51. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/log_utils.py +0 -0
  52. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/misc_utils.py +0 -0
  53. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/obfuscation_utils.py +0 -0
  54. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/opensearch_utils.py +0 -0
  55. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/portal_object_utils.py +0 -0
  56. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/portal_utils.py +0 -0
  57. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/project_utils.py +0 -0
  58. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/qa_checkers.py +0 -0
  59. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/qa_utils.py +0 -0
  60. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/redis_tools.py +0 -0
  61. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/redis_utils.py +0 -0
  62. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/s3_utils.py +0 -0
  63. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/schema_utils.py +0 -0
  64. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/scripts/publish_to_pypi.py +0 -0
  65. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/scripts/run_license_checker.py +0 -0
  66. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/scripts/view_portal_object.py +0 -0
  67. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/secrets_utils.py +0 -0
  68. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/sheet_utils.py +0 -0
  69. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/snapshot_utils.py +0 -0
  70. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/ssl_certificate_utils.py +0 -0
  71. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/task_utils.py +0 -0
  72. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/tmpfile_utils.py +0 -0
  73. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/trace_utils.py +0 -0
  74. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/validation_utils.py +0 -0
  75. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/variant_utils.py +0 -0
  76. {dcicutils-8.8.1.1b7 → dcicutils-8.8.1.1b10}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b7
3
+ Version: 8.8.1.1b10
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -0,0 +1,53 @@
1
+ from enum import Enum
2
+
3
+
4
+ # Constants for progress tracking for smaht-submitr.
5
+ # Here only to share between smaht-portal, snovault, and smaht-submitr.
6
+
7
class PROGRESS_INGESTER(str, Enum):
    """Progress-tracking keys for the ingester.

    The members mix in ``str`` so that each one compares equal to, hashes
    like, and JSON-serializes as its plain string value. With a plain
    ``Enum`` a member is a distinct object: it does not match the string it
    stands for, and ``json.dumps`` refuses it as a dictionary key.

    The string values are shared with other packages (see the module
    comment), so they must not be changed here.
    """

    VALIDATION = "ingester_validation"
    INITIATE = "ingester_initiate"
    PARSE_LOAD_INITIATE = "ingester_parse_initiate"
    PARSE_LOAD_DONE = "ingester_parse_done"
    VALIDATE_LOAD_INITIATE = "ingester_validate_initiate"
    VALIDATE_LOAD_DONE = "ingester_validate_done"
    LOADXL_INITIATE = "ingester_loadxl_initiate"
    LOADXL_DONE = "ingester_loadxl_done"
16
+
17
+
18
class PROGRESS_PARSE(str, Enum):
    """Progress-tracking keys for the load (parse) and analyze phases.

    The members are used as dictionary keys in the payloads handed to
    progress callbacks, in place of the bare strings used previously
    ("start", "rows", "refs", ...). They mix in ``str`` so that each member
    compares equal to, hashes like, and JSON-serializes as its string value;
    a callback that still looks up ``"start"`` or ``"refs"`` keeps working.
    With a plain ``Enum`` those lookups would silently miss and
    ``json.dumps`` would reject the keys.

    NOTE: "start" and "finish" are deliberately used by both phases, so
    ``ANALYZE_START`` and ``ANALYZE_DONE`` are Enum *aliases* of
    ``LOAD_START`` and ``LOAD_DONE`` (same member object, and ``.name``
    reports the ``LOAD_*`` name). The values are shared with other packages
    and must not be changed here.
    """

    # Load/parse phase.
    LOAD_START = "start"
    LOAD_ITEM = "parse"
    LOAD_DONE = "finish"
    LOAD_COUNT_SHEETS = "sheets"
    LOAD_COUNT_ROWS = "rows"
    LOAD_COUNT_REFS = "refs"
    LOAD_COUNT_REFS_FOUND = "refs_found"
    LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
    LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
    LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
    LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
    LOAD_COUNT_REFS_INVALID = "refs_invalid"
    # Analyze phase.
    ANALYZE_START = "start"  # Alias of LOAD_START (same value).
    ANALYZE_COUNT_TYPES = "types"
    ANALYZE_COUNT_ITEMS = "objects"
    ANALYZE_CREATE = "create"
    ANALYZE_COUNT_LOOKUP = "lookups"
    ANALYZE_UPDATE = "update"
    ANALYZE_DONE = "finish"  # Alias of LOAD_DONE (same value).
38
+
39
+
40
class PROGRESS_LOADXL(str, Enum):
    """Progress-tracking keys for loadxl.

    The members mix in ``str`` so that each one compares equal to, hashes
    like, and JSON-serializes as its plain string value. With a plain
    ``Enum`` a member is a distinct object: it does not match the string it
    stands for, and ``json.dumps`` refuses it as a dictionary key.

    The string values are shared with other packages (see the module
    comment), so they must not be changed here.
    """

    START = "loadxl_start"
    START_SECOND_ROUND = "loadxl_start_second_round"
    ITEM = "loadxl_item"
    ITEM_SECOND_ROUND = "loadxl_item_second_round"
    GET = "loadxl_lookup"
    POST = "loadxl_post"
    PATCH = "loadxl_patch"
    ERROR = "loadxl_error"
    DONE = "loadxl_done"
    TOTAL = "loadxl_total"
    MESSAGE = "loadxl_message"
    MESSAGE_VERBOSE = "loadxl_message_verbose"
    MESSAGE_DEBUG = "loadxl_message_debug"
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
17
17
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
19
19
  from dcicutils.portal_utils import Portal as PortalBase
20
+ from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
20
21
  from dcicutils.schema_utils import Schema as SchemaBase
21
22
  from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files
22
23
 
@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
37
38
  ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
38
39
  DOTTED_NAME_DELIMITER_CHAR = "."
39
40
 
41
+
40
42
  # TODO: Should probably pass this knowledge in from callers.
41
43
  FILE_TYPE_NAME = "File"
42
44
  FILE_TYPE_PROPERTY_NAME = "filename"
43
- # This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
44
- EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
- EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
-
47
- ENABLE_ARRAY_SHEET_REFS = False
48
-
49
- # The ExtraFile pseudo-type schema.
50
- EXTRA_FILE_SCHEMA = {
51
- "title": "ExtraFile",
52
- "type": "object",
53
- "required": [
54
- "filename"
55
- ],
56
- "identifyingProperties": [
57
- "filename"
58
- ],
59
- "properties": {
60
- "filename": {
61
- "type": "string"
62
- }
63
- }
64
- }
65
45
 
66
46
  # Forward type references for type hints.
67
47
  Portal = Type["Portal"]
@@ -212,7 +192,8 @@ class StructuredDataSet:
212
192
  diffs = {}
213
193
  if callable(progress):
214
194
  ntypes, nobjects = get_counts()
215
- progress({"start": True, "types": ntypes, "objects": nobjects})
195
+ progress({PROGRESS.ANALYZE_START: True,
196
+ PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
216
197
  if self.data or self.portal: # TODO: what is this OR biz?
217
198
  refs = self.resolved_refs_with_uuids
218
199
  # TODO: Need feedback/progress tracking mechanism here.
@@ -231,18 +212,19 @@ class StructuredDataSet:
231
212
  uuid=existing_object.uuid,
232
213
  diffs=object_diffs or None))
233
214
  if callable(progress):
234
- progress({"update": True, "lookups": nlookups + nlookups_compare})
215
+ progress({PROGRESS.ANALYZE_UPDATE: True,
216
+ PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
235
217
  elif identifying_path:
236
218
  # If there is no existing object we still create a record for this object
237
219
  # but with no uuid which will be the indication that it does not exist.
238
220
  diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
239
221
  if callable(progress):
240
- progress({"create": True, "lookups": nlookups})
222
+ progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
241
223
  else:
242
224
  if callable(progress):
243
- progress({"lookups": nlookups})
225
+ progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
244
226
  if callable(progress):
245
- progress({"finish": True})
227
+ progress({PROGRESS.ANALYZE_DONE: True})
246
228
  return diffs
247
229
 
248
230
  def load_file(self, file: str) -> None:
@@ -287,9 +269,10 @@ class StructuredDataSet:
287
269
  for row in excel.sheet_reader(sheet_name):
288
270
  nrows += 1
289
271
  return nrows, len(excel.sheet_names)
290
- if self._progress:
272
+ if self._progress: # TODO: Move to _load_reader
291
273
  nrows, nsheets = get_counts()
292
- self._progress({"start": True, "sheets": nsheets, "rows": nrows})
274
+ self._progress({PROGRESS.LOAD_START: True,
275
+ PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
293
276
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
294
277
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
295
278
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
@@ -313,16 +296,15 @@ class StructuredDataSet:
313
296
  else:
314
297
  del self._errors["ref"]
315
298
  if self._progress:
316
- # TODO: Refactor with same thing below in _load_reader.
317
- self._progress({
318
- "finish": True,
319
- "refs": self.ref_total_count,
320
- "refs_found": self.ref_total_found_count,
321
- "refs_not_found": self.ref_total_notfound_count,
322
- "refs_lookup": self.ref_lookup_count,
323
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
324
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
325
- "refs_invalid": self.ref_invalid_identifying_property_count
299
+ self._progress({ # TODO: Refactor with same thing below in _load_reader.
300
+ PROGRESS.LOAD_DONE: True,
301
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
302
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
303
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
304
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
305
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
306
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
307
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
326
308
  })
327
309
 
328
310
  def _load_json_file(self, file: str) -> None:
@@ -352,14 +334,14 @@ class StructuredDataSet:
352
334
  self._add(type_name, structured_row)
353
335
  if self._progress:
354
336
  self._progress({
355
- "parse": True,
356
- "refs": self.ref_total_count,
357
- "refs_found": self.ref_total_found_count,
358
- "refs_not_found": self.ref_total_notfound_count,
359
- "refs_lookup": self.ref_lookup_count,
360
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
361
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
362
- "refs_invalid": self.ref_invalid_identifying_property_count
337
+ PROGRESS.LOAD_ITEM: True,
338
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
339
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
340
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
341
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
342
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
343
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
344
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
363
345
  })
364
346
  self._note_warning(reader.warnings, "reader")
365
347
  if schema:
@@ -465,12 +447,10 @@ class StructuredDataSet:
465
447
 
466
448
  class _StructuredRowTemplate:
467
449
 
468
- def __init__(self, column_names: List[str], schema: Optional[Schema] = None,
469
- obtain_array_values: Optional[Callable] = None) -> None:
450
+ def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
470
451
  self._schema = schema
471
452
  self._set_value_functions = {}
472
453
  self._template = self._create_row_template(column_names)
473
- self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None
474
454
 
475
455
  def create_row(self) -> dict:
476
456
  return copy.deepcopy(self._template)
@@ -530,16 +510,6 @@ class _StructuredRowTemplate:
530
510
  set_value_backtrack_object(i, p)
531
511
  data = data[p]
532
512
  if (p := path[-1]) == -1 and isinstance(value, str):
533
- if ENABLE_ARRAY_SHEET_REFS and False:
534
- # TODO: IN PROGRESS. DISABLED FOR NOW.
535
- if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
536
- if self._obtain_array_values:
537
- values = self._obtain_array_values(value)
538
- if sheet_name_containing_array := value[5:].strip():
539
- if dot := sheet_name_containing_array.find(".") > 0:
540
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
541
- pass
542
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
543
513
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
544
514
  if mapv:
545
515
  values = [mapv(value, src) for value in values]
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.1.1b7" # TODO: To become 8.8.2
3
+ version = "8.8.1.1b10" # TODO: To become 8.8.2
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
File without changes