dcicutils 8.8.1.1b6-py3-none-any.whl → 8.8.1.1b9-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,48 @@
+ from enum import Enum
+
+
+ # Constants for progress tracking for smaht-submitr.
+ # Here only to share between smaht-portal, snovault, and smaht-submitr.
+
+ class PROGRESS_INGESTER(Enum):
+     VALIDATION = "ingester_validation"
+     INITIATE = "ingester_initiate"
+
+
+ class PROGRESS_PARSE(Enum):
+     LOAD_START = "start"
+     LOAD_ITEM = "parse"
+     LOAD_DONE = "finish"
+     LOAD_COUNT_SHEETS = "sheets"
+     LOAD_COUNT_ROWS = "rows"
+     LOAD_COUNT_REFS = "refs"
+     LOAD_COUNT_REFS_FOUND = "refs_found"
+     LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
+     LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
+     LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
+     LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
+     LOAD_COUNT_REFS_INVALID = "refs_invalid"
+     ANALYZE_START = "start"
+     ANALYZE_COUNT_TYPES = "types"
+     ANALYZE_COUNT_ITEMS = "objects"
+     ANALYZE_CREATE = "create"
+     ANALYZE_COUNT_LOOKUP = "lookups"
+     ANALYZE_UPDATE = "update"
+     ANALYZE_DONE = "finish"
+
+
+ class PROGRESS_LOADXL(Enum):
+     INITIATE = "loadxl_initiate"
+     START = "loadxl_start"
+     START_SECOND_ROUND = "loadxl_start_second_round"
+     ITEM = "loadxl_item"
+     ITEM_SECOND_ROUND = "loadxl_item_second_round"
+     GET = "loadxl_lookup"
+     POST = "loadxl_post"
+     PATCH = "loadxl_patch"
+     ERROR = "loadxl_error"
+     DONE = "loadxl_done"
+     TOTAL = "loadxl_total"
+     MESSAGE = "loadxl_message"
+     MESSAGE_VERBOSE = "loadxl_message_verbose"
+     MESSAGE_DEBUG = "loadxl_message_debug"
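The hunk above is the new dcicutils/progress_constants.py module recorded later in this diff. The structured_data.py hunks below pass dictionaries keyed by these enum members to a caller-supplied progress callback: marker keys map to True, the COUNT keys to integers. A minimal sketch of such a callback, assuming only what the diff shows (the handler name and printed messages are illustrative, not part of the package):

    from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS

    def report_progress(update: dict) -> None:
        # Keys are PROGRESS_PARSE members; markers are True, COUNT keys are integers.
        if update.get(PROGRESS.LOAD_START):
            print(f"Parsing {update.get(PROGRESS.LOAD_COUNT_SHEETS)} sheet(s), "
                  f"{update.get(PROGRESS.LOAD_COUNT_ROWS)} row(s) ...")
        elif update.get(PROGRESS.LOAD_DONE):
            print(f"Parse done; refs found: {update.get(PROGRESS.LOAD_COUNT_REFS_FOUND)}, "
                  f"not found: {update.get(PROGRESS.LOAD_COUNT_REFS_NOT_FOUND)}")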
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
  from dcicutils.portal_object_utils import PortalObject
  from dcicutils.portal_utils import Portal as PortalBase
+ from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
  from dcicutils.schema_utils import Schema as SchemaBase
  from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files

@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
  ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
  DOTTED_NAME_DELIMITER_CHAR = "."

+
  # TODO: Should probably pass this knowledge in from callers.
  FILE_TYPE_NAME = "File"
  FILE_TYPE_PROPERTY_NAME = "filename"
- # This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
- EXTRA_FILE_TYPE_NAME = "ExtraFile"
- EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
-
- ENABLE_ARRAY_SHEET_REFS = False
-
- # The ExtraFile pseudo-type schema.
- EXTRA_FILE_SCHEMA = {
-     "title": "ExtraFile",
-     "type": "object",
-     "required": [
-         "filename"
-     ],
-     "identifyingProperties": [
-         "filename"
-     ],
-     "properties": {
-         "filename": {
-             "type": "string"
-         }
-     }
- }

  # Forward type references for type hints.
  Portal = Type["Portal"]
@@ -117,10 +97,11 @@ class StructuredDataSet:
  ref_lookup_strategy: Optional[Callable] = None,
  ref_lookup_nocache: bool = False,
  norefs: bool = False,
+ progress: Optional[Callable] = None,
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
                           ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
-                          norefs=norefs, debug_sleep=debug_sleep)
+                          norefs=norefs, progress=progress, debug_sleep=debug_sleep)

  def validate(self, force: bool = False) -> None:
      def data_without_deleted_properties(data: dict) -> dict:
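The hunk above threads a new optional progress callable through what appears to be the StructuredDataSet.load factory classmethod (the method name itself lies outside the hunk). A plausible call site, assuming a portal handle and the report_progress handler sketched earlier; the file name and portal variable are placeholders:

    from dcicutils.structured_data import StructuredDataSet

    # Hypothetical usage; "metadata.xlsx" and portal are placeholders, not from this diff.
    structured_data = StructuredDataSet.load(file="metadata.xlsx", portal=portal,
                                             progress=report_progress)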
@@ -211,7 +192,8 @@ class StructuredDataSet:
  diffs = {}
  if callable(progress):
      ntypes, nobjects = get_counts()
-     progress({"start": True, "types": ntypes, "objects": nobjects})
+     progress({PROGRESS.ANALYZE_START: True,
+               PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
  if self.data or self.portal:  # TODO: what is this OR biz?
      refs = self.resolved_refs_with_uuids
      # TODO: Need feedback/progress tracking mechanism here.
@@ -230,18 +212,19 @@ class StructuredDataSet:
  uuid=existing_object.uuid,
  diffs=object_diffs or None))
  if callable(progress):
-     progress({"update": True, "lookups": nlookups + nlookups_compare})
+     progress({PROGRESS.ANALYZE_UPDATE: True,
+               PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
  elif identifying_path:
      # If there is no existing object we still create a record for this object
      # but with no uuid which will be the indication that it does not exist.
      diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
      if callable(progress):
-         progress({"create": True, "lookups": nlookups})
+         progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
  else:
      if callable(progress):
-         progress({"lookups": nlookups})
+         progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
  if callable(progress):
-     progress({"finish": True})
+     progress({PROGRESS.ANALYZE_DONE: True})
  return diffs

  def load_file(self, file: str) -> None:
@@ -286,9 +269,10 @@ class StructuredDataSet:
  for row in excel.sheet_reader(sheet_name):
      nrows += 1
  return nrows, len(excel.sheet_names)
- if self._progress:
+ if self._progress:  # TODO: Move to _load_reader
      nrows, nsheets = get_counts()
-     self._progress({"start": True, "sheets": nsheets, "rows": nrows})
+     self._progress({PROGRESS.LOAD_START: True,
+                     PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
  excel = Excel(file)  # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
@@ -312,16 +296,15 @@ class StructuredDataSet:
  else:
      del self._errors["ref"]
  if self._progress:
-     # TODO: Refactor with same thing below in _load_reader.
-     self._progress({
-         "finish": True,
-         "refs": self.ref_total_count,
-         "refs_found": self.ref_total_found_count,
-         "refs_not_found": self.ref_total_notfound_count,
-         "refs_lookup": self.ref_lookup_count,
-         "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
-         "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
-         "refs_invalid": self.ref_invalid_identifying_property_count
+     self._progress({  # TODO: Refactor with same thing below in _load_reader.
+         PROGRESS.LOAD_DONE: True,
+         PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
+         PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
+         PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
+         PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
+         PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
+         PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
+         PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
      })

  def _load_json_file(self, file: str) -> None:
@@ -351,14 +334,14 @@ class StructuredDataSet:
  self._add(type_name, structured_row)
  if self._progress:
      self._progress({
-         "parse": True,
-         "refs": self.ref_total_count,
-         "refs_found": self.ref_total_found_count,
-         "refs_not_found": self.ref_total_notfound_count,
-         "refs_lookup": self.ref_lookup_count,
-         "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
-         "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
-         "refs_invalid": self.ref_invalid_identifying_property_count
+         PROGRESS.LOAD_ITEM: True,
+         PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
+         PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
+         PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
+         PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
+         PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
+         PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
+         PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
      })
  self._note_warning(reader.warnings, "reader")
  if schema:
@@ -464,12 +447,10 @@ class StructuredDataSet:

  class _StructuredRowTemplate:

-     def __init__(self, column_names: List[str], schema: Optional[Schema] = None,
-                  obtain_array_values: Optional[Callable] = None) -> None:
+     def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
          self._schema = schema
          self._set_value_functions = {}
          self._template = self._create_row_template(column_names)
-         self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None

      def create_row(self) -> dict:
          return copy.deepcopy(self._template)
@@ -529,16 +510,6 @@ class _StructuredRowTemplate:
      set_value_backtrack_object(i, p)
      data = data[p]
  if (p := path[-1]) == -1 and isinstance(value, str):
-     if ENABLE_ARRAY_SHEET_REFS and False:
-         # TODO: IN PROGRESS. DISABLED FOR NOW.
-         if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
-             if self._obtain_array_values:
-                 values = self._obtain_array_values(value)
-             if sheet_name_containing_array := value[5:].strip():
-                 if dot := sheet_name_containing_array.find(".") > 0:
-                     if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
-                         pass
-                         # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
      values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
      if mapv:
          values = [mapv(value, src) for value in values]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcicutils
- Version: 8.8.1.1b6
+ Version: 8.8.1.1b9
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
  Home-page: https://github.com/4dn-dcic/utils
  License: MIT
@@ -48,6 +48,7 @@ dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmj
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
  dcicutils/portal_object_utils.py,sha256=MF6MTZ6yxakZFDjbkTKCsF4q4p11dLDVvT5JBV9m6RQ,15408
  dcicutils/portal_utils.py,sha256=oBoI3KWRp6YrbsuVGbmPQ3kATB5cVVsQo7-qmnYXWqg,30260
+ dcicutils/progress_constants.py,sha256=Q5ZzXYQXi6QMIYnUi_vxDAEH-nTYjQVauc9HPfvk5jE,1475
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
  dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
@@ -62,15 +63,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
- dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
+ dcicutils/structured_data.py,sha256=1guVNDzIVxJkQA_m0jSh9xI2FB5oVXR4m7sqrqF8A5w,58559
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
- dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
- dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
- dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
- dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
- dcicutils-8.8.1.1b6.dist-info/RECORD,,
+ dcicutils-8.8.1.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
+ dcicutils-8.8.1.1b9.dist-info/METADATA,sha256=PpSJ-JtZqnTWFk4eeZbU3RnCfRXko6sCYafK2wtmFW0,3356
+ dcicutils-8.8.1.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+ dcicutils-8.8.1.1b9.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
+ dcicutils-8.8.1.1b9.dist-info/RECORD,,