dcicutils 8.8.1.1b6__py3-none-any.whl → 8.8.1.1b9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,48 @@
1
+ from enum import Enum
2
+
3
+
4
+ # Constants for progress tracking for smaht-submitr.
5
+ # Here only to share between smaht-portal, snovault, and smaht-submitr.
6
+
7
+ class PROGRESS_INGESTER(Enum):
8
+ VALIDATION = "ingester_validation"
9
+ INITIATE = "ingester_initiate"
10
+
11
+
12
+ class PROGRESS_PARSE(Enum):
13
+ LOAD_START = "start"
14
+ LOAD_ITEM = "parse"
15
+ LOAD_DONE = "finish"
16
+ LOAD_COUNT_SHEETS = "sheets"
17
+ LOAD_COUNT_ROWS = "rows"
18
+ LOAD_COUNT_REFS = "refs"
19
+ LOAD_COUNT_REFS_FOUND = "refs_found"
20
+ LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
21
+ LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
22
+ LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
23
+ LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
24
+ LOAD_COUNT_REFS_INVALID = "refs_invalid"
25
+ ANALYZE_START = "start"
26
+ ANALYZE_COUNT_TYPES = "types"
27
+ ANALYZE_COUNT_ITEMS = "objects"
28
+ ANALYZE_CREATE = "create"
29
+ ANALYZE_COUNT_LOOKUP = "lookups"
30
+ ANALYZE_UPDATE = "update"
31
+ ANALYZE_DONE = "finish"
32
+
33
+
34
+ class PROGRESS_LOADXL(Enum):
35
+ INITIATE = "loadxl_initiate"
36
+ START = "loadxl_start"
37
+ START_SECOND_ROUND = "loadxl_start_second_round"
38
+ ITEM = "loadxl_item"
39
+ ITEM_SECOND_ROUND = "loadxl_item_second_round"
40
+ GET = "loadxl_lookup"
41
+ POST = "loadxl_post"
42
+ PATCH = "loadxl_patch"
43
+ ERROR = "loadxl_error"
44
+ DONE = "loadxl_done"
45
+ TOTAL = "loadxl_total"
46
+ MESSAGE = "loadxl_message"
47
+ MESSAGE_VERBOSE = "loadxl_message_verbose"
48
+ MESSAGE_DEBUG = "loadxl_message_debug"
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
17
17
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
19
19
  from dcicutils.portal_utils import Portal as PortalBase
20
+ from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
20
21
  from dcicutils.schema_utils import Schema as SchemaBase
21
22
  from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files
22
23
 
@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
37
38
  ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
38
39
  DOTTED_NAME_DELIMITER_CHAR = "."
39
40
 
41
+
40
42
  # TODO: Should probably pass this knowledge in from callers.
41
43
  FILE_TYPE_NAME = "File"
42
44
  FILE_TYPE_PROPERTY_NAME = "filename"
43
- # This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
44
- EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
- EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
-
47
- ENABLE_ARRAY_SHEET_REFS = False
48
-
49
- # The ExtraFile pseudo-type schema.
50
- EXTRA_FILE_SCHEMA = {
51
- "title": "ExtraFile",
52
- "type": "object",
53
- "required": [
54
- "filename"
55
- ],
56
- "identifyingProperties": [
57
- "filename"
58
- ],
59
- "properties": {
60
- "filename": {
61
- "type": "string"
62
- }
63
- }
64
- }
65
45
 
66
46
  # Forward type references for type hints.
67
47
  Portal = Type["Portal"]
@@ -117,10 +97,11 @@ class StructuredDataSet:
117
97
  ref_lookup_strategy: Optional[Callable] = None,
118
98
  ref_lookup_nocache: bool = False,
119
99
  norefs: bool = False,
100
+ progress: Optional[Callable] = None,
120
101
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
121
102
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
122
103
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
123
- norefs=norefs, debug_sleep=debug_sleep)
104
+ norefs=norefs, progress=progress, debug_sleep=debug_sleep)
124
105
 
125
106
  def validate(self, force: bool = False) -> None:
126
107
  def data_without_deleted_properties(data: dict) -> dict:
@@ -211,7 +192,8 @@ class StructuredDataSet:
211
192
  diffs = {}
212
193
  if callable(progress):
213
194
  ntypes, nobjects = get_counts()
214
- progress({"start": True, "types": ntypes, "objects": nobjects})
195
+ progress({PROGRESS.ANALYZE_START: True,
196
+ PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
215
197
  if self.data or self.portal: # TODO: what is this OR biz?
216
198
  refs = self.resolved_refs_with_uuids
217
199
  # TODO: Need feedback/progress tracking mechanism here.
@@ -230,18 +212,19 @@ class StructuredDataSet:
230
212
  uuid=existing_object.uuid,
231
213
  diffs=object_diffs or None))
232
214
  if callable(progress):
233
- progress({"update": True, "lookups": nlookups + nlookups_compare})
215
+ progress({PROGRESS.ANALYZE_UPDATE: True,
216
+ PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
234
217
  elif identifying_path:
235
218
  # If there is no existing object we still create a record for this object
236
219
  # but with no uuid which will be the indication that it does not exist.
237
220
  diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
238
221
  if callable(progress):
239
- progress({"create": True, "lookups": nlookups})
222
+ progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
240
223
  else:
241
224
  if callable(progress):
242
- progress({"lookups": nlookups})
225
+ progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
243
226
  if callable(progress):
244
- progress({"finish": True})
227
+ progress({PROGRESS.ANALYZE_DONE: True})
245
228
  return diffs
246
229
 
247
230
  def load_file(self, file: str) -> None:
@@ -286,9 +269,10 @@ class StructuredDataSet:
286
269
  for row in excel.sheet_reader(sheet_name):
287
270
  nrows += 1
288
271
  return nrows, len(excel.sheet_names)
289
- if self._progress:
272
+ if self._progress: # TODO: Move to _load_reader
290
273
  nrows, nsheets = get_counts()
291
- self._progress({"start": True, "sheets": nsheets, "rows": nrows})
274
+ self._progress({PROGRESS.LOAD_START: True,
275
+ PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
292
276
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
293
277
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
294
278
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
@@ -312,16 +296,15 @@ class StructuredDataSet:
312
296
  else:
313
297
  del self._errors["ref"]
314
298
  if self._progress:
315
- # TODO: Refactor with same thing below in _load_reader.
316
- self._progress({
317
- "finish": True,
318
- "refs": self.ref_total_count,
319
- "refs_found": self.ref_total_found_count,
320
- "refs_not_found": self.ref_total_notfound_count,
321
- "refs_lookup": self.ref_lookup_count,
322
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
323
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
324
- "refs_invalid": self.ref_invalid_identifying_property_count
299
+ self._progress({ # TODO: Refactor with same thing below in _load_reader.
300
+ PROGRESS.LOAD_DONE: True,
301
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
302
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
303
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
304
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
305
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
306
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
307
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
325
308
  })
326
309
 
327
310
  def _load_json_file(self, file: str) -> None:
@@ -351,14 +334,14 @@ class StructuredDataSet:
351
334
  self._add(type_name, structured_row)
352
335
  if self._progress:
353
336
  self._progress({
354
- "parse": True,
355
- "refs": self.ref_total_count,
356
- "refs_found": self.ref_total_found_count,
357
- "refs_not_found": self.ref_total_notfound_count,
358
- "refs_lookup": self.ref_lookup_count,
359
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
360
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
361
- "refs_invalid": self.ref_invalid_identifying_property_count
337
+ PROGRESS.LOAD_ITEM: True,
338
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
339
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
340
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
341
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
342
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
343
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
344
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
362
345
  })
363
346
  self._note_warning(reader.warnings, "reader")
364
347
  if schema:
@@ -464,12 +447,10 @@ class StructuredDataSet:
464
447
 
465
448
  class _StructuredRowTemplate:
466
449
 
467
- def __init__(self, column_names: List[str], schema: Optional[Schema] = None,
468
- obtain_array_values: Optional[Callable] = None) -> None:
450
+ def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
469
451
  self._schema = schema
470
452
  self._set_value_functions = {}
471
453
  self._template = self._create_row_template(column_names)
472
- self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None
473
454
 
474
455
  def create_row(self) -> dict:
475
456
  return copy.deepcopy(self._template)
@@ -529,16 +510,6 @@ class _StructuredRowTemplate:
529
510
  set_value_backtrack_object(i, p)
530
511
  data = data[p]
531
512
  if (p := path[-1]) == -1 and isinstance(value, str):
532
- if ENABLE_ARRAY_SHEET_REFS and False:
533
- # TODO: IN PROGRESS. DISABLED FOR NOW.
534
- if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
535
- if self._obtain_array_values:
536
- values = self._obtain_array_values(value)
537
- if sheet_name_containing_array := value[5:].strip():
538
- if dot := sheet_name_containing_array.find(".") > 0:
539
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
540
- pass
541
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
542
513
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
543
514
  if mapv:
544
515
  values = [mapv(value, src) for value in values]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b6
3
+ Version: 8.8.1.1b9
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -48,6 +48,7 @@ dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmj
48
48
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
49
  dcicutils/portal_object_utils.py,sha256=MF6MTZ6yxakZFDjbkTKCsF4q4p11dLDVvT5JBV9m6RQ,15408
50
50
  dcicutils/portal_utils.py,sha256=oBoI3KWRp6YrbsuVGbmPQ3kATB5cVVsQo7-qmnYXWqg,30260
51
+ dcicutils/progress_constants.py,sha256=Q5ZzXYQXi6QMIYnUi_vxDAEH-nTYjQVauc9HPfvk5jE,1475
51
52
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
52
53
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
53
54
  dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
@@ -62,15 +63,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
63
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
64
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
65
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
66
+ dcicutils/structured_data.py,sha256=1guVNDzIVxJkQA_m0jSh9xI2FB5oVXR4m7sqrqF8A5w,58559
66
67
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
68
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
69
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
70
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
71
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
72
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
74
- dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.1.1b6.dist-info/RECORD,,
73
+ dcicutils-8.8.1.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
74
+ dcicutils-8.8.1.1b9.dist-info/METADATA,sha256=PpSJ-JtZqnTWFk4eeZbU3RnCfRXko6sCYafK2wtmFW0,3356
75
+ dcicutils-8.8.1.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
76
+ dcicutils-8.8.1.1b9.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
77
+ dcicutils-8.8.1.1b9.dist-info/RECORD,,