dcicutils 8.8.1.1b7__py3-none-any.whl → 8.8.1.1b9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ from enum import Enum
2
+
3
+
4
+ # Constants for progress tracking for smaht-submitr.
5
+ # Here only to share between smaht-portal, snovault, and smaht-submitr.
6
+
7
class PROGRESS_INGESTER(str, Enum):
    """Progress-tracking key names whose values are prefixed ``ingester_``.

    These constants exist so that the same key strings can be shared between
    smaht-portal, snovault, and smaht-submitr.

    The ``str`` mixin makes every member a real string: a member compares and
    hashes equal to its value, so it can be used interchangeably with the
    plain string as a dictionary key and survives JSON serialization.
    ``.name`` and ``.value`` behave exactly as for a plain ``Enum``.
    """

    VALIDATION = "ingester_validation"
    INITIATE = "ingester_initiate"
10
+
11
+
12
class PROGRESS_PARSE(str, Enum):
    """Key names used in the progress dictionaries of the parse/analyze steps.

    These constants exist so that the same key strings can be shared between
    smaht-portal, snovault, and smaht-submitr.

    The ``str`` mixin makes every member a real string: a member compares and
    hashes equal to its value.  Progress dictionaries keyed by these members
    therefore remain readable with the plain string keys (``"start"``,
    ``"refs"``, ...) and can be passed to ``json.dumps`` unchanged.
    ``.name`` and ``.value`` behave exactly as for a plain ``Enum``.

    NOTE: ``ANALYZE_START`` and ``ANALYZE_DONE`` reuse the values ``"start"``
    and ``"finish"``.  Under standard ``Enum`` semantics that makes them
    aliases of ``LOAD_START`` and ``LOAD_DONE`` (the very same member objects),
    so they are not yielded separately when iterating over the class.
    """

    # Keys with the LOAD_ prefix.
    LOAD_START = "start"
    LOAD_ITEM = "parse"
    LOAD_DONE = "finish"
    LOAD_COUNT_SHEETS = "sheets"
    LOAD_COUNT_ROWS = "rows"
    LOAD_COUNT_REFS = "refs"
    LOAD_COUNT_REFS_FOUND = "refs_found"
    LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
    LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
    LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
    LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
    LOAD_COUNT_REFS_INVALID = "refs_invalid"

    # Keys with the ANALYZE_ prefix.
    ANALYZE_START = "start"  # Same value as LOAD_START, hence an alias of it.
    ANALYZE_COUNT_TYPES = "types"
    ANALYZE_COUNT_ITEMS = "objects"
    ANALYZE_CREATE = "create"
    ANALYZE_COUNT_LOOKUP = "lookups"
    ANALYZE_UPDATE = "update"
    ANALYZE_DONE = "finish"  # Same value as LOAD_DONE, hence an alias of it.
32
+
33
+
34
class PROGRESS_LOADXL(str, Enum):
    """Progress-tracking key names whose values are prefixed ``loadxl_``.

    These constants exist so that the same key strings can be shared between
    smaht-portal, snovault, and smaht-submitr.

    The ``str`` mixin makes every member a real string: a member compares and
    hashes equal to its value, so it can be used interchangeably with the
    plain string as a dictionary key and survives JSON serialization.
    ``.name`` and ``.value`` behave exactly as for a plain ``Enum``.
    """

    INITIATE = "loadxl_initiate"
    START = "loadxl_start"
    START_SECOND_ROUND = "loadxl_start_second_round"
    ITEM = "loadxl_item"
    ITEM_SECOND_ROUND = "loadxl_item_second_round"
    GET = "loadxl_lookup"  # NB: the value is "loadxl_lookup", not "loadxl_get".
    POST = "loadxl_post"
    PATCH = "loadxl_patch"
    ERROR = "loadxl_error"
    DONE = "loadxl_done"
    TOTAL = "loadxl_total"
    MESSAGE = "loadxl_message"
    MESSAGE_VERBOSE = "loadxl_message_verbose"
    MESSAGE_DEBUG = "loadxl_message_debug"
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
17
17
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
19
19
  from dcicutils.portal_utils import Portal as PortalBase
20
+ from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
20
21
  from dcicutils.schema_utils import Schema as SchemaBase
21
22
  from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files
22
23
 
@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
37
38
  ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
38
39
  DOTTED_NAME_DELIMITER_CHAR = "."
39
40
 
41
+
40
42
  # TODO: Should probably pass this knowledge in from callers.
41
43
  FILE_TYPE_NAME = "File"
42
44
  FILE_TYPE_PROPERTY_NAME = "filename"
43
- # This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
44
- EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
- EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
-
47
- ENABLE_ARRAY_SHEET_REFS = False
48
-
49
- # The ExtraFile pseudo-type schema.
50
- EXTRA_FILE_SCHEMA = {
51
- "title": "ExtraFile",
52
- "type": "object",
53
- "required": [
54
- "filename"
55
- ],
56
- "identifyingProperties": [
57
- "filename"
58
- ],
59
- "properties": {
60
- "filename": {
61
- "type": "string"
62
- }
63
- }
64
- }
65
45
 
66
46
  # Forward type references for type hints.
67
47
  Portal = Type["Portal"]
@@ -212,7 +192,8 @@ class StructuredDataSet:
212
192
  diffs = {}
213
193
  if callable(progress):
214
194
  ntypes, nobjects = get_counts()
215
- progress({"start": True, "types": ntypes, "objects": nobjects})
195
+ progress({PROGRESS.ANALYZE_START: True,
196
+ PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
216
197
  if self.data or self.portal: # TODO: what is this OR biz?
217
198
  refs = self.resolved_refs_with_uuids
218
199
  # TODO: Need feedback/progress tracking mechanism here.
@@ -231,18 +212,19 @@ class StructuredDataSet:
231
212
  uuid=existing_object.uuid,
232
213
  diffs=object_diffs or None))
233
214
  if callable(progress):
234
- progress({"update": True, "lookups": nlookups + nlookups_compare})
215
+ progress({PROGRESS.ANALYZE_UPDATE: True,
216
+ PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
235
217
  elif identifying_path:
236
218
  # If there is no existing object we still create a record for this object
237
219
  # but with no uuid which will be the indication that it does not exist.
238
220
  diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
239
221
  if callable(progress):
240
- progress({"create": True, "lookups": nlookups})
222
+ progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
241
223
  else:
242
224
  if callable(progress):
243
- progress({"lookups": nlookups})
225
+ progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
244
226
  if callable(progress):
245
- progress({"finish": True})
227
+ progress({PROGRESS.ANALYZE_DONE: True})
246
228
  return diffs
247
229
 
248
230
  def load_file(self, file: str) -> None:
@@ -287,9 +269,10 @@ class StructuredDataSet:
287
269
  for row in excel.sheet_reader(sheet_name):
288
270
  nrows += 1
289
271
  return nrows, len(excel.sheet_names)
290
- if self._progress:
272
+ if self._progress: # TODO: Move to _load_reader
291
273
  nrows, nsheets = get_counts()
292
- self._progress({"start": True, "sheets": nsheets, "rows": nrows})
274
+ self._progress({PROGRESS.LOAD_START: True,
275
+ PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
293
276
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
294
277
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
295
278
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
@@ -313,16 +296,15 @@ class StructuredDataSet:
313
296
  else:
314
297
  del self._errors["ref"]
315
298
  if self._progress:
316
- # TODO: Refactor with same thing below in _load_reader.
317
- self._progress({
318
- "finish": True,
319
- "refs": self.ref_total_count,
320
- "refs_found": self.ref_total_found_count,
321
- "refs_not_found": self.ref_total_notfound_count,
322
- "refs_lookup": self.ref_lookup_count,
323
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
324
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
325
- "refs_invalid": self.ref_invalid_identifying_property_count
299
+ self._progress({ # TODO: Refactor with same thing below in _load_reader.
300
+ PROGRESS.LOAD_DONE: True,
301
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
302
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
303
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
304
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
305
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
306
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
307
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
326
308
  })
327
309
 
328
310
  def _load_json_file(self, file: str) -> None:
@@ -352,14 +334,14 @@ class StructuredDataSet:
352
334
  self._add(type_name, structured_row)
353
335
  if self._progress:
354
336
  self._progress({
355
- "parse": True,
356
- "refs": self.ref_total_count,
357
- "refs_found": self.ref_total_found_count,
358
- "refs_not_found": self.ref_total_notfound_count,
359
- "refs_lookup": self.ref_lookup_count,
360
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
361
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
362
- "refs_invalid": self.ref_invalid_identifying_property_count
337
+ PROGRESS.LOAD_ITEM: True,
338
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
339
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
340
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
341
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
342
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
343
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
344
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
363
345
  })
364
346
  self._note_warning(reader.warnings, "reader")
365
347
  if schema:
@@ -465,12 +447,10 @@ class StructuredDataSet:
465
447
 
466
448
  class _StructuredRowTemplate:
467
449
 
468
- def __init__(self, column_names: List[str], schema: Optional[Schema] = None,
469
- obtain_array_values: Optional[Callable] = None) -> None:
450
+ def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
470
451
  self._schema = schema
471
452
  self._set_value_functions = {}
472
453
  self._template = self._create_row_template(column_names)
473
- self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None
474
454
 
475
455
  def create_row(self) -> dict:
476
456
  return copy.deepcopy(self._template)
@@ -530,16 +510,6 @@ class _StructuredRowTemplate:
530
510
  set_value_backtrack_object(i, p)
531
511
  data = data[p]
532
512
  if (p := path[-1]) == -1 and isinstance(value, str):
533
- if ENABLE_ARRAY_SHEET_REFS and False:
534
- # TODO: IN PROGRESS. DISABLED FOR NOW.
535
- if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
536
- if self._obtain_array_values:
537
- values = self._obtain_array_values(value)
538
- if sheet_name_containing_array := value[5:].strip():
539
- if dot := sheet_name_containing_array.find(".") > 0:
540
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
541
- pass
542
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
543
513
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
544
514
  if mapv:
545
515
  values = [mapv(value, src) for value in values]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b7
3
+ Version: 8.8.1.1b9
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -48,6 +48,7 @@ dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmj
48
48
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
49
  dcicutils/portal_object_utils.py,sha256=MF6MTZ6yxakZFDjbkTKCsF4q4p11dLDVvT5JBV9m6RQ,15408
50
50
  dcicutils/portal_utils.py,sha256=oBoI3KWRp6YrbsuVGbmPQ3kATB5cVVsQo7-qmnYXWqg,30260
51
+ dcicutils/progress_constants.py,sha256=Q5ZzXYQXi6QMIYnUi_vxDAEH-nTYjQVauc9HPfvk5jE,1475
51
52
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
52
53
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
53
54
  dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
@@ -62,15 +63,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
63
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
64
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
65
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=tnxilT_CBN58hEmvH-wMRM47RJiQTPQYiP2O_JrO41Q,59323
66
+ dcicutils/structured_data.py,sha256=1guVNDzIVxJkQA_m0jSh9xI2FB5oVXR4m7sqrqF8A5w,58559
66
67
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
68
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
69
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
70
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
71
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
72
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.1.1b7.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.1.1b7.dist-info/METADATA,sha256=0bVW0YQ9uJnm4u110RRhzXeCQQ0V1N3v42jxYTnsNGU,3356
74
- dcicutils-8.8.1.1b7.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.1.1b7.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.1.1b7.dist-info/RECORD,,
73
+ dcicutils-8.8.1.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
74
+ dcicutils-8.8.1.1b9.dist-info/METADATA,sha256=PpSJ-JtZqnTWFk4eeZbU3RnCfRXko6sCYafK2wtmFW0,3356
75
+ dcicutils-8.8.1.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
76
+ dcicutils-8.8.1.1b9.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
77
+ dcicutils-8.8.1.1b9.dist-info/RECORD,,