dcicutils 8.8.1.1b7__py3-none-any.whl → 8.8.1.1b10__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,53 @@
1
+ from enum import Enum
2
+
3
+
4
+ # Constants for progress tracking for smaht-submitr.
5
+ # Here only to share between smaht-portal, snovault, and smaht-submitr.
6
+
7
+ class PROGRESS_INGESTER(Enum):
8
+ VALIDATION = "ingester_validation"
9
+ INITIATE = "ingester_initiate"
10
+ PARSE_LOAD_INITIATE = "ingester_parse_initiate"
11
+ PARSE_LOAD_DONE = "ingester_parse_done"
12
+ VALIDATE_LOAD_INITIATE = "ingester_validate_initiate"
13
+ VALIDATE_LOAD_DONE = "ingester_validate_done"
14
+ LOADXL_INITIATE = "ingester_loadxl_initiate"
15
+ LOADXL_DONE = "ingester_loadxl_done"
16
+
17
+
18
+ class PROGRESS_PARSE(Enum):
19
+ LOAD_START = "start"
20
+ LOAD_ITEM = "parse"
21
+ LOAD_DONE = "finish"
22
+ LOAD_COUNT_SHEETS = "sheets"
23
+ LOAD_COUNT_ROWS = "rows"
24
+ LOAD_COUNT_REFS = "refs"
25
+ LOAD_COUNT_REFS_FOUND = "refs_found"
26
+ LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
27
+ LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
28
+ LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
29
+ LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
30
+ LOAD_COUNT_REFS_INVALID = "refs_invalid"
31
+ ANALYZE_START = "start"
32
+ ANALYZE_COUNT_TYPES = "types"
33
+ ANALYZE_COUNT_ITEMS = "objects"
34
+ ANALYZE_CREATE = "create"
35
+ ANALYZE_COUNT_LOOKUP = "lookups"
36
+ ANALYZE_UPDATE = "update"
37
+ ANALYZE_DONE = "finish"
38
+
39
+
40
+ class PROGRESS_LOADXL(Enum):
41
+ START = "loadxl_start"
42
+ START_SECOND_ROUND = "loadxl_start_second_round"
43
+ ITEM = "loadxl_item"
44
+ ITEM_SECOND_ROUND = "loadxl_item_second_round"
45
+ GET = "loadxl_lookup"
46
+ POST = "loadxl_post"
47
+ PATCH = "loadxl_patch"
48
+ ERROR = "loadxl_error"
49
+ DONE = "loadxl_done"
50
+ TOTAL = "loadxl_total"
51
+ MESSAGE = "loadxl_message"
52
+ MESSAGE_VERBOSE = "loadxl_message_verbose"
53
+ MESSAGE_DEBUG = "loadxl_message_debug"
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
17
17
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
19
19
  from dcicutils.portal_utils import Portal as PortalBase
20
+ from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
20
21
  from dcicutils.schema_utils import Schema as SchemaBase
21
22
  from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files
22
23
 
@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
37
38
  ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
38
39
  DOTTED_NAME_DELIMITER_CHAR = "."
39
40
 
41
+
40
42
  # TODO: Should probably pass this knowledge in from callers.
41
43
  FILE_TYPE_NAME = "File"
42
44
  FILE_TYPE_PROPERTY_NAME = "filename"
43
- # This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
44
- EXTRA_FILE_TYPE_NAME = "ExtraFile"
45
- EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
46
-
47
- ENABLE_ARRAY_SHEET_REFS = False
48
-
49
- # The ExtraFile pseudo-type schema.
50
- EXTRA_FILE_SCHEMA = {
51
- "title": "ExtraFile",
52
- "type": "object",
53
- "required": [
54
- "filename"
55
- ],
56
- "identifyingProperties": [
57
- "filename"
58
- ],
59
- "properties": {
60
- "filename": {
61
- "type": "string"
62
- }
63
- }
64
- }
65
45
 
66
46
  # Forward type references for type hints.
67
47
  Portal = Type["Portal"]
@@ -212,7 +192,8 @@ class StructuredDataSet:
212
192
  diffs = {}
213
193
  if callable(progress):
214
194
  ntypes, nobjects = get_counts()
215
- progress({"start": True, "types": ntypes, "objects": nobjects})
195
+ progress({PROGRESS.ANALYZE_START: True,
196
+ PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
216
197
  if self.data or self.portal: # TODO: what is this OR biz?
217
198
  refs = self.resolved_refs_with_uuids
218
199
  # TODO: Need feedback/progress tracking mechanism here.
@@ -231,18 +212,19 @@ class StructuredDataSet:
231
212
  uuid=existing_object.uuid,
232
213
  diffs=object_diffs or None))
233
214
  if callable(progress):
234
- progress({"update": True, "lookups": nlookups + nlookups_compare})
215
+ progress({PROGRESS.ANALYZE_UPDATE: True,
216
+ PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
235
217
  elif identifying_path:
236
218
  # If there is no existing object we still create a record for this object
237
219
  # but with no uuid which will be the indication that it does not exist.
238
220
  diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
239
221
  if callable(progress):
240
- progress({"create": True, "lookups": nlookups})
222
+ progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
241
223
  else:
242
224
  if callable(progress):
243
- progress({"lookups": nlookups})
225
+ progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
244
226
  if callable(progress):
245
- progress({"finish": True})
227
+ progress({PROGRESS.ANALYZE_DONE: True})
246
228
  return diffs
247
229
 
248
230
  def load_file(self, file: str) -> None:
@@ -287,9 +269,10 @@ class StructuredDataSet:
287
269
  for row in excel.sheet_reader(sheet_name):
288
270
  nrows += 1
289
271
  return nrows, len(excel.sheet_names)
290
- if self._progress:
272
+ if self._progress: # TODO: Move to _load_reader
291
273
  nrows, nsheets = get_counts()
292
- self._progress({"start": True, "sheets": nsheets, "rows": nrows})
274
+ self._progress({PROGRESS.LOAD_START: True,
275
+ PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
293
276
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
294
277
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
295
278
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
@@ -313,16 +296,15 @@ class StructuredDataSet:
313
296
  else:
314
297
  del self._errors["ref"]
315
298
  if self._progress:
316
- # TODO: Refactor with same thing below in _load_reader.
317
- self._progress({
318
- "finish": True,
319
- "refs": self.ref_total_count,
320
- "refs_found": self.ref_total_found_count,
321
- "refs_not_found": self.ref_total_notfound_count,
322
- "refs_lookup": self.ref_lookup_count,
323
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
324
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
325
- "refs_invalid": self.ref_invalid_identifying_property_count
299
+ self._progress({ # TODO: Refactor with same thing below in _load_reader.
300
+ PROGRESS.LOAD_DONE: True,
301
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
302
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
303
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
304
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
305
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
306
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
307
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
326
308
  })
327
309
 
328
310
  def _load_json_file(self, file: str) -> None:
@@ -352,14 +334,14 @@ class StructuredDataSet:
352
334
  self._add(type_name, structured_row)
353
335
  if self._progress:
354
336
  self._progress({
355
- "parse": True,
356
- "refs": self.ref_total_count,
357
- "refs_found": self.ref_total_found_count,
358
- "refs_not_found": self.ref_total_notfound_count,
359
- "refs_lookup": self.ref_lookup_count,
360
- "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
361
- "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
362
- "refs_invalid": self.ref_invalid_identifying_property_count
337
+ PROGRESS.LOAD_ITEM: True,
338
+ PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
339
+ PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
340
+ PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
341
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
342
+ PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
343
+ PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
344
+ PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
363
345
  })
364
346
  self._note_warning(reader.warnings, "reader")
365
347
  if schema:
@@ -465,12 +447,10 @@ class StructuredDataSet:
465
447
 
466
448
  class _StructuredRowTemplate:
467
449
 
468
- def __init__(self, column_names: List[str], schema: Optional[Schema] = None,
469
- obtain_array_values: Optional[Callable] = None) -> None:
450
+ def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
470
451
  self._schema = schema
471
452
  self._set_value_functions = {}
472
453
  self._template = self._create_row_template(column_names)
473
- self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None
474
454
 
475
455
  def create_row(self) -> dict:
476
456
  return copy.deepcopy(self._template)
@@ -530,16 +510,6 @@ class _StructuredRowTemplate:
530
510
  set_value_backtrack_object(i, p)
531
511
  data = data[p]
532
512
  if (p := path[-1]) == -1 and isinstance(value, str):
533
- if ENABLE_ARRAY_SHEET_REFS and False:
534
- # TODO: IN PROGRESS. DISABLED FOR NOW.
535
- if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
536
- if self._obtain_array_values:
537
- values = self._obtain_array_values(value)
538
- if sheet_name_containing_array := value[5:].strip():
539
- if dot := sheet_name_containing_array.find(".") > 0:
540
- if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
541
- pass
542
- # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
543
513
  values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
544
514
  if mapv:
545
515
  values = [mapv(value, src) for value in values]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.1.1b7
3
+ Version: 8.8.1.1b10
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -48,6 +48,7 @@ dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmj
48
48
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
49
  dcicutils/portal_object_utils.py,sha256=MF6MTZ6yxakZFDjbkTKCsF4q4p11dLDVvT5JBV9m6RQ,15408
50
50
  dcicutils/portal_utils.py,sha256=oBoI3KWRp6YrbsuVGbmPQ3kATB5cVVsQo7-qmnYXWqg,30260
51
+ dcicutils/progress_constants.py,sha256=1N3BOnViX4AlrNK4lq7O-zRBqoZ0BTvC_vpFk2rOxD4,1736
51
52
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
52
53
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
53
54
  dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
@@ -62,15 +63,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
63
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
64
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
65
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=tnxilT_CBN58hEmvH-wMRM47RJiQTPQYiP2O_JrO41Q,59323
66
+ dcicutils/structured_data.py,sha256=1guVNDzIVxJkQA_m0jSh9xI2FB5oVXR4m7sqrqF8A5w,58559
66
67
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
68
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
69
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
70
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
71
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
72
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.1.1b7.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.1.1b7.dist-info/METADATA,sha256=0bVW0YQ9uJnm4u110RRhzXeCQQ0V1N3v42jxYTnsNGU,3356
74
- dcicutils-8.8.1.1b7.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.1.1b7.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.1.1b7.dist-info/RECORD,,
73
+ dcicutils-8.8.1.1b10.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
74
+ dcicutils-8.8.1.1b10.dist-info/METADATA,sha256=ZpO-aF0tzLRyns4vaIGBO5VsQq1hX8bw4rWDv8_v5Hc,3357
75
+ dcicutils-8.8.1.1b10.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
76
+ dcicutils-8.8.1.1b10.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
77
+ dcicutils-8.8.1.1b10.dist-info/RECORD,,