dcicutils 8.8.1.1b7__py3-none-any.whl → 8.8.1.1b9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/progress_constants.py +48 -0
- dcicutils/structured_data.py +30 -60
- {dcicutils-8.8.1.1b7.dist-info → dcicutils-8.8.1.1b9.dist-info}/METADATA +1 -1
- {dcicutils-8.8.1.1b7.dist-info → dcicutils-8.8.1.1b9.dist-info}/RECORD +7 -6
- {dcicutils-8.8.1.1b7.dist-info → dcicutils-8.8.1.1b9.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.1.1b7.dist-info → dcicutils-8.8.1.1b9.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.1.1b7.dist-info → dcicutils-8.8.1.1b9.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
|
4
|
+
# Constants for progress tracking for smaht-submitr.
|
5
|
+
# Here only to share between smaht-portal, snovault, and smaht-submitr.
|
6
|
+
|
7
|
+
class PROGRESS_INGESTER(Enum):
|
8
|
+
VALIDATION = "ingester_validation"
|
9
|
+
INITIATE = "ingester_initiate"
|
10
|
+
|
11
|
+
|
12
|
+
class PROGRESS_PARSE(Enum):
|
13
|
+
LOAD_START = "start"
|
14
|
+
LOAD_ITEM = "parse"
|
15
|
+
LOAD_DONE = "finish"
|
16
|
+
LOAD_COUNT_SHEETS = "sheets"
|
17
|
+
LOAD_COUNT_ROWS = "rows"
|
18
|
+
LOAD_COUNT_REFS = "refs"
|
19
|
+
LOAD_COUNT_REFS_FOUND = "refs_found"
|
20
|
+
LOAD_COUNT_REFS_NOT_FOUND = "refs_not_found"
|
21
|
+
LOAD_COUNT_REFS_LOOKUP = "refs_lookup"
|
22
|
+
LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "refs_lookup_cache_hit"
|
23
|
+
LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "refs_exists_cache_hit"
|
24
|
+
LOAD_COUNT_REFS_INVALID = "refs_invalid"
|
25
|
+
ANALYZE_START = "start"
|
26
|
+
ANALYZE_COUNT_TYPES = "types"
|
27
|
+
ANALYZE_COUNT_ITEMS = "objects"
|
28
|
+
ANALYZE_CREATE = "create"
|
29
|
+
ANALYZE_COUNT_LOOKUP = "lookups"
|
30
|
+
ANALYZE_UPDATE = "update"
|
31
|
+
ANALYZE_DONE = "finish"
|
32
|
+
|
33
|
+
|
34
|
+
class PROGRESS_LOADXL(Enum):
|
35
|
+
INITIATE = "loadxl_initiate"
|
36
|
+
START = "loadxl_start"
|
37
|
+
START_SECOND_ROUND = "loadxl_start_second_round"
|
38
|
+
ITEM = "loadxl_item"
|
39
|
+
ITEM_SECOND_ROUND = "loadxl_item_second_round"
|
40
|
+
GET = "loadxl_lookup"
|
41
|
+
POST = "loadxl_post"
|
42
|
+
PATCH = "loadxl_patch"
|
43
|
+
ERROR = "loadxl_error"
|
44
|
+
DONE = "loadxl_done"
|
45
|
+
TOTAL = "loadxl_total"
|
46
|
+
MESSAGE = "loadxl_message"
|
47
|
+
MESSAGE_VERBOSE = "loadxl_message_verbose"
|
48
|
+
MESSAGE_DEBUG = "loadxl_message_debug"
|
dcicutils/structured_data.py
CHANGED
@@ -17,6 +17,7 @@ from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid,
|
|
17
17
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
18
18
|
from dcicutils.portal_object_utils import PortalObject
|
19
19
|
from dcicutils.portal_utils import Portal as PortalBase
|
20
|
+
from dcicutils.progress_constants import PROGRESS_PARSE as PROGRESS
|
20
21
|
from dcicutils.schema_utils import Schema as SchemaBase
|
21
22
|
from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files
|
22
23
|
|
@@ -37,31 +38,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#"
|
|
37
38
|
ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+")
|
38
39
|
DOTTED_NAME_DELIMITER_CHAR = "."
|
39
40
|
|
41
|
+
|
40
42
|
# TODO: Should probably pass this knowledge in from callers.
|
41
43
|
FILE_TYPE_NAME = "File"
|
42
44
|
FILE_TYPE_PROPERTY_NAME = "filename"
|
43
|
-
# This ExtraFile is a pseudo-type to handle extra_files in smaht-submitr.
|
44
|
-
EXTRA_FILE_TYPE_NAME = "ExtraFile"
|
45
|
-
EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
|
46
|
-
|
47
|
-
ENABLE_ARRAY_SHEET_REFS = False
|
48
|
-
|
49
|
-
# The ExtraFile pseudo-type schema.
|
50
|
-
EXTRA_FILE_SCHEMA = {
|
51
|
-
"title": "ExtraFile",
|
52
|
-
"type": "object",
|
53
|
-
"required": [
|
54
|
-
"filename"
|
55
|
-
],
|
56
|
-
"identifyingProperties": [
|
57
|
-
"filename"
|
58
|
-
],
|
59
|
-
"properties": {
|
60
|
-
"filename": {
|
61
|
-
"type": "string"
|
62
|
-
}
|
63
|
-
}
|
64
|
-
}
|
65
45
|
|
66
46
|
# Forward type references for type hints.
|
67
47
|
Portal = Type["Portal"]
|
@@ -212,7 +192,8 @@ class StructuredDataSet:
|
|
212
192
|
diffs = {}
|
213
193
|
if callable(progress):
|
214
194
|
ntypes, nobjects = get_counts()
|
215
|
-
progress({
|
195
|
+
progress({PROGRESS.ANALYZE_START: True,
|
196
|
+
PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
|
216
197
|
if self.data or self.portal: # TODO: what is this OR biz?
|
217
198
|
refs = self.resolved_refs_with_uuids
|
218
199
|
# TODO: Need feedback/progress tracking mechanism here.
|
@@ -231,18 +212,19 @@ class StructuredDataSet:
|
|
231
212
|
uuid=existing_object.uuid,
|
232
213
|
diffs=object_diffs or None))
|
233
214
|
if callable(progress):
|
234
|
-
progress({
|
215
|
+
progress({PROGRESS.ANALYZE_UPDATE: True,
|
216
|
+
PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
|
235
217
|
elif identifying_path:
|
236
218
|
# If there is no existing object we still create a record for this object
|
237
219
|
# but with no uuid which will be the indication that it does not exist.
|
238
220
|
diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
|
239
221
|
if callable(progress):
|
240
|
-
progress({
|
222
|
+
progress({PROGRESS.ANALYZE_CREATE: True, PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
|
241
223
|
else:
|
242
224
|
if callable(progress):
|
243
|
-
progress({
|
225
|
+
progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
|
244
226
|
if callable(progress):
|
245
|
-
progress({
|
227
|
+
progress({PROGRESS.ANALYZE_DONE: True})
|
246
228
|
return diffs
|
247
229
|
|
248
230
|
def load_file(self, file: str) -> None:
|
@@ -287,9 +269,10 @@ class StructuredDataSet:
|
|
287
269
|
for row in excel.sheet_reader(sheet_name):
|
288
270
|
nrows += 1
|
289
271
|
return nrows, len(excel.sheet_names)
|
290
|
-
if self._progress:
|
272
|
+
if self._progress: # TODO: Move to _load_reader
|
291
273
|
nrows, nsheets = get_counts()
|
292
|
-
self._progress({
|
274
|
+
self._progress({PROGRESS.LOAD_START: True,
|
275
|
+
PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
|
293
276
|
excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
|
294
277
|
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
|
295
278
|
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
|
@@ -313,16 +296,15 @@ class StructuredDataSet:
|
|
313
296
|
else:
|
314
297
|
del self._errors["ref"]
|
315
298
|
if self._progress:
|
316
|
-
# TODO: Refactor with same thing below in _load_reader.
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
"refs_invalid": self.ref_invalid_identifying_property_count
|
299
|
+
self._progress({ # TODO: Refactor with same thing below in _load_reader.
|
300
|
+
PROGRESS.LOAD_DONE: True,
|
301
|
+
PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
|
302
|
+
PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
|
303
|
+
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
|
304
|
+
PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
|
305
|
+
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
|
306
|
+
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
|
307
|
+
PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
|
326
308
|
})
|
327
309
|
|
328
310
|
def _load_json_file(self, file: str) -> None:
|
@@ -352,14 +334,14 @@ class StructuredDataSet:
|
|
352
334
|
self._add(type_name, structured_row)
|
353
335
|
if self._progress:
|
354
336
|
self._progress({
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
337
|
+
PROGRESS.LOAD_ITEM: True,
|
338
|
+
PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
|
339
|
+
PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
|
340
|
+
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
|
341
|
+
PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
|
342
|
+
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
|
343
|
+
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
|
344
|
+
PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
|
363
345
|
})
|
364
346
|
self._note_warning(reader.warnings, "reader")
|
365
347
|
if schema:
|
@@ -465,12 +447,10 @@ class StructuredDataSet:
|
|
465
447
|
|
466
448
|
class _StructuredRowTemplate:
|
467
449
|
|
468
|
-
def __init__(self, column_names: List[str], schema: Optional[Schema] = None
|
469
|
-
obtain_array_values: Optional[Callable] = None) -> None:
|
450
|
+
def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None:
|
470
451
|
self._schema = schema
|
471
452
|
self._set_value_functions = {}
|
472
453
|
self._template = self._create_row_template(column_names)
|
473
|
-
self._obtain_array_values = obtain_array_values if callable(obtain_array_values) else None
|
474
454
|
|
475
455
|
def create_row(self) -> dict:
|
476
456
|
return copy.deepcopy(self._template)
|
@@ -530,16 +510,6 @@ class _StructuredRowTemplate:
|
|
530
510
|
set_value_backtrack_object(i, p)
|
531
511
|
data = data[p]
|
532
512
|
if (p := path[-1]) == -1 and isinstance(value, str):
|
533
|
-
if ENABLE_ARRAY_SHEET_REFS and False:
|
534
|
-
# TODO: IN PROGRESS. DISABLED FOR NOW.
|
535
|
-
if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
|
536
|
-
if self._obtain_array_values:
|
537
|
-
values = self._obtain_array_values(value)
|
538
|
-
if sheet_name_containing_array := value[5:].strip():
|
539
|
-
if dot := sheet_name_containing_array.find(".") > 0:
|
540
|
-
if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
|
541
|
-
pass
|
542
|
-
# sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
|
543
513
|
values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
|
544
514
|
if mapv:
|
545
515
|
values = [mapv(value, src) for value in values]
|
@@ -48,6 +48,7 @@ dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmj
|
|
48
48
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
49
49
|
dcicutils/portal_object_utils.py,sha256=MF6MTZ6yxakZFDjbkTKCsF4q4p11dLDVvT5JBV9m6RQ,15408
|
50
50
|
dcicutils/portal_utils.py,sha256=oBoI3KWRp6YrbsuVGbmPQ3kATB5cVVsQo7-qmnYXWqg,30260
|
51
|
+
dcicutils/progress_constants.py,sha256=Q5ZzXYQXi6QMIYnUi_vxDAEH-nTYjQVauc9HPfvk5jE,1475
|
51
52
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
52
53
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
53
54
|
dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
@@ -62,15 +63,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
62
63
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
63
64
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
64
65
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
65
|
-
dcicutils/structured_data.py,sha256=
|
66
|
+
dcicutils/structured_data.py,sha256=1guVNDzIVxJkQA_m0jSh9xI2FB5oVXR4m7sqrqF8A5w,58559
|
66
67
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
67
68
|
dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
|
68
69
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
69
70
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
70
71
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
71
72
|
dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
|
72
|
-
dcicutils-8.8.1.
|
73
|
-
dcicutils-8.8.1.
|
74
|
-
dcicutils-8.8.1.
|
75
|
-
dcicutils-8.8.1.
|
76
|
-
dcicutils-8.8.1.
|
73
|
+
dcicutils-8.8.1.1b9.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
74
|
+
dcicutils-8.8.1.1b9.dist-info/METADATA,sha256=PpSJ-JtZqnTWFk4eeZbU3RnCfRXko6sCYafK2wtmFW0,3356
|
75
|
+
dcicutils-8.8.1.1b9.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
76
|
+
dcicutils-8.8.1.1b9.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
77
|
+
dcicutils-8.8.1.1b9.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|