dcicutils 8.17.0.1b1__py3-none-any.whl → 8.17.0.1b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/structured_data.py +5 -3
- dcicutils/submitr/config/custom_column_mappings.json +79 -0
- dcicutils/submitr/custom_excel.py +290 -0
- {dcicutils-8.17.0.1b1.dist-info → dcicutils-8.17.0.1b3.dist-info}/METADATA +1 -1
- {dcicutils-8.17.0.1b1.dist-info → dcicutils-8.17.0.1b3.dist-info}/RECORD +8 -6
- {dcicutils-8.17.0.1b1.dist-info → dcicutils-8.17.0.1b3.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.17.0.1b1.dist-info → dcicutils-8.17.0.1b3.dist-info}/WHEEL +0 -0
- {dcicutils-8.17.0.1b1.dist-info → dcicutils-8.17.0.1b3.dist-info}/entry_points.txt +0 -0
dcicutils/structured_data.py
CHANGED
@@ -105,11 +105,13 @@ class StructuredDataSet:
|
|
105
105
|
ref_lookup_strategy: Optional[Callable] = None,
|
106
106
|
ref_lookup_nocache: bool = False,
|
107
107
|
norefs: bool = False, merge: bool = False,
|
108
|
+
excel_class: Optional[Excel] = None,
|
108
109
|
progress: Optional[Callable] = None,
|
109
110
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
110
111
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
111
112
|
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
112
113
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
114
|
+
excel_class=excel_class,
|
113
115
|
norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
|
114
116
|
|
115
117
|
def validate(self, force: bool = False) -> None:
|
@@ -242,7 +244,7 @@ class StructuredDataSet:
|
|
242
244
|
for portal_object in self.data[type_name]:
|
243
245
|
portal_object = PortalObject(portal_object, portal=self.portal, type=type_name)
|
244
246
|
existing_object, identifying_path, nlookups = (
|
245
|
-
portal_object.lookup(raw=
|
247
|
+
portal_object.lookup(raw=True, ref_lookup_strategy=self._ref_lookup_strategy))
|
246
248
|
if existing_object:
|
247
249
|
object_diffs, nlookups_compare = portal_object.compare(
|
248
250
|
existing_object, consider_refs=True, resolved_refs=refs)
|
@@ -459,7 +461,7 @@ class StructuredDataSet:
|
|
459
461
|
just returns the given object. Note that the given object may be CHANGED in place.
|
460
462
|
"""
|
461
463
|
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
462
|
-
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=
|
464
|
+
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
463
465
|
return merge_objects(existing_portal_object, portal_object, primitive_lists=True)
|
464
466
|
return portal_object
|
465
467
|
|
@@ -981,7 +983,7 @@ class Portal(PortalBase):
|
|
981
983
|
|
982
984
|
def ref_lookup_uncached(self, object_name: str) -> Optional[dict]:
|
983
985
|
try:
|
984
|
-
result = super().get_metadata(object_name, raw=
|
986
|
+
result = super().get_metadata(object_name, raw=True)
|
985
987
|
self._ref_lookup_found_count += 1
|
986
988
|
return result
|
987
989
|
except Exception as e:
|
@@ -0,0 +1,79 @@
|
|
1
|
+
{
|
2
|
+
"version": "1.0.0",
|
3
|
+
"sheet_mappings": {
|
4
|
+
"ExternalQualityMetric": "external_quality_metric",
|
5
|
+
"AnotherTypeForCustomColumnMappingsHere": "another_custom_column_mappings_here"
|
6
|
+
},
|
7
|
+
"column_mappings": {
|
8
|
+
"external_quality_metric": {
|
9
|
+
"total_raw_reads_sequenced": {
|
10
|
+
"qc_values#.derived_from": "{name}",
|
11
|
+
"qc_values#.value": "{value:integer}",
|
12
|
+
"qc_values#.key": "Total Raw Reads Sequenced",
|
13
|
+
"qc_values#.tooltip": "# of reads (150bp)"
|
14
|
+
},
|
15
|
+
"total_raw_bases_sequenced": {
|
16
|
+
"qc_values#.derived_from": "{name}",
|
17
|
+
"qc_values#.value": "{value:integer}",
|
18
|
+
"qc_values#.key": "Total Raw Bases Sequenced",
|
19
|
+
"qc_values#.tooltip": null
|
20
|
+
},
|
21
|
+
"prefiltering_number_of_consensus_molecules": {
|
22
|
+
"qc_values#.derived_from": "{name}",
|
23
|
+
"qc_values#.value": "{value:integer}",
|
24
|
+
"qc_values#.key": "Pre-filtering # of Consensus Molecules",
|
25
|
+
"qc_values#.tooltip": "Number of DNA molecules identified"
|
26
|
+
},
|
27
|
+
"prefiltering_genome_coverage": {
|
28
|
+
"qc_values#.derived_from": "{name}",
|
29
|
+
"qc_values#.value": "{value:float}",
|
30
|
+
"qc_values#.key": "Pre-filtering Genome Coverage",
|
31
|
+
"qc_values#.tooltip": null
|
32
|
+
},
|
33
|
+
"prefiltering_number_of_reads_per_consensus_molecule": {
|
34
|
+
"qc_values#.derived_from": "{name}",
|
35
|
+
"qc_values#.value": "{value:integer}",
|
36
|
+
"qc_values#.key": "Pre-filtering Number of Reads per Consensus Molecule",
|
37
|
+
"qc_values#.tooltip": null
|
38
|
+
},
|
39
|
+
"postfiltering_number_of_consensus_molecules": {
|
40
|
+
"qc_values#.derived_from": "{name}",
|
41
|
+
"qc_values#.value": "{value:integer}",
|
42
|
+
"qc_values#.key": "Post-filtering Number of Consensus Molecules",
|
43
|
+
"qc_values#.tooltip": null
|
44
|
+
},
|
45
|
+
"fraction_prefiltering_molecules_passing_filters": {
|
46
|
+
"qc_values#.derived_from": "{name}",
|
47
|
+
"qc_values#.value": "{value:float}",
|
48
|
+
"qc_values#.key": "Fraction of Pre-filtering Consensus Molecules that Pass Filters",
|
49
|
+
"qc_values#.tooltip": null
|
50
|
+
},
|
51
|
+
"number_postfiltering_consensus_base_pairs": {
|
52
|
+
"qc_values#.derived_from": "{name}",
|
53
|
+
"qc_values#.value": "{value:integer}",
|
54
|
+
"qc_values#.key": "Number of Final Post-filtering Consensus Interrogated Base Pairs",
|
55
|
+
"qc_values#.tooltip": "After applying all filters for variant calling, e.g. Mapping quality, Low complexity regions, a4s2 duplex reconstruction criteria, etc."
|
56
|
+
},
|
57
|
+
"somatic_snv_count_by_molecule": {
|
58
|
+
"qc_values#.derived_from": "{name}",
|
59
|
+
"qc_values#.value": "{value:integer}",
|
60
|
+
"qc_values#.key": "Somatic SNV Count by Molecule",
|
61
|
+
"qc_values#.tooltip": null
|
62
|
+
},
|
63
|
+
"snv_mutation_burden_by_molecule": {
|
64
|
+
"qc_values#.derived_from": "{name}",
|
65
|
+
"qc_values#.value": "{value:float}",
|
66
|
+
"qc_values#.key": "Somatic SNV Mutation Burden by Molecule",
|
67
|
+
"qc_values#.tooltip": "Detected somatic mutation / final consensus interrogated base pairs"
|
68
|
+
}
|
69
|
+
},
|
70
|
+
"another_custom_column_mappings_here": {
|
71
|
+
"your_custom_column_name": {
|
72
|
+
"qc_values#.derived_from": "{name}",
|
73
|
+
"qc_values#.value": "{value}",
|
74
|
+
"qc_values#.key": "Your key for this column mapping",
|
75
|
+
"qc_values#.tooltip": "Your tooltip text for this column mapping"
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
@@ -0,0 +1,290 @@
|
|
1
|
+
from copy import deepcopy
|
2
|
+
import io
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from requests import get as requests_get
|
6
|
+
from typing import Any, List, Optional
|
7
|
+
from dcicutils.data_readers import Excel, ExcelSheetReader
|
8
|
+
from dcicutils.misc_utils import to_boolean, to_float, to_integer
|
9
|
+
|
10
|
+
# This module implements a custom Excel spreadsheet class which supports "custom column mappings",
|
11
|
+
# meaning that, at a very low/early level in processing, the columns/values in the spreadsheet
|
12
|
+
# can be redefined/remapped to different columns/values. The mapping is defined by a JSON config
|
13
|
+
# file (by default in config/custom_column_mappings.json). It can be thought of as a virtual
|
14
|
+
# preprocessing step on the spreadsheet. This was first implemented to support the simplified QC
|
15
|
+
# columns/values. For EXAMPLE, so the spreadsheet author can specify single columns like this:
|
16
|
+
#
|
17
|
+
# total_raw_reads_sequenced: 11870183
|
18
|
+
# total_raw_bases_sequenced: 44928835584
|
19
|
+
#
|
20
|
+
# But this will be mapped, i.e. the system will act AS-IF we instead had these columns/values:
|
21
|
+
#
|
22
|
+
# qc_values#0.derived_from: total_raw_reads_sequenced
|
23
|
+
# qc_values#0.value: 11870183
|
24
|
+
# qc_values#0.key: Total Raw Reads Sequenced
|
25
|
+
# qc_values#0.tooltip: # of reads (150bp)
|
26
|
+
# qc_values#1.derived_from: total_raw_bases_sequenced
|
27
|
+
# qc_values#1.value: 44928835584
|
28
|
+
# qc_values#1.key: Total Raw Bases Sequenced
|
29
|
+
# qc_values#1.tooltip: None
|
30
|
+
#
|
31
|
+
# The relevant portion of the controlling config file (config/custom_column_mappings.json)
|
32
|
+
# for the above example looks something like this:
|
33
|
+
#
|
34
|
+
# "sheet_mappings": {
|
35
|
+
# "ExternalQualityMetric": "external_quality_metric"
|
36
|
+
# },
|
37
|
+
# "column_mappings": {
|
38
|
+
# "external_quality_metric": {
|
39
|
+
# "total_raw_reads_sequenced": {
|
40
|
+
# "qc_values#.derived_from": "{name}",
|
41
|
+
# "qc_values#.value": "{value:integer}",
|
42
|
+
# "qc_values#.key": "Total Raw Reads Sequenced",
|
43
|
+
# "qc_values#.tooltip": "# of reads (150bp)"
|
44
|
+
# },
|
45
|
+
# "total_raw_bases_sequenced": {
|
46
|
+
# "qc_values#.derived_from": "{name}",
|
47
|
+
# "qc_values#.value": "{value:integer}",
|
48
|
+
# "qc_values#.key": "Total Raw Bases Sequenced",
|
49
|
+
# "qc_values#.tooltip": null
|
50
|
+
# },
|
51
|
+
# "et cetera": "..."
|
52
|
+
# }
|
53
|
+
# }
|
54
|
+
#
|
55
|
+
# This says that for the ExternalQualityMetric sheet (only) the mappings with the config file
|
56
|
+
# section column_mappings.external_quality_metric will be applied. The "qc_values#" portion of
|
57
|
+
# the mapped columns names will be expanded to "qc_values#0" for total_raw_reads_sequenced items,
|
58
|
+
# and to "qc_values#1" for the total_raw_bases_sequenced items, and so on. This will be based on
|
59
|
+
# the ACTUAL columns present in the sheet; so if total_raw_reads_sequenced were not present in
|
60
|
+
# the sheet, then the total_raw_bases_sequenced items would be expanded to "qc_values#0".
|
61
|
+
# Note the special "{name}" and "{value}" values ("macros") for the target (synthetic) properties;
|
62
|
+
# these will be evaluated (here) to the name of the original property name and value, respectively.
|
63
|
+
#
|
64
|
+
# Since the (first) actual use-case of this is in fact for these qc_values, and since these have
|
65
|
+
# effectively untyped values (i.e. the ExternalQualityMetric schema specifies all primitive types
|
66
|
+
# as possible/acceptable types for qc_values.value), we also allow a ":TYPE" suffix for the
|
67
|
+
# special "{value}" macro, so that a specific primitive type may be specified, e.g. "{value:integer}"
|
68
|
+
# will evaluate the original property value as an integer (if it cannot be converted to an integer
|
69
|
+
# then whatever its value is, will be passed on through as a string).
|
70
|
+
#
|
71
|
+
# The hook for this is to pass the CustomExcel type to StructuredDataSet in submission.py.
|
72
|
+
# Note that the config file is fetched from GitHub, with a fallback to config/custom_column_mappings.json.
|
73
|
+
#
|
74
|
+
# ALSO ...
|
75
|
+
# This CustomExcel class also handles multiple sheets within a spreadsheet representing
|
76
|
+
# the same (portal) type; see comments below near the ExcelSheetName class definition.
|
77
|
+
|
78
|
+
# First component of CUSTOM_COLUMN_MAPPINGS_URL: the raw-content GitHub address up to (not including) the branch.
CUSTOM_COLUMN_MAPPINGS_BASE_URL = "https://raw.githubusercontent.com/smaht-dac/submitr/refs/heads"
|
79
|
+
# Branch component of CUSTOM_COLUMN_MAPPINGS_URL.
# NOTE(review): this looks like a dated feature branch; confirm it is meant to be long-lived.
CUSTOM_COLUMN_MAPPINGS_BRANCH = "dmichaels-custom-column-mappings-20250115"
|
80
|
+
# Final component of CUSTOM_COLUMN_MAPPINGS_URL: the path of the JSON config file below the branch.
CUSTOM_COLUMN_MAPPINGS_PATH = "submitr/config/custom_column_mappings.json"
|
81
|
+
# Full URL (base/branch/path) from which CustomExcel._get_custom_column_mappings fetches the config.
CUSTOM_COLUMN_MAPPINGS_URL = f"{CUSTOM_COLUMN_MAPPINGS_BASE_URL}/{CUSTOM_COLUMN_MAPPINGS_BRANCH}/{CUSTOM_COLUMN_MAPPINGS_PATH}"  # noqa
|
82
|
+
# When exactly True, the GitHub fetch is skipped and only the config file packaged
# alongside this module (config/custom_column_mappings.json) is read.
CUSTOM_COLUMN_MAPPINGS_LOCAL = False
|
83
|
+
|
84
|
+
# Character which, as the last character of the first component of a column name,
# marks that component as an array (e.g. the "#" in "qc_values#.value").
COLUMN_NAME_ARRAY_SUFFIX_CHAR = "#"
|
85
|
+
# Separator on which a column name is split into its components (e.g. "qc_values#" and "value").
COLUMN_NAME_SEPARATOR = "."
|
86
|
+
|
87
|
+
|
88
|
+
class CustomExcel(Excel):
    """Excel subclass which hands out sheet readers that apply "custom column mappings".

    The mappings config is fetched from CUSTOM_COLUMN_MAPPINGS_URL, falling back to the
    config/custom_column_mappings.json file located next to this module.
    """

    # Upper bound, in seconds, on the config fetch so that a stalled connection
    # cannot block spreadsheet processing indefinitely (we then use the fallback).
    _FETCH_TIMEOUT_SECONDS = 10

    def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
        """Return a CustomExcelSheetReader for the given sheet, passing it the mappings config."""
        return CustomExcelSheetReader(self, sheet_name=sheet_name, workbook=self._workbook,
                                      custom_column_mappings=CustomExcel._get_custom_column_mappings())

    def effective_sheet_name(self, sheet_name: str) -> str:
        """Return the given sheet name with any prefix up to and including the first underscore removed.

        The name is returned unchanged if it has no underscore, if the underscore is at
        index 0 or 1, or if nothing follows the underscore (so we never return an empty name).
        """
        # NOTE(review): ExcelSheetName in this module accepts a one-character prefix (index > 0)
        # whereas this requires at least two characters (index > 1); confirm which is intended.
        underscore = sheet_name.find("_")
        if (underscore > 1) and (underscore < len(sheet_name) - 1):
            return sheet_name[underscore + 1:]
        return sheet_name

    @staticmethod
    def _get_custom_column_mappings() -> Optional[dict]:
        """Load the mappings config and return it keyed by sheet name, or None if unavailable/empty.

        In the returned dictionary each key is a sheet name (from "sheet_mappings" in the
        config) and each value is the dictionary of column mappings for that sheet.
        """

        def fetch_custom_column_mappings() -> dict:
            # Returns the raw config as a dictionary; an empty dictionary if it could not be loaded.
            custom_column_mappings = None
            if CUSTOM_COLUMN_MAPPINGS_LOCAL is not True:
                # Fetch config file directly from GitHub (yes this repo is public).
                try:
                    custom_column_mappings = requests_get(
                        CUSTOM_COLUMN_MAPPINGS_URL, timeout=CustomExcel._FETCH_TIMEOUT_SECONDS).json()
                except Exception:
                    # Deliberately best-effort: on any failure (network, timeout,
                    # non-JSON response) we fall through to the packaged config file.
                    custom_column_mappings = None
            if not custom_column_mappings:
                # Fallback to the actual config file in this package.
                try:
                    file = os.path.join(os.path.dirname(__file__), "config", "custom_column_mappings.json")
                    with io.open(file, "r", encoding="utf-8") as f:
                        custom_column_mappings = json.load(f)
                except Exception:
                    custom_column_mappings = None
            if not isinstance(custom_column_mappings, dict):
                custom_column_mappings = {}
            return custom_column_mappings

        def post_process_custom_column_mappings(custom_column_mappings: dict) -> Optional[dict]:
            # Resolves each "sheet_mappings" entry which names a "column_mappings" section into
            # that section itself; entries which cannot be resolved to a dictionary are dropped.
            if isinstance(column_mappings := custom_column_mappings.get("column_mappings"), dict):
                if isinstance(sheet_mappings := custom_column_mappings.get("sheet_mappings"), dict):
                    # Iterate over a copy of the keys since entries may be deleted along the way.
                    for sheet_name in list(sheet_mappings.keys()):
                        if isinstance(sheet_mappings[sheet_name], str):
                            if isinstance(column_mappings.get(sheet_mappings[sheet_name]), dict):
                                sheet_mappings[sheet_name] = column_mappings.get(sheet_mappings[sheet_name])
                            else:
                                del sheet_mappings[sheet_name]
                        elif not isinstance(sheet_mappings[sheet_name], dict):
                            del sheet_mappings[sheet_name]
                    return sheet_mappings
            return None

        if not (custom_column_mappings := fetch_custom_column_mappings()):
            return None
        # An empty post-processed result is reported as None, same as no config at all.
        return post_process_custom_column_mappings(custom_column_mappings) or None
|
141
|
+
|
142
|
+
|
143
|
+
class CustomExcelSheetReader(ExcelSheetReader):
    """Sheet reader which replaces configured columns with their synthetic (mapped) columns.

    The header is rewritten in _define_header and each row is rewritten in _iter_mapper,
    according to the custom column mappings configured for the sheet being read.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Accept an extra custom_column_mappings keyword argument (a dictionary keyed by sheet name).

        Only the entry for the sheet_name keyword argument is retained; if either is missing
        or of the wrong type then no mappings are applied. All other arguments are passed on.
        """
        ARGUMENT_NAME_SHEET_NAME = "sheet_name"
        ARGUMENT_NAME_CUSTOM_COLUMN_MAPPINGS = "custom_column_mappings"
        # This is assigned before the base class constructor is called.
        self._custom_column_mappings = None
        if ARGUMENT_NAME_CUSTOM_COLUMN_MAPPINGS in kwargs:
            # This is our own argument; remove it so it is not passed on to the base class.
            custom_column_mappings = kwargs.pop(ARGUMENT_NAME_CUSTOM_COLUMN_MAPPINGS)
            if not (isinstance(custom_column_mappings, dict) and
                    isinstance(sheet_name := kwargs.get(ARGUMENT_NAME_SHEET_NAME, None), str) and
                    isinstance(custom_column_mappings := custom_column_mappings.get(sheet_name), dict)):
                custom_column_mappings = None
            self._custom_column_mappings = custom_column_mappings
        super().__init__(*args, **kwargs)

    def _define_header(self, header: List[Optional[Any]]) -> None:
        """Define the header, replacing each mapped column with its synthetic columns.

        The header as defined by the base class is retained in self._original_header.
        """

        def fixup_custom_column_mappings(custom_column_mappings: dict, actual_column_names: List[str]) -> dict:

            # This fixes up the custom column mappings config for this particular sheet based
            # on the actual (header) column names, e.g. in particular for the array
            # specifiers like mapping "qc_values#.value" to "qc_values#0.value".
            # Works on (and returns) a deep copy; the given mappings are not changed.

            def fixup_custom_array_column_mappings(custom_column_mappings: dict) -> None:

                def get_simple_array_column_name_component(column_name: str) -> Optional[str]:
                    # Returns the first component of the given column name without its array
                    # suffix, if (and only if) that suffix is its last character; otherwise None.
                    if isinstance(column_name, str):
                        if column_name_components := column_name.split(COLUMN_NAME_SEPARATOR):
                            if (suffix := column_name_components[0].find(COLUMN_NAME_ARRAY_SUFFIX_CHAR)) > 0:
                                if (suffix + 1) == len(column_name_components[0]):
                                    return column_name_components[0][:suffix]
                    return None

                # Per array name: the index currently being assigned, and the columns seen so far;
                # each distinct (original) column using a given array name gets the next index.
                synthetic_array_column_names = {}
                for column_name in custom_column_mappings:
                    column_mapping = custom_column_mappings[column_name]
                    # Iterate over a copy of the keys since the mapping is changed along the way.
                    for synthetic_column_name in list(column_mapping.keys()):
                        array_name = get_simple_array_column_name_component(synthetic_column_name)
                        if not array_name:
                            continue
                        if array_name not in synthetic_array_column_names:
                            synthetic_array_column_names[array_name] = {"index": 0, "columns": [column_name]}
                        elif column_name not in synthetic_array_column_names[array_name]["columns"]:
                            synthetic_array_column_names[array_name]["index"] += 1
                            synthetic_array_column_names[array_name]["columns"].append(column_name)
                        array_index = synthetic_array_column_names[array_name]["index"]
                        indexed_column_name = synthetic_column_name.replace(
                            f"{array_name}{COLUMN_NAME_ARRAY_SUFFIX_CHAR}",
                            f"{array_name}{COLUMN_NAME_ARRAY_SUFFIX_CHAR}{array_index}")
                        # Re-key the entry under its indexed name (this moves it to the end).
                        column_mapping[indexed_column_name] = column_mapping.pop(synthetic_column_name)

            custom_column_mappings = deepcopy(custom_column_mappings)
            # Drop mappings for columns which are not actually present in this sheet.
            for custom_column_name in list(custom_column_mappings.keys()):
                if custom_column_name not in actual_column_names:
                    del custom_column_mappings[custom_column_name]
            fixup_custom_array_column_mappings(custom_column_mappings)
            return custom_column_mappings

        super()._define_header(header)
        if self._custom_column_mappings:
            self._custom_column_mappings = fixup_custom_column_mappings(self._custom_column_mappings, self.header)
            self._original_header = self.header
            self.header = []
            # Build from the header as established by the base class (what the mappings were
            # just filtered against, and what _iter_header returns), not the raw argument.
            for column_name in self._original_header:
                if column_name in self._custom_column_mappings:
                    self.header += list(self._custom_column_mappings[column_name].keys())
                else:
                    self.header.append(column_name)

    def _iter_header(self) -> List[str]:
        """Return the original header when mappings are in effect, otherwise defer to the base class."""
        if self._custom_column_mappings:
            return self._original_header
        return super()._iter_header()

    def _iter_mapper(self, row: dict) -> dict:
        """Replace, in place, each mapped column in the given row with its synthetic columns.

        Returns the given row, which is unchanged if no mappings are in effect.
        """
        if self._custom_column_mappings:
            synthetic_columns = {}
            columns_to_delete = []
            for column_name in row:
                if column_name in self._custom_column_mappings:
                    column_mapping = self._custom_column_mappings[column_name]
                    for synthetic_column_name in column_mapping:
                        synthetic_column_value = column_mapping[synthetic_column_name]
                        if synthetic_column_value == "{name}":
                            synthetic_columns[synthetic_column_name] = column_name
                        elif (column_value := self._parse_value_specifier(synthetic_column_value,
                                                                          row[column_name])) is not None:
                            synthetic_columns[synthetic_column_name] = column_value
                        else:
                            # Not a "{value...}" specifier, so the configured value is used as-is.
                            # NOTE(review): this is also reached for a "{value...}" specifier when
                            # the row value is None; confirm row values are never None here.
                            synthetic_columns[synthetic_column_name] = synthetic_column_value
                    columns_to_delete.append(column_name)
            # The row is changed only after the loop, as it must not change while iterating over it.
            if columns_to_delete:
                for column_to_delete in columns_to_delete:
                    del row[column_to_delete]
            if synthetic_columns:
                row.update(synthetic_columns)
        return row

    @staticmethod
    def _parse_value_specifier(value_specifier: Optional[Any], value: Optional[Any]) -> Optional[Any]:
        """Evaluate a "{value}" or "{value:TYPE}" specifier against the given value.

        Returns None if the value is None or the specifier is not of this form. TYPE may be
        int/integer, float/number, or bool/boolean, in which case the value is converted
        accordingly, falling back to the value itself; otherwise the value is returned as a string.
        """
        if value is not None:
            # Spaces anywhere within the specifier are ignored.
            if isinstance(value_specifier, str) and (value_specifier := value_specifier.replace(" ", "")):
                if value_specifier.startswith("{value"):
                    if (value_specifier[len(value_specifier) - 1] == "}"):
                        if len(value_specifier) == 7:
                            # Exactly "{value}".
                            return str(value)
                        if value_specifier[6] == ":":
                            if (value_specifier := value_specifier[7:-1]) in ["int", "integer"]:
                                return to_integer(value, fallback=value,
                                                  allow_commas=True, allow_multiplier_suffix=True)
                            elif value_specifier in ["float", "number"]:
                                return to_float(value, fallback=value,
                                                allow_commas=True, allow_multiplier_suffix=True)
                            elif value_specifier in ["bool", "boolean"]:
                                return to_boolean(value, fallback=value)
                        return str(value)
        return None
|
265
|
+
|
266
|
+
|
267
|
+
# This ExcelSheetName class is used to represent an Excel sheet name; it is simply a str type with an
|
268
|
+
# additional "original" property. The value of this will be the given string with any prefix preceding an
|
269
|
+
# underscore removed; and the "original" property will evaluate to the original/given string. This is
|
270
|
+
# used to support the use of sheet names of the form "XYZ_TypeName", where "XYZ" is an arbitrary string
|
271
|
+
# and "TypeName" is the virtual name of the sheet, which will be used by StructuredDataSet/etc, and which
|
272
|
+
# represents the (portal) type of (the items/rows within the) sheet. The purpose of all this is to allow
|
273
|
+
# multiple sheets within a spreadsheet of the same (portal object) type; since sheet names must be unique,
|
274
|
+
# this would otherwise not be possible; this provides a way for a spreadsheet to partition items/rows of
|
275
|
+
# a particular fixed type across multiple sheets.
|
276
|
+
#
|
277
|
+
# If this requirement was known at the beginning (or if we had more foresight) we would not support this
|
278
|
+
# feature this way; we would build it in from the start; this mechanism here merely provides a hook for
|
279
|
+
# this feature with minimal disruption (the only real tricky part being to make sure the original sheet
|
280
|
+
# name is reported in error messages); doing it this way minimizes risk of disruption.
|
281
|
+
#
|
282
|
+
class ExcelSheetName(str):
    """A str whose value is the given name minus any prefix up to and including the first underscore.

    The name as given (converted to a string if necessary) is kept in the "original" attribute.
    The prefix is removed only if there is something both before and after the first underscore.
    """

    def __new__(cls, value: str):
        given_name = value if isinstance(value, str) else str(value)
        prefix, underscore, remainder = given_name.partition("_")
        # All three parts are non-empty only when the first underscore is neither
        # the first nor the last character of the given name.
        if prefix and underscore and remainder:
            effective_name = remainder
        else:
            effective_name = given_name
        instance = super().__new__(cls, effective_name)
        instance.original = given_name
        return instance
|
@@ -66,7 +66,9 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
66
66
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
67
67
|
dcicutils/snapshot_utils.py,sha256=YDeI3vD-MhAtHwKDzfEm2q-n3l-da2yRpRR3xp0Ah1M,23021
|
68
68
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
69
|
-
dcicutils/structured_data.py,sha256=
|
69
|
+
dcicutils/structured_data.py,sha256=9Uleh8AhRWW6qj4fDCUnpRBsLgxWHZVh8nJdMDSHmD0,67510
|
70
|
+
dcicutils/submitr/config/custom_column_mappings.json,sha256=GS6umTpNZszTao8zTbGiNbR8qFnL4kudMt3nBZXaom0,3881
|
71
|
+
dcicutils/submitr/custom_excel.py,sha256=Ssaz9uQgeeEiaqXlu-4u_4M3KSdNPACKZRDOsJN8Xdk,16431
|
70
72
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
71
73
|
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
72
74
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
@@ -75,8 +77,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
75
77
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
76
78
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
77
79
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
78
|
-
dcicutils-8.17.0.
|
79
|
-
dcicutils-8.17.0.
|
80
|
-
dcicutils-8.17.0.
|
81
|
-
dcicutils-8.17.0.
|
82
|
-
dcicutils-8.17.0.
|
80
|
+
dcicutils-8.17.0.1b3.dist-info/LICENSE.txt,sha256=IrWHOFtMb2inTgr2ZqwQriRC8H3uMQfQVO6lniaaoE0,1102
|
81
|
+
dcicutils-8.17.0.1b3.dist-info/METADATA,sha256=MFUkIrTgfJdX225FjRGPZKq5V_Ad8cOUb6INkwPDxx0,3493
|
82
|
+
dcicutils-8.17.0.1b3.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
83
|
+
dcicutils-8.17.0.1b3.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
|
84
|
+
dcicutils-8.17.0.1b3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|