metameq 2026.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metameq/__init__.py +42 -0
- metameq/_version.py +21 -0
- metameq/config/__init__.py +0 -0
- metameq/config/config.yml +3 -0
- metameq/config/standards.yml +1648 -0
- metameq/src/__init__.py +0 -0
- metameq/src/__main__.py +34 -0
- metameq/src/metadata_configurator.py +512 -0
- metameq/src/metadata_extender.py +1168 -0
- metameq/src/metadata_merger.py +362 -0
- metameq/src/metadata_transformers.py +335 -0
- metameq/src/metadata_validator.py +387 -0
- metameq/src/util.py +299 -0
- metameq/tests/__init__.py +0 -0
- metameq/tests/data/invalid.yml +1 -0
- metameq/tests/data/test_config.yml +9 -0
- metameq/tests/test_metadata_configurator.py +2334 -0
- metameq/tests/test_metadata_extender.py +2610 -0
- metameq/tests/test_metadata_merger.py +657 -0
- metameq/tests/test_metadata_transformers.py +277 -0
- metameq/tests/test_metadata_validator.py +1191 -0
- metameq/tests/test_util.py +436 -0
- metameq-2026.1.1.dist-info/METADATA +21 -0
- metameq-2026.1.1.dist-info/RECORD +27 -0
- metameq-2026.1.1.dist-info/WHEEL +5 -0
- metameq-2026.1.1.dist-info/entry_points.txt +2 -0
- metameq-2026.1.1.dist-info/top_level.txt +1 -0
metameq/src/__init__.py
ADDED
|
File without changes
|
metameq/src/__main__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from metameq import write_extended_metadata as _write_extended_metadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@click.group()
def root():
    # Top-level click command group for this module; subcommands attach to it
    # through the @root.command decorator. A comment is used here instead of
    # a docstring because click would display a docstring as the group's
    # help text.
    return None
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@root.command("write-extended-metadata",
              context_settings={'show_default': True})
@click.argument('metadata_file_path', type=click.Path(exists=True))
@click.argument('config_fp', type=click.Path(exists=True))
@click.argument('name_base', type=str)
@click.option('--out_dir', default=".",
              help='output directory for the extended metadata file')
@click.option('--sep', default="\t",
              help='separator of input file (default is tab); '
                   'not applicable to excel files')
@click.option('--suppress_fails_files', is_flag=True,
              help='suppress output of QC and validation error files if no '
                   'errors found. Default is to output empty files.')
def write_extended_metadata(metadata_file_path, config_fp,
                            out_dir, name_base, sep, suppress_fails_files):
    """Extend a metadata file and write out the result.

    METADATA_FILE_PATH is the path to the metadata file to be extended.
    CONFIG_FP is the path to the study-specific config yaml file.
    NAME_BASE is the base name for the output extended metadata file.
    """
    # click.argument does not take a help= keyword, so the positional
    # arguments are described in the docstring above, which click shows in
    # the command's --help output.
    #
    # The CLI flag is named suppress_fails_files, while the library function
    # takes the same value under the keyword suppress_empty_fails.
    _write_extended_metadata(
        metadata_file_path, config_fp, out_dir, name_base,
        sep, suppress_empty_fails=suppress_fails_files)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# invoke the click command group when this module is executed as a script
if __name__ == '__main__':
    root()
|
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
from typing import Dict, Optional, Any
|
|
2
|
+
from metameq.src.util import extract_config_dict, extract_stds_config, \
|
|
3
|
+
deepcopy_dict, \
|
|
4
|
+
METADATA_FIELDS_KEY, STUDY_SPECIFIC_METADATA_KEY, \
|
|
5
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
6
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
|
|
7
|
+
DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def combine_stds_and_study_config(
        study_config_dict: Dict[str, Any],
        stds_fp: Optional[str] = None) \
        -> Dict[str, Any]:
    """Merge a study's host type settings into the standards.

    Parameters
    ----------
    study_config_dict : Dict[str, Any]
        Study-specific flat-host-type config dictionary. Only its
        STUDY_SPECIFIC_METADATA_KEY section is read; a missing section is
        treated as empty.
    stds_fp : Optional[str], default=None
        Path to the standards file, passed unchanged to extract_stds_config.

    Returns
    -------
    Dict[str, Any]
        Dictionary with a single HOST_TYPE_SPECIFIC_METADATA_KEY entry that
        holds the nested host types combined from standards and study.
    """
    nested_stds = extract_stds_config(stds_fp)
    flat_study = study_config_dict.get(STUDY_SPECIFIC_METADATA_KEY, {})

    # wrap the combined host types under the host-type key so the result has
    # the same top-level shape the nested-standards consumers read from
    return {
        HOST_TYPE_SPECIFIC_METADATA_KEY:
            _make_combined_stds_and_study_host_type_dicts(
                flat_study, nested_stds)}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def flatten_nested_stds_dict(
        parent_stds_nested_dict: Dict[str, Any],
        parent_flattened_host_dict: Optional[Dict[str, Any]] = None) \
        -> Dict[str, Any]:
    """Turn nested host type standards into one flat host types dictionary.

    This function calls itself recursively, depth first. For every host type
    found directly under the parent's HOST_TYPE_SPECIFIC_METADATA_KEY, it
    builds that host's flat entry by combining the parent's flat entry with
    the host's own nested definition, then recurses into the host so that
    its sub-hosts are built on top of the host's flat entry.

    Parameters
    ----------
    parent_stds_nested_dict : Dict[str, Any]
        Nested standards dictionary for the parent (previous) host level.
    parent_flattened_host_dict : Optional[Dict[str, Any]], default=None
        Flat entry already built for the parent host. None (the top-level
        call) is treated as an empty dictionary.

    Returns
    -------
    Dict[str, Any]
        Flat dictionary keyed by host type, holding the hosts at this level
        and all hosts nested below them. Empty if the parent has no host
        types.
    """
    flat_parent = {} if parent_flattened_host_dict is None \
        else parent_flattened_host_dict
    nested_hosts = parent_stds_nested_dict.get(
        HOST_TYPE_SPECIFIC_METADATA_KEY, {})

    flat_hosts = {}
    for host_type, host_nested in nested_hosts.items():
        # parent's flat entry is the base; this host's definition is added
        host_flat = _combine_base_and_added_host_type(
            flat_parent, host_nested)

        # flat records for everything nested below this host; an empty
        # result leaves flat_hosts untouched
        flat_hosts.update(
            flatten_nested_stds_dict(host_nested, host_flat))

        # the host's own record is stored after its descendants' records
        flat_hosts[host_type] = host_flat

    return flat_hosts
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# TODO: Rewrite so this doesn't BOTH modify the wip in place AND return a pointer to it.
|
|
103
|
+
# The fact that it returns a dictionary makes it unclear that this returned value is not a copy
|
|
104
|
+
# but is in fact the same dictionary as the one passed in, now with modifications.
|
|
105
|
+
# This is confusing and error-prone.
|
|
106
|
+
def update_wip_metadata_dict(
        wip_metadata_fields_dict: Dict[str, Any],
        add_metadata_fields_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Fold additional metadata field definitions into the wip dictionary *in place*.

    For each field in the add dictionary, the wip entry (created empty if
    absent) is updated with the add entry. Before that update, restrictions
    that the add entry replaces are removed from the wip entry: an incoming
    ALLOWED_KEY removes an existing ANYOF_KEY, and an incoming ANYOF_KEY
    removes an existing ALLOWED_KEY and an existing TYPE_KEY.

    Parameters
    ----------
    wip_metadata_fields_dict : Dict[str, Any]
        Work-in-progress metadata fields dictionary; modified by this call.
    add_metadata_fields_dict : Dict[str, Any]
        Metadata fields dictionary whose entries are merged in.

    Returns
    -------
    Dict[str, Any]
        The same wip_metadata_fields_dict object that was passed in (not a
        copy), after modification.
    """
    for field_name, field_additions in add_metadata_fields_dict.items():
        field_wip = wip_metadata_fields_dict.setdefault(field_name, {})

        if ALLOWED_KEY in field_additions:
            # incoming allowed-list displaces any existing anyof
            field_wip.pop(ANYOF_KEY, None)

        if ANYOF_KEY in field_additions:
            # incoming anyof displaces any existing allowed-list and type
            field_wip.pop(ALLOWED_KEY, None)
            field_wip.pop(TYPE_KEY, None)

        # TODO: Q: is it possible to have a list of allowed with a default
        #  at high level, then lower down have a list of allowed WITHOUT
        #  a default? If so, how do we handle that?

        field_wip.update(field_additions)

    return wip_metadata_fields_dict
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _make_combined_stds_and_study_host_type_dicts(
        flat_study_dict: Dict[str, Any],
        parent_host_stds_nested_dict: Dict[str, Any]) \
        -> Dict[str, Any]:
    """Overlay flat study host type settings onto nested standards host types.

    This function calls itself recursively, depth first. It walks the host
    types nested under the parent standards dictionary; wherever a host type
    also appears in the study's (flat) host types, the study's settings are
    combined into a copy of the standards entry. Only host types present in
    the standards are visited. The study dictionary is not expected to list
    every host or to define any host completely.

    Parameters
    ----------
    flat_study_dict : Dict[str, Any]
        Contents of the STUDY_SPECIFIC_METADATA_KEY section of the study
        config (not the whole study config). The same object is passed to
        every level of the recursion.
    parent_host_stds_nested_dict : Dict[str, Any]
        Nested standards dictionary for the parent (previous) host level.

    Returns
    -------
    Dict[str, Any]
        Nested host types dictionary for this level, with study settings
        merged in. Empty if the parent has no host types.
    """
    # source of the additions: the study's host types (flat)
    study_hosts = flat_study_dict.get(HOST_TYPE_SPECIFIC_METADATA_KEY, {})
    # target of the additions: the standards' host types at this level
    stds_hosts = parent_host_stds_nested_dict.get(
        HOST_TYPE_SPECIFIC_METADATA_KEY, {})

    combined_hosts = deepcopy_dict(stds_hosts)
    for host_type, host_stds in stds_hosts.items():
        if host_type in study_hosts:
            host_combined = _combine_base_and_added_host_type(
                host_stds, study_hosts[host_type])
        else:
            # nothing to add from the study; use an independent copy
            host_combined = deepcopy_dict(host_stds)

        # build the combined sub-hosts from the standards entry; only attach
        # them when there are any, so no empty host-types key is introduced
        nested_children = _make_combined_stds_and_study_host_type_dicts(
            flat_study_dict, host_stds)
        if nested_children:
            host_combined[HOST_TYPE_SPECIFIC_METADATA_KEY] = nested_children

        combined_hosts[host_type] = host_combined

    return combined_hosts
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _combine_base_and_added_host_type(
        host_type_base_dict: Dict[str, Any],
        host_type_add_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Build a host type configuration from a base plus additions.

    The result starts as a copy of the base. The add dictionary's
    DEFAULT_KEY value (if present) replaces the base's, and the metadata
    fields and sample-type-specific sections are each merged by their
    dedicated helper and stored when the merged section is non-empty.

    Parameters
    ----------
    host_type_base_dict : Dict[str, Any]
        Base host type configuration dictionary.
    host_type_add_dict : Dict[str, Any]
        Host type configuration whose settings are layered onto the base.

    Returns
    -------
    Dict[str, Any]
        Combined host type configuration dictionary.
    """
    combined = deepcopy_dict(host_type_base_dict)

    # a default in the additions wins over whatever the base had
    if DEFAULT_KEY in host_type_add_dict:
        combined[DEFAULT_KEY] = host_type_add_dict[DEFAULT_KEY]

    merged_fields = _combine_base_and_added_metadata_fields(
        host_type_base_dict, host_type_add_dict)
    if merged_fields:
        combined[METADATA_FIELDS_KEY] = merged_fields

    # the work-in-progress result serves as the base for the sample types
    merged_sample_types = \
        _combine_base_and_added_sample_type_specific_metadata(
            combined, host_type_add_dict)
    if merged_sample_types:
        combined[SAMPLE_TYPE_SPECIFIC_METADATA_KEY] = merged_sample_types

    return combined
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _combine_base_and_added_metadata_fields(
        host_type_base_dict: Dict[str, Any],
        host_type_add_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Merge the METADATA_FIELDS_KEY sections of two host type dictionaries.

    Parameters
    ----------
    host_type_base_dict : Dict[str, Any]
        Base host type configuration dictionary; its metadata fields are
        copied before merging. A missing section is treated as empty.
    host_type_add_dict : Dict[str, Any]
        Host type configuration whose metadata fields are merged into the
        copy. A missing section is treated as empty.

    Returns
    -------
    Dict[str, Any]
        Combined metadata fields dictionary.
    """
    wip_fields = deepcopy_dict(
        host_type_base_dict.get(METADATA_FIELDS_KEY, {}))
    add_fields = host_type_add_dict.get(METADATA_FIELDS_KEY, {})

    # update_wip_metadata_dict edits wip_fields in place and returns it
    return update_wip_metadata_dict(wip_fields, add_fields)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _combine_base_and_added_sample_type_specific_metadata(
        host_type_base_dict: Dict[str, Any],
        host_type_add_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Merge the sample-type-specific sections of two host type dictionaries.

    The result starts as a copy of the base's
    SAMPLE_TYPE_SPECIFIC_METADATA_KEY section. For each sample type in the
    add dictionary:

    * if both the existing entry and the added entry are defined through
      metadata fields, the added entry's other keys (e.g., base type) are
      written onto the existing entry and the metadata fields are merged;
    * otherwise (the sample type is new, or the two definitions are of
      different kinds) the added entry replaces the existing one.

    Everything taken from the add dictionary is deep-copied, so the result
    shares no mutable objects with either input.

    Parameters
    ----------
    host_type_base_dict : Dict[str, Any]
        Base host type configuration dictionary.
    host_type_add_dict : Dict[str, Any]
        Host type configuration whose sample types are layered on the base.

    Returns
    -------
    Dict[str, Any]
        Combined sample type specific metadata dictionary.

    Raises
    ------
    ValueError
        If a sample type definition is rejected by
        _id_sample_type_definition (e.g., it has both alias and metadata
        fields, or both alias and base type).
    """
    wip_sample_types = deepcopy_dict(
        host_type_base_dict.get(SAMPLE_TYPE_SPECIFIC_METADATA_KEY, {}))
    add_sample_types = host_type_add_dict.get(
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY, {})

    for curr_sample_type, curr_add_dict in add_sample_types.items():
        # classify (and thereby validate) the added definition first, then
        # the existing one if there is one
        add_def_type = _id_sample_type_definition(
            curr_sample_type, curr_add_dict)
        wip_def_type = None
        if curr_sample_type in wip_sample_types:
            wip_def_type = _id_sample_type_definition(
                curr_sample_type, wip_sample_types[curr_sample_type])

        # private copy: nothing stored in the result may alias the caller's
        # add dictionary, or later in-place edits would leak back into it
        add_copy = deepcopy_dict(curr_add_dict)

        if wip_def_type == METADATA_FIELDS_KEY \
                and add_def_type == METADATA_FIELDS_KEY:
            # wip_sample_types is already a private copy of the base, so its
            # entry can be edited directly
            curr_wip_dict = wip_sample_types[curr_sample_type]

            # split the added metadata fields off; what remains are the
            # non-metadata keys (e.g., base_type), which overwrite the wip's
            add_fields = add_copy.pop(METADATA_FIELDS_KEY)
            curr_wip_dict.update(add_copy)

            # merges in place into the wip entry's metadata fields
            update_wip_metadata_dict(
                curr_wip_dict[METADATA_FIELDS_KEY], add_fields)
        else:
            # new sample type, or definitions of different kinds (alias vs
            # metadata): the added definition simply takes over
            wip_sample_types[curr_sample_type] = add_copy

    return wip_sample_types
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _id_sample_type_definition(sample_type_name: str, sample_type_dict: Dict[str, Any]) -> str:
    """Classify how a sample type dictionary defines its sample type.

    Parameters
    ----------
    sample_type_name : str
        Name of the sample type; used only in error messages.
    sample_type_dict : Dict[str, Any]
        Dictionary containing the sample type's configuration.

    Returns
    -------
    str
        ALIAS_KEY if the dictionary has an alias, otherwise
        METADATA_FIELDS_KEY if it has metadata fields (with or without a
        base type), otherwise BASE_TYPE_KEY if it has only a base type.

    Raises
    ------
    ValueError
        If the dictionary has both alias and metadata fields, both alias
        and base type, or none of alias, metadata fields, and base type.
    """
    is_alias = ALIAS_KEY in sample_type_dict
    has_fields = METADATA_FIELDS_KEY in sample_type_dict
    has_base_type = BASE_TYPE_KEY in sample_type_dict

    if is_alias:
        # an alias must stand alone: the metadata conflict is reported
        # before the base type conflict
        if has_fields:
            raise ValueError(f"Sample type '{sample_type_name}' has both "
                             f"'{ALIAS_KEY}' and '{METADATA_FIELDS_KEY}' keys in "
                             "the same sample type dict")
        if has_base_type:
            raise ValueError(f"Sample type '{sample_type_name}' has both "
                             f"'{ALIAS_KEY}' and '{BASE_TYPE_KEY}' keys in "
                             "the same sample type dict")
        return ALIAS_KEY

    if has_fields:
        return METADATA_FIELDS_KEY

    if has_base_type:
        # reaching here means a base type with no metadata fields
        return BASE_TYPE_KEY

    raise ValueError(f"Sample type '{sample_type_name}' has neither "
                     f"'{ALIAS_KEY}' nor '{METADATA_FIELDS_KEY}' keys in "
                     "the same sample type dict")
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def build_full_flat_config_dict(
        study_specific_config_dict: Optional[Dict[str, Any]] = None,
        software_config_dict: Optional[Dict[str, Any]] = None,
        stds_fp: Optional[str] = None
) -> Dict[str, Any]:
    """Build a complete flattened configuration dictionary.

    Merges software configuration, study-specific configuration, and
    standards configuration into a single flat dictionary with fully
    resolved host type specific metadata.

    The result is always a new top-level dictionary, so storing the host
    type information in it never writes into the caller's
    software_config_dict or study_specific_config_dict.

    Parameters
    ----------
    study_specific_config_dict : Optional[Dict[str, Any]], default=None
        Study-specific flat-host-type config dictionary. If provided (and
        non-empty), its top-level settings override those of the software
        config.
    software_config_dict : Optional[Dict[str, Any]], default=None
        Software configuration dictionary with default settings. If None,
        it is obtained from extract_config_dict(None).
    stds_fp : Optional[str], default=None
        Path to standards dictionary file, passed through to the standards
        loading functions.

    Returns
    -------
    Dict[str, Any]
        A complete flat configuration dictionary whose
        HOST_TYPE_SPECIFIC_METADATA_KEY entry contains the flattened and
        merged host type configurations.
    """
    if software_config_dict is None:
        software_config_dict = extract_config_dict(None)

    if study_specific_config_dict:
        # study settings win over software defaults; `|` builds a new dict
        full_flat_config_dict = \
            software_config_dict | deepcopy_dict(study_specific_config_dict)

        # combine the study's host type specific info with the standards'
        # to get a nested dictionary under HOST_TYPE_SPECIFIC_METADATA_KEY
        full_nested_hosts_dict = combine_stds_and_study_config(
            full_flat_config_dict, stds_fp)
    else:
        # copy the top level so that the assignment below does not modify
        # the dictionary the caller passed in
        full_flat_config_dict = dict(software_config_dict)

        # no study config to combine with, so the standards are used as-is
        full_nested_hosts_dict = extract_stds_config(stds_fp)

    # overwrite any original HOST_TYPE_SPECIFIC_METADATA_KEY entry with the
    # complete, flattened combination of the host type configurations
    full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
        flatten_nested_stds_dict(full_nested_hosts_dict, None)

    return full_flat_config_dict
|