metameq 2026.1.1__py3-none-any.whl → 2026.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metameq/_version.py +3 -3
- metameq/src/metadata_configurator.py +146 -1
- metameq/src/metadata_extender.py +44 -38
- metameq/tests/test_metadata_configurator.py +2741 -208
- metameq/tests/test_metadata_extender.py +2034 -497
- metameq/tests/test_metadata_merger.py +1 -1
- metameq/tests/test_util.py +1 -1
- {metameq-2026.1.1.dist-info → metameq-2026.1.2.dist-info}/METADATA +1 -1
- {metameq-2026.1.1.dist-info → metameq-2026.1.2.dist-info}/RECORD +12 -12
- {metameq-2026.1.1.dist-info → metameq-2026.1.2.dist-info}/WHEEL +0 -0
- {metameq-2026.1.1.dist-info → metameq-2026.1.2.dist-info}/entry_points.txt +0 -0
- {metameq-2026.1.1.dist-info → metameq-2026.1.2.dist-info}/top_level.txt +0 -0
metameq/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-01-
|
|
11
|
+
"date": "2026-01-31T12:28:01-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2026.01.
|
|
14
|
+
"full-revisionid": "889941fbd7d28889867e3f4b6edba2d50dbc5956",
|
|
15
|
+
"version": "2026.01.2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -4,7 +4,8 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
|
|
|
4
4
|
METADATA_FIELDS_KEY, STUDY_SPECIFIC_METADATA_KEY, \
|
|
5
5
|
HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
6
6
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
|
|
7
|
-
DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY
|
|
7
|
+
DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
|
|
8
|
+
SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
def combine_stds_and_study_config(
|
|
@@ -89,6 +90,15 @@ def flatten_nested_stds_dict(
|
|
|
89
90
|
if curr_host_type_sub_host_dict:
|
|
90
91
|
wip_host_types_dict.update(curr_host_type_sub_host_dict)
|
|
91
92
|
|
|
93
|
+
# resolve aliases and base types for this host's sample types
|
|
94
|
+
# This happens AFTER recursion so children inherit unresolved aliases,
|
|
95
|
+
# ensuring correct bottom-up resolution order
|
|
96
|
+
if SAMPLE_TYPE_SPECIFIC_METADATA_KEY in curr_host_type_wip_flat_dict:
|
|
97
|
+
curr_host_type_wip_flat_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY] = \
|
|
98
|
+
_resolve_sample_type_aliases_and_bases(
|
|
99
|
+
curr_host_type_wip_flat_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY],
|
|
100
|
+
curr_host_type_wip_flat_dict.get(METADATA_FIELDS_KEY, {}))
|
|
101
|
+
|
|
92
102
|
# assign the flattened wip dict for the current host type to the result
|
|
93
103
|
# (which now contains flat records for the hosts lower down than
|
|
94
104
|
# this, if there are any)
|
|
@@ -270,8 +280,11 @@ def _combine_base_and_added_host_type(
|
|
|
270
280
|
_combine_base_and_added_sample_type_specific_metadata(
|
|
271
281
|
host_type_wip_nested_dict,
|
|
272
282
|
host_type_add_dict)
|
|
283
|
+
|
|
273
284
|
# if we got back a non-empty dictionary of sample types,
|
|
274
285
|
# add it to the wip for this host type dict
|
|
286
|
+
# Note: resolution of aliases/base types happens in flatten_nested_stds_dict
|
|
287
|
+
# AFTER recursion, to ensure correct bottom-up resolution order
|
|
275
288
|
if curr_host_wip_sample_types_dict:
|
|
276
289
|
host_type_wip_nested_dict[
|
|
277
290
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY] = \
|
|
@@ -450,6 +463,130 @@ def _id_sample_type_definition(sample_type_name: str, sample_type_dict: Dict[str
|
|
|
450
463
|
"the same sample type dict")
|
|
451
464
|
|
|
452
465
|
|
|
466
|
+
def _construct_sample_type_metadata_fields_dict(
        sample_type: str,
        host_sample_types_config_dict: Dict[str, Any],
        a_host_type_metadata_fields_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Construct metadata fields dictionary for a specific host+sample type, resolving aliases and base types.

    The result is built in layers, each applied on top of the previous one
    via update_wip_metadata_dict:

    1. a deep copy of the host-level metadata fields,
    2. the sample type's own metadata fields (taken from the alias target
       if the sample type is an alias, and merged over the base type's
       metadata fields if a base type is declared),
    3. a ``sample_type`` field definition, and
    4. a ``qiita_sample_type`` field definition, only if that field is not
       already present after the previous steps.

    Parameters
    ----------
    sample_type : str
        The sample type to process.
    host_sample_types_config_dict : Dict[str, Any]
        Dictionary containing config for *all* sample types in
        the host type in question.
    a_host_type_metadata_fields_dict : Dict[str, Any]
        Dictionary containing metadata fields for the host type in question.

    Returns
    -------
    Dict[str, Any]
        The constructed metadata fields dictionary for this
        host-and-sample-type combination.

    Raises
    ------
    ValueError
        If the alias target has no metadata fields entry (reported as a
        chained alias), or if the base type's config contains anything
        other than a metadata fields entry.
    KeyError
        If the sample type, its alias target, or its base type is not a key
        of host_sample_types_config_dict (these are looked up by direct
        subscripting).
    """
    # name that will be recorded in the sample_type/qiita_sample_type fields;
    # for an alias this becomes the name of the alias *target*
    sample_type_for_metadata = sample_type

    # get dict associated with the naive sample type
    sample_type_specific_dict = host_sample_types_config_dict[sample_type]

    # if naive sample type contains an alias
    sample_type_alias = sample_type_specific_dict.get(ALIAS_KEY)
    if sample_type_alias:
        # change the sample type to the alias sample type
        # and use the alias's sample type dict
        sample_type_for_metadata = sample_type_alias
        sample_type_specific_dict = \
            host_sample_types_config_dict[sample_type_alias]
        # an alias target lacking its own metadata fields is treated as
        # being an alias itself, which is not allowed
        if METADATA_FIELDS_KEY not in sample_type_specific_dict:
            raise ValueError(f"May not chain aliases "
                             f"('{sample_type}' to '{sample_type_alias}')")
    # endif sample type is an alias

    # if the sample type has a base type
    sample_type_base = sample_type_specific_dict.get(BASE_TYPE_KEY)
    if sample_type_base:
        # get the base's sample type dict and add this sample type's
        # info on top of it
        base_sample_dict = host_sample_types_config_dict[sample_type_base]
        # a base type may hold metadata fields and nothing else
        # (so, e.g., no alias and no base type of its own)
        if list(base_sample_dict.keys()) != [METADATA_FIELDS_KEY]:
            raise ValueError(f"Base sample type '{sample_type_base}' "
                             f"must only have metadata fields")
        merged_metadata_fields = update_wip_metadata_dict(
            deepcopy_dict(base_sample_dict[METADATA_FIELDS_KEY]),
            sample_type_specific_dict.get(METADATA_FIELDS_KEY, {}))
        # work on a copy so the merged fields are not written back into
        # the caller's config dictionary
        sample_type_specific_dict = deepcopy_dict(sample_type_specific_dict)
        sample_type_specific_dict[METADATA_FIELDS_KEY] = \
            merged_metadata_fields
    # endif sample type has a base type

    # add the sample-type-specific info generated above on top of the host info
    sample_type_metadata_dict = update_wip_metadata_dict(
        deepcopy_dict(a_host_type_metadata_fields_dict),
        sample_type_specific_dict.get(METADATA_FIELDS_KEY, {}))

    def _new_sample_type_definition() -> Dict[str, Any]:
        # Build a fresh definition (and a fresh allowed-values list) on every
        # call so that the sample_type and qiita_sample_type entries never
        # share one mutable object; a shared object would let an in-place
        # edit of one entry silently change the other.
        # NOTE(review): whether update_wip_metadata_dict copies its inputs
        # is not visible here, so this is a defensive measure.
        return {
            ALLOWED_KEY: [sample_type_for_metadata],
            DEFAULT_KEY: sample_type_for_metadata,
            TYPE_KEY: "string"
        }

    # set sample_type, and qiita_sample_type if it is not already set
    sample_type_metadata_dict = update_wip_metadata_dict(
        sample_type_metadata_dict,
        {SAMPLE_TYPE_KEY: _new_sample_type_definition()})
    if QIITA_SAMPLE_TYPE not in sample_type_metadata_dict:
        sample_type_metadata_dict = update_wip_metadata_dict(
            sample_type_metadata_dict,
            {QIITA_SAMPLE_TYPE: _new_sample_type_definition()})
    # end if qiita_sample_type not already set

    return sample_type_metadata_dict
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _resolve_sample_type_aliases_and_bases(
        sample_types_dict: Dict[str, Any],
        host_metadata_fields_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Build a fully resolved config entry for every sample type of a host.

    Each key of the input is handed to
    _construct_sample_type_metadata_fields_dict, which follows any alias,
    layers the sample type's fields over its base type's fields, combines
    the outcome with the host-level fields, and adds the sample_type and
    qiita_sample_type fields. The returned entry for a sample type holds
    only that resolved metadata fields dictionary.

    Parameters
    ----------
    sample_types_dict : Dict[str, Any]
        Sample type configurations for one host type, keyed by sample
        type name (from sample_type_specific_metadata).
    host_metadata_fields_dict : Dict[str, Any]
        Host-level metadata fields to combine into each sample type.

    Returns
    -------
    Dict[str, Any]
        A new dictionary with the same keys, in the same order, as
        sample_types_dict; each value is a one-entry dictionary containing
        the resolved metadata fields for that sample type.

    Raises
    ------
    ValueError
        If chained aliases are detected or a base type has an invalid
        structure.
    """
    # every sample type is resolved against the complete, original
    # sample_types_dict so that alias and base-type lookups can reach
    # any sibling sample type
    return {
        curr_sample_type: {
            METADATA_FIELDS_KEY: _construct_sample_type_metadata_fields_dict(
                curr_sample_type, sample_types_dict,
                host_metadata_fields_dict)
        }
        for curr_sample_type in sample_types_dict
    }
|
|
588
|
+
|
|
589
|
+
|
|
453
590
|
def build_full_flat_config_dict(
|
|
454
591
|
study_specific_config_dict: Optional[Dict[str, Any]] = None,
|
|
455
592
|
software_config_dict: Optional[Dict[str, Any]] = None,
|
|
@@ -503,6 +640,14 @@ def build_full_flat_config_dict(
|
|
|
503
640
|
full_nested_hosts_dict, None)
|
|
504
641
|
software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
|
|
505
642
|
full_flat_hosts_dict
|
|
643
|
+
|
|
644
|
+
# drop the STUDY_SPECIFIC_METADATA_KEY from the final output dict (because
|
|
645
|
+
# its contents have already been incorporated into the
|
|
646
|
+
# HOST_TYPE_SPECIFIC_METADATA_KEY section); note we keep all the other
|
|
647
|
+
# top-level keys from the study-specific config dict
|
|
648
|
+
if STUDY_SPECIFIC_METADATA_KEY in software_plus_study_flat_config_dict:
|
|
649
|
+
del software_plus_study_flat_config_dict[STUDY_SPECIFIC_METADATA_KEY]
|
|
650
|
+
|
|
506
651
|
# this is just a renaming to indicate that, having overwritten any original
|
|
507
652
|
# HOST_TYPE_SPECIFIC_METADATA_KEY in the software_plus_study_flat_config_dict
|
|
508
653
|
# with the complete and flattened combination of software+study+standards, it is now
|
metameq/src/metadata_extender.py
CHANGED
|
@@ -42,7 +42,7 @@ pandas.set_option("future.no_silent_downcasting", True)
|
|
|
42
42
|
def get_reserved_cols(
|
|
43
43
|
raw_metadata_df: pandas.DataFrame,
|
|
44
44
|
study_specific_config_dict: Dict[str, Any],
|
|
45
|
-
|
|
45
|
+
stds_fp: Optional[str] = None) -> List[str]:
|
|
46
46
|
"""Get a list of all reserved column names for all host+sample type combinations in the metadata.
|
|
47
47
|
|
|
48
48
|
Note that 'reserved' is not the same as 'required'. Some column names (e.g.,
|
|
@@ -55,8 +55,9 @@ def get_reserved_cols(
|
|
|
55
55
|
The input metadata DataFrame.
|
|
56
56
|
study_specific_config_dict : Dict[str, Any]
|
|
57
57
|
Study-specific flat-host-type config dictionary.
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
stds_fp : Optional[str], default=None
|
|
59
|
+
Path to standards dictionary file. If None, the default standards
|
|
60
|
+
config pulled from the standards.yml file will be used.
|
|
60
61
|
|
|
61
62
|
Returns
|
|
62
63
|
-------
|
|
@@ -90,10 +91,10 @@ def get_reserved_cols(
|
|
|
90
91
|
|
|
91
92
|
# extend the metadata_df to get all the required columns for all host+sample type combinations;
|
|
92
93
|
# we don't really care about the contents of these columns, just their names.
|
|
93
|
-
# (
|
|
94
|
+
# (Likewise, it is not necessary to pass the actual study_specific_transformers_dict so
|
|
95
|
+
# just use None)
|
|
94
96
|
metadata_df, _ = extend_metadata_df(
|
|
95
|
-
temp_df, study_specific_config_dict,
|
|
96
|
-
study_specific_transformers_dict)
|
|
97
|
+
temp_df, study_specific_config_dict, None, None, stds_fp)
|
|
97
98
|
|
|
98
99
|
return sorted(metadata_df.columns.to_list())
|
|
99
100
|
|
|
@@ -119,7 +120,7 @@ def id_missing_cols(a_df: pandas.DataFrame) -> List[str]:
|
|
|
119
120
|
def find_standard_cols(
|
|
120
121
|
a_df: pandas.DataFrame,
|
|
121
122
|
study_specific_config_dict: Dict[str, Any],
|
|
122
|
-
|
|
123
|
+
stds_fp: Optional[str] = None,
|
|
123
124
|
suppress_missing_name_err: bool = False) -> List[str]:
|
|
124
125
|
"""Find all the standard columns in the metadata DataFrame.
|
|
125
126
|
|
|
@@ -129,8 +130,9 @@ def find_standard_cols(
|
|
|
129
130
|
The metadata DataFrame to analyze.
|
|
130
131
|
study_specific_config_dict : Dict[str, Any]
|
|
131
132
|
Study-specific flat-host-type config dictionary.
|
|
132
|
-
|
|
133
|
-
|
|
133
|
+
stds_fp : Optional[str], default=None
|
|
134
|
+
Path to standards dictionary file. If None, the default standards
|
|
135
|
+
config pulled from the standards.yml file will be used.
|
|
134
136
|
suppress_missing_name_err : bool, default=False
|
|
135
137
|
Whether to suppress errors about missing sample name.
|
|
136
138
|
|
|
@@ -156,8 +158,7 @@ def find_standard_cols(
|
|
|
156
158
|
# get the intersection of the reserved standard columns and
|
|
157
159
|
# the columns in the input dataframe
|
|
158
160
|
standard_cols = get_reserved_cols(
|
|
159
|
-
a_df, study_specific_config_dict,
|
|
160
|
-
study_specific_transformers_dict=study_specific_transformers_dict)
|
|
161
|
+
a_df, study_specific_config_dict, stds_fp)
|
|
161
162
|
|
|
162
163
|
standard_cols_set = (set(standard_cols) - set(INTERNAL_COL_KEYS))
|
|
163
164
|
|
|
@@ -167,7 +168,7 @@ def find_standard_cols(
|
|
|
167
168
|
def find_nonstandard_cols(
|
|
168
169
|
a_df: pandas.DataFrame,
|
|
169
170
|
study_specific_config_dict: Dict[str, Any],
|
|
170
|
-
|
|
171
|
+
stds_fp: Optional[str] = None) -> List[str]:
|
|
171
172
|
"""Find any non-standard columns in the metadata DataFrame.
|
|
172
173
|
|
|
173
174
|
Parameters
|
|
@@ -176,8 +177,9 @@ def find_nonstandard_cols(
|
|
|
176
177
|
The metadata DataFrame to analyze.
|
|
177
178
|
study_specific_config_dict : Dict[str, Any]
|
|
178
179
|
Study-specific flat-host-type config dictionary.
|
|
179
|
-
|
|
180
|
-
|
|
180
|
+
stds_fp : Optional[str], default=None
|
|
181
|
+
Path to standards dictionary file. If None, the default standards
|
|
182
|
+
config pulled from the standards.yml file will be used.
|
|
181
183
|
|
|
182
184
|
Returns
|
|
183
185
|
-------
|
|
@@ -195,15 +197,15 @@ def find_nonstandard_cols(
|
|
|
195
197
|
|
|
196
198
|
# get the columns in
|
|
197
199
|
standard_cols = get_reserved_cols(
|
|
198
|
-
a_df, study_specific_config_dict,
|
|
199
|
-
study_specific_transformers_dict=study_specific_transformers_dict)
|
|
200
|
+
a_df, study_specific_config_dict, stds_fp)
|
|
200
201
|
|
|
201
202
|
return list(set(a_df.columns) - set(standard_cols))
|
|
202
203
|
|
|
203
204
|
|
|
204
205
|
def get_extended_metadata_from_df_and_yaml(
|
|
205
206
|
raw_metadata_df: pandas.DataFrame,
|
|
206
|
-
study_specific_config_fp: Optional[str]
|
|
207
|
+
study_specific_config_fp: Optional[str],
|
|
208
|
+
stds_fp: Optional[str] = None) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
|
|
207
209
|
"""Extend metadata using configuration from a study-specific YAML config file.
|
|
208
210
|
|
|
209
211
|
Parameters
|
|
@@ -212,6 +214,9 @@ def get_extended_metadata_from_df_and_yaml(
|
|
|
212
214
|
The raw metadata DataFrame to extend.
|
|
213
215
|
study_specific_config_fp : Optional[str]
|
|
214
216
|
Path to the study-specific configuration YAML file.
|
|
217
|
+
stds_fp : Optional[str], default=None
|
|
218
|
+
Path to standards dictionary file. If None, the default standards
|
|
219
|
+
config pulled from the standards.yml file will be used.
|
|
215
220
|
|
|
216
221
|
Returns
|
|
217
222
|
-------
|
|
@@ -226,7 +231,8 @@ def get_extended_metadata_from_df_and_yaml(
|
|
|
226
231
|
|
|
227
232
|
# extend the metadata DataFrame using the study-specific flat-host-type config dictionary
|
|
228
233
|
metadata_df, validation_msgs_df = \
|
|
229
|
-
extend_metadata_df(raw_metadata_df, study_specific_config_dict
|
|
234
|
+
extend_metadata_df(raw_metadata_df, study_specific_config_dict,
|
|
235
|
+
None, None, stds_fp)
|
|
230
236
|
|
|
231
237
|
return metadata_df, validation_msgs_df
|
|
232
238
|
|
|
@@ -257,7 +263,8 @@ def write_extended_metadata(
|
|
|
257
263
|
out_name_base: str,
|
|
258
264
|
sep: str = "\t",
|
|
259
265
|
remove_internals: bool = True,
|
|
260
|
-
suppress_empty_fails: bool = False
|
|
266
|
+
suppress_empty_fails: bool = False,
|
|
267
|
+
stds_fp: Optional[str] = None) -> pandas.DataFrame:
|
|
261
268
|
"""Write extended metadata to files starting from input file paths to metadata and config.
|
|
262
269
|
|
|
263
270
|
Parameters
|
|
@@ -276,6 +283,9 @@ def write_extended_metadata(
|
|
|
276
283
|
Whether to remove internal columns.
|
|
277
284
|
suppress_empty_fails : bool, default=False
|
|
278
285
|
Whether to suppress empty failure files.
|
|
286
|
+
stds_fp : Optional[str], default=None
|
|
287
|
+
Path to standards dictionary file. If None, the default standards
|
|
288
|
+
config pulled from the standards.yml file will be used.
|
|
279
289
|
|
|
280
290
|
Returns
|
|
281
291
|
-------
|
|
@@ -310,7 +320,8 @@ def write_extended_metadata(
|
|
|
310
320
|
raw_metadata_df, study_specific_config_dict,
|
|
311
321
|
out_dir, out_name_base, sep=sep,
|
|
312
322
|
remove_internals=remove_internals,
|
|
313
|
-
suppress_empty_fails=suppress_empty_fails
|
|
323
|
+
suppress_empty_fails=suppress_empty_fails,
|
|
324
|
+
stds_fp=stds_fp)
|
|
314
325
|
|
|
315
326
|
# for good measure, return the extended metadata DataFrame
|
|
316
327
|
return extended_df
|
|
@@ -351,7 +362,8 @@ def write_extended_metadata_from_df(
|
|
|
351
362
|
sep: str = "\t",
|
|
352
363
|
remove_internals: bool = True,
|
|
353
364
|
suppress_empty_fails: bool = False,
|
|
354
|
-
internal_col_names: Optional[List[str]] = None
|
|
365
|
+
internal_col_names: Optional[List[str]] = None,
|
|
366
|
+
stds_fp: Optional[str] = None) -> pandas.DataFrame:
|
|
355
367
|
"""Write extended metadata to files starting from a metadata DataFrame and config dictionary.
|
|
356
368
|
|
|
357
369
|
Parameters
|
|
@@ -374,6 +386,9 @@ def write_extended_metadata_from_df(
|
|
|
374
386
|
Whether to suppress empty failure files.
|
|
375
387
|
internal_col_names : Optional[List[str]], default=None
|
|
376
388
|
List of internal column names.
|
|
389
|
+
stds_fp : Optional[str], default=None
|
|
390
|
+
Path to standards dictionary file. If None, the default standards
|
|
391
|
+
config pulled from the standards.yml file will be used.
|
|
377
392
|
|
|
378
393
|
Returns
|
|
379
394
|
-------
|
|
@@ -383,7 +398,7 @@ def write_extended_metadata_from_df(
|
|
|
383
398
|
# extend the metadata DataFrame using the study-specific flat-host-type config dictionary
|
|
384
399
|
metadata_df, validation_msgs_df = extend_metadata_df(
|
|
385
400
|
raw_metadata_df, study_specific_config_dict,
|
|
386
|
-
study_specific_transformers_dict)
|
|
401
|
+
study_specific_transformers_dict, None, stds_fp)
|
|
387
402
|
|
|
388
403
|
# write the metadata and validation results to files
|
|
389
404
|
write_metadata_results(
|
|
@@ -814,12 +829,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
814
829
|
- The updated metadata DataFrame with sample-type-specific elements added
|
|
815
830
|
- A list of validation messages
|
|
816
831
|
"""
|
|
817
|
-
# copy the metadata fields dict from the host type config to be the
|
|
818
|
-
# basis of the work-in-progress metadata dict--these are the default fields
|
|
819
|
-
# that will be overwritten, if necessary, by sample type-specific fields
|
|
820
|
-
wip_metadata_fields_dict = deepcopy_dict(
|
|
821
|
-
a_host_type_config_dict.get(METADATA_FIELDS_KEY, {}))
|
|
822
|
-
|
|
823
832
|
# get the config section for *all* sample types within this host type
|
|
824
833
|
host_sample_types_config_dict = \
|
|
825
834
|
a_host_type_config_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY]
|
|
@@ -837,20 +846,17 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
837
846
|
update_metadata_df_field(
|
|
838
847
|
sample_type_df, QC_NOTE_KEY, "invalid sample_type")
|
|
839
848
|
else:
|
|
840
|
-
#
|
|
841
|
-
#
|
|
842
|
-
#
|
|
849
|
+
# Get the already-resolved metadata fields dict for this sample type.
|
|
850
|
+
# The config is pre-resolved: aliases/base types are merged and
|
|
851
|
+
# host metadata is combined.
|
|
852
|
+
sample_type_config = host_sample_types_config_dict[a_sample_type]
|
|
843
853
|
full_sample_type_metadata_fields_dict = \
|
|
844
|
-
|
|
845
|
-
a_sample_type, host_sample_types_config_dict, wip_metadata_fields_dict)
|
|
854
|
+
sample_type_config.get(METADATA_FIELDS_KEY, {})
|
|
846
855
|
|
|
847
856
|
# update the metadata df with the sample type specific metadata fields
|
|
848
|
-
# TODO: this is taking in wip_metadata_fields_dict instead of full_sample_type_metadata_fields_dict,
|
|
849
|
-
# which only works because the code underlying _construct_sample_type_metadata_fields_dict
|
|
850
|
-
# is *modifying* wip_metadata_fields_dict in place. This should be corrected, but that
|
|
851
|
-
# needs to wait until there are tests to make sure doing so doesn't break anything.
|
|
852
857
|
sample_type_df = _update_metadata_from_dict(
|
|
853
|
-
sample_type_df,
|
|
858
|
+
sample_type_df, full_sample_type_metadata_fields_dict,
|
|
859
|
+
dict_is_metadata_fields=True,
|
|
854
860
|
overwrite_non_nans=global_plus_host_settings_dict[OVERWRITE_NON_NANS_KEY])
|
|
855
861
|
|
|
856
862
|
# for fields that are required but not yet filled, replace the placeholder with
|