metameq 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metameq/__init__.py +3 -2
- metameq/_version.py +3 -3
- metameq/src/metadata_configurator.py +53 -6
- metameq/src/metadata_extender.py +16 -38
- metameq/src/util.py +7 -0
- metameq/tests/test_metadata_configurator.py +184 -1
- metameq/tests/test_metadata_extender.py +306 -117
- metameq/tests/test_metadata_validator.py +2 -2
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/METADATA +2 -1
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/RECORD +13 -13
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/WHEEL +0 -0
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/entry_points.txt +0 -0
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/top_level.txt +0 -0
metameq/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ from metameq.src.metadata_extender import \
|
|
|
9
9
|
write_extended_metadata, write_extended_metadata_from_df, \
|
|
10
10
|
get_reserved_cols, get_extended_metadata_from_df_and_yaml, \
|
|
11
11
|
write_metadata_results, id_missing_cols, find_standard_cols, \
|
|
12
|
-
find_nonstandard_cols, get_qc_failures
|
|
12
|
+
find_nonstandard_cols, get_qc_failures, extend_metadata_df
|
|
13
13
|
from metameq.src.metadata_merger import merge_sample_and_subject_metadata, \
|
|
14
14
|
merge_many_to_one_metadata, merge_one_to_one_metadata, \
|
|
15
15
|
find_common_col_names, find_common_df_cols
|
|
@@ -36,7 +36,8 @@ __all__ = ["HOSTTYPE_SHORTHAND_KEY", "SAMPLETYPE_SHORTHAND_KEY",
|
|
|
36
36
|
"find_nonstandard_cols", "get_qc_failures",
|
|
37
37
|
"format_a_datetime", "standardize_input_sex",
|
|
38
38
|
"set_life_stage_from_age_yrs", "transform_input_sex_to_std_sex",
|
|
39
|
-
"transform_age_to_life_stage", "transform_date_to_formatted_date"
|
|
39
|
+
"transform_age_to_life_stage", "transform_date_to_formatted_date",
|
|
40
|
+
"extend_metadata_df"]
|
|
40
41
|
|
|
41
42
|
from . import _version
|
|
42
43
|
__version__ = _version.get_versions()['version']
|
metameq/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-02-
|
|
11
|
+
"date": "2026-02-03T15:03:32-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2026.02.
|
|
14
|
+
"full-revisionid": "89687d23015566a7583179a69f92c2e1d1adcf61",
|
|
15
|
+
"version": "2026.02.3"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -5,7 +5,7 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
|
|
|
5
5
|
HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
6
6
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
|
|
7
7
|
DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
|
|
8
|
-
SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
|
|
8
|
+
SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, GLOBAL_SETTINGS_KEYS
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def combine_stds_and_study_config(
|
|
@@ -257,11 +257,12 @@ def _combine_base_and_added_host_type(
|
|
|
257
257
|
host_type_wip_nested_dict = \
|
|
258
258
|
deepcopy_dict(host_type_base_dict)
|
|
259
259
|
|
|
260
|
-
# look for
|
|
261
|
-
#
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
260
|
+
# look for global settings in the add dict for this host; if
|
|
261
|
+
# any exists, add it to the wip dict (ok to overwrite existing)
|
|
262
|
+
for curr_global_setting_key in GLOBAL_SETTINGS_KEYS:
|
|
263
|
+
if curr_global_setting_key in host_type_add_dict:
|
|
264
|
+
host_type_wip_nested_dict[curr_global_setting_key] = \
|
|
265
|
+
host_type_add_dict.get(curr_global_setting_key)
|
|
265
266
|
|
|
266
267
|
# combine add metadata fields with the wip metadata fields
|
|
267
268
|
# for the current host type and assign to wip if not empty
|
|
@@ -636,6 +637,10 @@ def build_full_flat_config_dict(
|
|
|
636
637
|
# since the software config doesn't include any host type specific info
|
|
637
638
|
full_nested_hosts_dict = extract_stds_config(stds_fp)
|
|
638
639
|
|
|
640
|
+
full_nested_hosts_dict = _push_global_settings_into_top_host(
|
|
641
|
+
full_nested_hosts_dict,
|
|
642
|
+
software_plus_study_flat_config_dict)
|
|
643
|
+
|
|
639
644
|
full_flat_hosts_dict = flatten_nested_stds_dict(
|
|
640
645
|
full_nested_hosts_dict, None)
|
|
641
646
|
software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
|
|
@@ -655,3 +660,45 @@ def build_full_flat_config_dict(
|
|
|
655
660
|
full_flat_config_dict = software_plus_study_flat_config_dict
|
|
656
661
|
|
|
657
662
|
return full_flat_config_dict
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _push_global_settings_into_top_host(
|
|
666
|
+
a_full_nested_hosts_dict: Dict[str, Any],
|
|
667
|
+
a_software_plus_study_flat_config_dict: Dict[str, Any]) -> Dict[str, Any]:
|
|
668
|
+
"""Push global settings from flat config into top-level host in nested hosts dict.
|
|
669
|
+
|
|
670
|
+
Parameters
|
|
671
|
+
----------
|
|
672
|
+
a_full_nested_hosts_dict : Dict[str, Any]
|
|
673
|
+
Nested hosts dictionary to update.
|
|
674
|
+
a_software_plus_study_flat_config_dict : Dict[str, Any]
|
|
675
|
+
Flat configuration dictionary containing global settings.
|
|
676
|
+
|
|
677
|
+
Returns
|
|
678
|
+
-------
|
|
679
|
+
Dict[str, Any]
|
|
680
|
+
Updated nested hosts dictionary with global settings added to top-level host.
|
|
681
|
+
|
|
682
|
+
Raises
|
|
683
|
+
------
|
|
684
|
+
ValueError
|
|
685
|
+
If there is not exactly one top-level host in the nested hosts dictionary.
|
|
686
|
+
"""
|
|
687
|
+
result = deepcopy_dict(a_full_nested_hosts_dict)
|
|
688
|
+
|
|
689
|
+
# get the top level host(s) in full_nested_hosts_dict
|
|
690
|
+
# (should be only one because it is nested)
|
|
691
|
+
top_level_host_keys = list(a_full_nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY].keys())
|
|
692
|
+
if len(top_level_host_keys) != 1:
|
|
693
|
+
raise ValueError(f"Expected exactly one top-level key in "
|
|
694
|
+
f"full_nested_hosts_dict but found: {top_level_host_keys}")
|
|
695
|
+
top_level_host_key = top_level_host_keys[0]
|
|
696
|
+
|
|
697
|
+
# check for each top-level setting from the software+study dictionary
|
|
698
|
+
# and add it under the top level host key in the copy of a_full_nested_hosts_dict
|
|
699
|
+
for curr_setting_key in GLOBAL_SETTINGS_KEYS:
|
|
700
|
+
if curr_setting_key in a_software_plus_study_flat_config_dict:
|
|
701
|
+
result[HOST_TYPE_SPECIFIC_METADATA_KEY][top_level_host_key][curr_setting_key] = \
|
|
702
|
+
a_software_plus_study_flat_config_dict[curr_setting_key]
|
|
703
|
+
|
|
704
|
+
return result
|
metameq/src/metadata_extender.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from typing import List, Dict, Optional, Tuple, Any
|
|
8
8
|
from metameq.src.util import extract_config_dict, \
|
|
9
|
-
|
|
9
|
+
validate_required_columns_exist, get_extension, \
|
|
10
10
|
load_df_with_best_fit_encoding, update_metadata_df_field, \
|
|
11
11
|
HOSTTYPE_SHORTHAND_KEY, SAMPLETYPE_SHORTHAND_KEY, \
|
|
12
12
|
QC_NOTE_KEY, METADATA_FIELDS_KEY, HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
@@ -301,13 +301,13 @@ def write_extended_metadata(
|
|
|
301
301
|
# extract the extension from the raw_metadata_fp file path
|
|
302
302
|
extension = os.path.splitext(raw_metadata_fp)[1]
|
|
303
303
|
if extension == ".csv":
|
|
304
|
-
raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, ",")
|
|
304
|
+
raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, ",", str)
|
|
305
305
|
elif extension == ".txt":
|
|
306
|
-
raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, "\t")
|
|
306
|
+
raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, "\t", str)
|
|
307
307
|
elif extension == ".xlsx":
|
|
308
308
|
# NB: this loads (only) the first sheet of the input excel file.
|
|
309
309
|
# If needed, can expand with pandas.read_excel sheet_name parameter.
|
|
310
|
-
raw_metadata_df = pandas.read_excel(raw_metadata_fp)
|
|
310
|
+
raw_metadata_df = pandas.read_excel(raw_metadata_fp, dtype=str)
|
|
311
311
|
else:
|
|
312
312
|
raise ValueError("Unrecognized input file extension; "
|
|
313
313
|
"must be .csv, .txt, or .xlsx")
|
|
@@ -451,7 +451,7 @@ def extend_metadata_df(
|
|
|
451
451
|
full_flat_config_dict = build_full_flat_config_dict(
|
|
452
452
|
study_specific_config_dict, software_config_dict, stds_fp)
|
|
453
453
|
|
|
454
|
-
needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
|
|
454
|
+
needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
|
|
455
455
|
(SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
|
|
456
456
|
for curr_key, curr_options_key in needed_cols:
|
|
457
457
|
if curr_key not in raw_metadata_df.columns:
|
|
@@ -485,7 +485,7 @@ def _get_specified_column_name(
|
|
|
485
485
|
The metadata DataFrame to check.
|
|
486
486
|
config_dict : Dict[str, Any], default=None
|
|
487
487
|
Configuration dictionary. If provided, may contain a list of possible
|
|
488
|
-
column names under the key specified by col_options_key.
|
|
488
|
+
column names under the key specified by col_options_key.
|
|
489
489
|
If None, defaults to values from the main config.yml file.
|
|
490
490
|
Returns
|
|
491
491
|
-------
|
|
@@ -503,7 +503,8 @@ def _get_specified_column_name(
|
|
|
503
503
|
found_name = col_name
|
|
504
504
|
break
|
|
505
505
|
|
|
506
|
-
return found_name
|
|
506
|
+
return found_name
|
|
507
|
+
|
|
507
508
|
|
|
508
509
|
def write_metadata_results(
|
|
509
510
|
metadata_df: pandas.DataFrame,
|
|
@@ -738,12 +739,6 @@ def _generate_metadata_for_host_types(
|
|
|
738
739
|
- The processed DataFrame with specific metadata added to each sample of each host type
|
|
739
740
|
- A list of validation messages
|
|
740
741
|
"""
|
|
741
|
-
# gather global settings
|
|
742
|
-
settings_dict = {DEFAULT_KEY: full_flat_config_dict.get(DEFAULT_KEY),
|
|
743
|
-
LEAVE_REQUIREDS_BLANK_KEY:
|
|
744
|
-
full_flat_config_dict.get(LEAVE_REQUIREDS_BLANK_KEY),
|
|
745
|
-
OVERWRITE_NON_NANS_KEY:
|
|
746
|
-
full_flat_config_dict.get(OVERWRITE_NON_NANS_KEY)}
|
|
747
742
|
|
|
748
743
|
validation_msgs = []
|
|
749
744
|
host_type_dfs = []
|
|
@@ -751,7 +746,7 @@ def _generate_metadata_for_host_types(
|
|
|
751
746
|
host_type_shorthands = pandas.unique(metadata_df[HOSTTYPE_SHORTHAND_KEY])
|
|
752
747
|
for curr_host_type_shorthand in host_type_shorthands:
|
|
753
748
|
concatted_dfs, curr_validation_msgs = _generate_metadata_for_a_host_type(
|
|
754
|
-
metadata_df, curr_host_type_shorthand,
|
|
749
|
+
metadata_df, curr_host_type_shorthand, full_flat_config_dict)
|
|
755
750
|
|
|
756
751
|
host_type_dfs.append(concatted_dfs)
|
|
757
752
|
validation_msgs.extend(curr_validation_msgs)
|
|
@@ -767,7 +762,7 @@ def _generate_metadata_for_host_types(
|
|
|
767
762
|
# NB: passing in the full flat config dict as the settings dict here is a
|
|
768
763
|
# convenience since we don't have a more specific dict at this point.
|
|
769
764
|
output_df = _fill_na_if_default(
|
|
770
|
-
output_df,
|
|
765
|
+
output_df, full_flat_config_dict)
|
|
771
766
|
|
|
772
767
|
# TODO: this is setting a value in the output; should it be centralized
|
|
773
768
|
# so it is easy to find?
|
|
@@ -779,7 +774,6 @@ def _generate_metadata_for_host_types(
|
|
|
779
774
|
def _generate_metadata_for_a_host_type(
|
|
780
775
|
metadata_df: pandas.DataFrame,
|
|
781
776
|
a_host_type: str,
|
|
782
|
-
settings_dict: Dict[str, Any],
|
|
783
777
|
full_flat_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
|
|
784
778
|
"""Generate metadata df for samples with a specific host type.
|
|
785
779
|
|
|
@@ -790,8 +784,6 @@ def _generate_metadata_for_a_host_type(
|
|
|
790
784
|
the columns in REQUIRED_RAW_METADATA_FIELDS.
|
|
791
785
|
a_host_type : str
|
|
792
786
|
The specific host type for which to process samples.
|
|
793
|
-
settings_dict : Dict[str, Any]
|
|
794
|
-
Dictionary containing global settings for default/nan/etc.
|
|
795
787
|
full_flat_config_dict : Dict[str, Any]
|
|
796
788
|
Fully combined flat-host-type config dictionary.
|
|
797
789
|
|
|
@@ -814,16 +806,11 @@ def _generate_metadata_for_a_host_type(
|
|
|
814
806
|
# for these samples but do not error out; move on to the next host type
|
|
815
807
|
update_metadata_df_field(
|
|
816
808
|
host_type_df, QC_NOTE_KEY, "invalid host_type")
|
|
817
|
-
# host_type_df[QC_NOTE_KEY] = "invalid host_type"
|
|
818
809
|
concatted_df = host_type_df
|
|
819
810
|
else:
|
|
820
811
|
# gather the config for this host type, which includes the settings that apply to it
|
|
821
812
|
a_host_type_config_dict = \
|
|
822
813
|
full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY][a_host_type]
|
|
823
|
-
global_plus_host_settings_dict = deepcopy_dict(settings_dict)
|
|
824
|
-
# if this host type has a default value for empty fields, use it; otherwise, use the global default
|
|
825
|
-
global_plus_host_settings_dict[DEFAULT_KEY] = a_host_type_config_dict.get(
|
|
826
|
-
DEFAULT_KEY, global_plus_host_settings_dict[DEFAULT_KEY])
|
|
827
814
|
|
|
828
815
|
dfs_to_concat = []
|
|
829
816
|
# loop through each sample type in the metadata for this host type
|
|
@@ -833,8 +820,7 @@ def _generate_metadata_for_a_host_type(
|
|
|
833
820
|
# generate the specific metadata for this sample type *in this host type*
|
|
834
821
|
curr_sample_type_df, curr_validation_msgs = \
|
|
835
822
|
_generate_metadata_for_a_sample_type_in_a_host_type(
|
|
836
|
-
host_type_df, curr_sample_type,
|
|
837
|
-
a_host_type_config_dict)
|
|
823
|
+
host_type_df, curr_sample_type, a_host_type_config_dict)
|
|
838
824
|
|
|
839
825
|
dfs_to_concat.append(curr_sample_type_df)
|
|
840
826
|
validation_msgs.extend(curr_validation_msgs)
|
|
@@ -851,7 +837,6 @@ def _generate_metadata_for_a_host_type(
|
|
|
851
837
|
def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
852
838
|
host_type_metadata_df: pandas.DataFrame,
|
|
853
839
|
a_sample_type: str,
|
|
854
|
-
global_plus_host_settings_dict: Dict[str, Any],
|
|
855
840
|
a_host_type_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
|
|
856
841
|
"""Generate metadata df for samples with a specific sample type within a specific host type.
|
|
857
842
|
|
|
@@ -861,8 +846,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
861
846
|
DataFrame containing metadata samples for a specific host type.
|
|
862
847
|
a_sample_type : str
|
|
863
848
|
The sample type to process.
|
|
864
|
-
global_plus_host_settings_dict : Dict[str, Any]
|
|
865
|
-
Dictionary containing default/nan/etc settings for current context.
|
|
866
849
|
a_host_type_config_dict : Dict[str, Any]
|
|
867
850
|
Dictionary containing config for this host type.
|
|
868
851
|
|
|
@@ -901,19 +884,19 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
901
884
|
sample_type_df = _update_metadata_from_dict(
|
|
902
885
|
sample_type_df, full_sample_type_metadata_fields_dict,
|
|
903
886
|
dict_is_metadata_fields=True,
|
|
904
|
-
overwrite_non_nans=
|
|
887
|
+
overwrite_non_nans=a_host_type_config_dict[OVERWRITE_NON_NANS_KEY])
|
|
905
888
|
|
|
906
889
|
# for fields that are required but not yet filled, replace the placeholder with
|
|
907
890
|
# either an indicator that it should be blank or else
|
|
908
891
|
# fill with NA (replaced with default just below), based on config setting
|
|
909
|
-
leave_reqs_blank =
|
|
892
|
+
leave_reqs_blank = a_host_type_config_dict[LEAVE_REQUIREDS_BLANK_KEY]
|
|
910
893
|
reqs_val = LEAVE_BLANK_VAL if leave_reqs_blank else np.nan
|
|
911
894
|
sample_type_df.replace(
|
|
912
895
|
to_replace=REQ_PLACEHOLDER, value=reqs_val, inplace=True)
|
|
913
896
|
|
|
914
897
|
# fill NAs with appropriate default value if any is set
|
|
915
898
|
sample_type_df = _fill_na_if_default(
|
|
916
|
-
sample_type_df,
|
|
899
|
+
sample_type_df, a_host_type_config_dict)
|
|
917
900
|
|
|
918
901
|
# validate the metadata df based on the specific requirements
|
|
919
902
|
# for this host+sample type
|
|
@@ -1095,7 +1078,6 @@ def _update_metadata_from_metadata_fields_dict(
|
|
|
1095
1078
|
# fill NAs with default value if any is set
|
|
1096
1079
|
def _fill_na_if_default(
|
|
1097
1080
|
metadata_df: pandas.DataFrame,
|
|
1098
|
-
specific_dict: Dict[str, Any],
|
|
1099
1081
|
settings_dict: Dict[str, Any]) -> pandas.DataFrame:
|
|
1100
1082
|
"""Fill NaN values in metadata df with default values if available.
|
|
1101
1083
|
|
|
@@ -1103,24 +1085,20 @@ def _fill_na_if_default(
|
|
|
1103
1085
|
----------
|
|
1104
1086
|
metadata_df : pandas.DataFrame
|
|
1105
1087
|
The metadata DataFrame to process.
|
|
1106
|
-
specific_dict : Dict[str, Any]
|
|
1107
|
-
Dictionary containing context-specific settings. Will be used first as a source of default values.
|
|
1108
1088
|
settings_dict : Dict[str, Any]
|
|
1109
|
-
Dictionary containing
|
|
1110
|
-
source of default values if specific_dict does not contain a DEFAULT_KEY.
|
|
1089
|
+
Dictionary containing settings.
|
|
1111
1090
|
|
|
1112
1091
|
Returns
|
|
1113
1092
|
-------
|
|
1114
1093
|
pandas.DataFrame
|
|
1115
1094
|
The updated DataFrame with NaN values filled. Unchanged if no default values are set.
|
|
1116
1095
|
"""
|
|
1117
|
-
default_val =
|
|
1096
|
+
default_val = settings_dict.get(DEFAULT_KEY)
|
|
1118
1097
|
if default_val:
|
|
1119
1098
|
# TODO: this is setting a value in the output; should it be
|
|
1120
1099
|
# centralized so it is easy to find?
|
|
1121
1100
|
metadata_df = \
|
|
1122
1101
|
metadata_df.fillna(default_val)
|
|
1123
|
-
# metadata_df.astype("string").fillna(default_val)
|
|
1124
1102
|
|
|
1125
1103
|
return metadata_df
|
|
1126
1104
|
|
metameq/src/util.py
CHANGED
|
@@ -51,6 +51,13 @@ REQUIRED_RAW_METADATA_FIELDS = [SAMPLE_NAME_KEY,
|
|
|
51
51
|
SAMPLETYPE_SHORTHAND_KEY]
|
|
52
52
|
|
|
53
53
|
|
|
54
|
+
GLOBAL_SETTINGS_KEYS = [
|
|
55
|
+
DEFAULT_KEY,
|
|
56
|
+
LEAVE_REQUIREDS_BLANK_KEY,
|
|
57
|
+
OVERWRITE_NON_NANS_KEY
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
54
61
|
def extract_config_dict(
|
|
55
62
|
config_fp: Union[str, None]) -> dict:
|
|
56
63
|
"""Extract configuration dictionary from a YAML file.
|
|
@@ -17,7 +17,8 @@ from metameq.src.metadata_configurator import \
|
|
|
17
17
|
_id_sample_type_definition, \
|
|
18
18
|
update_wip_metadata_dict, \
|
|
19
19
|
build_full_flat_config_dict, \
|
|
20
|
-
_resolve_sample_type_aliases_and_bases
|
|
20
|
+
_resolve_sample_type_aliases_and_bases, \
|
|
21
|
+
_push_global_settings_into_top_host
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class TestMetadataConfigurator(TestCase):
|
|
@@ -3847,6 +3848,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3847
3848
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
3848
3849
|
# base: top level in test_standards.yml, no default of its own; receives the global settings
|
|
3849
3850
|
"base": {
|
|
3851
|
+
DEFAULT_KEY: "software_default",
|
|
3852
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3853
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3850
3854
|
METADATA_FIELDS_KEY: {
|
|
3851
3855
|
# sample_name defined at base level
|
|
3852
3856
|
"sample_name": {
|
|
@@ -3865,6 +3869,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3865
3869
|
"host_associated": {
|
|
3866
3870
|
# default defined at host_associated level
|
|
3867
3871
|
DEFAULT_KEY: "not provided",
|
|
3872
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3873
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3868
3874
|
METADATA_FIELDS_KEY: {
|
|
3869
3875
|
# description defined at host_associated level
|
|
3870
3876
|
"description": {
|
|
@@ -3919,6 +3925,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3919
3925
|
"human": {
|
|
3920
3926
|
# default inherited from host_associated
|
|
3921
3927
|
DEFAULT_KEY: "not provided",
|
|
3928
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3929
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3922
3930
|
METADATA_FIELDS_KEY: {
|
|
3923
3931
|
# custom_field added from study_specific_metadata
|
|
3924
3932
|
"custom_field": {
|
|
@@ -4037,6 +4045,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4037
4045
|
"mouse": {
|
|
4038
4046
|
# default inherited from host_associated
|
|
4039
4047
|
DEFAULT_KEY: "not provided",
|
|
4048
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4049
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4040
4050
|
METADATA_FIELDS_KEY: {
|
|
4041
4051
|
# description inherited from host_associated (not overridden)
|
|
4042
4052
|
"description": {
|
|
@@ -4103,6 +4113,7 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4103
4113
|
}
|
|
4104
4114
|
}
|
|
4105
4115
|
}
|
|
4116
|
+
|
|
4106
4117
|
self.assertEqual(expected, result)
|
|
4107
4118
|
|
|
4108
4119
|
def test_build_full_flat_config_dict_without_study_config(self):
|
|
@@ -4130,6 +4141,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4130
4141
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4131
4142
|
# base: top level, no default of its own (receives the global settings), just sample_name/sample_type
|
|
4132
4143
|
"base": {
|
|
4144
|
+
DEFAULT_KEY: "software_default",
|
|
4145
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4146
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4133
4147
|
METADATA_FIELDS_KEY: {
|
|
4134
4148
|
"sample_name": {
|
|
4135
4149
|
REQUIRED_KEY: True,
|
|
@@ -4145,6 +4159,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4145
4159
|
# host_associated: inherits from base, adds default and description
|
|
4146
4160
|
"host_associated": {
|
|
4147
4161
|
DEFAULT_KEY: "not provided",
|
|
4162
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4163
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4148
4164
|
METADATA_FIELDS_KEY: {
|
|
4149
4165
|
"description": {
|
|
4150
4166
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4194,6 +4210,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4194
4210
|
# human: inherits from host_associated, overrides description
|
|
4195
4211
|
"human": {
|
|
4196
4212
|
DEFAULT_KEY: "not provided",
|
|
4213
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4214
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4197
4215
|
METADATA_FIELDS_KEY: {
|
|
4198
4216
|
"description": {
|
|
4199
4217
|
DEFAULT_KEY: "human sample",
|
|
@@ -4291,6 +4309,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4291
4309
|
# mouse: inherits from host_associated, keeps parent description
|
|
4292
4310
|
"mouse": {
|
|
4293
4311
|
DEFAULT_KEY: "not provided",
|
|
4312
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4313
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4294
4314
|
METADATA_FIELDS_KEY: {
|
|
4295
4315
|
"description": {
|
|
4296
4316
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4395,6 +4415,12 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4395
4415
|
# Flattened host types
|
|
4396
4416
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4397
4417
|
"base": {
|
|
4418
|
+
# default from study_config overrides software_config
|
|
4419
|
+
DEFAULT_KEY: "study_default",
|
|
4420
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4421
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4422
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4423
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4398
4424
|
METADATA_FIELDS_KEY: {
|
|
4399
4425
|
"sample_name": {
|
|
4400
4426
|
REQUIRED_KEY: True,
|
|
@@ -4409,6 +4435,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4409
4435
|
},
|
|
4410
4436
|
"host_associated": {
|
|
4411
4437
|
DEFAULT_KEY: "not provided",
|
|
4438
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4439
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4440
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4441
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4412
4442
|
METADATA_FIELDS_KEY: {
|
|
4413
4443
|
"description": {
|
|
4414
4444
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4457,6 +4487,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4457
4487
|
},
|
|
4458
4488
|
"human": {
|
|
4459
4489
|
DEFAULT_KEY: "not provided",
|
|
4490
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4491
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4492
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4493
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4460
4494
|
METADATA_FIELDS_KEY: {
|
|
4461
4495
|
"description": {
|
|
4462
4496
|
DEFAULT_KEY: "human sample",
|
|
@@ -4553,6 +4587,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4553
4587
|
},
|
|
4554
4588
|
"mouse": {
|
|
4555
4589
|
DEFAULT_KEY: "not provided",
|
|
4590
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4591
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4592
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4593
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4556
4594
|
METADATA_FIELDS_KEY: {
|
|
4557
4595
|
"description": {
|
|
4558
4596
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4649,6 +4687,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4649
4687
|
# Flattened host types
|
|
4650
4688
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4651
4689
|
"base": {
|
|
4690
|
+
DEFAULT_KEY: "not applicable",
|
|
4691
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4692
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4652
4693
|
METADATA_FIELDS_KEY: {
|
|
4653
4694
|
"sample_name": {
|
|
4654
4695
|
REQUIRED_KEY: True,
|
|
@@ -4663,6 +4704,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4663
4704
|
},
|
|
4664
4705
|
"host_associated": {
|
|
4665
4706
|
DEFAULT_KEY: "not provided",
|
|
4707
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4708
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4666
4709
|
METADATA_FIELDS_KEY: {
|
|
4667
4710
|
"description": {
|
|
4668
4711
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4711,6 +4754,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4711
4754
|
},
|
|
4712
4755
|
"human": {
|
|
4713
4756
|
DEFAULT_KEY: "not provided",
|
|
4757
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4758
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4714
4759
|
METADATA_FIELDS_KEY: {
|
|
4715
4760
|
"description": {
|
|
4716
4761
|
DEFAULT_KEY: "human sample",
|
|
@@ -4807,6 +4852,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4807
4852
|
},
|
|
4808
4853
|
"mouse": {
|
|
4809
4854
|
DEFAULT_KEY: "not provided",
|
|
4855
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4856
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4810
4857
|
METADATA_FIELDS_KEY: {
|
|
4811
4858
|
"description": {
|
|
4812
4859
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4867,4 +4914,140 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4867
4914
|
}
|
|
4868
4915
|
}
|
|
4869
4916
|
}
|
|
4917
|
+
|
|
4918
|
+
self.assertEqual(expected, result)
|
|
4919
|
+
|
|
4920
|
+
# Tests for _push_global_settings_into_top_host
|
|
4921
|
+
|
|
4922
|
+
def test__push_global_settings_into_top_host_single_setting(self):
|
|
4923
|
+
"""Test pushing a single global setting into the top-level host."""
|
|
4924
|
+
nested_hosts_dict = {
|
|
4925
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4926
|
+
"base": {
|
|
4927
|
+
METADATA_FIELDS_KEY: {
|
|
4928
|
+
"field1": {TYPE_KEY: "string"}
|
|
4929
|
+
}
|
|
4930
|
+
}
|
|
4931
|
+
}
|
|
4932
|
+
}
|
|
4933
|
+
flat_config_dict = {
|
|
4934
|
+
DEFAULT_KEY: "custom_default"
|
|
4935
|
+
}
|
|
4936
|
+
|
|
4937
|
+
expected = {
|
|
4938
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4939
|
+
"base": {
|
|
4940
|
+
DEFAULT_KEY: "custom_default",
|
|
4941
|
+
METADATA_FIELDS_KEY: {
|
|
4942
|
+
"field1": {TYPE_KEY: "string"}
|
|
4943
|
+
}
|
|
4944
|
+
}
|
|
4945
|
+
}
|
|
4946
|
+
}
|
|
4947
|
+
|
|
4948
|
+
result = _push_global_settings_into_top_host(
|
|
4949
|
+
nested_hosts_dict, flat_config_dict)
|
|
4950
|
+
|
|
4951
|
+
self.assertEqual(expected, result)
|
|
4952
|
+
# Original should be unchanged
|
|
4953
|
+
self.assertNotIn(
|
|
4954
|
+
DEFAULT_KEY,
|
|
4955
|
+
nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]["base"])
|
|
4956
|
+
|
|
4957
|
+
def test__push_global_settings_into_top_host_multiple_settings(self):
|
|
4958
|
+
"""Test pushing multiple global settings into the top-level host."""
|
|
4959
|
+
nested_hosts_dict = {
|
|
4960
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4961
|
+
"base": {
|
|
4962
|
+
METADATA_FIELDS_KEY: {
|
|
4963
|
+
"field1": {TYPE_KEY: "string"}
|
|
4964
|
+
}
|
|
4965
|
+
}
|
|
4966
|
+
}
|
|
4967
|
+
}
|
|
4968
|
+
flat_config_dict = {
|
|
4969
|
+
DEFAULT_KEY: "custom_default",
|
|
4970
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4971
|
+
OVERWRITE_NON_NANS_KEY: True
|
|
4972
|
+
}
|
|
4973
|
+
|
|
4974
|
+
expected = {
|
|
4975
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4976
|
+
"base": {
|
|
4977
|
+
DEFAULT_KEY: "custom_default",
|
|
4978
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4979
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4980
|
+
METADATA_FIELDS_KEY: {
|
|
4981
|
+
"field1": {TYPE_KEY: "string"}
|
|
4982
|
+
}
|
|
4983
|
+
}
|
|
4984
|
+
}
|
|
4985
|
+
}
|
|
4986
|
+
|
|
4987
|
+
result = _push_global_settings_into_top_host(
|
|
4988
|
+
nested_hosts_dict, flat_config_dict)
|
|
4989
|
+
|
|
4870
4990
|
self.assertEqual(expected, result)
|
|
4991
|
+
|
|
4992
|
+
def test__push_global_settings_into_top_host_no_settings(self):
|
|
4993
|
+
"""Test that function returns copy when no global settings present."""
|
|
4994
|
+
nested_hosts_dict = {
|
|
4995
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4996
|
+
"base": {
|
|
4997
|
+
METADATA_FIELDS_KEY: {
|
|
4998
|
+
"field1": {TYPE_KEY: "string"}
|
|
4999
|
+
}
|
|
5000
|
+
}
|
|
5001
|
+
}
|
|
5002
|
+
}
|
|
5003
|
+
flat_config_dict = {
|
|
5004
|
+
"some_other_key": "value"
|
|
5005
|
+
}
|
|
5006
|
+
|
|
5007
|
+
expected = {
|
|
5008
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
5009
|
+
"base": {
|
|
5010
|
+
METADATA_FIELDS_KEY: {
|
|
5011
|
+
"field1": {TYPE_KEY: "string"}
|
|
5012
|
+
}
|
|
5013
|
+
}
|
|
5014
|
+
}
|
|
5015
|
+
}
|
|
5016
|
+
|
|
5017
|
+
result = _push_global_settings_into_top_host(
|
|
5018
|
+
nested_hosts_dict, flat_config_dict)
|
|
5019
|
+
|
|
5020
|
+
self.assertEqual(expected, result)
|
|
5021
|
+
|
|
5022
|
+
def test__push_global_settings_into_top_host_raises_on_zero_hosts(self):
|
|
5023
|
+
"""Test that ValueError is raised when no top-level hosts exist."""
|
|
5024
|
+
nested_hosts_dict = {
|
|
5025
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {}
|
|
5026
|
+
}
|
|
5027
|
+
flat_config_dict = {
|
|
5028
|
+
DEFAULT_KEY: "custom_default"
|
|
5029
|
+
}
|
|
5030
|
+
|
|
5031
|
+
with self.assertRaisesRegex(
|
|
5032
|
+
ValueError,
|
|
5033
|
+
r"Expected exactly one top-level key.*found: \[\]"):
|
|
5034
|
+
_push_global_settings_into_top_host(
|
|
5035
|
+
nested_hosts_dict, flat_config_dict)
|
|
5036
|
+
|
|
5037
|
+
def test__push_global_settings_into_top_host_raises_on_multiple_hosts(self):
|
|
5038
|
+
"""Test that ValueError is raised when multiple top-level hosts exist."""
|
|
5039
|
+
nested_hosts_dict = {
|
|
5040
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
5041
|
+
"host1": {METADATA_FIELDS_KEY: {}},
|
|
5042
|
+
"host2": {METADATA_FIELDS_KEY: {}}
|
|
5043
|
+
}
|
|
5044
|
+
}
|
|
5045
|
+
flat_config_dict = {
|
|
5046
|
+
DEFAULT_KEY: "custom_default"
|
|
5047
|
+
}
|
|
5048
|
+
|
|
5049
|
+
with self.assertRaisesRegex(
|
|
5050
|
+
ValueError,
|
|
5051
|
+
r"Expected exactly one top-level key"):
|
|
5052
|
+
_push_global_settings_into_top_host(
|
|
5053
|
+
nested_hosts_dict, flat_config_dict)
|