metameq 2026.2.1__tar.gz → 2026.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metameq-2026.2.1/metameq.egg-info → metameq-2026.2.2}/PKG-INFO +1 -1
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/_version.py +3 -3
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_configurator.py +53 -6
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_extender.py +13 -35
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/util.py +7 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_configurator.py +184 -1
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_extender.py +106 -91
- {metameq-2026.2.1 → metameq-2026.2.2/metameq.egg-info}/PKG-INFO +1 -1
- {metameq-2026.2.1 → metameq-2026.2.2}/.gitattributes +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/.github/workflows/main.yaml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/.gitignore +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/README.md +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq.png +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_dark.svg +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_light.svg +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_medium.png +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/environment.yml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/__init__.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/__init__.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/config.yml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/standards.yml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/__init__.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/__main__.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_merger.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_transformers.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_validator.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/__init__.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/data/invalid.yml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/data/test_config.yml +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_merger.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_transformers.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_validator.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_util.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/SOURCES.txt +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/dependency_links.txt +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/entry_points.txt +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/requires.txt +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/top_level.txt +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/setup.cfg +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/setup.py +0 -0
- {metameq-2026.2.1 → metameq-2026.2.2}/versioneer.py +0 -0
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-02-
|
|
11
|
+
"date": "2026-02-02T16:43:52-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2026.02.
|
|
14
|
+
"full-revisionid": "4fe1396e1007820dc7a4bdb58708fff0df6b9a57",
|
|
15
|
+
"version": "2026.02.2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -5,7 +5,7 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
|
|
|
5
5
|
HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
6
6
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
|
|
7
7
|
DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
|
|
8
|
-
SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
|
|
8
|
+
SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, GLOBAL_SETTINGS_KEYS
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def combine_stds_and_study_config(
|
|
@@ -257,11 +257,12 @@ def _combine_base_and_added_host_type(
|
|
|
257
257
|
host_type_wip_nested_dict = \
|
|
258
258
|
deepcopy_dict(host_type_base_dict)
|
|
259
259
|
|
|
260
|
-
# look for
|
|
261
|
-
#
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
260
|
+
# look for global settings in the add dict for this host; if
|
|
261
|
+
# any exists, add it to the wip dict (ok to overwrite existing)
|
|
262
|
+
for curr_global_setting_key in GLOBAL_SETTINGS_KEYS:
|
|
263
|
+
if curr_global_setting_key in host_type_add_dict:
|
|
264
|
+
host_type_wip_nested_dict[curr_global_setting_key] = \
|
|
265
|
+
host_type_add_dict.get(curr_global_setting_key)
|
|
265
266
|
|
|
266
267
|
# combine add metadata fields with the wip metadata fields
|
|
267
268
|
# for the current host type and assign to wip if not empty
|
|
@@ -636,6 +637,10 @@ def build_full_flat_config_dict(
|
|
|
636
637
|
# since the software config doesn't include any host type specific info
|
|
637
638
|
full_nested_hosts_dict = extract_stds_config(stds_fp)
|
|
638
639
|
|
|
640
|
+
full_nested_hosts_dict = _push_global_settings_into_top_host(
|
|
641
|
+
full_nested_hosts_dict,
|
|
642
|
+
software_plus_study_flat_config_dict)
|
|
643
|
+
|
|
639
644
|
full_flat_hosts_dict = flatten_nested_stds_dict(
|
|
640
645
|
full_nested_hosts_dict, None)
|
|
641
646
|
software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
|
|
@@ -655,3 +660,45 @@ def build_full_flat_config_dict(
|
|
|
655
660
|
full_flat_config_dict = software_plus_study_flat_config_dict
|
|
656
661
|
|
|
657
662
|
return full_flat_config_dict
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _push_global_settings_into_top_host(
|
|
666
|
+
a_full_nested_hosts_dict: Dict[str, Any],
|
|
667
|
+
a_software_plus_study_flat_config_dict: Dict[str, Any]) -> Dict[str, Any]:
|
|
668
|
+
"""Push global settings from flat config into top-level host in nested hosts dict.
|
|
669
|
+
|
|
670
|
+
Parameters
|
|
671
|
+
----------
|
|
672
|
+
a_full_nested_hosts_dict : Dict[str, Any]
|
|
673
|
+
Nested hosts dictionary to update.
|
|
674
|
+
a_software_plus_study_flat_config_dict : Dict[str, Any]
|
|
675
|
+
Flat configuration dictionary containing global settings.
|
|
676
|
+
|
|
677
|
+
Returns
|
|
678
|
+
-------
|
|
679
|
+
Dict[str, Any]
|
|
680
|
+
Updated nested hosts dictionary with global settings added to top-level host.
|
|
681
|
+
|
|
682
|
+
Raises
|
|
683
|
+
------
|
|
684
|
+
ValueError
|
|
685
|
+
If there is not exactly one top-level host in the nested hosts dictionary.
|
|
686
|
+
"""
|
|
687
|
+
result = deepcopy_dict(a_full_nested_hosts_dict)
|
|
688
|
+
|
|
689
|
+
# get the top level host(s) in full_nested_hosts_dict
|
|
690
|
+
# (should be only one because it is nested)
|
|
691
|
+
top_level_host_keys = list(a_full_nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY].keys())
|
|
692
|
+
if len(top_level_host_keys) != 1:
|
|
693
|
+
raise ValueError(f"Expected exactly one top-level key in "
|
|
694
|
+
f"full_nested_hosts_dict but found: {top_level_host_keys}")
|
|
695
|
+
top_level_host_key = top_level_host_keys[0]
|
|
696
|
+
|
|
697
|
+
# check for each top-level setting from the software+study dictionary
|
|
698
|
+
# and add it under the top level host key in the copied result dict
|
|
699
|
+
for curr_setting_key in GLOBAL_SETTINGS_KEYS:
|
|
700
|
+
if curr_setting_key in a_software_plus_study_flat_config_dict:
|
|
701
|
+
result[HOST_TYPE_SPECIFIC_METADATA_KEY][top_level_host_key][curr_setting_key] = \
|
|
702
|
+
a_software_plus_study_flat_config_dict[curr_setting_key]
|
|
703
|
+
|
|
704
|
+
return result
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from typing import List, Dict, Optional, Tuple, Any
|
|
8
8
|
from metameq.src.util import extract_config_dict, \
|
|
9
|
-
|
|
9
|
+
validate_required_columns_exist, get_extension, \
|
|
10
10
|
load_df_with_best_fit_encoding, update_metadata_df_field, \
|
|
11
11
|
HOSTTYPE_SHORTHAND_KEY, SAMPLETYPE_SHORTHAND_KEY, \
|
|
12
12
|
QC_NOTE_KEY, METADATA_FIELDS_KEY, HOST_TYPE_SPECIFIC_METADATA_KEY, \
|
|
@@ -451,7 +451,7 @@ def extend_metadata_df(
|
|
|
451
451
|
full_flat_config_dict = build_full_flat_config_dict(
|
|
452
452
|
study_specific_config_dict, software_config_dict, stds_fp)
|
|
453
453
|
|
|
454
|
-
needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
|
|
454
|
+
needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
|
|
455
455
|
(SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
|
|
456
456
|
for curr_key, curr_options_key in needed_cols:
|
|
457
457
|
if curr_key not in raw_metadata_df.columns:
|
|
@@ -485,7 +485,7 @@ def _get_specified_column_name(
|
|
|
485
485
|
The metadata DataFrame to check.
|
|
486
486
|
config_dict : Dict[str, Any], default=None
|
|
487
487
|
Configuration dictionary. If provided, may contain a list of possible
|
|
488
|
-
column names under the key specified by col_options_key.
|
|
488
|
+
column names under the key specified by col_options_key.
|
|
489
489
|
If None, defaults to values from the main config.yml file.
|
|
490
490
|
Returns
|
|
491
491
|
-------
|
|
@@ -503,7 +503,8 @@ def _get_specified_column_name(
|
|
|
503
503
|
found_name = col_name
|
|
504
504
|
break
|
|
505
505
|
|
|
506
|
-
return found_name
|
|
506
|
+
return found_name
|
|
507
|
+
|
|
507
508
|
|
|
508
509
|
def write_metadata_results(
|
|
509
510
|
metadata_df: pandas.DataFrame,
|
|
@@ -738,12 +739,6 @@ def _generate_metadata_for_host_types(
|
|
|
738
739
|
- The processed DataFrame with specific metadata added to each sample of each host type
|
|
739
740
|
- A list of validation messages
|
|
740
741
|
"""
|
|
741
|
-
# gather global settings
|
|
742
|
-
settings_dict = {DEFAULT_KEY: full_flat_config_dict.get(DEFAULT_KEY),
|
|
743
|
-
LEAVE_REQUIREDS_BLANK_KEY:
|
|
744
|
-
full_flat_config_dict.get(LEAVE_REQUIREDS_BLANK_KEY),
|
|
745
|
-
OVERWRITE_NON_NANS_KEY:
|
|
746
|
-
full_flat_config_dict.get(OVERWRITE_NON_NANS_KEY)}
|
|
747
742
|
|
|
748
743
|
validation_msgs = []
|
|
749
744
|
host_type_dfs = []
|
|
@@ -751,7 +746,7 @@ def _generate_metadata_for_host_types(
|
|
|
751
746
|
host_type_shorthands = pandas.unique(metadata_df[HOSTTYPE_SHORTHAND_KEY])
|
|
752
747
|
for curr_host_type_shorthand in host_type_shorthands:
|
|
753
748
|
concatted_dfs, curr_validation_msgs = _generate_metadata_for_a_host_type(
|
|
754
|
-
metadata_df, curr_host_type_shorthand,
|
|
749
|
+
metadata_df, curr_host_type_shorthand, full_flat_config_dict)
|
|
755
750
|
|
|
756
751
|
host_type_dfs.append(concatted_dfs)
|
|
757
752
|
validation_msgs.extend(curr_validation_msgs)
|
|
@@ -767,7 +762,7 @@ def _generate_metadata_for_host_types(
|
|
|
767
762
|
# NB: the full flat config dict is passed as the settings source here
|
|
768
763
|
# because we don't have a more specific dict at this point.
|
|
769
764
|
output_df = _fill_na_if_default(
|
|
770
|
-
output_df,
|
|
765
|
+
output_df, full_flat_config_dict)
|
|
771
766
|
|
|
772
767
|
# TODO: this is setting a value in the output; should it be centralized
|
|
773
768
|
# so it is easy to find?
|
|
@@ -779,7 +774,6 @@ def _generate_metadata_for_host_types(
|
|
|
779
774
|
def _generate_metadata_for_a_host_type(
|
|
780
775
|
metadata_df: pandas.DataFrame,
|
|
781
776
|
a_host_type: str,
|
|
782
|
-
settings_dict: Dict[str, Any],
|
|
783
777
|
full_flat_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
|
|
784
778
|
"""Generate metadata df for samples with a specific host type.
|
|
785
779
|
|
|
@@ -790,8 +784,6 @@ def _generate_metadata_for_a_host_type(
|
|
|
790
784
|
the columns in REQUIRED_RAW_METADATA_FIELDS.
|
|
791
785
|
a_host_type : str
|
|
792
786
|
The specific host type for which to process samples.
|
|
793
|
-
settings_dict : Dict[str, Any]
|
|
794
|
-
Dictionary containing global settings for default/nan/etc.
|
|
795
787
|
full_flat_config_dict : Dict[str, Any]
|
|
796
788
|
Fully combined flat-host-type config dictionary.
|
|
797
789
|
|
|
@@ -814,16 +806,11 @@ def _generate_metadata_for_a_host_type(
|
|
|
814
806
|
# for these samples but do not error out; move on to the next host type
|
|
815
807
|
update_metadata_df_field(
|
|
816
808
|
host_type_df, QC_NOTE_KEY, "invalid host_type")
|
|
817
|
-
# host_type_df[QC_NOTE_KEY] = "invalid host_type"
|
|
818
809
|
concatted_df = host_type_df
|
|
819
810
|
else:
|
|
820
811
|
# gather the config for this host type; the settings it carries are the ones used below
|
|
821
812
|
a_host_type_config_dict = \
|
|
822
813
|
full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY][a_host_type]
|
|
823
|
-
global_plus_host_settings_dict = deepcopy_dict(settings_dict)
|
|
824
|
-
# if this host type has a default value for empty fields, use it; otherwise, use the global default
|
|
825
|
-
global_plus_host_settings_dict[DEFAULT_KEY] = a_host_type_config_dict.get(
|
|
826
|
-
DEFAULT_KEY, global_plus_host_settings_dict[DEFAULT_KEY])
|
|
827
814
|
|
|
828
815
|
dfs_to_concat = []
|
|
829
816
|
# loop through each sample type in the metadata for this host type
|
|
@@ -833,8 +820,7 @@ def _generate_metadata_for_a_host_type(
|
|
|
833
820
|
# generate the specific metadata for this sample type *in this host type*
|
|
834
821
|
curr_sample_type_df, curr_validation_msgs = \
|
|
835
822
|
_generate_metadata_for_a_sample_type_in_a_host_type(
|
|
836
|
-
host_type_df, curr_sample_type,
|
|
837
|
-
a_host_type_config_dict)
|
|
823
|
+
host_type_df, curr_sample_type, a_host_type_config_dict)
|
|
838
824
|
|
|
839
825
|
dfs_to_concat.append(curr_sample_type_df)
|
|
840
826
|
validation_msgs.extend(curr_validation_msgs)
|
|
@@ -851,7 +837,6 @@ def _generate_metadata_for_a_host_type(
|
|
|
851
837
|
def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
852
838
|
host_type_metadata_df: pandas.DataFrame,
|
|
853
839
|
a_sample_type: str,
|
|
854
|
-
global_plus_host_settings_dict: Dict[str, Any],
|
|
855
840
|
a_host_type_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
|
|
856
841
|
"""Generate metadata df for samples with a specific sample type within a specific host type.
|
|
857
842
|
|
|
@@ -861,8 +846,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
861
846
|
DataFrame containing metadata samples for a specific host type.
|
|
862
847
|
a_sample_type : str
|
|
863
848
|
The sample type to process.
|
|
864
|
-
global_plus_host_settings_dict : Dict[str, Any]
|
|
865
|
-
Dictionary containing default/nan/etc settings for current context.
|
|
866
849
|
a_host_type_config_dict : Dict[str, Any]
|
|
867
850
|
Dictionary containing config for this host type.
|
|
868
851
|
|
|
@@ -901,19 +884,19 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
|
901
884
|
sample_type_df = _update_metadata_from_dict(
|
|
902
885
|
sample_type_df, full_sample_type_metadata_fields_dict,
|
|
903
886
|
dict_is_metadata_fields=True,
|
|
904
|
-
overwrite_non_nans=
|
|
887
|
+
overwrite_non_nans=a_host_type_config_dict[OVERWRITE_NON_NANS_KEY])
|
|
905
888
|
|
|
906
889
|
# for fields that are required but not yet filled, replace the placeholder with
|
|
907
890
|
# either an indicator that it should be blank or else
|
|
908
891
|
# fill with NA (replaced with default just below), based on config setting
|
|
909
|
-
leave_reqs_blank =
|
|
892
|
+
leave_reqs_blank = a_host_type_config_dict[LEAVE_REQUIREDS_BLANK_KEY]
|
|
910
893
|
reqs_val = LEAVE_BLANK_VAL if leave_reqs_blank else np.nan
|
|
911
894
|
sample_type_df.replace(
|
|
912
895
|
to_replace=REQ_PLACEHOLDER, value=reqs_val, inplace=True)
|
|
913
896
|
|
|
914
897
|
# fill NAs with appropriate default value if any is set
|
|
915
898
|
sample_type_df = _fill_na_if_default(
|
|
916
|
-
sample_type_df,
|
|
899
|
+
sample_type_df, a_host_type_config_dict)
|
|
917
900
|
|
|
918
901
|
# validate the metadata df based on the specific requirements
|
|
919
902
|
# for this host+sample type
|
|
@@ -1095,7 +1078,6 @@ def _update_metadata_from_metadata_fields_dict(
|
|
|
1095
1078
|
# fill NAs with default value if any is set
|
|
1096
1079
|
def _fill_na_if_default(
|
|
1097
1080
|
metadata_df: pandas.DataFrame,
|
|
1098
|
-
specific_dict: Dict[str, Any],
|
|
1099
1081
|
settings_dict: Dict[str, Any]) -> pandas.DataFrame:
|
|
1100
1082
|
"""Fill NaN values in metadata df with default values if available.
|
|
1101
1083
|
|
|
@@ -1103,24 +1085,20 @@ def _fill_na_if_default(
|
|
|
1103
1085
|
----------
|
|
1104
1086
|
metadata_df : pandas.DataFrame
|
|
1105
1087
|
The metadata DataFrame to process.
|
|
1106
|
-
specific_dict : Dict[str, Any]
|
|
1107
|
-
Dictionary containing context-specific settings. Will be used first as a source of default values.
|
|
1108
1088
|
settings_dict : Dict[str, Any]
|
|
1109
|
-
Dictionary containing
|
|
1110
|
-
source of default values if specific_dict does not contain a DEFAULT_KEY.
|
|
1089
|
+
Dictionary containing settings; its DEFAULT_KEY entry, if any, is used as the fill value.
|
|
1111
1090
|
|
|
1112
1091
|
Returns
|
|
1113
1092
|
-------
|
|
1114
1093
|
pandas.DataFrame
|
|
1115
1094
|
The updated DataFrame with NaN values filled. Unchanged if no default values are set.
|
|
1116
1095
|
"""
|
|
1117
|
-
default_val =
|
|
1096
|
+
default_val = settings_dict.get(DEFAULT_KEY)
|
|
1118
1097
|
if default_val:
|
|
1119
1098
|
# TODO: this is setting a value in the output; should it be
|
|
1120
1099
|
# centralized so it is easy to find?
|
|
1121
1100
|
metadata_df = \
|
|
1122
1101
|
metadata_df.fillna(default_val)
|
|
1123
|
-
# metadata_df.astype("string").fillna(default_val)
|
|
1124
1102
|
|
|
1125
1103
|
return metadata_df
|
|
1126
1104
|
|
|
@@ -51,6 +51,13 @@ REQUIRED_RAW_METADATA_FIELDS = [SAMPLE_NAME_KEY,
|
|
|
51
51
|
SAMPLETYPE_SHORTHAND_KEY]
|
|
52
52
|
|
|
53
53
|
|
|
54
|
+
GLOBAL_SETTINGS_KEYS = [
|
|
55
|
+
DEFAULT_KEY,
|
|
56
|
+
LEAVE_REQUIREDS_BLANK_KEY,
|
|
57
|
+
OVERWRITE_NON_NANS_KEY
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
54
61
|
def extract_config_dict(
|
|
55
62
|
config_fp: Union[str, None]) -> dict:
|
|
56
63
|
"""Extract configuration dictionary from a YAML file.
|
|
@@ -17,7 +17,8 @@ from metameq.src.metadata_configurator import \
|
|
|
17
17
|
_id_sample_type_definition, \
|
|
18
18
|
update_wip_metadata_dict, \
|
|
19
19
|
build_full_flat_config_dict, \
|
|
20
|
-
_resolve_sample_type_aliases_and_bases
|
|
20
|
+
_resolve_sample_type_aliases_and_bases, \
|
|
21
|
+
_push_global_settings_into_top_host
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class TestMetadataConfigurator(TestCase):
|
|
@@ -3847,6 +3848,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3847
3848
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
3848
3849
|
# base: top level in test_standards.yml; no default of its own, so it receives the global settings
|
|
3849
3850
|
"base": {
|
|
3851
|
+
DEFAULT_KEY: "software_default",
|
|
3852
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3853
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3850
3854
|
METADATA_FIELDS_KEY: {
|
|
3851
3855
|
# sample_name defined at base level
|
|
3852
3856
|
"sample_name": {
|
|
@@ -3865,6 +3869,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3865
3869
|
"host_associated": {
|
|
3866
3870
|
# default defined at host_associated level
|
|
3867
3871
|
DEFAULT_KEY: "not provided",
|
|
3872
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3873
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3868
3874
|
METADATA_FIELDS_KEY: {
|
|
3869
3875
|
# description defined at host_associated level
|
|
3870
3876
|
"description": {
|
|
@@ -3919,6 +3925,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
3919
3925
|
"human": {
|
|
3920
3926
|
# default inherited from host_associated
|
|
3921
3927
|
DEFAULT_KEY: "not provided",
|
|
3928
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
3929
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
3922
3930
|
METADATA_FIELDS_KEY: {
|
|
3923
3931
|
# custom_field added from study_specific_metadata
|
|
3924
3932
|
"custom_field": {
|
|
@@ -4037,6 +4045,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4037
4045
|
"mouse": {
|
|
4038
4046
|
# default inherited from host_associated
|
|
4039
4047
|
DEFAULT_KEY: "not provided",
|
|
4048
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4049
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4040
4050
|
METADATA_FIELDS_KEY: {
|
|
4041
4051
|
# description inherited from host_associated (not overridden)
|
|
4042
4052
|
"description": {
|
|
@@ -4103,6 +4113,7 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4103
4113
|
}
|
|
4104
4114
|
}
|
|
4105
4115
|
}
|
|
4116
|
+
|
|
4106
4117
|
self.assertEqual(expected, result)
|
|
4107
4118
|
|
|
4108
4119
|
def test_build_full_flat_config_dict_without_study_config(self):
|
|
@@ -4130,6 +4141,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4130
4141
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4131
4142
|
# base: top level; no default of its own (global settings are pushed in), plus sample_name/sample_type
|
|
4132
4143
|
"base": {
|
|
4144
|
+
DEFAULT_KEY: "software_default",
|
|
4145
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4146
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4133
4147
|
METADATA_FIELDS_KEY: {
|
|
4134
4148
|
"sample_name": {
|
|
4135
4149
|
REQUIRED_KEY: True,
|
|
@@ -4145,6 +4159,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4145
4159
|
# host_associated: inherits from base, adds default and description
|
|
4146
4160
|
"host_associated": {
|
|
4147
4161
|
DEFAULT_KEY: "not provided",
|
|
4162
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4163
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4148
4164
|
METADATA_FIELDS_KEY: {
|
|
4149
4165
|
"description": {
|
|
4150
4166
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4194,6 +4210,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4194
4210
|
# human: inherits from host_associated, overrides description
|
|
4195
4211
|
"human": {
|
|
4196
4212
|
DEFAULT_KEY: "not provided",
|
|
4213
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4214
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4197
4215
|
METADATA_FIELDS_KEY: {
|
|
4198
4216
|
"description": {
|
|
4199
4217
|
DEFAULT_KEY: "human sample",
|
|
@@ -4291,6 +4309,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4291
4309
|
# mouse: inherits from host_associated, keeps parent description
|
|
4292
4310
|
"mouse": {
|
|
4293
4311
|
DEFAULT_KEY: "not provided",
|
|
4312
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4313
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4294
4314
|
METADATA_FIELDS_KEY: {
|
|
4295
4315
|
"description": {
|
|
4296
4316
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4395,6 +4415,12 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4395
4415
|
# Flattened host types
|
|
4396
4416
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4397
4417
|
"base": {
|
|
4418
|
+
# default from study_config overrides software_config
|
|
4419
|
+
DEFAULT_KEY: "study_default",
|
|
4420
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4421
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4422
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4423
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4398
4424
|
METADATA_FIELDS_KEY: {
|
|
4399
4425
|
"sample_name": {
|
|
4400
4426
|
REQUIRED_KEY: True,
|
|
@@ -4409,6 +4435,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4409
4435
|
},
|
|
4410
4436
|
"host_associated": {
|
|
4411
4437
|
DEFAULT_KEY: "not provided",
|
|
4438
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4439
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4440
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4441
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4412
4442
|
METADATA_FIELDS_KEY: {
|
|
4413
4443
|
"description": {
|
|
4414
4444
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4457,6 +4487,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4457
4487
|
},
|
|
4458
4488
|
"human": {
|
|
4459
4489
|
DEFAULT_KEY: "not provided",
|
|
4490
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4491
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4492
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4493
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4460
4494
|
METADATA_FIELDS_KEY: {
|
|
4461
4495
|
"description": {
|
|
4462
4496
|
DEFAULT_KEY: "human sample",
|
|
@@ -4553,6 +4587,10 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4553
4587
|
},
|
|
4554
4588
|
"mouse": {
|
|
4555
4589
|
DEFAULT_KEY: "not provided",
|
|
4590
|
+
# leave_requireds_blank from study_config overrides software_config
|
|
4591
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4592
|
+
# overwrite_non_nans from software_config (not overridden by study)
|
|
4593
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4556
4594
|
METADATA_FIELDS_KEY: {
|
|
4557
4595
|
"description": {
|
|
4558
4596
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4649,6 +4687,9 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4649
4687
|
# Flattened host types
|
|
4650
4688
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4651
4689
|
"base": {
|
|
4690
|
+
DEFAULT_KEY: "not applicable",
|
|
4691
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4692
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4652
4693
|
METADATA_FIELDS_KEY: {
|
|
4653
4694
|
"sample_name": {
|
|
4654
4695
|
REQUIRED_KEY: True,
|
|
@@ -4663,6 +4704,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4663
4704
|
},
|
|
4664
4705
|
"host_associated": {
|
|
4665
4706
|
DEFAULT_KEY: "not provided",
|
|
4707
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4708
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4666
4709
|
METADATA_FIELDS_KEY: {
|
|
4667
4710
|
"description": {
|
|
4668
4711
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4711,6 +4754,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4711
4754
|
},
|
|
4712
4755
|
"human": {
|
|
4713
4756
|
DEFAULT_KEY: "not provided",
|
|
4757
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4758
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4714
4759
|
METADATA_FIELDS_KEY: {
|
|
4715
4760
|
"description": {
|
|
4716
4761
|
DEFAULT_KEY: "human sample",
|
|
@@ -4807,6 +4852,8 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4807
4852
|
},
|
|
4808
4853
|
"mouse": {
|
|
4809
4854
|
DEFAULT_KEY: "not provided",
|
|
4855
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
4856
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
4810
4857
|
METADATA_FIELDS_KEY: {
|
|
4811
4858
|
"description": {
|
|
4812
4859
|
DEFAULT_KEY: "host associated sample",
|
|
@@ -4867,4 +4914,140 @@ class TestMetadataConfigurator(TestCase):
|
|
|
4867
4914
|
}
|
|
4868
4915
|
}
|
|
4869
4916
|
}
|
|
4917
|
+
|
|
4918
|
+
self.assertEqual(expected, result)
|
|
4919
|
+
|
|
4920
|
+
# Tests for _push_global_settings_into_top_host
|
|
4921
|
+
|
|
4922
|
+
def test__push_global_settings_into_top_host_single_setting(self):
|
|
4923
|
+
"""Test pushing a single global setting into the top-level host."""
|
|
4924
|
+
nested_hosts_dict = {
|
|
4925
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4926
|
+
"base": {
|
|
4927
|
+
METADATA_FIELDS_KEY: {
|
|
4928
|
+
"field1": {TYPE_KEY: "string"}
|
|
4929
|
+
}
|
|
4930
|
+
}
|
|
4931
|
+
}
|
|
4932
|
+
}
|
|
4933
|
+
flat_config_dict = {
|
|
4934
|
+
DEFAULT_KEY: "custom_default"
|
|
4935
|
+
}
|
|
4936
|
+
|
|
4937
|
+
expected = {
|
|
4938
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4939
|
+
"base": {
|
|
4940
|
+
DEFAULT_KEY: "custom_default",
|
|
4941
|
+
METADATA_FIELDS_KEY: {
|
|
4942
|
+
"field1": {TYPE_KEY: "string"}
|
|
4943
|
+
}
|
|
4944
|
+
}
|
|
4945
|
+
}
|
|
4946
|
+
}
|
|
4947
|
+
|
|
4948
|
+
result = _push_global_settings_into_top_host(
|
|
4949
|
+
nested_hosts_dict, flat_config_dict)
|
|
4950
|
+
|
|
4951
|
+
self.assertEqual(expected, result)
|
|
4952
|
+
# Original should be unchanged
|
|
4953
|
+
self.assertNotIn(
|
|
4954
|
+
DEFAULT_KEY,
|
|
4955
|
+
nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]["base"])
|
|
4956
|
+
|
|
4957
|
+
def test__push_global_settings_into_top_host_multiple_settings(self):
|
|
4958
|
+
"""Test pushing multiple global settings into the top-level host."""
|
|
4959
|
+
nested_hosts_dict = {
|
|
4960
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4961
|
+
"base": {
|
|
4962
|
+
METADATA_FIELDS_KEY: {
|
|
4963
|
+
"field1": {TYPE_KEY: "string"}
|
|
4964
|
+
}
|
|
4965
|
+
}
|
|
4966
|
+
}
|
|
4967
|
+
}
|
|
4968
|
+
flat_config_dict = {
|
|
4969
|
+
DEFAULT_KEY: "custom_default",
|
|
4970
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4971
|
+
OVERWRITE_NON_NANS_KEY: True
|
|
4972
|
+
}
|
|
4973
|
+
|
|
4974
|
+
expected = {
|
|
4975
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4976
|
+
"base": {
|
|
4977
|
+
DEFAULT_KEY: "custom_default",
|
|
4978
|
+
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
4979
|
+
OVERWRITE_NON_NANS_KEY: True,
|
|
4980
|
+
METADATA_FIELDS_KEY: {
|
|
4981
|
+
"field1": {TYPE_KEY: "string"}
|
|
4982
|
+
}
|
|
4983
|
+
}
|
|
4984
|
+
}
|
|
4985
|
+
}
|
|
4986
|
+
|
|
4987
|
+
result = _push_global_settings_into_top_host(
|
|
4988
|
+
nested_hosts_dict, flat_config_dict)
|
|
4989
|
+
|
|
4870
4990
|
self.assertEqual(expected, result)
|
|
4991
|
+
|
|
4992
|
+
def test__push_global_settings_into_top_host_no_settings(self):
|
|
4993
|
+
"""Test that function returns copy when no global settings present."""
|
|
4994
|
+
nested_hosts_dict = {
|
|
4995
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
4996
|
+
"base": {
|
|
4997
|
+
METADATA_FIELDS_KEY: {
|
|
4998
|
+
"field1": {TYPE_KEY: "string"}
|
|
4999
|
+
}
|
|
5000
|
+
}
|
|
5001
|
+
}
|
|
5002
|
+
}
|
|
5003
|
+
flat_config_dict = {
|
|
5004
|
+
"some_other_key": "value"
|
|
5005
|
+
}
|
|
5006
|
+
|
|
5007
|
+
expected = {
|
|
5008
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
5009
|
+
"base": {
|
|
5010
|
+
METADATA_FIELDS_KEY: {
|
|
5011
|
+
"field1": {TYPE_KEY: "string"}
|
|
5012
|
+
}
|
|
5013
|
+
}
|
|
5014
|
+
}
|
|
5015
|
+
}
|
|
5016
|
+
|
|
5017
|
+
result = _push_global_settings_into_top_host(
|
|
5018
|
+
nested_hosts_dict, flat_config_dict)
|
|
5019
|
+
|
|
5020
|
+
self.assertEqual(expected, result)
|
|
5021
|
+
|
|
5022
|
+
def test__push_global_settings_into_top_host_raises_on_zero_hosts(self):
|
|
5023
|
+
"""Test that ValueError is raised when no top-level hosts exist."""
|
|
5024
|
+
nested_hosts_dict = {
|
|
5025
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {}
|
|
5026
|
+
}
|
|
5027
|
+
flat_config_dict = {
|
|
5028
|
+
DEFAULT_KEY: "custom_default"
|
|
5029
|
+
}
|
|
5030
|
+
|
|
5031
|
+
with self.assertRaisesRegex(
|
|
5032
|
+
ValueError,
|
|
5033
|
+
r"Expected exactly one top-level key.*found: \[\]"):
|
|
5034
|
+
_push_global_settings_into_top_host(
|
|
5035
|
+
nested_hosts_dict, flat_config_dict)
|
|
5036
|
+
|
|
5037
|
+
def test__push_global_settings_into_top_host_raises_on_multiple_hosts(self):
|
|
5038
|
+
"""Test that ValueError is raised when multiple top-level hosts exist."""
|
|
5039
|
+
nested_hosts_dict = {
|
|
5040
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
5041
|
+
"host1": {METADATA_FIELDS_KEY: {}},
|
|
5042
|
+
"host2": {METADATA_FIELDS_KEY: {}}
|
|
5043
|
+
}
|
|
5044
|
+
}
|
|
5045
|
+
flat_config_dict = {
|
|
5046
|
+
DEFAULT_KEY: "custom_default"
|
|
5047
|
+
}
|
|
5048
|
+
|
|
5049
|
+
with self.assertRaisesRegex(
|
|
5050
|
+
ValueError,
|
|
5051
|
+
r"Expected exactly one top-level key"):
|
|
5052
|
+
_push_global_settings_into_top_host(
|
|
5053
|
+
nested_hosts_dict, flat_config_dict)
|
|
@@ -748,16 +748,15 @@ class TestMetadataExtender(TestCase):
|
|
|
748
748
|
|
|
749
749
|
# Tests for _fill_na_if_default
|
|
750
750
|
|
|
751
|
-
def
|
|
751
|
+
def test__fill_na_if_default_has_default_in_settings(self):
|
|
752
752
|
"""Test that NaN values are filled with the default from settings_dict."""
|
|
753
753
|
input_df = pandas.DataFrame({
|
|
754
754
|
"field1": ["value1", np.nan, "value3"],
|
|
755
755
|
"field2": [np.nan, "value2", np.nan]
|
|
756
756
|
})
|
|
757
|
-
|
|
758
|
-
settings_dict = {DEFAULT_KEY: "unused"}
|
|
757
|
+
settings_dict = {DEFAULT_KEY: "filled"}
|
|
759
758
|
|
|
760
|
-
result = _fill_na_if_default(input_df,
|
|
759
|
+
result = _fill_na_if_default(input_df, settings_dict)
|
|
761
760
|
|
|
762
761
|
expected = pandas.DataFrame({
|
|
763
762
|
"field1": ["value1", "filled", "value3"],
|
|
@@ -765,18 +764,19 @@ class TestMetadataExtender(TestCase):
|
|
|
765
764
|
})
|
|
766
765
|
assert_frame_equal(expected, result)
|
|
767
766
|
|
|
768
|
-
def
|
|
769
|
-
"""Test that
|
|
767
|
+
def test__fill_na_if_default_no_default_in_settings(self):
|
|
768
|
+
"""Test that NaN values are unchanged when no default is in settings."""
|
|
770
769
|
input_df = pandas.DataFrame({
|
|
771
|
-
"field1": [np.nan]
|
|
770
|
+
"field1": ["value1", np.nan, "value3"],
|
|
771
|
+
"field2": [np.nan, "value2", np.nan]
|
|
772
772
|
})
|
|
773
|
-
|
|
774
|
-
settings_dict = {DEFAULT_KEY: "settings_default"}
|
|
773
|
+
settings_dict = {}
|
|
775
774
|
|
|
776
|
-
result = _fill_na_if_default(input_df,
|
|
775
|
+
result = _fill_na_if_default(input_df, settings_dict)
|
|
777
776
|
|
|
778
777
|
expected = pandas.DataFrame({
|
|
779
|
-
"field1": ["
|
|
778
|
+
"field1": ["value1", np.nan, "value3"],
|
|
779
|
+
"field2": [np.nan, "value2", np.nan]
|
|
780
780
|
})
|
|
781
781
|
assert_frame_equal(expected, result)
|
|
782
782
|
|
|
@@ -1273,14 +1273,13 @@ class TestMetadataExtender(TestCase):
|
|
|
1273
1273
|
SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
|
|
1274
1274
|
QC_NOTE_KEY: ["", ""]
|
|
1275
1275
|
})
|
|
1276
|
-
|
|
1277
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1278
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1279
|
-
DEFAULT_KEY: "not provided"
|
|
1280
|
-
}
|
|
1276
|
+
|
|
1281
1277
|
# Config is pre-resolved: sample type's metadata_fields already includes
|
|
1282
1278
|
# host fields merged in, plus sample_type and qiita_sample_type
|
|
1283
1279
|
host_type_config_dict = {
|
|
1280
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1281
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1282
|
+
DEFAULT_KEY: "not provided",
|
|
1284
1283
|
METADATA_FIELDS_KEY: {
|
|
1285
1284
|
"host_field": {
|
|
1286
1285
|
DEFAULT_KEY: "host_default",
|
|
@@ -1314,7 +1313,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1314
1313
|
}
|
|
1315
1314
|
|
|
1316
1315
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1317
|
-
input_df, "stool",
|
|
1316
|
+
input_df, "stool", host_type_config_dict)
|
|
1318
1317
|
|
|
1319
1318
|
expected_df = pandas.DataFrame({
|
|
1320
1319
|
SAMPLE_NAME_KEY: ["sample1", "sample2"],
|
|
@@ -1337,12 +1336,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1337
1336
|
SAMPLETYPE_SHORTHAND_KEY: ["unknown_type"],
|
|
1338
1337
|
QC_NOTE_KEY: [""]
|
|
1339
1338
|
})
|
|
1340
|
-
|
|
1339
|
+
|
|
1340
|
+
host_type_config_dict = {
|
|
1341
1341
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1342
1342
|
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1343
|
-
DEFAULT_KEY: "not provided"
|
|
1344
|
-
}
|
|
1345
|
-
host_type_config_dict = {
|
|
1343
|
+
DEFAULT_KEY: "not provided",
|
|
1346
1344
|
METADATA_FIELDS_KEY: {},
|
|
1347
1345
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1348
1346
|
"stool": {
|
|
@@ -1352,7 +1350,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1352
1350
|
}
|
|
1353
1351
|
|
|
1354
1352
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1355
|
-
input_df, "unknown_type",
|
|
1353
|
+
input_df, "unknown_type", host_type_config_dict)
|
|
1356
1354
|
|
|
1357
1355
|
expected_df = pandas.DataFrame({
|
|
1358
1356
|
SAMPLE_NAME_KEY: ["sample1"],
|
|
@@ -1371,12 +1369,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1371
1369
|
SAMPLETYPE_SHORTHAND_KEY: ["stool", "blood", "stool"],
|
|
1372
1370
|
QC_NOTE_KEY: ["", "", ""]
|
|
1373
1371
|
})
|
|
1374
|
-
|
|
1372
|
+
|
|
1373
|
+
host_type_config_dict = {
|
|
1375
1374
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1376
1375
|
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1377
|
-
DEFAULT_KEY: "not provided"
|
|
1378
|
-
}
|
|
1379
|
-
host_type_config_dict = {
|
|
1376
|
+
DEFAULT_KEY: "not provided",
|
|
1380
1377
|
METADATA_FIELDS_KEY: {},
|
|
1381
1378
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1382
1379
|
"stool": {
|
|
@@ -1394,7 +1391,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1394
1391
|
}
|
|
1395
1392
|
|
|
1396
1393
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1397
|
-
input_df, "stool",
|
|
1394
|
+
input_df, "stool", host_type_config_dict)
|
|
1398
1395
|
|
|
1399
1396
|
# Should only have the two stool samples
|
|
1400
1397
|
self.assertEqual(2, len(result_df))
|
|
@@ -1409,12 +1406,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1409
1406
|
SAMPLETYPE_SHORTHAND_KEY: ["stool"],
|
|
1410
1407
|
QC_NOTE_KEY: [""]
|
|
1411
1408
|
})
|
|
1412
|
-
|
|
1409
|
+
|
|
1410
|
+
host_type_config_dict = {
|
|
1413
1411
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1414
1412
|
LEAVE_REQUIREDS_BLANK_KEY: True,
|
|
1415
|
-
DEFAULT_KEY: "not provided"
|
|
1416
|
-
}
|
|
1417
|
-
host_type_config_dict = {
|
|
1413
|
+
DEFAULT_KEY: "not provided",
|
|
1418
1414
|
METADATA_FIELDS_KEY: {},
|
|
1419
1415
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1420
1416
|
"stool": {
|
|
@@ -1429,7 +1425,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1429
1425
|
}
|
|
1430
1426
|
|
|
1431
1427
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1432
|
-
input_df, "stool",
|
|
1428
|
+
input_df, "stool", host_type_config_dict)
|
|
1433
1429
|
|
|
1434
1430
|
self.assertEqual(LEAVE_BLANK_VAL, result_df["required_field"].iloc[0])
|
|
1435
1431
|
|
|
@@ -1441,12 +1437,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1441
1437
|
SAMPLETYPE_SHORTHAND_KEY: ["stool"],
|
|
1442
1438
|
QC_NOTE_KEY: [""]
|
|
1443
1439
|
})
|
|
1444
|
-
|
|
1440
|
+
|
|
1441
|
+
host_type_config_dict = {
|
|
1445
1442
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1446
1443
|
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1447
|
-
DEFAULT_KEY: "global_default"
|
|
1448
|
-
}
|
|
1449
|
-
host_type_config_dict = {
|
|
1444
|
+
DEFAULT_KEY: "global_default",
|
|
1450
1445
|
METADATA_FIELDS_KEY: {},
|
|
1451
1446
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1452
1447
|
"stool": {
|
|
@@ -1461,7 +1456,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1461
1456
|
}
|
|
1462
1457
|
|
|
1463
1458
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1464
|
-
input_df, "stool",
|
|
1459
|
+
input_df, "stool", host_type_config_dict)
|
|
1465
1460
|
|
|
1466
1461
|
# When leave_requireds_blank is False, NaN values get filled with global default
|
|
1467
1462
|
self.assertEqual("global_default", result_df["required_field"].iloc[0])
|
|
@@ -1475,12 +1470,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1475
1470
|
QC_NOTE_KEY: [""],
|
|
1476
1471
|
"existing_field": ["original_value"]
|
|
1477
1472
|
})
|
|
1478
|
-
|
|
1473
|
+
|
|
1474
|
+
host_type_config_dict = {
|
|
1479
1475
|
OVERWRITE_NON_NANS_KEY: True,
|
|
1480
1476
|
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1481
|
-
DEFAULT_KEY: "not provided"
|
|
1482
|
-
}
|
|
1483
|
-
host_type_config_dict = {
|
|
1477
|
+
DEFAULT_KEY: "not provided",
|
|
1484
1478
|
METADATA_FIELDS_KEY: {},
|
|
1485
1479
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1486
1480
|
"stool": {
|
|
@@ -1495,7 +1489,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1495
1489
|
}
|
|
1496
1490
|
|
|
1497
1491
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1498
|
-
input_df, "stool",
|
|
1492
|
+
input_df, "stool", host_type_config_dict)
|
|
1499
1493
|
|
|
1500
1494
|
self.assertEqual("new_value", result_df["existing_field"].iloc[0])
|
|
1501
1495
|
|
|
@@ -1508,12 +1502,11 @@ class TestMetadataExtender(TestCase):
|
|
|
1508
1502
|
QC_NOTE_KEY: [""],
|
|
1509
1503
|
"existing_field": ["original_value"]
|
|
1510
1504
|
})
|
|
1511
|
-
|
|
1505
|
+
|
|
1506
|
+
host_type_config_dict = {
|
|
1512
1507
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1513
1508
|
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1514
|
-
DEFAULT_KEY: "not provided"
|
|
1515
|
-
}
|
|
1516
|
-
host_type_config_dict = {
|
|
1509
|
+
DEFAULT_KEY: "not provided",
|
|
1517
1510
|
METADATA_FIELDS_KEY: {},
|
|
1518
1511
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1519
1512
|
"stool": {
|
|
@@ -1528,7 +1521,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1528
1521
|
}
|
|
1529
1522
|
|
|
1530
1523
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1531
|
-
input_df, "stool",
|
|
1524
|
+
input_df, "stool", host_type_config_dict)
|
|
1532
1525
|
|
|
1533
1526
|
self.assertEqual("original_value", result_df["existing_field"].iloc[0])
|
|
1534
1527
|
|
|
@@ -1540,14 +1533,13 @@ class TestMetadataExtender(TestCase):
|
|
|
1540
1533
|
SAMPLETYPE_SHORTHAND_KEY: ["feces"],
|
|
1541
1534
|
QC_NOTE_KEY: [""]
|
|
1542
1535
|
})
|
|
1543
|
-
|
|
1544
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1545
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1546
|
-
DEFAULT_KEY: "not provided"
|
|
1547
|
-
}
|
|
1536
|
+
|
|
1548
1537
|
# Config is pre-resolved: alias "feces" has its own metadata_fields
|
|
1549
1538
|
# that is a copy of "stool"'s resolved fields with sample_type="stool"
|
|
1550
1539
|
host_type_config_dict = {
|
|
1540
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1541
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1542
|
+
DEFAULT_KEY: "not provided",
|
|
1551
1543
|
METADATA_FIELDS_KEY: {},
|
|
1552
1544
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1553
1545
|
"feces": {
|
|
@@ -1590,7 +1582,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1590
1582
|
}
|
|
1591
1583
|
|
|
1592
1584
|
result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
|
|
1593
|
-
input_df, "feces",
|
|
1585
|
+
input_df, "feces", host_type_config_dict)
|
|
1594
1586
|
|
|
1595
1587
|
self.assertEqual("stool_value", result_df["stool_field"].iloc[0])
|
|
1596
1588
|
# sample_type should be set to the resolved type "stool"
|
|
@@ -1606,17 +1598,15 @@ class TestMetadataExtender(TestCase):
|
|
|
1606
1598
|
SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
|
|
1607
1599
|
QC_NOTE_KEY: ["", ""]
|
|
1608
1600
|
})
|
|
1609
|
-
|
|
1610
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1611
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1612
|
-
DEFAULT_KEY: "global_default"
|
|
1613
|
-
}
|
|
1601
|
+
|
|
1614
1602
|
# Config is pre-resolved: sample type's metadata_fields includes
|
|
1615
1603
|
# host fields merged in, plus sample_type and qiita_sample_type
|
|
1616
1604
|
full_flat_config_dict = {
|
|
1617
1605
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1618
1606
|
"human": {
|
|
1619
1607
|
DEFAULT_KEY: "human_default",
|
|
1608
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1609
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1620
1610
|
METADATA_FIELDS_KEY: {
|
|
1621
1611
|
"host_field": {
|
|
1622
1612
|
DEFAULT_KEY: "host_value",
|
|
@@ -1652,7 +1642,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1652
1642
|
}
|
|
1653
1643
|
|
|
1654
1644
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1655
|
-
input_df, "human",
|
|
1645
|
+
input_df, "human", full_flat_config_dict)
|
|
1656
1646
|
|
|
1657
1647
|
expected_df = pandas.DataFrame({
|
|
1658
1648
|
SAMPLE_NAME_KEY: ["sample1", "sample2"],
|
|
@@ -1675,14 +1665,13 @@ class TestMetadataExtender(TestCase):
|
|
|
1675
1665
|
SAMPLETYPE_SHORTHAND_KEY: ["stool"],
|
|
1676
1666
|
QC_NOTE_KEY: [""]
|
|
1677
1667
|
})
|
|
1678
|
-
|
|
1679
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1680
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1681
|
-
DEFAULT_KEY: "global_default"
|
|
1682
|
-
}
|
|
1668
|
+
|
|
1683
1669
|
full_flat_config_dict = {
|
|
1684
1670
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1685
1671
|
"human": {
|
|
1672
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1673
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1674
|
+
DEFAULT_KEY: "global_default",
|
|
1686
1675
|
METADATA_FIELDS_KEY: {},
|
|
1687
1676
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
|
|
1688
1677
|
}
|
|
@@ -1690,7 +1679,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1690
1679
|
}
|
|
1691
1680
|
|
|
1692
1681
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1693
|
-
input_df, "unknown_host",
|
|
1682
|
+
input_df, "unknown_host", full_flat_config_dict)
|
|
1694
1683
|
|
|
1695
1684
|
expected_df = pandas.DataFrame({
|
|
1696
1685
|
SAMPLE_NAME_KEY: ["sample1"],
|
|
@@ -1709,14 +1698,13 @@ class TestMetadataExtender(TestCase):
|
|
|
1709
1698
|
SAMPLETYPE_SHORTHAND_KEY: ["unknown_sample"],
|
|
1710
1699
|
QC_NOTE_KEY: [""]
|
|
1711
1700
|
})
|
|
1712
|
-
|
|
1713
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1714
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1715
|
-
DEFAULT_KEY: "global_default"
|
|
1716
|
-
}
|
|
1701
|
+
|
|
1717
1702
|
full_flat_config_dict = {
|
|
1718
1703
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1719
1704
|
"human": {
|
|
1705
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1706
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1707
|
+
DEFAULT_KEY: "global_default",
|
|
1720
1708
|
METADATA_FIELDS_KEY: {},
|
|
1721
1709
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1722
1710
|
"stool": {
|
|
@@ -1728,7 +1716,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1728
1716
|
}
|
|
1729
1717
|
|
|
1730
1718
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1731
|
-
input_df, "human",
|
|
1719
|
+
input_df, "human", full_flat_config_dict)
|
|
1732
1720
|
|
|
1733
1721
|
expected_df = pandas.DataFrame({
|
|
1734
1722
|
SAMPLE_NAME_KEY: ["sample1"],
|
|
@@ -1747,16 +1735,15 @@ class TestMetadataExtender(TestCase):
|
|
|
1747
1735
|
SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool", "stool"],
|
|
1748
1736
|
QC_NOTE_KEY: ["", "", ""]
|
|
1749
1737
|
})
|
|
1750
|
-
|
|
1751
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1752
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1753
|
-
DEFAULT_KEY: "global_default"
|
|
1754
|
-
}
|
|
1738
|
+
|
|
1755
1739
|
# Config is pre-resolved: sample type's metadata_fields includes
|
|
1756
1740
|
# host fields merged in, plus sample_type and qiita_sample_type
|
|
1757
1741
|
full_flat_config_dict = {
|
|
1758
1742
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1759
1743
|
"human": {
|
|
1744
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1745
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1746
|
+
DEFAULT_KEY: "global_default",
|
|
1760
1747
|
METADATA_FIELDS_KEY: {
|
|
1761
1748
|
"human_field": {
|
|
1762
1749
|
DEFAULT_KEY: "human_value",
|
|
@@ -1785,6 +1772,9 @@ class TestMetadataExtender(TestCase):
|
|
|
1785
1772
|
}
|
|
1786
1773
|
},
|
|
1787
1774
|
"mouse": {
|
|
1775
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1776
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1777
|
+
DEFAULT_KEY: "global_default",
|
|
1788
1778
|
METADATA_FIELDS_KEY: {},
|
|
1789
1779
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
|
|
1790
1780
|
}
|
|
@@ -1792,7 +1782,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1792
1782
|
}
|
|
1793
1783
|
|
|
1794
1784
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1795
|
-
input_df, "human",
|
|
1785
|
+
input_df, "human", full_flat_config_dict)
|
|
1796
1786
|
|
|
1797
1787
|
expected_df = pandas.DataFrame({
|
|
1798
1788
|
SAMPLE_NAME_KEY: ["sample1", "sample3"],
|
|
@@ -1813,17 +1803,15 @@ class TestMetadataExtender(TestCase):
|
|
|
1813
1803
|
SAMPLETYPE_SHORTHAND_KEY: ["stool"],
|
|
1814
1804
|
QC_NOTE_KEY: [""]
|
|
1815
1805
|
})
|
|
1816
|
-
|
|
1817
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1818
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1819
|
-
DEFAULT_KEY: "global_default"
|
|
1820
|
-
}
|
|
1806
|
+
|
|
1821
1807
|
# Config is pre-resolved: sample type's metadata_fields includes
|
|
1822
1808
|
# host fields merged in, plus sample_type and qiita_sample_type
|
|
1823
1809
|
full_flat_config_dict = {
|
|
1824
1810
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1825
1811
|
"human": {
|
|
1826
1812
|
DEFAULT_KEY: "human_specific_default",
|
|
1813
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1814
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1827
1815
|
METADATA_FIELDS_KEY: {},
|
|
1828
1816
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1829
1817
|
"stool": {
|
|
@@ -1850,7 +1838,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1850
1838
|
}
|
|
1851
1839
|
|
|
1852
1840
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1853
|
-
input_df, "human",
|
|
1841
|
+
input_df, "human", full_flat_config_dict)
|
|
1854
1842
|
|
|
1855
1843
|
expected_df = pandas.DataFrame({
|
|
1856
1844
|
SAMPLE_NAME_KEY: ["sample1"],
|
|
@@ -1871,17 +1859,14 @@ class TestMetadataExtender(TestCase):
|
|
|
1871
1859
|
SAMPLETYPE_SHORTHAND_KEY: ["stool"],
|
|
1872
1860
|
QC_NOTE_KEY: [""]
|
|
1873
1861
|
})
|
|
1874
|
-
settings_dict = {
|
|
1875
|
-
OVERWRITE_NON_NANS_KEY: False,
|
|
1876
|
-
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1877
|
-
DEFAULT_KEY: "global_default"
|
|
1878
|
-
}
|
|
1879
1862
|
# Config is pre-resolved: sample type's metadata_fields includes
|
|
1880
1863
|
# host fields merged in, plus sample_type and qiita_sample_type
|
|
1881
1864
|
full_flat_config_dict = {
|
|
1882
1865
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1883
1866
|
"human": {
|
|
1884
|
-
|
|
1867
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1868
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1869
|
+
DEFAULT_KEY: "global_default",
|
|
1885
1870
|
METADATA_FIELDS_KEY: {},
|
|
1886
1871
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1887
1872
|
"stool": {
|
|
@@ -1908,7 +1893,7 @@ class TestMetadataExtender(TestCase):
|
|
|
1908
1893
|
}
|
|
1909
1894
|
|
|
1910
1895
|
result_df, validation_msgs = _generate_metadata_for_a_host_type(
|
|
1911
|
-
input_df, "human",
|
|
1896
|
+
input_df, "human", full_flat_config_dict)
|
|
1912
1897
|
|
|
1913
1898
|
expected_df = pandas.DataFrame({
|
|
1914
1899
|
SAMPLE_NAME_KEY: ["sample1"],
|
|
@@ -1939,6 +1924,9 @@ class TestMetadataExtender(TestCase):
|
|
|
1939
1924
|
OVERWRITE_NON_NANS_KEY: False,
|
|
1940
1925
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
1941
1926
|
"human": {
|
|
1927
|
+
DEFAULT_KEY: "global_default",
|
|
1928
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1929
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
1942
1930
|
METADATA_FIELDS_KEY: {
|
|
1943
1931
|
"host_field": {
|
|
1944
1932
|
DEFAULT_KEY: "host_value",
|
|
@@ -2005,6 +1993,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2005
1993
|
OVERWRITE_NON_NANS_KEY: False,
|
|
2006
1994
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2007
1995
|
"human": {
|
|
1996
|
+
DEFAULT_KEY: "global_default",
|
|
1997
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
1998
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2008
1999
|
METADATA_FIELDS_KEY: {
|
|
2009
2000
|
"human_field": {
|
|
2010
2001
|
DEFAULT_KEY: "human_value",
|
|
@@ -2051,6 +2042,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2051
2042
|
}
|
|
2052
2043
|
},
|
|
2053
2044
|
"mouse": {
|
|
2045
|
+
DEFAULT_KEY: "global_default",
|
|
2046
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2047
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2054
2048
|
METADATA_FIELDS_KEY: {
|
|
2055
2049
|
"mouse_field": {
|
|
2056
2050
|
DEFAULT_KEY: "mouse_value",
|
|
@@ -2182,6 +2176,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2182
2176
|
OVERWRITE_NON_NANS_KEY: False,
|
|
2183
2177
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2184
2178
|
"human": {
|
|
2179
|
+
DEFAULT_KEY: "global_default",
|
|
2180
|
+
LEAVE_REQUIREDS_BLANK_KEY: True, # This causes required fields to get LEAVE_BLANK_VAL
|
|
2181
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2185
2182
|
METADATA_FIELDS_KEY: {},
|
|
2186
2183
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2187
2184
|
"stool": {
|
|
@@ -2506,6 +2503,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2506
2503
|
OVERWRITE_NON_NANS_KEY: False,
|
|
2507
2504
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2508
2505
|
"human": {
|
|
2506
|
+
DEFAULT_KEY: "not provided",
|
|
2507
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2508
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2509
2509
|
METADATA_FIELDS_KEY: {
|
|
2510
2510
|
"host_field": {
|
|
2511
2511
|
DEFAULT_KEY: "host_value",
|
|
@@ -2580,6 +2580,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2580
2580
|
},
|
|
2581
2581
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2582
2582
|
"human": {
|
|
2583
|
+
DEFAULT_KEY: "not provided",
|
|
2584
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2585
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2583
2586
|
METADATA_FIELDS_KEY: {},
|
|
2584
2587
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2585
2588
|
"stool": {
|
|
@@ -2639,6 +2642,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2639
2642
|
},
|
|
2640
2643
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2641
2644
|
"human": {
|
|
2645
|
+
DEFAULT_KEY: "not provided",
|
|
2646
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2647
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2642
2648
|
METADATA_FIELDS_KEY: {},
|
|
2643
2649
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2644
2650
|
"stool": {
|
|
@@ -2687,6 +2693,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2687
2693
|
OVERWRITE_NON_NANS_KEY: False,
|
|
2688
2694
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2689
2695
|
"human": {
|
|
2696
|
+
DEFAULT_KEY: "not provided",
|
|
2697
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2698
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2690
2699
|
METADATA_FIELDS_KEY: {},
|
|
2691
2700
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
|
|
2692
2701
|
}
|
|
@@ -2721,6 +2730,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2721
2730
|
OVERWRITE_NON_NANS_KEY: False,
|
|
2722
2731
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2723
2732
|
"human": {
|
|
2733
|
+
DEFAULT_KEY: "not provided",
|
|
2734
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2735
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2724
2736
|
METADATA_FIELDS_KEY: {},
|
|
2725
2737
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2726
2738
|
"stool": {
|
|
@@ -2781,6 +2793,9 @@ class TestMetadataExtender(TestCase):
|
|
|
2781
2793
|
},
|
|
2782
2794
|
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2783
2795
|
"human": {
|
|
2796
|
+
DEFAULT_KEY: "not provided",
|
|
2797
|
+
LEAVE_REQUIREDS_BLANK_KEY: False,
|
|
2798
|
+
OVERWRITE_NON_NANS_KEY: False,
|
|
2784
2799
|
METADATA_FIELDS_KEY: {},
|
|
2785
2800
|
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
2786
2801
|
"stool": {
|
|
@@ -4142,6 +4157,7 @@ class TestMetadataExtender(TestCase):
|
|
|
4142
4157
|
TEST_DIR, "data/test_project1_output_metadata.txt")
|
|
4143
4158
|
TEST_PROJECT1_EXPECTED_FAILS_FP = path.join(
|
|
4144
4159
|
TEST_DIR, "data/test_project1_output_fails.csv")
|
|
4160
|
+
|
|
4145
4161
|
def test_write_extended_metadata_from_df_project1_integration(self):
|
|
4146
4162
|
"""Integration test using project1 test data files."""
|
|
4147
4163
|
|
|
@@ -4153,7 +4169,6 @@ class TestMetadataExtender(TestCase):
|
|
|
4153
4169
|
with open(path.join(debug_dir, f"UNMATCHED_2_{file_name}"), 'w') as debug_actual_file:
|
|
4154
4170
|
debug_actual_file.write(actual_content)
|
|
4155
4171
|
|
|
4156
|
-
|
|
4157
4172
|
# Load input metadata CSV
|
|
4158
4173
|
input_df = pandas.read_csv(self.TEST_PROJECT1_METADATA_FP, dtype=str)
|
|
4159
4174
|
# for the columns "plating_notes" and "notes", fill NaN with empty string
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|