PyPI - metameq - Versions diffs - 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl - Mend

metameq 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

metameq/__init__.py +3 -2
metameq/_version.py +3 -3
metameq/src/metadata_configurator.py +53 -6
metameq/src/metadata_extender.py +16 -38
metameq/src/util.py +7 -0
metameq/tests/test_metadata_configurator.py +184 -1
metameq/tests/test_metadata_extender.py +306 -117
metameq/tests/test_metadata_validator.py +2 -2
{metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/METADATA +2 -1
{metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/RECORD +13 -13
{metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/WHEEL +0 -0
{metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/entry_points.txt +0 -0
{metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/top_level.txt +0 -0

metameq/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from metameq.src.metadata_extender import \
     write_extended_metadata, write_extended_metadata_from_df, \
     get_reserved_cols, get_extended_metadata_from_df_and_yaml, \
     write_metadata_results, id_missing_cols, find_standard_cols, \
-    find_nonstandard_cols, get_qc_failures
+    find_nonstandard_cols, get_qc_failures, extend_metadata_df
 from metameq.src.metadata_merger import merge_sample_and_subject_metadata, \
     merge_many_to_one_metadata, merge_one_to_one_metadata, \
     find_common_col_names, find_common_df_cols
@@ -36,7 +36,8 @@ __all__ = ["HOSTTYPE_SHORTHAND_KEY", "SAMPLETYPE_SHORTHAND_KEY",
            "find_nonstandard_cols", "get_qc_failures",
            "format_a_datetime", "standardize_input_sex",
            "set_life_stage_from_age_yrs", "transform_input_sex_to_std_sex",
-           "transform_age_to_life_stage", "transform_date_to_formatted_date"]
+           "transform_age_to_life_stage", "transform_date_to_formatted_date",
+           "extend_metadata_df"]
 from . import _version
 __version__ = _version.get_versions()['version']

metameq/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2026-02-01T21:32:06-0800",
+ "date": "2026-02-03T15:03:32-0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "87171fd73f555e2c03a15fa36ed9b5a912b824e9",
- "version": "2026.02.1"
+ "full-revisionid": "89687d23015566a7583179a69f92c2e1d1adcf61",
+ "version": "2026.02.3"
 }
 '''  # END VERSION_JSON

metameq/src/metadata_configurator.py CHANGED Viewed

@@ -5,7 +5,7 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
     HOST_TYPE_SPECIFIC_METADATA_KEY, \
     SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
     DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
-    SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
+    SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, GLOBAL_SETTINGS_KEYS
 def combine_stds_and_study_config(
@@ -257,11 +257,12 @@ def _combine_base_and_added_host_type(
     host_type_wip_nested_dict = \
         deepcopy_dict(host_type_base_dict)
-    # look for a default key in the add dict for this host; if
-    # it exists, add it to the wip dict (ok to overwrite existing)
-    if DEFAULT_KEY in host_type_add_dict:
-        host_type_wip_nested_dict[DEFAULT_KEY] = \
-            host_type_add_dict.get(DEFAULT_KEY)
+    # look for global settings in the add dict for this host; if
+    # any exists, add it to the wip dict (ok to overwrite existing)
+    for curr_global_setting_key in GLOBAL_SETTINGS_KEYS:
+        if curr_global_setting_key in host_type_add_dict:
+            host_type_wip_nested_dict[curr_global_setting_key] = \
+                host_type_add_dict.get(curr_global_setting_key)
     # combine add metadata fields with the wip metadata fields
     # for the current host type and assign to wip if not empty
@@ -636,6 +637,10 @@ def build_full_flat_config_dict(
         # since the software config doesn't include any host type specific info
         full_nested_hosts_dict = extract_stds_config(stds_fp)
+    full_nested_hosts_dict = _push_global_settings_into_top_host(
+            full_nested_hosts_dict,
+            software_plus_study_flat_config_dict)
     full_flat_hosts_dict = flatten_nested_stds_dict(
         full_nested_hosts_dict, None)
     software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
@@ -655,3 +660,45 @@ def build_full_flat_config_dict(
     full_flat_config_dict = software_plus_study_flat_config_dict
     return full_flat_config_dict
+def _push_global_settings_into_top_host(
+        a_full_nested_hosts_dict: Dict[str, Any],
+        a_software_plus_study_flat_config_dict: Dict[str, Any]) -> Dict[str, Any]:
+    """Push global settings from flat config into top-level host in nested hosts dict.
+    Parameters
+    ----------
+    a_full_nested_hosts_dict : Dict[str, Any]
+        Nested hosts dictionary to update.
+    a_software_plus_study_flat_config_dict : Dict[str, Any]
+        Flat configuration dictionary containing global settings.
+    Returns
+    -------
+    Dict[str, Any]
+        Updated nested hosts dictionary with global settings added to top-level host.
+    Raises
+    ------
+    ValueError
+        If there is not exactly one top-level host in the nested hosts dictionary.
+    """
+    result = deepcopy_dict(a_full_nested_hosts_dict)
+    # get the top level host(s) in full_nested_hosts_dict
+    # (should be only one because it is nested)
+    top_level_host_keys = list(a_full_nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY].keys())
+    if len(top_level_host_keys) != 1:
+        raise ValueError(f"Expected exactly one top-level key in "
+                         f"full_nested_hosts_dict but found: {top_level_host_keys}")
+    top_level_host_key = top_level_host_keys[0]
+    # check for each top-level setting from the software+study dictionary
+    # and add it under the top level host key in a_full_nested_hosts_dict
+    for curr_setting_key in GLOBAL_SETTINGS_KEYS:
+        if curr_setting_key in a_software_plus_study_flat_config_dict:
+            result[HOST_TYPE_SPECIFIC_METADATA_KEY][top_level_host_key][curr_setting_key] = \
+                a_software_plus_study_flat_config_dict[curr_setting_key]
+    return result

metameq/src/metadata_extender.py CHANGED Viewed

@@ -6,7 +6,7 @@ from pathlib import Path
 from datetime import datetime
 from typing import List, Dict, Optional, Tuple, Any
 from metameq.src.util import extract_config_dict, \
-    deepcopy_dict, validate_required_columns_exist, get_extension, \
+    validate_required_columns_exist, get_extension, \
     load_df_with_best_fit_encoding, update_metadata_df_field, \
     HOSTTYPE_SHORTHAND_KEY, SAMPLETYPE_SHORTHAND_KEY, \
     QC_NOTE_KEY, METADATA_FIELDS_KEY, HOST_TYPE_SPECIFIC_METADATA_KEY, \
@@ -301,13 +301,13 @@ def write_extended_metadata(
     # extract the extension from the raw_metadata_fp file path
     extension = os.path.splitext(raw_metadata_fp)[1]
     if extension == ".csv":
-        raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, ",")
+        raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, ",", str)
     elif extension == ".txt":
-        raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, "\t")
+        raw_metadata_df = load_df_with_best_fit_encoding(raw_metadata_fp, "\t", str)
     elif extension == ".xlsx":
         # NB: this loads (only) the first sheet of the input excel file.
         # If needed, can expand with pandas.read_excel sheet_name parameter.
-        raw_metadata_df = pandas.read_excel(raw_metadata_fp)
+        raw_metadata_df = pandas.read_excel(raw_metadata_fp, dtype=str)
     else:
         raise ValueError("Unrecognized input file extension; "
                          "must be .csv, .txt, or .xlsx")
@@ -451,7 +451,7 @@ def extend_metadata_df(
     full_flat_config_dict = build_full_flat_config_dict(
         study_specific_config_dict, software_config_dict, stds_fp)
-    needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
+    needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
                    (SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
     for curr_key, curr_options_key in needed_cols:
         if curr_key not in raw_metadata_df.columns:
@@ -485,7 +485,7 @@ def _get_specified_column_name(
         The metadata DataFrame to check.
     config_dict : Dict[str, Any], default=None
         Configuration dictionary. If provided, may contain a list of possible
-        column names under the key specified by col_options_key.
+        column names under the key specified by col_options_key.
         If None, defaults to values from the main config.yml file.
     Returns
     -------
@@ -503,7 +503,8 @@ def _get_specified_column_name(
                 found_name = col_name
                 break
-    return found_name
+    return found_name
 def write_metadata_results(
         metadata_df: pandas.DataFrame,
@@ -738,12 +739,6 @@ def _generate_metadata_for_host_types(
             - The processed DataFrame with specific metadata added to each sample of each host type
             - A list of validation messages
     """
-    # gather global settings
-    settings_dict = {DEFAULT_KEY: full_flat_config_dict.get(DEFAULT_KEY),
-                     LEAVE_REQUIREDS_BLANK_KEY:
-                         full_flat_config_dict.get(LEAVE_REQUIREDS_BLANK_KEY),
-                     OVERWRITE_NON_NANS_KEY:
-                         full_flat_config_dict.get(OVERWRITE_NON_NANS_KEY)}
     validation_msgs = []
     host_type_dfs = []
@@ -751,7 +746,7 @@ def _generate_metadata_for_host_types(
     host_type_shorthands = pandas.unique(metadata_df[HOSTTYPE_SHORTHAND_KEY])
     for curr_host_type_shorthand in host_type_shorthands:
         concatted_dfs, curr_validation_msgs = _generate_metadata_for_a_host_type(
-                metadata_df, curr_host_type_shorthand, settings_dict, full_flat_config_dict)
+                metadata_df, curr_host_type_shorthand, full_flat_config_dict)
         host_type_dfs.append(concatted_dfs)
         validation_msgs.extend(curr_validation_msgs)
@@ -767,7 +762,7 @@ def _generate_metadata_for_host_types(
     # NB: passing in the same dict twice here is not a mistake, just a
     # convenience since we don't have a more specific dict at this point.
     output_df = _fill_na_if_default(
-        output_df, settings_dict, settings_dict)
+        output_df, full_flat_config_dict)
     # TODO: this is setting a value in the output; should it be centralized
     #  so it is easy to find?
@@ -779,7 +774,6 @@ def _generate_metadata_for_host_types(
 def _generate_metadata_for_a_host_type(
         metadata_df: pandas.DataFrame,
         a_host_type: str,
-        settings_dict: Dict[str, Any],
         full_flat_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
     """Generate metadata df for samples with a specific host type.
@@ -790,8 +784,6 @@ def _generate_metadata_for_a_host_type(
         the columns in REQUIRED_RAW_METADATA_FIELDS.
     a_host_type : str
         The specific host type for which to process samples.
-    settings_dict : Dict[str, Any]
-        Dictionary containing global settings for default/nan/etc.
     full_flat_config_dict : Dict[str, Any]
         Fully combined flat-host-type config dictionary.
@@ -814,16 +806,11 @@ def _generate_metadata_for_a_host_type(
         # for these samples but do not error out; move on to the next host type
         update_metadata_df_field(
             host_type_df, QC_NOTE_KEY, "invalid host_type")
-        # host_type_df[QC_NOTE_KEY] = "invalid host_type"
         concatted_df = host_type_df
     else:
         # gather host-type-specific settings and overwrite the global settings with them, if any
         a_host_type_config_dict = \
             full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY][a_host_type]
-        global_plus_host_settings_dict = deepcopy_dict(settings_dict)
-        # if this host type has a default value for empty fields, use it; otherwise, use the global default
-        global_plus_host_settings_dict[DEFAULT_KEY] = a_host_type_config_dict.get(
-            DEFAULT_KEY, global_plus_host_settings_dict[DEFAULT_KEY])
         dfs_to_concat = []
         # loop through each sample type in the metadata for this host type
@@ -833,8 +820,7 @@ def _generate_metadata_for_a_host_type(
             # generate the specific metadata for this sample type *in this host type*
             curr_sample_type_df, curr_validation_msgs = \
                 _generate_metadata_for_a_sample_type_in_a_host_type(
-                    host_type_df, curr_sample_type, global_plus_host_settings_dict,
-                    a_host_type_config_dict)
+                    host_type_df, curr_sample_type, a_host_type_config_dict)
             dfs_to_concat.append(curr_sample_type_df)
             validation_msgs.extend(curr_validation_msgs)
@@ -851,7 +837,6 @@ def _generate_metadata_for_a_host_type(
 def _generate_metadata_for_a_sample_type_in_a_host_type(
         host_type_metadata_df: pandas.DataFrame,
         a_sample_type: str,
-        global_plus_host_settings_dict: Dict[str, Any],
         a_host_type_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
     """Generate metadata df for samples with a specific sample type within a specific host type.
@@ -861,8 +846,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
         DataFrame containing metadata samples for a specific host type.
     a_sample_type : str
         The sample type to process.
-    global_plus_host_settings_dict : Dict[str, Any]
-        Dictionary containing default/nan/etc settings for current context.
     a_host_type_config_dict : Dict[str, Any]
         Dictionary containing config for this host type.
@@ -901,19 +884,19 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
         sample_type_df = _update_metadata_from_dict(
             sample_type_df, full_sample_type_metadata_fields_dict,
             dict_is_metadata_fields=True,
-            overwrite_non_nans=global_plus_host_settings_dict[OVERWRITE_NON_NANS_KEY])
+            overwrite_non_nans=a_host_type_config_dict[OVERWRITE_NON_NANS_KEY])
         # for fields that are required but not yet filled, replace the placeholder with
         # either an indicator that it should be blank or else
         # fill with NA (replaced with default just below), based on config setting
-        leave_reqs_blank = global_plus_host_settings_dict[LEAVE_REQUIREDS_BLANK_KEY]
+        leave_reqs_blank = a_host_type_config_dict[LEAVE_REQUIREDS_BLANK_KEY]
         reqs_val = LEAVE_BLANK_VAL if leave_reqs_blank else np.nan
         sample_type_df.replace(
             to_replace=REQ_PLACEHOLDER, value=reqs_val, inplace=True)
         # fill NAs with appropriate default value if any is set
         sample_type_df = _fill_na_if_default(
-            sample_type_df, full_sample_type_metadata_fields_dict, global_plus_host_settings_dict)
+            sample_type_df, a_host_type_config_dict)
         # validate the metadata df based on the specific requirements
         # for this host+sample type
@@ -1095,7 +1078,6 @@ def _update_metadata_from_metadata_fields_dict(
 # fill NAs with default value if any is set
 def _fill_na_if_default(
         metadata_df: pandas.DataFrame,
-        specific_dict: Dict[str, Any],
         settings_dict: Dict[str, Any]) -> pandas.DataFrame:
     """Fill NaN values in metadata df with default values if available.
@@ -1103,24 +1085,20 @@ def _fill_na_if_default(
     ----------
     metadata_df : pandas.DataFrame
         The metadata DataFrame to process.
-    specific_dict : Dict[str, Any]
-        Dictionary containing context-specific settings. Will be used first as a source of default values.
     settings_dict : Dict[str, Any]
-        Dictionary containing global settings. Will be used as a
-          source of default values if specific_dict does not contain a DEFAULT_KEY.
+        Dictionary containing settings.
     Returns
     -------
     pandas.DataFrame
         The updated DataFrame with NaN values filled. Unchanged if no default values are set.
     """
-    default_val = specific_dict.get(DEFAULT_KEY, settings_dict[DEFAULT_KEY])
+    default_val = settings_dict.get(DEFAULT_KEY)
     if default_val:
         # TODO: this is setting a value in the output; should it be
         #  centralized so it is easy to find?
         metadata_df = \
             metadata_df.fillna(default_val)
-#             metadata_df.astype("string").fillna(default_val)
     return metadata_df

metameq/src/util.py CHANGED Viewed

@@ -51,6 +51,13 @@ REQUIRED_RAW_METADATA_FIELDS = [SAMPLE_NAME_KEY,
                                 SAMPLETYPE_SHORTHAND_KEY]
+GLOBAL_SETTINGS_KEYS = [
+    DEFAULT_KEY,
+    LEAVE_REQUIREDS_BLANK_KEY,
+    OVERWRITE_NON_NANS_KEY
+]
 def extract_config_dict(
         config_fp: Union[str, None]) -> dict:
     """Extract configuration dictionary from a YAML file.

metameq/tests/test_metadata_configurator.py CHANGED Viewed

@@ -17,7 +17,8 @@ from metameq.src.metadata_configurator import \
     _id_sample_type_definition, \
     update_wip_metadata_dict, \
     build_full_flat_config_dict, \
-    _resolve_sample_type_aliases_and_bases
+    _resolve_sample_type_aliases_and_bases, \
+    _push_global_settings_into_top_host
 class TestMetadataConfigurator(TestCase):
@@ -3847,6 +3848,9 @@ class TestMetadataConfigurator(TestCase):
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 # base: top level in test_standards.yml, no default
                 "base": {
+                    DEFAULT_KEY: "software_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         # sample_name defined at base level
                         "sample_name": {
@@ -3865,6 +3869,8 @@ class TestMetadataConfigurator(TestCase):
                 "host_associated": {
                     # default defined at host_associated level
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         # description defined at host_associated level
                         "description": {
@@ -3919,6 +3925,8 @@ class TestMetadataConfigurator(TestCase):
                 "human": {
                     # default inherited from host_associated
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         # custom_field added from study_specific_metadata
                         "custom_field": {
@@ -4037,6 +4045,8 @@ class TestMetadataConfigurator(TestCase):
                 "mouse": {
                     # default inherited from host_associated
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         # description inherited from host_associated (not overridden)
                         "description": {
@@ -4103,6 +4113,7 @@ class TestMetadataConfigurator(TestCase):
                 }
             }
         }
         self.assertEqual(expected, result)
     def test_build_full_flat_config_dict_without_study_config(self):
@@ -4130,6 +4141,9 @@ class TestMetadataConfigurator(TestCase):
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 # base: top level, no default, just sample_name/sample_type
                 "base": {
+                    DEFAULT_KEY: "software_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "sample_name": {
                             REQUIRED_KEY: True,
@@ -4145,6 +4159,8 @@ class TestMetadataConfigurator(TestCase):
                 # host_associated: inherits from base, adds default and description
                 "host_associated": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4194,6 +4210,8 @@ class TestMetadataConfigurator(TestCase):
                 # human: inherits from host_associated, overrides description
                 "human": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "human sample",
@@ -4291,6 +4309,8 @@ class TestMetadataConfigurator(TestCase):
                 # mouse: inherits from host_associated, keeps parent description
                 "mouse": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4395,6 +4415,12 @@ class TestMetadataConfigurator(TestCase):
             # Flattened host types
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "base": {
+                    # default from study_config overrides software_config
+                    DEFAULT_KEY: "study_default",
+                    # leave_requireds_blank from study_config overrides software_config
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    # overwrite_non_nans from software_config (not overridden by study)
+                    OVERWRITE_NON_NANS_KEY: True,
                     METADATA_FIELDS_KEY: {
                         "sample_name": {
                             REQUIRED_KEY: True,
@@ -4409,6 +4435,10 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "host_associated": {
                     DEFAULT_KEY: "not provided",
+                    # leave_requireds_blank from study_config overrides software_config
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    # overwrite_non_nans from software_config (not overridden by study)
+                    OVERWRITE_NON_NANS_KEY: True,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4457,6 +4487,10 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "human": {
                     DEFAULT_KEY: "not provided",
+                    # leave_requireds_blank from study_config overrides software_config
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    # overwrite_non_nans from software_config (not overridden by study)
+                    OVERWRITE_NON_NANS_KEY: True,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "human sample",
@@ -4553,6 +4587,10 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "mouse": {
                     DEFAULT_KEY: "not provided",
+                    # leave_requireds_blank from study_config overrides software_config
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    # overwrite_non_nans from software_config (not overridden by study)
+                    OVERWRITE_NON_NANS_KEY: True,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4649,6 +4687,9 @@ class TestMetadataConfigurator(TestCase):
             # Flattened host types
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "base": {
+                    DEFAULT_KEY: "not applicable",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "sample_name": {
                             REQUIRED_KEY: True,
@@ -4663,6 +4704,8 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "host_associated": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4711,6 +4754,8 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "human": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "human sample",
@@ -4807,6 +4852,8 @@ class TestMetadataConfigurator(TestCase):
                 },
                 "mouse": {
                     DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "description": {
                             DEFAULT_KEY: "host associated sample",
@@ -4867,4 +4914,140 @@ class TestMetadataConfigurator(TestCase):
                 }
             }
         }
+        self.assertEqual(expected, result)
+    # Tests for _push_global_settings_into_top_host
+    def test__push_global_settings_into_top_host_single_setting(self):
+        """Test pushing a single global setting into the top-level host."""
+        nested_hosts_dict = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        flat_config_dict = {
+            DEFAULT_KEY: "custom_default"
+        }
+        expected = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    DEFAULT_KEY: "custom_default",
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        result = _push_global_settings_into_top_host(
+            nested_hosts_dict, flat_config_dict)
+        self.assertEqual(expected, result)
+        # Original should be unchanged
+        self.assertNotIn(
+            DEFAULT_KEY,
+            nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]["base"])
+    def test__push_global_settings_into_top_host_multiple_settings(self):
+        """Test pushing multiple global settings into the top-level host."""
+        nested_hosts_dict = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        flat_config_dict = {
+            DEFAULT_KEY: "custom_default",
+            LEAVE_REQUIREDS_BLANK_KEY: True,
+            OVERWRITE_NON_NANS_KEY: True
+        }
+        expected = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    DEFAULT_KEY: "custom_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,
+                    OVERWRITE_NON_NANS_KEY: True,
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        result = _push_global_settings_into_top_host(
+            nested_hosts_dict, flat_config_dict)
         self.assertEqual(expected, result)
+    def test__push_global_settings_into_top_host_no_settings(self):
+        """Test that function returns copy when no global settings present."""
+        nested_hosts_dict = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        flat_config_dict = {
+            "some_other_key": "value"
+        }
+        expected = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "base": {
+                    METADATA_FIELDS_KEY: {
+                        "field1": {TYPE_KEY: "string"}
+                    }
+                }
+            }
+        }
+        result = _push_global_settings_into_top_host(
+            nested_hosts_dict, flat_config_dict)
+        self.assertEqual(expected, result)
+    def test__push_global_settings_into_top_host_raises_on_zero_hosts(self):
+        """Test that ValueError is raised when no top-level hosts exist."""
+        nested_hosts_dict = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {}
+        }
+        flat_config_dict = {
+            DEFAULT_KEY: "custom_default"
+        }
+        with self.assertRaisesRegex(
+                ValueError,
+                r"Expected exactly one top-level key.*found: \[\]"):
+            _push_global_settings_into_top_host(
+                nested_hosts_dict, flat_config_dict)
+    def test__push_global_settings_into_top_host_raises_on_multiple_hosts(self):
+        """Test that ValueError is raised when multiple top-level hosts exist."""
+        nested_hosts_dict = {
+            HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                "host1": {METADATA_FIELDS_KEY: {}},
+                "host2": {METADATA_FIELDS_KEY: {}}
+            }
+        }
+        flat_config_dict = {
+            DEFAULT_KEY: "custom_default"
+        }
+        with self.assertRaisesRegex(
+                ValueError,
+                r"Expected exactly one top-level key"):
+            _push_global_settings_into_top_host(
+                nested_hosts_dict, flat_config_dict)

metameq 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl

metameq 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl