PyPI - metameq - Versions diffs - 2026.1.2__py3-none-any.whl → 2026.2.2__py3-none-any.whl - Mend

metameq 2026.1.2__py3-none-any.whl → 2026.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

metameq/_version.py +3 -3
metameq/config/config.yml +5 -1
metameq/src/metadata_configurator.py +53 -6
metameq/src/metadata_extender.py +58 -36
metameq/src/util.py +9 -0
metameq/tests/test_metadata_configurator.py +188 -2
metameq/tests/test_metadata_extender.py +246 -93
{metameq-2026.1.2.dist-info → metameq-2026.2.2.dist-info}/METADATA +1 -1
{metameq-2026.1.2.dist-info → metameq-2026.2.2.dist-info}/RECORD +12 -12
{metameq-2026.1.2.dist-info → metameq-2026.2.2.dist-info}/WHEEL +0 -0
{metameq-2026.1.2.dist-info → metameq-2026.2.2.dist-info}/entry_points.txt +0 -0
{metameq-2026.1.2.dist-info → metameq-2026.2.2.dist-info}/top_level.txt +0 -0

metameq/tests/test_metadata_extender.py CHANGED Viewed

@@ -14,7 +14,8 @@ from metameq.src.util import \
     OVERWRITE_NON_NANS_KEY, LEAVE_REQUIREDS_BLANK_KEY, LEAVE_BLANK_VAL, \
     HOST_TYPE_SPECIFIC_METADATA_KEY, METADATA_TRANSFORMERS_KEY, \
     SOURCES_KEY, FUNCTION_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
-    STUDY_SPECIFIC_METADATA_KEY
+    STUDY_SPECIFIC_METADATA_KEY, HOSTTYPE_COL_OPTIONS_KEY, \
+    SAMPLETYPE_COL_OPTIONS_KEY
 from metameq.src.metadata_extender import \
     id_missing_cols, get_qc_failures, get_reserved_cols, find_standard_cols, \
     find_nonstandard_cols, write_metadata_results, \
@@ -26,7 +27,7 @@ from metameq.src.metadata_extender import \
     _generate_metadata_for_a_host_type, _generate_metadata_for_host_types, \
     _transform_metadata, _populate_metadata_df, extend_metadata_df, \
     _get_study_specific_config, _output_metadata_df_to_files, \
-    INTERNAL_COL_KEYS, REQ_PLACEHOLDER
+    _get_specified_column_name, INTERNAL_COL_KEYS, REQ_PLACEHOLDER
 class TestMetadataExtender(TestCase):
@@ -747,16 +748,15 @@ class TestMetadataExtender(TestCase):
     # Tests for _fill_na_if_default
-    def test__fill_na_if_default_specific_overrides_settings(self):
+    def test__fill_na_if_default_has_default_in_settings(self):
         """Test that specific_dict default takes precedence over settings_dict."""
         input_df = pandas.DataFrame({
             "field1": ["value1", np.nan, "value3"],
             "field2": [np.nan, "value2", np.nan]
         })
-        specific_dict = {DEFAULT_KEY: "filled"}
-        settings_dict = {DEFAULT_KEY: "unused"}
+        settings_dict = {DEFAULT_KEY: "filled"}
-        result = _fill_na_if_default(input_df, specific_dict, settings_dict)
+        result = _fill_na_if_default(input_df, settings_dict)
         expected = pandas.DataFrame({
             "field1": ["value1", "filled", "value3"],
@@ -764,18 +764,19 @@ class TestMetadataExtender(TestCase):
         })
         assert_frame_equal(expected, result)
-    def test__fill_na_if_default_uses_settings_when_specific_missing(self):
-        """Test that settings_dict default is used when specific_dict has no default."""
+    def test__fill_na_if_default_no_default_in_settings(self):
+        """Test that NaN values are unchanged when no default is in settings."""
         input_df = pandas.DataFrame({
-            "field1": [np.nan]
+            "field1": ["value1", np.nan, "value3"],
+            "field2": [np.nan, "value2", np.nan]
         })
-        specific_dict = {}
-        settings_dict = {DEFAULT_KEY: "settings_default"}
+        settings_dict = {}
-        result = _fill_na_if_default(input_df, specific_dict, settings_dict)
+        result = _fill_na_if_default(input_df, settings_dict)
         expected = pandas.DataFrame({
-            "field1": ["settings_default"]
+            "field1": ["value1", np.nan, "value3"],
+            "field2": [np.nan, "value2", np.nan]
         })
         assert_frame_equal(expected, result)
@@ -1272,14 +1273,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
             QC_NOTE_KEY: ["", ""]
         })
-        global_plus_host_settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
         # Config is pre-resolved: sample type's metadata_fields already includes
         # host fields merged in, plus sample_type and qiita_sample_type
         host_type_config_dict = {
+            OVERWRITE_NON_NANS_KEY: False,
+            LEAVE_REQUIREDS_BLANK_KEY: False,
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {
                 "host_field": {
                     DEFAULT_KEY: "host_default",
@@ -1313,7 +1313,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1336,12 +1336,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["unknown_type"],
             QC_NOTE_KEY: [""]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1351,7 +1350,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "unknown_type", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "unknown_type", host_type_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1370,12 +1369,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "blood", "stool"],
             QC_NOTE_KEY: ["", "", ""]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1393,7 +1391,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         # Should only have the two stool samples
         self.assertEqual(2, len(result_df))
@@ -1408,12 +1406,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: True,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1428,7 +1425,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         self.assertEqual(LEAVE_BLANK_VAL, result_df["required_field"].iloc[0])
@@ -1440,12 +1437,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "global_default",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1460,7 +1456,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         # When leave_requireds_blank is False, NaN values get filled with global default
         self.assertEqual("global_default", result_df["required_field"].iloc[0])
@@ -1474,12 +1470,11 @@ class TestMetadataExtender(TestCase):
             QC_NOTE_KEY: [""],
             "existing_field": ["original_value"]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: True,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1494,7 +1489,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         self.assertEqual("new_value", result_df["existing_field"].iloc[0])
@@ -1507,12 +1502,11 @@ class TestMetadataExtender(TestCase):
             QC_NOTE_KEY: [""],
             "existing_field": ["original_value"]
         })
-        global_plus_host_settings_dict = {
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1527,7 +1521,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "stool", host_type_config_dict)
         self.assertEqual("original_value", result_df["existing_field"].iloc[0])
@@ -1539,14 +1533,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["feces"],
             QC_NOTE_KEY: [""]
         })
-        global_plus_host_settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
         # Config is pre-resolved: alias "feces" has its own metadata_fields
         # that is a copy of "stool"'s resolved fields with sample_type="stool"
         host_type_config_dict = {
+            OVERWRITE_NON_NANS_KEY: False,
+            LEAVE_REQUIREDS_BLANK_KEY: False,
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "feces": {
@@ -1589,7 +1582,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "feces", global_plus_host_settings_dict, host_type_config_dict)
+            input_df, "feces", host_type_config_dict)
         self.assertEqual("stool_value", result_df["stool_field"].iloc[0])
         # sample_type should be set to the resolved type "stool"
@@ -1605,17 +1598,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
             QC_NOTE_KEY: ["", ""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
                     DEFAULT_KEY: "human_default",
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -1651,7 +1642,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human", settings_dict, full_flat_config_dict)
+            input_df, "human", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1674,14 +1665,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -1689,7 +1679,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "unknown_host", settings_dict, full_flat_config_dict)
+            input_df, "unknown_host", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1708,14 +1698,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["unknown_sample"],
             QC_NOTE_KEY: [""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1727,7 +1716,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human", settings_dict, full_flat_config_dict)
+            input_df, "human", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1746,16 +1735,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool", "stool"],
             QC_NOTE_KEY: ["", "", ""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {
                         "human_field": {
                             DEFAULT_KEY: "human_value",
@@ -1784,6 +1772,9 @@ class TestMetadataExtender(TestCase):
                     }
                 },
                 "mouse": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -1791,7 +1782,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human", settings_dict, full_flat_config_dict)
+            input_df, "human", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample3"],
@@ -1812,17 +1803,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
                     DEFAULT_KEY: "human_specific_default",
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1849,7 +1838,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human", settings_dict, full_flat_config_dict)
+            input_df, "human", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1870,17 +1859,14 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
-                    # No DEFAULT_KEY here
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1907,7 +1893,7 @@ class TestMetadataExtender(TestCase):
         }
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human", settings_dict, full_flat_config_dict)
+            input_df, "human", full_flat_config_dict)
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1938,6 +1924,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -2004,6 +1993,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "human_field": {
                             DEFAULT_KEY: "human_value",
@@ -2050,6 +2042,9 @@ class TestMetadataExtender(TestCase):
                     }
                 },
                 "mouse": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "mouse_field": {
                             DEFAULT_KEY: "mouse_value",
@@ -2181,6 +2176,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,  # This causes required fields to get LEAVE_BLANK_VAL
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2505,6 +2503,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -2579,6 +2580,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2638,6 +2642,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2686,6 +2693,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -2720,6 +2730,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2780,6 +2793,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -3189,6 +3205,63 @@ class TestMetadataExtender(TestCase):
         })
         assert_frame_equal(expected_df, result_df)
+    def test_extend_metadata_df_with_alternate_column_names(self):
+        """Test metadata extension with alternate hosttype and sampletype column names."""
+        # Use alternate column names instead of hosttype_shorthand and sampletype_shorthand
+        input_df = pandas.DataFrame({
+            SAMPLE_NAME_KEY: ["sample1", "sample2"],
+            "host_type": ["human", "human"],
+            "sample": ["stool", "stool"]
+        })
+        study_config = {
+            DEFAULT_KEY: "not provided",
+            LEAVE_REQUIREDS_BLANK_KEY: True,
+            OVERWRITE_NON_NANS_KEY: False,
+            STUDY_SPECIFIC_METADATA_KEY: {
+                HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                    "human": {
+                        METADATA_FIELDS_KEY: {},
+                        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
+                            "stool": {
+                                METADATA_FIELDS_KEY: {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        # Software config specifies alternate column names
+        software_config = {
+            DEFAULT_KEY: "not provided",
+            LEAVE_REQUIREDS_BLANK_KEY: True,
+            OVERWRITE_NON_NANS_KEY: False,
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type"],
+            SAMPLETYPE_COL_OPTIONS_KEY: ["sample"]
+        }
+        result_df, validation_msgs_df = extend_metadata_df(
+            input_df, study_config, None, software_config, self.TEST_STDS_FP)
+        expected_df = pandas.DataFrame({
+            SAMPLE_NAME_KEY: ["sample1", "sample2"],
+            "body_product": ["UBERON:feces", "UBERON:feces"],
+            "body_site": ["gut", "gut"],
+            "description": ["human sample", "human sample"],
+            "host_common_name": ["human", "human"],
+            # Alternate column names from input are preserved
+            "host_type": ["human", "human"],
+            QIITA_SAMPLE_TYPE: ["stool", "stool"],
+            # Alternate column names from input are preserved
+            "sample": ["stool", "stool"],
+            SAMPLE_TYPE_KEY: ["stool", "stool"],
+            # Standard internal columns added at end (in order of INTERNAL_COL_KEYS)
+            HOSTTYPE_SHORTHAND_KEY: ["human", "human"],
+            SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
+            QC_NOTE_KEY: ["", ""]
+        })
+        assert_frame_equal(expected_df, result_df)
+        self.assertTrue(validation_msgs_df.empty)
     # Tests for _get_study_specific_config
     def test__get_study_specific_config_with_valid_file(self):
@@ -4084,6 +4157,7 @@ class TestMetadataExtender(TestCase):
         TEST_DIR, "data/test_project1_output_metadata.txt")
     TEST_PROJECT1_EXPECTED_FAILS_FP = path.join(
         TEST_DIR, "data/test_project1_output_fails.csv")
     def test_write_extended_metadata_from_df_project1_integration(self):
         """Integration test using project1 test data files."""
@@ -4095,7 +4169,6 @@ class TestMetadataExtender(TestCase):
             with open(path.join(debug_dir, f"UNMATCHED_2_{file_name}"), 'w') as debug_actual_file:
                 debug_actual_file.write(actual_content)
         # Load input metadata CSV
         input_df = pandas.read_csv(self.TEST_PROJECT1_METADATA_FP, dtype=str)
         # for the columns "plating_notes" and "notes", fill NaN with empty string
@@ -4145,3 +4218,83 @@ class TestMetadataExtender(TestCase):
                 os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
             self.assertEqual(1, len(validation_files))
             self.assertEqual(0, os.path.getsize(validation_files[0]))
+    # Tests for _get_specified_column_name
+    def test__get_specified_column_name_finds_column(self):
+        """Test that _get_specified_column_name finds a column that exists."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("host_type", result)
+    def test__get_specified_column_name_returns_first_match(self):
+        """Test that _get_specified_column_name returns the first match when multiple options exist."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"],
+            "host_common_name": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("host_type", result)
+    def test__get_specified_column_name_returns_none_when_no_match(self):
+        """Test that _get_specified_column_name returns None when no options match."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "other_column": ["value"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_returns_none_when_key_missing(self):
+        """Test that _get_specified_column_name returns None when col_options_key is not in config."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {}
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_returns_none_when_options_empty(self):
+        """Test that _get_specified_column_name returns None when col_options is empty list."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: []
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_with_sampletype_key(self):
+        """Test that _get_specified_column_name works with sampletype column options."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "sample_type": ["stool"]
+        })
+        config_dict = {
+            SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type", "sampletype"]
+        }
+        result = _get_specified_column_name(
+            SAMPLETYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("sample_type", result)
+    # endregion _get_specified_column_name tests

metameq 2026.1.2__py3-none-any.whl → 2026.2.2__py3-none-any.whl

metameq 2026.1.2__py3-none-any.whl → 2026.2.2__py3-none-any.whl