PyPI - metameq - Versions diffs - 2026.1.2__tar.gz → 2026.2.1__tar.gz - Mend

metameq 2026.1.2.tar.gz → 2026.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

{metameq-2026.1.2/metameq.egg-info → metameq-2026.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metameq
-Version: 2026.1.2
+Version: 2026.2.1
 Summary: Qiita-compliant metadata generation and validation tool
 Home-page: https://github.com/AmandaBirmingham/metameq
 Author: Amanda Birmingham

{metameq-2026.1.2 → metameq-2026.2.1}/metameq/_version.py RENAMED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2026-01-31T12:28:01-0800",
+ "date": "2026-02-01T21:32:06-0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "889941fbd7d28889867e3f4b6edba2d50dbc5956",
- "version": "2026.01.2"
+ "full-revisionid": "87171fd73f555e2c03a15fa36ed9b5a912b824e9",
+ "version": "2026.02.1"
 }
 '''  # END VERSION_JSON

metameq-2026.2.1/metameq/config/config.yml ADDED Viewed

@@ -0,0 +1,7 @@
+"default": "not applicable"
+"leave_requireds_blank": false
+"overwrite_non_nans": false
+"hosttype_column_options":
+  - "host_common_name"
+"sampletype_column_options":
+  - "sample_type"

{metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_extender.py RENAMED Viewed

@@ -15,7 +15,8 @@ from metameq.src.util import extract_config_dict, \
     LEAVE_BLANK_VAL, SAMPLE_NAME_KEY, \
     ALLOWED_KEY, TYPE_KEY, LEAVE_REQUIREDS_BLANK_KEY, OVERWRITE_NON_NANS_KEY, \
     METADATA_TRANSFORMERS_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
-    SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS
+    SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS, \
+    HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
 from metameq.src.metadata_configurator import update_wip_metadata_dict, \
     build_full_flat_config_dict
 from metameq.src.metadata_validator import validate_metadata_df, \
@@ -447,13 +448,22 @@ def extend_metadata_df(
     ValueError
         If required columns are missing from the metadata.
     """
+    full_flat_config_dict = build_full_flat_config_dict(
+        study_specific_config_dict, software_config_dict, stds_fp)
+    needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
+                   (SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
+    for curr_key, curr_options_key in needed_cols:
+        if curr_key not in raw_metadata_df.columns:
+            specified_name = _get_specified_column_name(
+                curr_options_key, raw_metadata_df, full_flat_config_dict)
+            if specified_name:
+                raw_metadata_df[curr_key] = raw_metadata_df[specified_name]
     validate_required_columns_exist(
         raw_metadata_df, REQUIRED_RAW_METADATA_FIELDS,
         "metadata missing required columns")
-    full_flat_config_dict = build_full_flat_config_dict(
-        study_specific_config_dict, software_config_dict, stds_fp)
     metadata_df, validation_msgs_df = _populate_metadata_df(
         raw_metadata_df, full_flat_config_dict,
         study_specific_transformers_dict)
@@ -461,6 +471,40 @@ def extend_metadata_df(
     return metadata_df, validation_msgs_df
+def _get_specified_column_name(
+        col_options_key: str,
+        raw_metadata_df: pandas.DataFrame,
+        config_dict: Dict[str, Any] = None) -> Optional[str]:
+    """Get the specified type of column name from the metadata DataFrame based on possible options.
+    Parameters
+    ----------
+    col_options_key : str
+        Key in the config dict that holds the list of possible column names to check.
+    raw_metadata_df : pandas.DataFrame
+        The metadata DataFrame to check.
+    config_dict : Dict[str, Any], default=None
+        Configuration dictionary. If provided, may contain a list of possible
+        column names under the key specified by col_options_key.
+        If None, defaults to values from the main config.yml file.
+    Returns
+    -------
+    Optional[str]
+        The specified column name found in the DataFrame, or None if not found.
+    """
+    found_name = None
+    if not config_dict:
+        config_dict = extract_config_dict(None)
+    col_options = config_dict.get(col_options_key)
+    if col_options:
+        for col_name in col_options:
+            if col_name in raw_metadata_df.columns:
+                found_name = col_name
+                break
+    return found_name
 def write_metadata_results(
         metadata_df: pandas.DataFrame,
         validation_msgs_df: pandas.DataFrame,

{metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/util.py RENAMED Viewed

@@ -27,6 +27,8 @@ SOURCES_KEY = "sources"
 FUNCTION_KEY = "function"
 LEAVE_REQUIREDS_BLANK_KEY = "leave_requireds_blank"
 OVERWRITE_NON_NANS_KEY = "overwrite_non_nans"
+HOSTTYPE_COL_OPTIONS_KEY = "hosttype_column_options"
+SAMPLETYPE_COL_OPTIONS_KEY = "sampletype_column_options"
 # internal code keys
 HOSTTYPE_SHORTHAND_KEY = "hosttype_shorthand"

{metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_configurator.py RENAMED Viewed

@@ -5,7 +5,8 @@ from metameq.src.util import \
     SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
     ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
     STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
-    OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
+    OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, \
+    HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
 from metameq.src.metadata_configurator import \
     combine_stds_and_study_config, \
     _make_combined_stds_and_study_host_type_dicts, \
@@ -4643,6 +4644,8 @@ class TestMetadataConfigurator(TestCase):
             DEFAULT_KEY: "not applicable",
             LEAVE_REQUIREDS_BLANK_KEY: False,
             OVERWRITE_NON_NANS_KEY: False,
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_common_name"],
+            SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type"],
             # Flattened host types
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "base": {

{metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_extender.py RENAMED Viewed

@@ -14,7 +14,8 @@ from metameq.src.util import \
     OVERWRITE_NON_NANS_KEY, LEAVE_REQUIREDS_BLANK_KEY, LEAVE_BLANK_VAL, \
     HOST_TYPE_SPECIFIC_METADATA_KEY, METADATA_TRANSFORMERS_KEY, \
     SOURCES_KEY, FUNCTION_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
-    STUDY_SPECIFIC_METADATA_KEY
+    STUDY_SPECIFIC_METADATA_KEY, HOSTTYPE_COL_OPTIONS_KEY, \
+    SAMPLETYPE_COL_OPTIONS_KEY
 from metameq.src.metadata_extender import \
     id_missing_cols, get_qc_failures, get_reserved_cols, find_standard_cols, \
     find_nonstandard_cols, write_metadata_results, \
@@ -26,7 +27,7 @@ from metameq.src.metadata_extender import \
     _generate_metadata_for_a_host_type, _generate_metadata_for_host_types, \
     _transform_metadata, _populate_metadata_df, extend_metadata_df, \
     _get_study_specific_config, _output_metadata_df_to_files, \
-    INTERNAL_COL_KEYS, REQ_PLACEHOLDER
+    _get_specified_column_name, INTERNAL_COL_KEYS, REQ_PLACEHOLDER
 class TestMetadataExtender(TestCase):
@@ -3189,6 +3190,63 @@ class TestMetadataExtender(TestCase):
         })
         assert_frame_equal(expected_df, result_df)
+    def test_extend_metadata_df_with_alternate_column_names(self):
+        """Test metadata extension with alternate hosttype and sampletype column names."""
+        # Use alternate column names instead of hosttype_shorthand and sampletype_shorthand
+        input_df = pandas.DataFrame({
+            SAMPLE_NAME_KEY: ["sample1", "sample2"],
+            "host_type": ["human", "human"],
+            "sample": ["stool", "stool"]
+        })
+        study_config = {
+            DEFAULT_KEY: "not provided",
+            LEAVE_REQUIREDS_BLANK_KEY: True,
+            OVERWRITE_NON_NANS_KEY: False,
+            STUDY_SPECIFIC_METADATA_KEY: {
+                HOST_TYPE_SPECIFIC_METADATA_KEY: {
+                    "human": {
+                        METADATA_FIELDS_KEY: {},
+                        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
+                            "stool": {
+                                METADATA_FIELDS_KEY: {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        # Software config specifies alternate column names
+        software_config = {
+            DEFAULT_KEY: "not provided",
+            LEAVE_REQUIREDS_BLANK_KEY: True,
+            OVERWRITE_NON_NANS_KEY: False,
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type"],
+            SAMPLETYPE_COL_OPTIONS_KEY: ["sample"]
+        }
+        result_df, validation_msgs_df = extend_metadata_df(
+            input_df, study_config, None, software_config, self.TEST_STDS_FP)
+        expected_df = pandas.DataFrame({
+            SAMPLE_NAME_KEY: ["sample1", "sample2"],
+            "body_product": ["UBERON:feces", "UBERON:feces"],
+            "body_site": ["gut", "gut"],
+            "description": ["human sample", "human sample"],
+            "host_common_name": ["human", "human"],
+            # Alternate column names from input are preserved
+            "host_type": ["human", "human"],
+            QIITA_SAMPLE_TYPE: ["stool", "stool"],
+            # Alternate column names from input are preserved
+            "sample": ["stool", "stool"],
+            SAMPLE_TYPE_KEY: ["stool", "stool"],
+            # Standard internal columns added at end (in order of INTERNAL_COL_KEYS)
+            HOSTTYPE_SHORTHAND_KEY: ["human", "human"],
+            SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
+            QC_NOTE_KEY: ["", ""]
+        })
+        assert_frame_equal(expected_df, result_df)
+        self.assertTrue(validation_msgs_df.empty)
     # Tests for _get_study_specific_config
     def test__get_study_specific_config_with_valid_file(self):
@@ -4145,3 +4203,83 @@ class TestMetadataExtender(TestCase):
                 os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
             self.assertEqual(1, len(validation_files))
             self.assertEqual(0, os.path.getsize(validation_files[0]))
+    # Tests for _get_specified_column_name
+    def test__get_specified_column_name_finds_column(self):
+        """Test that _get_specified_column_name finds a column that exists."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("host_type", result)
+    def test__get_specified_column_name_returns_first_match(self):
+        """Test that _get_specified_column_name returns the first match when multiple options exist."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"],
+            "host_common_name": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("host_type", result)
+    def test__get_specified_column_name_returns_none_when_no_match(self):
+        """Test that _get_specified_column_name returns None when no options match."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "other_column": ["value"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_returns_none_when_key_missing(self):
+        """Test that _get_specified_column_name returns None when col_options_key is not in config."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {}
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_returns_none_when_options_empty(self):
+        """Test that _get_specified_column_name returns None when col_options is empty list."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "host_type": ["human"]
+        })
+        config_dict = {
+            HOSTTYPE_COL_OPTIONS_KEY: []
+        }
+        result = _get_specified_column_name(
+            HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertIsNone(result)
+    def test__get_specified_column_name_with_sampletype_key(self):
+        """Test that _get_specified_column_name works with sampletype column options."""
+        input_df = pandas.DataFrame({
+            "sample_name": ["s1"],
+            "sample_type": ["stool"]
+        })
+        config_dict = {
+            SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type", "sampletype"]
+        }
+        result = _get_specified_column_name(
+            SAMPLETYPE_COL_OPTIONS_KEY, input_df, config_dict)
+        self.assertEqual("sample_type", result)
+    # endregion _get_specified_column_name tests

{metameq-2026.1.2 → metameq-2026.2.1/metameq.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metameq
-Version: 2026.1.2
+Version: 2026.2.1
 Summary: Qiita-compliant metadata generation and validation tool
 Home-page: https://github.com/AmandaBirmingham/metameq
 Author: Amanda Birmingham