PyPI - metameq - Versions diffs - 2026.1.2__tar.gz → 2026.2.2__tar.gz - Mend

metameq 2026.1.2__tar.gz → 2026.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{metameq-2026.1.2/metameq.egg-info → metameq-2026.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metameq
-Version: 2026.1.2
+Version: 2026.2.2
 Summary: Qiita-compliant metadata generation and validation tool
 Home-page: https://github.com/AmandaBirmingham/metameq
 Author: Amanda Birmingham

{metameq-2026.1.2 → metameq-2026.2.2}/metameq/_version.py RENAMED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2026-01-31T12:28:01-0800",
+ "date": "2026-02-02T16:43:52-0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "889941fbd7d28889867e3f4b6edba2d50dbc5956",
- "version": "2026.01.2"
+ "full-revisionid": "4fe1396e1007820dc7a4bdb58708fff0df6b9a57",
+ "version": "2026.02.2"
 }
 '''  # END VERSION_JSON

metameq-2026.2.2/metameq/config/config.yml ADDED Viewed

@@ -0,0 +1,7 @@
+"default": "not applicable"
+"leave_requireds_blank": false
+"overwrite_non_nans": false
+"hosttype_column_options":
+  - "host_common_name"
+"sampletype_column_options":
+  - "sample_type"

{metameq-2026.1.2 → metameq-2026.2.2}/metameq/src/metadata_configurator.py RENAMED Viewed

@@ -5,7 +5,7 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
     HOST_TYPE_SPECIFIC_METADATA_KEY, \
     SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
     DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
-    SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
+    SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, GLOBAL_SETTINGS_KEYS
 def combine_stds_and_study_config(
@@ -257,11 +257,12 @@ def _combine_base_and_added_host_type(
     host_type_wip_nested_dict = \
         deepcopy_dict(host_type_base_dict)
-    # look for a default key in the add dict for this host; if
-    # it exists, add it to the wip dict (ok to overwrite existing)
-    if DEFAULT_KEY in host_type_add_dict:
-        host_type_wip_nested_dict[DEFAULT_KEY] = \
-            host_type_add_dict.get(DEFAULT_KEY)
+    # look for global settings in the add dict for this host; if
+    # any exists, add it to the wip dict (ok to overwrite existing)
+    for curr_global_setting_key in GLOBAL_SETTINGS_KEYS:
+        if curr_global_setting_key in host_type_add_dict:
+            host_type_wip_nested_dict[curr_global_setting_key] = \
+                host_type_add_dict.get(curr_global_setting_key)
     # combine add metadata fields with the wip metadata fields
     # for the current host type and assign to wip if not empty
@@ -636,6 +637,10 @@ def build_full_flat_config_dict(
         # since the software config doesn't include any host type specific info
         full_nested_hosts_dict = extract_stds_config(stds_fp)
+    full_nested_hosts_dict = _push_global_settings_into_top_host(
+            full_nested_hosts_dict,
+            software_plus_study_flat_config_dict)
     full_flat_hosts_dict = flatten_nested_stds_dict(
         full_nested_hosts_dict, None)
     software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
@@ -655,3 +660,45 @@ def build_full_flat_config_dict(
     full_flat_config_dict = software_plus_study_flat_config_dict
     return full_flat_config_dict
+def _push_global_settings_into_top_host(
+        a_full_nested_hosts_dict: Dict[str, Any],
+        a_software_plus_study_flat_config_dict: Dict[str, Any]) -> Dict[str, Any]:
+    """Push global settings from flat config into top-level host in nested hosts dict.
+    Parameters
+    ----------
+    a_full_nested_hosts_dict : Dict[str, Any]
+        Nested hosts dictionary to update.
+    a_software_plus_study_flat_config_dict : Dict[str, Any]
+        Flat configuration dictionary containing global settings.
+    Returns
+    -------
+    Dict[str, Any]
+        Updated nested hosts dictionary with global settings added to top-level host.
+    Raises
+    ------
+    ValueError
+        If there is not exactly one top-level host in the nested hosts dictionary.
+    """
+    result = deepcopy_dict(a_full_nested_hosts_dict)
+    # get the top level host(s) in full_nested_hosts_dict
+    # (should be only one because it is nested)
+    top_level_host_keys = list(a_full_nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY].keys())
+    if len(top_level_host_keys) != 1:
+        raise ValueError(f"Expected exactly one top-level key in "
+                         f"full_nested_hosts_dict but found: {top_level_host_keys}")
+    top_level_host_key = top_level_host_keys[0]
+    # check for each top-level setting from the software+study dictionary
+    # and add it under the top level host key in a_full_nested_hosts_dict
+    for curr_setting_key in GLOBAL_SETTINGS_KEYS:
+        if curr_setting_key in a_software_plus_study_flat_config_dict:
+            result[HOST_TYPE_SPECIFIC_METADATA_KEY][top_level_host_key][curr_setting_key] = \
+                a_software_plus_study_flat_config_dict[curr_setting_key]
+    return result

{metameq-2026.1.2 → metameq-2026.2.2}/metameq/src/metadata_extender.py RENAMED Viewed

@@ -6,7 +6,7 @@ from pathlib import Path
 from datetime import datetime
 from typing import List, Dict, Optional, Tuple, Any
 from metameq.src.util import extract_config_dict, \
-    deepcopy_dict, validate_required_columns_exist, get_extension, \
+    validate_required_columns_exist, get_extension, \
     load_df_with_best_fit_encoding, update_metadata_df_field, \
     HOSTTYPE_SHORTHAND_KEY, SAMPLETYPE_SHORTHAND_KEY, \
     QC_NOTE_KEY, METADATA_FIELDS_KEY, HOST_TYPE_SPECIFIC_METADATA_KEY, \
@@ -15,7 +15,8 @@ from metameq.src.util import extract_config_dict, \
     LEAVE_BLANK_VAL, SAMPLE_NAME_KEY, \
     ALLOWED_KEY, TYPE_KEY, LEAVE_REQUIREDS_BLANK_KEY, OVERWRITE_NON_NANS_KEY, \
     METADATA_TRANSFORMERS_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
-    SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS
+    SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS, \
+    HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
 from metameq.src.metadata_configurator import update_wip_metadata_dict, \
     build_full_flat_config_dict
 from metameq.src.metadata_validator import validate_metadata_df, \
@@ -447,13 +448,22 @@ def extend_metadata_df(
     ValueError
         If required columns are missing from the metadata.
     """
+    full_flat_config_dict = build_full_flat_config_dict(
+        study_specific_config_dict, software_config_dict, stds_fp)
+    needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
+                   (SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
+    for curr_key, curr_options_key in needed_cols:
+        if curr_key not in raw_metadata_df.columns:
+            specified_name = _get_specified_column_name(
+                curr_options_key, raw_metadata_df, full_flat_config_dict)
+            if specified_name:
+                raw_metadata_df[curr_key] = raw_metadata_df[specified_name]
     validate_required_columns_exist(
         raw_metadata_df, REQUIRED_RAW_METADATA_FIELDS,
         "metadata missing required columns")
-    full_flat_config_dict = build_full_flat_config_dict(
-        study_specific_config_dict, software_config_dict, stds_fp)
     metadata_df, validation_msgs_df = _populate_metadata_df(
         raw_metadata_df, full_flat_config_dict,
         study_specific_transformers_dict)
@@ -461,6 +471,41 @@ def extend_metadata_df(
     return metadata_df, validation_msgs_df
+def _get_specified_column_name(
+        col_options_key: str,
+        raw_metadata_df: pandas.DataFrame,
+        config_dict: Dict[str, Any] = None) -> Optional[str]:
+    """Get the specified type of column name from the metadata DataFrame based on possible options.
+    Parameters
+    ----------
+    col_options_key : str
+        Key in the config dict that holds the list of possible column names to check.
+    raw_metadata_df : pandas.DataFrame
+        The metadata DataFrame to check.
+    config_dict : Dict[str, Any], default=None
+        Configuration dictionary. If provided, may contain a list of possible
+        column names under the key specified by col_options_key.
+        If None, defaults to values from the main config.yml file.
+    Returns
+    -------
+    Optional[str]
+        The specified column name found in the DataFrame, or None if not found.
+    """
+    found_name = None
+    if not config_dict:
+        config_dict = extract_config_dict(None)
+    col_options = config_dict.get(col_options_key)
+    if col_options:
+        for col_name in col_options:
+            if col_name in raw_metadata_df.columns:
+                found_name = col_name
+                break
+    return found_name
 def write_metadata_results(
         metadata_df: pandas.DataFrame,
         validation_msgs_df: pandas.DataFrame,
@@ -694,12 +739,6 @@ def _generate_metadata_for_host_types(
             - The processed DataFrame with specific metadata added to each sample of each host type
             - A list of validation messages
     """
-    # gather global settings
-    settings_dict = {DEFAULT_KEY: full_flat_config_dict.get(DEFAULT_KEY),
-                     LEAVE_REQUIREDS_BLANK_KEY:
-                         full_flat_config_dict.get(LEAVE_REQUIREDS_BLANK_KEY),
-                     OVERWRITE_NON_NANS_KEY:
-                         full_flat_config_dict.get(OVERWRITE_NON_NANS_KEY)}
     validation_msgs = []
     host_type_dfs = []
@@ -707,7 +746,7 @@ def _generate_metadata_for_host_types(
     host_type_shorthands = pandas.unique(metadata_df[HOSTTYPE_SHORTHAND_KEY])
     for curr_host_type_shorthand in host_type_shorthands:
         concatted_dfs, curr_validation_msgs = _generate_metadata_for_a_host_type(
-                metadata_df, curr_host_type_shorthand, settings_dict, full_flat_config_dict)
+                metadata_df, curr_host_type_shorthand, full_flat_config_dict)
         host_type_dfs.append(concatted_dfs)
         validation_msgs.extend(curr_validation_msgs)
@@ -723,7 +762,7 @@ def _generate_metadata_for_host_types(
     # NB: passing in the same dict twice here is not a mistake, just a
     # convenience since we don't have a more specific dict at this point.
     output_df = _fill_na_if_default(
-        output_df, settings_dict, settings_dict)
+        output_df, full_flat_config_dict)
     # TODO: this is setting a value in the output; should it be centralized
     #  so it is easy to find?
@@ -735,7 +774,6 @@ def _generate_metadata_for_host_types(
 def _generate_metadata_for_a_host_type(
         metadata_df: pandas.DataFrame,
         a_host_type: str,
-        settings_dict: Dict[str, Any],
         full_flat_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
     """Generate metadata df for samples with a specific host type.
@@ -746,8 +784,6 @@ def _generate_metadata_for_a_host_type(
         the columns in REQUIRED_RAW_METADATA_FIELDS.
     a_host_type : str
         The specific host type for which to process samples.
-    settings_dict : Dict[str, Any]
-        Dictionary containing global settings for default/nan/etc.
     full_flat_config_dict : Dict[str, Any]
         Fully combined flat-host-type config dictionary.
@@ -770,16 +806,11 @@ def _generate_metadata_for_a_host_type(
         # for these samples but do not error out; move on to the next host type
         update_metadata_df_field(
             host_type_df, QC_NOTE_KEY, "invalid host_type")
-        # host_type_df[QC_NOTE_KEY] = "invalid host_type"
         concatted_df = host_type_df
     else:
         # gather host-type-specific settings and overwrite the global settings with them, if any
         a_host_type_config_dict = \
             full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY][a_host_type]
-        global_plus_host_settings_dict = deepcopy_dict(settings_dict)
-        # if this host type has a default value for empty fields, use it; otherwise, use the global default
-        global_plus_host_settings_dict[DEFAULT_KEY] = a_host_type_config_dict.get(
-            DEFAULT_KEY, global_plus_host_settings_dict[DEFAULT_KEY])
         dfs_to_concat = []
         # loop through each sample type in the metadata for this host type
@@ -789,8 +820,7 @@ def _generate_metadata_for_a_host_type(
             # generate the specific metadata for this sample type *in this host type*
             curr_sample_type_df, curr_validation_msgs = \
                 _generate_metadata_for_a_sample_type_in_a_host_type(
-                    host_type_df, curr_sample_type, global_plus_host_settings_dict,
-                    a_host_type_config_dict)
+                    host_type_df, curr_sample_type, a_host_type_config_dict)
             dfs_to_concat.append(curr_sample_type_df)
             validation_msgs.extend(curr_validation_msgs)
@@ -807,7 +837,6 @@ def _generate_metadata_for_a_host_type(
 def _generate_metadata_for_a_sample_type_in_a_host_type(
         host_type_metadata_df: pandas.DataFrame,
         a_sample_type: str,
-        global_plus_host_settings_dict: Dict[str, Any],
         a_host_type_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
     """Generate metadata df for samples with a specific sample type within a specific host type.
@@ -817,8 +846,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
         DataFrame containing metadata samples for a specific host type.
     a_sample_type : str
         The sample type to process.
-    global_plus_host_settings_dict : Dict[str, Any]
-        Dictionary containing default/nan/etc settings for current context.
     a_host_type_config_dict : Dict[str, Any]
         Dictionary containing config for this host type.
@@ -857,19 +884,19 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
         sample_type_df = _update_metadata_from_dict(
             sample_type_df, full_sample_type_metadata_fields_dict,
             dict_is_metadata_fields=True,
-            overwrite_non_nans=global_plus_host_settings_dict[OVERWRITE_NON_NANS_KEY])
+            overwrite_non_nans=a_host_type_config_dict[OVERWRITE_NON_NANS_KEY])
         # for fields that are required but not yet filled, replace the placeholder with
         # either an indicator that it should be blank or else
         # fill with NA (replaced with default just below), based on config setting
-        leave_reqs_blank = global_plus_host_settings_dict[LEAVE_REQUIREDS_BLANK_KEY]
+        leave_reqs_blank = a_host_type_config_dict[LEAVE_REQUIREDS_BLANK_KEY]
         reqs_val = LEAVE_BLANK_VAL if leave_reqs_blank else np.nan
         sample_type_df.replace(
             to_replace=REQ_PLACEHOLDER, value=reqs_val, inplace=True)
         # fill NAs with appropriate default value if any is set
         sample_type_df = _fill_na_if_default(
-            sample_type_df, full_sample_type_metadata_fields_dict, global_plus_host_settings_dict)
+            sample_type_df, a_host_type_config_dict)
         # validate the metadata df based on the specific requirements
         # for this host+sample type
@@ -1051,7 +1078,6 @@ def _update_metadata_from_metadata_fields_dict(
 # fill NAs with default value if any is set
 def _fill_na_if_default(
         metadata_df: pandas.DataFrame,
-        specific_dict: Dict[str, Any],
         settings_dict: Dict[str, Any]) -> pandas.DataFrame:
     """Fill NaN values in metadata df with default values if available.
@@ -1059,24 +1085,20 @@ def _fill_na_if_default(
     ----------
     metadata_df : pandas.DataFrame
         The metadata DataFrame to process.
-    specific_dict : Dict[str, Any]
-        Dictionary containing context-specific settings. Will be used first as a source of default values.
     settings_dict : Dict[str, Any]
-        Dictionary containing global settings. Will be used as a
-          source of default values if specific_dict does not contain a DEFAULT_KEY.
+        Dictionary containing settings.
     Returns
     -------
     pandas.DataFrame
         The updated DataFrame with NaN values filled. Unchanged if no default values are set.
     """
-    default_val = specific_dict.get(DEFAULT_KEY, settings_dict[DEFAULT_KEY])
+    default_val = settings_dict.get(DEFAULT_KEY)
     if default_val:
         # TODO: this is setting a value in the output; should it be
         #  centralized so it is easy to find?
         metadata_df = \
             metadata_df.fillna(default_val)
-#             metadata_df.astype("string").fillna(default_val)
     return metadata_df

{metameq-2026.1.2 → metameq-2026.2.2}/metameq/src/util.py RENAMED Viewed

@@ -27,6 +27,8 @@ SOURCES_KEY = "sources"
 FUNCTION_KEY = "function"
 LEAVE_REQUIREDS_BLANK_KEY = "leave_requireds_blank"
 OVERWRITE_NON_NANS_KEY = "overwrite_non_nans"
+HOSTTYPE_COL_OPTIONS_KEY = "hosttype_column_options"
+SAMPLETYPE_COL_OPTIONS_KEY = "sampletype_column_options"
 # internal code keys
 HOSTTYPE_SHORTHAND_KEY = "hosttype_shorthand"
@@ -49,6 +51,13 @@ REQUIRED_RAW_METADATA_FIELDS = [SAMPLE_NAME_KEY,
                                 SAMPLETYPE_SHORTHAND_KEY]
+GLOBAL_SETTINGS_KEYS = [
+    DEFAULT_KEY,
+    LEAVE_REQUIREDS_BLANK_KEY,
+    OVERWRITE_NON_NANS_KEY
+]
 def extract_config_dict(
         config_fp: Union[str, None]) -> dict:
     """Extract configuration dictionary from a YAML file.

metameq 2026.1.2__tar.gz → 2026.2.2__tar.gz

metameq 2026.1.2__tar.gz → 2026.2.2__tar.gz