metameq 2026.2.1__tar.gz → 2026.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {metameq-2026.2.1/metameq.egg-info → metameq-2026.2.2}/PKG-INFO +1 -1
  2. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/_version.py +3 -3
  3. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_configurator.py +53 -6
  4. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_extender.py +13 -35
  5. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/util.py +7 -0
  6. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_configurator.py +184 -1
  7. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_extender.py +106 -91
  8. {metameq-2026.2.1 → metameq-2026.2.2/metameq.egg-info}/PKG-INFO +1 -1
  9. {metameq-2026.2.1 → metameq-2026.2.2}/.gitattributes +0 -0
  10. {metameq-2026.2.1 → metameq-2026.2.2}/.github/workflows/main.yaml +0 -0
  11. {metameq-2026.2.1 → metameq-2026.2.2}/.gitignore +0 -0
  12. {metameq-2026.2.1 → metameq-2026.2.2}/README.md +0 -0
  13. {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq.png +0 -0
  14. {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_dark.svg +0 -0
  15. {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_light.svg +0 -0
  16. {metameq-2026.2.1 → metameq-2026.2.2}/assets/metameq_medium.png +0 -0
  17. {metameq-2026.2.1 → metameq-2026.2.2}/environment.yml +0 -0
  18. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/__init__.py +0 -0
  19. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/__init__.py +0 -0
  20. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/config.yml +0 -0
  21. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/config/standards.yml +0 -0
  22. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/__init__.py +0 -0
  23. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/__main__.py +0 -0
  24. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_merger.py +0 -0
  25. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_transformers.py +0 -0
  26. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/src/metadata_validator.py +0 -0
  27. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/__init__.py +0 -0
  28. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/data/invalid.yml +0 -0
  29. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/data/test_config.yml +0 -0
  30. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_merger.py +0 -0
  31. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_transformers.py +0 -0
  32. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_metadata_validator.py +0 -0
  33. {metameq-2026.2.1 → metameq-2026.2.2}/metameq/tests/test_util.py +0 -0
  34. {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/SOURCES.txt +0 -0
  35. {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/dependency_links.txt +0 -0
  36. {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/entry_points.txt +0 -0
  37. {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/requires.txt +0 -0
  38. {metameq-2026.2.1 → metameq-2026.2.2}/metameq.egg-info/top_level.txt +0 -0
  39. {metameq-2026.2.1 → metameq-2026.2.2}/setup.cfg +0 -0
  40. {metameq-2026.2.1 → metameq-2026.2.2}/setup.py +0 -0
  41. {metameq-2026.2.1 → metameq-2026.2.2}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metameq
3
- Version: 2026.2.1
3
+ Version: 2026.2.2
4
4
  Summary: Qiita-compliant metadata generation and validation tool
5
5
  Home-page: https://github.com/AmandaBirmingham/metameq
6
6
  Author: Amanda Birmingham
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2026-02-01T21:32:06-0800",
11
+ "date": "2026-02-02T16:43:52-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "87171fd73f555e2c03a15fa36ed9b5a912b824e9",
15
- "version": "2026.02.1"
14
+ "full-revisionid": "4fe1396e1007820dc7a4bdb58708fff0df6b9a57",
15
+ "version": "2026.02.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -5,7 +5,7 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
5
5
  HOST_TYPE_SPECIFIC_METADATA_KEY, \
6
6
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
7
7
  DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
8
- SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
8
+ SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, GLOBAL_SETTINGS_KEYS
9
9
 
10
10
 
11
11
  def combine_stds_and_study_config(
@@ -257,11 +257,12 @@ def _combine_base_and_added_host_type(
257
257
  host_type_wip_nested_dict = \
258
258
  deepcopy_dict(host_type_base_dict)
259
259
 
260
- # look for a default key in the add dict for this host; if
261
- # it exists, add it to the wip dict (ok to overwrite existing)
262
- if DEFAULT_KEY in host_type_add_dict:
263
- host_type_wip_nested_dict[DEFAULT_KEY] = \
264
- host_type_add_dict.get(DEFAULT_KEY)
260
+ # look for global settings in the add dict for this host; if
261
+ # any exists, add it to the wip dict (ok to overwrite existing)
262
+ for curr_global_setting_key in GLOBAL_SETTINGS_KEYS:
263
+ if curr_global_setting_key in host_type_add_dict:
264
+ host_type_wip_nested_dict[curr_global_setting_key] = \
265
+ host_type_add_dict.get(curr_global_setting_key)
265
266
 
266
267
  # combine add metadata fields with the wip metadata fields
267
268
  # for the current host type and assign to wip if not empty
@@ -636,6 +637,10 @@ def build_full_flat_config_dict(
636
637
  # since the software config doesn't include any host type specific info
637
638
  full_nested_hosts_dict = extract_stds_config(stds_fp)
638
639
 
640
+ full_nested_hosts_dict = _push_global_settings_into_top_host(
641
+ full_nested_hosts_dict,
642
+ software_plus_study_flat_config_dict)
643
+
639
644
  full_flat_hosts_dict = flatten_nested_stds_dict(
640
645
  full_nested_hosts_dict, None)
641
646
  software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
@@ -655,3 +660,45 @@ def build_full_flat_config_dict(
655
660
  full_flat_config_dict = software_plus_study_flat_config_dict
656
661
 
657
662
  return full_flat_config_dict
663
+
664
+
665
+ def _push_global_settings_into_top_host(
666
+ a_full_nested_hosts_dict: Dict[str, Any],
667
+ a_software_plus_study_flat_config_dict: Dict[str, Any]) -> Dict[str, Any]:
668
+ """Push global settings from flat config into top-level host in nested hosts dict.
669
+
670
+ Parameters
671
+ ----------
672
+ a_full_nested_hosts_dict : Dict[str, Any]
673
+ Nested hosts dictionary to update.
674
+ a_software_plus_study_flat_config_dict : Dict[str, Any]
675
+ Flat configuration dictionary containing global settings.
676
+
677
+ Returns
678
+ -------
679
+ Dict[str, Any]
680
+ Updated nested hosts dictionary with global settings added to top-level host.
681
+
682
+ Raises
683
+ ------
684
+ ValueError
685
+ If there is not exactly one top-level host in the nested hosts dictionary.
686
+ """
687
+ result = deepcopy_dict(a_full_nested_hosts_dict)
688
+
689
+ # get the top level host(s) in full_nested_hosts_dict
690
+ # (should be only one because it is nested)
691
+ top_level_host_keys = list(a_full_nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY].keys())
692
+ if len(top_level_host_keys) != 1:
693
+ raise ValueError(f"Expected exactly one top-level key in "
694
+ f"full_nested_hosts_dict but found: {top_level_host_keys}")
695
+ top_level_host_key = top_level_host_keys[0]
696
+
697
+ # check for each top-level setting from the software+study dictionary
698
+ # and add it under the top level host key in a_full_nested_hosts_dict
699
+ for curr_setting_key in GLOBAL_SETTINGS_KEYS:
700
+ if curr_setting_key in a_software_plus_study_flat_config_dict:
701
+ result[HOST_TYPE_SPECIFIC_METADATA_KEY][top_level_host_key][curr_setting_key] = \
702
+ a_software_plus_study_flat_config_dict[curr_setting_key]
703
+
704
+ return result
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from datetime import datetime
7
7
  from typing import List, Dict, Optional, Tuple, Any
8
8
  from metameq.src.util import extract_config_dict, \
9
- deepcopy_dict, validate_required_columns_exist, get_extension, \
9
+ validate_required_columns_exist, get_extension, \
10
10
  load_df_with_best_fit_encoding, update_metadata_df_field, \
11
11
  HOSTTYPE_SHORTHAND_KEY, SAMPLETYPE_SHORTHAND_KEY, \
12
12
  QC_NOTE_KEY, METADATA_FIELDS_KEY, HOST_TYPE_SPECIFIC_METADATA_KEY, \
@@ -451,7 +451,7 @@ def extend_metadata_df(
451
451
  full_flat_config_dict = build_full_flat_config_dict(
452
452
  study_specific_config_dict, software_config_dict, stds_fp)
453
453
 
454
- needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
454
+ needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
455
455
  (SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
456
456
  for curr_key, curr_options_key in needed_cols:
457
457
  if curr_key not in raw_metadata_df.columns:
@@ -485,7 +485,7 @@ def _get_specified_column_name(
485
485
  The metadata DataFrame to check.
486
486
  config_dict : Dict[str, Any], default=None
487
487
  Configuration dictionary. If provided, may contain a list of possible
488
- column names under the key specified by col_options_key.
488
+ column names under the key specified by col_options_key.
489
489
  If None, defaults to values from the main config.yml file.
490
490
  Returns
491
491
  -------
@@ -503,7 +503,8 @@ def _get_specified_column_name(
503
503
  found_name = col_name
504
504
  break
505
505
 
506
- return found_name
506
+ return found_name
507
+
507
508
 
508
509
  def write_metadata_results(
509
510
  metadata_df: pandas.DataFrame,
@@ -738,12 +739,6 @@ def _generate_metadata_for_host_types(
738
739
  - The processed DataFrame with specific metadata added to each sample of each host type
739
740
  - A list of validation messages
740
741
  """
741
- # gather global settings
742
- settings_dict = {DEFAULT_KEY: full_flat_config_dict.get(DEFAULT_KEY),
743
- LEAVE_REQUIREDS_BLANK_KEY:
744
- full_flat_config_dict.get(LEAVE_REQUIREDS_BLANK_KEY),
745
- OVERWRITE_NON_NANS_KEY:
746
- full_flat_config_dict.get(OVERWRITE_NON_NANS_KEY)}
747
742
 
748
743
  validation_msgs = []
749
744
  host_type_dfs = []
@@ -751,7 +746,7 @@ def _generate_metadata_for_host_types(
751
746
  host_type_shorthands = pandas.unique(metadata_df[HOSTTYPE_SHORTHAND_KEY])
752
747
  for curr_host_type_shorthand in host_type_shorthands:
753
748
  concatted_dfs, curr_validation_msgs = _generate_metadata_for_a_host_type(
754
- metadata_df, curr_host_type_shorthand, settings_dict, full_flat_config_dict)
749
+ metadata_df, curr_host_type_shorthand, full_flat_config_dict)
755
750
 
756
751
  host_type_dfs.append(concatted_dfs)
757
752
  validation_msgs.extend(curr_validation_msgs)
@@ -767,7 +762,7 @@ def _generate_metadata_for_host_types(
767
762
  # NB: passing in the same dict twice here is not a mistake, just a
768
763
  # convenience since we don't have a more specific dict at this point.
769
764
  output_df = _fill_na_if_default(
770
- output_df, settings_dict, settings_dict)
765
+ output_df, full_flat_config_dict)
771
766
 
772
767
  # TODO: this is setting a value in the output; should it be centralized
773
768
  # so it is easy to find?
@@ -779,7 +774,6 @@ def _generate_metadata_for_host_types(
779
774
  def _generate_metadata_for_a_host_type(
780
775
  metadata_df: pandas.DataFrame,
781
776
  a_host_type: str,
782
- settings_dict: Dict[str, Any],
783
777
  full_flat_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
784
778
  """Generate metadata df for samples with a specific host type.
785
779
 
@@ -790,8 +784,6 @@ def _generate_metadata_for_a_host_type(
790
784
  the columns in REQUIRED_RAW_METADATA_FIELDS.
791
785
  a_host_type : str
792
786
  The specific host type for which to process samples.
793
- settings_dict : Dict[str, Any]
794
- Dictionary containing global settings for default/nan/etc.
795
787
  full_flat_config_dict : Dict[str, Any]
796
788
  Fully combined flat-host-type config dictionary.
797
789
 
@@ -814,16 +806,11 @@ def _generate_metadata_for_a_host_type(
814
806
  # for these samples but do not error out; move on to the next host type
815
807
  update_metadata_df_field(
816
808
  host_type_df, QC_NOTE_KEY, "invalid host_type")
817
- # host_type_df[QC_NOTE_KEY] = "invalid host_type"
818
809
  concatted_df = host_type_df
819
810
  else:
820
811
  # gather host-type-specific settings and overwrite the global settings with them, if any
821
812
  a_host_type_config_dict = \
822
813
  full_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY][a_host_type]
823
- global_plus_host_settings_dict = deepcopy_dict(settings_dict)
824
- # if this host type has a default value for empty fields, use it; otherwise, use the global default
825
- global_plus_host_settings_dict[DEFAULT_KEY] = a_host_type_config_dict.get(
826
- DEFAULT_KEY, global_plus_host_settings_dict[DEFAULT_KEY])
827
814
 
828
815
  dfs_to_concat = []
829
816
  # loop through each sample type in the metadata for this host type
@@ -833,8 +820,7 @@ def _generate_metadata_for_a_host_type(
833
820
  # generate the specific metadata for this sample type *in this host type*
834
821
  curr_sample_type_df, curr_validation_msgs = \
835
822
  _generate_metadata_for_a_sample_type_in_a_host_type(
836
- host_type_df, curr_sample_type, global_plus_host_settings_dict,
837
- a_host_type_config_dict)
823
+ host_type_df, curr_sample_type, a_host_type_config_dict)
838
824
 
839
825
  dfs_to_concat.append(curr_sample_type_df)
840
826
  validation_msgs.extend(curr_validation_msgs)
@@ -851,7 +837,6 @@ def _generate_metadata_for_a_host_type(
851
837
  def _generate_metadata_for_a_sample_type_in_a_host_type(
852
838
  host_type_metadata_df: pandas.DataFrame,
853
839
  a_sample_type: str,
854
- global_plus_host_settings_dict: Dict[str, Any],
855
840
  a_host_type_config_dict: Dict[str, Any]) -> Tuple[pandas.DataFrame, List[str]]:
856
841
  """Generate metadata df for samples with a specific sample type within a specific host type.
857
842
 
@@ -861,8 +846,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
861
846
  DataFrame containing metadata samples for a specific host type.
862
847
  a_sample_type : str
863
848
  The sample type to process.
864
- global_plus_host_settings_dict : Dict[str, Any]
865
- Dictionary containing default/nan/etc settings for current context.
866
849
  a_host_type_config_dict : Dict[str, Any]
867
850
  Dictionary containing config for this host type.
868
851
 
@@ -901,19 +884,19 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
901
884
  sample_type_df = _update_metadata_from_dict(
902
885
  sample_type_df, full_sample_type_metadata_fields_dict,
903
886
  dict_is_metadata_fields=True,
904
- overwrite_non_nans=global_plus_host_settings_dict[OVERWRITE_NON_NANS_KEY])
887
+ overwrite_non_nans=a_host_type_config_dict[OVERWRITE_NON_NANS_KEY])
905
888
 
906
889
  # for fields that are required but not yet filled, replace the placeholder with
907
890
  # either an indicator that it should be blank or else
908
891
  # fill with NA (replaced with default just below), based on config setting
909
- leave_reqs_blank = global_plus_host_settings_dict[LEAVE_REQUIREDS_BLANK_KEY]
892
+ leave_reqs_blank = a_host_type_config_dict[LEAVE_REQUIREDS_BLANK_KEY]
910
893
  reqs_val = LEAVE_BLANK_VAL if leave_reqs_blank else np.nan
911
894
  sample_type_df.replace(
912
895
  to_replace=REQ_PLACEHOLDER, value=reqs_val, inplace=True)
913
896
 
914
897
  # fill NAs with appropriate default value if any is set
915
898
  sample_type_df = _fill_na_if_default(
916
- sample_type_df, full_sample_type_metadata_fields_dict, global_plus_host_settings_dict)
899
+ sample_type_df, a_host_type_config_dict)
917
900
 
918
901
  # validate the metadata df based on the specific requirements
919
902
  # for this host+sample type
@@ -1095,7 +1078,6 @@ def _update_metadata_from_metadata_fields_dict(
1095
1078
  # fill NAs with default value if any is set
1096
1079
  def _fill_na_if_default(
1097
1080
  metadata_df: pandas.DataFrame,
1098
- specific_dict: Dict[str, Any],
1099
1081
  settings_dict: Dict[str, Any]) -> pandas.DataFrame:
1100
1082
  """Fill NaN values in metadata df with default values if available.
1101
1083
 
@@ -1103,24 +1085,20 @@ def _fill_na_if_default(
1103
1085
  ----------
1104
1086
  metadata_df : pandas.DataFrame
1105
1087
  The metadata DataFrame to process.
1106
- specific_dict : Dict[str, Any]
1107
- Dictionary containing context-specific settings. Will be used first as a source of default values.
1108
1088
  settings_dict : Dict[str, Any]
1109
- Dictionary containing global settings. Will be used as a
1110
- source of default values if specific_dict does not contain a DEFAULT_KEY.
1089
+ Dictionary containing settings.
1111
1090
 
1112
1091
  Returns
1113
1092
  -------
1114
1093
  pandas.DataFrame
1115
1094
  The updated DataFrame with NaN values filled. Unchanged if no default values are set.
1116
1095
  """
1117
- default_val = specific_dict.get(DEFAULT_KEY, settings_dict[DEFAULT_KEY])
1096
+ default_val = settings_dict.get(DEFAULT_KEY)
1118
1097
  if default_val:
1119
1098
  # TODO: this is setting a value in the output; should it be
1120
1099
  # centralized so it is easy to find?
1121
1100
  metadata_df = \
1122
1101
  metadata_df.fillna(default_val)
1123
- # metadata_df.astype("string").fillna(default_val)
1124
1102
 
1125
1103
  return metadata_df
1126
1104
 
@@ -51,6 +51,13 @@ REQUIRED_RAW_METADATA_FIELDS = [SAMPLE_NAME_KEY,
51
51
  SAMPLETYPE_SHORTHAND_KEY]
52
52
 
53
53
 
54
+ GLOBAL_SETTINGS_KEYS = [
55
+ DEFAULT_KEY,
56
+ LEAVE_REQUIREDS_BLANK_KEY,
57
+ OVERWRITE_NON_NANS_KEY
58
+ ]
59
+
60
+
54
61
  def extract_config_dict(
55
62
  config_fp: Union[str, None]) -> dict:
56
63
  """Extract configuration dictionary from a YAML file.
@@ -17,7 +17,8 @@ from metameq.src.metadata_configurator import \
17
17
  _id_sample_type_definition, \
18
18
  update_wip_metadata_dict, \
19
19
  build_full_flat_config_dict, \
20
- _resolve_sample_type_aliases_and_bases
20
+ _resolve_sample_type_aliases_and_bases, \
21
+ _push_global_settings_into_top_host
21
22
 
22
23
 
23
24
  class TestMetadataConfigurator(TestCase):
@@ -3847,6 +3848,9 @@ class TestMetadataConfigurator(TestCase):
3847
3848
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
3848
3849
  # base: top level in test_standards.yml, no default
3849
3850
  "base": {
3851
+ DEFAULT_KEY: "software_default",
3852
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3853
+ OVERWRITE_NON_NANS_KEY: False,
3850
3854
  METADATA_FIELDS_KEY: {
3851
3855
  # sample_name defined at base level
3852
3856
  "sample_name": {
@@ -3865,6 +3869,8 @@ class TestMetadataConfigurator(TestCase):
3865
3869
  "host_associated": {
3866
3870
  # default defined at host_associated level
3867
3871
  DEFAULT_KEY: "not provided",
3872
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3873
+ OVERWRITE_NON_NANS_KEY: False,
3868
3874
  METADATA_FIELDS_KEY: {
3869
3875
  # description defined at host_associated level
3870
3876
  "description": {
@@ -3919,6 +3925,8 @@ class TestMetadataConfigurator(TestCase):
3919
3925
  "human": {
3920
3926
  # default inherited from host_associated
3921
3927
  DEFAULT_KEY: "not provided",
3928
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3929
+ OVERWRITE_NON_NANS_KEY: False,
3922
3930
  METADATA_FIELDS_KEY: {
3923
3931
  # custom_field added from study_specific_metadata
3924
3932
  "custom_field": {
@@ -4037,6 +4045,8 @@ class TestMetadataConfigurator(TestCase):
4037
4045
  "mouse": {
4038
4046
  # default inherited from host_associated
4039
4047
  DEFAULT_KEY: "not provided",
4048
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4049
+ OVERWRITE_NON_NANS_KEY: False,
4040
4050
  METADATA_FIELDS_KEY: {
4041
4051
  # description inherited from host_associated (not overridden)
4042
4052
  "description": {
@@ -4103,6 +4113,7 @@ class TestMetadataConfigurator(TestCase):
4103
4113
  }
4104
4114
  }
4105
4115
  }
4116
+
4106
4117
  self.assertEqual(expected, result)
4107
4118
 
4108
4119
  def test_build_full_flat_config_dict_without_study_config(self):
@@ -4130,6 +4141,9 @@ class TestMetadataConfigurator(TestCase):
4130
4141
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
4131
4142
  # base: top level, no default, just sample_name/sample_type
4132
4143
  "base": {
4144
+ DEFAULT_KEY: "software_default",
4145
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4146
+ OVERWRITE_NON_NANS_KEY: False,
4133
4147
  METADATA_FIELDS_KEY: {
4134
4148
  "sample_name": {
4135
4149
  REQUIRED_KEY: True,
@@ -4145,6 +4159,8 @@ class TestMetadataConfigurator(TestCase):
4145
4159
  # host_associated: inherits from base, adds default and description
4146
4160
  "host_associated": {
4147
4161
  DEFAULT_KEY: "not provided",
4162
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4163
+ OVERWRITE_NON_NANS_KEY: False,
4148
4164
  METADATA_FIELDS_KEY: {
4149
4165
  "description": {
4150
4166
  DEFAULT_KEY: "host associated sample",
@@ -4194,6 +4210,8 @@ class TestMetadataConfigurator(TestCase):
4194
4210
  # human: inherits from host_associated, overrides description
4195
4211
  "human": {
4196
4212
  DEFAULT_KEY: "not provided",
4213
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4214
+ OVERWRITE_NON_NANS_KEY: False,
4197
4215
  METADATA_FIELDS_KEY: {
4198
4216
  "description": {
4199
4217
  DEFAULT_KEY: "human sample",
@@ -4291,6 +4309,8 @@ class TestMetadataConfigurator(TestCase):
4291
4309
  # mouse: inherits from host_associated, keeps parent description
4292
4310
  "mouse": {
4293
4311
  DEFAULT_KEY: "not provided",
4312
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4313
+ OVERWRITE_NON_NANS_KEY: False,
4294
4314
  METADATA_FIELDS_KEY: {
4295
4315
  "description": {
4296
4316
  DEFAULT_KEY: "host associated sample",
@@ -4395,6 +4415,12 @@ class TestMetadataConfigurator(TestCase):
4395
4415
  # Flattened host types
4396
4416
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
4397
4417
  "base": {
4418
+ # default from study_config overrides software_config
4419
+ DEFAULT_KEY: "study_default",
4420
+ # leave_requireds_blank from study_config overrides software_config
4421
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4422
+ # overwrite_non_nans from software_config (not overridden by study)
4423
+ OVERWRITE_NON_NANS_KEY: True,
4398
4424
  METADATA_FIELDS_KEY: {
4399
4425
  "sample_name": {
4400
4426
  REQUIRED_KEY: True,
@@ -4409,6 +4435,10 @@ class TestMetadataConfigurator(TestCase):
4409
4435
  },
4410
4436
  "host_associated": {
4411
4437
  DEFAULT_KEY: "not provided",
4438
+ # leave_requireds_blank from study_config overrides software_config
4439
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4440
+ # overwrite_non_nans from software_config (not overridden by study)
4441
+ OVERWRITE_NON_NANS_KEY: True,
4412
4442
  METADATA_FIELDS_KEY: {
4413
4443
  "description": {
4414
4444
  DEFAULT_KEY: "host associated sample",
@@ -4457,6 +4487,10 @@ class TestMetadataConfigurator(TestCase):
4457
4487
  },
4458
4488
  "human": {
4459
4489
  DEFAULT_KEY: "not provided",
4490
+ # leave_requireds_blank from study_config overrides software_config
4491
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4492
+ # overwrite_non_nans from software_config (not overridden by study)
4493
+ OVERWRITE_NON_NANS_KEY: True,
4460
4494
  METADATA_FIELDS_KEY: {
4461
4495
  "description": {
4462
4496
  DEFAULT_KEY: "human sample",
@@ -4553,6 +4587,10 @@ class TestMetadataConfigurator(TestCase):
4553
4587
  },
4554
4588
  "mouse": {
4555
4589
  DEFAULT_KEY: "not provided",
4590
+ # leave_requireds_blank from study_config overrides software_config
4591
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4592
+ # overwrite_non_nans from software_config (not overridden by study)
4593
+ OVERWRITE_NON_NANS_KEY: True,
4556
4594
  METADATA_FIELDS_KEY: {
4557
4595
  "description": {
4558
4596
  DEFAULT_KEY: "host associated sample",
@@ -4649,6 +4687,9 @@ class TestMetadataConfigurator(TestCase):
4649
4687
  # Flattened host types
4650
4688
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
4651
4689
  "base": {
4690
+ DEFAULT_KEY: "not applicable",
4691
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4692
+ OVERWRITE_NON_NANS_KEY: False,
4652
4693
  METADATA_FIELDS_KEY: {
4653
4694
  "sample_name": {
4654
4695
  REQUIRED_KEY: True,
@@ -4663,6 +4704,8 @@ class TestMetadataConfigurator(TestCase):
4663
4704
  },
4664
4705
  "host_associated": {
4665
4706
  DEFAULT_KEY: "not provided",
4707
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4708
+ OVERWRITE_NON_NANS_KEY: False,
4666
4709
  METADATA_FIELDS_KEY: {
4667
4710
  "description": {
4668
4711
  DEFAULT_KEY: "host associated sample",
@@ -4711,6 +4754,8 @@ class TestMetadataConfigurator(TestCase):
4711
4754
  },
4712
4755
  "human": {
4713
4756
  DEFAULT_KEY: "not provided",
4757
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4758
+ OVERWRITE_NON_NANS_KEY: False,
4714
4759
  METADATA_FIELDS_KEY: {
4715
4760
  "description": {
4716
4761
  DEFAULT_KEY: "human sample",
@@ -4807,6 +4852,8 @@ class TestMetadataConfigurator(TestCase):
4807
4852
  },
4808
4853
  "mouse": {
4809
4854
  DEFAULT_KEY: "not provided",
4855
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4856
+ OVERWRITE_NON_NANS_KEY: False,
4810
4857
  METADATA_FIELDS_KEY: {
4811
4858
  "description": {
4812
4859
  DEFAULT_KEY: "host associated sample",
@@ -4867,4 +4914,140 @@ class TestMetadataConfigurator(TestCase):
4867
4914
  }
4868
4915
  }
4869
4916
  }
4917
+
4918
+ self.assertEqual(expected, result)
4919
+
4920
+ # Tests for _push_global_settings_into_top_host
4921
+
4922
+ def test__push_global_settings_into_top_host_single_setting(self):
4923
+ """Test pushing a single global setting into the top-level host."""
4924
+ nested_hosts_dict = {
4925
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4926
+ "base": {
4927
+ METADATA_FIELDS_KEY: {
4928
+ "field1": {TYPE_KEY: "string"}
4929
+ }
4930
+ }
4931
+ }
4932
+ }
4933
+ flat_config_dict = {
4934
+ DEFAULT_KEY: "custom_default"
4935
+ }
4936
+
4937
+ expected = {
4938
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4939
+ "base": {
4940
+ DEFAULT_KEY: "custom_default",
4941
+ METADATA_FIELDS_KEY: {
4942
+ "field1": {TYPE_KEY: "string"}
4943
+ }
4944
+ }
4945
+ }
4946
+ }
4947
+
4948
+ result = _push_global_settings_into_top_host(
4949
+ nested_hosts_dict, flat_config_dict)
4950
+
4951
+ self.assertEqual(expected, result)
4952
+ # Original should be unchanged
4953
+ self.assertNotIn(
4954
+ DEFAULT_KEY,
4955
+ nested_hosts_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]["base"])
4956
+
4957
+ def test__push_global_settings_into_top_host_multiple_settings(self):
4958
+ """Test pushing multiple global settings into the top-level host."""
4959
+ nested_hosts_dict = {
4960
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4961
+ "base": {
4962
+ METADATA_FIELDS_KEY: {
4963
+ "field1": {TYPE_KEY: "string"}
4964
+ }
4965
+ }
4966
+ }
4967
+ }
4968
+ flat_config_dict = {
4969
+ DEFAULT_KEY: "custom_default",
4970
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4971
+ OVERWRITE_NON_NANS_KEY: True
4972
+ }
4973
+
4974
+ expected = {
4975
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4976
+ "base": {
4977
+ DEFAULT_KEY: "custom_default",
4978
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4979
+ OVERWRITE_NON_NANS_KEY: True,
4980
+ METADATA_FIELDS_KEY: {
4981
+ "field1": {TYPE_KEY: "string"}
4982
+ }
4983
+ }
4984
+ }
4985
+ }
4986
+
4987
+ result = _push_global_settings_into_top_host(
4988
+ nested_hosts_dict, flat_config_dict)
4989
+
4870
4990
  self.assertEqual(expected, result)
4991
+
4992
+ def test__push_global_settings_into_top_host_no_settings(self):
4993
+ """Test that function returns copy when no global settings present."""
4994
+ nested_hosts_dict = {
4995
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4996
+ "base": {
4997
+ METADATA_FIELDS_KEY: {
4998
+ "field1": {TYPE_KEY: "string"}
4999
+ }
5000
+ }
5001
+ }
5002
+ }
5003
+ flat_config_dict = {
5004
+ "some_other_key": "value"
5005
+ }
5006
+
5007
+ expected = {
5008
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
5009
+ "base": {
5010
+ METADATA_FIELDS_KEY: {
5011
+ "field1": {TYPE_KEY: "string"}
5012
+ }
5013
+ }
5014
+ }
5015
+ }
5016
+
5017
+ result = _push_global_settings_into_top_host(
5018
+ nested_hosts_dict, flat_config_dict)
5019
+
5020
+ self.assertEqual(expected, result)
5021
+
5022
+ def test__push_global_settings_into_top_host_raises_on_zero_hosts(self):
5023
+ """Test that ValueError is raised when no top-level hosts exist."""
5024
+ nested_hosts_dict = {
5025
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {}
5026
+ }
5027
+ flat_config_dict = {
5028
+ DEFAULT_KEY: "custom_default"
5029
+ }
5030
+
5031
+ with self.assertRaisesRegex(
5032
+ ValueError,
5033
+ r"Expected exactly one top-level key.*found: \[\]"):
5034
+ _push_global_settings_into_top_host(
5035
+ nested_hosts_dict, flat_config_dict)
5036
+
5037
+ def test__push_global_settings_into_top_host_raises_on_multiple_hosts(self):
5038
+ """Test that ValueError is raised when multiple top-level hosts exist."""
5039
+ nested_hosts_dict = {
5040
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
5041
+ "host1": {METADATA_FIELDS_KEY: {}},
5042
+ "host2": {METADATA_FIELDS_KEY: {}}
5043
+ }
5044
+ }
5045
+ flat_config_dict = {
5046
+ DEFAULT_KEY: "custom_default"
5047
+ }
5048
+
5049
+ with self.assertRaisesRegex(
5050
+ ValueError,
5051
+ r"Expected exactly one top-level key"):
5052
+ _push_global_settings_into_top_host(
5053
+ nested_hosts_dict, flat_config_dict)
@@ -748,16 +748,15 @@ class TestMetadataExtender(TestCase):
748
748
 
749
749
  # Tests for _fill_na_if_default
750
750
 
751
- def test__fill_na_if_default_specific_overrides_settings(self):
751
+ def test__fill_na_if_default_has_default_in_settings(self):
752
752
  """Test that specific_dict default takes precedence over settings_dict."""
753
753
  input_df = pandas.DataFrame({
754
754
  "field1": ["value1", np.nan, "value3"],
755
755
  "field2": [np.nan, "value2", np.nan]
756
756
  })
757
- specific_dict = {DEFAULT_KEY: "filled"}
758
- settings_dict = {DEFAULT_KEY: "unused"}
757
+ settings_dict = {DEFAULT_KEY: "filled"}
759
758
 
760
- result = _fill_na_if_default(input_df, specific_dict, settings_dict)
759
+ result = _fill_na_if_default(input_df, settings_dict)
761
760
 
762
761
  expected = pandas.DataFrame({
763
762
  "field1": ["value1", "filled", "value3"],
@@ -765,18 +764,19 @@ class TestMetadataExtender(TestCase):
765
764
  })
766
765
  assert_frame_equal(expected, result)
767
766
 
768
- def test__fill_na_if_default_uses_settings_when_specific_missing(self):
769
- """Test that settings_dict default is used when specific_dict has no default."""
767
+ def test__fill_na_if_default_no_default_in_settings(self):
768
+ """Test that NaN values are unchanged when no default is in settings."""
770
769
  input_df = pandas.DataFrame({
771
- "field1": [np.nan]
770
+ "field1": ["value1", np.nan, "value3"],
771
+ "field2": [np.nan, "value2", np.nan]
772
772
  })
773
- specific_dict = {}
774
- settings_dict = {DEFAULT_KEY: "settings_default"}
773
+ settings_dict = {}
775
774
 
776
- result = _fill_na_if_default(input_df, specific_dict, settings_dict)
775
+ result = _fill_na_if_default(input_df, settings_dict)
777
776
 
778
777
  expected = pandas.DataFrame({
779
- "field1": ["settings_default"]
778
+ "field1": ["value1", np.nan, "value3"],
779
+ "field2": [np.nan, "value2", np.nan]
780
780
  })
781
781
  assert_frame_equal(expected, result)
782
782
 
@@ -1273,14 +1273,13 @@ class TestMetadataExtender(TestCase):
1273
1273
  SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
1274
1274
  QC_NOTE_KEY: ["", ""]
1275
1275
  })
1276
- global_plus_host_settings_dict = {
1277
- OVERWRITE_NON_NANS_KEY: False,
1278
- LEAVE_REQUIREDS_BLANK_KEY: False,
1279
- DEFAULT_KEY: "not provided"
1280
- }
1276
+
1281
1277
  # Config is pre-resolved: sample type's metadata_fields already includes
1282
1278
  # host fields merged in, plus sample_type and qiita_sample_type
1283
1279
  host_type_config_dict = {
1280
+ OVERWRITE_NON_NANS_KEY: False,
1281
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1282
+ DEFAULT_KEY: "not provided",
1284
1283
  METADATA_FIELDS_KEY: {
1285
1284
  "host_field": {
1286
1285
  DEFAULT_KEY: "host_default",
@@ -1314,7 +1313,7 @@ class TestMetadataExtender(TestCase):
1314
1313
  }
1315
1314
 
1316
1315
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1317
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1316
+ input_df, "stool", host_type_config_dict)
1318
1317
 
1319
1318
  expected_df = pandas.DataFrame({
1320
1319
  SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1337,12 +1336,11 @@ class TestMetadataExtender(TestCase):
1337
1336
  SAMPLETYPE_SHORTHAND_KEY: ["unknown_type"],
1338
1337
  QC_NOTE_KEY: [""]
1339
1338
  })
1340
- global_plus_host_settings_dict = {
1339
+
1340
+ host_type_config_dict = {
1341
1341
  OVERWRITE_NON_NANS_KEY: False,
1342
1342
  LEAVE_REQUIREDS_BLANK_KEY: False,
1343
- DEFAULT_KEY: "not provided"
1344
- }
1345
- host_type_config_dict = {
1343
+ DEFAULT_KEY: "not provided",
1346
1344
  METADATA_FIELDS_KEY: {},
1347
1345
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1348
1346
  "stool": {
@@ -1352,7 +1350,7 @@ class TestMetadataExtender(TestCase):
1352
1350
  }
1353
1351
 
1354
1352
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1355
- input_df, "unknown_type", global_plus_host_settings_dict, host_type_config_dict)
1353
+ input_df, "unknown_type", host_type_config_dict)
1356
1354
 
1357
1355
  expected_df = pandas.DataFrame({
1358
1356
  SAMPLE_NAME_KEY: ["sample1"],
@@ -1371,12 +1369,11 @@ class TestMetadataExtender(TestCase):
1371
1369
  SAMPLETYPE_SHORTHAND_KEY: ["stool", "blood", "stool"],
1372
1370
  QC_NOTE_KEY: ["", "", ""]
1373
1371
  })
1374
- global_plus_host_settings_dict = {
1372
+
1373
+ host_type_config_dict = {
1375
1374
  OVERWRITE_NON_NANS_KEY: False,
1376
1375
  LEAVE_REQUIREDS_BLANK_KEY: False,
1377
- DEFAULT_KEY: "not provided"
1378
- }
1379
- host_type_config_dict = {
1376
+ DEFAULT_KEY: "not provided",
1380
1377
  METADATA_FIELDS_KEY: {},
1381
1378
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1382
1379
  "stool": {
@@ -1394,7 +1391,7 @@ class TestMetadataExtender(TestCase):
1394
1391
  }
1395
1392
 
1396
1393
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1397
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1394
+ input_df, "stool", host_type_config_dict)
1398
1395
 
1399
1396
  # Should only have the two stool samples
1400
1397
  self.assertEqual(2, len(result_df))
@@ -1409,12 +1406,11 @@ class TestMetadataExtender(TestCase):
1409
1406
  SAMPLETYPE_SHORTHAND_KEY: ["stool"],
1410
1407
  QC_NOTE_KEY: [""]
1411
1408
  })
1412
- global_plus_host_settings_dict = {
1409
+
1410
+ host_type_config_dict = {
1413
1411
  OVERWRITE_NON_NANS_KEY: False,
1414
1412
  LEAVE_REQUIREDS_BLANK_KEY: True,
1415
- DEFAULT_KEY: "not provided"
1416
- }
1417
- host_type_config_dict = {
1413
+ DEFAULT_KEY: "not provided",
1418
1414
  METADATA_FIELDS_KEY: {},
1419
1415
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1420
1416
  "stool": {
@@ -1429,7 +1425,7 @@ class TestMetadataExtender(TestCase):
1429
1425
  }
1430
1426
 
1431
1427
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1432
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1428
+ input_df, "stool", host_type_config_dict)
1433
1429
 
1434
1430
  self.assertEqual(LEAVE_BLANK_VAL, result_df["required_field"].iloc[0])
1435
1431
 
@@ -1441,12 +1437,11 @@ class TestMetadataExtender(TestCase):
1441
1437
  SAMPLETYPE_SHORTHAND_KEY: ["stool"],
1442
1438
  QC_NOTE_KEY: [""]
1443
1439
  })
1444
- global_plus_host_settings_dict = {
1440
+
1441
+ host_type_config_dict = {
1445
1442
  OVERWRITE_NON_NANS_KEY: False,
1446
1443
  LEAVE_REQUIREDS_BLANK_KEY: False,
1447
- DEFAULT_KEY: "global_default"
1448
- }
1449
- host_type_config_dict = {
1444
+ DEFAULT_KEY: "global_default",
1450
1445
  METADATA_FIELDS_KEY: {},
1451
1446
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1452
1447
  "stool": {
@@ -1461,7 +1456,7 @@ class TestMetadataExtender(TestCase):
1461
1456
  }
1462
1457
 
1463
1458
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1464
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1459
+ input_df, "stool", host_type_config_dict)
1465
1460
 
1466
1461
  # When leave_requireds_blank is False, NaN values get filled with global default
1467
1462
  self.assertEqual("global_default", result_df["required_field"].iloc[0])
@@ -1475,12 +1470,11 @@ class TestMetadataExtender(TestCase):
1475
1470
  QC_NOTE_KEY: [""],
1476
1471
  "existing_field": ["original_value"]
1477
1472
  })
1478
- global_plus_host_settings_dict = {
1473
+
1474
+ host_type_config_dict = {
1479
1475
  OVERWRITE_NON_NANS_KEY: True,
1480
1476
  LEAVE_REQUIREDS_BLANK_KEY: False,
1481
- DEFAULT_KEY: "not provided"
1482
- }
1483
- host_type_config_dict = {
1477
+ DEFAULT_KEY: "not provided",
1484
1478
  METADATA_FIELDS_KEY: {},
1485
1479
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1486
1480
  "stool": {
@@ -1495,7 +1489,7 @@ class TestMetadataExtender(TestCase):
1495
1489
  }
1496
1490
 
1497
1491
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1498
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1492
+ input_df, "stool", host_type_config_dict)
1499
1493
 
1500
1494
  self.assertEqual("new_value", result_df["existing_field"].iloc[0])
1501
1495
 
@@ -1508,12 +1502,11 @@ class TestMetadataExtender(TestCase):
1508
1502
  QC_NOTE_KEY: [""],
1509
1503
  "existing_field": ["original_value"]
1510
1504
  })
1511
- global_plus_host_settings_dict = {
1505
+
1506
+ host_type_config_dict = {
1512
1507
  OVERWRITE_NON_NANS_KEY: False,
1513
1508
  LEAVE_REQUIREDS_BLANK_KEY: False,
1514
- DEFAULT_KEY: "not provided"
1515
- }
1516
- host_type_config_dict = {
1509
+ DEFAULT_KEY: "not provided",
1517
1510
  METADATA_FIELDS_KEY: {},
1518
1511
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1519
1512
  "stool": {
@@ -1528,7 +1521,7 @@ class TestMetadataExtender(TestCase):
1528
1521
  }
1529
1522
 
1530
1523
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1531
- input_df, "stool", global_plus_host_settings_dict, host_type_config_dict)
1524
+ input_df, "stool", host_type_config_dict)
1532
1525
 
1533
1526
  self.assertEqual("original_value", result_df["existing_field"].iloc[0])
1534
1527
 
@@ -1540,14 +1533,13 @@ class TestMetadataExtender(TestCase):
1540
1533
  SAMPLETYPE_SHORTHAND_KEY: ["feces"],
1541
1534
  QC_NOTE_KEY: [""]
1542
1535
  })
1543
- global_plus_host_settings_dict = {
1544
- OVERWRITE_NON_NANS_KEY: False,
1545
- LEAVE_REQUIREDS_BLANK_KEY: False,
1546
- DEFAULT_KEY: "not provided"
1547
- }
1536
+
1548
1537
  # Config is pre-resolved: alias "feces" has its own metadata_fields
1549
1538
  # that is a copy of "stool"'s resolved fields with sample_type="stool"
1550
1539
  host_type_config_dict = {
1540
+ OVERWRITE_NON_NANS_KEY: False,
1541
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1542
+ DEFAULT_KEY: "not provided",
1551
1543
  METADATA_FIELDS_KEY: {},
1552
1544
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1553
1545
  "feces": {
@@ -1590,7 +1582,7 @@ class TestMetadataExtender(TestCase):
1590
1582
  }
1591
1583
 
1592
1584
  result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
1593
- input_df, "feces", global_plus_host_settings_dict, host_type_config_dict)
1585
+ input_df, "feces", host_type_config_dict)
1594
1586
 
1595
1587
  self.assertEqual("stool_value", result_df["stool_field"].iloc[0])
1596
1588
  # sample_type should be set to the resolved type "stool"
@@ -1606,17 +1598,15 @@ class TestMetadataExtender(TestCase):
1606
1598
  SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
1607
1599
  QC_NOTE_KEY: ["", ""]
1608
1600
  })
1609
- settings_dict = {
1610
- OVERWRITE_NON_NANS_KEY: False,
1611
- LEAVE_REQUIREDS_BLANK_KEY: False,
1612
- DEFAULT_KEY: "global_default"
1613
- }
1601
+
1614
1602
  # Config is pre-resolved: sample type's metadata_fields includes
1615
1603
  # host fields merged in, plus sample_type and qiita_sample_type
1616
1604
  full_flat_config_dict = {
1617
1605
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1618
1606
  "human": {
1619
1607
  DEFAULT_KEY: "human_default",
1608
+ OVERWRITE_NON_NANS_KEY: False,
1609
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1620
1610
  METADATA_FIELDS_KEY: {
1621
1611
  "host_field": {
1622
1612
  DEFAULT_KEY: "host_value",
@@ -1652,7 +1642,7 @@ class TestMetadataExtender(TestCase):
1652
1642
  }
1653
1643
 
1654
1644
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1655
- input_df, "human", settings_dict, full_flat_config_dict)
1645
+ input_df, "human", full_flat_config_dict)
1656
1646
 
1657
1647
  expected_df = pandas.DataFrame({
1658
1648
  SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1675,14 +1665,13 @@ class TestMetadataExtender(TestCase):
1675
1665
  SAMPLETYPE_SHORTHAND_KEY: ["stool"],
1676
1666
  QC_NOTE_KEY: [""]
1677
1667
  })
1678
- settings_dict = {
1679
- OVERWRITE_NON_NANS_KEY: False,
1680
- LEAVE_REQUIREDS_BLANK_KEY: False,
1681
- DEFAULT_KEY: "global_default"
1682
- }
1668
+
1683
1669
  full_flat_config_dict = {
1684
1670
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1685
1671
  "human": {
1672
+ OVERWRITE_NON_NANS_KEY: False,
1673
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1674
+ DEFAULT_KEY: "global_default",
1686
1675
  METADATA_FIELDS_KEY: {},
1687
1676
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
1688
1677
  }
@@ -1690,7 +1679,7 @@ class TestMetadataExtender(TestCase):
1690
1679
  }
1691
1680
 
1692
1681
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1693
- input_df, "unknown_host", settings_dict, full_flat_config_dict)
1682
+ input_df, "unknown_host", full_flat_config_dict)
1694
1683
 
1695
1684
  expected_df = pandas.DataFrame({
1696
1685
  SAMPLE_NAME_KEY: ["sample1"],
@@ -1709,14 +1698,13 @@ class TestMetadataExtender(TestCase):
1709
1698
  SAMPLETYPE_SHORTHAND_KEY: ["unknown_sample"],
1710
1699
  QC_NOTE_KEY: [""]
1711
1700
  })
1712
- settings_dict = {
1713
- OVERWRITE_NON_NANS_KEY: False,
1714
- LEAVE_REQUIREDS_BLANK_KEY: False,
1715
- DEFAULT_KEY: "global_default"
1716
- }
1701
+
1717
1702
  full_flat_config_dict = {
1718
1703
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1719
1704
  "human": {
1705
+ OVERWRITE_NON_NANS_KEY: False,
1706
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1707
+ DEFAULT_KEY: "global_default",
1720
1708
  METADATA_FIELDS_KEY: {},
1721
1709
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1722
1710
  "stool": {
@@ -1728,7 +1716,7 @@ class TestMetadataExtender(TestCase):
1728
1716
  }
1729
1717
 
1730
1718
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1731
- input_df, "human", settings_dict, full_flat_config_dict)
1719
+ input_df, "human", full_flat_config_dict)
1732
1720
 
1733
1721
  expected_df = pandas.DataFrame({
1734
1722
  SAMPLE_NAME_KEY: ["sample1"],
@@ -1747,16 +1735,15 @@ class TestMetadataExtender(TestCase):
1747
1735
  SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool", "stool"],
1748
1736
  QC_NOTE_KEY: ["", "", ""]
1749
1737
  })
1750
- settings_dict = {
1751
- OVERWRITE_NON_NANS_KEY: False,
1752
- LEAVE_REQUIREDS_BLANK_KEY: False,
1753
- DEFAULT_KEY: "global_default"
1754
- }
1738
+
1755
1739
  # Config is pre-resolved: sample type's metadata_fields includes
1756
1740
  # host fields merged in, plus sample_type and qiita_sample_type
1757
1741
  full_flat_config_dict = {
1758
1742
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1759
1743
  "human": {
1744
+ OVERWRITE_NON_NANS_KEY: False,
1745
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1746
+ DEFAULT_KEY: "global_default",
1760
1747
  METADATA_FIELDS_KEY: {
1761
1748
  "human_field": {
1762
1749
  DEFAULT_KEY: "human_value",
@@ -1785,6 +1772,9 @@ class TestMetadataExtender(TestCase):
1785
1772
  }
1786
1773
  },
1787
1774
  "mouse": {
1775
+ OVERWRITE_NON_NANS_KEY: False,
1776
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1777
+ DEFAULT_KEY: "global_default",
1788
1778
  METADATA_FIELDS_KEY: {},
1789
1779
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
1790
1780
  }
@@ -1792,7 +1782,7 @@ class TestMetadataExtender(TestCase):
1792
1782
  }
1793
1783
 
1794
1784
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1795
- input_df, "human", settings_dict, full_flat_config_dict)
1785
+ input_df, "human", full_flat_config_dict)
1796
1786
 
1797
1787
  expected_df = pandas.DataFrame({
1798
1788
  SAMPLE_NAME_KEY: ["sample1", "sample3"],
@@ -1813,17 +1803,15 @@ class TestMetadataExtender(TestCase):
1813
1803
  SAMPLETYPE_SHORTHAND_KEY: ["stool"],
1814
1804
  QC_NOTE_KEY: [""]
1815
1805
  })
1816
- settings_dict = {
1817
- OVERWRITE_NON_NANS_KEY: False,
1818
- LEAVE_REQUIREDS_BLANK_KEY: False,
1819
- DEFAULT_KEY: "global_default"
1820
- }
1806
+
1821
1807
  # Config is pre-resolved: sample type's metadata_fields includes
1822
1808
  # host fields merged in, plus sample_type and qiita_sample_type
1823
1809
  full_flat_config_dict = {
1824
1810
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1825
1811
  "human": {
1826
1812
  DEFAULT_KEY: "human_specific_default",
1813
+ OVERWRITE_NON_NANS_KEY: False,
1814
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1827
1815
  METADATA_FIELDS_KEY: {},
1828
1816
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1829
1817
  "stool": {
@@ -1850,7 +1838,7 @@ class TestMetadataExtender(TestCase):
1850
1838
  }
1851
1839
 
1852
1840
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1853
- input_df, "human", settings_dict, full_flat_config_dict)
1841
+ input_df, "human", full_flat_config_dict)
1854
1842
 
1855
1843
  expected_df = pandas.DataFrame({
1856
1844
  SAMPLE_NAME_KEY: ["sample1"],
@@ -1871,17 +1859,14 @@ class TestMetadataExtender(TestCase):
1871
1859
  SAMPLETYPE_SHORTHAND_KEY: ["stool"],
1872
1860
  QC_NOTE_KEY: [""]
1873
1861
  })
1874
- settings_dict = {
1875
- OVERWRITE_NON_NANS_KEY: False,
1876
- LEAVE_REQUIREDS_BLANK_KEY: False,
1877
- DEFAULT_KEY: "global_default"
1878
- }
1879
1862
  # Config is pre-resolved: sample type's metadata_fields includes
1880
1863
  # host fields merged in, plus sample_type and qiita_sample_type
1881
1864
  full_flat_config_dict = {
1882
1865
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1883
1866
  "human": {
1884
- # No DEFAULT_KEY here
1867
+ OVERWRITE_NON_NANS_KEY: False,
1868
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1869
+ DEFAULT_KEY: "global_default",
1885
1870
  METADATA_FIELDS_KEY: {},
1886
1871
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1887
1872
  "stool": {
@@ -1908,7 +1893,7 @@ class TestMetadataExtender(TestCase):
1908
1893
  }
1909
1894
 
1910
1895
  result_df, validation_msgs = _generate_metadata_for_a_host_type(
1911
- input_df, "human", settings_dict, full_flat_config_dict)
1896
+ input_df, "human", full_flat_config_dict)
1912
1897
 
1913
1898
  expected_df = pandas.DataFrame({
1914
1899
  SAMPLE_NAME_KEY: ["sample1"],
@@ -1939,6 +1924,9 @@ class TestMetadataExtender(TestCase):
1939
1924
  OVERWRITE_NON_NANS_KEY: False,
1940
1925
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
1941
1926
  "human": {
1927
+ DEFAULT_KEY: "global_default",
1928
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1929
+ OVERWRITE_NON_NANS_KEY: False,
1942
1930
  METADATA_FIELDS_KEY: {
1943
1931
  "host_field": {
1944
1932
  DEFAULT_KEY: "host_value",
@@ -2005,6 +1993,9 @@ class TestMetadataExtender(TestCase):
2005
1993
  OVERWRITE_NON_NANS_KEY: False,
2006
1994
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2007
1995
  "human": {
1996
+ DEFAULT_KEY: "global_default",
1997
+ LEAVE_REQUIREDS_BLANK_KEY: False,
1998
+ OVERWRITE_NON_NANS_KEY: False,
2008
1999
  METADATA_FIELDS_KEY: {
2009
2000
  "human_field": {
2010
2001
  DEFAULT_KEY: "human_value",
@@ -2051,6 +2042,9 @@ class TestMetadataExtender(TestCase):
2051
2042
  }
2052
2043
  },
2053
2044
  "mouse": {
2045
+ DEFAULT_KEY: "global_default",
2046
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2047
+ OVERWRITE_NON_NANS_KEY: False,
2054
2048
  METADATA_FIELDS_KEY: {
2055
2049
  "mouse_field": {
2056
2050
  DEFAULT_KEY: "mouse_value",
@@ -2182,6 +2176,9 @@ class TestMetadataExtender(TestCase):
2182
2176
  OVERWRITE_NON_NANS_KEY: False,
2183
2177
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2184
2178
  "human": {
2179
+ DEFAULT_KEY: "global_default",
2180
+ LEAVE_REQUIREDS_BLANK_KEY: True, # This causes required fields to get LEAVE_BLANK_VAL
2181
+ OVERWRITE_NON_NANS_KEY: False,
2185
2182
  METADATA_FIELDS_KEY: {},
2186
2183
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2187
2184
  "stool": {
@@ -2506,6 +2503,9 @@ class TestMetadataExtender(TestCase):
2506
2503
  OVERWRITE_NON_NANS_KEY: False,
2507
2504
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2508
2505
  "human": {
2506
+ DEFAULT_KEY: "not provided",
2507
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2508
+ OVERWRITE_NON_NANS_KEY: False,
2509
2509
  METADATA_FIELDS_KEY: {
2510
2510
  "host_field": {
2511
2511
  DEFAULT_KEY: "host_value",
@@ -2580,6 +2580,9 @@ class TestMetadataExtender(TestCase):
2580
2580
  },
2581
2581
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2582
2582
  "human": {
2583
+ DEFAULT_KEY: "not provided",
2584
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2585
+ OVERWRITE_NON_NANS_KEY: False,
2583
2586
  METADATA_FIELDS_KEY: {},
2584
2587
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2585
2588
  "stool": {
@@ -2639,6 +2642,9 @@ class TestMetadataExtender(TestCase):
2639
2642
  },
2640
2643
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2641
2644
  "human": {
2645
+ DEFAULT_KEY: "not provided",
2646
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2647
+ OVERWRITE_NON_NANS_KEY: False,
2642
2648
  METADATA_FIELDS_KEY: {},
2643
2649
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2644
2650
  "stool": {
@@ -2687,6 +2693,9 @@ class TestMetadataExtender(TestCase):
2687
2693
  OVERWRITE_NON_NANS_KEY: False,
2688
2694
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2689
2695
  "human": {
2696
+ DEFAULT_KEY: "not provided",
2697
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2698
+ OVERWRITE_NON_NANS_KEY: False,
2690
2699
  METADATA_FIELDS_KEY: {},
2691
2700
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
2692
2701
  }
@@ -2721,6 +2730,9 @@ class TestMetadataExtender(TestCase):
2721
2730
  OVERWRITE_NON_NANS_KEY: False,
2722
2731
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2723
2732
  "human": {
2733
+ DEFAULT_KEY: "not provided",
2734
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2735
+ OVERWRITE_NON_NANS_KEY: False,
2724
2736
  METADATA_FIELDS_KEY: {},
2725
2737
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2726
2738
  "stool": {
@@ -2781,6 +2793,9 @@ class TestMetadataExtender(TestCase):
2781
2793
  },
2782
2794
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
2783
2795
  "human": {
2796
+ DEFAULT_KEY: "not provided",
2797
+ LEAVE_REQUIREDS_BLANK_KEY: False,
2798
+ OVERWRITE_NON_NANS_KEY: False,
2784
2799
  METADATA_FIELDS_KEY: {},
2785
2800
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2786
2801
  "stool": {
@@ -4142,6 +4157,7 @@ class TestMetadataExtender(TestCase):
4142
4157
  TEST_DIR, "data/test_project1_output_metadata.txt")
4143
4158
  TEST_PROJECT1_EXPECTED_FAILS_FP = path.join(
4144
4159
  TEST_DIR, "data/test_project1_output_fails.csv")
4160
+
4145
4161
  def test_write_extended_metadata_from_df_project1_integration(self):
4146
4162
  """Integration test using project1 test data files."""
4147
4163
 
@@ -4153,7 +4169,6 @@ class TestMetadataExtender(TestCase):
4153
4169
  with open(path.join(debug_dir, f"UNMATCHED_2_{file_name}"), 'w') as debug_actual_file:
4154
4170
  debug_actual_file.write(actual_content)
4155
4171
 
4156
-
4157
4172
  # Load input metadata CSV
4158
4173
  input_df = pandas.read_csv(self.TEST_PROJECT1_METADATA_FP, dtype=str)
4159
4174
  # for the columns "plating_notes" and "notes", fill NaN with empty string
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metameq
3
- Version: 2026.2.1
3
+ Version: 2026.2.2
4
4
  Summary: Qiita-compliant metadata generation and validation tool
5
5
  Home-page: https://github.com/AmandaBirmingham/metameq
6
6
  Author: Amanda Birmingham
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes