metameq 2026.1.2__tar.gz → 2026.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {metameq-2026.1.2/metameq.egg-info → metameq-2026.2.1}/PKG-INFO +1 -1
  2. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/_version.py +3 -3
  3. metameq-2026.2.1/metameq/config/config.yml +7 -0
  4. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_extender.py +48 -4
  5. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/util.py +2 -0
  6. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_configurator.py +4 -1
  7. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_extender.py +140 -2
  8. {metameq-2026.1.2 → metameq-2026.2.1/metameq.egg-info}/PKG-INFO +1 -1
  9. metameq-2026.1.2/metameq/config/config.yml +0 -3
  10. {metameq-2026.1.2 → metameq-2026.2.1}/.gitattributes +0 -0
  11. {metameq-2026.1.2 → metameq-2026.2.1}/.github/workflows/main.yaml +0 -0
  12. {metameq-2026.1.2 → metameq-2026.2.1}/.gitignore +0 -0
  13. {metameq-2026.1.2 → metameq-2026.2.1}/README.md +0 -0
  14. {metameq-2026.1.2 → metameq-2026.2.1}/assets/metameq.png +0 -0
  15. {metameq-2026.1.2 → metameq-2026.2.1}/assets/metameq_dark.svg +0 -0
  16. {metameq-2026.1.2 → metameq-2026.2.1}/assets/metameq_light.svg +0 -0
  17. {metameq-2026.1.2 → metameq-2026.2.1}/assets/metameq_medium.png +0 -0
  18. {metameq-2026.1.2 → metameq-2026.2.1}/environment.yml +0 -0
  19. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/__init__.py +0 -0
  20. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/config/__init__.py +0 -0
  21. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/config/standards.yml +0 -0
  22. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/__init__.py +0 -0
  23. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/__main__.py +0 -0
  24. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_configurator.py +0 -0
  25. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_merger.py +0 -0
  26. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_transformers.py +0 -0
  27. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/src/metadata_validator.py +0 -0
  28. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/__init__.py +0 -0
  29. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/data/invalid.yml +0 -0
  30. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/data/test_config.yml +0 -0
  31. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_merger.py +0 -0
  32. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_transformers.py +0 -0
  33. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_metadata_validator.py +0 -0
  34. {metameq-2026.1.2 → metameq-2026.2.1}/metameq/tests/test_util.py +0 -0
  35. {metameq-2026.1.2 → metameq-2026.2.1}/metameq.egg-info/SOURCES.txt +0 -0
  36. {metameq-2026.1.2 → metameq-2026.2.1}/metameq.egg-info/dependency_links.txt +0 -0
  37. {metameq-2026.1.2 → metameq-2026.2.1}/metameq.egg-info/entry_points.txt +0 -0
  38. {metameq-2026.1.2 → metameq-2026.2.1}/metameq.egg-info/requires.txt +0 -0
  39. {metameq-2026.1.2 → metameq-2026.2.1}/metameq.egg-info/top_level.txt +0 -0
  40. {metameq-2026.1.2 → metameq-2026.2.1}/setup.cfg +0 -0
  41. {metameq-2026.1.2 → metameq-2026.2.1}/setup.py +0 -0
  42. {metameq-2026.1.2 → metameq-2026.2.1}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metameq
3
- Version: 2026.1.2
3
+ Version: 2026.2.1
4
4
  Summary: Qiita-compliant metadata generation and validation tool
5
5
  Home-page: https://github.com/AmandaBirmingham/metameq
6
6
  Author: Amanda Birmingham
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2026-01-31T12:28:01-0800",
11
+ "date": "2026-02-01T21:32:06-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "889941fbd7d28889867e3f4b6edba2d50dbc5956",
15
- "version": "2026.01.2"
14
+ "full-revisionid": "87171fd73f555e2c03a15fa36ed9b5a912b824e9",
15
+ "version": "2026.02.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -0,0 +1,7 @@
1
+ "default": "not applicable"
2
+ "leave_requireds_blank": false
3
+ "overwrite_non_nans": false
4
+ "hosttype_column_options":
5
+ - "host_common_name"
6
+ "sampletype_column_options":
7
+ - "sample_type"
@@ -15,7 +15,8 @@ from metameq.src.util import extract_config_dict, \
15
15
  LEAVE_BLANK_VAL, SAMPLE_NAME_KEY, \
16
16
  ALLOWED_KEY, TYPE_KEY, LEAVE_REQUIREDS_BLANK_KEY, OVERWRITE_NON_NANS_KEY, \
17
17
  METADATA_TRANSFORMERS_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
18
- SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS
18
+ SOURCES_KEY, FUNCTION_KEY, REQUIRED_RAW_METADATA_FIELDS, \
19
+ HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
19
20
  from metameq.src.metadata_configurator import update_wip_metadata_dict, \
20
21
  build_full_flat_config_dict
21
22
  from metameq.src.metadata_validator import validate_metadata_df, \
@@ -447,13 +448,22 @@ def extend_metadata_df(
447
448
  ValueError
448
449
  If required columns are missing from the metadata.
449
450
  """
451
+ full_flat_config_dict = build_full_flat_config_dict(
452
+ study_specific_config_dict, software_config_dict, stds_fp)
453
+
454
+ needed_cols = [(HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY),
455
+ (SAMPLETYPE_SHORTHAND_KEY, SAMPLETYPE_COL_OPTIONS_KEY)]
456
+ for curr_key, curr_options_key in needed_cols:
457
+ if curr_key not in raw_metadata_df.columns:
458
+ specified_name = _get_specified_column_name(
459
+ curr_options_key, raw_metadata_df, full_flat_config_dict)
460
+ if specified_name:
461
+ raw_metadata_df[curr_key] = raw_metadata_df[specified_name]
462
+
450
463
  validate_required_columns_exist(
451
464
  raw_metadata_df, REQUIRED_RAW_METADATA_FIELDS,
452
465
  "metadata missing required columns")
453
466
 
454
- full_flat_config_dict = build_full_flat_config_dict(
455
- study_specific_config_dict, software_config_dict, stds_fp)
456
-
457
467
  metadata_df, validation_msgs_df = _populate_metadata_df(
458
468
  raw_metadata_df, full_flat_config_dict,
459
469
  study_specific_transformers_dict)
@@ -461,6 +471,40 @@ def extend_metadata_df(
461
471
  return metadata_df, validation_msgs_df
462
472
 
463
473
 
474
+ def _get_specified_column_name(
475
+ col_options_key: str,
476
+ raw_metadata_df: pandas.DataFrame,
477
+ config_dict: Dict[str, Any] = None) -> Optional[str]:
478
+ """Get the specified type of column name from the metadata DataFrame based on possible options.
479
+
480
+ Parameters
481
+ ----------
482
+ col_options_key : str
483
+ Key in the config dict that holds the list of possible column names to check.
484
+ raw_metadata_df : pandas.DataFrame
485
+ The metadata DataFrame to check.
486
+ config_dict : Dict[str, Any], default=None
487
+ Configuration dictionary. If provided, may contain a list of possible
488
+ column names under the key specified by col_options_key.
489
+ If None, defaults to values from the main config.yml file.
490
+ Returns
491
+ -------
492
+ Optional[str]
493
+ The specified column name found in the DataFrame, or None if not found.
494
+ """
495
+ found_name = None
496
+
497
+ if not config_dict:
498
+ config_dict = extract_config_dict(None)
499
+ col_options = config_dict.get(col_options_key)
500
+ if col_options:
501
+ for col_name in col_options:
502
+ if col_name in raw_metadata_df.columns:
503
+ found_name = col_name
504
+ break
505
+
506
+ return found_name
507
+
464
508
  def write_metadata_results(
465
509
  metadata_df: pandas.DataFrame,
466
510
  validation_msgs_df: pandas.DataFrame,
@@ -27,6 +27,8 @@ SOURCES_KEY = "sources"
27
27
  FUNCTION_KEY = "function"
28
28
  LEAVE_REQUIREDS_BLANK_KEY = "leave_requireds_blank"
29
29
  OVERWRITE_NON_NANS_KEY = "overwrite_non_nans"
30
+ HOSTTYPE_COL_OPTIONS_KEY = "hosttype_column_options"
31
+ SAMPLETYPE_COL_OPTIONS_KEY = "sampletype_column_options"
30
32
 
31
33
  # internal code keys
32
34
  HOSTTYPE_SHORTHAND_KEY = "hosttype_shorthand"
@@ -5,7 +5,8 @@ from metameq.src.util import \
5
5
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
6
6
  ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
7
7
  STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
8
- OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
8
+ OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, \
9
+ HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
9
10
  from metameq.src.metadata_configurator import \
10
11
  combine_stds_and_study_config, \
11
12
  _make_combined_stds_and_study_host_type_dicts, \
@@ -4643,6 +4644,8 @@ class TestMetadataConfigurator(TestCase):
4643
4644
  DEFAULT_KEY: "not applicable",
4644
4645
  LEAVE_REQUIREDS_BLANK_KEY: False,
4645
4646
  OVERWRITE_NON_NANS_KEY: False,
4647
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_common_name"],
4648
+ SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type"],
4646
4649
  # Flattened host types
4647
4650
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
4648
4651
  "base": {
@@ -14,7 +14,8 @@ from metameq.src.util import \
14
14
  OVERWRITE_NON_NANS_KEY, LEAVE_REQUIREDS_BLANK_KEY, LEAVE_BLANK_VAL, \
15
15
  HOST_TYPE_SPECIFIC_METADATA_KEY, METADATA_TRANSFORMERS_KEY, \
16
16
  SOURCES_KEY, FUNCTION_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
17
- STUDY_SPECIFIC_METADATA_KEY
17
+ STUDY_SPECIFIC_METADATA_KEY, HOSTTYPE_COL_OPTIONS_KEY, \
18
+ SAMPLETYPE_COL_OPTIONS_KEY
18
19
  from metameq.src.metadata_extender import \
19
20
  id_missing_cols, get_qc_failures, get_reserved_cols, find_standard_cols, \
20
21
  find_nonstandard_cols, write_metadata_results, \
@@ -26,7 +27,7 @@ from metameq.src.metadata_extender import \
26
27
  _generate_metadata_for_a_host_type, _generate_metadata_for_host_types, \
27
28
  _transform_metadata, _populate_metadata_df, extend_metadata_df, \
28
29
  _get_study_specific_config, _output_metadata_df_to_files, \
29
- INTERNAL_COL_KEYS, REQ_PLACEHOLDER
30
+ _get_specified_column_name, INTERNAL_COL_KEYS, REQ_PLACEHOLDER
30
31
 
31
32
 
32
33
  class TestMetadataExtender(TestCase):
@@ -3189,6 +3190,63 @@ class TestMetadataExtender(TestCase):
3189
3190
  })
3190
3191
  assert_frame_equal(expected_df, result_df)
3191
3192
 
3193
+ def test_extend_metadata_df_with_alternate_column_names(self):
3194
+ """Test metadata extension with alternate hosttype and sampletype column names."""
3195
+ # Use alternate column names instead of hosttype_shorthand and sampletype_shorthand
3196
+ input_df = pandas.DataFrame({
3197
+ SAMPLE_NAME_KEY: ["sample1", "sample2"],
3198
+ "host_type": ["human", "human"],
3199
+ "sample": ["stool", "stool"]
3200
+ })
3201
+ study_config = {
3202
+ DEFAULT_KEY: "not provided",
3203
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3204
+ OVERWRITE_NON_NANS_KEY: False,
3205
+ STUDY_SPECIFIC_METADATA_KEY: {
3206
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
3207
+ "human": {
3208
+ METADATA_FIELDS_KEY: {},
3209
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
3210
+ "stool": {
3211
+ METADATA_FIELDS_KEY: {}
3212
+ }
3213
+ }
3214
+ }
3215
+ }
3216
+ }
3217
+ }
3218
+ # Software config specifies alternate column names
3219
+ software_config = {
3220
+ DEFAULT_KEY: "not provided",
3221
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3222
+ OVERWRITE_NON_NANS_KEY: False,
3223
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_type"],
3224
+ SAMPLETYPE_COL_OPTIONS_KEY: ["sample"]
3225
+ }
3226
+
3227
+ result_df, validation_msgs_df = extend_metadata_df(
3228
+ input_df, study_config, None, software_config, self.TEST_STDS_FP)
3229
+
3230
+ expected_df = pandas.DataFrame({
3231
+ SAMPLE_NAME_KEY: ["sample1", "sample2"],
3232
+ "body_product": ["UBERON:feces", "UBERON:feces"],
3233
+ "body_site": ["gut", "gut"],
3234
+ "description": ["human sample", "human sample"],
3235
+ "host_common_name": ["human", "human"],
3236
+ # Alternate column names from input are preserved
3237
+ "host_type": ["human", "human"],
3238
+ QIITA_SAMPLE_TYPE: ["stool", "stool"],
3239
+ # Alternate column names from input are preserved
3240
+ "sample": ["stool", "stool"],
3241
+ SAMPLE_TYPE_KEY: ["stool", "stool"],
3242
+ # Standard internal columns added at end (in order of INTERNAL_COL_KEYS)
3243
+ HOSTTYPE_SHORTHAND_KEY: ["human", "human"],
3244
+ SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
3245
+ QC_NOTE_KEY: ["", ""]
3246
+ })
3247
+ assert_frame_equal(expected_df, result_df)
3248
+ self.assertTrue(validation_msgs_df.empty)
3249
+
3192
3250
  # Tests for _get_study_specific_config
3193
3251
 
3194
3252
  def test__get_study_specific_config_with_valid_file(self):
@@ -4145,3 +4203,83 @@ class TestMetadataExtender(TestCase):
4145
4203
  os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
4146
4204
  self.assertEqual(1, len(validation_files))
4147
4205
  self.assertEqual(0, os.path.getsize(validation_files[0]))
4206
+
4207
+ # Tests for _get_specified_column_name
4208
+
4209
+ def test__get_specified_column_name_finds_column(self):
4210
+ """Test that _get_specified_column_name finds a column that exists."""
4211
+ input_df = pandas.DataFrame({
4212
+ "sample_name": ["s1"],
4213
+ "host_type": ["human"]
4214
+ })
4215
+ config_dict = {
4216
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
4217
+ }
4218
+ result = _get_specified_column_name(
4219
+ HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
4220
+ self.assertEqual("host_type", result)
4221
+
4222
+ def test__get_specified_column_name_returns_first_match(self):
4223
+ """Test that _get_specified_column_name returns the first match when multiple options exist."""
4224
+ input_df = pandas.DataFrame({
4225
+ "sample_name": ["s1"],
4226
+ "host_type": ["human"],
4227
+ "host_common_name": ["human"]
4228
+ })
4229
+ config_dict = {
4230
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
4231
+ }
4232
+ result = _get_specified_column_name(
4233
+ HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
4234
+ self.assertEqual("host_type", result)
4235
+
4236
+ def test__get_specified_column_name_returns_none_when_no_match(self):
4237
+ """Test that _get_specified_column_name returns None when no options match."""
4238
+ input_df = pandas.DataFrame({
4239
+ "sample_name": ["s1"],
4240
+ "other_column": ["value"]
4241
+ })
4242
+ config_dict = {
4243
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_type", "host_common_name"]
4244
+ }
4245
+ result = _get_specified_column_name(
4246
+ HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
4247
+ self.assertIsNone(result)
4248
+
4249
+ def test__get_specified_column_name_returns_none_when_key_missing(self):
4250
+ """Test that _get_specified_column_name returns None when col_options_key is not in config."""
4251
+ input_df = pandas.DataFrame({
4252
+ "sample_name": ["s1"],
4253
+ "host_type": ["human"]
4254
+ })
4255
+ config_dict = {}
4256
+ result = _get_specified_column_name(
4257
+ HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
4258
+ self.assertIsNone(result)
4259
+
4260
+ def test__get_specified_column_name_returns_none_when_options_empty(self):
4261
+ """Test that _get_specified_column_name returns None when col_options is empty list."""
4262
+ input_df = pandas.DataFrame({
4263
+ "sample_name": ["s1"],
4264
+ "host_type": ["human"]
4265
+ })
4266
+ config_dict = {
4267
+ HOSTTYPE_COL_OPTIONS_KEY: []
4268
+ }
4269
+ result = _get_specified_column_name(
4270
+ HOSTTYPE_COL_OPTIONS_KEY, input_df, config_dict)
4271
+ self.assertIsNone(result)
4272
+
4273
+ def test__get_specified_column_name_with_sampletype_key(self):
4274
+ """Test that _get_specified_column_name works with sampletype column options."""
4275
+ input_df = pandas.DataFrame({
4276
+ "sample_name": ["s1"],
4277
+ "sample_type": ["stool"]
4278
+ })
4279
+ config_dict = {
4280
+ SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type", "sampletype"]
4281
+ }
4282
+ result = _get_specified_column_name(
4283
+ SAMPLETYPE_COL_OPTIONS_KEY, input_df, config_dict)
4284
+ self.assertEqual("sample_type", result)
4285
+ # endregion _get_specified_column_name tests
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: metameq
3
- Version: 2026.1.2
3
+ Version: 2026.2.1
4
4
  Summary: Qiita-compliant metadata generation and validation tool
5
5
  Home-page: https://github.com/AmandaBirmingham/metameq
6
6
  Author: Amanda Birmingham
@@ -1,3 +0,0 @@
1
- "default": "not applicable"
2
- "leave_requireds_blank": false
3
- "overwrite_non_nans": false
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes