metameq 2026.1.1__tar.gz → 2026.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {metameq-2026.1.1/metameq.egg-info → metameq-2026.1.2}/PKG-INFO +1 -1
  2. {metameq-2026.1.1 → metameq-2026.1.2}/environment.yml +1 -0
  3. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/_version.py +3 -3
  4. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/metadata_configurator.py +146 -1
  5. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/metadata_extender.py +44 -38
  6. metameq-2026.1.2/metameq/tests/test_metadata_configurator.py +4867 -0
  7. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/test_metadata_extender.py +2034 -497
  8. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/test_metadata_merger.py +1 -1
  9. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/test_util.py +1 -1
  10. {metameq-2026.1.1 → metameq-2026.1.2/metameq.egg-info}/PKG-INFO +1 -1
  11. metameq-2026.1.1/metameq/tests/test_metadata_configurator.py +0 -2334
  12. {metameq-2026.1.1 → metameq-2026.1.2}/.gitattributes +0 -0
  13. {metameq-2026.1.1 → metameq-2026.1.2}/.github/workflows/main.yaml +0 -0
  14. {metameq-2026.1.1 → metameq-2026.1.2}/.gitignore +0 -0
  15. {metameq-2026.1.1 → metameq-2026.1.2}/README.md +0 -0
  16. {metameq-2026.1.1 → metameq-2026.1.2}/assets/metameq.png +0 -0
  17. {metameq-2026.1.1 → metameq-2026.1.2}/assets/metameq_dark.svg +0 -0
  18. {metameq-2026.1.1 → metameq-2026.1.2}/assets/metameq_light.svg +0 -0
  19. {metameq-2026.1.1 → metameq-2026.1.2}/assets/metameq_medium.png +0 -0
  20. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/__init__.py +0 -0
  21. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/config/__init__.py +0 -0
  22. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/config/config.yml +0 -0
  23. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/config/standards.yml +0 -0
  24. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/__init__.py +0 -0
  25. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/__main__.py +0 -0
  26. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/metadata_merger.py +0 -0
  27. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/metadata_transformers.py +0 -0
  28. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/metadata_validator.py +0 -0
  29. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/src/util.py +0 -0
  30. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/__init__.py +0 -0
  31. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/data/invalid.yml +0 -0
  32. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/data/test_config.yml +0 -0
  33. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/test_metadata_transformers.py +0 -0
  34. {metameq-2026.1.1 → metameq-2026.1.2}/metameq/tests/test_metadata_validator.py +0 -0
  35. {metameq-2026.1.1 → metameq-2026.1.2}/metameq.egg-info/SOURCES.txt +0 -0
  36. {metameq-2026.1.1 → metameq-2026.1.2}/metameq.egg-info/dependency_links.txt +0 -0
  37. {metameq-2026.1.1 → metameq-2026.1.2}/metameq.egg-info/entry_points.txt +0 -0
  38. {metameq-2026.1.1 → metameq-2026.1.2}/metameq.egg-info/requires.txt +0 -0
  39. {metameq-2026.1.1 → metameq-2026.1.2}/metameq.egg-info/top_level.txt +0 -0
  40. {metameq-2026.1.1 → metameq-2026.1.2}/setup.cfg +0 -0
  41. {metameq-2026.1.1 → metameq-2026.1.2}/setup.py +0 -0
  42. {metameq-2026.1.1 → metameq-2026.1.2}/versioneer.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: metameq
- Version: 2026.1.1
+ Version: 2026.1.2
  Summary: Qiita-compliant metadata generation and validation tool
  Home-page: https://github.com/AmandaBirmingham/metameq
  Author: Amanda Birmingham
environment.yml
@@ -10,5 +10,6 @@ dependencies:
  - pyyaml
  - flake8
  - pep8
+ - pytest
  - pip:
  - cerberus
metameq/_version.py
@@ -8,11 +8,11 @@ import json

  version_json = '''
  {
- "date": "2026-01-28T14:30:42-0800",
+ "date": "2026-01-31T12:28:01-0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "b60757af0c4b7b16d71119971565d9991779f6d2",
- "version": "2026.01.1"
+ "full-revisionid": "889941fbd7d28889867e3f4b6edba2d50dbc5956",
+ "version": "2026.01.2"
  }
  ''' # END VERSION_JSON

metameq/src/metadata_configurator.py
@@ -4,7 +4,8 @@ from metameq.src.util import extract_config_dict, extract_stds_config, \
  METADATA_FIELDS_KEY, STUDY_SPECIFIC_METADATA_KEY, \
  HOST_TYPE_SPECIFIC_METADATA_KEY, \
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
- DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY
+ DEFAULT_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
+ SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE


  def combine_stds_and_study_config(
@@ -89,6 +90,15 @@ flatten_nested_stds_dict(
  if curr_host_type_sub_host_dict:
  wip_host_types_dict.update(curr_host_type_sub_host_dict)

+ # resolve aliases and base types for this host's sample types
+ # This happens AFTER recursion so children inherit unresolved aliases,
+ # ensuring correct bottom-up resolution order
+ if SAMPLE_TYPE_SPECIFIC_METADATA_KEY in curr_host_type_wip_flat_dict:
+ curr_host_type_wip_flat_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY] = \
+ _resolve_sample_type_aliases_and_bases(
+ curr_host_type_wip_flat_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY],
+ curr_host_type_wip_flat_dict.get(METADATA_FIELDS_KEY, {}))
+
  # assign the flattened wip dict for the current host type to the result
  # (which now contains flat records for the hosts lower down than
  # this, if there are any)
@@ -270,8 +280,11 @@ def _combine_base_and_added_host_type(
  _combine_base_and_added_sample_type_specific_metadata(
  host_type_wip_nested_dict,
  host_type_add_dict)
+
  # if we got back a non-empty dictionary of sample types,
  # add it to the wip for this host type dict
+ # Note: resolution of aliases/base types happens in flatten_nested_stds_dict
+ # AFTER recursion, to ensure correct bottom-up resolution order
  if curr_host_wip_sample_types_dict:
  host_type_wip_nested_dict[
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY] = \
@@ -450,6 +463,130 @@ def _id_sample_type_definition(sample_type_name: str, sample_type_dict: Dict[str
  "the same sample type dict")


+ def _construct_sample_type_metadata_fields_dict(
+ sample_type: str,
+ host_sample_types_config_dict: Dict[str, Any],
+ a_host_type_metadata_fields_dict: Dict[str, Any]) -> Dict[str, Any]:
+ """Construct metadata fields dictionary for a specific host+sample type, resolving aliases and base types.
+
+ Parameters
+ ----------
+ sample_type : str
+ The sample type to process.
+ host_sample_types_config_dict : Dict[str, Any]
+ Dictionary containing config for *all* sample types in
+ the host type in question.
+ a_host_type_metadata_fields_dict : Dict[str, Any]
+ Dictionary containing metadata fields for the host type in question.
+
+ Returns
+ -------
+ Dict[str, Any]
+ The constructed metadata fields dictionary for this host-and-sample-type combination.
+
+ Raises
+ ------
+ ValueError
+ If there are invalid alias chains or base type configurations.
+ """
+ sample_type_for_metadata = sample_type
+
+ # get dict associated with the naive sample type
+ sample_type_specific_dict = \
+ host_sample_types_config_dict[sample_type]
+
+ # if naive sample type contains an alias
+ sample_type_alias = sample_type_specific_dict.get(ALIAS_KEY)
+ if sample_type_alias:
+ # change the sample type to the alias sample type
+ # and use the alias's sample type dict
+ sample_type_for_metadata = sample_type_alias
+ sample_type_specific_dict = \
+ host_sample_types_config_dict[sample_type_alias]
+ if METADATA_FIELDS_KEY not in sample_type_specific_dict:
+ raise ValueError(f"May not chain aliases "
+ f"('{sample_type}' to '{sample_type_alias}')")
+ # endif sample type is an alias
+
+ # if the sample type has a base type
+ sample_type_base = sample_type_specific_dict.get(BASE_TYPE_KEY)
+ if sample_type_base:
+ # get the base's sample type dict and add this sample type's
+ # info on top of it
+ base_sample_dict = host_sample_types_config_dict[sample_type_base]
+ if list(base_sample_dict.keys()) != [METADATA_FIELDS_KEY]:
+ raise ValueError(f"Base sample type '{sample_type_base}' "
+ f"must only have metadata fields")
+ sample_type_specific_dict_metadata = update_wip_metadata_dict(
+ deepcopy_dict(base_sample_dict[METADATA_FIELDS_KEY]),
+ sample_type_specific_dict.get(METADATA_FIELDS_KEY, {}))
+ sample_type_specific_dict = deepcopy_dict(sample_type_specific_dict)
+ sample_type_specific_dict[METADATA_FIELDS_KEY] = \
+ sample_type_specific_dict_metadata
+ # endif sample type has a base type
+
+ # add the sample-type-specific info generated above on top of the host info
+ sample_type_metadata_dict = update_wip_metadata_dict(
+ deepcopy_dict(a_host_type_metadata_fields_dict),
+ sample_type_specific_dict.get(METADATA_FIELDS_KEY, {}))
+
+ # set sample_type, and qiita_sample_type if it is not already set
+ sample_type_definition = {
+ ALLOWED_KEY: [sample_type_for_metadata],
+ DEFAULT_KEY: sample_type_for_metadata,
+ TYPE_KEY: "string"
+ }
+ sample_type_metadata_dict = update_wip_metadata_dict(
+ sample_type_metadata_dict, {SAMPLE_TYPE_KEY: sample_type_definition})
+ if QIITA_SAMPLE_TYPE not in sample_type_metadata_dict:
+ sample_type_metadata_dict = update_wip_metadata_dict(
+ sample_type_metadata_dict, {QIITA_SAMPLE_TYPE: sample_type_definition})
+ # end if qiita_sample_type not already set
+
+ return sample_type_metadata_dict
+
+
+ def _resolve_sample_type_aliases_and_bases(
+ sample_types_dict: Dict[str, Any],
+ host_metadata_fields_dict: Dict[str, Any]) -> Dict[str, Any]:
+ """Resolve aliases and base types in sample type definitions.
+
+ For each sample type in the input dictionary:
+ 1. If it's an alias, follow the alias and resolve the target's metadata
+ 2. If it has a base_type, inherit metadata fields from the base
+ 3. Merge sample-type metadata fields with host-level metadata fields
+ 4. Add sample_type and qiita_sample_type fields
+
+ Parameters
+ ----------
+ sample_types_dict : Dict[str, Any]
+ Dictionary of sample type configurations (from sample_type_specific_metadata).
+ host_metadata_fields_dict : Dict[str, Any]
+ Host-level metadata fields to merge into each sample type.
+
+ Returns
+ -------
+ Dict[str, Any]
+ Dictionary with all sample types resolved.
+
+ Raises
+ ------
+ ValueError
+ If chained aliases are detected or base type has invalid structure.
+ """
+ result = {}
+
+ for sample_type_name in sample_types_dict.keys():
+ resolved_metadata = _construct_sample_type_metadata_fields_dict(
+ sample_type_name, sample_types_dict, host_metadata_fields_dict)
+
+ result[sample_type_name] = {
+ METADATA_FIELDS_KEY: resolved_metadata
+ }
+
+ return result
+
+
  def build_full_flat_config_dict(
  study_specific_config_dict: Optional[Dict[str, Any]] = None,
  software_config_dict: Optional[Dict[str, Any]] = None,
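For orientation, here is a minimal sketch (not code from the package) of the kind of sample-type block the two new helpers above operate on. It uses the key constants imported in this module; the host field, sample type names, and values are invented for illustration:

    from metameq.src.util import (
        ALIAS_KEY, BASE_TYPE_KEY, DEFAULT_KEY, METADATA_FIELDS_KEY, TYPE_KEY)

    # Invented host-level fields and sample types, purely for illustration.
    host_fields = {"host_common_name": {DEFAULT_KEY: "human", TYPE_KEY: "string"}}
    sample_types = {
        "stool": {METADATA_FIELDS_KEY: {
            "physical_specimen_location": {DEFAULT_KEY: "freezer", TYPE_KEY: "string"}}},
        "feces": {ALIAS_KEY: "stool"},  # alias: resolves to stool's merged fields
        "stool_ethanol": {BASE_TYPE_KEY: "stool", METADATA_FIELDS_KEY: {
            "physical_specimen_location": {DEFAULT_KEY: "shelf", TYPE_KEY: "string"}}},
    }
    # _resolve_sample_type_aliases_and_bases(sample_types, host_fields) should return one
    # entry per sample type whose METADATA_FIELDS_KEY holds host_fields overlaid with the
    # (alias- or base-resolved) sample-type fields, plus sample_type/qiita_sample_type
    # definitions defaulting to the resolved name ("stool" for the "feces" alias).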
@@ -503,6 +640,14 @@ build_full_flat_config_dict(
  full_nested_hosts_dict, None)
  software_plus_study_flat_config_dict[HOST_TYPE_SPECIFIC_METADATA_KEY] = \
  full_flat_hosts_dict
+
+ # drop the STUDY_SPECIFIC_METADATA_KEY from the final output dict (because
+ # its contents have already been incorporated into the
+ # HOST_TYPE_SPECIFIC_METADATA_KEY section); note we keep all the other
+ # top-level keys from the study-specific config dict
+ if STUDY_SPECIFIC_METADATA_KEY in software_plus_study_flat_config_dict:
+ del software_plus_study_flat_config_dict[STUDY_SPECIFIC_METADATA_KEY]
+
  # this is just a renaming to indicate that, having overwritten any original
  # HOST_TYPE_SPECIFIC_METADATA_KEY in the software_plus_study_flat_config_dict
  # with the complete and flattened combination of software+study+standards, it is now
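A hedged sketch of the caller-visible effect on the returned flat config. The defaults-only call is assumed to be valid (the Optional[...] = None signature suggests it is), and the printed check is illustrative rather than taken from this diff:

    from metameq.src.metadata_configurator import build_full_flat_config_dict
    from metameq.src.util import (
        HOST_TYPE_SPECIFIC_METADATA_KEY, STUDY_SPECIFIC_METADATA_KEY)

    # Build the flat config from the packaged standards alone (no study overlay).
    full_flat = build_full_flat_config_dict()
    print(HOST_TYPE_SPECIFIC_METADATA_KEY in full_flat)  # flattened host-type section is present
    print(STUDY_SPECIFIC_METADATA_KEY in full_flat)      # no longer echoed into the output as of 2026.1.2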
metameq/src/metadata_extender.py
@@ -42,7 +42,7 @@ pandas.set_option("future.no_silent_downcasting", True)
  def get_reserved_cols(
  raw_metadata_df: pandas.DataFrame,
  study_specific_config_dict: Dict[str, Any],
- study_specific_transformers_dict: Optional[Dict[str, Any]] = None) -> List[str]:
+ stds_fp: Optional[str] = None) -> List[str]:
  """Get a list of all reserved column names for all host+sample type combinations in the metadata.

  Note that 'reserved' is not the same as 'required'. Some column names (e.g.,
@@ -55,8 +55,9 @@ def get_reserved_cols(
  The input metadata DataFrame.
  study_specific_config_dict : Dict[str, Any]
  Study-specific flat-host-type config dictionary.
- study_specific_transformers_dict : Optional[Dict[str, Any]], default=None
- Dictionary of custom transformers for this study (only).
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.

  Returns
  -------
@@ -90,10 +91,10 @@

  # extend the metadata_df to get all the required columns for all host+sample type combinations;
  # we don't really care about the contents of these columns, just their names.
- # (I doubt it is necessary to pass the actual study_specific_transformers_dict; could probably just use None)
+ # (Likewise, it is not necessary to pass the actual study_specific_transformers_dict so
+ # just use None)
  metadata_df, _ = extend_metadata_df(
- temp_df, study_specific_config_dict,
- study_specific_transformers_dict)
+ temp_df, study_specific_config_dict, None, None, stds_fp)

  return sorted(metadata_df.columns.to_list())

@@ -119,7 +120,7 @@ def id_missing_cols(a_df: pandas.DataFrame) -> List[str]:
  def find_standard_cols(
  a_df: pandas.DataFrame,
  study_specific_config_dict: Dict[str, Any],
- study_specific_transformers_dict: Optional[Dict[str, Any]] = None,
+ stds_fp: Optional[str] = None,
  suppress_missing_name_err: bool = False) -> List[str]:
  """Find all the standard columns in the metadata DataFrame.

@@ -129,8 +130,9 @@
  The metadata DataFrame to analyze.
  study_specific_config_dict : Dict[str, Any]
  Study-specific flat-host-type config dictionary.
- study_specific_transformers_dict : Optional[Dict[str, Any]], default=None
- Dictionary of custom transformers for this study (only).
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.
  suppress_missing_name_err : bool, default=False
  Whether to suppress errors about missing sample name.

@@ -156,8 +158,7 @@
  # get the intersection of the reserved standard columns and
  # the columns in the input dataframe
  standard_cols = get_reserved_cols(
- a_df, study_specific_config_dict,
- study_specific_transformers_dict=study_specific_transformers_dict)
+ a_df, study_specific_config_dict, stds_fp)

  standard_cols_set = (set(standard_cols) - set(INTERNAL_COL_KEYS))

@@ -167,7 +168,7 @@
  def find_nonstandard_cols(
  a_df: pandas.DataFrame,
  study_specific_config_dict: Dict[str, Any],
- study_specific_transformers_dict: Optional[Dict[str, Any]] = None) -> List[str]:
+ stds_fp: Optional[str] = None) -> List[str]:
  """Find any non-standard columns in the metadata DataFrame.

  Parameters
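For callers of the column helpers, the third argument is now a standards-file path rather than a transformers dict. A hedged call-shape sketch follows; raw_df, study_cfg, and the path are placeholders standing in for a real metadata DataFrame, a real flat-host-type config dict, and a real standards YAML:

    from metameq.src.metadata_extender import (
        find_nonstandard_cols, find_standard_cols, get_reserved_cols)

    # 2026.1.1 style (no longer supported):
    #   get_reserved_cols(raw_df, study_cfg, study_specific_transformers_dict=transformers)
    # 2026.1.2 style:
    reserved = get_reserved_cols(raw_df, study_cfg, stds_fp="alt_standards.yml")
    standard = find_standard_cols(raw_df, study_cfg, stds_fp="alt_standards.yml")
    extra = find_nonstandard_cols(raw_df, study_cfg, stds_fp="alt_standards.yml")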
@@ -176,8 +177,9 @@
  The metadata DataFrame to analyze.
  study_specific_config_dict : Dict[str, Any]
  Study-specific flat-host-type config dictionary.
- study_specific_transformers_dict : Optional[Dict[str, Any]], default=None
- Dictionary of custom transformers for this study (only).
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.

  Returns
  -------
@@ -195,15 +197,15 @@

  # get the columns in
  standard_cols = get_reserved_cols(
- a_df, study_specific_config_dict,
- study_specific_transformers_dict=study_specific_transformers_dict)
+ a_df, study_specific_config_dict, stds_fp)

  return list(set(a_df.columns) - set(standard_cols))


  def get_extended_metadata_from_df_and_yaml(
  raw_metadata_df: pandas.DataFrame,
- study_specific_config_fp: Optional[str]) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
+ study_specific_config_fp: Optional[str],
+ stds_fp: Optional[str] = None) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
  """Extend metadata using configuration from a study-specific YAML config file.

  Parameters
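The same path also reaches the YAML-driven entry point, so a study can be extended against an alternate standards file end to end. A hedged sketch; the file paths and the single-row DataFrame are invented placeholders, not values from this diff:

    import pandas
    from metameq.src.metadata_extender import get_extended_metadata_from_df_and_yaml

    raw_df = pandas.DataFrame({"sample_name": ["sample.1"]})  # placeholder metadata
    extended_df, validation_msgs_df = get_extended_metadata_from_df_and_yaml(
        raw_df, "study_config.yml", stds_fp="alt_standards.yml")  # placeholder paths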
@@ -212,6 +214,9 @@
  The raw metadata DataFrame to extend.
  study_specific_config_fp : Optional[str]
  Path to the study-specific configuration YAML file.
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.

  Returns
  -------
@@ -226,7 +231,8 @@

  # extend the metadata DataFrame using the study-specific flat-host-type config dictionary
  metadata_df, validation_msgs_df = \
- extend_metadata_df(raw_metadata_df, study_specific_config_dict)
+ extend_metadata_df(raw_metadata_df, study_specific_config_dict,
+ None, None, stds_fp)

  return metadata_df, validation_msgs_df

@@ -257,7 +263,8 @@
  out_name_base: str,
  sep: str = "\t",
  remove_internals: bool = True,
- suppress_empty_fails: bool = False) -> pandas.DataFrame:
+ suppress_empty_fails: bool = False,
+ stds_fp: Optional[str] = None) -> pandas.DataFrame:
  """Write extended metadata to files starting from input file paths to metadata and config.

  Parameters
@@ -276,6 +283,9 @@
  Whether to remove internal columns.
  suppress_empty_fails : bool, default=False
  Whether to suppress empty failure files.
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.

  Returns
  -------
@@ -310,7 +320,8 @@
  raw_metadata_df, study_specific_config_dict,
  out_dir, out_name_base, sep=sep,
  remove_internals=remove_internals,
- suppress_empty_fails=suppress_empty_fails)
+ suppress_empty_fails=suppress_empty_fails,
+ stds_fp=stds_fp)

  # for good measure, return the extended metadata DataFrame
  return extended_df
@@ -351,7 +362,8 @@
  sep: str = "\t",
  remove_internals: bool = True,
  suppress_empty_fails: bool = False,
- internal_col_names: Optional[List[str]] = None) -> pandas.DataFrame:
+ internal_col_names: Optional[List[str]] = None,
+ stds_fp: Optional[str] = None) -> pandas.DataFrame:
  """Write extended metadata to files starting from a metadata DataFrame and config dictionary.

  Parameters
@@ -374,6 +386,9 @@
  Whether to suppress empty failure files.
  internal_col_names : Optional[List[str]], default=None
  List of internal column names.
+ stds_fp : Optional[str], default=None
+ Path to standards dictionary file. If None, the default standards
+ config pulled from the standards.yml file will be used.

  Returns
  -------
@@ -383,7 +398,7 @@
  # extend the metadata DataFrame using the study-specific flat-host-type config dictionary
  metadata_df, validation_msgs_df = extend_metadata_df(
  raw_metadata_df, study_specific_config_dict,
- study_specific_transformers_dict)
+ study_specific_transformers_dict, None, stds_fp)

  # write the metadata and validation results to files
  write_metadata_results(
@@ -814,12 +829,6 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
  - The updated metadata DataFrame with sample-type-specific elements added
  - A list of validation messages
  """
- # copy the metadata fields dict from the host type config to be the
- # basis of the work-in-progress metadata dict--these are the default fields
- # that will be overwritten, if necessary, by sample type-specific fields
- wip_metadata_fields_dict = deepcopy_dict(
- a_host_type_config_dict.get(METADATA_FIELDS_KEY, {}))
-
  # get the config section for *all* sample types within this host type
  host_sample_types_config_dict = \
  a_host_type_config_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY]
@@ -837,20 +846,17 @@ def _generate_metadata_for_a_sample_type_in_a_host_type(
  update_metadata_df_field(
  sample_type_df, QC_NOTE_KEY, "invalid sample_type")
  else:
- # resolve any aliases and base types for the sample type and combine its
- # specific metadata fields with the host type's metadata fields
- # to get the full set of config info for this host+sample type
+ # Get the already-resolved metadata fields dict for this sample type.
+ # The config is pre-resolved: aliases/base types are merged and
+ # host metadata is combined.
+ sample_type_config = host_sample_types_config_dict[a_sample_type]
  full_sample_type_metadata_fields_dict = \
- _construct_sample_type_metadata_fields_dict(
- a_sample_type, host_sample_types_config_dict, wip_metadata_fields_dict)
+ sample_type_config.get(METADATA_FIELDS_KEY, {})

  # update the metadata df with the sample type specific metadata fields
- # TODO: this is taking in wip_metadata_fields_dict instead of full_sample_type_metadata_fields_dict,
- # which only works because the code underlying _construct_sample_type_metadata_fields_dict
- # is *modifying* wip_metadata_fields_dict in place. This should be corrected, but that
- # needs to wait until there are tests to make sure doing so doesn't break anything.
  sample_type_df = _update_metadata_from_dict(
- sample_type_df, wip_metadata_fields_dict, dict_is_metadata_fields=True,
+ sample_type_df, full_sample_type_metadata_fields_dict,
+ dict_is_metadata_fields=True,
  overwrite_non_nans=global_plus_host_settings_dict[OVERWRITE_NON_NANS_KEY])

  # for fields that are required but not yet filled, replace the placeholder with
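Taken together with the configurator changes, the per-sample-type config this function now receives is expected to arrive already resolved. A hedged sketch of the expected shape of one resolved entry, built from the key constants confirmed in the import diff above; the "stool" name and the merged-fields comment are illustrative, not from this diff:

    from metameq.src.util import (
        ALLOWED_KEY, DEFAULT_KEY, METADATA_FIELDS_KEY, SAMPLE_TYPE_KEY, TYPE_KEY)

    # Roughly what host_sample_types_config_dict[a_sample_type] is expected to hold
    # after flatten_nested_stds_dict has run ("stool" is an invented sample type):
    resolved_stool_entry = {
        METADATA_FIELDS_KEY: {
            SAMPLE_TYPE_KEY: {ALLOWED_KEY: ["stool"], DEFAULT_KEY: "stool", TYPE_KEY: "string"},
            # ...plus the host-level and sample-type-level fields already merged in...
        },
    }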