metameq 2026.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,436 @@
+ import numpy as np
+ import pandas
+ from pandas.testing import assert_frame_equal
+ import os
+ import os.path as path
+ from unittest import TestCase
+ from metameq.src.util import extract_config_dict, \
+     extract_yaml_dict, extract_stds_config, deepcopy_dict, \
+     validate_required_columns_exist, update_metadata_df_field, get_extension, \
+     load_df_with_best_fit_encoding
+
+
+ class TestUtil(TestCase):
+     """Test suite for utility functions in the metameq.src.util module."""
+
+     # get the parent directory of the current file
+     TEST_DIR = path.dirname(__file__)
+
+     TEST_CONFIG_DICT = {
+         "host_type_specific_metadata": {
+             "base": {
+                 "metadata_fields": {
+                     "sample_name": {
+                         "type": "string",
+                         "unique": True
+                     },
+                     "sample_type": {
+                         "empty": False,
+                         "is_phi": False
+                     }
+                 }
+             }
+         }
+     }
+
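+     # NB: signatures of the config/dict helpers under test, as inferred
+     # from their usage in this file (assumptions, not documented API):
+     #   extract_config_dict(fp), extract_yaml_dict(fp): parse a YAML file
+     #     into a dict; extract_config_dict(None) apparently falls back to
+     #     the packaged default config
+     #   extract_stds_config(fp): as above, falling back to the packaged
+     #     standards.yml when fp is None
+     #   deepcopy_dict(d): return an independent deep copy of d
+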
+     # Tests for extract_config_dict
+     def test_extract_config_dict_no_inputs(self):
+         """Test extracting the config dictionary with no inputs.
+
+         NB: this test looks at the *real* config, which may change, so it
+         just checks that a couple of the expected keys (which are not in
+         the test config) are present.
+         """
+         obs = extract_config_dict(None)
+         self.assertIn("default", obs)
+         self.assertIn("leave_requireds_blank", obs)
+
+     def test_extract_config_dict_w_config_fp(self):
+         """Test extracting the config dictionary from a valid config file path."""
+         config_fp = path.join(self.TEST_DIR, "data/test_config.yml")
+         obs = extract_config_dict(config_fp)
+         self.assertDictEqual(self.TEST_CONFIG_DICT, obs)
+
+     def test_extract_config_dict_missing_file(self):
+         """Test that extracting config from a non-existent file raises FileNotFoundError."""
+         with self.assertRaises(FileNotFoundError):
+             extract_config_dict("nonexistent.yml")
+
+     def test_extract_config_dict_invalid_yaml(self):
+         """Test that extracting config from invalid YAML raises an exception."""
+         # (re)create the invalid YAML fixture; this file ships with the
+         # package (see RECORD), so it is not cleaned up afterward
+         invalid_yaml_path = path.join(self.TEST_DIR, "data/invalid.yml")
+         with open(invalid_yaml_path, "w") as f:
+             f.write("invalid: yaml: content: - [")
+
+         with self.assertRaises(Exception):
+             extract_config_dict(invalid_yaml_path)
+
+     # Tests for extract_yaml_dict
+     def test_extract_yaml_dict(self):
+         """Test extracting a YAML dictionary from a valid YAML file."""
+         config_fp = path.join(self.TEST_DIR, "data/test_config.yml")
+         obs = extract_yaml_dict(config_fp)
+         self.assertDictEqual(self.TEST_CONFIG_DICT, obs)
+
+     # Tests for extract_stds_config
+     def test_extract_stds_config(self):
+         """Test extracting the standards configuration with default settings.
+
+         Verifies that the extracted config contains expected standard keys.
+         """
+         obs = extract_stds_config(None)
+         self.assertIn("ebi_null_vals_all", obs)
+
+     def test_extract_stds_config_default_path(self):
+         """Test extracting the standards configuration using the default path.
+
+         NB: this test assumes the default standards.yml exists. That file
+         may change, so the test just checks that an expected key is present.
+         """
+         config = extract_stds_config(None)
+         self.assertIsInstance(config, dict)
+         self.assertIn("host_type_specific_metadata", config)
+
+     def test_extract_stds_config_custom_path(self):
+         """Test extracting the standards configuration using a custom path."""
+         config = extract_stds_config(
+             path.join(self.TEST_DIR, "data/test_config.yml"))
+         self.assertDictEqual(config, self.TEST_CONFIG_DICT)
+
+     # Tests for deepcopy_dict
+     def test_deepcopy_dict(self):
+         """Test deep copying of a nested dictionary structure.
+
+         Verifies that modifications to the copy do not affect the original
+         dictionary.
+         """
+         obs = deepcopy_dict(self.TEST_CONFIG_DICT)
+         self.assertDictEqual(self.TEST_CONFIG_DICT, obs)
+         self.assertIsNot(self.TEST_CONFIG_DICT, obs)
+         obs["host_type_specific_metadata"]["base"]["metadata_fields"].pop(
+             "sample_name")
+         self.assertNotEqual(self.TEST_CONFIG_DICT, obs)
+
+     # Tests for load_df_with_best_fit_encoding
+     def test_load_df_with_best_fit_encoding_utf8(self):
+         """Test loading DataFrame from a file with UTF-8 encoding."""
+         test_data = "col1,col2\nval1,val2"
+         test_file = path.join(self.TEST_DIR, "data/test_utf8.csv")
+         with open(test_file, "w", encoding="utf-8") as f:
+             f.write(test_data)
+
+         try:
+             df = load_df_with_best_fit_encoding(test_file, ",")
+             self.assertEqual(len(df), 1)
+             self.assertEqual(df.columns.tolist(), ["col1", "col2"])
+             self.assertEqual(df.iloc[0]["col1"], "val1")
+             self.assertEqual(df.iloc[0]["col2"], "val2")
+         finally:
+             if path.exists(test_file):
+                 os.remove(test_file)
+
+     def test_load_df_with_best_fit_encoding_utf8_sig(self):
+         """Test loading DataFrame from a file with UTF-8-with-BOM (utf-8-sig) encoding."""
+         test_data = "col1,col2\nval1,val2"
+         test_file = path.join(self.TEST_DIR, "data/test_utf8_sig.csv")
+         with open(test_file, "w", encoding="utf-8-sig") as f:
+             f.write(test_data)
+
+         try:
+             df = load_df_with_best_fit_encoding(test_file, ",")
+             self.assertEqual(len(df), 1)
+             self.assertEqual(df.columns.tolist(), ["col1", "col2"])
+             self.assertEqual(df.iloc[0]["col1"], "val1")
+             self.assertEqual(df.iloc[0]["col2"], "val2")
+         finally:
+             if path.exists(test_file):
+                 os.remove(test_file)
+
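+     # NB: from the tests above and the error message checked below,
+     # load_df_with_best_fit_encoding apparently tries a set of candidate
+     # encodings (at least utf-8 and utf-8-sig) in turn and raises
+     # ValueError when none of them can decode the file.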
+     def test_load_df_with_best_fit_encoding_invalid_file(self):
+         """Test that attempting to load DataFrame from a non-existent file raises ValueError."""
+         with self.assertRaises(ValueError):
+             load_df_with_best_fit_encoding("nonexistent.csv", ",")
+
+     def test_load_df_with_best_fit_encoding_unsupported_encoding(self):
+         """Test that attempting to load DataFrame with an unsupported encoding raises ValueError."""
+         test_file = path.join(self.TEST_DIR, "data/test.biom")
+         # the binary fixture is not shipped with the package (see RECORD),
+         # so create a file of bytes that are not valid utf-8 or utf-8-sig
+         # (assumed to cover the encodings the loader tries)
+         with open(test_file, "wb") as f:
+             f.write(b"\xff\xfe\xfa\x01\x00\x02")
+
+         try:
+             with self.assertRaisesRegex(
+                     ValueError,
+                     "Unable to decode .* with any available encoder"):
+                 load_df_with_best_fit_encoding(test_file, ",")
+         finally:
+             if path.exists(test_file):
+                 os.remove(test_file)
+
+     # Tests for validate_required_columns_exist
+     def test_validate_required_columns_exist_empty_df(self):
+         """Test that validating required columns in an empty DataFrame raises ValueError."""
+         empty_df = pandas.DataFrame()
+         err_msg = r"test_df missing columns: \['sample_name', 'sample_type'\]"
+         with self.assertRaisesRegex(ValueError, err_msg):
+             validate_required_columns_exist(
+                 empty_df, ["sample_name", "sample_type"],
+                 "test_df missing columns")
+
+     def test_validate_required_columns_exist_no_err(self):
+         """Test successful validation when all required columns exist."""
+         test_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         validate_required_columns_exist(
+             test_df, ["sample_name", "sample_type"], "test_df missing")
+         # if no error was raised at the step above, this test passed
+         self.assertTrue(True)
+
+     def test_validate_required_columns_exist_err(self):
+         """Test that validation raises ValueError when a required column is missing."""
+         test_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             # "sample_tye" is deliberately misspelled so that the required
+             # "sample_type" column is missing
+             "sample_tye": ["st1", "st2"]
+         })
+
+         err_msg = r"test_df missing column: \['sample_type'\]"
+         with self.assertRaisesRegex(ValueError, err_msg):
+             validate_required_columns_exist(
+                 test_df, ["sample_name", "sample_type"],
+                 "test_df missing column")
+
+     # Tests for get_extension
+     def test_get_extension(self):
+         """Test that the correct file extension is returned for different separator types."""
+
+         # Test comma separator
+         self.assertEqual(get_extension(","), "csv")
+
+         # Test tab separator
+         self.assertEqual(get_extension("\t"), "txt")
+
+         # Test other separators
+         self.assertEqual(get_extension(";"), "txt")
+         self.assertEqual(get_extension("|"), "txt")
+
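+     # NB: update_metadata_df_field is exercised below but not documented
+     # here; from its usage it apparently mutates working_df in place,
+     # setting the target field either to a constant or to the result of
+     # a row-wise function, with overwrite_non_nans controlling whether
+     # existing non-NaN values are replaced or only NaN values are filled.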
+     # Tests for update_metadata_df_field
+     def test_update_metadata_df_field_constant_new_field(self):
+         """Test that a new field can be added to the DataFrame with a constant value."""
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"],
+             "new_field": ["bacon", "bacon"]
+         })
+
+         update_metadata_df_field(
+             working_df, "new_field", "bacon",
+             overwrite_non_nans=True)
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_constant_overwrite(self):
+         """Test overwriting existing field in DataFrame with constant value.
+
+         Verifies that an existing field can be overwritten with a constant
+         value when overwrite_non_nans is True.
+         """
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["bacon", "bacon"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", "bacon",
+             overwrite_non_nans=True)
+         # with overwrite set to True, the column in question should have
+         # every entry set to the input constant value
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_constant_no_overwrite_no_nan(self):
+         """Test (not) updating field in DataFrame with constant value when no NaN values exist.
+
+         Verifies that no changes are made when overwrite_non_nans is False
+         and there are no NaN values to replace.
+         """
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", "bacon",
+             overwrite_non_nans=False)
+         # with overwrite set to False, no change should be made because
+         # there are no NaN values in the column in question
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_constant_no_overwrite_w_nan(self):
+         """Test updating field in DataFrame with constant value when NaN values exist.
+
+         Verifies that only NaN values are replaced when overwrite_non_nans
+         is False and there are NaN values to replace.
+         """
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": [np.nan, "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["bacon", "st2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", "bacon",
+             overwrite_non_nans=False)
+         # with overwrite set to False, only one change should be made
+         # because there is only one NaN value in the column in question
+         assert_frame_equal(exp_df, working_df)
+
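+     # NB: the tests below pass a function instead of a constant; from its
+     # usage, update_metadata_df_field apparently invokes it per row as
+     # func(row, source_fields) and stores the returned value in the
+     # target field.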
+     def test_update_metadata_df_field_function_new_field(self):
+         """Test updating DataFrame with a new field using a function.
+
+         Verifies that a new field can be added to the DataFrame using a
+         function to compute values based on existing fields.
+         """
+         def test_func(row, source_fields):
+             return f"processed_{row[source_fields[0]]}"
+
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", np.nan],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", np.nan],
+             "sample_type": ["st1", "st2"],
+             # the f-string in test_func formats np.nan as the string "nan"
+             "processed": ["processed_s1", "processed_nan"]
+         })
+
+         update_metadata_df_field(
+             working_df, "processed", test_func,
+             ["sample_name"], overwrite_non_nans=True)
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_function_overwrite(self):
+         """Test overwriting existing field in DataFrame using a function.
+
+         Verifies that an existing field can be overwritten using a function
+         to compute values based on existing fields when overwrite_non_nans
+         is True.
+         """
+         def test_func(row, source_fields):
+             source_field = source_fields[0]
+             last_char = row[source_field][-1]
+             return f"bacon{last_char}"
+
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["bacon1", "bacon2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", test_func,
+             ["sample_name"], overwrite_non_nans=True)
+         # with overwrite set to True, the column in question should have
+         # every entry set to the result of running the input function on
+         # the input source fields in the same row
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_function_no_overwrite_no_nan(self):
+         """Test (not) updating field in DataFrame with function when no NaN values exist.
+
+         Verifies that, when using a function, no changes are made when
+         overwrite_non_nans is False and there are no NaN values to replace.
+         """
+         def test_func(row, source_fields):
+             source_field = source_fields[0]
+             last_char = row[source_field][-1]
+             return f"bacon{last_char}"
+
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", test_func,
+             ["sample_name"], overwrite_non_nans=False)
+         # with overwrite set to False, no change should be made because
+         # there are no NaN values in the column in question
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_function_no_overwrite_w_nan(self):
+         """Test updating field in DataFrame with function when NaN values exist.
+
+         Verifies that, when using a function, only NaN values are replaced
+         when overwrite_non_nans is False and there are NaN values to
+         replace.
+         """
+         def test_func(row, source_fields):
+             source_field = source_fields[0]
+             last_char = row[source_field][-1]
+             return f"bacon{last_char}"
+
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": [np.nan, "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["bacon1", "st2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "sample_type", test_func,
+             ["sample_name"], overwrite_non_nans=False)
+         # with overwrite set to False, only one change should be made
+         # because there is only one NaN value in the column in question
+         assert_frame_equal(exp_df, working_df)
+
+     def test_update_metadata_df_field_function_multiple_sources(self):
+         """Test updating field using function with multiple source fields.
+
+         Verifies that a new field can be created using a function that
+         combines values from multiple source fields.
+         """
+         def test_func(row, source_fields):
+             return f"{row[source_fields[0]]}_{row[source_fields[1]]}"
+
+         working_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"]
+         })
+
+         exp_df = pandas.DataFrame({
+             "sample_name": ["s1", "s2"],
+             "sample_type": ["st1", "st2"],
+             "combined": ["s1_st1", "s2_st2"]
+         })
+
+         update_metadata_df_field(
+             working_df, "combined", test_func,
+             ["sample_name", "sample_type"], overwrite_non_nans=True)
+         assert_frame_equal(exp_df, working_df)
@@ -0,0 +1,21 @@
+ Metadata-Version: 2.4
+ Name: metameq
+ Version: 2026.1.1
+ Summary: Qiita-compliant metadata generation and validation tool
+ Home-page: https://github.com/AmandaBirmingham/metameq
+ Author: Amanda Birmingham
+ Author-email: abirmingham@ucsd.edu
+ License: BSD-3-Clause
+ Requires-Dist: click>=8.0.0
+ Requires-Dist: pandas>=1.3.0
+ Requires-Dist: PyYAML>=5.4.0
+ Requires-Dist: Cerberus>=1.3.4
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: requires-dist
+ Dynamic: summary
+
+ METAMEQ: Metadata Extension Tool to Annotate Microbiome Experiments for Qiita, a tool for generating and validating Qiita-compliant metadata files.
@@ -0,0 +1,27 @@
+ metameq/__init__.py,sha256=6mwG4ULHzMyPm5V16BSJwTMBsqsugFe2yRp5oA-IVHo,2461
+ metameq/_version.py,sha256=i2teBBkMgecDNELFv53UaqmmxgxdqxON_QV0QiSjpD8,501
+ metameq/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ metameq/config/config.yml,sha256=0FIJYepFxR3w0GVhMNGrNQLz0HQOBcrGDkcFjzQdRlc,86
+ metameq/config/standards.yml,sha256=tY3G4jL0GlxcpW8I6xoOmzdtoH6Pn5hZ_9BR_tL9lxo,73783
+ metameq/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ metameq/src/__main__.py,sha256=KJtslcriLKqYA9FcFH02i8TXMsCeB-14b8L9SBP18qw,1379
+ metameq/src/metadata_configurator.py,sha256=JUfrzR3CccMreZG8ON_UTeoS2qzL3Wohms8oM4zN9O8,21999
+ metameq/src/metadata_extender.py,sha256=xlSVKPxedrWcO-gj5iaK1t5pfGdvuTLlsTU-5qiuKXc,48651
+ metameq/src/metadata_merger.py,sha256=aZo3NdYAsbz34rjLEZXvRRCtFm25p8BUNW79kW9y55Y,13067
+ metameq/src/metadata_transformers.py,sha256=kBGB2zbyRTpu-M9nIZBGhlCIDnWxnPPZPylfvxfX5YU,9354
+ metameq/src/metadata_validator.py,sha256=pvyPACr4epJv9zkJdS6bS0JzRXTPj6rxVl_n8p0RQxs,14351
+ metameq/src/util.py,sha256=A2J2oyRPz8XTaq68AgJuGPuvMh6x0HUtQ684pZoXSMM,8958
+ metameq/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ metameq/tests/test_metadata_configurator.py,sha256=HeNcrazS-3MKgXu43l9tOvPwDqU0n69Rl7hMGrne4Vw,93273
+ metameq/tests/test_metadata_extender.py,sha256=lSElWriTeIzYuhGSsvKKKQw08JAGcoviZOvnrNhQr0Y,100015
+ metameq/tests/test_metadata_merger.py,sha256=czoXIUaNt8h5Nsv6KcBxr3X9vxAApG_jVBlx5jUZa2k,22996
+ metameq/tests/test_metadata_transformers.py,sha256=O80Qzka_7CeyMZz5RO8YQ-AR12sK64ArevWbkt7NVZU,11857
+ metameq/tests/test_metadata_validator.py,sha256=Ux9EDsmYJdUFeKKey4QstnW3sgWKFuNneFyYZ8dWPHc,39814
+ metameq/tests/test_util.py,sha256=KW5zV3FCV522oXT0Ev1oYMwg_Eoc3Pvqfn5VZ1VqSwU,17169
+ metameq/tests/data/invalid.yml,sha256=ZjyPB2QELgjhhTx8lHgaVXWoIIbfmU_YcMxebHWda78,27
+ metameq/tests/data/test_config.yml,sha256=KKvu_ILmcN7822r1BwWQ9s14AW6vVSePmjmoSx8B1zM,180
+ metameq-2026.1.1.dist-info/METADATA,sha256=rASIAOakCSpkW_0UzcOSrEHLZB7PjmoUufAZ7N8ucro,658
+ metameq-2026.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ metameq-2026.1.1.dist-info/entry_points.txt,sha256=DJsXx2sh4afJZw4KGcYrz9v4L86LrMKrJYYuDRV7edQ,54
+ metameq-2026.1.1.dist-info/top_level.txt,sha256=UKD6LtyBhlRDgKBfaPUuQGp_ekTsmOXucSwuDoUO1Fw,8
+ metameq-2026.1.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.10.2)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1,2 @@
+ [console_scripts]
+ metameq = metameq.src.__main__:root
@@ -0,0 +1 @@
+ metameq