metameq 2026.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,277 @@
1
+ from datetime import datetime
2
+ import pandas
3
+ import numpy as np
4
+ from unittest import TestCase
5
+ from metameq.src.metadata_transformers import (
6
+ pass_through,
7
+ transform_input_sex_to_std_sex,
8
+ transform_age_to_life_stage,
9
+ transform_date_to_formatted_date,
10
+ help_transform_mapping,
11
+ standardize_input_sex,
12
+ set_life_stage_from_age_yrs,
13
+ format_a_datetime,
14
+ _get_one_source_field,
15
+ _help_transform_mapping
16
+ )
17
+
18
+
19
class TestMetadataTransformers(TestCase):
    """Unit tests for the functions imported from metadata_transformers.

    Every test that needs a metadata row starts from the fixture built in
    ``setUp`` and, where necessary, overrides a single field on a copy.
    """

    def setUp(self):
        """Create the baseline metadata row shared by the tests."""
        self.test_row = pandas.Series(dict(
            sample_name='test_sample',
            patient_sex='M',
            patient_age=25,
            start_date='2023-01-01',
        ))

    def _row_with(self, field, value):
        """Return a copy of the fixture row with ``field`` set to ``value``."""
        row = self.test_row.copy()
        row[field] = value
        return row

    # ---- pass_through ----
    def test_pass_through(self):
        """The value of the single named source field comes back as-is."""
        self.assertEqual(pass_through(self.test_row, ['patient_sex']), 'M')

    def test_pass_through_err_multiple_source_fields(self):
        """Supplying two source fields raises a ValueError."""
        self.assertRaisesRegex(
            ValueError, "pass_through requires exactly one source field",
            pass_through, self.test_row, ['patient_sex', 'patient_age'])

    def test_pass_through_nan(self):
        """A NaN field value is returned as a missing value."""
        row = self._row_with('patient_sex', np.nan)
        self.assertTrue(pandas.isna(pass_through(row, ['patient_sex'])))

    # ---- transform_input_sex_to_std_sex ----
    def test_transform_input_sex_to_std_sex_male(self):
        """An 'M' in the source field is transformed to 'male'."""
        self.assertEqual(
            transform_input_sex_to_std_sex(self.test_row, ['patient_sex']),
            'male')

    def test_transform_input_sex_to_std_sex_female(self):
        """An 'F' in the source field is transformed to 'female'."""
        row = self._row_with('patient_sex', 'F')
        self.assertEqual(
            transform_input_sex_to_std_sex(row, ['patient_sex']), 'female')

    def test_transform_input_sex_to_std_sex_invalid(self):
        """An unrecognized sex value raises a ValueError."""
        row = self._row_with('patient_sex', 'invalid')
        self.assertRaisesRegex(
            ValueError, "Unrecognized sex: invalid",
            transform_input_sex_to_std_sex, row, ['patient_sex'])

    # ---- transform_age_to_life_stage ----
    def test_transform_age_to_life_stage_child(self):
        """An age of 16 in the source field is transformed to 'child'."""
        row = self._row_with('patient_age', 16)
        self.assertEqual(
            transform_age_to_life_stage(row, ['patient_age']), 'child')

    def test_transform_age_to_life_stage_adult(self):
        """The fixture age of 25 is transformed to 'adult'."""
        self.assertEqual(
            transform_age_to_life_stage(self.test_row, ['patient_age']),
            'adult')

    def test_transform_age_to_life_stage_invalid(self):
        """A non-integer age raises a ValueError naming the source field."""
        row = self._row_with('patient_age', 'invalid')
        self.assertRaisesRegex(
            ValueError, "patient_age must be an integer",
            transform_age_to_life_stage, row, ['patient_age'])

    # ---- transform_date_to_formatted_date ----
    def test_transform_date_to_formatted_date_valid(self):
        """A date-only string is formatted with a midnight time part."""
        self.assertEqual(
            transform_date_to_formatted_date(self.test_row, ['start_date']),
            '2023-01-01 00:00')

    def test_transform_date_to_formatted_date_invalid(self):
        """An unparseable date raises a ValueError naming the source field."""
        row = self._row_with('start_date', 'invalid')
        self.assertRaisesRegex(
            ValueError, "start_date cannot be parsed to a date",
            transform_date_to_formatted_date, row, ['start_date'])

    # ---- help_transform_mapping ----
    def test_help_transform_mapping_valid(self):
        """A field value present in the mapping yields its mapped value."""
        lookup = {'M': '2', 'F': '1'}
        # The fixture's patient_sex is 'M', which this lookup maps to '2'.
        self.assertEqual(
            help_transform_mapping(self.test_row, ['patient_sex'], lookup),
            '2')

    def test_help_transform_mapping_invalid(self):
        """A field value absent from the mapping raises a ValueError."""
        lookup = {'A': '1', 'B': '2'}
        row = self._row_with('patient_sex', 'C')
        self.assertRaisesRegex(
            ValueError, "Unrecognized help_transform_mapping: C",
            help_transform_mapping, row, ['patient_sex'], lookup)

    # ---- standardize_input_sex ----
    def test_standardize_input_sex_M(self):
        """'M' standardizes to 'male'."""
        self.assertEqual(standardize_input_sex('M'), 'male')

    def test_standardize_input_sex_m(self):
        """'m' standardizes to 'male'."""
        self.assertEqual(standardize_input_sex('m'), 'male')

    def test_standardize_input_sex_Male(self):
        """'Male' standardizes to 'male'."""
        self.assertEqual(standardize_input_sex('Male'), 'male')

    def test_standardize_input_sex_male(self):
        """'male' standardizes to 'male'."""
        self.assertEqual(standardize_input_sex('male'), 'male')

    def test_standardize_input_sex_MALE(self):
        """'MALE' standardizes to 'male'."""
        self.assertEqual(standardize_input_sex('MALE'), 'male')

    def test_standardize_input_sex_F(self):
        """'F' standardizes to 'female'."""
        self.assertEqual(standardize_input_sex('F'), 'female')

    def test_standardize_input_sex_f(self):
        """'f' standardizes to 'female'."""
        self.assertEqual(standardize_input_sex('f'), 'female')

    def test_standardize_input_sex_Female(self):
        """'Female' standardizes to 'female'."""
        self.assertEqual(standardize_input_sex('Female'), 'female')

    def test_standardize_input_sex_female(self):
        """'female' standardizes to 'female'."""
        self.assertEqual(standardize_input_sex('female'), 'female')

    def test_standardize_input_sex_FEMALE(self):
        """'FEMALE' standardizes to 'female'."""
        self.assertEqual(standardize_input_sex('FEMALE'), 'female')

    def test_standardize_input_sex_intersex(self):
        """'intersex' standardizes to 'intersex'."""
        self.assertEqual(standardize_input_sex('intersex'), 'intersex')

    def test_standardize_input_sex_INTERSEX(self):
        """'INTERSEX' standardizes to 'intersex'."""
        self.assertEqual(standardize_input_sex('INTERSEX'), 'intersex')

    def test_standardize_input_sex_prefernottoanswer(self):
        """'prefernottoanswer' standardizes to 'not provided'."""
        self.assertEqual(
            standardize_input_sex('prefernottoanswer'), 'not provided')

    def test_standardize_input_sex_PREFERNOTTOANSWER(self):
        """'PREFERNOTTOANSWER' standardizes to 'not provided'."""
        self.assertEqual(
            standardize_input_sex('PREFERNOTTOANSWER'), 'not provided')

    def test_standardize_input_sex_invalid(self):
        """An unrecognized value raises a ValueError."""
        self.assertRaisesRegex(
            ValueError, "Unrecognized sex: invalid",
            standardize_input_sex, 'invalid')

    def test_standardize_input_sex_nan(self):
        """NaN input produces a missing value."""
        self.assertTrue(pandas.isna(standardize_input_sex(np.nan)))

    # ---- set_life_stage_from_age_yrs ----
    def test_set_life_stage_from_age_yrs_child(self):
        """An age of 16 is classified as 'child'."""
        self.assertEqual(set_life_stage_from_age_yrs(16), 'child')

    def test_set_life_stage_from_age_yrs_adult(self):
        """An age of 17 is classified as 'adult'."""
        self.assertEqual(set_life_stage_from_age_yrs(17), 'adult')

    def test_set_life_stage_from_age_yrs_nan(self):
        """NaN input produces a missing value."""
        self.assertTrue(pandas.isna(set_life_stage_from_age_yrs(np.nan)))

    def test_set_life_stage_from_age_yrs_invalid(self):
        """A non-integer age raises a ValueError."""
        self.assertRaisesRegex(
            ValueError, "input must be an integer",
            set_life_stage_from_age_yrs, 'twelve')

    # ---- format_a_datetime ----
    def test_format_a_datetime_valid(self):
        """A date-only string is formatted with a midnight time part."""
        self.assertEqual(format_a_datetime('2023-01-01'), '2023-01-01 00:00')

    def test_format_a_datetime_invalid(self):
        """An unparseable date raises a ValueError with the default name."""
        self.assertRaisesRegex(
            ValueError, "input cannot be parsed to a date",
            format_a_datetime, 'invalid')

    def test_format_a_datetime_invalid_w_custom_source_name(self):
        """The error for an unparseable date uses the given source_name."""
        self.assertRaisesRegex(
            ValueError, "my_date cannot be parsed to a date",
            format_a_datetime, 'invalid', source_name='my_date')

    def test_format_a_datetime_nan(self):
        """NaN input produces a missing value."""
        self.assertTrue(pandas.isna(format_a_datetime(np.nan)))

    def test_format_a_datetime_datetime_obj(self):
        """A datetime object is formatted to minutes; seconds do not appear."""
        moment = datetime(2023, 1, 1, 12, 30, 45)
        self.assertEqual(format_a_datetime(moment), '2023-01-01 12:30')

    # ---- _get_one_source_field ----
    def test__get_one_source_field_valid(self):
        """With one source field, that field's value is returned."""
        self.assertEqual(
            _get_one_source_field(self.test_row, ['patient_sex'], 'test'),
            'M')

    def test__get_one_source_field_multiple_fields(self):
        """Two source fields raise a ValueError that includes the given name."""
        self.assertRaisesRegex(
            ValueError, "test requires exactly one source field",
            _get_one_source_field,
            self.test_row, ['patient_sex', 'patient_age'], 'test')

    # ---- _help_transform_mapping ----
    def test__help_transform_mapping_valid(self):
        """A value present in the mapping yields its mapped value."""
        lookup = {'A': '1', 'B': '2'}
        self.assertEqual(_help_transform_mapping('A', lookup), '1')

    def test__help_transform_mapping_invalid(self):
        """A value absent from the mapping raises a ValueError."""
        lookup = {'A': '1', 'B': '2'}
        self.assertRaisesRegex(
            ValueError, "Unrecognized value: C",
            _help_transform_mapping, 'C', lookup)

    def test__help_transform_mapping_nan(self):
        """NaN input produces a missing value."""
        lookup = {'A': '1', 'B': '2'}
        self.assertTrue(pandas.isna(_help_transform_mapping(np.nan, lookup)))

    def test__help_transform_mapping_make_lower(self):
        """With make_lower=True, 'A' matches the lower-case key 'a'."""
        lookup = {'a': '1', 'b': '2'}
        self.assertEqual(
            _help_transform_mapping('A', lookup, make_lower=True), '1')