metameq 2026.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metameq/__init__.py +42 -0
- metameq/_version.py +21 -0
- metameq/config/__init__.py +0 -0
- metameq/config/config.yml +3 -0
- metameq/config/standards.yml +1648 -0
- metameq/src/__init__.py +0 -0
- metameq/src/__main__.py +34 -0
- metameq/src/metadata_configurator.py +512 -0
- metameq/src/metadata_extender.py +1168 -0
- metameq/src/metadata_merger.py +362 -0
- metameq/src/metadata_transformers.py +335 -0
- metameq/src/metadata_validator.py +387 -0
- metameq/src/util.py +299 -0
- metameq/tests/__init__.py +0 -0
- metameq/tests/data/invalid.yml +1 -0
- metameq/tests/data/test_config.yml +9 -0
- metameq/tests/test_metadata_configurator.py +2334 -0
- metameq/tests/test_metadata_extender.py +2610 -0
- metameq/tests/test_metadata_merger.py +657 -0
- metameq/tests/test_metadata_transformers.py +277 -0
- metameq/tests/test_metadata_validator.py +1191 -0
- metameq/tests/test_util.py +436 -0
- metameq-2026.1.1.dist-info/METADATA +21 -0
- metameq-2026.1.1.dist-info/RECORD +27 -0
- metameq-2026.1.1.dist-info/WHEEL +5 -0
- metameq-2026.1.1.dist-info/entry_points.txt +2 -0
- metameq-2026.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2334 @@
|
|
|
1
|
+
import os.path as path
|
|
2
|
+
from unittest import TestCase
|
|
3
|
+
from metameq.src.util import \
|
|
4
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY, METADATA_FIELDS_KEY, \
|
|
5
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
|
|
6
|
+
ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
|
|
7
|
+
STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
|
|
8
|
+
OVERWRITE_NON_NANS_KEY
|
|
9
|
+
from metameq.src.metadata_configurator import \
|
|
10
|
+
combine_stds_and_study_config, \
|
|
11
|
+
_make_combined_stds_and_study_host_type_dicts, \
|
|
12
|
+
flatten_nested_stds_dict, \
|
|
13
|
+
_combine_base_and_added_metadata_fields, \
|
|
14
|
+
_combine_base_and_added_sample_type_specific_metadata, \
|
|
15
|
+
_combine_base_and_added_host_type, \
|
|
16
|
+
_id_sample_type_definition, \
|
|
17
|
+
update_wip_metadata_dict, \
|
|
18
|
+
build_full_flat_config_dict
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestMetadataConfigurator(TestCase):
|
|
22
|
+
TEST_DIR = path.dirname(__file__)
|
|
23
|
+
# NOTE(review): the packaged test data listed for this release contains only
# data/invalid.yml and data/test_config.yml; confirm data/test_standards.yml
# exists and that this path is still used.
TEST_STDS_FP = path.join(TEST_DIR, "data/test_standards.yml")
|
|
24
|
+
|
|
25
|
+
NESTED_STDS_DICT = {
|
|
26
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
27
|
+
# Top host level (host_associated in this example) has
|
|
28
|
+
# *complete* definitions for all metadata fields it includes.
|
|
29
|
+
# Lower levels include only the elements of the definition that
|
|
30
|
+
# are different from the parent level (but if a field is NEW at
|
|
31
|
+
# a lower level, the lower level must include the complete
|
|
32
|
+
# definition for that field).
|
|
33
|
+
"host_associated": {
|
|
34
|
+
DEFAULT_KEY: "not provided",
|
|
35
|
+
METADATA_FIELDS_KEY: {
|
|
36
|
+
# not overridden
|
|
37
|
+
"country": {
|
|
38
|
+
"allowed": ["USA"],
|
|
39
|
+
DEFAULT_KEY: "USA",
|
|
40
|
+
"empty": False,
|
|
41
|
+
"is_phi": False,
|
|
42
|
+
"required": True,
|
|
43
|
+
"type": "string"
|
|
44
|
+
},
|
|
45
|
+
# overridden in stds same level host + sample type,
|
|
46
|
+
# again in stds lower host, and *again* in
|
|
47
|
+
# stds lower host + sample type
|
|
48
|
+
"description": {
|
|
49
|
+
"allowed": ["host associated"],
|
|
50
|
+
DEFAULT_KEY: "host associated",
|
|
51
|
+
"empty": False,
|
|
52
|
+
"is_phi": False,
|
|
53
|
+
"required": True,
|
|
54
|
+
"type": "string"
|
|
55
|
+
},
|
|
56
|
+
# overridden in stds lower host
|
|
57
|
+
"dna_extracted": {
|
|
58
|
+
"allowed": ["true", "false"],
|
|
59
|
+
DEFAULT_KEY: "true",
|
|
60
|
+
"empty": False,
|
|
61
|
+
"is_phi": False,
|
|
62
|
+
"required": True,
|
|
63
|
+
"type": "string"
|
|
64
|
+
},
|
|
65
|
+
# overridden in stds lower host + sample type
|
|
66
|
+
"elevation": {
|
|
67
|
+
"anyof": [
|
|
68
|
+
{
|
|
69
|
+
"allowed": [
|
|
70
|
+
"not collected",
|
|
71
|
+
"not provided",
|
|
72
|
+
"restricted access"],
|
|
73
|
+
"type": "string"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"min": -413.0,
|
|
77
|
+
"type": "number"
|
|
78
|
+
}],
|
|
79
|
+
"empty": False,
|
|
80
|
+
"is_phi": False,
|
|
81
|
+
"required": True
|
|
82
|
+
},
|
|
83
|
+
# overridden in STUDY for this host
|
|
84
|
+
"geo_loc_name": {
|
|
85
|
+
"empty": False,
|
|
86
|
+
"is_phi": False,
|
|
87
|
+
"required": True,
|
|
88
|
+
"type": "string"
|
|
89
|
+
},
|
|
90
|
+
# overridden in STUDY for this host
|
|
91
|
+
"host_type": {
|
|
92
|
+
"allowed": ["human", "animal", "plant"],
|
|
93
|
+
"empty": False,
|
|
94
|
+
"is_phi": False,
|
|
95
|
+
"required": True,
|
|
96
|
+
"type": "string"
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
100
|
+
"fe": {
|
|
101
|
+
"alias": "stool",
|
|
102
|
+
},
|
|
103
|
+
"stool": {
|
|
104
|
+
METADATA_FIELDS_KEY: {
|
|
105
|
+
# overrides stds host,
|
|
106
|
+
# overridden in stds lower host, and
|
|
107
|
+
# in stds lower host + sample type
|
|
108
|
+
"description": {
|
|
109
|
+
"allowed": ["host associated stool"],
|
|
110
|
+
DEFAULT_KEY: "host associated stool",
|
|
111
|
+
"type": "string"
|
|
112
|
+
},
|
|
113
|
+
# overridden in STUDY for this host + sample type
|
|
114
|
+
"physical_specimen_location": {
|
|
115
|
+
"allowed": ["UCSD"],
|
|
116
|
+
DEFAULT_KEY: "UCSD",
|
|
117
|
+
"empty": False,
|
|
118
|
+
"is_phi": False,
|
|
119
|
+
"required": True,
|
|
120
|
+
"type": "string"
|
|
121
|
+
},
|
|
122
|
+
# overridden in stds lower host + sample type
|
|
123
|
+
"physical_specimen_remaining": {
|
|
124
|
+
"allowed": ["true", "false"],
|
|
125
|
+
DEFAULT_KEY: "true",
|
|
126
|
+
"empty": False,
|
|
127
|
+
"is_phi": False,
|
|
128
|
+
"required": True,
|
|
129
|
+
"type": "string"
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
},
|
|
134
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
135
|
+
"human": {
|
|
136
|
+
METADATA_FIELDS_KEY: {
|
|
137
|
+
# overrides stds parent host
|
|
138
|
+
"description": {
|
|
139
|
+
"allowed": ["human"],
|
|
140
|
+
DEFAULT_KEY: "human",
|
|
141
|
+
"type": "string"
|
|
142
|
+
},
|
|
143
|
+
# overrides stds parent host
|
|
144
|
+
# BUT overridden in turn in STUDY for this host
|
|
145
|
+
"dna_extracted": {
|
|
146
|
+
"allowed": ["false"],
|
|
147
|
+
DEFAULT_KEY: "false",
|
|
148
|
+
"type": "string"
|
|
149
|
+
},
|
|
150
|
+
# overrides stds parent host
|
|
151
|
+
"host_type": {
|
|
152
|
+
"allowed": ["human"],
|
|
153
|
+
DEFAULT_KEY: "human",
|
|
154
|
+
"type": "string"
|
|
155
|
+
}
|
|
156
|
+
},
|
|
157
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
158
|
+
"stool": {
|
|
159
|
+
METADATA_FIELDS_KEY: {
|
|
160
|
+
# overrides stds parent host + sample type
|
|
161
|
+
"description": {
|
|
162
|
+
"allowed": ["human stool"],
|
|
163
|
+
DEFAULT_KEY: "human stool",
|
|
164
|
+
"type": "string"
|
|
165
|
+
},
|
|
166
|
+
# overrides stds parent host
|
|
167
|
+
"elevation": {
|
|
168
|
+
DEFAULT_KEY: 14,
|
|
169
|
+
"type": "number"
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
},
|
|
173
|
+
"dung": {
|
|
174
|
+
METADATA_FIELDS_KEY: {
|
|
175
|
+
# overrides stds parent host + sample type
|
|
176
|
+
"description": {
|
|
177
|
+
"allowed": ["human dung"],
|
|
178
|
+
DEFAULT_KEY: "human dung",
|
|
179
|
+
"type": "string"
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
},
|
|
184
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
185
|
+
"dude": {
|
|
186
|
+
METADATA_FIELDS_KEY: {
|
|
187
|
+
# overrides stds parent host
|
|
188
|
+
"host_type": {
|
|
189
|
+
"allowed": ["dude"],
|
|
190
|
+
DEFAULT_KEY: "dude",
|
|
191
|
+
"type": "string"
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
},
|
|
197
|
+
"control": {
|
|
198
|
+
METADATA_FIELDS_KEY: {
|
|
199
|
+
# overrides stds parent host
|
|
200
|
+
"description": {
|
|
201
|
+
"allowed": ["control"],
|
|
202
|
+
DEFAULT_KEY: "control",
|
|
203
|
+
"type": "string"
|
|
204
|
+
},
|
|
205
|
+
# overrides stds parent host
|
|
206
|
+
"host_type": {
|
|
207
|
+
"allowed": ["control"],
|
|
208
|
+
DEFAULT_KEY: "control",
|
|
209
|
+
"type": "string"
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
FLAT_STUDY_DICT = {
|
|
219
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
220
|
+
# FLAT list of host types
|
|
221
|
+
"host_associated": {
|
|
222
|
+
METADATA_FIELDS_KEY: {
|
|
223
|
+
# override of standard for this host type
|
|
224
|
+
"geo_loc_name": {
|
|
225
|
+
"allowed": ["USA:CA:San Diego"],
|
|
226
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
227
|
+
"type": "string"
|
|
228
|
+
},
|
|
229
|
+
# note: this overrides the standard for this host type
|
|
230
|
+
# BUT the std lower host type overrides this,
|
|
231
|
+
# and the lowest (most specific) directive wins,
|
|
232
|
+
# so this will NOT be included in output
|
|
233
|
+
"host_type": {
|
|
234
|
+
"allowed": ["human", "non-human"],
|
|
235
|
+
"type": "string"
|
|
236
|
+
},
|
|
237
|
+
},
|
|
238
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
239
|
+
"stool": {
|
|
240
|
+
METADATA_FIELDS_KEY: {
|
|
241
|
+
# override of standard for this
|
|
242
|
+
# host + sample type
|
|
243
|
+
"physical_specimen_location": {
|
|
244
|
+
"allowed": ["UCSDST"],
|
|
245
|
+
DEFAULT_KEY: "UCSDST",
|
|
246
|
+
"type": "string"
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
},
|
|
252
|
+
"human": {
|
|
253
|
+
DEFAULT_KEY: "not collected",
|
|
254
|
+
METADATA_FIELDS_KEY: {
|
|
255
|
+
# overrides std parent host type
|
|
256
|
+
"dna_extracted": {
|
|
257
|
+
"allowed": ["true"],
|
|
258
|
+
DEFAULT_KEY: "true",
|
|
259
|
+
"type": "string"
|
|
260
|
+
},
|
|
261
|
+
},
|
|
262
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
263
|
+
"feces": {
|
|
264
|
+
"alias": "stool"
|
|
265
|
+
},
|
|
266
|
+
"stool": {
|
|
267
|
+
METADATA_FIELDS_KEY: {
|
|
268
|
+
# override of std parent
|
|
269
|
+
# host + sample type
|
|
270
|
+
"physical_specimen_remaining": {
|
|
271
|
+
"allowed": ["false"],
|
|
272
|
+
DEFAULT_KEY: "false",
|
|
273
|
+
"type": "string"
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
},
|
|
277
|
+
"dung": {
|
|
278
|
+
"base_type": "stool",
|
|
279
|
+
METADATA_FIELDS_KEY: {
|
|
280
|
+
# overrides stds parent host + sample type
|
|
281
|
+
"physical_specimen_location": {
|
|
282
|
+
"allowed": ["FIELD"],
|
|
283
|
+
DEFAULT_KEY: "FIELD",
|
|
284
|
+
"type": "string"
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
},
|
|
288
|
+
"f": {
|
|
289
|
+
"base_type": "stool"
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
NESTED_STDS_W_STUDY_DICT = {
|
|
297
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
298
|
+
# Top host level (host_associated in this example) has
|
|
299
|
+
# *complete* definitions for all metadata fields it includes.
|
|
300
|
+
# Lower levels include only the elements of the definition that
|
|
301
|
+
# are different from the parent level (but if a field is NEW at
|
|
302
|
+
# a lower level, the lower level must include the complete
|
|
303
|
+
# definition for that field).
|
|
304
|
+
"host_associated": {
|
|
305
|
+
DEFAULT_KEY: "not provided",
|
|
306
|
+
METADATA_FIELDS_KEY: {
|
|
307
|
+
# not overridden
|
|
308
|
+
"country": {
|
|
309
|
+
"allowed": ["USA"],
|
|
310
|
+
DEFAULT_KEY: "USA",
|
|
311
|
+
"empty": False,
|
|
312
|
+
"is_phi": False,
|
|
313
|
+
"required": True,
|
|
314
|
+
"type": "string"
|
|
315
|
+
},
|
|
316
|
+
# overridden in stds same level host + sample type,
|
|
317
|
+
# again in stds lower host, and *again* in
|
|
318
|
+
# stds lower host + sample type
|
|
319
|
+
"description": {
|
|
320
|
+
"allowed": ["host associated"],
|
|
321
|
+
DEFAULT_KEY: "host associated",
|
|
322
|
+
"empty": False,
|
|
323
|
+
"is_phi": False,
|
|
324
|
+
"required": True,
|
|
325
|
+
"type": "string"
|
|
326
|
+
},
|
|
327
|
+
# overridden in stds lower host
|
|
328
|
+
"dna_extracted": {
|
|
329
|
+
"allowed": ["true", "false"],
|
|
330
|
+
DEFAULT_KEY: "true",
|
|
331
|
+
"empty": False,
|
|
332
|
+
"is_phi": False,
|
|
333
|
+
"required": True,
|
|
334
|
+
"type": "string"
|
|
335
|
+
},
|
|
336
|
+
# overridden in stds lower host + sample type
|
|
337
|
+
"elevation": {
|
|
338
|
+
"anyof": [
|
|
339
|
+
{
|
|
340
|
+
"allowed": [
|
|
341
|
+
"not collected",
|
|
342
|
+
"not provided",
|
|
343
|
+
"restricted access"],
|
|
344
|
+
"type": "string"
|
|
345
|
+
},
|
|
346
|
+
{
|
|
347
|
+
"min": -413.0,
|
|
348
|
+
"type": "number"
|
|
349
|
+
}],
|
|
350
|
+
"empty": False,
|
|
351
|
+
"is_phi": False,
|
|
352
|
+
"required": True
|
|
353
|
+
},
|
|
354
|
+
# not overridden (NB: comes from study)
|
|
355
|
+
"geo_loc_name": {
|
|
356
|
+
"allowed": ["USA:CA:San Diego"],
|
|
357
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
358
|
+
"empty": False,
|
|
359
|
+
"is_phi": False,
|
|
360
|
+
"required": True,
|
|
361
|
+
"type": "string"
|
|
362
|
+
},
|
|
363
|
+
# overridden in stds lower host
|
|
364
|
+
# (NB: comes from study)
|
|
365
|
+
"host_type": {
|
|
366
|
+
"allowed": ["human", "non-human"],
|
|
367
|
+
"empty": False,
|
|
368
|
+
"is_phi": False,
|
|
369
|
+
"required": True,
|
|
370
|
+
"type": "string"
|
|
371
|
+
}
|
|
372
|
+
},
|
|
373
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
374
|
+
"fe": {
|
|
375
|
+
"alias": "stool",
|
|
376
|
+
},
|
|
377
|
+
"stool": {
|
|
378
|
+
METADATA_FIELDS_KEY: {
|
|
379
|
+
# overrides stds host,
|
|
380
|
+
# overridden in stds lower host, and
|
|
381
|
+
# in stds lower host + sample type
|
|
382
|
+
"description": {
|
|
383
|
+
"allowed": ["host associated stool"],
|
|
384
|
+
DEFAULT_KEY: "host associated stool",
|
|
385
|
+
"type": "string"
|
|
386
|
+
},
|
|
387
|
+
# not overridden
|
|
388
|
+
# (NB: comes from study)
|
|
389
|
+
"physical_specimen_location": {
|
|
390
|
+
"allowed": ["UCSDST"],
|
|
391
|
+
DEFAULT_KEY: "UCSDST",
|
|
392
|
+
"empty": False,
|
|
393
|
+
"is_phi": False,
|
|
394
|
+
"required": True,
|
|
395
|
+
"type": "string"
|
|
396
|
+
},
|
|
397
|
+
# overridden in stds lower host + sample type
|
|
398
|
+
"physical_specimen_remaining": {
|
|
399
|
+
"allowed": ["true", "false"],
|
|
400
|
+
DEFAULT_KEY: "true",
|
|
401
|
+
"empty": False,
|
|
402
|
+
"is_phi": False,
|
|
403
|
+
"required": True,
|
|
404
|
+
"type": "string"
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
},
|
|
409
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
410
|
+
"human": {
|
|
411
|
+
DEFAULT_KEY: "not collected",
|
|
412
|
+
METADATA_FIELDS_KEY: {
|
|
413
|
+
# overrides stds parent host
|
|
414
|
+
"description": {
|
|
415
|
+
"allowed": ["human"],
|
|
416
|
+
DEFAULT_KEY: "human",
|
|
417
|
+
"type": "string"
|
|
418
|
+
},
|
|
419
|
+
# overrides stds parent host
|
|
420
|
+
# (NB: comes from study)
|
|
421
|
+
"dna_extracted": {
|
|
422
|
+
"allowed": ["true"],
|
|
423
|
+
DEFAULT_KEY: "true",
|
|
424
|
+
"type": "string"
|
|
425
|
+
},
|
|
426
|
+
# overrides stds parent host
|
|
427
|
+
"host_type": {
|
|
428
|
+
"allowed": ["human"],
|
|
429
|
+
DEFAULT_KEY: "human",
|
|
430
|
+
"type": "string"
|
|
431
|
+
}
|
|
432
|
+
},
|
|
433
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
434
|
+
"feces": {
|
|
435
|
+
"alias": "stool",
|
|
436
|
+
},
|
|
437
|
+
"stool": {
|
|
438
|
+
METADATA_FIELDS_KEY: {
|
|
439
|
+
# overrides stds parent host + sample type
|
|
440
|
+
"description": {
|
|
441
|
+
"allowed": ["human stool"],
|
|
442
|
+
DEFAULT_KEY: "human stool",
|
|
443
|
+
"type": "string"
|
|
444
|
+
},
|
|
445
|
+
# overrides stds parent host
|
|
446
|
+
"elevation": {
|
|
447
|
+
DEFAULT_KEY: 14,
|
|
448
|
+
"type": "number"
|
|
449
|
+
},
|
|
450
|
+
# overrides stds parent host + sample type
|
|
451
|
+
# (NB: comes from study)
|
|
452
|
+
"physical_specimen_remaining": {
|
|
453
|
+
"allowed": ["false"],
|
|
454
|
+
DEFAULT_KEY: "false",
|
|
455
|
+
"type": "string"
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
},
|
|
459
|
+
"dung": {
|
|
460
|
+
"base_type": "stool",
|
|
461
|
+
METADATA_FIELDS_KEY: {
|
|
462
|
+
# overrides stds parent host + sample type
|
|
463
|
+
"description": {
|
|
464
|
+
"allowed": ["human dung"],
|
|
465
|
+
DEFAULT_KEY: "human dung",
|
|
466
|
+
"type": "string"
|
|
467
|
+
},
|
|
468
|
+
# overrides stds parent host + sample type
|
|
469
|
+
"physical_specimen_location": {
|
|
470
|
+
"allowed": ["FIELD"],
|
|
471
|
+
DEFAULT_KEY: "FIELD",
|
|
472
|
+
"type": "string"
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
},
|
|
476
|
+
"f": {
|
|
477
|
+
"base_type": "stool"
|
|
478
|
+
}
|
|
479
|
+
},
|
|
480
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
481
|
+
"dude": {
|
|
482
|
+
METADATA_FIELDS_KEY: {
|
|
483
|
+
# overrides stds parent host
|
|
484
|
+
"host_type": {
|
|
485
|
+
"allowed": ["dude"],
|
|
486
|
+
DEFAULT_KEY: "dude",
|
|
487
|
+
"type": "string"
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
},
|
|
493
|
+
"control": {
|
|
494
|
+
METADATA_FIELDS_KEY: {
|
|
495
|
+
# overrides stds parent host
|
|
496
|
+
"description": {
|
|
497
|
+
"allowed": ["control"],
|
|
498
|
+
DEFAULT_KEY: "control",
|
|
499
|
+
"type": "string"
|
|
500
|
+
},
|
|
501
|
+
# overrides stds parent host
|
|
502
|
+
"host_type": {
|
|
503
|
+
"allowed": ["control"],
|
|
504
|
+
DEFAULT_KEY: "control",
|
|
505
|
+
"type": "string"
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
FLATTENED_STDS_W_STUDY_DICT = {
|
|
515
|
+
HOST_TYPE_SPECIFIC_METADATA_KEY: {
|
|
516
|
+
"host_associated": {
|
|
517
|
+
DEFAULT_KEY: "not provided",
|
|
518
|
+
METADATA_FIELDS_KEY: {
|
|
519
|
+
# from stds same level host
|
|
520
|
+
"country": {
|
|
521
|
+
"allowed": ["USA"],
|
|
522
|
+
DEFAULT_KEY: "USA",
|
|
523
|
+
"empty": False,
|
|
524
|
+
"is_phi": False,
|
|
525
|
+
"required": True,
|
|
526
|
+
"type": "string"
|
|
527
|
+
},
|
|
528
|
+
# from stds same level host
|
|
529
|
+
"description": {
|
|
530
|
+
"allowed": ["host associated"],
|
|
531
|
+
DEFAULT_KEY: "host associated",
|
|
532
|
+
"empty": False,
|
|
533
|
+
"is_phi": False,
|
|
534
|
+
"required": True,
|
|
535
|
+
"type": "string"
|
|
536
|
+
},
|
|
537
|
+
# from stds same level host
|
|
538
|
+
"dna_extracted": {
|
|
539
|
+
"allowed": ["true", "false"],
|
|
540
|
+
DEFAULT_KEY: "true",
|
|
541
|
+
"empty": False,
|
|
542
|
+
"is_phi": False,
|
|
543
|
+
"required": True,
|
|
544
|
+
"type": "string"
|
|
545
|
+
},
|
|
546
|
+
# from stds same level host
|
|
547
|
+
"elevation": {
|
|
548
|
+
"anyof": [
|
|
549
|
+
{
|
|
550
|
+
"allowed": [
|
|
551
|
+
"not collected",
|
|
552
|
+
"not provided",
|
|
553
|
+
"restricted access"],
|
|
554
|
+
"type": "string"
|
|
555
|
+
},
|
|
556
|
+
{
|
|
557
|
+
"min": -413.0,
|
|
558
|
+
"type": "number"
|
|
559
|
+
}],
|
|
560
|
+
"empty": False,
|
|
561
|
+
"is_phi": False,
|
|
562
|
+
"required": True
|
|
563
|
+
},
|
|
564
|
+
# from stds same level host
|
|
565
|
+
"geo_loc_name": {
|
|
566
|
+
"allowed": ["USA:CA:San Diego"],
|
|
567
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
568
|
+
"empty": False,
|
|
569
|
+
"is_phi": False,
|
|
570
|
+
"required": True,
|
|
571
|
+
"type": "string"
|
|
572
|
+
},
|
|
573
|
+
# overridden in stds lower host
|
|
574
|
+
"host_type": {
|
|
575
|
+
"allowed": ["human", "non-human"],
|
|
576
|
+
"empty": False,
|
|
577
|
+
"is_phi": False,
|
|
578
|
+
"required": True,
|
|
579
|
+
"type": "string"
|
|
580
|
+
}
|
|
581
|
+
},
|
|
582
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
583
|
+
"fe": {
|
|
584
|
+
"alias": "stool"
|
|
585
|
+
},
|
|
586
|
+
"stool": {
|
|
587
|
+
METADATA_FIELDS_KEY: {
|
|
588
|
+
# from stds same level host + sample type
|
|
589
|
+
"description": {
|
|
590
|
+
"allowed": ["host associated stool"],
|
|
591
|
+
DEFAULT_KEY: "host associated stool",
|
|
592
|
+
"type": "string"
|
|
593
|
+
},
|
|
594
|
+
# from stds same level host + sample type
|
|
595
|
+
# (NB: comes from study)
|
|
596
|
+
"physical_specimen_location": {
|
|
597
|
+
"allowed": ["UCSDST"],
|
|
598
|
+
DEFAULT_KEY: "UCSDST",
|
|
599
|
+
"empty": False,
|
|
600
|
+
"is_phi": False,
|
|
601
|
+
"required": True,
|
|
602
|
+
"type": "string"
|
|
603
|
+
},
|
|
604
|
+
# from stds same level host + sample type
|
|
605
|
+
# (NB: not changed by study at this level)
|
|
606
|
+
"physical_specimen_remaining": {
|
|
607
|
+
"allowed": ["true", "false"],
|
|
608
|
+
DEFAULT_KEY: "true",
|
|
609
|
+
"empty": False,
|
|
610
|
+
"is_phi": False,
|
|
611
|
+
"required": True,
|
|
612
|
+
"type": "string"
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
},
|
|
618
|
+
"control": {
|
|
619
|
+
DEFAULT_KEY: "not provided",
|
|
620
|
+
METADATA_FIELDS_KEY: {
|
|
621
|
+
# from stds parent host
|
|
622
|
+
"country": {
|
|
623
|
+
"allowed": ["USA"],
|
|
624
|
+
DEFAULT_KEY: "USA",
|
|
625
|
+
"empty": False,
|
|
626
|
+
"is_phi": False,
|
|
627
|
+
"required": True,
|
|
628
|
+
"type": "string"
|
|
629
|
+
},
|
|
630
|
+
# from stds same level host
|
|
631
|
+
"description": {
|
|
632
|
+
"allowed": ["control"],
|
|
633
|
+
DEFAULT_KEY: "control",
|
|
634
|
+
"empty": False,
|
|
635
|
+
"is_phi": False,
|
|
636
|
+
"required": True,
|
|
637
|
+
"type": "string"
|
|
638
|
+
},
|
|
639
|
+
# from stds parent host
|
|
640
|
+
"dna_extracted": {
|
|
641
|
+
"allowed": ["true", "false"],
|
|
642
|
+
DEFAULT_KEY: "true",
|
|
643
|
+
"empty": False,
|
|
644
|
+
"is_phi": False,
|
|
645
|
+
"required": True,
|
|
646
|
+
"type": "string"
|
|
647
|
+
},
|
|
648
|
+
# from stds parent host
|
|
649
|
+
"elevation": {
|
|
650
|
+
"anyof": [
|
|
651
|
+
{
|
|
652
|
+
"allowed": [
|
|
653
|
+
"not collected",
|
|
654
|
+
"not provided",
|
|
655
|
+
"restricted access"],
|
|
656
|
+
"type": "string"
|
|
657
|
+
},
|
|
658
|
+
{
|
|
659
|
+
"min": -413.0,
|
|
660
|
+
"type": "number"
|
|
661
|
+
}],
|
|
662
|
+
"empty": False,
|
|
663
|
+
"is_phi": False,
|
|
664
|
+
"required": True
|
|
665
|
+
},
|
|
666
|
+
# from stds parent host (NB: values come from study)
|
|
667
|
+
"geo_loc_name": {
|
|
668
|
+
"allowed": ["USA:CA:San Diego"],
|
|
669
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
670
|
+
"empty": False,
|
|
671
|
+
"is_phi": False,
|
|
672
|
+
"required": True,
|
|
673
|
+
"type": "string"
|
|
674
|
+
},
|
|
675
|
+
# from stds same level host (overrides stds parent host)
|
|
676
|
+
"host_type": {
|
|
677
|
+
"allowed": ["control"],
|
|
678
|
+
DEFAULT_KEY: "control",
|
|
679
|
+
"empty": False,
|
|
680
|
+
"is_phi": False,
|
|
681
|
+
"required": True,
|
|
682
|
+
"type": "string"
|
|
683
|
+
}
|
|
684
|
+
},
|
|
685
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
686
|
+
"fe": {
|
|
687
|
+
"alias": "stool"
|
|
688
|
+
},
|
|
689
|
+
"stool": {
|
|
690
|
+
METADATA_FIELDS_KEY: {
|
|
691
|
+
# from stds parent host + sample type
|
|
692
|
+
"description": {
|
|
693
|
+
"allowed": ["host associated stool"],
|
|
694
|
+
DEFAULT_KEY: "host associated stool",
|
|
695
|
+
"type": "string"
|
|
696
|
+
},
|
|
697
|
+
# from stds parent host + sample type
|
|
698
|
+
# (NB: comes from study)
|
|
699
|
+
"physical_specimen_location": {
|
|
700
|
+
"allowed": ["UCSDST"],
|
|
701
|
+
DEFAULT_KEY: "UCSDST",
|
|
702
|
+
"empty": False,
|
|
703
|
+
"is_phi": False,
|
|
704
|
+
"required": True,
|
|
705
|
+
"type": "string"
|
|
706
|
+
},
|
|
707
|
+
# from stds parent host + sample type
|
|
708
|
+
# (NB: not changed by study)
|
|
709
|
+
"physical_specimen_remaining": {
|
|
710
|
+
"allowed": ["true", "false"],
|
|
711
|
+
DEFAULT_KEY: "true",
|
|
712
|
+
"empty": False,
|
|
713
|
+
"is_phi": False,
|
|
714
|
+
"required": True,
|
|
715
|
+
"type": "string"
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
},
|
|
721
|
+
"human": {
|
|
722
|
+
DEFAULT_KEY: "not collected",
|
|
723
|
+
METADATA_FIELDS_KEY: {
|
|
724
|
+
# from stds parent host
|
|
725
|
+
"country": {
|
|
726
|
+
"allowed": ["USA"],
|
|
727
|
+
DEFAULT_KEY: "USA",
|
|
728
|
+
"empty": False,
|
|
729
|
+
"is_phi": False,
|
|
730
|
+
"required": True,
|
|
731
|
+
"type": "string"
|
|
732
|
+
},
|
|
733
|
+
# from stds same level host
|
|
734
|
+
"description": {
|
|
735
|
+
"allowed": ["human"],
|
|
736
|
+
DEFAULT_KEY: "human",
|
|
737
|
+
"empty": False,
|
|
738
|
+
"is_phi": False,
|
|
739
|
+
"required": True,
|
|
740
|
+
"type": "string"
|
|
741
|
+
},
|
|
742
|
+
# from stds same level host
|
|
743
|
+
# (NB: comes from study)
|
|
744
|
+
"dna_extracted": {
|
|
745
|
+
"allowed": ["true"],
|
|
746
|
+
DEFAULT_KEY: "true",
|
|
747
|
+
"empty": False,
|
|
748
|
+
"is_phi": False,
|
|
749
|
+
"required": True,
|
|
750
|
+
"type": "string"
|
|
751
|
+
},
|
|
752
|
+
# from stds parent host
|
|
753
|
+
"elevation": {
|
|
754
|
+
"anyof": [
|
|
755
|
+
{
|
|
756
|
+
"allowed": [
|
|
757
|
+
"not collected",
|
|
758
|
+
"not provided",
|
|
759
|
+
"restricted access"],
|
|
760
|
+
"type": "string"
|
|
761
|
+
},
|
|
762
|
+
{
|
|
763
|
+
"min": -413.0,
|
|
764
|
+
"type": "number"
|
|
765
|
+
}],
|
|
766
|
+
"empty": False,
|
|
767
|
+
"is_phi": False,
|
|
768
|
+
"required": True
|
|
769
|
+
},
|
|
770
|
+
# from stds parent host
|
|
771
|
+
"geo_loc_name": {
|
|
772
|
+
"allowed": ["USA:CA:San Diego"],
|
|
773
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
774
|
+
"empty": False,
|
|
775
|
+
"is_phi": False,
|
|
776
|
+
"required": True,
|
|
777
|
+
"type": "string"
|
|
778
|
+
},
|
|
779
|
+
# from stds same level host
|
|
780
|
+
"host_type": {
|
|
781
|
+
"allowed": ["human"],
|
|
782
|
+
DEFAULT_KEY: "human",
|
|
783
|
+
"empty": False,
|
|
784
|
+
"is_phi": False,
|
|
785
|
+
"required": True,
|
|
786
|
+
"type": "string"
|
|
787
|
+
}
|
|
788
|
+
},
|
|
789
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
790
|
+
"dung": {
|
|
791
|
+
"base_type": "stool",
|
|
792
|
+
METADATA_FIELDS_KEY: {
|
|
793
|
+
# overrides stds parent host + sample type
|
|
794
|
+
"description": {
|
|
795
|
+
"allowed": ["human dung"],
|
|
796
|
+
DEFAULT_KEY: "human dung",
|
|
797
|
+
"type": "string"
|
|
798
|
+
},
|
|
799
|
+
# overrides stds parent host + sample type
|
|
800
|
+
"physical_specimen_location": {
|
|
801
|
+
"allowed": ["FIELD"],
|
|
802
|
+
DEFAULT_KEY: "FIELD",
|
|
803
|
+
"type": "string"
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
},
|
|
807
|
+
"f": {
|
|
808
|
+
"base_type": "stool"
|
|
809
|
+
},
|
|
810
|
+
"fe": {
|
|
811
|
+
"alias": "stool"
|
|
812
|
+
},
|
|
813
|
+
"feces": {
|
|
814
|
+
"alias": "stool"
|
|
815
|
+
},
|
|
816
|
+
"stool": {
|
|
817
|
+
METADATA_FIELDS_KEY: {
|
|
818
|
+
# from stds same level host + sample type
|
|
819
|
+
"description": {
|
|
820
|
+
"allowed": ["human stool"],
|
|
821
|
+
DEFAULT_KEY: "human stool",
|
|
822
|
+
"type": "string"
|
|
823
|
+
},
|
|
824
|
+
# from stds same level host + sample type
|
|
825
|
+
"elevation": {
|
|
826
|
+
DEFAULT_KEY: 14,
|
|
827
|
+
"type": "number"
|
|
828
|
+
},
|
|
829
|
+
# from stds parent level host + sample type
|
|
830
|
+
"physical_specimen_location": {
|
|
831
|
+
"allowed": ["UCSDST"],
|
|
832
|
+
DEFAULT_KEY: "UCSDST",
|
|
833
|
+
"empty": False,
|
|
834
|
+
"is_phi": False,
|
|
835
|
+
"required": True,
|
|
836
|
+
"type": "string"
|
|
837
|
+
},
|
|
838
|
+
# from stds same level host + sample type
|
|
839
|
+
"physical_specimen_remaining": {
|
|
840
|
+
"allowed": ["false"],
|
|
841
|
+
DEFAULT_KEY: "false",
|
|
842
|
+
"empty": False,
|
|
843
|
+
"is_phi": False,
|
|
844
|
+
"required": True,
|
|
845
|
+
"type": "string"
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
},
|
|
851
|
+
"dude": {
|
|
852
|
+
DEFAULT_KEY: "not collected",
|
|
853
|
+
METADATA_FIELDS_KEY: {
|
|
854
|
+
# from stds parent host
|
|
855
|
+
"country": {
|
|
856
|
+
"allowed": ["USA"],
|
|
857
|
+
DEFAULT_KEY: "USA",
|
|
858
|
+
"empty": False,
|
|
859
|
+
"is_phi": False,
|
|
860
|
+
"required": True,
|
|
861
|
+
"type": "string"
|
|
862
|
+
},
|
|
863
|
+
# from stds parent host
|
|
864
|
+
"description": {
|
|
865
|
+
"allowed": ["human"],
|
|
866
|
+
DEFAULT_KEY: "human",
|
|
867
|
+
"empty": False,
|
|
868
|
+
"is_phi": False,
|
|
869
|
+
"required": True,
|
|
870
|
+
"type": "string"
|
|
871
|
+
},
|
|
872
|
+
# from stds parent host
|
|
873
|
+
# (NB: comes from study)
|
|
874
|
+
"dna_extracted": {
|
|
875
|
+
"allowed": ["true"],
|
|
876
|
+
DEFAULT_KEY: "true",
|
|
877
|
+
"empty": False,
|
|
878
|
+
"is_phi": False,
|
|
879
|
+
"required": True,
|
|
880
|
+
"type": "string"
|
|
881
|
+
},
|
|
882
|
+
# from stds parent host
|
|
883
|
+
"elevation": {
|
|
884
|
+
"anyof": [
|
|
885
|
+
{
|
|
886
|
+
"allowed": [
|
|
887
|
+
"not collected",
|
|
888
|
+
"not provided",
|
|
889
|
+
"restricted access"],
|
|
890
|
+
"type": "string"
|
|
891
|
+
},
|
|
892
|
+
{
|
|
893
|
+
"min": -413.0,
|
|
894
|
+
"type": "number"
|
|
895
|
+
}],
|
|
896
|
+
"empty": False,
|
|
897
|
+
"is_phi": False,
|
|
898
|
+
"required": True
|
|
899
|
+
},
|
|
900
|
+
# from stds parent host
|
|
901
|
+
"geo_loc_name": {
|
|
902
|
+
"allowed": ["USA:CA:San Diego"],
|
|
903
|
+
DEFAULT_KEY: "USA:CA:San Diego",
|
|
904
|
+
"empty": False,
|
|
905
|
+
"is_phi": False,
|
|
906
|
+
"required": True,
|
|
907
|
+
"type": "string"
|
|
908
|
+
},
|
|
909
|
+
# from stds same level host
|
|
910
|
+
"host_type": {
|
|
911
|
+
"allowed": ["dude"],
|
|
912
|
+
DEFAULT_KEY: "dude",
|
|
913
|
+
"empty": False,
|
|
914
|
+
"is_phi": False,
|
|
915
|
+
"required": True,
|
|
916
|
+
"type": "string"
|
|
917
|
+
}
|
|
918
|
+
},
|
|
919
|
+
SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
|
|
920
|
+
"dung": {
|
|
921
|
+
"base_type": "stool",
|
|
922
|
+
METADATA_FIELDS_KEY: {
|
|
923
|
+
# overrides stds parent host + sample type
|
|
924
|
+
"description": {
|
|
925
|
+
"allowed": ["human dung"],
|
|
926
|
+
DEFAULT_KEY: "human dung",
|
|
927
|
+
"type": "string"
|
|
928
|
+
},
|
|
929
|
+
# overrides stds parent host + sample type
|
|
930
|
+
"physical_specimen_location": {
|
|
931
|
+
"allowed": ["FIELD"],
|
|
932
|
+
DEFAULT_KEY: "FIELD",
|
|
933
|
+
"type": "string"
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
},
|
|
937
|
+
"f": {
|
|
938
|
+
"base_type": "stool"
|
|
939
|
+
},
|
|
940
|
+
"fe": {
|
|
941
|
+
"alias": "stool"
|
|
942
|
+
},
|
|
943
|
+
"feces": {
|
|
944
|
+
"alias": "stool"
|
|
945
|
+
},
|
|
946
|
+
"stool": {
|
|
947
|
+
METADATA_FIELDS_KEY: {
|
|
948
|
+
# from stds same level host + sample type
|
|
949
|
+
"description": {
|
|
950
|
+
"allowed": ["human stool"],
|
|
951
|
+
DEFAULT_KEY: "human stool",
|
|
952
|
+
"type": "string"
|
|
953
|
+
},
|
|
954
|
+
# from stds same level host + sample type
|
|
955
|
+
"elevation": {
|
|
956
|
+
DEFAULT_KEY: 14,
|
|
957
|
+
"type": "number"
|
|
958
|
+
},
|
|
959
|
+
# from stds parent level host + sample type
|
|
960
|
+
"physical_specimen_location": {
|
|
961
|
+
"allowed": ["UCSDST"],
|
|
962
|
+
DEFAULT_KEY: "UCSDST",
|
|
963
|
+
"empty": False,
|
|
964
|
+
"is_phi": False,
|
|
965
|
+
"required": True,
|
|
966
|
+
"type": "string"
|
|
967
|
+
},
|
|
968
|
+
# from stds same level host + sample type
|
|
969
|
+
"physical_specimen_remaining": {
|
|
970
|
+
"allowed": ["false"],
|
|
971
|
+
DEFAULT_KEY: "false",
|
|
972
|
+
"empty": False,
|
|
973
|
+
"is_phi": False,
|
|
974
|
+
"required": True,
|
|
975
|
+
"type": "string"
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
}
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
# Tests for combine_stds_and_study_config
|
|
985
|
+
|
|
986
|
+
# NOTE(review): TEST_DIR is already assigned this same value at the top of
# the class body; this second assignment re-binds it to an identical value.
TEST_DIR = path.dirname(__file__)
|
|
987
|
+
|
|
988
|
+
def test_combine_stds_and_study_config_empty_study(self):
    """With an empty study config, the combined result holds only standards.

    The second argument points at data/test_config.yml next to this module.
    """
    stds_fp = path.join(self.TEST_DIR, "data/test_config.yml")

    observed = combine_stds_and_study_config({}, stds_fp)

    base_fields = {
        "sample_name": {TYPE_KEY: "string", "unique": True},
        "sample_type": {"empty": False, "is_phi": False},
    }
    anticipated = {
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": {METADATA_FIELDS_KEY: base_fields},
        },
    }
    self.assertDictEqual(anticipated, observed)
|
|
1014
|
+
|
|
1015
|
+
def test_combine_stds_and_study_config_with_study_specific_metadata(self):
    """A field declared under STUDY_SPECIFIC_METADATA_KEY shows up next to
    the standards fields of the same host type in the combined result."""
    study_fields = {
        "new_field": {TYPE_KEY: "string", DEFAULT_KEY: "study_value"},
    }
    study_cfg = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "base": {METADATA_FIELDS_KEY: study_fields},
            },
        },
    }
    stds_fp = path.join(self.TEST_DIR, "data/test_config.yml")

    observed = combine_stds_and_study_config(study_cfg, stds_fp)

    # standards fields first, then the study-only addition
    merged_fields = {
        "sample_name": {TYPE_KEY: "string", "unique": True},
        "sample_type": {"empty": False, "is_phi": False},
        "new_field": {TYPE_KEY: "string", DEFAULT_KEY: "study_value"},
    }
    anticipated = {
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": {METADATA_FIELDS_KEY: merged_fields},
        },
    }
    self.assertDictEqual(anticipated, observed)
|
|
1058
|
+
|
|
1059
|
+
def test_combine_stds_and_study_config_study_overrides_standards(self):
    """A property set by the study config wins over the standards' value,
    while the field's other properties are kept."""
    study_cfg = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "base": {
                    METADATA_FIELDS_KEY: {"sample_type": {"empty": True}},
                },
            },
        },
    }
    stds_fp = path.join(self.TEST_DIR, "data/test_config.yml")

    observed = combine_stds_and_study_config(study_cfg, stds_fp)

    merged_fields = {
        "sample_name": {TYPE_KEY: "string", "unique": True},
        # "empty" comes from the study config; "is_phi" is left as it was
        "sample_type": {"empty": True, "is_phi": False},
    }
    anticipated = {
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": {METADATA_FIELDS_KEY: merged_fields},
        },
    }
    self.assertDictEqual(anticipated, observed)
|
|
1097
|
+
|
|
1098
|
+
def test__make_combined_stds_and_study_host_type_dicts(self):
    """Combining FLAT_STUDY_DICT with NESTED_STDS_DICT should give the
    host-type section of NESTED_STDS_W_STUDY_DICT."""
    # Show the complete diff if the dictionaries differ.
    self.maxDiff = None

    combined = _make_combined_stds_and_study_host_type_dicts(
        self.FLAT_STUDY_DICT, self.NESTED_STDS_DICT)

    wanted = self.NESTED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertDictEqual(wanted, combined)
|
|
1107
|
+
|
|
1108
|
+
|
|
1109
|
+
def test_flatten_nested_stds_dict(self):
    """Flattening NESTED_STDS_W_STUDY_DICT should give the host-type section
    of FLATTENED_STDS_W_STUDY_DICT."""
    # Show the complete diff if the dictionaries differ.
    self.maxDiff = None

    flattened = flatten_nested_stds_dict(self.NESTED_STDS_W_STUDY_DICT, None)

    wanted = self.FLATTENED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertDictEqual(wanted, flattened)
|
|
1119
|
+
|
|
1120
|
+
def test_flatten_nested_stds_dict_empty_input(self):
    """An empty input dictionary flattens to an empty dictionary."""
    flattened = flatten_nested_stds_dict({}, None)

    self.assertDictEqual({}, flattened)
|
|
1127
|
+
|
|
1128
|
+
def test_flatten_nested_stds_dict_empty_host_types(self):
    """A present-but-empty HOST_TYPE_SPECIFIC_METADATA_KEY section flattens
    to an empty dictionary."""
    no_host_types = {HOST_TYPE_SPECIFIC_METADATA_KEY: {}}

    flattened = flatten_nested_stds_dict(no_host_types, None)

    self.assertDictEqual({}, flattened)
|
|
1137
|
+
|
|
1138
|
+
def test_flatten_nested_stds_dict_single_level(self):
    """Test flattening a dictionary with only one host type level (no nesting).

    Neither host type contains a nested HOST_TYPE_SPECIFIC_METADATA_KEY
    section, so each one is expected to come back with the same content it
    had in the input.
    """
    input_dict = {
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "host_a": {
                DEFAULT_KEY: "not provided",
                METADATA_FIELDS_KEY: {
                    "field1": {
                        TYPE_KEY: "string",
                        DEFAULT_KEY: "value1"
                    }
                },
                SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                    "sample1": {
                        METADATA_FIELDS_KEY: {
                            "sample_field": {TYPE_KEY: "string"}
                        }
                    }
                }
                # No HOST_TYPE_SPECIFIC_METADATA_KEY here (no nesting)
            },
            "host_b": {
                DEFAULT_KEY: "not collected",
                METADATA_FIELDS_KEY: {
                    "field2": {
                        TYPE_KEY: "integer"
                    }
                }
            }
        }
    }

    # The expectation is written out as an independent literal instead of
    # being a reference into input_dict: if it aliased the input, any
    # in-place change made by the function under test would appear on both
    # sides of the comparison and go unnoticed.
    expected = {
        "host_a": {
            DEFAULT_KEY: "not provided",
            METADATA_FIELDS_KEY: {
                "field1": {
                    TYPE_KEY: "string",
                    DEFAULT_KEY: "value1"
                }
            },
            SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                "sample1": {
                    METADATA_FIELDS_KEY: {
                        "sample_field": {TYPE_KEY: "string"}
                    }
                }
            }
        },
        "host_b": {
            DEFAULT_KEY: "not collected",
            METADATA_FIELDS_KEY: {
                "field2": {
                    TYPE_KEY: "integer"
                }
            }
        }
    }

    result = flatten_nested_stds_dict(input_dict, None)

    self.assertDictEqual(expected, result)
|
|
1175
|
+
|
|
1176
|
+
def test_flatten_nested_stds_dict_deeply_nested(self):
    """Flatten four levels of nested host types.

    Every level is expected to carry the metadata fields of all of its
    ancestors, and a level without a default of its own is expected to
    carry the nearest ancestor's: level1 -> level2 -> level3 -> level4.
    """
    def str_field(default_val):
        # A fresh dict per call, so input and expectation share no objects.
        return {TYPE_KEY: "string", DEFAULT_KEY: default_val}

    # Assemble the nested input from the innermost host type outwards.
    level4 = {METADATA_FIELDS_KEY: {"field_d": str_field("d4")}}
    level3 = {
        DEFAULT_KEY: "level3_default",
        METADATA_FIELDS_KEY: {"field_c": str_field("c3")},
        HOST_TYPE_SPECIFIC_METADATA_KEY: {"host_level4": level4},
    }
    level2 = {
        METADATA_FIELDS_KEY: {"field_b": str_field("b2")},
        HOST_TYPE_SPECIFIC_METADATA_KEY: {"host_level3": level3},
    }
    level1 = {
        DEFAULT_KEY: "level1_default",
        METADATA_FIELDS_KEY: {"field_a": str_field("a1")},
        HOST_TYPE_SPECIFIC_METADATA_KEY: {"host_level2": level2},
    }
    nested = {HOST_TYPE_SPECIFIC_METADATA_KEY: {"host_level1": level1}}

    flattened = flatten_nested_stds_dict(nested, None)

    wanted = {
        "host_level1": {
            DEFAULT_KEY: "level1_default",
            METADATA_FIELDS_KEY: {
                "field_a": str_field("a1"),
            },
        },
        "host_level2": {
            # no default of its own: level1's is expected
            DEFAULT_KEY: "level1_default",
            METADATA_FIELDS_KEY: {
                "field_a": str_field("a1"),
                "field_b": str_field("b2"),
            },
        },
        "host_level3": {
            DEFAULT_KEY: "level3_default",
            METADATA_FIELDS_KEY: {
                "field_a": str_field("a1"),
                "field_b": str_field("b2"),
                "field_c": str_field("c3"),
            },
        },
        "host_level4": {
            # no default of its own: level3's is expected
            DEFAULT_KEY: "level3_default",
            METADATA_FIELDS_KEY: {
                "field_a": str_field("a1"),
                "field_b": str_field("b2"),
                "field_c": str_field("c3"),
                "field_d": str_field("d4"),
            },
        },
    }
    self.assertDictEqual(wanted, flattened)
|
|
1251
|
+
|
|
1252
|
+
def test_flatten_nested_stds_dict_preserves_sample_types(self):
    """Sample-type-specific metadata of a parent host should carry over to
    a nested child host and be merged with the child's own sample types."""
    child_host = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
                },
            },
            "blood": {
                METADATA_FIELDS_KEY: {"blood_field": {TYPE_KEY: "string"}},
            },
        },
    }
    parent_host = {
        DEFAULT_KEY: "not provided",
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
                },
            },
            "saliva": {ALIAS_KEY: "oral"},
        },
        HOST_TYPE_SPECIFIC_METADATA_KEY: {"child_host": child_host},
    }
    nested = {HOST_TYPE_SPECIFIC_METADATA_KEY: {"parent_host": parent_host}}

    flattened = flatten_nested_stds_dict(nested, None)

    wanted_parent = {
        DEFAULT_KEY: "not provided",
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
                },
            },
            "saliva": {ALIAS_KEY: "oral"},
        },
    }
    wanted_child = {
        DEFAULT_KEY: "not provided",
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # parent's and child's stool fields together
            "stool": {
                METADATA_FIELDS_KEY: {
                    "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
                    "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
                },
            },
            # defined on the parent only
            "saliva": {ALIAS_KEY: "oral"},
            # defined on the child only
            "blood": {
                METADATA_FIELDS_KEY: {"blood_field": {TYPE_KEY: "string"}},
            },
        },
    }
    wanted = {"parent_host": wanted_parent, "child_host": wanted_child}
    self.assertDictEqual(wanted, flattened)
|
|
1326
|
+
|
|
1327
|
+
# Tests for update_wip_metadata_dict
|
|
1328
|
+
|
|
1329
|
+
def test_update_wip_metadata_dict_new_field(self):
    """Test adding a completely new metadata field to wip dict.

    The wip dict starts empty, so the result is expected to contain exactly
    the field definition supplied in the stds dict.
    """
    wip = {}
    stds = {
        "field1": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["value1", "value2"]
        }
    }

    # Written out as an independent literal rather than `expected = stds`:
    # if the expectation were the stds object itself, any in-place change
    # made to stds by the function under test would be invisible here.
    expected = {
        "field1": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["value1", "value2"]
        }
    }

    result = update_wip_metadata_dict(wip, stds)

    self.assertDictEqual(expected, result)
|
|
1343
|
+
|
|
1344
|
+
def test_update_wip_metadata_dict_update_existing_field(self):
    """A new property for an already-known field is added to that field
    without dropping the property it already had."""
    current = {"field1": {TYPE_KEY: "string"}}
    incoming = {"field1": {DEFAULT_KEY: "default_value"}}

    updated = update_wip_metadata_dict(current, incoming)

    wanted = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "default_value"},
    }
    self.assertDictEqual(wanted, updated)
|
|
1366
|
+
|
|
1367
|
+
def test_update_wip_metadata_dict_allowed_replaces_anyof(self):
    """An incoming 'allowed' definition displaces an existing 'anyof' one."""
    current = {
        "field1": {
            ANYOF_KEY: [{TYPE_KEY: "string"}, {TYPE_KEY: "number"}],
            "required": True,
        },
    }
    incoming = {"field1": {ALLOWED_KEY: ["value1", "value2"]}}

    field1 = update_wip_metadata_dict(current, incoming)["field1"]

    # 'anyof' is gone, 'allowed' holds the incoming values, and the
    # unrelated 'required' flag is untouched
    self.assertNotIn(ANYOF_KEY, field1)
    self.assertIn(ALLOWED_KEY, field1)
    self.assertEqual(["value1", "value2"], field1[ALLOWED_KEY])
    self.assertTrue(field1["required"])
|
|
1391
|
+
|
|
1392
|
+
def test_update_wip_metadata_dict_anyof_replaces_allowed_and_type(self):
    """An incoming 'anyof' definition displaces existing 'allowed' and
    'type' entries."""
    current = {
        "field1": {
            ALLOWED_KEY: ["old_value"],
            TYPE_KEY: "string",
            "required": True,
        },
    }
    alternatives = [
        {TYPE_KEY: "string", ALLOWED_KEY: ["a", "b"]},
        {TYPE_KEY: "number", "min": 0},
    ]
    incoming = {"field1": {ANYOF_KEY: alternatives}}

    field1 = update_wip_metadata_dict(current, incoming)["field1"]

    # 'allowed' and 'type' are gone, 'anyof' is present, and the unrelated
    # 'required' flag is untouched
    self.assertNotIn(ALLOWED_KEY, field1)
    self.assertNotIn(TYPE_KEY, field1)
    self.assertIn(ANYOF_KEY, field1)
    self.assertTrue(field1["required"])
|
|
1417
|
+
|
|
1418
|
+
def test_update_wip_metadata_dict_preserves_unrelated_keys(self):
    """Properties of a field that the stds dict does not mention stay in
    the wip dict unchanged."""
    current = {
        "field1": {"required": True, "is_phi": False, "empty": False},
    }
    incoming = {"field1": {DEFAULT_KEY: "new_default"}}

    updated = update_wip_metadata_dict(current, incoming)

    wanted = {
        "field1": {
            "required": True,
            "is_phi": False,
            "empty": False,
            DEFAULT_KEY: "new_default",
        },
    }
    self.assertDictEqual(wanted, updated)
|
|
1444
|
+
|
|
1445
|
+
def test_update_wip_metadata_dict_multiple_fields(self):
    """Several fields can be updated in one call: two existing fields gain
    a default and a third field is newly added."""
    current = {
        "field1": {TYPE_KEY: "string"},
        "field2": {TYPE_KEY: "integer"},
    }
    incoming = {
        "field1": {DEFAULT_KEY: "default1"},
        "field2": {DEFAULT_KEY: 42},
        "field3": {TYPE_KEY: "boolean", DEFAULT_KEY: True},
    }

    updated = update_wip_metadata_dict(current, incoming)

    wanted = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "default1"},
        "field2": {TYPE_KEY: "integer", DEFAULT_KEY: 42},
        "field3": {TYPE_KEY: "boolean", DEFAULT_KEY: True},
    }
    self.assertDictEqual(wanted, updated)
|
|
1465
|
+
|
|
1466
|
+
def test_update_wip_metadata_dict_returns_same_object(self):
    """The returned dictionary is the very object passed in as the wip dict.

    This pins the documented in-place modification behavior, which the
    original author notes is relied upon by other parts of the codebase.
    """
    target = {"field1": {TYPE_KEY: "string"}}
    additions = {"field1": {DEFAULT_KEY: "x"}}

    returned = update_wip_metadata_dict(target, additions)

    # identity, not mere equality: the result must not be a copy
    self.assertIs(returned, target)
    # the update must be visible through the original reference
    self.assertIn(DEFAULT_KEY, target["field1"])
|
|
1481
|
+
|
|
1482
|
+
def test__combine_base_and_added_metadata_fields(self):
    """Metadata fields of a base dict and an added dict are merged; where
    both define a field, the added dict's definition is expected to win."""
    base = {
        METADATA_FIELDS_KEY: {
            # also in add; add's version is expected in the result
            "field1": {"allowed": ["value1"], "type": "string"},
            # base only
            "fieldX": {"type": "string", "allowed": ["valueX"]},
        },
    }
    added = {
        METADATA_FIELDS_KEY: {
            # also in base; this version is expected in the result
            "field1": {"allowed": ["value2"], "type": "string"},
            # add only
            "field2": {"type": "string"},
        },
    }

    combined = _combine_base_and_added_metadata_fields(base, added)

    wanted = {
        "field1": {"allowed": ["value2"], "type": "string"},
        "field2": {"type": "string"},
        "fieldX": {"type": "string", "allowed": ["valueX"]},
    }
    self.assertDictEqual(wanted, combined)
|
|
1529
|
+
|
|
1530
|
+
def test__combine_base_and_added_metadata_fields_empty_base(self):
    """Test combining when base_dict has no metadata_fields key.

    The result is expected to equal the metadata fields of add_dict.
    """
    base_dict = {}

    add_dict = {
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        }
    }

    # Independent literal rather than a reference into add_dict, so that an
    # in-place change to add_dict by the function under test cannot also
    # change the expectation and mask itself.
    expected = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
    }

    result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
    self.assertDictEqual(expected, result)
|
|
1544
|
+
|
|
1545
|
+
def test__combine_base_and_added_metadata_fields_empty_add(self):
    """Test combining when add_dict has no metadata_fields key.

    The result is expected to equal the metadata fields of base_dict.
    """
    base_dict = {
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        }
    }

    add_dict = {}

    # Independent literal rather than a reference into base_dict, so that
    # an in-place change to base_dict by the function under test cannot
    # also change the expectation and mask itself.
    expected = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
    }

    result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
    self.assertDictEqual(expected, result)
|
|
1559
|
+
|
|
1560
|
+
def test__combine_base_and_added_metadata_fields_both_empty(self):
    """Two dicts without a metadata_fields key combine to an empty dict."""
    combined = _combine_base_and_added_metadata_fields({}, {})

    self.assertDictEqual({}, combined)
|
|
1569
|
+
|
|
1570
|
+
# Tests for _combine_base_and_added_host_type
|
|
1571
|
+
|
|
1572
|
+
def test__combine_base_and_added_host_type_default_key_override(self):
    """When both dicts carry DEFAULT_KEY, the add_dict value is the one
    found in the result."""
    base = {DEFAULT_KEY: "not provided"}
    added = {DEFAULT_KEY: "not collected"}

    combined = _combine_base_and_added_host_type(base, added)

    self.assertEqual("not collected", combined[DEFAULT_KEY])
|
|
1584
|
+
|
|
1585
|
+
def test__combine_base_and_added_host_type_default_key_preserved(self):
    """When add_dict has no DEFAULT_KEY, the base_dict value is kept."""
    base = {DEFAULT_KEY: "not provided"}

    combined = _combine_base_and_added_host_type(base, {})

    self.assertEqual("not provided", combined[DEFAULT_KEY])
|
|
1595
|
+
|
|
1596
|
+
def test__combine_base_and_added_host_type_default_key_added(self):
    """When only add_dict carries DEFAULT_KEY, its value appears in the
    result."""
    added = {DEFAULT_KEY: "not collected"}

    combined = _combine_base_and_added_host_type({}, added)

    self.assertEqual("not collected", combined[DEFAULT_KEY])
|
|
1606
|
+
|
|
1607
|
+
def test__combine_base_and_added_host_type_empty_base(self):
    """Test combining when base_dict is empty.

    The result is expected to have the same content as add_dict.
    """
    base_dict = {}
    add_dict = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    # Independent literal rather than add_dict itself: comparing the result
    # against the very object handed to the function under test would hide
    # any in-place change the function made to that object.
    expected = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    result = _combine_base_and_added_host_type(base_dict, add_dict)

    self.assertDictEqual(expected, result)
|
|
1627
|
+
|
|
1628
|
+
def test__combine_base_and_added_host_type_empty_add(self):
    """Test combining when add_dict is empty (result should match base)."""
    base_dict = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }
    add_dict = {}

    # Independent literal rather than base_dict itself: comparing the
    # result against the very object handed to the function under test
    # would hide any in-place change the function made to that object.
    expected = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    result = _combine_base_and_added_host_type(base_dict, add_dict)

    self.assertDictEqual(expected, result)
|
|
1648
|
+
|
|
1649
|
+
def test__combine_base_and_added_host_type_both_empty(self):
    """Two empty host type dicts combine to an empty dict."""
    combined = _combine_base_and_added_host_type({}, {})

    self.assertDictEqual({}, combined)
|
|
1657
|
+
|
|
1658
|
+
def test__combine_base_and_added_host_type_full_combination(self):
    """Combine host types that each carry a DEFAULT_KEY, metadata fields and
    sample types, checking overrides and additions in all three places."""
    base = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "country": {
                TYPE_KEY: "string",
                ALLOWED_KEY: ["USA"],
                DEFAULT_KEY: "USA",
            },
            "description": {
                TYPE_KEY: "string",
                DEFAULT_KEY: "base description",
            },
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCSD"},
                },
            },
            "saliva": {ALIAS_KEY: "oral"},
        },
    }
    added = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            # touches a field base already has
            "description": {DEFAULT_KEY: "add description"},
            # unknown to base
            "new_field": {TYPE_KEY: "integer"},
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # touches a sample type base already has
            "stool": {
                METADATA_FIELDS_KEY: {"location": {DEFAULT_KEY: "UCLA"}},
            },
            # unknown to base
            "blood": {
                METADATA_FIELDS_KEY: {"volume": {TYPE_KEY: "number"}},
            },
        },
    }

    combined = _combine_base_and_added_host_type(base, added)

    wanted_fields = {
        # untouched base field
        "country": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["USA"],
            DEFAULT_KEY: "USA",
        },
        # type from base, default from add
        "description": {
            TYPE_KEY: "string",
            DEFAULT_KEY: "add description",
        },
        # add-only field
        "new_field": {TYPE_KEY: "integer"},
    }
    wanted_sample_types = {
        # type from base, default from add
        "stool": {
            METADATA_FIELDS_KEY: {
                "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCLA"},
            },
        },
        # untouched base sample type
        "saliva": {ALIAS_KEY: "oral"},
        # add-only sample type
        "blood": {
            METADATA_FIELDS_KEY: {"volume": {TYPE_KEY: "number"}},
        },
    }
    wanted = {
        # add's default replaces base's
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: wanted_fields,
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: wanted_sample_types,
    }
    self.assertDictEqual(wanted, combined)
|
|
1755
|
+
|
|
1756
|
+
def test__combine_base_and_added_host_type_empty_metadata_fields_result(self):
    """If neither dict has METADATA_FIELDS_KEY, the result has none either."""
    # Neither side defines any metadata fields.
    base = {DEFAULT_KEY: "not provided"}
    added = {}

    combined = _combine_base_and_added_host_type(base, added)

    self.assertEqual("not provided", combined[DEFAULT_KEY])
    self.assertNotIn(METADATA_FIELDS_KEY, combined)
|
|
1770
|
+
|
|
1771
|
+
def test__combine_base_and_added_host_type_empty_sample_types_result(self):
    """If neither dict has SAMPLE_TYPE_SPECIFIC_METADATA_KEY, the result has
    none either, while the metadata fields are still present."""
    # Neither side defines any sample types.
    base = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {"field1": {TYPE_KEY: "string"}},
    }
    added = {}

    combined = _combine_base_and_added_host_type(base, added)

    self.assertEqual("not provided", combined[DEFAULT_KEY])
    self.assertIn(METADATA_FIELDS_KEY, combined)
    self.assertNotIn(SAMPLE_TYPE_SPECIFIC_METADATA_KEY, combined)
|
|
1789
|
+
|
|
1790
|
+
def test__combine_base_and_added_sample_type_specific_metadata(self):
    """Sample types of a base dict and an added dict are merged.

    Covers a sample type that base defines with fields but add redefines as
    an alias, one defined with fields on both sides, one present only in
    add, and one present only in base.
    """
    base_sample_types = {
        # has metadata fields here, but add turns it into an alias
        "sample_type1": {
            METADATA_FIELDS_KEY: {
                "confuse": {"allowed": ["value1"], "type": "string"},
            },
        },
        # has metadata fields on both sides; add is expected to win
        "sample_type2": {
            METADATA_FIELDS_KEY: {
                "field1": {"type": "string"},
                "fieldX": {"type": "string", "allowed": ["valueX"]},
            },
        },
        # base only
        "sample_type4": {
            METADATA_FIELDS_KEY: {"field1": {"type": "string"}},
        },
    }
    added_sample_types = {
        # an alias here, although base gives it metadata fields
        "sample_type1": {"alias": "sample_type2"},
        # has metadata fields on both sides; this side is expected to win
        "sample_type2": {
            METADATA_FIELDS_KEY: {
                "field1": {"allowed": ["value1"], "type": "string"},
                "field2": {"type": "string"},
            },
        },
        # add only
        "sample_type3": {"base_type": "sample_type2"},
    }
    base = {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: base_sample_types}
    added = {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: added_sample_types}

    combined = _combine_base_and_added_sample_type_specific_metadata(
        base, added)

    wanted = {
        "sample_type1": {"alias": "sample_type2"},
        "sample_type2": {
            METADATA_FIELDS_KEY: {
                "field1": {"allowed": ["value1"], "type": "string"},
                "field2": {"type": "string"},
                "fieldX": {"type": "string", "allowed": ["valueX"]},
            },
        },
        "sample_type3": {"base_type": "sample_type2"},
        "sample_type4": {
            METADATA_FIELDS_KEY: {"field1": {"type": "string"}},
        },
    }
    self.assertDictEqual(wanted, combined)
|
|
1884
|
+
|
|
1885
|
+
def test__combine_base_and_added_sample_type_specific_metadata_empty_base(self):
    """With no sample types in base, the result holds add's sample types."""
    added = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}},
            },
        },
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        {}, added)

    wanted = {
        "stool": {METADATA_FIELDS_KEY: {"field1": {"type": "string"}}},
    }
    self.assertDictEqual(wanted, combined)
|
|
1909
|
+
|
|
1910
|
+
def test__combine_base_and_added_sample_type_specific_metadata_empty_add(self):
    """With no sample types in add, the result holds base's sample types."""
    base = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}},
            },
        },
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        base, {})

    wanted = {
        "stool": {METADATA_FIELDS_KEY: {"field1": {"type": "string"}}},
    }
    self.assertDictEqual(wanted, combined)
|
|
1934
|
+
|
|
1935
|
+
def test__combine_base_and_added_sample_type_specific_metadata_base_type_with_metadata(self):
    """Combine a sample type that has base_type AND metadata_fields on both sides.

    Both inputs define "stool" with a base_type and metadata_fields. The
    expectation encoded here is:
      * base_type: the value from the add dict ("new_base") replaces the
        value from the base dict ("original_base");
      * "description": the add dict's definition replaces the base dict's;
      * "location": kept from the base dict, since the add dict omits it.
    """
    def string_field(*allowed_values):
        # Field definition restricted to the given allowed values.
        return {"allowed": list(allowed_values), "type": "string"}

    base_config = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                BASE_TYPE_KEY: "original_base",
                METADATA_FIELDS_KEY: {
                    "description": string_field("stool sample"),
                    "location": string_field("UCSD"),
                },
            }
        }
    }

    addition = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                BASE_TYPE_KEY: "new_base",
                METADATA_FIELDS_KEY: {
                    "description": string_field("human stool"),
                },
            }
        }
    }

    anticipated = {
        "stool": {
            # taken from the add dict, replacing "original_base"
            BASE_TYPE_KEY: "new_base",
            METADATA_FIELDS_KEY: {
                # add dict's definition replaces the base dict's
                "description": string_field("human stool"),
                # only present in the base dict, so carried over
                "location": string_field("UCSD"),
            },
        }
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(base_config, addition)
    self.assertDictEqual(anticipated, combined)
|
|
1996
|
+
|
|
1997
|
+
def test__combine_base_and_added_sample_type_specific_metadata_mismatched_types_add_wins(self):
    """Test that when definition types differ between base and add, add always wins.

    When the sample type definition type (alias, base_type, or metadata_fields)
    differs between base_dict and add_dict, the add_dict entry completely
    replaces the base_dict entry rather than attempting to combine them.

    This test covers all possible type mismatch scenarios:
    - base has alias, add has metadata_fields
    - base has alias, add has base_type
    - base has metadata_fields, add has alias
    - base has metadata_fields, add has base_type
    - base has base_type, add has alias
    - base has base_type, add has metadata_fields
    """
    # Function-scope import: only this test needs it, to snapshot the
    # expectation independently of the input object.
    import copy

    base_dict = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # alias -> metadata_fields
            "sample_alias_to_metadata": {
                ALIAS_KEY: "stool"
            },
            # alias -> base_type
            "sample_alias_to_base": {
                ALIAS_KEY: "stool"
            },
            # metadata_fields -> alias
            "sample_metadata_to_alias": {
                METADATA_FIELDS_KEY: {
                    "field1": {"type": "string"}
                }
            },
            # metadata_fields -> base_type
            "sample_metadata_to_base": {
                METADATA_FIELDS_KEY: {
                    "field1": {"type": "string"}
                }
            },
            # base_type -> alias
            "sample_base_to_alias": {
                BASE_TYPE_KEY: "stool"
            },
            # base_type -> metadata_fields
            "sample_base_to_metadata": {
                BASE_TYPE_KEY: "stool"
            }
        }
    }

    add_dict = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "sample_alias_to_metadata": {
                METADATA_FIELDS_KEY: {
                    "new_field": {"type": "integer"}
                }
            },
            "sample_alias_to_base": {
                BASE_TYPE_KEY: "saliva"
            },
            "sample_metadata_to_alias": {
                ALIAS_KEY: "saliva"
            },
            "sample_metadata_to_base": {
                BASE_TYPE_KEY: "saliva"
            },
            "sample_base_to_alias": {
                ALIAS_KEY: "saliva"
            },
            "sample_base_to_metadata": {
                METADATA_FIELDS_KEY: {
                    "new_field": {"type": "integer"}
                }
            }
        }
    }

    # All entries should match add_dict exactly; base_dict is replaced.
    # Take a deep copy BEFORE the call: if the expectation aliased
    # add_dict, any in-place mutation of add_dict by the function under
    # test would also change the expectation and could let the assertion
    # pass vacuously.
    expected = copy.deepcopy(add_dict[SAMPLE_TYPE_SPECIFIC_METADATA_KEY])

    result = _combine_base_and_added_sample_type_specific_metadata(base_dict, add_dict)
    self.assertDictEqual(expected, result)
|
|
2077
|
+
|
|
2078
|
+
def test__id_sample_type_definition_alias(self):
    """A definition holding only an alias key should be identified as ALIAS_KEY."""
    alias_only = {ALIAS_KEY: "other_sample"}
    definition_kind = _id_sample_type_definition("test_sample", alias_only)
    self.assertEqual(ALIAS_KEY, definition_kind)
|
|
2085
|
+
|
|
2086
|
+
def test__id_sample_type_definition_metadata(self):
    """A definition holding only metadata fields should be identified as METADATA_FIELDS_KEY."""
    single_field = {"field1": {"type": "string"}}
    metadata_only = {METADATA_FIELDS_KEY: single_field}
    definition_kind = _id_sample_type_definition("test_sample", metadata_only)
    self.assertEqual(METADATA_FIELDS_KEY, definition_kind)
|
|
2097
|
+
|
|
2098
|
+
def test__id_sample_type_definition_base_with_metadata(self):
    """A definition with both base_type and metadata_fields should yield METADATA_FIELDS_KEY.

    The combination is treated as valid here: base_type names the sample
    type being inherited from, and metadata_fields carries the overrides for
    this sample type. The test expects the metadata_fields classification
    to take precedence over base_type.
    """
    description_override = {
        "allowed": ["human dung"],
        "type": "string"
    }
    inheriting_definition = {
        BASE_TYPE_KEY: "stool",
        METADATA_FIELDS_KEY: {"description": description_override}
    }
    definition_kind = _id_sample_type_definition("dung", inheriting_definition)
    self.assertEqual(METADATA_FIELDS_KEY, definition_kind)
|
|
2116
|
+
|
|
2117
|
+
def test__id_sample_type_definition_base(self):
    """A definition holding only a base_type key should be identified as BASE_TYPE_KEY."""
    base_only = {BASE_TYPE_KEY: "other_sample"}
    definition_kind = _id_sample_type_definition("test_sample", base_only)
    self.assertEqual(BASE_TYPE_KEY, definition_kind)
|
|
2124
|
+
|
|
2125
|
+
def test__id_sample_type_definition_err_alias_metadata(self):
    """Combining alias with metadata_fields in one definition must raise ValueError."""
    conflicting_definition = {
        ALIAS_KEY: "other_sample",
        METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
    }
    # Callable form of assertRaisesRegex: the remaining arguments are
    # forwarded to the function under test.
    self.assertRaisesRegex(
        ValueError,
        "Sample type 'test_sample' has both 'alias' and 'metadata_fields' keys",
        _id_sample_type_definition,
        "test_sample",
        conflicting_definition)
|
|
2137
|
+
|
|
2138
|
+
def test__id_sample_type_definition_err_alias_base(self):
    """Combining alias with base_type in one definition must raise ValueError."""
    conflicting_definition = {
        ALIAS_KEY: "other_sample",
        BASE_TYPE_KEY: "other_sample"
    }
    # Callable form of assertRaisesRegex: the remaining arguments are
    # forwarded to the function under test.
    self.assertRaisesRegex(
        ValueError,
        "Sample type 'test_sample' has both 'alias' and 'base_type' keys",
        _id_sample_type_definition,
        "test_sample",
        conflicting_definition)
|
|
2146
|
+
|
|
2147
|
+
def test__id_sample_type_definition_err_no_keys(self):
    """An empty definition (no alias, no metadata_fields) must raise ValueError."""
    empty_definition = {}
    # Callable form of assertRaisesRegex: the remaining arguments are
    # forwarded to the function under test.
    self.assertRaisesRegex(
        ValueError,
        "Sample type 'test_sample' has neither 'alias' nor 'metadata_fields' keys",
        _id_sample_type_definition,
        "test_sample",
        empty_definition)
|
|
2152
|
+
|
|
2153
|
+
# Tests for build_full_flat_config_dict
|
|
2154
|
+
|
|
2155
|
+
def test_build_full_flat_config_dict_no_inputs(self):
    """Call build_full_flat_config_dict() with no arguments and inspect the result.

    Checks that the host-type section exists, that the "base" and "human"
    host types carry the expected metadata fields, and that the top-level
    default value is "not applicable".
    """
    flat_config = build_full_flat_config_dict()

    # The host-type section must be present and be a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_types = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_types, dict)

    # The "base" host type must define a sample_name metadata field
    self.assertIn("base", host_types)
    base_entry = host_types["base"]
    self.assertIn(METADATA_FIELDS_KEY, base_entry)
    base_fields = base_entry[METADATA_FIELDS_KEY]
    self.assertIn("sample_name", base_fields)

    # The "human" host type must default host_common_name to "human"
    self.assertIn("human", host_types)
    human_entry = host_types["human"]
    self.assertIn(METADATA_FIELDS_KEY, human_entry)
    human_fields = human_entry[METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    common_name_default = human_fields["host_common_name"][DEFAULT_KEY]
    self.assertEqual("human", common_name_default)

    # The top-level default key must be present with the expected value
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("not applicable", flat_config[DEFAULT_KEY])
|
|
2182
|
+
|
|
2183
|
+
def test_build_full_flat_config_dict_with_study_config(self):
    """Build a flat config from a study config, a software config and the test standards.

    Checks that the "human" host type keeps its host_common_name default,
    gains the study's custom_field, and that the top-level default comes
    from the supplied software config.
    """
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }

    # Study-level additions for the "human" host type
    human_additions = {
        METADATA_FIELDS_KEY: {
            "custom_field": {
                DEFAULT_KEY: "custom_value",
                TYPE_KEY: "string"
            }
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {}
            }
        }
    }
    study_settings = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": human_additions
            }
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, software_settings, self.TEST_STDS_FP)

    # The host-type section must be present and be a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_types = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_types, dict)

    # The "human" host type must default host_common_name to "human"
    self.assertIn("human", host_types)
    human_entry = host_types["human"]
    self.assertIn(METADATA_FIELDS_KEY, human_entry)
    human_fields = human_entry[METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    common_name_default = human_fields["host_common_name"][DEFAULT_KEY]
    self.assertEqual("human", common_name_default)

    # The study config's custom_field must have been merged in
    self.assertIn("custom_field", human_fields)
    custom_default = human_fields["custom_field"][DEFAULT_KEY]
    self.assertEqual("custom_value", custom_default)

    # The top-level default must be the software config's value
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("software_default", flat_config[DEFAULT_KEY])
|
|
2236
|
+
|
|
2237
|
+
def test_build_full_flat_config_dict_without_study_config(self):
    """Build a flat config with None as the study config.

    Checks that the "human" host type is still present with its
    host_common_name default and that the software config's default value
    appears at the top level.
    """
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }

    flat_config = build_full_flat_config_dict(
        None, software_settings, self.TEST_STDS_FP)

    # The host-type section must be present and be a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_types = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_types, dict)

    # The "human" host type must default host_common_name to "human"
    self.assertIn("human", host_types)
    human_entry = host_types["human"]
    self.assertIn(METADATA_FIELDS_KEY, human_entry)
    human_fields = human_entry[METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    common_name_default = human_fields["host_common_name"][DEFAULT_KEY]
    self.assertEqual("human", common_name_default)

    # The software config's default must be carried into the result
    self.assertEqual("software_default", flat_config[DEFAULT_KEY])
|
|
2264
|
+
|
|
2265
|
+
def test_build_full_flat_config_dict_merges_software_and_study(self):
    """Check precedence between study config and software config top-level settings.

    Settings present in both configs are expected to take the study config's
    value; a setting present only in the software config is expected to be
    kept.
    """
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: False,
        OVERWRITE_NON_NANS_KEY: True
    }

    # Minimal host-type section: "human" with an empty "stool" sample type
    minimal_hosts = {
        "human": {
            METADATA_FIELDS_KEY: {},
            SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                "stool": {
                    METADATA_FIELDS_KEY: {}
                }
            }
        }
    }
    study_settings = {
        DEFAULT_KEY: "study_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: minimal_hosts
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, software_settings, self.TEST_STDS_FP)

    # Values set in both configs: the study config's value is expected
    self.assertEqual("study_default", flat_config[DEFAULT_KEY])
    self.assertTrue(flat_config[LEAVE_REQUIREDS_BLANK_KEY])
    # Value set only in the software config: expected to be kept
    self.assertTrue(flat_config[OVERWRITE_NON_NANS_KEY])
|
|
2297
|
+
|
|
2298
|
+
def test_build_full_flat_config_dict_none_software_config(self):
    """Build a flat config with None as the software config.

    Checks that the "human" host type is present with its host_common_name
    default and that the top-level default is "not applicable" even though
    no software config was passed in.
    """
    # Minimal host-type section: "human" with an empty "stool" sample type
    minimal_hosts = {
        "human": {
            METADATA_FIELDS_KEY: {},
            SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                "stool": {
                    METADATA_FIELDS_KEY: {}
                }
            }
        }
    }
    study_settings = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: minimal_hosts
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, None, self.TEST_STDS_FP)

    # The host-type section must be present and be a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_types = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_types, dict)

    # The "human" host type must default host_common_name to "human"
    self.assertIn("human", host_types)
    human_entry = host_types["human"]
    self.assertIn(METADATA_FIELDS_KEY, human_entry)
    human_fields = human_entry[METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    common_name_default = human_fields["host_common_name"][DEFAULT_KEY]
    self.assertEqual("human", common_name_default)

    # A default value must still be present despite the None software config
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("not applicable", flat_config[DEFAULT_KEY])
|