metameq 2026.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2334 @@
1
+ import os.path as path
2
+ from unittest import TestCase
3
+ from metameq.src.util import \
4
+ HOST_TYPE_SPECIFIC_METADATA_KEY, METADATA_FIELDS_KEY, \
5
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
6
+ ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
7
+ STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
8
+ OVERWRITE_NON_NANS_KEY
9
+ from metameq.src.metadata_configurator import \
10
+ combine_stds_and_study_config, \
11
+ _make_combined_stds_and_study_host_type_dicts, \
12
+ flatten_nested_stds_dict, \
13
+ _combine_base_and_added_metadata_fields, \
14
+ _combine_base_and_added_sample_type_specific_metadata, \
15
+ _combine_base_and_added_host_type, \
16
+ _id_sample_type_definition, \
17
+ update_wip_metadata_dict, \
18
+ build_full_flat_config_dict
19
+
20
+
21
+ class TestMetadataConfigurator(TestCase):
22
+ TEST_DIR = path.dirname(__file__)
23
+ TEST_STDS_FP = path.join(TEST_DIR, "data/test_standards.yml")
24
+
25
+ NESTED_STDS_DICT = {
26
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
27
+ # Top host level (host_associated in this example) has
28
+ # *complete* definitions for all metadata fields it includes.
29
+ # Lower levels include only the elements of the definition that
30
+ # are different from the parent level (but if a field is NEW at
31
+ # a lower level, the lower level must include the complete
32
+ # definition for that field).
33
+ "host_associated": {
34
+ DEFAULT_KEY: "not provided",
35
+ METADATA_FIELDS_KEY: {
36
+ # not overridden
37
+ "country": {
38
+ "allowed": ["USA"],
39
+ DEFAULT_KEY: "USA",
40
+ "empty": False,
41
+ "is_phi": False,
42
+ "required": True,
43
+ "type": "string"
44
+ },
45
+ # overridden in stds same level host + sample type,
46
+ # again in stds lower host, and *again* in
47
+ # stds lower host + sample type
48
+ "description": {
49
+ "allowed": ["host associated"],
50
+ DEFAULT_KEY: "host associated",
51
+ "empty": False,
52
+ "is_phi": False,
53
+ "required": True,
54
+ "type": "string"
55
+ },
56
+ # overridden in stds lower host
57
+ "dna_extracted": {
58
+ "allowed": ["true", "false"],
59
+ DEFAULT_KEY: "true",
60
+ "empty": False,
61
+ "is_phi": False,
62
+ "required": True,
63
+ "type": "string"
64
+ },
65
+ # overridden in stds lower host + sample type
66
+ "elevation": {
67
+ "anyof": [
68
+ {
69
+ "allowed": [
70
+ "not collected",
71
+ "not provided",
72
+ "restricted access"],
73
+ "type": "string"
74
+ },
75
+ {
76
+ "min": -413.0,
77
+ "type": "number"
78
+ }],
79
+ "empty": False,
80
+ "is_phi": False,
81
+ "required": True
82
+ },
83
+ # overridden in STUDY for this host
84
+ "geo_loc_name": {
85
+ "empty": False,
86
+ "is_phi": False,
87
+ "required": True,
88
+ "type": "string"
89
+ },
90
+ # overridden in STUDY for this host
91
+ "host_type": {
92
+ "allowed": ["human", "animal", "plant"],
93
+ "empty": False,
94
+ "is_phi": False,
95
+ "required": True,
96
+ "type": "string"
97
+ }
98
+ },
99
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
100
+ "fe": {
101
+ "alias": "stool",
102
+ },
103
+ "stool": {
104
+ METADATA_FIELDS_KEY: {
105
+ # overrides stds host,
106
+ # overridden in stds lower host, and
107
+ # in stds lower host + sample type
108
+ "description": {
109
+ "allowed": ["host associated stool"],
110
+ DEFAULT_KEY: "host associated stool",
111
+ "type": "string"
112
+ },
113
+ # overridden in STUDY for this host + sample type
114
+ "physical_specimen_location": {
115
+ "allowed": ["UCSD"],
116
+ DEFAULT_KEY: "UCSD",
117
+ "empty": False,
118
+ "is_phi": False,
119
+ "required": True,
120
+ "type": "string"
121
+ },
122
+ # overridden in stds lower host + sample type
123
+ "physical_specimen_remaining": {
124
+ "allowed": ["true", "false"],
125
+ DEFAULT_KEY: "true",
126
+ "empty": False,
127
+ "is_phi": False,
128
+ "required": True,
129
+ "type": "string"
130
+ }
131
+ }
132
+ }
133
+ },
134
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
135
+ "human": {
136
+ METADATA_FIELDS_KEY: {
137
+ # overrides stds parent host
138
+ "description": {
139
+ "allowed": ["human"],
140
+ DEFAULT_KEY: "human",
141
+ "type": "string"
142
+ },
143
+ # overrides stds parent host
144
+ # BUT overridden in turn in STUDY for this host
145
+ "dna_extracted": {
146
+ "allowed": ["false"],
147
+ DEFAULT_KEY: "false",
148
+ "type": "string"
149
+ },
150
+ # overrides stds parent host
151
+ "host_type": {
152
+ "allowed": ["human"],
153
+ DEFAULT_KEY: "human",
154
+ "type": "string"
155
+ }
156
+ },
157
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
158
+ "stool": {
159
+ METADATA_FIELDS_KEY: {
160
+ # overrides stds parent host + sample type
161
+ "description": {
162
+ "allowed": ["human stool"],
163
+ DEFAULT_KEY: "human stool",
164
+ "type": "string"
165
+ },
166
+ # overrides stds parent host
167
+ "elevation": {
168
+ DEFAULT_KEY: 14,
169
+ "type": "number"
170
+ }
171
+ }
172
+ },
173
+ "dung": {
174
+ METADATA_FIELDS_KEY: {
175
+ # overrides stds parent host + sample type
176
+ "description": {
177
+ "allowed": ["human dung"],
178
+ DEFAULT_KEY: "human dung",
179
+ "type": "string"
180
+ }
181
+ }
182
+ }
183
+ },
184
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
185
+ "dude": {
186
+ METADATA_FIELDS_KEY: {
187
+ # overrides stds parent host
188
+ "host_type": {
189
+ "allowed": ["dude"],
190
+ DEFAULT_KEY: "dude",
191
+ "type": "string"
192
+ }
193
+ }
194
+ }
195
+ }
196
+ },
197
+ "control": {
198
+ METADATA_FIELDS_KEY: {
199
+ # overrides stds parent host
200
+ "description": {
201
+ "allowed": ["control"],
202
+ DEFAULT_KEY: "control",
203
+ "type": "string"
204
+ },
205
+ # overrides stds parent host
206
+ "host_type": {
207
+ "allowed": ["control"],
208
+ DEFAULT_KEY: "control",
209
+ "type": "string"
210
+ }
211
+ }
212
+ }
213
+ }
214
+ }
215
+ }
216
+ }
217
+
218
+ FLAT_STUDY_DICT = {
219
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
220
+ # FLAT list of host types
221
+ "host_associated": {
222
+ METADATA_FIELDS_KEY: {
223
+ # override of standard for this host type
224
+ "geo_loc_name": {
225
+ "allowed": ["USA:CA:San Diego"],
226
+ DEFAULT_KEY: "USA:CA:San Diego",
227
+ "type": "string"
228
+ },
229
+ # note: this overrides the standard for this host type
230
+ # BUT the std lower host type overrides this,
231
+ # and the lowest (most specific) directive wins,
232
+                     # so this shows up ONLY in host_associated's own output
233
+ "host_type": {
234
+ "allowed": ["human", "non-human"],
235
+ "type": "string"
236
+ },
237
+ },
238
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
239
+ "stool": {
240
+ METADATA_FIELDS_KEY: {
241
+ # override of standard for this
242
+ # host + sample type
243
+ "physical_specimen_location": {
244
+ "allowed": ["UCSDST"],
245
+ DEFAULT_KEY: "UCSDST",
246
+ "type": "string"
247
+ }
248
+ }
249
+ }
250
+ }
251
+ },
252
+ "human": {
253
+ DEFAULT_KEY: "not collected",
254
+ METADATA_FIELDS_KEY: {
255
+ # overrides std parent host type
256
+ "dna_extracted": {
257
+ "allowed": ["true"],
258
+ DEFAULT_KEY: "true",
259
+ "type": "string"
260
+ },
261
+ },
262
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
263
+ "feces": {
264
+ "alias": "stool"
265
+ },
266
+ "stool": {
267
+ METADATA_FIELDS_KEY: {
268
+ # override of std parent
269
+ # host + sample type
270
+ "physical_specimen_remaining": {
271
+ "allowed": ["false"],
272
+ DEFAULT_KEY: "false",
273
+ "type": "string"
274
+ }
275
+ }
276
+ },
277
+ "dung": {
278
+ "base_type": "stool",
279
+ METADATA_FIELDS_KEY: {
280
+ # overrides stds parent host + sample type
281
+ "physical_specimen_location": {
282
+ "allowed": ["FIELD"],
283
+ DEFAULT_KEY: "FIELD",
284
+ "type": "string"
285
+ }
286
+ }
287
+ },
288
+ "f": {
289
+ "base_type": "stool"
290
+ }
291
+ }
292
+ }
293
+ }
294
+ }
295
+
296
+ NESTED_STDS_W_STUDY_DICT = {
297
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
298
+ # Top host level (host_associated in this example) has
299
+ # *complete* definitions for all metadata fields it includes.
300
+ # Lower levels include only the elements of the definition that
301
+ # are different from the parent level (but if a field is NEW at
302
+ # a lower level, the lower level must include the complete
303
+ # definition for that field).
304
+ "host_associated": {
305
+ DEFAULT_KEY: "not provided",
306
+ METADATA_FIELDS_KEY: {
307
+ # not overridden
308
+ "country": {
309
+ "allowed": ["USA"],
310
+ DEFAULT_KEY: "USA",
311
+ "empty": False,
312
+ "is_phi": False,
313
+ "required": True,
314
+ "type": "string"
315
+ },
316
+ # overridden in stds same level host + sample type,
317
+ # again in stds lower host, and *again* in
318
+ # stds lower host + sample type
319
+ "description": {
320
+ "allowed": ["host associated"],
321
+ DEFAULT_KEY: "host associated",
322
+ "empty": False,
323
+ "is_phi": False,
324
+ "required": True,
325
+ "type": "string"
326
+ },
327
+ # overridden in stds lower host
328
+ "dna_extracted": {
329
+ "allowed": ["true", "false"],
330
+ DEFAULT_KEY: "true",
331
+ "empty": False,
332
+ "is_phi": False,
333
+ "required": True,
334
+ "type": "string"
335
+ },
336
+ # overridden in stds lower host + sample type
337
+ "elevation": {
338
+ "anyof": [
339
+ {
340
+ "allowed": [
341
+ "not collected",
342
+ "not provided",
343
+ "restricted access"],
344
+ "type": "string"
345
+ },
346
+ {
347
+ "min": -413.0,
348
+ "type": "number"
349
+ }],
350
+ "empty": False,
351
+ "is_phi": False,
352
+ "required": True
353
+ },
354
+ # not overridden (NB: comes from study)
355
+ "geo_loc_name": {
356
+ "allowed": ["USA:CA:San Diego"],
357
+ DEFAULT_KEY: "USA:CA:San Diego",
358
+ "empty": False,
359
+ "is_phi": False,
360
+ "required": True,
361
+ "type": "string"
362
+ },
363
+ # overridden in stds lower host
364
+ # (NB: comes from study)
365
+ "host_type": {
366
+ "allowed": ["human", "non-human"],
367
+ "empty": False,
368
+ "is_phi": False,
369
+ "required": True,
370
+ "type": "string"
371
+ }
372
+ },
373
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
374
+ "fe": {
375
+ "alias": "stool",
376
+ },
377
+ "stool": {
378
+ METADATA_FIELDS_KEY: {
379
+ # overrides stds host,
380
+ # overridden in stds lower host, and
381
+ # in stds lower host + sample type
382
+ "description": {
383
+ "allowed": ["host associated stool"],
384
+ DEFAULT_KEY: "host associated stool",
385
+ "type": "string"
386
+ },
387
+ # not overridden
388
+ # (NB: comes from study)
389
+ "physical_specimen_location": {
390
+ "allowed": ["UCSDST"],
391
+ DEFAULT_KEY: "UCSDST",
392
+ "empty": False,
393
+ "is_phi": False,
394
+ "required": True,
395
+ "type": "string"
396
+ },
397
+ # overridden in stds lower host + sample type
398
+ "physical_specimen_remaining": {
399
+ "allowed": ["true", "false"],
400
+ DEFAULT_KEY: "true",
401
+ "empty": False,
402
+ "is_phi": False,
403
+ "required": True,
404
+ "type": "string"
405
+ }
406
+ }
407
+ }
408
+ },
409
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
410
+ "human": {
411
+ DEFAULT_KEY: "not collected",
412
+ METADATA_FIELDS_KEY: {
413
+ # overrides stds parent host
414
+ "description": {
415
+ "allowed": ["human"],
416
+ DEFAULT_KEY: "human",
417
+ "type": "string"
418
+ },
419
+ # overrides stds parent host
420
+ # (NB: comes from study)
421
+ "dna_extracted": {
422
+ "allowed": ["true"],
423
+ DEFAULT_KEY: "true",
424
+ "type": "string"
425
+ },
426
+ # overrides stds parent host
427
+ "host_type": {
428
+ "allowed": ["human"],
429
+ DEFAULT_KEY: "human",
430
+ "type": "string"
431
+ }
432
+ },
433
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
434
+ "feces": {
435
+ "alias": "stool",
436
+ },
437
+ "stool": {
438
+ METADATA_FIELDS_KEY: {
439
+ # overrides stds parent host + sample type
440
+ "description": {
441
+ "allowed": ["human stool"],
442
+ DEFAULT_KEY: "human stool",
443
+ "type": "string"
444
+ },
445
+ # overrides stds parent host
446
+ "elevation": {
447
+ DEFAULT_KEY: 14,
448
+ "type": "number"
449
+ },
450
+ # overrides stds parent host + sample type
451
+ # (NB: comes from study)
452
+ "physical_specimen_remaining": {
453
+ "allowed": ["false"],
454
+ DEFAULT_KEY: "false",
455
+ "type": "string"
456
+ }
457
+ }
458
+ },
459
+ "dung": {
460
+ "base_type": "stool",
461
+ METADATA_FIELDS_KEY: {
462
+ # overrides stds parent host + sample type
463
+ "description": {
464
+ "allowed": ["human dung"],
465
+ DEFAULT_KEY: "human dung",
466
+ "type": "string"
467
+ },
468
+ # overrides stds parent host + sample type
469
+ "physical_specimen_location": {
470
+ "allowed": ["FIELD"],
471
+ DEFAULT_KEY: "FIELD",
472
+ "type": "string"
473
+ }
474
+ }
475
+ },
476
+ "f": {
477
+ "base_type": "stool"
478
+ }
479
+ },
480
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
481
+ "dude": {
482
+ METADATA_FIELDS_KEY: {
483
+ # overrides stds parent host
484
+ "host_type": {
485
+ "allowed": ["dude"],
486
+ DEFAULT_KEY: "dude",
487
+ "type": "string"
488
+ }
489
+ }
490
+ }
491
+ }
492
+ },
493
+ "control": {
494
+ METADATA_FIELDS_KEY: {
495
+ # overrides stds parent host
496
+ "description": {
497
+ "allowed": ["control"],
498
+ DEFAULT_KEY: "control",
499
+ "type": "string"
500
+ },
501
+ # overrides stds parent host
502
+ "host_type": {
503
+ "allowed": ["control"],
504
+ DEFAULT_KEY: "control",
505
+ "type": "string"
506
+ }
507
+ }
508
+ }
509
+ }
510
+ }
511
+ }
512
+ }
513
+
514
+ FLATTENED_STDS_W_STUDY_DICT = {
515
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
516
+ "host_associated": {
517
+ DEFAULT_KEY: "not provided",
518
+ METADATA_FIELDS_KEY: {
519
+ # from stds same level host
520
+ "country": {
521
+ "allowed": ["USA"],
522
+ DEFAULT_KEY: "USA",
523
+ "empty": False,
524
+ "is_phi": False,
525
+ "required": True,
526
+ "type": "string"
527
+ },
528
+ # from stds same level host
529
+ "description": {
530
+ "allowed": ["host associated"],
531
+ DEFAULT_KEY: "host associated",
532
+ "empty": False,
533
+ "is_phi": False,
534
+ "required": True,
535
+ "type": "string"
536
+ },
537
+ # from stds same level host
538
+ "dna_extracted": {
539
+ "allowed": ["true", "false"],
540
+ DEFAULT_KEY: "true",
541
+ "empty": False,
542
+ "is_phi": False,
543
+ "required": True,
544
+ "type": "string"
545
+ },
546
+ # from stds same level host
547
+ "elevation": {
548
+ "anyof": [
549
+ {
550
+ "allowed": [
551
+ "not collected",
552
+ "not provided",
553
+ "restricted access"],
554
+ "type": "string"
555
+ },
556
+ {
557
+ "min": -413.0,
558
+ "type": "number"
559
+ }],
560
+ "empty": False,
561
+ "is_phi": False,
562
+ "required": True
563
+ },
564
+ # from stds same level host
565
+ "geo_loc_name": {
566
+ "allowed": ["USA:CA:San Diego"],
567
+ DEFAULT_KEY: "USA:CA:San Diego",
568
+ "empty": False,
569
+ "is_phi": False,
570
+ "required": True,
571
+ "type": "string"
572
+ },
573
+ # overridden in stds lower host
574
+ "host_type": {
575
+ "allowed": ["human", "non-human"],
576
+ "empty": False,
577
+ "is_phi": False,
578
+ "required": True,
579
+ "type": "string"
580
+ }
581
+ },
582
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
583
+ "fe": {
584
+ "alias": "stool"
585
+ },
586
+ "stool": {
587
+ METADATA_FIELDS_KEY: {
588
+ # from stds same level host + sample type
589
+ "description": {
590
+ "allowed": ["host associated stool"],
591
+ DEFAULT_KEY: "host associated stool",
592
+ "type": "string"
593
+ },
594
+ # from stds same level host + sample type
595
+ # (NB: comes from study)
596
+ "physical_specimen_location": {
597
+ "allowed": ["UCSDST"],
598
+ DEFAULT_KEY: "UCSDST",
599
+ "empty": False,
600
+ "is_phi": False,
601
+ "required": True,
602
+ "type": "string"
603
+ },
604
+ # from stds same level host + sample type
605
+                             # (NB: NOT overridden by study at this level)
606
+ "physical_specimen_remaining": {
607
+ "allowed": ["true", "false"],
608
+ DEFAULT_KEY: "true",
609
+ "empty": False,
610
+ "is_phi": False,
611
+ "required": True,
612
+ "type": "string"
613
+ }
614
+ }
615
+ }
616
+ }
617
+ },
618
+ "control": {
619
+ DEFAULT_KEY: "not provided",
620
+ METADATA_FIELDS_KEY: {
621
+                     # from stds parent host
622
+ "country": {
623
+ "allowed": ["USA"],
624
+ DEFAULT_KEY: "USA",
625
+ "empty": False,
626
+ "is_phi": False,
627
+ "required": True,
628
+ "type": "string"
629
+ },
630
+ # from stds same level host
631
+ "description": {
632
+ "allowed": ["control"],
633
+ DEFAULT_KEY: "control",
634
+ "empty": False,
635
+ "is_phi": False,
636
+ "required": True,
637
+ "type": "string"
638
+ },
639
+                     # from stds parent host
640
+ "dna_extracted": {
641
+ "allowed": ["true", "false"],
642
+ DEFAULT_KEY: "true",
643
+ "empty": False,
644
+ "is_phi": False,
645
+ "required": True,
646
+ "type": "string"
647
+ },
648
+                     # from stds parent host
649
+ "elevation": {
650
+ "anyof": [
651
+ {
652
+ "allowed": [
653
+ "not collected",
654
+ "not provided",
655
+ "restricted access"],
656
+ "type": "string"
657
+ },
658
+ {
659
+ "min": -413.0,
660
+ "type": "number"
661
+ }],
662
+ "empty": False,
663
+ "is_phi": False,
664
+ "required": True
665
+ },
666
+                     # from stds parent host (as overridden by study)
667
+ "geo_loc_name": {
668
+ "allowed": ["USA:CA:San Diego"],
669
+ DEFAULT_KEY: "USA:CA:San Diego",
670
+ "empty": False,
671
+ "is_phi": False,
672
+ "required": True,
673
+ "type": "string"
674
+ },
675
+                     # from stds same level host
676
+ "host_type": {
677
+ "allowed": ["control"],
678
+ DEFAULT_KEY: "control",
679
+ "empty": False,
680
+ "is_phi": False,
681
+ "required": True,
682
+ "type": "string"
683
+ }
684
+ },
685
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
686
+ "fe": {
687
+ "alias": "stool"
688
+ },
689
+ "stool": {
690
+ METADATA_FIELDS_KEY: {
691
+                             # from stds parent host + sample type
692
+ "description": {
693
+ "allowed": ["host associated stool"],
694
+ DEFAULT_KEY: "host associated stool",
695
+ "type": "string"
696
+ },
697
+ # from stds same level host + sample type
698
+ # (NB: comes from study)
699
+ "physical_specimen_location": {
700
+ "allowed": ["UCSDST"],
701
+ DEFAULT_KEY: "UCSDST",
702
+ "empty": False,
703
+ "is_phi": False,
704
+ "required": True,
705
+ "type": "string"
706
+ },
707
+ # from stds same level host + sample type
708
+                             # (NB: NOT overridden by study for this host)
709
+ "physical_specimen_remaining": {
710
+ "allowed": ["true", "false"],
711
+ DEFAULT_KEY: "true",
712
+ "empty": False,
713
+ "is_phi": False,
714
+ "required": True,
715
+ "type": "string"
716
+ }
717
+ }
718
+ }
719
+ }
720
+ },
721
+ "human": {
722
+ DEFAULT_KEY: "not collected",
723
+ METADATA_FIELDS_KEY: {
724
+ # from stds parent host
725
+ "country": {
726
+ "allowed": ["USA"],
727
+ DEFAULT_KEY: "USA",
728
+ "empty": False,
729
+ "is_phi": False,
730
+ "required": True,
731
+ "type": "string"
732
+ },
733
+ # from stds same level host
734
+ "description": {
735
+ "allowed": ["human"],
736
+ DEFAULT_KEY: "human",
737
+ "empty": False,
738
+ "is_phi": False,
739
+ "required": True,
740
+ "type": "string"
741
+ },
742
+ # from stds same level host
743
+ # (NB: comes from study)
744
+ "dna_extracted": {
745
+ "allowed": ["true"],
746
+ DEFAULT_KEY: "true",
747
+ "empty": False,
748
+ "is_phi": False,
749
+ "required": True,
750
+ "type": "string"
751
+ },
752
+ # from stds parent host
753
+ "elevation": {
754
+ "anyof": [
755
+ {
756
+ "allowed": [
757
+ "not collected",
758
+ "not provided",
759
+ "restricted access"],
760
+ "type": "string"
761
+ },
762
+ {
763
+ "min": -413.0,
764
+ "type": "number"
765
+ }],
766
+ "empty": False,
767
+ "is_phi": False,
768
+ "required": True
769
+ },
770
+ # from stds parent host
771
+ "geo_loc_name": {
772
+ "allowed": ["USA:CA:San Diego"],
773
+ DEFAULT_KEY: "USA:CA:San Diego",
774
+ "empty": False,
775
+ "is_phi": False,
776
+ "required": True,
777
+ "type": "string"
778
+ },
779
+ # from stds same level host
780
+ "host_type": {
781
+ "allowed": ["human"],
782
+ DEFAULT_KEY: "human",
783
+ "empty": False,
784
+ "is_phi": False,
785
+ "required": True,
786
+ "type": "string"
787
+ }
788
+ },
789
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
790
+ "dung": {
791
+ "base_type": "stool",
792
+ METADATA_FIELDS_KEY: {
793
+ # overrides stds parent host + sample type
794
+ "description": {
795
+ "allowed": ["human dung"],
796
+ DEFAULT_KEY: "human dung",
797
+ "type": "string"
798
+ },
799
+ # overrides stds parent host + sample type
800
+ "physical_specimen_location": {
801
+ "allowed": ["FIELD"],
802
+ DEFAULT_KEY: "FIELD",
803
+ "type": "string"
804
+ }
805
+ }
806
+ },
807
+ "f": {
808
+ "base_type": "stool"
809
+ },
810
+ "fe": {
811
+ "alias": "stool"
812
+ },
813
+ "feces": {
814
+ "alias": "stool"
815
+ },
816
+ "stool": {
817
+ METADATA_FIELDS_KEY: {
818
+ # from stds same level host + sample type
819
+ "description": {
820
+ "allowed": ["human stool"],
821
+ DEFAULT_KEY: "human stool",
822
+ "type": "string"
823
+ },
824
+ # from stds same level host + sample type
825
+ "elevation": {
826
+ DEFAULT_KEY: 14,
827
+ "type": "number"
828
+ },
829
+ # from stds parent level host + sample type
830
+ "physical_specimen_location": {
831
+ "allowed": ["UCSDST"],
832
+ DEFAULT_KEY: "UCSDST",
833
+ "empty": False,
834
+ "is_phi": False,
835
+ "required": True,
836
+ "type": "string"
837
+ },
838
+ # from stds same level host + sample type
839
+ "physical_specimen_remaining": {
840
+ "allowed": ["false"],
841
+ DEFAULT_KEY: "false",
842
+ "empty": False,
843
+ "is_phi": False,
844
+ "required": True,
845
+ "type": "string"
846
+ }
847
+ }
848
+ }
849
+ }
850
+ },
851
+ "dude": {
852
+ DEFAULT_KEY: "not collected",
853
+ METADATA_FIELDS_KEY: {
854
+ # from stds parent host
855
+ "country": {
856
+ "allowed": ["USA"],
857
+ DEFAULT_KEY: "USA",
858
+ "empty": False,
859
+ "is_phi": False,
860
+ "required": True,
861
+ "type": "string"
862
+ },
863
+                     # from stds parent host
864
+ "description": {
865
+ "allowed": ["human"],
866
+ DEFAULT_KEY: "human",
867
+ "empty": False,
868
+ "is_phi": False,
869
+ "required": True,
870
+ "type": "string"
871
+ },
872
+                     # from stds parent host
873
+ # (NB: comes from study)
874
+ "dna_extracted": {
875
+ "allowed": ["true"],
876
+ DEFAULT_KEY: "true",
877
+ "empty": False,
878
+ "is_phi": False,
879
+ "required": True,
880
+ "type": "string"
881
+ },
882
+ # from stds parent host
883
+ "elevation": {
884
+ "anyof": [
885
+ {
886
+ "allowed": [
887
+ "not collected",
888
+ "not provided",
889
+ "restricted access"],
890
+ "type": "string"
891
+ },
892
+ {
893
+ "min": -413.0,
894
+ "type": "number"
895
+ }],
896
+ "empty": False,
897
+ "is_phi": False,
898
+ "required": True
899
+ },
900
+ # from stds parent host
901
+ "geo_loc_name": {
902
+ "allowed": ["USA:CA:San Diego"],
903
+ DEFAULT_KEY: "USA:CA:San Diego",
904
+ "empty": False,
905
+ "is_phi": False,
906
+ "required": True,
907
+ "type": "string"
908
+ },
909
+ # from stds same level host
910
+ "host_type": {
911
+ "allowed": ["dude"],
912
+ DEFAULT_KEY: "dude",
913
+ "empty": False,
914
+ "is_phi": False,
915
+ "required": True,
916
+ "type": "string"
917
+ }
918
+ },
919
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
920
+ "dung": {
921
+ "base_type": "stool",
922
+ METADATA_FIELDS_KEY: {
923
+ # overrides stds parent host + sample type
924
+ "description": {
925
+ "allowed": ["human dung"],
926
+ DEFAULT_KEY: "human dung",
927
+ "type": "string"
928
+ },
929
+ # overrides stds parent host + sample type
930
+ "physical_specimen_location": {
931
+ "allowed": ["FIELD"],
932
+ DEFAULT_KEY: "FIELD",
933
+ "type": "string"
934
+ }
935
+ }
936
+ },
937
+ "f": {
938
+ "base_type": "stool"
939
+ },
940
+ "fe": {
941
+ "alias": "stool"
942
+ },
943
+ "feces": {
944
+ "alias": "stool"
945
+ },
946
+ "stool": {
947
+ METADATA_FIELDS_KEY: {
948
+ # from stds same level host + sample type
949
+ "description": {
950
+ "allowed": ["human stool"],
951
+ DEFAULT_KEY: "human stool",
952
+ "type": "string"
953
+ },
954
+ # from stds same level host + sample type
955
+ "elevation": {
956
+ DEFAULT_KEY: 14,
957
+ "type": "number"
958
+ },
959
+ # from stds parent level host + sample type
960
+ "physical_specimen_location": {
961
+ "allowed": ["UCSDST"],
962
+ DEFAULT_KEY: "UCSDST",
963
+ "empty": False,
964
+ "is_phi": False,
965
+ "required": True,
966
+ "type": "string"
967
+ },
968
+ # from stds same level host + sample type
969
+ "physical_specimen_remaining": {
970
+ "allowed": ["false"],
971
+ DEFAULT_KEY: "false",
972
+ "empty": False,
973
+ "is_phi": False,
974
+ "required": True,
975
+ "type": "string"
976
+ }
977
+ }
978
+ }
979
+ }
980
+ }
981
+ }
982
+ }
983
+
984
+ # Tests for combine_stds_and_study_config
985
+
986
+     TEST_DIR = path.dirname(__file__)  # NOTE(review): repeats the identical TEST_DIR assignment at the top of this class
987
+
988
+ def test_combine_stds_and_study_config_empty_study(self):
989
+ """Test combining with an empty study config dict uses only standards."""
990
+ study_config = {}
991
+
992
+ result = combine_stds_and_study_config(
993
+ study_config,
994
+ path.join(self.TEST_DIR, "data/test_config.yml"))
995
+
996
+ expected = {
997
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
998
+ "base": {
999
+ METADATA_FIELDS_KEY: {
1000
+ "sample_name": {
1001
+ TYPE_KEY: "string",
1002
+ "unique": True
1003
+ },
1004
+ "sample_type": {
1005
+ "empty": False,
1006
+ "is_phi": False
1007
+ }
1008
+ }
1009
+ }
1010
+ }
1011
+ }
1012
+
1013
+ self.assertDictEqual(expected, result)
1014
+
1015
+ def test_combine_stds_and_study_config_with_study_specific_metadata(self):
1016
+ """Test combining when study config has STUDY_SPECIFIC_METADATA_KEY section."""
1017
+ study_config = {
1018
+ STUDY_SPECIFIC_METADATA_KEY: {
1019
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1020
+ "base": {
1021
+ METADATA_FIELDS_KEY: {
1022
+ "new_field": {
1023
+ TYPE_KEY: "string",
1024
+ DEFAULT_KEY: "study_value"
1025
+ }
1026
+ }
1027
+ }
1028
+ }
1029
+ }
1030
+ }
1031
+
1032
+ result = combine_stds_and_study_config(
1033
+ study_config,
1034
+ path.join(self.TEST_DIR, "data/test_config.yml"))
1035
+
1036
+ expected = {
1037
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1038
+ "base": {
1039
+ METADATA_FIELDS_KEY: {
1040
+ "sample_name": {
1041
+ TYPE_KEY: "string",
1042
+ "unique": True
1043
+ },
1044
+ "sample_type": {
1045
+ "empty": False,
1046
+ "is_phi": False
1047
+ },
1048
+ "new_field": {
1049
+ TYPE_KEY: "string",
1050
+ DEFAULT_KEY: "study_value"
1051
+ }
1052
+ }
1053
+ }
1054
+ }
1055
+ }
1056
+
1057
+ self.assertDictEqual(expected, result)
1058
+
1059
+ def test_combine_stds_and_study_config_study_overrides_standards(self):
1060
+ """Test that study config values override standards values."""
1061
+ study_config = {
1062
+ STUDY_SPECIFIC_METADATA_KEY: {
1063
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1064
+ "base": {
1065
+ METADATA_FIELDS_KEY: {
1066
+ "sample_type": {
1067
+ "empty": True
1068
+ }
1069
+ }
1070
+ }
1071
+ }
1072
+ }
1073
+ }
1074
+
1075
+ result = combine_stds_and_study_config(
1076
+ study_config,
1077
+ path.join(self.TEST_DIR, "data/test_config.yml"))
1078
+
1079
+ expected = {
1080
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1081
+ "base": {
1082
+ METADATA_FIELDS_KEY: {
1083
+ "sample_name": {
1084
+ TYPE_KEY: "string",
1085
+ "unique": True
1086
+ },
1087
+ "sample_type": {
1088
+ "empty": True,
1089
+ "is_phi": False
1090
+ }
1091
+ }
1092
+ }
1093
+ }
1094
+ }
1095
+
1096
+ self.assertDictEqual(expected, result)
1097
+
1098
+ def test__make_combined_stds_and_study_host_type_dicts(self):
1099
+ """Test making a combined standards and study host type dictionary."""
1100
+ out_nested_dict = _make_combined_stds_and_study_host_type_dicts(
1101
+ self.FLAT_STUDY_DICT, self.NESTED_STDS_DICT, )
1102
+
1103
+ self.maxDiff = None
1104
+ self.assertDictEqual(
1105
+ self.NESTED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY],
1106
+ out_nested_dict)
1107
+
1108
+
1109
+ def test_flatten_nested_stds_dict(self):
1110
+ """Test flattening a nested standards dictionary."""
1111
+ out_flattened_dict = flatten_nested_stds_dict(
1112
+ self.NESTED_STDS_W_STUDY_DICT,
1113
+ None) # , None)
1114
+
1115
+ self.maxDiff = None
1116
+ self.assertDictEqual(
1117
+ self.FLATTENED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY],
1118
+ out_flattened_dict)
1119
+
1120
+ def test_flatten_nested_stds_dict_empty_input(self):
1121
+ """Test flattening an empty dictionary returns empty dict."""
1122
+ input_dict = {}
1123
+
1124
+ result = flatten_nested_stds_dict(input_dict, None)
1125
+
1126
+ self.assertDictEqual({}, result)
1127
+
1128
+ def test_flatten_nested_stds_dict_empty_host_types(self):
1129
+ """Test flattening when HOST_TYPE_SPECIFIC_METADATA_KEY exists but is empty."""
1130
+ input_dict = {
1131
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {}
1132
+ }
1133
+
1134
+ result = flatten_nested_stds_dict(input_dict, None)
1135
+
1136
+ self.assertDictEqual({}, result)
1137
+
1138
+ def test_flatten_nested_stds_dict_single_level(self):
1139
+ """Test flattening a dictionary with only one host type level (no nesting)."""
1140
+ input_dict = {
1141
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1142
+ "host_a": {
1143
+ DEFAULT_KEY: "not provided",
1144
+ METADATA_FIELDS_KEY: {
1145
+ "field1": {
1146
+ TYPE_KEY: "string",
1147
+ DEFAULT_KEY: "value1"
1148
+ }
1149
+ },
1150
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1151
+ "sample1": {
1152
+ METADATA_FIELDS_KEY: {
1153
+ "sample_field": {TYPE_KEY: "string"}
1154
+ }
1155
+ }
1156
+ }
1157
+ # No HOST_TYPE_SPECIFIC_METADATA_KEY here (no nesting)
1158
+ },
1159
+ "host_b": {
1160
+ DEFAULT_KEY: "not collected",
1161
+ METADATA_FIELDS_KEY: {
1162
+ "field2": {
1163
+ TYPE_KEY: "integer"
1164
+ }
1165
+ }
1166
+ }
1167
+ }
1168
+ }
1169
+
1170
+ expected = input_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]
1171
+
1172
+ result = flatten_nested_stds_dict(input_dict, None)
1173
+
1174
+ self.assertDictEqual(expected, result)
1175
+
1176
+ def test_flatten_nested_stds_dict_deeply_nested(self):
1177
+ """Test flattening with 4 levels of host type nesting.
1178
+
1179
+ Tests that metadata inheritance works correctly through multiple
1180
+ levels of nesting: level1 -> level2 -> level3 -> level4.
1181
+ """
1182
+ input_dict = {
1183
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1184
+ "host_level1": {
1185
+ DEFAULT_KEY: "level1_default",
1186
+ METADATA_FIELDS_KEY: {
1187
+ "field_a": {TYPE_KEY: "string", DEFAULT_KEY: "a1"}
1188
+ },
1189
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1190
+ "host_level2": {
1191
+ METADATA_FIELDS_KEY: {
1192
+ "field_b": {TYPE_KEY: "string", DEFAULT_KEY: "b2"}
1193
+ },
1194
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1195
+ "host_level3": {
1196
+ DEFAULT_KEY: "level3_default",
1197
+ METADATA_FIELDS_KEY: {
1198
+ "field_c": {TYPE_KEY: "string", DEFAULT_KEY: "c3"}
1199
+ },
1200
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
1201
+ "host_level4": {
1202
+ METADATA_FIELDS_KEY: {
1203
+ "field_d": {TYPE_KEY: "string", DEFAULT_KEY: "d4"}
1204
+ }
1205
+ }
1206
+ }
1207
+ }
1208
+ }
1209
+ }
1210
+ }
1211
+ }
1212
+ }
1213
+ }
1214
+
1215
+ expected = {
1216
+ "host_level1": {
1217
+ DEFAULT_KEY: "level1_default",
1218
+ METADATA_FIELDS_KEY: {
1219
+ "field_a": {TYPE_KEY: "string", DEFAULT_KEY: "a1"}
1220
+ }
1221
+ },
1222
+ "host_level2": {
1223
+ DEFAULT_KEY: "level1_default",
1224
+ METADATA_FIELDS_KEY: {
1225
+ "field_a": {TYPE_KEY: "string", DEFAULT_KEY: "a1"},
1226
+ "field_b": {TYPE_KEY: "string", DEFAULT_KEY: "b2"}
1227
+ }
1228
+ },
1229
+ "host_level3": {
1230
+ DEFAULT_KEY: "level3_default",
1231
+ METADATA_FIELDS_KEY: {
1232
+ "field_a": {TYPE_KEY: "string", DEFAULT_KEY: "a1"},
1233
+ "field_b": {TYPE_KEY: "string", DEFAULT_KEY: "b2"},
1234
+ "field_c": {TYPE_KEY: "string", DEFAULT_KEY: "c3"}
1235
+ }
1236
+ },
1237
+ "host_level4": {
1238
+ DEFAULT_KEY: "level3_default",
1239
+ METADATA_FIELDS_KEY: {
1240
+ "field_a": {TYPE_KEY: "string", DEFAULT_KEY: "a1"},
1241
+ "field_b": {TYPE_KEY: "string", DEFAULT_KEY: "b2"},
1242
+ "field_c": {TYPE_KEY: "string", DEFAULT_KEY: "c3"},
1243
+ "field_d": {TYPE_KEY: "string", DEFAULT_KEY: "d4"}
1244
+ }
1245
+ }
1246
+ }
1247
+
1248
+ result = flatten_nested_stds_dict(input_dict, None)
1249
+
1250
+ self.assertDictEqual(expected, result)
1251
+
1252
def test_flatten_nested_stds_dict_preserves_sample_types(self):
    """Check that a nested host inherits its parent's sample types.

    The child host overrides "stool", adds "blood", and is expected to
    pick up the parent's "saliva" alias and host-level default as well.
    """
    child_host_definition = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"}
                }
            },
            "blood": {
                METADATA_FIELDS_KEY: {
                    "blood_field": {TYPE_KEY: "string"}
                }
            }
        }
    }
    nested_stds = {
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "parent_host": {
                DEFAULT_KEY: "not provided",
                SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                    "stool": {
                        METADATA_FIELDS_KEY: {
                            "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
                        }
                    },
                    "saliva": {
                        ALIAS_KEY: "oral"
                    }
                },
                HOST_TYPE_SPECIFIC_METADATA_KEY: {
                    "child_host": child_host_definition
                }
            }
        }
    }

    # The expected dicts are built separately from the input so that no
    # object is shared between what goes in and what is compared against.
    flat_parent = {
        DEFAULT_KEY: "not provided",
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
                }
            },
            "saliva": {
                ALIAS_KEY: "oral"
            }
        }
    }
    flat_child = {
        DEFAULT_KEY: "not provided",
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
                    "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"}
                }
            },
            "saliva": {
                ALIAS_KEY: "oral"
            },
            "blood": {
                METADATA_FIELDS_KEY: {
                    "blood_field": {TYPE_KEY: "string"}
                }
            }
        }
    }

    flattened = flatten_nested_stds_dict(nested_stds, None)

    self.assertDictEqual(
        {"parent_host": flat_parent, "child_host": flat_child},
        flattened)
1326
+
1327
+ # Tests for update_wip_metadata_dict
1328
+
1329
def test_update_wip_metadata_dict_new_field(self):
    """Test adding a completely new metadata field to an empty wip dict.

    The expected value is spelled out as its own literal instead of
    reusing the ``stds`` input object: comparing the result against the
    very dict that was passed in could not detect the function under
    test corrupting that input.
    """
    wip = {}
    stds = {
        "field1": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["value1", "value2"]
        }
    }

    result = update_wip_metadata_dict(wip, stds)

    # Independent copy of the input content (no shared objects).
    expected = {
        "field1": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["value1", "value2"]
        }
    }
    self.assertDictEqual(expected, result)
1343
+
1344
def test_update_wip_metadata_dict_update_existing_field(self):
    """An existing field should gain the properties supplied in stds."""
    working = {"field1": {TYPE_KEY: "string"}}
    additions = {"field1": {DEFAULT_KEY: "default_value"}}

    updated = update_wip_metadata_dict(working, additions)

    # type comes from the wip dict, default comes from the stds dict
    self.assertDictEqual(
        {"field1": {TYPE_KEY: "string", DEFAULT_KEY: "default_value"}},
        updated)
1366
+
1367
def test_update_wip_metadata_dict_allowed_replaces_anyof(self):
    """Supplying 'allowed' for a field should drop its existing 'anyof'."""
    anyof_options = [{TYPE_KEY: "string"}, {TYPE_KEY: "number"}]
    working = {"field1": {ANYOF_KEY: anyof_options, "required": True}}
    additions = {"field1": {ALLOWED_KEY: ["value1", "value2"]}}

    updated_field = update_wip_metadata_dict(working, additions)["field1"]

    # anyof is gone, allowed is present with the new values, and the
    # unrelated 'required' flag survives
    self.assertNotIn(ANYOF_KEY, updated_field)
    self.assertIn(ALLOWED_KEY, updated_field)
    self.assertEqual(["value1", "value2"], updated_field[ALLOWED_KEY])
    self.assertTrue(updated_field["required"])
1391
+
1392
def test_update_wip_metadata_dict_anyof_replaces_allowed_and_type(self):
    """Supplying 'anyof' for a field should drop its 'allowed' and 'type'."""
    working = {
        "field1": {
            ALLOWED_KEY: ["old_value"],
            TYPE_KEY: "string",
            "required": True
        }
    }
    anyof_options = [
        {TYPE_KEY: "string", ALLOWED_KEY: ["a", "b"]},
        {TYPE_KEY: "number", "min": 0}
    ]
    additions = {"field1": {ANYOF_KEY: anyof_options}}

    updated_field = update_wip_metadata_dict(working, additions)["field1"]

    # allowed and type are gone, anyof is present, and the unrelated
    # 'required' flag survives
    self.assertNotIn(ALLOWED_KEY, updated_field)
    self.assertNotIn(TYPE_KEY, updated_field)
    self.assertIn(ANYOF_KEY, updated_field)
    self.assertTrue(updated_field["required"])
1417
+
1418
def test_update_wip_metadata_dict_preserves_unrelated_keys(self):
    """Keys in the wip dict that stds does not mention must be kept."""
    working = {
        "field1": {"required": True, "is_phi": False, "empty": False}
    }
    additions = {"field1": {DEFAULT_KEY: "new_default"}}

    updated = update_wip_metadata_dict(working, additions)

    # the three original keys remain and the default is added
    self.assertDictEqual(
        {
            "field1": {
                "required": True,
                "is_phi": False,
                "empty": False,
                DEFAULT_KEY: "new_default"
            }
        },
        updated)
1444
+
1445
def test_update_wip_metadata_dict_multiple_fields(self):
    """Several fields, including one brand-new one, update in one call."""
    working = {
        "field1": {TYPE_KEY: "string"},
        "field2": {TYPE_KEY: "integer"}
    }
    additions = {
        "field1": {DEFAULT_KEY: "default1"},
        "field2": {DEFAULT_KEY: 42},
        # field3 does not exist in the wip dict yet
        "field3": {TYPE_KEY: "boolean", DEFAULT_KEY: True}
    }

    updated = update_wip_metadata_dict(working, additions)

    self.assertDictEqual(
        {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "default1"},
            "field2": {TYPE_KEY: "integer", DEFAULT_KEY: 42},
            "field3": {TYPE_KEY: "boolean", DEFAULT_KEY: True}
        },
        updated)
1465
+
1466
def test_update_wip_metadata_dict_returns_same_object(self):
    """The returned dict must be the wip dict itself, modified in place.

    Identity (not just equality) is asserted here, along with the fact
    that the caller's wip dict carries the update afterwards.
    """
    working = {"field1": {TYPE_KEY: "string"}}
    additions = {"field1": {DEFAULT_KEY: "x"}}

    returned = update_wip_metadata_dict(working, additions)

    # same object, not a copy
    self.assertIs(returned, working)
    # and the original reference reflects the change
    self.assertIn(DEFAULT_KEY, working["field1"])
1481
+
1482
def test__combine_base_and_added_metadata_fields(self):
    """Fields from both inputs are merged; the add input wins on overlap."""
    base_fields = {
        # present in both inputs; the add definition should win
        "field1": {
            "allowed": ["value1"],
            "type": "string"
        },
        # present only in the base input
        "fieldX": {
            "type": "string",
            "allowed": ["valueX"]
        }
    }
    added_fields = {
        # present in both inputs; the add definition should win
        "field1": {
            "allowed": ["value2"],
            "type": "string"
        },
        # present only in the add input
        "field2": {
            "type": "string"
        }
    }

    combined = _combine_base_and_added_metadata_fields(
        {METADATA_FIELDS_KEY: base_fields},
        {METADATA_FIELDS_KEY: added_fields})

    self.assertDictEqual(
        {
            "field1": {
                "allowed": ["value2"],
                "type": "string"
            },
            "field2": {
                "type": "string"
            },
            "fieldX": {
                "type": "string",
                "allowed": ["valueX"]
            }
        },
        combined)
1529
+
1530
def test__combine_base_and_added_metadata_fields_empty_base(self):
    """Test combining when base_dict has no metadata_fields key.

    The expected value is an independent literal rather than a reference
    into ``add_dict``, so the assertion still means something if the
    function under test returns or mutates the input's inner dict.
    """
    base_dict = {}

    add_dict = {
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        }
    }

    # Same content as add_dict's metadata fields, but a separate object.
    expected = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
    }

    result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
    self.assertDictEqual(expected, result)
1544
+
1545
def test__combine_base_and_added_metadata_fields_empty_add(self):
    """Test combining when add_dict has no metadata_fields key.

    The expected value is an independent literal rather than a reference
    into ``base_dict``, so the assertion still means something if the
    function under test returns or mutates the input's inner dict.
    """
    base_dict = {
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        }
    }

    add_dict = {}

    # Same content as base_dict's metadata fields, but a separate object.
    expected = {
        "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
    }

    result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
    self.assertDictEqual(expected, result)
1559
+
1560
def test__combine_base_and_added_metadata_fields_both_empty(self):
    """Two inputs lacking metadata_fields should combine to an empty dict."""
    no_base_fields = {}
    no_added_fields = {}

    combined = _combine_base_and_added_metadata_fields(
        no_base_fields, no_added_fields)

    self.assertDictEqual({}, combined)
1569
+
1570
+ # Tests for _combine_base_and_added_host_type
1571
+
1572
def test__combine_base_and_added_host_type_default_key_override(self):
    """When both inputs define DEFAULT_KEY, the add input's value wins."""
    combined = _combine_base_and_added_host_type(
        {DEFAULT_KEY: "not provided"},
        {DEFAULT_KEY: "not collected"})

    self.assertEqual("not collected", combined[DEFAULT_KEY])
1584
+
1585
def test__combine_base_and_added_host_type_default_key_preserved(self):
    """The base DEFAULT_KEY survives when the add input does not set one."""
    combined = _combine_base_and_added_host_type(
        {DEFAULT_KEY: "not provided"},
        {})

    self.assertEqual("not provided", combined[DEFAULT_KEY])
1595
+
1596
def test__combine_base_and_added_host_type_default_key_added(self):
    """The add input's DEFAULT_KEY is used when the base has none."""
    combined = _combine_base_and_added_host_type(
        {},
        {DEFAULT_KEY: "not collected"})

    self.assertEqual("not collected", combined[DEFAULT_KEY])
1606
+
1607
def test__combine_base_and_added_host_type_empty_base(self):
    """Test combining when base_dict is empty (result should match add).

    The result is compared against an independently written literal
    instead of ``add_dict`` itself; comparing against the input object
    could not detect the function under test altering that input.
    """
    base_dict = {}
    add_dict = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    # Same content as add_dict, sharing no objects with it.
    expected = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    result = _combine_base_and_added_host_type(base_dict, add_dict)

    self.assertDictEqual(expected, result)
1627
+
1628
def test__combine_base_and_added_host_type_empty_add(self):
    """Test combining when add_dict is empty (result should match base).

    The result is compared against an independently written literal
    instead of ``base_dict`` itself; comparing against the input object
    could not detect the function under test altering that input.
    """
    base_dict = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }
    add_dict = {}

    # Same content as base_dict, sharing no objects with it.
    expected = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "field1": {TYPE_KEY: "string", DEFAULT_KEY: "value1"}
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "description": {TYPE_KEY: "string"}
                }
            }
        }
    }

    result = _combine_base_and_added_host_type(base_dict, add_dict)

    self.assertDictEqual(expected, result)
1648
+
1649
def test__combine_base_and_added_host_type_both_empty(self):
    """Two empty host-type dicts should combine to an empty dict."""
    empty_base = {}
    empty_add = {}

    combined = _combine_base_and_added_host_type(empty_base, empty_add)

    self.assertDictEqual({}, combined)
1657
+
1658
def test__combine_base_and_added_host_type_full_combination(self):
    """Exercise default, metadata fields, and sample types in one merge."""
    base_host = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "country": {
                TYPE_KEY: "string",
                ALLOWED_KEY: ["USA"],
                DEFAULT_KEY: "USA"
            },
            "description": {
                TYPE_KEY: "string",
                DEFAULT_KEY: "base description"
            }
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCSD"}
                }
            },
            "saliva": {
                ALIAS_KEY: "oral"
            }
        }
    }
    added_host = {
        DEFAULT_KEY: "not collected",
        METADATA_FIELDS_KEY: {
            # overrides part of a field the base already defines
            "description": {
                DEFAULT_KEY: "add description"
            },
            # a field the base does not have
            "new_field": {
                TYPE_KEY: "integer"
            }
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # overrides part of a sample type the base already defines
            "stool": {
                METADATA_FIELDS_KEY: {
                    "location": {DEFAULT_KEY: "UCLA"}
                }
            },
            # a sample type the base does not have
            "blood": {
                METADATA_FIELDS_KEY: {
                    "volume": {TYPE_KEY: "number"}
                }
            }
        }
    }

    expected_fields = {
        # untouched base field
        "country": {
            TYPE_KEY: "string",
            ALLOWED_KEY: ["USA"],
            DEFAULT_KEY: "USA"
        },
        # base type kept, default replaced by the add value
        "description": {
            TYPE_KEY: "string",
            DEFAULT_KEY: "add description"
        },
        # introduced by the add input
        "new_field": {
            TYPE_KEY: "integer"
        }
    }
    expected_sample_types = {
        # base type kept, default replaced by the add value
        "stool": {
            METADATA_FIELDS_KEY: {
                "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCLA"}
            }
        },
        # untouched base sample type
        "saliva": {
            ALIAS_KEY: "oral"
        },
        # introduced by the add input
        "blood": {
            METADATA_FIELDS_KEY: {
                "volume": {TYPE_KEY: "number"}
            }
        }
    }

    combined = _combine_base_and_added_host_type(base_host, added_host)

    self.assertDictEqual(
        {
            # host-level default replaced by the add value
            DEFAULT_KEY: "not collected",
            METADATA_FIELDS_KEY: expected_fields,
            SAMPLE_TYPE_SPECIFIC_METADATA_KEY: expected_sample_types
        },
        combined)
1755
+
1756
def test__combine_base_and_added_host_type_empty_metadata_fields_result(self):
    """METADATA_FIELDS_KEY should be absent when neither input has it."""
    # neither dict carries METADATA_FIELDS_KEY
    base_host = {DEFAULT_KEY: "not provided"}
    added_host = {}

    combined = _combine_base_and_added_host_type(base_host, added_host)

    self.assertEqual("not provided", combined[DEFAULT_KEY])
    self.assertNotIn(METADATA_FIELDS_KEY, combined)
1770
+
1771
def test__combine_base_and_added_host_type_empty_sample_types_result(self):
    """SAMPLE_TYPE_SPECIFIC_METADATA_KEY should be absent when neither input has it."""
    # neither dict carries SAMPLE_TYPE_SPECIFIC_METADATA_KEY
    base_host = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {"field1": {TYPE_KEY: "string"}}
    }
    added_host = {}

    combined = _combine_base_and_added_host_type(base_host, added_host)

    self.assertEqual("not provided", combined[DEFAULT_KEY])
    self.assertIn(METADATA_FIELDS_KEY, combined)
    self.assertNotIn(SAMPLE_TYPE_SPECIFIC_METADATA_KEY, combined)
1789
+
1790
def test__combine_base_and_added_sample_type_specific_metadata(self):
    """Merge sample types that overlap, differ in kind, or exist on one side."""
    base_sample_types = {
        # metadata fields here, but redefined as an alias in the add input
        "sample_type1": {
            METADATA_FIELDS_KEY: {
                "confuse": {
                    "allowed": ["value1"],
                    "type": "string"
                },
            }
        },
        # metadata fields on both sides: combined, add input wins
        "sample_type2": {
            METADATA_FIELDS_KEY: {
                "field1": {
                    "type": "string"
                },
                "fieldX": {
                    "type": "string",
                    "allowed": ["valueX"]
                }
            }
        },
        # exists only in the base input
        "sample_type4": {
            METADATA_FIELDS_KEY: {
                "field1": {
                    "type": "string"
                }
            }
        }
    }
    added_sample_types = {
        # an alias here, metadata fields in the base input
        "sample_type1": {
            "alias": "sample_type2"
        },
        # metadata fields on both sides: combined, add input wins
        "sample_type2": {
            METADATA_FIELDS_KEY: {
                "field1": {
                    "allowed": ["value1"],
                    "type": "string"
                },
                "field2": {
                    "type": "string"
                }
            }
        },
        # exists only in the add input
        "sample_type3": {
            "base_type": "sample_type2"
        }
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: base_sample_types},
        {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: added_sample_types})

    self.assertDictEqual(
        {
            "sample_type1": {
                "alias": "sample_type2"
            },
            "sample_type2": {
                METADATA_FIELDS_KEY: {
                    "field1": {
                        "allowed": ["value1"],
                        "type": "string"
                    },
                    "field2": {
                        "type": "string"
                    },
                    "fieldX": {
                        "type": "string",
                        "allowed": ["valueX"]
                    }
                }
            },
            "sample_type3": {
                "base_type": "sample_type2"
            },
            "sample_type4": {
                METADATA_FIELDS_KEY: {
                    "field1": {
                        "type": "string"
                    }
                }
            }
        },
        combined)
1884
+
1885
def test__combine_base_and_added_sample_type_specific_metadata_empty_base(self):
    """With no sample types in the base, the add input's come through."""
    added = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
            }
        }
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        {}, added)

    self.assertDictEqual(
        {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
            }
        },
        combined)
1909
+
1910
def test__combine_base_and_added_sample_type_specific_metadata_empty_add(self):
    """With no sample types in the add input, the base's come through."""
    base = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
            }
        }
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        base, {})

    self.assertDictEqual(
        {
            "stool": {
                METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
            }
        },
        combined)
1934
+
1935
def test__combine_base_and_added_sample_type_specific_metadata_base_type_with_metadata(self):
    """Combine a sample type that carries base_type AND metadata_fields.

    Both inputs define "stool" with a base_type and metadata fields. The
    expected outcome is that the add input's base_type replaces the base
    input's, while the metadata fields are merged with the add input
    winning for fields defined on both sides.
    """
    base_stool = {
        BASE_TYPE_KEY: "original_base",
        METADATA_FIELDS_KEY: {
            "description": {
                "allowed": ["stool sample"],
                "type": "string"
            },
            "location": {
                "allowed": ["UCSD"],
                "type": "string"
            }
        }
    }
    added_stool = {
        BASE_TYPE_KEY: "new_base",
        METADATA_FIELDS_KEY: {
            "description": {
                "allowed": ["human stool"],
                "type": "string"
            }
        }
    }

    combined = _combine_base_and_added_sample_type_specific_metadata(
        {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {"stool": base_stool}},
        {SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {"stool": added_stool}})

    self.assertDictEqual(
        {
            "stool": {
                # taken from the add input, replacing "original_base"
                BASE_TYPE_KEY: "new_base",
                METADATA_FIELDS_KEY: {
                    # add input's definition replaces the base one
                    "description": {
                        "allowed": ["human stool"],
                        "type": "string"
                    },
                    # only the base input defines this field
                    "location": {
                        "allowed": ["UCSD"],
                        "type": "string"
                    }
                }
            }
        },
        combined)
1996
+
1997
def test__combine_base_and_added_sample_type_specific_metadata_mismatched_types_add_wins(self):
    """Test that when definition types differ between base and add, add always wins.

    When the sample type definition type (alias, base_type, or metadata_fields)
    differs between base_dict and add_dict, the add_dict entry completely
    replaces the base_dict entry rather than attempting to combine them.

    This test covers all possible type mismatch scenarios:
    - base has alias, add has metadata_fields
    - base has alias, add has base_type
    - base has metadata_fields, add has alias
    - base has metadata_fields, add has base_type
    - base has base_type, add has alias
    - base has base_type, add has metadata_fields

    The expected value is written out as its own literal rather than
    taken from ``add_dict``, so the comparison is not made against an
    object the function under test was handed.
    """
    base_dict = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # alias -> metadata_fields
            "sample_alias_to_metadata": {
                ALIAS_KEY: "stool"
            },
            # alias -> base_type
            "sample_alias_to_base": {
                ALIAS_KEY: "stool"
            },
            # metadata_fields -> alias
            "sample_metadata_to_alias": {
                METADATA_FIELDS_KEY: {
                    "field1": {"type": "string"}
                }
            },
            # metadata_fields -> base_type
            "sample_metadata_to_base": {
                METADATA_FIELDS_KEY: {
                    "field1": {"type": "string"}
                }
            },
            # base_type -> alias
            "sample_base_to_alias": {
                BASE_TYPE_KEY: "stool"
            },
            # base_type -> metadata_fields
            "sample_base_to_metadata": {
                BASE_TYPE_KEY: "stool"
            }
        }
    }

    add_dict = {
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "sample_alias_to_metadata": {
                METADATA_FIELDS_KEY: {
                    "new_field": {"type": "integer"}
                }
            },
            "sample_alias_to_base": {
                BASE_TYPE_KEY: "saliva"
            },
            "sample_metadata_to_alias": {
                ALIAS_KEY: "saliva"
            },
            "sample_metadata_to_base": {
                BASE_TYPE_KEY: "saliva"
            },
            "sample_base_to_alias": {
                ALIAS_KEY: "saliva"
            },
            "sample_base_to_metadata": {
                METADATA_FIELDS_KEY: {
                    "new_field": {"type": "integer"}
                }
            }
        }
    }

    # Every entry should match add_dict's definition exactly (the base
    # entry is replaced); spelled out here so nothing is shared with the
    # input.
    expected = {
        "sample_alias_to_metadata": {
            METADATA_FIELDS_KEY: {
                "new_field": {"type": "integer"}
            }
        },
        "sample_alias_to_base": {
            BASE_TYPE_KEY: "saliva"
        },
        "sample_metadata_to_alias": {
            ALIAS_KEY: "saliva"
        },
        "sample_metadata_to_base": {
            BASE_TYPE_KEY: "saliva"
        },
        "sample_base_to_alias": {
            ALIAS_KEY: "saliva"
        },
        "sample_base_to_metadata": {
            METADATA_FIELDS_KEY: {
                "new_field": {"type": "integer"}
            }
        }
    }

    result = _combine_base_and_added_sample_type_specific_metadata(base_dict, add_dict)
    self.assertDictEqual(expected, result)
2077
+
2078
def test__id_sample_type_definition_alias(self):
    """A definition holding only an alias is identified as ALIAS_KEY."""
    definition_kind = _id_sample_type_definition(
        "test_sample", {ALIAS_KEY: "other_sample"})

    self.assertEqual(ALIAS_KEY, definition_kind)
2085
+
2086
def test__id_sample_type_definition_metadata(self):
    """A definition holding only metadata fields is identified as METADATA_FIELDS_KEY."""
    definition = {
        METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
    }

    definition_kind = _id_sample_type_definition("test_sample", definition)

    self.assertEqual(METADATA_FIELDS_KEY, definition_kind)
2097
+
2098
def test__id_sample_type_definition_base_with_metadata(self):
    """A definition with base_type AND metadata_fields is METADATA_FIELDS_KEY.

    Having both keys is treated as valid here: the test expects the
    function to report METADATA_FIELDS_KEY rather than BASE_TYPE_KEY or
    an error.
    """
    definition = {
        BASE_TYPE_KEY: "stool",
        METADATA_FIELDS_KEY: {
            "description": {
                "allowed": ["human dung"],
                "type": "string"
            }
        }
    }

    definition_kind = _id_sample_type_definition("dung", definition)

    self.assertEqual(METADATA_FIELDS_KEY, definition_kind)
2116
+
2117
def test__id_sample_type_definition_base(self):
    """A definition holding only a base_type is identified as BASE_TYPE_KEY."""
    definition_kind = _id_sample_type_definition(
        "test_sample", {BASE_TYPE_KEY: "other_sample"})

    self.assertEqual(BASE_TYPE_KEY, definition_kind)
2124
+
2125
def test__id_sample_type_definition_err_alias_metadata(self):
    """Defining both an alias and metadata fields must raise ValueError."""
    conflicting_definition = {
        ALIAS_KEY: "other_sample",
        METADATA_FIELDS_KEY: {"field1": {"type": "string"}}
    }
    expected_message = "Sample type 'test_sample' has both 'alias' and 'metadata_fields' keys"

    with self.assertRaisesRegex(ValueError, expected_message):
        _id_sample_type_definition("test_sample", conflicting_definition)
2137
+
2138
def test__id_sample_type_definition_err_alias_base(self):
    """Defining both an alias and a base type must raise ValueError."""
    conflicting_definition = {
        ALIAS_KEY: "other_sample",
        BASE_TYPE_KEY: "other_sample"
    }
    expected_message = "Sample type 'test_sample' has both 'alias' and 'base_type' keys"

    with self.assertRaisesRegex(ValueError, expected_message):
        _id_sample_type_definition("test_sample", conflicting_definition)
2146
+
2147
def test__id_sample_type_definition_err_no_keys(self):
    """An empty sample type definition must raise ValueError."""
    expected_message = "Sample type 'test_sample' has neither 'alias' nor 'metadata_fields' keys"

    with self.assertRaisesRegex(ValueError, expected_message):
        _id_sample_type_definition("test_sample", {})
2152
+
2153
+ # Tests for build_full_flat_config_dict
2154
+
2155
def test_build_full_flat_config_dict_no_inputs(self):
    """Calling with no arguments should still yield a populated config."""
    flat_config = build_full_flat_config_dict()

    # host-type section is present and is a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_defs = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_defs, dict)

    # the "base" host type defines a sample_name metadata field
    self.assertIn("base", host_defs)
    self.assertIn(METADATA_FIELDS_KEY, host_defs["base"])
    self.assertIn("sample_name", host_defs["base"][METADATA_FIELDS_KEY])

    # the "human" host type defaults host_common_name to "human"
    self.assertIn("human", host_defs)
    self.assertIn(METADATA_FIELDS_KEY, host_defs["human"])
    human_fields = host_defs["human"][METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    self.assertEqual(
        "human", human_fields["host_common_name"][DEFAULT_KEY])

    # the top-level default carries the expected default value
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("not applicable", flat_config[DEFAULT_KEY])
2182
+
2183
def test_build_full_flat_config_dict_with_study_config(self):
    """A study config's custom field should appear alongside the standards."""
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }
    study_human_host = {
        METADATA_FIELDS_KEY: {
            "custom_field": {
                DEFAULT_KEY: "custom_value",
                TYPE_KEY: "string"
            }
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {}
            }
        }
    }
    study_settings = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": study_human_host
            }
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, software_settings, self.TEST_STDS_FP)

    # host-type section is present and is a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_defs = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_defs, dict)

    # the "human" host type defaults host_common_name to "human"
    self.assertIn("human", host_defs)
    self.assertIn(METADATA_FIELDS_KEY, host_defs["human"])
    human_fields = host_defs["human"][METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    self.assertEqual(
        "human", human_fields["host_common_name"][DEFAULT_KEY])

    # the study-specific field made it into the human host
    self.assertIn("custom_field", human_fields)
    self.assertEqual(
        "custom_value", human_fields["custom_field"][DEFAULT_KEY])

    # the top-level default is the one from the software config
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("software_default", flat_config[DEFAULT_KEY])
2236
+
2237
def test_build_full_flat_config_dict_without_study_config(self):
    """Passing None as the study config should still produce host types."""
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }

    flat_config = build_full_flat_config_dict(
        None, software_settings, self.TEST_STDS_FP)

    # host-type section is present and is a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_defs = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_defs, dict)

    # the "human" host type defaults host_common_name to "human"
    self.assertIn("human", host_defs)
    self.assertIn(METADATA_FIELDS_KEY, host_defs["human"])
    human_fields = host_defs["human"][METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    self.assertEqual(
        "human", human_fields["host_common_name"][DEFAULT_KEY])

    # the software config's default is carried through
    self.assertEqual("software_default", flat_config[DEFAULT_KEY])
2264
+
2265
def test_build_full_flat_config_dict_merges_software_and_study(self):
    """Study-level settings should take precedence over software settings."""
    software_settings = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: False,
        OVERWRITE_NON_NANS_KEY: True
    }
    study_human_host = {
        METADATA_FIELDS_KEY: {},
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {}
            }
        }
    }
    study_settings = {
        # these two keys also appear in the software settings
        DEFAULT_KEY: "study_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": study_human_host
            }
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, software_settings, self.TEST_STDS_FP)

    # keys set in both configs take the study value
    self.assertEqual("study_default", flat_config[DEFAULT_KEY])
    self.assertTrue(flat_config[LEAVE_REQUIREDS_BLANK_KEY])
    # a key set only in the software config keeps its value
    self.assertTrue(flat_config[OVERWRITE_NON_NANS_KEY])
2297
+
2298
def test_build_full_flat_config_dict_none_software_config(self):
    """Passing None as the software config should fall back to defaults."""
    study_human_host = {
        METADATA_FIELDS_KEY: {},
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {}
            }
        }
    }
    study_settings = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": study_human_host
            }
        }
    }

    flat_config = build_full_flat_config_dict(
        study_settings, None, self.TEST_STDS_FP)

    # host-type section is present and is a dict
    self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, flat_config)
    host_defs = flat_config[HOST_TYPE_SPECIFIC_METADATA_KEY]
    self.assertIsInstance(host_defs, dict)

    # the "human" host type defaults host_common_name to "human"
    self.assertIn("human", host_defs)
    self.assertIn(METADATA_FIELDS_KEY, host_defs["human"])
    human_fields = host_defs["human"][METADATA_FIELDS_KEY]
    self.assertIn("host_common_name", human_fields)
    self.assertEqual(
        "human", human_fields["host_common_name"][DEFAULT_KEY])

    # a top-level default is present even though no software config
    # was supplied, and it has the expected default value
    self.assertIn(DEFAULT_KEY, flat_config)
    self.assertEqual("not applicable", flat_config[DEFAULT_KEY])