metameq 2026.1.1__py3-none-any.whl → 2026.1.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ from metameq.src.util import \
5
5
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
6
6
  ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
7
7
  STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
8
- OVERWRITE_NON_NANS_KEY
8
+ OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE
9
9
  from metameq.src.metadata_configurator import \
10
10
  combine_stds_and_study_config, \
11
11
  _make_combined_stds_and_study_host_type_dicts, \
@@ -15,7 +15,8 @@ from metameq.src.metadata_configurator import \
15
15
  _combine_base_and_added_host_type, \
16
16
  _id_sample_type_definition, \
17
17
  update_wip_metadata_dict, \
18
- build_full_flat_config_dict
18
+ build_full_flat_config_dict, \
19
+ _resolve_sample_type_aliases_and_bases
19
20
 
20
21
 
21
22
  class TestMetadataConfigurator(TestCase):
@@ -581,18 +582,153 @@ class TestMetadataConfigurator(TestCase):
581
582
  },
582
583
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
583
584
  "fe": {
584
- "alias": "stool"
585
+ # Resolved alias to stool - gets stool's resolved fields
586
+ METADATA_FIELDS_KEY: {
587
+ "country": {
588
+ "allowed": ["USA"],
589
+ DEFAULT_KEY: "USA",
590
+ "empty": False,
591
+ "is_phi": False,
592
+ "required": True,
593
+ "type": "string"
594
+ },
595
+ "description": {
596
+ "allowed": ["host associated stool"],
597
+ DEFAULT_KEY: "host associated stool",
598
+ "empty": False,
599
+ "is_phi": False,
600
+ "required": True,
601
+ "type": "string"
602
+ },
603
+ "dna_extracted": {
604
+ "allowed": ["true", "false"],
605
+ DEFAULT_KEY: "true",
606
+ "empty": False,
607
+ "is_phi": False,
608
+ "required": True,
609
+ "type": "string"
610
+ },
611
+ "elevation": {
612
+ "anyof": [
613
+ {
614
+ "allowed": [
615
+ "not collected",
616
+ "not provided",
617
+ "restricted access"],
618
+ "type": "string"
619
+ },
620
+ {
621
+ "min": -413.0,
622
+ "type": "number"
623
+ }],
624
+ "empty": False,
625
+ "is_phi": False,
626
+ "required": True
627
+ },
628
+ "geo_loc_name": {
629
+ "allowed": ["USA:CA:San Diego"],
630
+ DEFAULT_KEY: "USA:CA:San Diego",
631
+ "empty": False,
632
+ "is_phi": False,
633
+ "required": True,
634
+ "type": "string"
635
+ },
636
+ "host_type": {
637
+ "allowed": ["human", "non-human"],
638
+ "empty": False,
639
+ "is_phi": False,
640
+ "required": True,
641
+ "type": "string"
642
+ },
643
+ "physical_specimen_location": {
644
+ "allowed": ["UCSDST"],
645
+ DEFAULT_KEY: "UCSDST",
646
+ "empty": False,
647
+ "is_phi": False,
648
+ "required": True,
649
+ "type": "string"
650
+ },
651
+ "physical_specimen_remaining": {
652
+ "allowed": ["true", "false"],
653
+ DEFAULT_KEY: "true",
654
+ "empty": False,
655
+ "is_phi": False,
656
+ "required": True,
657
+ "type": "string"
658
+ },
659
+ QIITA_SAMPLE_TYPE: {
660
+ "allowed": ["stool"],
661
+ DEFAULT_KEY: "stool",
662
+ "type": "string"
663
+ },
664
+ SAMPLE_TYPE_KEY: {
665
+ "allowed": ["stool"],
666
+ DEFAULT_KEY: "stool",
667
+ "type": "string"
668
+ }
669
+ }
585
670
  },
586
671
  "stool": {
587
672
  METADATA_FIELDS_KEY: {
673
+ # Host fields merged in
674
+ "country": {
675
+ "allowed": ["USA"],
676
+ DEFAULT_KEY: "USA",
677
+ "empty": False,
678
+ "is_phi": False,
679
+ "required": True,
680
+ "type": "string"
681
+ },
588
682
  # from stds same level host + sample type
589
683
  "description": {
590
684
  "allowed": ["host associated stool"],
591
685
  DEFAULT_KEY: "host associated stool",
686
+ "empty": False,
687
+ "is_phi": False,
688
+ "required": True,
592
689
  "type": "string"
593
690
  },
594
- # from stds same level host + sample type
595
- # (NB: comes from study)
691
+ "dna_extracted": {
692
+ "allowed": ["true", "false"],
693
+ DEFAULT_KEY: "true",
694
+ "empty": False,
695
+ "is_phi": False,
696
+ "required": True,
697
+ "type": "string"
698
+ },
699
+ "elevation": {
700
+ "anyof": [
701
+ {
702
+ "allowed": [
703
+ "not collected",
704
+ "not provided",
705
+ "restricted access"],
706
+ "type": "string"
707
+ },
708
+ {
709
+ "min": -413.0,
710
+ "type": "number"
711
+ }],
712
+ "empty": False,
713
+ "is_phi": False,
714
+ "required": True
715
+ },
716
+ "geo_loc_name": {
717
+ "allowed": ["USA:CA:San Diego"],
718
+ DEFAULT_KEY: "USA:CA:San Diego",
719
+ "empty": False,
720
+ "is_phi": False,
721
+ "required": True,
722
+ "type": "string"
723
+ },
724
+ "host_type": {
725
+ "allowed": ["human", "non-human"],
726
+ "empty": False,
727
+ "is_phi": False,
728
+ "required": True,
729
+ "type": "string"
730
+ },
731
+ # from stds same level host + sample type (NB: comes from study)
596
732
  "physical_specimen_location": {
597
733
  "allowed": ["UCSDST"],
598
734
  DEFAULT_KEY: "UCSDST",
@@ -601,8 +737,7 @@ class TestMetadataConfigurator(TestCase):
601
737
  "required": True,
602
738
  "type": "string"
603
739
  },
604
- # from stds same level host + sample type
605
- # (NB: comes from study)
740
+ # from stds same level host + sample type (NB: comes from study)
606
741
  "physical_specimen_remaining": {
607
742
  "allowed": ["true", "false"],
608
743
  DEFAULT_KEY: "true",
@@ -610,6 +745,16 @@ class TestMetadataConfigurator(TestCase):
610
745
  "is_phi": False,
611
746
  "required": True,
612
747
  "type": "string"
748
+ },
749
+ QIITA_SAMPLE_TYPE: {
750
+ "allowed": ["stool"],
751
+ DEFAULT_KEY: "stool",
752
+ "type": "string"
753
+ },
754
+ SAMPLE_TYPE_KEY: {
755
+ "allowed": ["stool"],
756
+ DEFAULT_KEY: "stool",
757
+ "type": "string"
613
758
  }
614
759
  }
615
760
  }
@@ -684,18 +829,155 @@ class TestMetadataConfigurator(TestCase):
684
829
  },
685
830
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
686
831
  "fe": {
687
- "alias": "stool"
832
+ # Resolved alias to stool - gets stool's resolved fields
833
+ METADATA_FIELDS_KEY: {
834
+ "country": {
835
+ "allowed": ["USA"],
836
+ DEFAULT_KEY: "USA",
837
+ "empty": False,
838
+ "is_phi": False,
839
+ "required": True,
840
+ "type": "string"
841
+ },
842
+ "description": {
843
+ "allowed": ["host associated stool"],
844
+ DEFAULT_KEY: "host associated stool",
845
+ "empty": False,
846
+ "is_phi": False,
847
+ "required": True,
848
+ "type": "string"
849
+ },
850
+ "dna_extracted": {
851
+ "allowed": ["true", "false"],
852
+ DEFAULT_KEY: "true",
853
+ "empty": False,
854
+ "is_phi": False,
855
+ "required": True,
856
+ "type": "string"
857
+ },
858
+ "elevation": {
859
+ "anyof": [
860
+ {
861
+ "allowed": [
862
+ "not collected",
863
+ "not provided",
864
+ "restricted access"],
865
+ "type": "string"
866
+ },
867
+ {
868
+ "min": -413.0,
869
+ "type": "number"
870
+ }],
871
+ "empty": False,
872
+ "is_phi": False,
873
+ "required": True
874
+ },
875
+ "geo_loc_name": {
876
+ "allowed": ["USA:CA:San Diego"],
877
+ DEFAULT_KEY: "USA:CA:San Diego",
878
+ "empty": False,
879
+ "is_phi": False,
880
+ "required": True,
881
+ "type": "string"
882
+ },
883
+ "host_type": {
884
+ "allowed": ["control"],
885
+ DEFAULT_KEY: "control",
886
+ "empty": False,
887
+ "is_phi": False,
888
+ "required": True,
889
+ "type": "string"
890
+ },
891
+ "physical_specimen_location": {
892
+ "allowed": ["UCSDST"],
893
+ DEFAULT_KEY: "UCSDST",
894
+ "empty": False,
895
+ "is_phi": False,
896
+ "required": True,
897
+ "type": "string"
898
+ },
899
+ "physical_specimen_remaining": {
900
+ "allowed": ["true", "false"],
901
+ DEFAULT_KEY: "true",
902
+ "empty": False,
903
+ "is_phi": False,
904
+ "required": True,
905
+ "type": "string"
906
+ },
907
+ QIITA_SAMPLE_TYPE: {
908
+ "allowed": ["stool"],
909
+ DEFAULT_KEY: "stool",
910
+ "type": "string"
911
+ },
912
+ SAMPLE_TYPE_KEY: {
913
+ "allowed": ["stool"],
914
+ DEFAULT_KEY: "stool",
915
+ "type": "string"
916
+ }
917
+ }
688
918
  },
689
919
  "stool": {
690
920
  METADATA_FIELDS_KEY: {
921
+ # Host fields merged in
922
+ "country": {
923
+ "allowed": ["USA"],
924
+ DEFAULT_KEY: "USA",
925
+ "empty": False,
926
+ "is_phi": False,
927
+ "required": True,
928
+ "type": "string"
929
+ },
691
930
  # from stds same level host + sample type
692
931
  "description": {
693
932
  "allowed": ["host associated stool"],
694
933
  DEFAULT_KEY: "host associated stool",
934
+ "empty": False,
935
+ "is_phi": False,
936
+ "required": True,
695
937
  "type": "string"
696
938
  },
697
- # from stds same level host + sample type
698
- # (NB: comes from study)
939
+ "dna_extracted": {
940
+ "allowed": ["true", "false"],
941
+ DEFAULT_KEY: "true",
942
+ "empty": False,
943
+ "is_phi": False,
944
+ "required": True,
945
+ "type": "string"
946
+ },
947
+ "elevation": {
948
+ "anyof": [
949
+ {
950
+ "allowed": [
951
+ "not collected",
952
+ "not provided",
953
+ "restricted access"],
954
+ "type": "string"
955
+ },
956
+ {
957
+ "min": -413.0,
958
+ "type": "number"
959
+ }],
960
+ "empty": False,
961
+ "is_phi": False,
962
+ "required": True
963
+ },
964
+ "geo_loc_name": {
965
+ "allowed": ["USA:CA:San Diego"],
966
+ DEFAULT_KEY: "USA:CA:San Diego",
967
+ "empty": False,
968
+ "is_phi": False,
969
+ "required": True,
970
+ "type": "string"
971
+ },
972
+ "host_type": {
973
+ "allowed": ["control"],
974
+ DEFAULT_KEY: "control",
975
+ "empty": False,
976
+ "is_phi": False,
977
+ "required": True,
978
+ "type": "string"
979
+ },
980
+ # from stds same level host + sample type (NB: comes from study)
699
981
  "physical_specimen_location": {
700
982
  "allowed": ["UCSDST"],
701
983
  DEFAULT_KEY: "UCSDST",
@@ -704,8 +986,7 @@ class TestMetadataConfigurator(TestCase):
704
986
  "required": True,
705
987
  "type": "string"
706
988
  },
707
- # from stds same level host + sample type
708
- # (NB: comes from study)
989
+ # from stds same level host + sample type (NB: comes from study)
709
990
  "physical_specimen_remaining": {
710
991
  "allowed": ["true", "false"],
711
992
  DEFAULT_KEY: "true",
@@ -713,6 +994,16 @@ class TestMetadataConfigurator(TestCase):
713
994
  "is_phi": False,
714
995
  "required": True,
715
996
  "type": "string"
997
+ },
998
+ QIITA_SAMPLE_TYPE: {
999
+ "allowed": ["stool"],
1000
+ DEFAULT_KEY: "stool",
1001
+ "type": "string"
1002
+ },
1003
+ SAMPLE_TYPE_KEY: {
1004
+ "allowed": ["stool"],
1005
+ DEFAULT_KEY: "stool",
1006
+ "type": "string"
716
1007
  }
717
1008
  }
718
1009
  }
@@ -788,54 +1079,77 @@ class TestMetadataConfigurator(TestCase):
788
1079
  },
789
1080
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
790
1081
  "dung": {
791
- "base_type": "stool",
1082
+ # Resolved base_type=stool - has stool's fields + dung's overrides
792
1083
  METADATA_FIELDS_KEY: {
1084
+ "country": {
1085
+ "allowed": ["USA"],
1086
+ DEFAULT_KEY: "USA",
1087
+ "empty": False,
1088
+ "is_phi": False,
1089
+ "required": True,
1090
+ "type": "string"
1091
+ },
793
1092
  # overrides stds parent host + sample type
794
1093
  "description": {
795
1094
  "allowed": ["human dung"],
796
1095
  DEFAULT_KEY: "human dung",
1096
+ "empty": False,
1097
+ "is_phi": False,
1098
+ "required": True,
797
1099
  "type": "string"
798
1100
  },
799
- # overrides stds parent host + sample type
800
- "physical_specimen_location": {
801
- "allowed": ["FIELD"],
802
- DEFAULT_KEY: "FIELD",
803
- "type": "string"
804
- }
805
- }
806
- },
807
- "f": {
808
- "base_type": "stool"
809
- },
810
- "fe": {
811
- "alias": "stool"
812
- },
813
- "feces": {
814
- "alias": "stool"
815
- },
816
- "stool": {
817
- METADATA_FIELDS_KEY: {
818
- # from stds same level host + sample type
819
- "description": {
820
- "allowed": ["human stool"],
821
- DEFAULT_KEY: "human stool",
1101
+ "dna_extracted": {
1102
+ "allowed": ["true"],
1103
+ DEFAULT_KEY: "true",
1104
+ "empty": False,
1105
+ "is_phi": False,
1106
+ "required": True,
822
1107
  "type": "string"
823
1108
  },
824
- # from stds same level host + sample type
825
1109
  "elevation": {
1110
+ "anyof": [
1111
+ {
1112
+ "allowed": [
1113
+ "not collected",
1114
+ "not provided",
1115
+ "restricted access"],
1116
+ "type": "string"
1117
+ },
1118
+ {
1119
+ "min": -413.0,
1120
+ "type": "number"
1121
+ }],
826
1122
  DEFAULT_KEY: 14,
1123
+ "empty": False,
1124
+ "is_phi": False,
1125
+ "required": True,
827
1126
  "type": "number"
828
1127
  },
829
- # from stds parent level host + sample type
1128
+ "geo_loc_name": {
1129
+ "allowed": ["USA:CA:San Diego"],
1130
+ DEFAULT_KEY: "USA:CA:San Diego",
1131
+ "empty": False,
1132
+ "is_phi": False,
1133
+ "required": True,
1134
+ "type": "string"
1135
+ },
1136
+ "host_type": {
1137
+ "allowed": ["human"],
1138
+ DEFAULT_KEY: "human",
1139
+ "empty": False,
1140
+ "is_phi": False,
1141
+ "required": True,
1142
+ "type": "string"
1143
+ },
1144
+ # overrides stds parent host + sample type
830
1145
  "physical_specimen_location": {
831
- "allowed": ["UCSDST"],
832
- DEFAULT_KEY: "UCSDST",
1146
+ "allowed": ["FIELD"],
1147
+ DEFAULT_KEY: "FIELD",
833
1148
  "empty": False,
834
1149
  "is_phi": False,
835
1150
  "required": True,
836
1151
  "type": "string"
837
1152
  },
838
- # from stds same level host + sample type
839
1153
  "physical_specimen_remaining": {
840
1154
  "allowed": ["false"],
841
1155
  DEFAULT_KEY: "false",
@@ -843,14 +1157,388 @@ class TestMetadataConfigurator(TestCase):
843
1157
  "is_phi": False,
844
1158
  "required": True,
845
1159
  "type": "string"
1160
+ },
1161
+ QIITA_SAMPLE_TYPE: {
1162
+ "allowed": ["dung"],
1163
+ DEFAULT_KEY: "dung",
1164
+ "type": "string"
1165
+ },
1166
+ SAMPLE_TYPE_KEY: {
1167
+ "allowed": ["dung"],
1168
+ DEFAULT_KEY: "dung",
1169
+ "type": "string"
846
1170
  }
847
1171
  }
848
- }
849
- }
850
- },
851
- "dude": {
852
- DEFAULT_KEY: "not collected",
853
- METADATA_FIELDS_KEY: {
1172
+ },
1173
+ "f": {
1174
+ # Resolved base_type=stool - has stool's fields
1175
+ METADATA_FIELDS_KEY: {
1176
+ "country": {
1177
+ "allowed": ["USA"],
1178
+ DEFAULT_KEY: "USA",
1179
+ "empty": False,
1180
+ "is_phi": False,
1181
+ "required": True,
1182
+ "type": "string"
1183
+ },
1184
+ "description": {
1185
+ "allowed": ["human stool"],
1186
+ DEFAULT_KEY: "human stool",
1187
+ "empty": False,
1188
+ "is_phi": False,
1189
+ "required": True,
1190
+ "type": "string"
1191
+ },
1192
+ "dna_extracted": {
1193
+ "allowed": ["true"],
1194
+ DEFAULT_KEY: "true",
1195
+ "empty": False,
1196
+ "is_phi": False,
1197
+ "required": True,
1198
+ "type": "string"
1199
+ },
1200
+ "elevation": {
1201
+ "anyof": [
1202
+ {
1203
+ "allowed": [
1204
+ "not collected",
1205
+ "not provided",
1206
+ "restricted access"],
1207
+ "type": "string"
1208
+ },
1209
+ {
1210
+ "min": -413.0,
1211
+ "type": "number"
1212
+ }],
1213
+ DEFAULT_KEY: 14,
1214
+ "empty": False,
1215
+ "is_phi": False,
1216
+ "required": True,
1217
+ "type": "number"
1218
+ },
1219
+ "geo_loc_name": {
1220
+ "allowed": ["USA:CA:San Diego"],
1221
+ DEFAULT_KEY: "USA:CA:San Diego",
1222
+ "empty": False,
1223
+ "is_phi": False,
1224
+ "required": True,
1225
+ "type": "string"
1226
+ },
1227
+ "host_type": {
1228
+ "allowed": ["human"],
1229
+ DEFAULT_KEY: "human",
1230
+ "empty": False,
1231
+ "is_phi": False,
1232
+ "required": True,
1233
+ "type": "string"
1234
+ },
1235
+ "physical_specimen_location": {
1236
+ "allowed": ["UCSDST"],
1237
+ DEFAULT_KEY: "UCSDST",
1238
+ "empty": False,
1239
+ "is_phi": False,
1240
+ "required": True,
1241
+ "type": "string"
1242
+ },
1243
+ "physical_specimen_remaining": {
1244
+ "allowed": ["false"],
1245
+ DEFAULT_KEY: "false",
1246
+ "empty": False,
1247
+ "is_phi": False,
1248
+ "required": True,
1249
+ "type": "string"
1250
+ },
1251
+ QIITA_SAMPLE_TYPE: {
1252
+ "allowed": ["f"],
1253
+ DEFAULT_KEY: "f",
1254
+ "type": "string"
1255
+ },
1256
+ SAMPLE_TYPE_KEY: {
1257
+ "allowed": ["f"],
1258
+ DEFAULT_KEY: "f",
1259
+ "type": "string"
1260
+ }
1261
+ }
1262
+ },
1263
+ "fe": {
1264
+ # Resolved alias to stool - gets stool's resolved fields
1265
+ METADATA_FIELDS_KEY: {
1266
+ "country": {
1267
+ "allowed": ["USA"],
1268
+ DEFAULT_KEY: "USA",
1269
+ "empty": False,
1270
+ "is_phi": False,
1271
+ "required": True,
1272
+ "type": "string"
1273
+ },
1274
+ "description": {
1275
+ "allowed": ["human stool"],
1276
+ DEFAULT_KEY: "human stool",
1277
+ "empty": False,
1278
+ "is_phi": False,
1279
+ "required": True,
1280
+ "type": "string"
1281
+ },
1282
+ "dna_extracted": {
1283
+ "allowed": ["true"],
1284
+ DEFAULT_KEY: "true",
1285
+ "empty": False,
1286
+ "is_phi": False,
1287
+ "required": True,
1288
+ "type": "string"
1289
+ },
1290
+ "elevation": {
1291
+ "anyof": [
1292
+ {
1293
+ "allowed": [
1294
+ "not collected",
1295
+ "not provided",
1296
+ "restricted access"],
1297
+ "type": "string"
1298
+ },
1299
+ {
1300
+ "min": -413.0,
1301
+ "type": "number"
1302
+ }],
1303
+ DEFAULT_KEY: 14,
1304
+ "empty": False,
1305
+ "is_phi": False,
1306
+ "required": True,
1307
+ "type": "number"
1308
+ },
1309
+ "geo_loc_name": {
1310
+ "allowed": ["USA:CA:San Diego"],
1311
+ DEFAULT_KEY: "USA:CA:San Diego",
1312
+ "empty": False,
1313
+ "is_phi": False,
1314
+ "required": True,
1315
+ "type": "string"
1316
+ },
1317
+ "host_type": {
1318
+ "allowed": ["human"],
1319
+ DEFAULT_KEY: "human",
1320
+ "empty": False,
1321
+ "is_phi": False,
1322
+ "required": True,
1323
+ "type": "string"
1324
+ },
1325
+ "physical_specimen_location": {
1326
+ "allowed": ["UCSDST"],
1327
+ DEFAULT_KEY: "UCSDST",
1328
+ "empty": False,
1329
+ "is_phi": False,
1330
+ "required": True,
1331
+ "type": "string"
1332
+ },
1333
+ "physical_specimen_remaining": {
1334
+ "allowed": ["false"],
1335
+ DEFAULT_KEY: "false",
1336
+ "empty": False,
1337
+ "is_phi": False,
1338
+ "required": True,
1339
+ "type": "string"
1340
+ },
1341
+ QIITA_SAMPLE_TYPE: {
1342
+ "allowed": ["stool"],
1343
+ DEFAULT_KEY: "stool",
1344
+ "type": "string"
1345
+ },
1346
+ SAMPLE_TYPE_KEY: {
1347
+ "allowed": ["stool"],
1348
+ DEFAULT_KEY: "stool",
1349
+ "type": "string"
1350
+ }
1351
+ }
1352
+ },
1353
+ "feces": {
1354
+ # Resolved alias to stool - gets stool's resolved fields
1355
+ METADATA_FIELDS_KEY: {
1356
+ "country": {
1357
+ "allowed": ["USA"],
1358
+ DEFAULT_KEY: "USA",
1359
+ "empty": False,
1360
+ "is_phi": False,
1361
+ "required": True,
1362
+ "type": "string"
1363
+ },
1364
+ "description": {
1365
+ "allowed": ["human stool"],
1366
+ DEFAULT_KEY: "human stool",
1367
+ "empty": False,
1368
+ "is_phi": False,
1369
+ "required": True,
1370
+ "type": "string"
1371
+ },
1372
+ "dna_extracted": {
1373
+ "allowed": ["true"],
1374
+ DEFAULT_KEY: "true",
1375
+ "empty": False,
1376
+ "is_phi": False,
1377
+ "required": True,
1378
+ "type": "string"
1379
+ },
1380
+ "elevation": {
1381
+ "anyof": [
1382
+ {
1383
+ "allowed": [
1384
+ "not collected",
1385
+ "not provided",
1386
+ "restricted access"],
1387
+ "type": "string"
1388
+ },
1389
+ {
1390
+ "min": -413.0,
1391
+ "type": "number"
1392
+ }],
1393
+ DEFAULT_KEY: 14,
1394
+ "empty": False,
1395
+ "is_phi": False,
1396
+ "required": True,
1397
+ "type": "number"
1398
+ },
1399
+ "geo_loc_name": {
1400
+ "allowed": ["USA:CA:San Diego"],
1401
+ DEFAULT_KEY: "USA:CA:San Diego",
1402
+ "empty": False,
1403
+ "is_phi": False,
1404
+ "required": True,
1405
+ "type": "string"
1406
+ },
1407
+ "host_type": {
1408
+ "allowed": ["human"],
1409
+ DEFAULT_KEY: "human",
1410
+ "empty": False,
1411
+ "is_phi": False,
1412
+ "required": True,
1413
+ "type": "string"
1414
+ },
1415
+ "physical_specimen_location": {
1416
+ "allowed": ["UCSDST"],
1417
+ DEFAULT_KEY: "UCSDST",
1418
+ "empty": False,
1419
+ "is_phi": False,
1420
+ "required": True,
1421
+ "type": "string"
1422
+ },
1423
+ "physical_specimen_remaining": {
1424
+ "allowed": ["false"],
1425
+ DEFAULT_KEY: "false",
1426
+ "empty": False,
1427
+ "is_phi": False,
1428
+ "required": True,
1429
+ "type": "string"
1430
+ },
1431
+ QIITA_SAMPLE_TYPE: {
1432
+ "allowed": ["stool"],
1433
+ DEFAULT_KEY: "stool",
1434
+ "type": "string"
1435
+ },
1436
+ SAMPLE_TYPE_KEY: {
1437
+ "allowed": ["stool"],
1438
+ DEFAULT_KEY: "stool",
1439
+ "type": "string"
1440
+ }
1441
+ }
1442
+ },
1443
+ "stool": {
1444
+ METADATA_FIELDS_KEY: {
1445
+ # Host fields merged in
1446
+ "country": {
1447
+ "allowed": ["USA"],
1448
+ DEFAULT_KEY: "USA",
1449
+ "empty": False,
1450
+ "is_phi": False,
1451
+ "required": True,
1452
+ "type": "string"
1453
+ },
1454
+ # from stds same level host + sample type
1455
+ "description": {
1456
+ "allowed": ["human stool"],
1457
+ DEFAULT_KEY: "human stool",
1458
+ "empty": False,
1459
+ "is_phi": False,
1460
+ "required": True,
1461
+ "type": "string"
1462
+ },
1463
+ "dna_extracted": {
1464
+ "allowed": ["true"],
1465
+ DEFAULT_KEY: "true",
1466
+ "empty": False,
1467
+ "is_phi": False,
1468
+ "required": True,
1469
+ "type": "string"
1470
+ },
1471
+ # from stds same level host + sample type
1472
+ "elevation": {
1473
+ "anyof": [
1474
+ {
1475
+ "allowed": [
1476
+ "not collected",
1477
+ "not provided",
1478
+ "restricted access"],
1479
+ "type": "string"
1480
+ },
1481
+ {
1482
+ "min": -413.0,
1483
+ "type": "number"
1484
+ }],
1485
+ DEFAULT_KEY: 14,
1486
+ "empty": False,
1487
+ "is_phi": False,
1488
+ "required": True,
1489
+ "type": "number"
1490
+ },
1491
+ "geo_loc_name": {
1492
+ "allowed": ["USA:CA:San Diego"],
1493
+ DEFAULT_KEY: "USA:CA:San Diego",
1494
+ "empty": False,
1495
+ "is_phi": False,
1496
+ "required": True,
1497
+ "type": "string"
1498
+ },
1499
+ "host_type": {
1500
+ "allowed": ["human"],
1501
+ DEFAULT_KEY: "human",
1502
+ "empty": False,
1503
+ "is_phi": False,
1504
+ "required": True,
1505
+ "type": "string"
1506
+ },
1507
+ # from stds parent level host + sample type
1508
+ "physical_specimen_location": {
1509
+ "allowed": ["UCSDST"],
1510
+ DEFAULT_KEY: "UCSDST",
1511
+ "empty": False,
1512
+ "is_phi": False,
1513
+ "required": True,
1514
+ "type": "string"
1515
+ },
1516
+ # from stds same level host + sample type
1517
+ "physical_specimen_remaining": {
1518
+ "allowed": ["false"],
1519
+ DEFAULT_KEY: "false",
1520
+ "empty": False,
1521
+ "is_phi": False,
1522
+ "required": True,
1523
+ "type": "string"
1524
+ },
1525
+ QIITA_SAMPLE_TYPE: {
1526
+ "allowed": ["stool"],
1527
+ DEFAULT_KEY: "stool",
1528
+ "type": "string"
1529
+ },
1530
+ SAMPLE_TYPE_KEY: {
1531
+ "allowed": ["stool"],
1532
+ DEFAULT_KEY: "stool",
1533
+ "type": "string"
1534
+ }
1535
+ }
1536
+ }
1537
+ }
1538
+ },
1539
+ "dude": {
1540
+ DEFAULT_KEY: "not collected",
1541
+ METADATA_FIELDS_KEY: {
854
1542
  # from stds parent host
855
1543
  "country": {
856
1544
  "allowed": ["USA"],
@@ -918,44 +1606,431 @@ class TestMetadataConfigurator(TestCase):
918
1606
  },
919
1607
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
920
1608
  "dung": {
921
- "base_type": "stool",
1609
+ # Resolved base_type=stool - has stool's fields + dung's overrides
922
1610
  METADATA_FIELDS_KEY: {
923
- # overrides stds parent host + sample type
1611
+ "country": {
1612
+ "allowed": ["USA"],
1613
+ DEFAULT_KEY: "USA",
1614
+ "empty": False,
1615
+ "is_phi": False,
1616
+ "required": True,
1617
+ "type": "string"
1618
+ },
1619
+ # overrides stds parent host + sample type
924
1620
  "description": {
925
1621
  "allowed": ["human dung"],
926
1622
  DEFAULT_KEY: "human dung",
1623
+ "empty": False,
1624
+ "is_phi": False,
1625
+ "required": True,
1626
+ "type": "string"
1627
+ },
1628
+ "dna_extracted": {
1629
+ "allowed": ["true"],
1630
+ DEFAULT_KEY: "true",
1631
+ "empty": False,
1632
+ "is_phi": False,
1633
+ "required": True,
1634
+ "type": "string"
1635
+ },
1636
+ "elevation": {
1637
+ "anyof": [
1638
+ {
1639
+ "allowed": [
1640
+ "not collected",
1641
+ "not provided",
1642
+ "restricted access"],
1643
+ "type": "string"
1644
+ },
1645
+ {
1646
+ "min": -413.0,
1647
+ "type": "number"
1648
+ }],
1649
+ DEFAULT_KEY: 14,
1650
+ "empty": False,
1651
+ "is_phi": False,
1652
+ "required": True,
1653
+ "type": "number"
1654
+ },
1655
+ "geo_loc_name": {
1656
+ "allowed": ["USA:CA:San Diego"],
1657
+ DEFAULT_KEY: "USA:CA:San Diego",
1658
+ "empty": False,
1659
+ "is_phi": False,
1660
+ "required": True,
1661
+ "type": "string"
1662
+ },
1663
+ "host_type": {
1664
+ "allowed": ["dude"],
1665
+ DEFAULT_KEY: "dude",
1666
+ "empty": False,
1667
+ "is_phi": False,
1668
+ "required": True,
927
1669
  "type": "string"
928
1670
  },
929
1671
  # overrides stds parent host + sample type
930
1672
  "physical_specimen_location": {
931
1673
  "allowed": ["FIELD"],
932
1674
  DEFAULT_KEY: "FIELD",
1675
+ "empty": False,
1676
+ "is_phi": False,
1677
+ "required": True,
1678
+ "type": "string"
1679
+ },
1680
+ "physical_specimen_remaining": {
1681
+ "allowed": ["false"],
1682
+ DEFAULT_KEY: "false",
1683
+ "empty": False,
1684
+ "is_phi": False,
1685
+ "required": True,
1686
+ "type": "string"
1687
+ },
1688
+ QIITA_SAMPLE_TYPE: {
1689
+ "allowed": ["dung"],
1690
+ DEFAULT_KEY: "dung",
1691
+ "type": "string"
1692
+ },
1693
+ SAMPLE_TYPE_KEY: {
1694
+ "allowed": ["dung"],
1695
+ DEFAULT_KEY: "dung",
933
1696
  "type": "string"
934
1697
  }
935
1698
  }
936
1699
  },
937
1700
  "f": {
938
- "base_type": "stool"
1701
+ # Resolved base_type=stool - has stool's fields
1702
+ METADATA_FIELDS_KEY: {
1703
+ "country": {
1704
+ "allowed": ["USA"],
1705
+ DEFAULT_KEY: "USA",
1706
+ "empty": False,
1707
+ "is_phi": False,
1708
+ "required": True,
1709
+ "type": "string"
1710
+ },
1711
+ "description": {
1712
+ "allowed": ["human stool"],
1713
+ DEFAULT_KEY: "human stool",
1714
+ "empty": False,
1715
+ "is_phi": False,
1716
+ "required": True,
1717
+ "type": "string"
1718
+ },
1719
+ "dna_extracted": {
1720
+ "allowed": ["true"],
1721
+ DEFAULT_KEY: "true",
1722
+ "empty": False,
1723
+ "is_phi": False,
1724
+ "required": True,
1725
+ "type": "string"
1726
+ },
1727
+ "elevation": {
1728
+ "anyof": [
1729
+ {
1730
+ "allowed": [
1731
+ "not collected",
1732
+ "not provided",
1733
+ "restricted access"],
1734
+ "type": "string"
1735
+ },
1736
+ {
1737
+ "min": -413.0,
1738
+ "type": "number"
1739
+ }],
1740
+ DEFAULT_KEY: 14,
1741
+ "empty": False,
1742
+ "is_phi": False,
1743
+ "required": True,
1744
+ "type": "number"
1745
+ },
1746
+ "geo_loc_name": {
1747
+ "allowed": ["USA:CA:San Diego"],
1748
+ DEFAULT_KEY: "USA:CA:San Diego",
1749
+ "empty": False,
1750
+ "is_phi": False,
1751
+ "required": True,
1752
+ "type": "string"
1753
+ },
1754
+ "host_type": {
1755
+ "allowed": ["dude"],
1756
+ DEFAULT_KEY: "dude",
1757
+ "empty": False,
1758
+ "is_phi": False,
1759
+ "required": True,
1760
+ "type": "string"
1761
+ },
1762
+ "physical_specimen_location": {
1763
+ "allowed": ["UCSDST"],
1764
+ DEFAULT_KEY: "UCSDST",
1765
+ "empty": False,
1766
+ "is_phi": False,
1767
+ "required": True,
1768
+ "type": "string"
1769
+ },
1770
+ "physical_specimen_remaining": {
1771
+ "allowed": ["false"],
1772
+ DEFAULT_KEY: "false",
1773
+ "empty": False,
1774
+ "is_phi": False,
1775
+ "required": True,
1776
+ "type": "string"
1777
+ },
1778
+ QIITA_SAMPLE_TYPE: {
1779
+ "allowed": ["f"],
1780
+ DEFAULT_KEY: "f",
1781
+ "type": "string"
1782
+ },
1783
+ SAMPLE_TYPE_KEY: {
1784
+ "allowed": ["f"],
1785
+ DEFAULT_KEY: "f",
1786
+ "type": "string"
1787
+ }
1788
+ }
939
1789
  },
940
1790
  "fe": {
941
- "alias": "stool"
1791
+ # Resolved alias to stool - gets stool's resolved fields
1792
+ METADATA_FIELDS_KEY: {
1793
+ "country": {
1794
+ "allowed": ["USA"],
1795
+ DEFAULT_KEY: "USA",
1796
+ "empty": False,
1797
+ "is_phi": False,
1798
+ "required": True,
1799
+ "type": "string"
1800
+ },
1801
+ "description": {
1802
+ "allowed": ["human stool"],
1803
+ DEFAULT_KEY: "human stool",
1804
+ "empty": False,
1805
+ "is_phi": False,
1806
+ "required": True,
1807
+ "type": "string"
1808
+ },
1809
+ "dna_extracted": {
1810
+ "allowed": ["true"],
1811
+ DEFAULT_KEY: "true",
1812
+ "empty": False,
1813
+ "is_phi": False,
1814
+ "required": True,
1815
+ "type": "string"
1816
+ },
1817
+ "elevation": {
1818
+ "anyof": [
1819
+ {
1820
+ "allowed": [
1821
+ "not collected",
1822
+ "not provided",
1823
+ "restricted access"],
1824
+ "type": "string"
1825
+ },
1826
+ {
1827
+ "min": -413.0,
1828
+ "type": "number"
1829
+ }],
1830
+ DEFAULT_KEY: 14,
1831
+ "empty": False,
1832
+ "is_phi": False,
1833
+ "required": True,
1834
+ "type": "number"
1835
+ },
1836
+ "geo_loc_name": {
1837
+ "allowed": ["USA:CA:San Diego"],
1838
+ DEFAULT_KEY: "USA:CA:San Diego",
1839
+ "empty": False,
1840
+ "is_phi": False,
1841
+ "required": True,
1842
+ "type": "string"
1843
+ },
1844
+ "host_type": {
1845
+ "allowed": ["dude"],
1846
+ DEFAULT_KEY: "dude",
1847
+ "empty": False,
1848
+ "is_phi": False,
1849
+ "required": True,
1850
+ "type": "string"
1851
+ },
1852
+ "physical_specimen_location": {
1853
+ "allowed": ["UCSDST"],
1854
+ DEFAULT_KEY: "UCSDST",
1855
+ "empty": False,
1856
+ "is_phi": False,
1857
+ "required": True,
1858
+ "type": "string"
1859
+ },
1860
+ "physical_specimen_remaining": {
1861
+ "allowed": ["false"],
1862
+ DEFAULT_KEY: "false",
1863
+ "empty": False,
1864
+ "is_phi": False,
1865
+ "required": True,
1866
+ "type": "string"
1867
+ },
1868
+ QIITA_SAMPLE_TYPE: {
1869
+ "allowed": ["stool"],
1870
+ DEFAULT_KEY: "stool",
1871
+ "type": "string"
1872
+ },
1873
+ SAMPLE_TYPE_KEY: {
1874
+ "allowed": ["stool"],
1875
+ DEFAULT_KEY: "stool",
1876
+ "type": "string"
1877
+ }
1878
+ }
942
1879
  },
943
1880
  "feces": {
944
- "alias": "stool"
1881
+ # Resolved alias to stool - gets stool's resolved fields
1882
+ METADATA_FIELDS_KEY: {
1883
+ "country": {
1884
+ "allowed": ["USA"],
1885
+ DEFAULT_KEY: "USA",
1886
+ "empty": False,
1887
+ "is_phi": False,
1888
+ "required": True,
1889
+ "type": "string"
1890
+ },
1891
+ "description": {
1892
+ "allowed": ["human stool"],
1893
+ DEFAULT_KEY: "human stool",
1894
+ "empty": False,
1895
+ "is_phi": False,
1896
+ "required": True,
1897
+ "type": "string"
1898
+ },
1899
+ "dna_extracted": {
1900
+ "allowed": ["true"],
1901
+ DEFAULT_KEY: "true",
1902
+ "empty": False,
1903
+ "is_phi": False,
1904
+ "required": True,
1905
+ "type": "string"
1906
+ },
1907
+ "elevation": {
1908
+ "anyof": [
1909
+ {
1910
+ "allowed": [
1911
+ "not collected",
1912
+ "not provided",
1913
+ "restricted access"],
1914
+ "type": "string"
1915
+ },
1916
+ {
1917
+ "min": -413.0,
1918
+ "type": "number"
1919
+ }],
1920
+ DEFAULT_KEY: 14,
1921
+ "empty": False,
1922
+ "is_phi": False,
1923
+ "required": True,
1924
+ "type": "number"
1925
+ },
1926
+ "geo_loc_name": {
1927
+ "allowed": ["USA:CA:San Diego"],
1928
+ DEFAULT_KEY: "USA:CA:San Diego",
1929
+ "empty": False,
1930
+ "is_phi": False,
1931
+ "required": True,
1932
+ "type": "string"
1933
+ },
1934
+ "host_type": {
1935
+ "allowed": ["dude"],
1936
+ DEFAULT_KEY: "dude",
1937
+ "empty": False,
1938
+ "is_phi": False,
1939
+ "required": True,
1940
+ "type": "string"
1941
+ },
1942
+ "physical_specimen_location": {
1943
+ "allowed": ["UCSDST"],
1944
+ DEFAULT_KEY: "UCSDST",
1945
+ "empty": False,
1946
+ "is_phi": False,
1947
+ "required": True,
1948
+ "type": "string"
1949
+ },
1950
+ "physical_specimen_remaining": {
1951
+ "allowed": ["false"],
1952
+ DEFAULT_KEY: "false",
1953
+ "empty": False,
1954
+ "is_phi": False,
1955
+ "required": True,
1956
+ "type": "string"
1957
+ },
1958
+ QIITA_SAMPLE_TYPE: {
1959
+ "allowed": ["stool"],
1960
+ DEFAULT_KEY: "stool",
1961
+ "type": "string"
1962
+ },
1963
+ SAMPLE_TYPE_KEY: {
1964
+ "allowed": ["stool"],
1965
+ DEFAULT_KEY: "stool",
1966
+ "type": "string"
1967
+ }
1968
+ }
945
1969
  },
946
1970
  "stool": {
947
1971
  METADATA_FIELDS_KEY: {
1972
+ # Host fields merged in
1973
+ "country": {
1974
+ "allowed": ["USA"],
1975
+ DEFAULT_KEY: "USA",
1976
+ "empty": False,
1977
+ "is_phi": False,
1978
+ "required": True,
1979
+ "type": "string"
1980
+ },
948
1981
  # from stds same level host + sample type
949
1982
  "description": {
950
1983
  "allowed": ["human stool"],
951
1984
  DEFAULT_KEY: "human stool",
1985
+ "empty": False,
1986
+ "is_phi": False,
1987
+ "required": True,
1988
+ "type": "string"
1989
+ },
1990
+ "dna_extracted": {
1991
+ "allowed": ["true"],
1992
+ DEFAULT_KEY: "true",
1993
+ "empty": False,
1994
+ "is_phi": False,
1995
+ "required": True,
952
1996
  "type": "string"
953
1997
  },
954
1998
  # from stds same level host + sample type
955
1999
  "elevation": {
2000
+ "anyof": [
2001
+ {
2002
+ "allowed": [
2003
+ "not collected",
2004
+ "not provided",
2005
+ "restricted access"],
2006
+ "type": "string"
2007
+ },
2008
+ {
2009
+ "min": -413.0,
2010
+ "type": "number"
2011
+ }],
956
2012
  DEFAULT_KEY: 14,
2013
+ "empty": False,
2014
+ "is_phi": False,
2015
+ "required": True,
957
2016
  "type": "number"
958
2017
  },
2018
+ "geo_loc_name": {
2019
+ "allowed": ["USA:CA:San Diego"],
2020
+ DEFAULT_KEY: "USA:CA:San Diego",
2021
+ "empty": False,
2022
+ "is_phi": False,
2023
+ "required": True,
2024
+ "type": "string"
2025
+ },
2026
+ "host_type": {
2027
+ "allowed": ["dude"],
2028
+ DEFAULT_KEY: "dude",
2029
+ "empty": False,
2030
+ "is_phi": False,
2031
+ "required": True,
2032
+ "type": "string"
2033
+ },
959
2034
  # from stds parent level host + sample type
960
2035
  "physical_specimen_location": {
961
2036
  "allowed": ["UCSDST"],
@@ -973,6 +2048,16 @@ class TestMetadataConfigurator(TestCase):
973
2048
  "is_phi": False,
974
2049
  "required": True,
975
2050
  "type": "string"
2051
+ },
2052
+ QIITA_SAMPLE_TYPE: {
2053
+ "allowed": ["stool"],
2054
+ DEFAULT_KEY: "stool",
2055
+ "type": "string"
2056
+ },
2057
+ SAMPLE_TYPE_KEY: {
2058
+ "allowed": ["stool"],
2059
+ DEFAULT_KEY: "stool",
2060
+ "type": "string"
976
2061
  }
977
2062
  }
978
2063
  }
@@ -1105,7 +2190,6 @@ class TestMetadataConfigurator(TestCase):
1105
2190
  self.NESTED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY],
1106
2191
  out_nested_dict)
1107
2192
 
1108
-
1109
2193
  def test_flatten_nested_stds_dict(self):
1110
2194
  """Test flattening a nested standards dictionary."""
1111
2195
  out_flattened_dict = flatten_nested_stds_dict(
@@ -1167,7 +2251,48 @@ class TestMetadataConfigurator(TestCase):
1167
2251
  }
1168
2252
  }
1169
2253
 
1170
- expected = input_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]
2254
+ # After resolution, sample types have host metadata merged in
2255
+ # plus sample_type and qiita_sample_type fields
2256
+ expected = {
2257
+ "host_a": {
2258
+ DEFAULT_KEY: "not provided",
2259
+ METADATA_FIELDS_KEY: {
2260
+ "field1": {
2261
+ TYPE_KEY: "string",
2262
+ DEFAULT_KEY: "value1"
2263
+ }
2264
+ },
2265
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2266
+ "sample1": {
2267
+ METADATA_FIELDS_KEY: {
2268
+ "field1": {
2269
+ TYPE_KEY: "string",
2270
+ DEFAULT_KEY: "value1"
2271
+ },
2272
+ "sample_field": {TYPE_KEY: "string"},
2273
+ SAMPLE_TYPE_KEY: {
2274
+ ALLOWED_KEY: ["sample1"],
2275
+ DEFAULT_KEY: "sample1",
2276
+ TYPE_KEY: "string"
2277
+ },
2278
+ QIITA_SAMPLE_TYPE: {
2279
+ ALLOWED_KEY: ["sample1"],
2280
+ DEFAULT_KEY: "sample1",
2281
+ TYPE_KEY: "string"
2282
+ }
2283
+ }
2284
+ }
2285
+ }
2286
+ },
2287
+ "host_b": {
2288
+ DEFAULT_KEY: "not collected",
2289
+ METADATA_FIELDS_KEY: {
2290
+ "field2": {
2291
+ TYPE_KEY: "integer"
2292
+ }
2293
+ }
2294
+ }
2295
+ }
1171
2296
 
1172
2297
  result = flatten_nested_stds_dict(input_dict, None)
1173
2298
 
@@ -1261,8 +2386,8 @@ class TestMetadataConfigurator(TestCase):
1261
2386
  "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
1262
2387
  }
1263
2388
  },
1264
- "saliva": {
1265
- ALIAS_KEY: "oral"
2389
+ "fe": {
2390
+ ALIAS_KEY: "stool"
1266
2391
  }
1267
2392
  },
1268
2393
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
@@ -1285,17 +2410,41 @@ class TestMetadataConfigurator(TestCase):
1285
2410
  }
1286
2411
  }
1287
2412
 
2413
+ # After resolution, each sample type has resolved metadata_fields
2414
+ # with host metadata merged in plus sample_type and qiita_sample_type
1288
2415
  expected = {
1289
2416
  "parent_host": {
1290
2417
  DEFAULT_KEY: "not provided",
1291
2418
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1292
2419
  "stool": {
1293
2420
  METADATA_FIELDS_KEY: {
1294
- "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
2421
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2422
+ SAMPLE_TYPE_KEY: {
2423
+ ALLOWED_KEY: ["stool"],
2424
+ DEFAULT_KEY: "stool",
2425
+ TYPE_KEY: "string"
2426
+ },
2427
+ QIITA_SAMPLE_TYPE: {
2428
+ ALLOWED_KEY: ["stool"],
2429
+ DEFAULT_KEY: "stool",
2430
+ TYPE_KEY: "string"
2431
+ }
1295
2432
  }
1296
2433
  },
1297
- "saliva": {
1298
- ALIAS_KEY: "oral"
2434
+ "fe": {
2435
+ METADATA_FIELDS_KEY: {
2436
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2437
+ SAMPLE_TYPE_KEY: {
2438
+ ALLOWED_KEY: ["stool"],
2439
+ DEFAULT_KEY: "stool",
2440
+ TYPE_KEY: "string"
2441
+ },
2442
+ QIITA_SAMPLE_TYPE: {
2443
+ ALLOWED_KEY: ["stool"],
2444
+ DEFAULT_KEY: "stool",
2445
+ TYPE_KEY: "string"
2446
+ }
2447
+ }
1299
2448
  }
1300
2449
  }
1301
2450
  },
@@ -1305,15 +2454,48 @@ class TestMetadataConfigurator(TestCase):
1305
2454
  "stool": {
1306
2455
  METADATA_FIELDS_KEY: {
1307
2456
  "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
1308
- "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"}
2457
+ "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
2458
+ SAMPLE_TYPE_KEY: {
2459
+ ALLOWED_KEY: ["stool"],
2460
+ DEFAULT_KEY: "stool",
2461
+ TYPE_KEY: "string"
2462
+ },
2463
+ QIITA_SAMPLE_TYPE: {
2464
+ ALLOWED_KEY: ["stool"],
2465
+ DEFAULT_KEY: "stool",
2466
+ TYPE_KEY: "string"
2467
+ }
1309
2468
  }
1310
2469
  },
1311
- "saliva": {
1312
- ALIAS_KEY: "oral"
2470
+ "fe": {
2471
+ METADATA_FIELDS_KEY: {
2472
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2473
+ "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
2474
+ SAMPLE_TYPE_KEY: {
2475
+ ALLOWED_KEY: ["stool"],
2476
+ DEFAULT_KEY: "stool",
2477
+ TYPE_KEY: "string"
2478
+ },
2479
+ QIITA_SAMPLE_TYPE: {
2480
+ ALLOWED_KEY: ["stool"],
2481
+ DEFAULT_KEY: "stool",
2482
+ TYPE_KEY: "string"
2483
+ }
2484
+ }
1313
2485
  },
1314
2486
  "blood": {
1315
2487
  METADATA_FIELDS_KEY: {
1316
- "blood_field": {TYPE_KEY: "string"}
2488
+ "blood_field": {TYPE_KEY: "string"},
2489
+ SAMPLE_TYPE_KEY: {
2490
+ ALLOWED_KEY: ["blood"],
2491
+ DEFAULT_KEY: "blood",
2492
+ TYPE_KEY: "string"
2493
+ },
2494
+ QIITA_SAMPLE_TYPE: {
2495
+ ALLOWED_KEY: ["blood"],
2496
+ DEFAULT_KEY: "blood",
2497
+ TYPE_KEY: "string"
2498
+ }
1317
2499
  }
1318
2500
  }
1319
2501
  }
@@ -1509,7 +2691,7 @@ class TestMetadataConfigurator(TestCase):
1509
2691
  }
1510
2692
  }
1511
2693
  }
1512
-
2694
+
1513
2695
  expected = {
1514
2696
  "field1": {
1515
2697
  "allowed": ["value2"],
@@ -1523,7 +2705,7 @@ class TestMetadataConfigurator(TestCase):
1523
2705
  "allowed": ["valueX"]
1524
2706
  }
1525
2707
  }
1526
-
2708
+
1527
2709
  result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
1528
2710
  self.assertDictEqual(expected, result)
1529
2711
 
@@ -1676,8 +2858,8 @@ class TestMetadataConfigurator(TestCase):
1676
2858
  "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCSD"}
1677
2859
  }
1678
2860
  },
1679
- "saliva": {
1680
- ALIAS_KEY: "oral"
2861
+ "fe": {
2862
+ ALIAS_KEY: "stool"
1681
2863
  }
1682
2864
  }
1683
2865
  }
@@ -1737,8 +2919,8 @@ class TestMetadataConfigurator(TestCase):
1737
2919
  }
1738
2920
  },
1739
2921
  # Preserved from base
1740
- "saliva": {
1741
- ALIAS_KEY: "oral"
2922
+ "fe": {
2923
+ ALIAS_KEY: "stool"
1742
2924
  },
1743
2925
  # New from add
1744
2926
  "blood": {
@@ -1847,7 +3029,7 @@ class TestMetadataConfigurator(TestCase):
1847
3029
  }
1848
3030
  }
1849
3031
  }
1850
-
3032
+
1851
3033
  expected = {
1852
3034
  "sample_type1": {
1853
3035
  "alias": "sample_type2"
@@ -1878,7 +3060,7 @@ class TestMetadataConfigurator(TestCase):
1878
3060
  }
1879
3061
  }
1880
3062
  }
1881
-
3063
+
1882
3064
  result = _combine_base_and_added_sample_type_specific_metadata(base_dict, add_dict)
1883
3065
  self.assertDictEqual(expected, result)
1884
3066
 
@@ -2150,175 +3332,461 @@ class TestMetadataConfigurator(TestCase):
2150
3332
  with self.assertRaisesRegex(ValueError, "Sample type 'test_sample' has neither 'alias' nor 'metadata_fields' keys"):
2151
3333
  _id_sample_type_definition("test_sample", sample_dict)
2152
3334
 
2153
- # Tests for build_full_flat_config_dict
2154
-
2155
- def test_build_full_flat_config_dict_no_inputs(self):
2156
- """Test build_full_flat_config_dict with no arguments uses all defaults."""
2157
- result = build_full_flat_config_dict()
2158
-
2159
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2160
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2161
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2162
- self.assertIsInstance(hosts_dict, dict)
3335
+ # Tests for _resolve_sample_type_aliases_and_bases
2163
3336
 
2164
- # Should have "base" host type with sample_name metadata field
2165
- self.assertIn("base", hosts_dict)
2166
- base_host = hosts_dict["base"]
2167
- self.assertIn(METADATA_FIELDS_KEY, base_host)
2168
- self.assertIn("sample_name", base_host[METADATA_FIELDS_KEY])
3337
+ def test__resolve_sample_type_aliases_and_bases_simple(self):
3338
+ """Test basic resolution with no aliases or bases.
2169
3339
 
2170
- # Should have "human" host type with host_common_name defaulting to "human"
2171
- self.assertIn("human", hosts_dict)
2172
- human_host = hosts_dict["human"]
2173
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2174
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2175
- self.assertEqual(
2176
- "human",
2177
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3340
+ Input: Single sample type with metadata fields, empty host metadata.
3341
+ Expected: Sample type has its metadata fields plus sample_type and qiita_sample_type added.
3342
+ """
3343
+ sample_types_dict = {
3344
+ "stool": {
3345
+ METADATA_FIELDS_KEY: {
3346
+ "body_site": {
3347
+ DEFAULT_KEY: "gut",
3348
+ TYPE_KEY: "string"
3349
+ }
3350
+ }
3351
+ }
3352
+ }
3353
+ host_metadata_fields_dict = {}
2178
3354
 
2179
- # Should have default software config keys with expected default value
2180
- self.assertIn(DEFAULT_KEY, result)
2181
- self.assertEqual("not applicable", result[DEFAULT_KEY])
3355
+ result = _resolve_sample_type_aliases_and_bases(
3356
+ sample_types_dict, host_metadata_fields_dict)
2182
3357
 
2183
- def test_build_full_flat_config_dict_with_study_config(self):
2184
- """Test build_full_flat_config_dict with study config merges correctly."""
2185
- software_config = {
2186
- DEFAULT_KEY: "software_default",
2187
- LEAVE_REQUIREDS_BLANK_KEY: True,
2188
- OVERWRITE_NON_NANS_KEY: False
2189
- }
2190
- study_config = {
2191
- STUDY_SPECIFIC_METADATA_KEY: {
2192
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2193
- "human": {
2194
- METADATA_FIELDS_KEY: {
2195
- "custom_field": {
2196
- DEFAULT_KEY: "custom_value",
2197
- TYPE_KEY: "string"
2198
- }
2199
- },
2200
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2201
- "stool": {
2202
- METADATA_FIELDS_KEY: {}
2203
- }
2204
- }
3358
+ expected = {
3359
+ "stool": {
3360
+ METADATA_FIELDS_KEY: {
3361
+ "body_site": {
3362
+ DEFAULT_KEY: "gut",
3363
+ TYPE_KEY: "string"
3364
+ },
3365
+ # sample_type field added by resolution
3366
+ SAMPLE_TYPE_KEY: {
3367
+ ALLOWED_KEY: ["stool"],
3368
+ DEFAULT_KEY: "stool",
3369
+ TYPE_KEY: "string"
3370
+ },
3371
+ # qiita_sample_type field added by resolution (same as sample_type)
3372
+ QIITA_SAMPLE_TYPE: {
3373
+ ALLOWED_KEY: ["stool"],
3374
+ DEFAULT_KEY: "stool",
3375
+ TYPE_KEY: "string"
2205
3376
  }
2206
3377
  }
2207
3378
  }
2208
3379
  }
3380
+ self.assertDictEqual(expected, result)
2209
3381
 
2210
- result = build_full_flat_config_dict(
2211
- study_config, software_config, self.TEST_STDS_FP)
3382
+ def test__resolve_sample_type_aliases_and_bases_with_alias(self):
3383
+ """Test that alias is resolved to target sample type's metadata.
2212
3384
 
2213
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2214
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2215
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2216
- self.assertIsInstance(hosts_dict, dict)
3385
+ Input: 'feces' is alias to 'stool', 'stool' has metadata.
3386
+ Expected: Both 'feces' and 'stool' are resolved with same metadata,
3387
+ but sample_type field uses the alias target name ('stool').
3388
+ """
3389
+ sample_types_dict = {
3390
+ "feces": {
3391
+ ALIAS_KEY: "stool"
3392
+ },
3393
+ "stool": {
3394
+ METADATA_FIELDS_KEY: {
3395
+ "stool_field": {
3396
+ DEFAULT_KEY: "stool_value",
3397
+ TYPE_KEY: "string"
3398
+ }
3399
+ }
3400
+ }
3401
+ }
3402
+ host_metadata_fields_dict = {}
2217
3403
 
2218
- # Should have "human" host type with host_common_name defaulting to "human"
2219
- self.assertIn("human", hosts_dict)
2220
- human_host = hosts_dict["human"]
2221
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2222
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2223
- self.assertEqual(
2224
- "human",
2225
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3404
+ result = _resolve_sample_type_aliases_and_bases(
3405
+ sample_types_dict, host_metadata_fields_dict)
2226
3406
 
2227
- # Should have custom_field from study config
2228
- self.assertIn("custom_field", human_host[METADATA_FIELDS_KEY])
2229
- self.assertEqual(
2230
- "custom_value",
2231
- human_host[METADATA_FIELDS_KEY]["custom_field"][DEFAULT_KEY])
3407
+ # Both entries resolve to same metadata, sample_type uses alias target name
3408
+ stool_resolved_metadata = {
3409
+ "stool_field": {
3410
+ DEFAULT_KEY: "stool_value",
3411
+ TYPE_KEY: "string"
3412
+ },
3413
+ SAMPLE_TYPE_KEY: {
3414
+ ALLOWED_KEY: ["stool"],
3415
+ DEFAULT_KEY: "stool",
3416
+ TYPE_KEY: "string"
3417
+ },
3418
+ QIITA_SAMPLE_TYPE: {
3419
+ ALLOWED_KEY: ["stool"],
3420
+ DEFAULT_KEY: "stool",
3421
+ TYPE_KEY: "string"
3422
+ }
3423
+ }
3424
+ expected = {
3425
+ # Alias entry resolves to same metadata as target (sample_type="stool")
3426
+ "feces": {
3427
+ METADATA_FIELDS_KEY: stool_resolved_metadata
3428
+ },
3429
+ # Target sample type is fully resolved
3430
+ "stool": {
3431
+ METADATA_FIELDS_KEY: stool_resolved_metadata
3432
+ }
3433
+ }
3434
+ self.assertDictEqual(expected, result)
2232
3435
 
2233
- # Should have software config default value
2234
- self.assertIn(DEFAULT_KEY, result)
2235
- self.assertEqual("software_default", result[DEFAULT_KEY])
3436
+ def test__resolve_sample_type_aliases_and_bases_chained_alias_raises(self):
3437
+ """Test that chained aliases raise ValueError.
2236
3438
 
2237
- def test_build_full_flat_config_dict_without_study_config(self):
2238
- """Test build_full_flat_config_dict with no study config uses standards only."""
2239
- software_config = {
2240
- DEFAULT_KEY: "software_default",
2241
- LEAVE_REQUIREDS_BLANK_KEY: True,
2242
- OVERWRITE_NON_NANS_KEY: False
3439
+ Input: 'feces' aliases to 'stool', 'stool' aliases to 'poop'.
3440
+ Expected: ValueError because chained aliases are not allowed.
3441
+ """
3442
+ sample_types_dict = {
3443
+ "feces": {
3444
+ ALIAS_KEY: "stool"
3445
+ },
3446
+ "stool": {
3447
+ ALIAS_KEY: "poop"
3448
+ },
3449
+ "poop": {
3450
+ METADATA_FIELDS_KEY: {}
3451
+ }
2243
3452
  }
3453
+ host_metadata_fields_dict = {}
2244
3454
 
2245
- result = build_full_flat_config_dict(
2246
- None, software_config, self.TEST_STDS_FP)
3455
+ with self.assertRaisesRegex(ValueError, "May not chain aliases"):
3456
+ _resolve_sample_type_aliases_and_bases(
3457
+ sample_types_dict, host_metadata_fields_dict)
2247
3458
 
2248
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2249
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2250
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2251
- self.assertIsInstance(hosts_dict, dict)
3459
+ def test__resolve_sample_type_aliases_and_bases_with_base_type(self):
3460
+ """Test that base type fields are inherited and overlaid.
2252
3461
 
2253
- # Should have "human" host type with host_common_name defaulting to "human"
2254
- self.assertIn("human", hosts_dict)
2255
- human_host = hosts_dict["human"]
2256
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2257
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2258
- self.assertEqual(
2259
- "human",
2260
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3462
+ Input: 'derived_sample' has base_type 'base_sample'.
3463
+ Expected: 'derived_sample' inherits base fields, adds own, base_type key removed.
3464
+ """
3465
+ sample_types_dict = {
3466
+ "base_sample": {
3467
+ METADATA_FIELDS_KEY: {
3468
+ "base_field": {
3469
+ DEFAULT_KEY: "base_value",
3470
+ TYPE_KEY: "string"
3471
+ }
3472
+ }
3473
+ },
3474
+ "derived_sample": {
3475
+ BASE_TYPE_KEY: "base_sample",
3476
+ METADATA_FIELDS_KEY: {
3477
+ "derived_field": {
3478
+ DEFAULT_KEY: "derived_value",
3479
+ TYPE_KEY: "string"
3480
+ }
3481
+ }
3482
+ }
3483
+ }
3484
+ host_metadata_fields_dict = {}
2261
3485
 
2262
- # Should preserve software config settings
2263
- self.assertEqual("software_default", result[DEFAULT_KEY])
3486
+ result = _resolve_sample_type_aliases_and_bases(
3487
+ sample_types_dict, host_metadata_fields_dict)
2264
3488
 
2265
- def test_build_full_flat_config_dict_merges_software_and_study(self):
2266
- """Test that study config values override software config values."""
2267
- software_config = {
2268
- DEFAULT_KEY: "software_default",
2269
- LEAVE_REQUIREDS_BLANK_KEY: False,
2270
- OVERWRITE_NON_NANS_KEY: True
2271
- }
2272
- study_config = {
2273
- DEFAULT_KEY: "study_default",
2274
- LEAVE_REQUIREDS_BLANK_KEY: True,
2275
- STUDY_SPECIFIC_METADATA_KEY: {
2276
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2277
- "human": {
2278
- METADATA_FIELDS_KEY: {},
2279
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2280
- "stool": {
2281
- METADATA_FIELDS_KEY: {}
2282
- }
2283
- }
3489
+ expected = {
3490
+ # Base sample type is fully resolved
3491
+ "base_sample": {
3492
+ METADATA_FIELDS_KEY: {
3493
+ "base_field": {
3494
+ DEFAULT_KEY: "base_value",
3495
+ TYPE_KEY: "string"
3496
+ },
3497
+ SAMPLE_TYPE_KEY: {
3498
+ ALLOWED_KEY: ["base_sample"],
3499
+ DEFAULT_KEY: "base_sample",
3500
+ TYPE_KEY: "string"
3501
+ },
3502
+ QIITA_SAMPLE_TYPE: {
3503
+ ALLOWED_KEY: ["base_sample"],
3504
+ DEFAULT_KEY: "base_sample",
3505
+ TYPE_KEY: "string"
3506
+ }
3507
+ }
3508
+ },
3509
+ # Derived sample type inherits base fields, base_type key removed
3510
+ "derived_sample": {
3511
+ METADATA_FIELDS_KEY: {
3512
+ # Inherited from base
3513
+ "base_field": {
3514
+ DEFAULT_KEY: "base_value",
3515
+ TYPE_KEY: "string"
3516
+ },
3517
+ # Own field
3518
+ "derived_field": {
3519
+ DEFAULT_KEY: "derived_value",
3520
+ TYPE_KEY: "string"
3521
+ },
3522
+ SAMPLE_TYPE_KEY: {
3523
+ ALLOWED_KEY: ["derived_sample"],
3524
+ DEFAULT_KEY: "derived_sample",
3525
+ TYPE_KEY: "string"
3526
+ },
3527
+ QIITA_SAMPLE_TYPE: {
3528
+ ALLOWED_KEY: ["derived_sample"],
3529
+ DEFAULT_KEY: "derived_sample",
3530
+ TYPE_KEY: "string"
2284
3531
  }
2285
3532
  }
2286
3533
  }
2287
3534
  }
3535
+ self.assertDictEqual(expected, result)
2288
3536
 
2289
- result = build_full_flat_config_dict(
2290
- study_config, software_config, self.TEST_STDS_FP)
3537
+ def test__resolve_sample_type_aliases_and_bases_base_type_invalid_raises(self):
3538
+ """Test that base type with non-metadata-fields keys raises ValueError.
2291
3539
 
2292
- # Study config should override software config
2293
- self.assertEqual("study_default", result[DEFAULT_KEY])
2294
- self.assertTrue(result[LEAVE_REQUIREDS_BLANK_KEY])
2295
- # Software config value should be preserved when not overridden
2296
- self.assertTrue(result[OVERWRITE_NON_NANS_KEY])
3540
+ Input: Base sample type has extra keys beyond metadata_fields.
3541
+ Expected: ValueError because base must only have metadata_fields.
3542
+ """
3543
+ sample_types_dict = {
3544
+ "base_sample": {
3545
+ METADATA_FIELDS_KEY: {
3546
+ "base_field": {DEFAULT_KEY: "value", TYPE_KEY: "string"}
3547
+ },
3548
+ "extra_key": "not_allowed"
3549
+ },
3550
+ "derived_sample": {
3551
+ BASE_TYPE_KEY: "base_sample",
3552
+ METADATA_FIELDS_KEY: {}
3553
+ }
3554
+ }
3555
+ host_metadata_fields_dict = {}
2297
3556
 
2298
- def test_build_full_flat_config_dict_none_software_config(self):
2299
- """Test that None software_config loads defaults from config.yml."""
2300
- study_config = {
2301
- STUDY_SPECIFIC_METADATA_KEY: {
2302
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2303
- "human": {
2304
- METADATA_FIELDS_KEY: {},
2305
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2306
- "stool": {
2307
- METADATA_FIELDS_KEY: {}
2308
- }
2309
- }
3557
+ with self.assertRaisesRegex(ValueError, "must only have metadata fields"):
3558
+ _resolve_sample_type_aliases_and_bases(
3559
+ sample_types_dict, host_metadata_fields_dict)
3560
+
3561
+ def test__resolve_sample_type_aliases_and_bases_sets_sample_type(self):
3562
+ """Test that sample_type field is added with correct allowed/default.
3563
+
3564
+ Input: Sample type without sample_type field.
3565
+ Expected: sample_type field added with allowed=[sample_type_name], default=sample_type_name.
3566
+ """
3567
+ sample_types_dict = {
3568
+ "blood": {
3569
+ METADATA_FIELDS_KEY: {
3570
+ "body_site": {
3571
+ DEFAULT_KEY: "blood",
3572
+ TYPE_KEY: "string"
2310
3573
  }
2311
3574
  }
2312
3575
  }
2313
3576
  }
3577
+ host_metadata_fields_dict = {}
2314
3578
 
2315
- result = build_full_flat_config_dict(
2316
- study_config, None, self.TEST_STDS_FP)
3579
+ result = _resolve_sample_type_aliases_and_bases(
3580
+ sample_types_dict, host_metadata_fields_dict)
2317
3581
 
2318
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2319
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2320
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2321
- self.assertIsInstance(hosts_dict, dict)
3582
+ expected = {
3583
+ "blood": {
3584
+ METADATA_FIELDS_KEY: {
3585
+ "body_site": {
3586
+ DEFAULT_KEY: "blood",
3587
+ TYPE_KEY: "string"
3588
+ },
3589
+ SAMPLE_TYPE_KEY: {
3590
+ ALLOWED_KEY: ["blood"],
3591
+ DEFAULT_KEY: "blood",
3592
+ TYPE_KEY: "string"
3593
+ },
3594
+ QIITA_SAMPLE_TYPE: {
3595
+ ALLOWED_KEY: ["blood"],
3596
+ DEFAULT_KEY: "blood",
3597
+ TYPE_KEY: "string"
3598
+ }
3599
+ }
3600
+ }
3601
+ }
3602
+ self.assertDictEqual(expected, result)
3603
+
3604
+ def test__resolve_sample_type_aliases_and_bases_preserves_existing_qiita_sample_type(self):
3605
+ """Test that existing qiita_sample_type is not overwritten.
3606
+
3607
+ Input: Sample type already has qiita_sample_type defined with very different value.
3608
+ Expected: Existing qiita_sample_type preserved exactly, sample_type still added.
3609
+ """
3610
+ sample_types_dict = {
3611
+ "stool": {
3612
+ METADATA_FIELDS_KEY: {
3613
+ "body_site": {
3614
+ DEFAULT_KEY: "gut",
3615
+ TYPE_KEY: "string"
3616
+ },
3617
+ # Pre-existing qiita_sample_type with VERY different value
3618
+ # to make it clear it's preserved, not overwritten
3619
+ QIITA_SAMPLE_TYPE: {
3620
+ ALLOWED_KEY: ["CUSTOM_QIITA_VALUE_12345"],
3621
+ DEFAULT_KEY: "CUSTOM_QIITA_VALUE_12345",
3622
+ TYPE_KEY: "string"
3623
+ }
3624
+ }
3625
+ }
3626
+ }
3627
+ host_metadata_fields_dict = {}
3628
+
3629
+ result = _resolve_sample_type_aliases_and_bases(
3630
+ sample_types_dict, host_metadata_fields_dict)
3631
+
3632
+ expected = {
3633
+ "stool": {
3634
+ METADATA_FIELDS_KEY: {
3635
+ "body_site": {
3636
+ DEFAULT_KEY: "gut",
3637
+ TYPE_KEY: "string"
3638
+ },
3639
+ # sample_type added (would be "stool")
3640
+ SAMPLE_TYPE_KEY: {
3641
+ ALLOWED_KEY: ["stool"],
3642
+ DEFAULT_KEY: "stool",
3643
+ TYPE_KEY: "string"
3644
+ },
3645
+ # Pre-existing qiita_sample_type preserved exactly (NOT "stool")
3646
+ QIITA_SAMPLE_TYPE: {
3647
+ ALLOWED_KEY: ["CUSTOM_QIITA_VALUE_12345"],
3648
+ DEFAULT_KEY: "CUSTOM_QIITA_VALUE_12345",
3649
+ TYPE_KEY: "string"
3650
+ }
3651
+ }
3652
+ }
3653
+ }
3654
+ self.assertDictEqual(expected, result)
3655
+
3656
+ def test__resolve_sample_type_aliases_and_bases_merges_with_host_metadata(self):
3657
+ """Test that host-level metadata fields are merged with sample-type fields.
3658
+
3659
+ Input: Host has host_common_name field, sample type has body_site field.
3660
+ Expected: Resolved sample type has both fields merged.
3661
+ """
3662
+ sample_types_dict = {
3663
+ "stool": {
3664
+ METADATA_FIELDS_KEY: {
3665
+ "body_site": {
3666
+ DEFAULT_KEY: "gut",
3667
+ TYPE_KEY: "string"
3668
+ }
3669
+ }
3670
+ }
3671
+ }
3672
+ host_metadata_fields_dict = {
3673
+ "host_common_name": {
3674
+ DEFAULT_KEY: "human",
3675
+ TYPE_KEY: "string"
3676
+ }
3677
+ }
3678
+
3679
+ result = _resolve_sample_type_aliases_and_bases(
3680
+ sample_types_dict, host_metadata_fields_dict)
3681
+
3682
+ expected = {
3683
+ "stool": {
3684
+ METADATA_FIELDS_KEY: {
3685
+ # Host-level field merged in
3686
+ "host_common_name": {
3687
+ DEFAULT_KEY: "human",
3688
+ TYPE_KEY: "string"
3689
+ },
3690
+ # Sample-type field
3691
+ "body_site": {
3692
+ DEFAULT_KEY: "gut",
3693
+ TYPE_KEY: "string"
3694
+ },
3695
+ SAMPLE_TYPE_KEY: {
3696
+ ALLOWED_KEY: ["stool"],
3697
+ DEFAULT_KEY: "stool",
3698
+ TYPE_KEY: "string"
3699
+ },
3700
+ QIITA_SAMPLE_TYPE: {
3701
+ ALLOWED_KEY: ["stool"],
3702
+ DEFAULT_KEY: "stool",
3703
+ TYPE_KEY: "string"
3704
+ }
3705
+ }
3706
+ }
3707
+ }
3708
+ self.assertDictEqual(expected, result)
3709
+
3710
+ def test__resolve_sample_type_aliases_and_bases_sample_overrides_host(self):
3711
+ """Test that sample-level field overrides host-level field with same name.
3712
+
3713
+ Input: Host has description="host description", sample type also has description="sample description".
3714
+ Expected: Sample-level description value wins.
3715
+ """
3716
+ sample_types_dict = {
3717
+ "stool": {
3718
+ METADATA_FIELDS_KEY: {
3719
+ # Sample-level description should override host-level
3720
+ "description": {
3721
+ DEFAULT_KEY: "sample-level description value",
3722
+ TYPE_KEY: "string"
3723
+ }
3724
+ }
3725
+ }
3726
+ }
3727
+ host_metadata_fields_dict = {
3728
+ # Host-level description should be overridden
3729
+ "description": {
3730
+ DEFAULT_KEY: "host-level description value",
3731
+ TYPE_KEY: "string"
3732
+ },
3733
+ "host_common_name": {
3734
+ DEFAULT_KEY: "human",
3735
+ TYPE_KEY: "string"
3736
+ }
3737
+ }
3738
+
3739
+ result = _resolve_sample_type_aliases_and_bases(
3740
+ sample_types_dict, host_metadata_fields_dict)
3741
+
3742
+ expected = {
3743
+ "stool": {
3744
+ METADATA_FIELDS_KEY: {
3745
+ # Host-level field that wasn't overridden
3746
+ "host_common_name": {
3747
+ DEFAULT_KEY: "human",
3748
+ TYPE_KEY: "string"
3749
+ },
3750
+ # Description: sample-level value wins over host-level
3751
+ "description": {
3752
+ DEFAULT_KEY: "sample-level description value",
3753
+ TYPE_KEY: "string"
3754
+ },
3755
+ SAMPLE_TYPE_KEY: {
3756
+ ALLOWED_KEY: ["stool"],
3757
+ DEFAULT_KEY: "stool",
3758
+ TYPE_KEY: "string"
3759
+ },
3760
+ QIITA_SAMPLE_TYPE: {
3761
+ ALLOWED_KEY: ["stool"],
3762
+ DEFAULT_KEY: "stool",
3763
+ TYPE_KEY: "string"
3764
+ }
3765
+ }
3766
+ }
3767
+ }
3768
+ self.assertDictEqual(expected, result)
3769
+
3770
+ # Tests for build_full_flat_config_dict
3771
+
3772
+ def test_build_full_flat_config_dict_no_inputs(self):
3773
+ """Test build_full_flat_config_dict with no arguments uses all defaults."""
3774
+ result = build_full_flat_config_dict()
3775
+
3776
+ # These tests are less specific because they depend on the actual contents
3777
+ # of the default standards file, which may change over time, so
3778
+ # we just verify the presence of key structures rather than exact contents.
3779
+
3780
+ # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
3781
+ self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
3782
+ hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
3783
+ self.assertIsInstance(hosts_dict, dict)
3784
+
3785
+ # Should have "base" host type with sample_name metadata field
3786
+ self.assertIn("base", hosts_dict)
3787
+ base_host = hosts_dict["base"]
3788
+ self.assertIn(METADATA_FIELDS_KEY, base_host)
3789
+ self.assertIn("sample_name", base_host[METADATA_FIELDS_KEY])
2322
3790
 
2323
3791
  # Should have "human" host type with host_common_name defaulting to "human"
2324
3792
  self.assertIn("human", hosts_dict)
@@ -2329,6 +3797,1071 @@ class TestMetadataConfigurator(TestCase):
2329
3797
  "human",
2330
3798
  human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
2331
3799
 
2332
- # Should have loaded default software config (which includes DEFAULT_KEY)
3800
+ # Should have default software config keys with expected default value
2333
3801
  self.assertIn(DEFAULT_KEY, result)
2334
3802
  self.assertEqual("not applicable", result[DEFAULT_KEY])
3803
+
3804
def test_build_full_flat_config_dict_with_study_config(self):
    """Check that study-specific fields land in the flattened config.

    test_standards.yml nests its host types as
    base -> host_associated -> human/mouse. The expected output shows:
    1. Fields inherited down through that nesting hierarchy
    2. The study's human-level custom_field merged into the human host
       type and into each of its sample types
    """
    def text(default):
        # String field that carries nothing but a default
        return {DEFAULT_KEY: default, TYPE_KEY: "string"}

    def pinned(value):
        # String field whose only allowed value is also its default
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            TYPE_KEY: "string"
        }

    def required_pinned(value):
        # Same shape as pinned(), plus the required flag
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            REQUIRED_KEY: True,
            TYPE_KEY: "string"
        }

    def sample_name():
        return {REQUIRED_KEY: True, TYPE_KEY: "string", "unique": True}

    def sample_type():
        return {REQUIRED_KEY: True, TYPE_KEY: "string"}

    software_config = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }
    human_study_config = {
        METADATA_FIELDS_KEY: {
            "custom_field": text("custom_value")
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {METADATA_FIELDS_KEY: {}}
        }
    }
    study_config = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": human_study_config
            }
        }
    }

    result = build_full_flat_config_dict(
        study_config, software_config, self.TEST_STDS_FP)

    # base: top level in test_standards.yml, no default;
    # sample_name and sample_type are defined here
    expected_base = {
        METADATA_FIELDS_KEY: {
            "sample_name": sample_name(),
            "sample_type": sample_type()
        }
    }

    # host_associated: nested under base, so it inherits
    # sample_name/sample_type and adds its own default and description
    expected_host_associated = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # stool defined at host_associated level
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "description": text("host associated sample"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    # human: nested under host_associated; default is inherited,
    # description is overridden, host_common_name is added, and
    # custom_field comes from the study-specific metadata
    expected_human = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "custom_field": text("custom_value"),
            "description": text("human sample"),
            "host_common_name": text("human"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # blood defined only at human level
            "blood": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:blood"),
                    "body_site": text("blood"),
                    "custom_field": text("custom_value"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("blood"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("blood")
                }
            },
            # stool: body_site inherited from host_associated,
            # body_product added at human level
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:feces"),
                    "body_site": text("gut"),
                    "custom_field": text("custom_value"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    # mouse: nested under host_associated and absent from the study
    # config, so it keeps the inherited default and description
    expected_mouse = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "host_common_name": text("mouse"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            # stool: body_site inherited from host_associated,
            # cage_id added at mouse level
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "cage_id": {
                        REQUIRED_KEY: False,
                        TYPE_KEY: "string"
                    },
                    "description": text("host associated sample"),
                    "host_common_name": text("mouse"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected = {
        # Top-level keys from software_config
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False,
        # Flattened host types from standards + study
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": expected_base,
            "host_associated": expected_host_associated,
            "human": expected_human,
            "mouse": expected_mouse
        }
    }
    self.assertEqual(expected, result)
4106
+
4107
def test_build_full_flat_config_dict_without_study_config(self):
    """Check the flattened output when no study config is supplied.

    test_standards.yml nests its host types as
    base -> host_associated -> human/mouse. With None passed as the
    study config, the expected output is the software config's top-level
    keys plus the flattened standards and nothing else.
    """
    def text(default):
        # String field that carries nothing but a default
        return {DEFAULT_KEY: default, TYPE_KEY: "string"}

    def pinned(value):
        # String field whose only allowed value is also its default
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            TYPE_KEY: "string"
        }

    def required_pinned(value):
        # Same shape as pinned(), plus the required flag
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            REQUIRED_KEY: True,
            TYPE_KEY: "string"
        }

    def sample_name():
        return {REQUIRED_KEY: True, TYPE_KEY: "string", "unique": True}

    def sample_type():
        return {REQUIRED_KEY: True, TYPE_KEY: "string"}

    software_config = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False
    }

    result = build_full_flat_config_dict(
        None, software_config, self.TEST_STDS_FP)

    # base: top level, no default, just sample_name/sample_type
    expected_base = {
        METADATA_FIELDS_KEY: {
            "sample_name": sample_name(),
            "sample_type": sample_type()
        }
    }

    # host_associated: inherits from base, adds default and description
    expected_host_associated = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "description": text("host associated sample"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    # human: inherits from host_associated, overrides description
    expected_human = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("human sample"),
            "host_common_name": text("human"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "blood": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:blood"),
                    "body_site": text("blood"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("blood"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("blood")
                }
            },
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:feces"),
                    "body_site": text("gut"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    # mouse: inherits from host_associated, keeps parent description
    expected_mouse = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "host_common_name": text("mouse"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "cage_id": {
                        REQUIRED_KEY: False,
                        TYPE_KEY: "string"
                    },
                    "description": text("host associated sample"),
                    "host_common_name": text("mouse"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected = {
        # Top-level keys from software_config
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        OVERWRITE_NON_NANS_KEY: False,
        # No STUDY_SPECIFIC_METADATA_KEY since no study config provided
        # Flattened host types from standards only
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": expected_base,
            "host_associated": expected_host_associated,
            "human": expected_human,
            "mouse": expected_mouse
        }
    }
    self.assertEqual(expected, result)
4354
+
4355
def test_build_full_flat_config_dict_merges_software_and_study(self):
    """Check that study config values win over software config values.

    Top-level config keys (default, leave_requireds_blank, etc.) that
    appear in study_config are expected to replace the matching keys from
    software_config, while keys the study leaves out keep the software
    config's value.
    """
    def text(default):
        # String field that carries nothing but a default
        return {DEFAULT_KEY: default, TYPE_KEY: "string"}

    def pinned(value):
        # String field whose only allowed value is also its default
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            TYPE_KEY: "string"
        }

    def required_pinned(value):
        # Same shape as pinned(), plus the required flag
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            REQUIRED_KEY: True,
            TYPE_KEY: "string"
        }

    def sample_name():
        return {REQUIRED_KEY: True, TYPE_KEY: "string", "unique": True}

    def sample_type():
        return {REQUIRED_KEY: True, TYPE_KEY: "string"}

    software_config = {
        DEFAULT_KEY: "software_default",
        LEAVE_REQUIREDS_BLANK_KEY: False,
        OVERWRITE_NON_NANS_KEY: True
    }
    human_study_config = {
        METADATA_FIELDS_KEY: {},
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {METADATA_FIELDS_KEY: {}}
        }
    }
    study_config = {
        # These override software_config values
        DEFAULT_KEY: "study_default",
        LEAVE_REQUIREDS_BLANK_KEY: True,
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": human_study_config
            }
        }
    }

    result = build_full_flat_config_dict(
        study_config, software_config, self.TEST_STDS_FP)

    expected_base = {
        METADATA_FIELDS_KEY: {
            "sample_name": sample_name(),
            "sample_type": sample_type()
        }
    }

    expected_host_associated = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "description": text("host associated sample"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected_human = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("human sample"),
            "host_common_name": text("human"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "blood": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:blood"),
                    "body_site": text("blood"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("blood"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("blood")
                }
            },
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:feces"),
                    "body_site": text("gut"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected_mouse = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "host_common_name": text("mouse"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "cage_id": {
                        REQUIRED_KEY: False,
                        TYPE_KEY: "string"
                    },
                    "description": text("host associated sample"),
                    "host_common_name": text("mouse"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected = {
        # default from study_config overrides software_config
        DEFAULT_KEY: "study_default",
        # leave_requireds_blank from study_config overrides software_config
        LEAVE_REQUIREDS_BLANK_KEY: True,
        # overwrite_non_nans from software_config (not overridden by study)
        OVERWRITE_NON_NANS_KEY: True,
        # Flattened host types
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": expected_base,
            "host_associated": expected_host_associated,
            "human": expected_human,
            "mouse": expected_mouse
        }
    }
    self.assertEqual(expected, result)
4616
+
4617
def test_build_full_flat_config_dict_none_software_config(self):
    """Check that a None software_config falls back to config.yml defaults.

    With None passed as software_config, the top-level keys of the
    expected output are the defaults from the software's config.yml file
    (default="not applicable", etc.).
    """
    def text(default):
        # String field that carries nothing but a default
        return {DEFAULT_KEY: default, TYPE_KEY: "string"}

    def pinned(value):
        # String field whose only allowed value is also its default
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            TYPE_KEY: "string"
        }

    def required_pinned(value):
        # Same shape as pinned(), plus the required flag
        return {
            ALLOWED_KEY: [value],
            DEFAULT_KEY: value,
            REQUIRED_KEY: True,
            TYPE_KEY: "string"
        }

    def sample_name():
        return {REQUIRED_KEY: True, TYPE_KEY: "string", "unique": True}

    def sample_type():
        return {REQUIRED_KEY: True, TYPE_KEY: "string"}

    human_study_config = {
        METADATA_FIELDS_KEY: {},
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {METADATA_FIELDS_KEY: {}}
        }
    }
    study_config = {
        STUDY_SPECIFIC_METADATA_KEY: {
            HOST_TYPE_SPECIFIC_METADATA_KEY: {
                "human": human_study_config
            }
        }
    }

    result = build_full_flat_config_dict(
        study_config, None, self.TEST_STDS_FP)

    expected_base = {
        METADATA_FIELDS_KEY: {
            "sample_name": sample_name(),
            "sample_type": sample_type()
        }
    }

    expected_host_associated = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "description": text("host associated sample"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected_human = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("human sample"),
            "host_common_name": text("human"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "blood": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:blood"),
                    "body_site": text("blood"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("blood"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("blood")
                }
            },
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_product": text("UBERON:feces"),
                    "body_site": text("gut"),
                    "description": text("human sample"),
                    "host_common_name": text("human"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected_mouse = {
        DEFAULT_KEY: "not provided",
        METADATA_FIELDS_KEY: {
            "description": text("host associated sample"),
            "host_common_name": text("mouse"),
            "sample_name": sample_name(),
            "sample_type": sample_type()
        },
        SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
            "stool": {
                METADATA_FIELDS_KEY: {
                    "body_site": text("gut"),
                    "cage_id": {
                        REQUIRED_KEY: False,
                        TYPE_KEY: "string"
                    },
                    "description": text("host associated sample"),
                    "host_common_name": text("mouse"),
                    QIITA_SAMPLE_TYPE: pinned("stool"),
                    "sample_name": sample_name(),
                    SAMPLE_TYPE_KEY: required_pinned("stool")
                }
            }
        }
    }

    expected = {
        # Top-level keys loaded from software's config.yml defaults
        DEFAULT_KEY: "not applicable",
        LEAVE_REQUIREDS_BLANK_KEY: False,
        OVERWRITE_NON_NANS_KEY: False,
        # Flattened host types
        HOST_TYPE_SPECIFIC_METADATA_KEY: {
            "base": expected_base,
            "host_associated": expected_host_associated,
            "human": expected_human,
            "mouse": expected_mouse
        }
    }
    self.assertEqual(expected, result)