metameq 2026.1.1__py3-none-any.whl → 2026.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,8 @@ from metameq.src.util import \
5
5
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY, DEFAULT_KEY, \
6
6
  ALIAS_KEY, BASE_TYPE_KEY, ALLOWED_KEY, ANYOF_KEY, TYPE_KEY, \
7
7
  STUDY_SPECIFIC_METADATA_KEY, LEAVE_REQUIREDS_BLANK_KEY, \
8
- OVERWRITE_NON_NANS_KEY
8
+ OVERWRITE_NON_NANS_KEY, REQUIRED_KEY, SAMPLE_TYPE_KEY, QIITA_SAMPLE_TYPE, \
9
+ HOSTTYPE_COL_OPTIONS_KEY, SAMPLETYPE_COL_OPTIONS_KEY
9
10
  from metameq.src.metadata_configurator import \
10
11
  combine_stds_and_study_config, \
11
12
  _make_combined_stds_and_study_host_type_dicts, \
@@ -15,7 +16,8 @@ from metameq.src.metadata_configurator import \
15
16
  _combine_base_and_added_host_type, \
16
17
  _id_sample_type_definition, \
17
18
  update_wip_metadata_dict, \
18
- build_full_flat_config_dict
19
+ build_full_flat_config_dict, \
20
+ _resolve_sample_type_aliases_and_bases
19
21
 
20
22
 
21
23
  class TestMetadataConfigurator(TestCase):
@@ -581,18 +583,153 @@ class TestMetadataConfigurator(TestCase):
581
583
  },
582
584
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
583
585
  "fe": {
584
- "alias": "stool"
586
+ # Resolved alias to stool - gets stool's resolved fields
587
+ METADATA_FIELDS_KEY: {
588
+ "country": {
589
+ "allowed": ["USA"],
590
+ DEFAULT_KEY: "USA",
591
+ "empty": False,
592
+ "is_phi": False,
593
+ "required": True,
594
+ "type": "string"
595
+ },
596
+ "description": {
597
+ "allowed": ["host associated stool"],
598
+ DEFAULT_KEY: "host associated stool",
599
+ "empty": False,
600
+ "is_phi": False,
601
+ "required": True,
602
+ "type": "string"
603
+ },
604
+ "dna_extracted": {
605
+ "allowed": ["true", "false"],
606
+ DEFAULT_KEY: "true",
607
+ "empty": False,
608
+ "is_phi": False,
609
+ "required": True,
610
+ "type": "string"
611
+ },
612
+ "elevation": {
613
+ "anyof": [
614
+ {
615
+ "allowed": [
616
+ "not collected",
617
+ "not provided",
618
+ "restricted access"],
619
+ "type": "string"
620
+ },
621
+ {
622
+ "min": -413.0,
623
+ "type": "number"
624
+ }],
625
+ "empty": False,
626
+ "is_phi": False,
627
+ "required": True
628
+ },
629
+ "geo_loc_name": {
630
+ "allowed": ["USA:CA:San Diego"],
631
+ DEFAULT_KEY: "USA:CA:San Diego",
632
+ "empty": False,
633
+ "is_phi": False,
634
+ "required": True,
635
+ "type": "string"
636
+ },
637
+ "host_type": {
638
+ "allowed": ["human", "non-human"],
639
+ "empty": False,
640
+ "is_phi": False,
641
+ "required": True,
642
+ "type": "string"
643
+ },
644
+ "physical_specimen_location": {
645
+ "allowed": ["UCSDST"],
646
+ DEFAULT_KEY: "UCSDST",
647
+ "empty": False,
648
+ "is_phi": False,
649
+ "required": True,
650
+ "type": "string"
651
+ },
652
+ "physical_specimen_remaining": {
653
+ "allowed": ["true", "false"],
654
+ DEFAULT_KEY: "true",
655
+ "empty": False,
656
+ "is_phi": False,
657
+ "required": True,
658
+ "type": "string"
659
+ },
660
+ QIITA_SAMPLE_TYPE: {
661
+ "allowed": ["stool"],
662
+ DEFAULT_KEY: "stool",
663
+ "type": "string"
664
+ },
665
+ SAMPLE_TYPE_KEY: {
666
+ "allowed": ["stool"],
667
+ DEFAULT_KEY: "stool",
668
+ "type": "string"
669
+ }
670
+ }
585
671
  },
586
672
  "stool": {
587
673
  METADATA_FIELDS_KEY: {
674
+ # Host fields merged in
675
+ "country": {
676
+ "allowed": ["USA"],
677
+ DEFAULT_KEY: "USA",
678
+ "empty": False,
679
+ "is_phi": False,
680
+ "required": True,
681
+ "type": "string"
682
+ },
588
683
  # from stds same level host + sample type
589
684
  "description": {
590
685
  "allowed": ["host associated stool"],
591
686
  DEFAULT_KEY: "host associated stool",
687
+ "empty": False,
688
+ "is_phi": False,
689
+ "required": True,
592
690
  "type": "string"
593
691
  },
594
- # from stds same level host + sample type
595
- # (NB: comes from study)
692
+ "dna_extracted": {
693
+ "allowed": ["true", "false"],
694
+ DEFAULT_KEY: "true",
695
+ "empty": False,
696
+ "is_phi": False,
697
+ "required": True,
698
+ "type": "string"
699
+ },
700
+ "elevation": {
701
+ "anyof": [
702
+ {
703
+ "allowed": [
704
+ "not collected",
705
+ "not provided",
706
+ "restricted access"],
707
+ "type": "string"
708
+ },
709
+ {
710
+ "min": -413.0,
711
+ "type": "number"
712
+ }],
713
+ "empty": False,
714
+ "is_phi": False,
715
+ "required": True
716
+ },
717
+ "geo_loc_name": {
718
+ "allowed": ["USA:CA:San Diego"],
719
+ DEFAULT_KEY: "USA:CA:San Diego",
720
+ "empty": False,
721
+ "is_phi": False,
722
+ "required": True,
723
+ "type": "string"
724
+ },
725
+ "host_type": {
726
+ "allowed": ["human", "non-human"],
727
+ "empty": False,
728
+ "is_phi": False,
729
+ "required": True,
730
+ "type": "string"
731
+ },
732
+ # from stds same level host + sample type (NB: comes from study)
596
733
  "physical_specimen_location": {
597
734
  "allowed": ["UCSDST"],
598
735
  DEFAULT_KEY: "UCSDST",
@@ -601,8 +738,7 @@ class TestMetadataConfigurator(TestCase):
601
738
  "required": True,
602
739
  "type": "string"
603
740
  },
604
- # from stds same level host + sample type
605
- # (NB: comes from study)
741
+ # from stds same level host + sample type (NB: comes from study)
606
742
  "physical_specimen_remaining": {
607
743
  "allowed": ["true", "false"],
608
744
  DEFAULT_KEY: "true",
@@ -610,6 +746,16 @@ class TestMetadataConfigurator(TestCase):
610
746
  "is_phi": False,
611
747
  "required": True,
612
748
  "type": "string"
749
+ },
750
+ QIITA_SAMPLE_TYPE: {
751
+ "allowed": ["stool"],
752
+ DEFAULT_KEY: "stool",
753
+ "type": "string"
754
+ },
755
+ SAMPLE_TYPE_KEY: {
756
+ "allowed": ["stool"],
757
+ DEFAULT_KEY: "stool",
758
+ "type": "string"
613
759
  }
614
760
  }
615
761
  }
@@ -684,18 +830,155 @@ class TestMetadataConfigurator(TestCase):
684
830
  },
685
831
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
686
832
  "fe": {
687
- "alias": "stool"
833
+ # Resolved alias to stool - gets stool's resolved fields
834
+ METADATA_FIELDS_KEY: {
835
+ "country": {
836
+ "allowed": ["USA"],
837
+ DEFAULT_KEY: "USA",
838
+ "empty": False,
839
+ "is_phi": False,
840
+ "required": True,
841
+ "type": "string"
842
+ },
843
+ "description": {
844
+ "allowed": ["host associated stool"],
845
+ DEFAULT_KEY: "host associated stool",
846
+ "empty": False,
847
+ "is_phi": False,
848
+ "required": True,
849
+ "type": "string"
850
+ },
851
+ "dna_extracted": {
852
+ "allowed": ["true", "false"],
853
+ DEFAULT_KEY: "true",
854
+ "empty": False,
855
+ "is_phi": False,
856
+ "required": True,
857
+ "type": "string"
858
+ },
859
+ "elevation": {
860
+ "anyof": [
861
+ {
862
+ "allowed": [
863
+ "not collected",
864
+ "not provided",
865
+ "restricted access"],
866
+ "type": "string"
867
+ },
868
+ {
869
+ "min": -413.0,
870
+ "type": "number"
871
+ }],
872
+ "empty": False,
873
+ "is_phi": False,
874
+ "required": True
875
+ },
876
+ "geo_loc_name": {
877
+ "allowed": ["USA:CA:San Diego"],
878
+ DEFAULT_KEY: "USA:CA:San Diego",
879
+ "empty": False,
880
+ "is_phi": False,
881
+ "required": True,
882
+ "type": "string"
883
+ },
884
+ "host_type": {
885
+ "allowed": ["control"],
886
+ DEFAULT_KEY: "control",
887
+ "empty": False,
888
+ "is_phi": False,
889
+ "required": True,
890
+ "type": "string"
891
+ },
892
+ "physical_specimen_location": {
893
+ "allowed": ["UCSDST"],
894
+ DEFAULT_KEY: "UCSDST",
895
+ "empty": False,
896
+ "is_phi": False,
897
+ "required": True,
898
+ "type": "string"
899
+ },
900
+ "physical_specimen_remaining": {
901
+ "allowed": ["true", "false"],
902
+ DEFAULT_KEY: "true",
903
+ "empty": False,
904
+ "is_phi": False,
905
+ "required": True,
906
+ "type": "string"
907
+ },
908
+ QIITA_SAMPLE_TYPE: {
909
+ "allowed": ["stool"],
910
+ DEFAULT_KEY: "stool",
911
+ "type": "string"
912
+ },
913
+ SAMPLE_TYPE_KEY: {
914
+ "allowed": ["stool"],
915
+ DEFAULT_KEY: "stool",
916
+ "type": "string"
917
+ }
918
+ }
688
919
  },
689
920
  "stool": {
690
921
  METADATA_FIELDS_KEY: {
922
+ # Host fields merged in
923
+ "country": {
924
+ "allowed": ["USA"],
925
+ DEFAULT_KEY: "USA",
926
+ "empty": False,
927
+ "is_phi": False,
928
+ "required": True,
929
+ "type": "string"
930
+ },
691
931
  # from stds same level host + sample type
692
932
  "description": {
693
933
  "allowed": ["host associated stool"],
694
934
  DEFAULT_KEY: "host associated stool",
935
+ "empty": False,
936
+ "is_phi": False,
937
+ "required": True,
695
938
  "type": "string"
696
939
  },
697
- # from stds same level host + sample type
698
- # (NB: comes from study)
940
+ "dna_extracted": {
941
+ "allowed": ["true", "false"],
942
+ DEFAULT_KEY: "true",
943
+ "empty": False,
944
+ "is_phi": False,
945
+ "required": True,
946
+ "type": "string"
947
+ },
948
+ "elevation": {
949
+ "anyof": [
950
+ {
951
+ "allowed": [
952
+ "not collected",
953
+ "not provided",
954
+ "restricted access"],
955
+ "type": "string"
956
+ },
957
+ {
958
+ "min": -413.0,
959
+ "type": "number"
960
+ }],
961
+ "empty": False,
962
+ "is_phi": False,
963
+ "required": True
964
+ },
965
+ "geo_loc_name": {
966
+ "allowed": ["USA:CA:San Diego"],
967
+ DEFAULT_KEY: "USA:CA:San Diego",
968
+ "empty": False,
969
+ "is_phi": False,
970
+ "required": True,
971
+ "type": "string"
972
+ },
973
+ "host_type": {
974
+ "allowed": ["control"],
975
+ DEFAULT_KEY: "control",
976
+ "empty": False,
977
+ "is_phi": False,
978
+ "required": True,
979
+ "type": "string"
980
+ },
981
+ # from stds same level host + sample type (NB: comes from study)
699
982
  "physical_specimen_location": {
700
983
  "allowed": ["UCSDST"],
701
984
  DEFAULT_KEY: "UCSDST",
@@ -704,8 +987,7 @@ class TestMetadataConfigurator(TestCase):
704
987
  "required": True,
705
988
  "type": "string"
706
989
  },
707
- # from stds same level host + sample type
708
- # (NB: comes from study)
990
+ # from stds same level host + sample type (NB: comes from study)
709
991
  "physical_specimen_remaining": {
710
992
  "allowed": ["true", "false"],
711
993
  DEFAULT_KEY: "true",
@@ -713,6 +995,16 @@ class TestMetadataConfigurator(TestCase):
713
995
  "is_phi": False,
714
996
  "required": True,
715
997
  "type": "string"
998
+ },
999
+ QIITA_SAMPLE_TYPE: {
1000
+ "allowed": ["stool"],
1001
+ DEFAULT_KEY: "stool",
1002
+ "type": "string"
1003
+ },
1004
+ SAMPLE_TYPE_KEY: {
1005
+ "allowed": ["stool"],
1006
+ DEFAULT_KEY: "stool",
1007
+ "type": "string"
716
1008
  }
717
1009
  }
718
1010
  }
@@ -788,54 +1080,77 @@ class TestMetadataConfigurator(TestCase):
788
1080
  },
789
1081
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
790
1082
  "dung": {
791
- "base_type": "stool",
1083
+ # Resolved base_type=stool - has stool's fields + dung's overrides
792
1084
  METADATA_FIELDS_KEY: {
1085
+ "country": {
1086
+ "allowed": ["USA"],
1087
+ DEFAULT_KEY: "USA",
1088
+ "empty": False,
1089
+ "is_phi": False,
1090
+ "required": True,
1091
+ "type": "string"
1092
+ },
793
1093
  # overrides stds parent host + sample type
794
1094
  "description": {
795
1095
  "allowed": ["human dung"],
796
1096
  DEFAULT_KEY: "human dung",
1097
+ "empty": False,
1098
+ "is_phi": False,
1099
+ "required": True,
797
1100
  "type": "string"
798
1101
  },
799
- # overrides stds parent host + sample type
800
- "physical_specimen_location": {
801
- "allowed": ["FIELD"],
802
- DEFAULT_KEY: "FIELD",
803
- "type": "string"
804
- }
805
- }
806
- },
807
- "f": {
808
- "base_type": "stool"
809
- },
810
- "fe": {
811
- "alias": "stool"
812
- },
813
- "feces": {
814
- "alias": "stool"
815
- },
816
- "stool": {
817
- METADATA_FIELDS_KEY: {
818
- # from stds same level host + sample type
819
- "description": {
820
- "allowed": ["human stool"],
821
- DEFAULT_KEY: "human stool",
1102
+ "dna_extracted": {
1103
+ "allowed": ["true"],
1104
+ DEFAULT_KEY: "true",
1105
+ "empty": False,
1106
+ "is_phi": False,
1107
+ "required": True,
822
1108
  "type": "string"
823
1109
  },
824
- # from stds same level host + sample type
825
1110
  "elevation": {
1111
+ "anyof": [
1112
+ {
1113
+ "allowed": [
1114
+ "not collected",
1115
+ "not provided",
1116
+ "restricted access"],
1117
+ "type": "string"
1118
+ },
1119
+ {
1120
+ "min": -413.0,
1121
+ "type": "number"
1122
+ }],
826
1123
  DEFAULT_KEY: 14,
1124
+ "empty": False,
1125
+ "is_phi": False,
1126
+ "required": True,
827
1127
  "type": "number"
828
1128
  },
829
- # from stds parent level host + sample type
1129
+ "geo_loc_name": {
1130
+ "allowed": ["USA:CA:San Diego"],
1131
+ DEFAULT_KEY: "USA:CA:San Diego",
1132
+ "empty": False,
1133
+ "is_phi": False,
1134
+ "required": True,
1135
+ "type": "string"
1136
+ },
1137
+ "host_type": {
1138
+ "allowed": ["human"],
1139
+ DEFAULT_KEY: "human",
1140
+ "empty": False,
1141
+ "is_phi": False,
1142
+ "required": True,
1143
+ "type": "string"
1144
+ },
1145
+ # overrides stds parent host + sample type
830
1146
  "physical_specimen_location": {
831
- "allowed": ["UCSDST"],
832
- DEFAULT_KEY: "UCSDST",
1147
+ "allowed": ["FIELD"],
1148
+ DEFAULT_KEY: "FIELD",
833
1149
  "empty": False,
834
1150
  "is_phi": False,
835
1151
  "required": True,
836
1152
  "type": "string"
837
1153
  },
838
- # from stds same level host + sample type
839
1154
  "physical_specimen_remaining": {
840
1155
  "allowed": ["false"],
841
1156
  DEFAULT_KEY: "false",
@@ -843,14 +1158,388 @@ class TestMetadataConfigurator(TestCase):
843
1158
  "is_phi": False,
844
1159
  "required": True,
845
1160
  "type": "string"
1161
+ },
1162
+ QIITA_SAMPLE_TYPE: {
1163
+ "allowed": ["dung"],
1164
+ DEFAULT_KEY: "dung",
1165
+ "type": "string"
1166
+ },
1167
+ SAMPLE_TYPE_KEY: {
1168
+ "allowed": ["dung"],
1169
+ DEFAULT_KEY: "dung",
1170
+ "type": "string"
846
1171
  }
847
1172
  }
848
- }
849
- }
850
- },
851
- "dude": {
852
- DEFAULT_KEY: "not collected",
853
- METADATA_FIELDS_KEY: {
1173
+ },
1174
+ "f": {
1175
+ # Resolved base_type=stool - has stool's fields
1176
+ METADATA_FIELDS_KEY: {
1177
+ "country": {
1178
+ "allowed": ["USA"],
1179
+ DEFAULT_KEY: "USA",
1180
+ "empty": False,
1181
+ "is_phi": False,
1182
+ "required": True,
1183
+ "type": "string"
1184
+ },
1185
+ "description": {
1186
+ "allowed": ["human stool"],
1187
+ DEFAULT_KEY: "human stool",
1188
+ "empty": False,
1189
+ "is_phi": False,
1190
+ "required": True,
1191
+ "type": "string"
1192
+ },
1193
+ "dna_extracted": {
1194
+ "allowed": ["true"],
1195
+ DEFAULT_KEY: "true",
1196
+ "empty": False,
1197
+ "is_phi": False,
1198
+ "required": True,
1199
+ "type": "string"
1200
+ },
1201
+ "elevation": {
1202
+ "anyof": [
1203
+ {
1204
+ "allowed": [
1205
+ "not collected",
1206
+ "not provided",
1207
+ "restricted access"],
1208
+ "type": "string"
1209
+ },
1210
+ {
1211
+ "min": -413.0,
1212
+ "type": "number"
1213
+ }],
1214
+ DEFAULT_KEY: 14,
1215
+ "empty": False,
1216
+ "is_phi": False,
1217
+ "required": True,
1218
+ "type": "number"
1219
+ },
1220
+ "geo_loc_name": {
1221
+ "allowed": ["USA:CA:San Diego"],
1222
+ DEFAULT_KEY: "USA:CA:San Diego",
1223
+ "empty": False,
1224
+ "is_phi": False,
1225
+ "required": True,
1226
+ "type": "string"
1227
+ },
1228
+ "host_type": {
1229
+ "allowed": ["human"],
1230
+ DEFAULT_KEY: "human",
1231
+ "empty": False,
1232
+ "is_phi": False,
1233
+ "required": True,
1234
+ "type": "string"
1235
+ },
1236
+ "physical_specimen_location": {
1237
+ "allowed": ["UCSDST"],
1238
+ DEFAULT_KEY: "UCSDST",
1239
+ "empty": False,
1240
+ "is_phi": False,
1241
+ "required": True,
1242
+ "type": "string"
1243
+ },
1244
+ "physical_specimen_remaining": {
1245
+ "allowed": ["false"],
1246
+ DEFAULT_KEY: "false",
1247
+ "empty": False,
1248
+ "is_phi": False,
1249
+ "required": True,
1250
+ "type": "string"
1251
+ },
1252
+ QIITA_SAMPLE_TYPE: {
1253
+ "allowed": ["f"],
1254
+ DEFAULT_KEY: "f",
1255
+ "type": "string"
1256
+ },
1257
+ SAMPLE_TYPE_KEY: {
1258
+ "allowed": ["f"],
1259
+ DEFAULT_KEY: "f",
1260
+ "type": "string"
1261
+ }
1262
+ }
1263
+ },
1264
+ "fe": {
1265
+ # Resolved alias to stool - gets stool's resolved fields
1266
+ METADATA_FIELDS_KEY: {
1267
+ "country": {
1268
+ "allowed": ["USA"],
1269
+ DEFAULT_KEY: "USA",
1270
+ "empty": False,
1271
+ "is_phi": False,
1272
+ "required": True,
1273
+ "type": "string"
1274
+ },
1275
+ "description": {
1276
+ "allowed": ["human stool"],
1277
+ DEFAULT_KEY: "human stool",
1278
+ "empty": False,
1279
+ "is_phi": False,
1280
+ "required": True,
1281
+ "type": "string"
1282
+ },
1283
+ "dna_extracted": {
1284
+ "allowed": ["true"],
1285
+ DEFAULT_KEY: "true",
1286
+ "empty": False,
1287
+ "is_phi": False,
1288
+ "required": True,
1289
+ "type": "string"
1290
+ },
1291
+ "elevation": {
1292
+ "anyof": [
1293
+ {
1294
+ "allowed": [
1295
+ "not collected",
1296
+ "not provided",
1297
+ "restricted access"],
1298
+ "type": "string"
1299
+ },
1300
+ {
1301
+ "min": -413.0,
1302
+ "type": "number"
1303
+ }],
1304
+ DEFAULT_KEY: 14,
1305
+ "empty": False,
1306
+ "is_phi": False,
1307
+ "required": True,
1308
+ "type": "number"
1309
+ },
1310
+ "geo_loc_name": {
1311
+ "allowed": ["USA:CA:San Diego"],
1312
+ DEFAULT_KEY: "USA:CA:San Diego",
1313
+ "empty": False,
1314
+ "is_phi": False,
1315
+ "required": True,
1316
+ "type": "string"
1317
+ },
1318
+ "host_type": {
1319
+ "allowed": ["human"],
1320
+ DEFAULT_KEY: "human",
1321
+ "empty": False,
1322
+ "is_phi": False,
1323
+ "required": True,
1324
+ "type": "string"
1325
+ },
1326
+ "physical_specimen_location": {
1327
+ "allowed": ["UCSDST"],
1328
+ DEFAULT_KEY: "UCSDST",
1329
+ "empty": False,
1330
+ "is_phi": False,
1331
+ "required": True,
1332
+ "type": "string"
1333
+ },
1334
+ "physical_specimen_remaining": {
1335
+ "allowed": ["false"],
1336
+ DEFAULT_KEY: "false",
1337
+ "empty": False,
1338
+ "is_phi": False,
1339
+ "required": True,
1340
+ "type": "string"
1341
+ },
1342
+ QIITA_SAMPLE_TYPE: {
1343
+ "allowed": ["stool"],
1344
+ DEFAULT_KEY: "stool",
1345
+ "type": "string"
1346
+ },
1347
+ SAMPLE_TYPE_KEY: {
1348
+ "allowed": ["stool"],
1349
+ DEFAULT_KEY: "stool",
1350
+ "type": "string"
1351
+ }
1352
+ }
1353
+ },
1354
+ "feces": {
1355
+ # Resolved alias to stool - gets stool's resolved fields
1356
+ METADATA_FIELDS_KEY: {
1357
+ "country": {
1358
+ "allowed": ["USA"],
1359
+ DEFAULT_KEY: "USA",
1360
+ "empty": False,
1361
+ "is_phi": False,
1362
+ "required": True,
1363
+ "type": "string"
1364
+ },
1365
+ "description": {
1366
+ "allowed": ["human stool"],
1367
+ DEFAULT_KEY: "human stool",
1368
+ "empty": False,
1369
+ "is_phi": False,
1370
+ "required": True,
1371
+ "type": "string"
1372
+ },
1373
+ "dna_extracted": {
1374
+ "allowed": ["true"],
1375
+ DEFAULT_KEY: "true",
1376
+ "empty": False,
1377
+ "is_phi": False,
1378
+ "required": True,
1379
+ "type": "string"
1380
+ },
1381
+ "elevation": {
1382
+ "anyof": [
1383
+ {
1384
+ "allowed": [
1385
+ "not collected",
1386
+ "not provided",
1387
+ "restricted access"],
1388
+ "type": "string"
1389
+ },
1390
+ {
1391
+ "min": -413.0,
1392
+ "type": "number"
1393
+ }],
1394
+ DEFAULT_KEY: 14,
1395
+ "empty": False,
1396
+ "is_phi": False,
1397
+ "required": True,
1398
+ "type": "number"
1399
+ },
1400
+ "geo_loc_name": {
1401
+ "allowed": ["USA:CA:San Diego"],
1402
+ DEFAULT_KEY: "USA:CA:San Diego",
1403
+ "empty": False,
1404
+ "is_phi": False,
1405
+ "required": True,
1406
+ "type": "string"
1407
+ },
1408
+ "host_type": {
1409
+ "allowed": ["human"],
1410
+ DEFAULT_KEY: "human",
1411
+ "empty": False,
1412
+ "is_phi": False,
1413
+ "required": True,
1414
+ "type": "string"
1415
+ },
1416
+ "physical_specimen_location": {
1417
+ "allowed": ["UCSDST"],
1418
+ DEFAULT_KEY: "UCSDST",
1419
+ "empty": False,
1420
+ "is_phi": False,
1421
+ "required": True,
1422
+ "type": "string"
1423
+ },
1424
+ "physical_specimen_remaining": {
1425
+ "allowed": ["false"],
1426
+ DEFAULT_KEY: "false",
1427
+ "empty": False,
1428
+ "is_phi": False,
1429
+ "required": True,
1430
+ "type": "string"
1431
+ },
1432
+ QIITA_SAMPLE_TYPE: {
1433
+ "allowed": ["stool"],
1434
+ DEFAULT_KEY: "stool",
1435
+ "type": "string"
1436
+ },
1437
+ SAMPLE_TYPE_KEY: {
1438
+ "allowed": ["stool"],
1439
+ DEFAULT_KEY: "stool",
1440
+ "type": "string"
1441
+ }
1442
+ }
1443
+ },
1444
+ "stool": {
1445
+ METADATA_FIELDS_KEY: {
1446
+ # Host fields merged in
1447
+ "country": {
1448
+ "allowed": ["USA"],
1449
+ DEFAULT_KEY: "USA",
1450
+ "empty": False,
1451
+ "is_phi": False,
1452
+ "required": True,
1453
+ "type": "string"
1454
+ },
1455
+ # from stds same level host + sample type
1456
+ "description": {
1457
+ "allowed": ["human stool"],
1458
+ DEFAULT_KEY: "human stool",
1459
+ "empty": False,
1460
+ "is_phi": False,
1461
+ "required": True,
1462
+ "type": "string"
1463
+ },
1464
+ "dna_extracted": {
1465
+ "allowed": ["true"],
1466
+ DEFAULT_KEY: "true",
1467
+ "empty": False,
1468
+ "is_phi": False,
1469
+ "required": True,
1470
+ "type": "string"
1471
+ },
1472
+ # from stds same level host + sample type
1473
+ "elevation": {
1474
+ "anyof": [
1475
+ {
1476
+ "allowed": [
1477
+ "not collected",
1478
+ "not provided",
1479
+ "restricted access"],
1480
+ "type": "string"
1481
+ },
1482
+ {
1483
+ "min": -413.0,
1484
+ "type": "number"
1485
+ }],
1486
+ DEFAULT_KEY: 14,
1487
+ "empty": False,
1488
+ "is_phi": False,
1489
+ "required": True,
1490
+ "type": "number"
1491
+ },
1492
+ "geo_loc_name": {
1493
+ "allowed": ["USA:CA:San Diego"],
1494
+ DEFAULT_KEY: "USA:CA:San Diego",
1495
+ "empty": False,
1496
+ "is_phi": False,
1497
+ "required": True,
1498
+ "type": "string"
1499
+ },
1500
+ "host_type": {
1501
+ "allowed": ["human"],
1502
+ DEFAULT_KEY: "human",
1503
+ "empty": False,
1504
+ "is_phi": False,
1505
+ "required": True,
1506
+ "type": "string"
1507
+ },
1508
+ # from stds parent level host + sample type
1509
+ "physical_specimen_location": {
1510
+ "allowed": ["UCSDST"],
1511
+ DEFAULT_KEY: "UCSDST",
1512
+ "empty": False,
1513
+ "is_phi": False,
1514
+ "required": True,
1515
+ "type": "string"
1516
+ },
1517
+ # from stds same level host + sample type
1518
+ "physical_specimen_remaining": {
1519
+ "allowed": ["false"],
1520
+ DEFAULT_KEY: "false",
1521
+ "empty": False,
1522
+ "is_phi": False,
1523
+ "required": True,
1524
+ "type": "string"
1525
+ },
1526
+ QIITA_SAMPLE_TYPE: {
1527
+ "allowed": ["stool"],
1528
+ DEFAULT_KEY: "stool",
1529
+ "type": "string"
1530
+ },
1531
+ SAMPLE_TYPE_KEY: {
1532
+ "allowed": ["stool"],
1533
+ DEFAULT_KEY: "stool",
1534
+ "type": "string"
1535
+ }
1536
+ }
1537
+ }
1538
+ }
1539
+ },
1540
+ "dude": {
1541
+ DEFAULT_KEY: "not collected",
1542
+ METADATA_FIELDS_KEY: {
854
1543
  # from stds parent host
855
1544
  "country": {
856
1545
  "allowed": ["USA"],
@@ -918,44 +1607,431 @@ class TestMetadataConfigurator(TestCase):
918
1607
  },
919
1608
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
920
1609
  "dung": {
921
- "base_type": "stool",
1610
+ # Resolved base_type=stool - has stool's fields + dung's overrides
922
1611
  METADATA_FIELDS_KEY: {
923
- # overrides stds parent host + sample type
1612
+ "country": {
1613
+ "allowed": ["USA"],
1614
+ DEFAULT_KEY: "USA",
1615
+ "empty": False,
1616
+ "is_phi": False,
1617
+ "required": True,
1618
+ "type": "string"
1619
+ },
1620
+ # overrides stds parent host + sample type
924
1621
  "description": {
925
1622
  "allowed": ["human dung"],
926
1623
  DEFAULT_KEY: "human dung",
1624
+ "empty": False,
1625
+ "is_phi": False,
1626
+ "required": True,
1627
+ "type": "string"
1628
+ },
1629
+ "dna_extracted": {
1630
+ "allowed": ["true"],
1631
+ DEFAULT_KEY: "true",
1632
+ "empty": False,
1633
+ "is_phi": False,
1634
+ "required": True,
1635
+ "type": "string"
1636
+ },
1637
+ "elevation": {
1638
+ "anyof": [
1639
+ {
1640
+ "allowed": [
1641
+ "not collected",
1642
+ "not provided",
1643
+ "restricted access"],
1644
+ "type": "string"
1645
+ },
1646
+ {
1647
+ "min": -413.0,
1648
+ "type": "number"
1649
+ }],
1650
+ DEFAULT_KEY: 14,
1651
+ "empty": False,
1652
+ "is_phi": False,
1653
+ "required": True,
1654
+ "type": "number"
1655
+ },
1656
+ "geo_loc_name": {
1657
+ "allowed": ["USA:CA:San Diego"],
1658
+ DEFAULT_KEY: "USA:CA:San Diego",
1659
+ "empty": False,
1660
+ "is_phi": False,
1661
+ "required": True,
1662
+ "type": "string"
1663
+ },
1664
+ "host_type": {
1665
+ "allowed": ["dude"],
1666
+ DEFAULT_KEY: "dude",
1667
+ "empty": False,
1668
+ "is_phi": False,
1669
+ "required": True,
927
1670
  "type": "string"
928
1671
  },
929
1672
  # overrides stds parent host + sample type
930
1673
  "physical_specimen_location": {
931
1674
  "allowed": ["FIELD"],
932
1675
  DEFAULT_KEY: "FIELD",
1676
+ "empty": False,
1677
+ "is_phi": False,
1678
+ "required": True,
1679
+ "type": "string"
1680
+ },
1681
+ "physical_specimen_remaining": {
1682
+ "allowed": ["false"],
1683
+ DEFAULT_KEY: "false",
1684
+ "empty": False,
1685
+ "is_phi": False,
1686
+ "required": True,
1687
+ "type": "string"
1688
+ },
1689
+ QIITA_SAMPLE_TYPE: {
1690
+ "allowed": ["dung"],
1691
+ DEFAULT_KEY: "dung",
1692
+ "type": "string"
1693
+ },
1694
+ SAMPLE_TYPE_KEY: {
1695
+ "allowed": ["dung"],
1696
+ DEFAULT_KEY: "dung",
933
1697
  "type": "string"
934
1698
  }
935
1699
  }
936
1700
  },
937
1701
  "f": {
938
- "base_type": "stool"
1702
+ # Resolved base_type=stool - has stool's fields
1703
+ METADATA_FIELDS_KEY: {
1704
+ "country": {
1705
+ "allowed": ["USA"],
1706
+ DEFAULT_KEY: "USA",
1707
+ "empty": False,
1708
+ "is_phi": False,
1709
+ "required": True,
1710
+ "type": "string"
1711
+ },
1712
+ "description": {
1713
+ "allowed": ["human stool"],
1714
+ DEFAULT_KEY: "human stool",
1715
+ "empty": False,
1716
+ "is_phi": False,
1717
+ "required": True,
1718
+ "type": "string"
1719
+ },
1720
+ "dna_extracted": {
1721
+ "allowed": ["true"],
1722
+ DEFAULT_KEY: "true",
1723
+ "empty": False,
1724
+ "is_phi": False,
1725
+ "required": True,
1726
+ "type": "string"
1727
+ },
1728
+ "elevation": {
1729
+ "anyof": [
1730
+ {
1731
+ "allowed": [
1732
+ "not collected",
1733
+ "not provided",
1734
+ "restricted access"],
1735
+ "type": "string"
1736
+ },
1737
+ {
1738
+ "min": -413.0,
1739
+ "type": "number"
1740
+ }],
1741
+ DEFAULT_KEY: 14,
1742
+ "empty": False,
1743
+ "is_phi": False,
1744
+ "required": True,
1745
+ "type": "number"
1746
+ },
1747
+ "geo_loc_name": {
1748
+ "allowed": ["USA:CA:San Diego"],
1749
+ DEFAULT_KEY: "USA:CA:San Diego",
1750
+ "empty": False,
1751
+ "is_phi": False,
1752
+ "required": True,
1753
+ "type": "string"
1754
+ },
1755
+ "host_type": {
1756
+ "allowed": ["dude"],
1757
+ DEFAULT_KEY: "dude",
1758
+ "empty": False,
1759
+ "is_phi": False,
1760
+ "required": True,
1761
+ "type": "string"
1762
+ },
1763
+ "physical_specimen_location": {
1764
+ "allowed": ["UCSDST"],
1765
+ DEFAULT_KEY: "UCSDST",
1766
+ "empty": False,
1767
+ "is_phi": False,
1768
+ "required": True,
1769
+ "type": "string"
1770
+ },
1771
+ "physical_specimen_remaining": {
1772
+ "allowed": ["false"],
1773
+ DEFAULT_KEY: "false",
1774
+ "empty": False,
1775
+ "is_phi": False,
1776
+ "required": True,
1777
+ "type": "string"
1778
+ },
1779
+ QIITA_SAMPLE_TYPE: {
1780
+ "allowed": ["f"],
1781
+ DEFAULT_KEY: "f",
1782
+ "type": "string"
1783
+ },
1784
+ SAMPLE_TYPE_KEY: {
1785
+ "allowed": ["f"],
1786
+ DEFAULT_KEY: "f",
1787
+ "type": "string"
1788
+ }
1789
+ }
939
1790
  },
940
1791
  "fe": {
941
- "alias": "stool"
1792
+ # Resolved alias to stool - gets stool's resolved fields
1793
+ METADATA_FIELDS_KEY: {
1794
+ "country": {
1795
+ "allowed": ["USA"],
1796
+ DEFAULT_KEY: "USA",
1797
+ "empty": False,
1798
+ "is_phi": False,
1799
+ "required": True,
1800
+ "type": "string"
1801
+ },
1802
+ "description": {
1803
+ "allowed": ["human stool"],
1804
+ DEFAULT_KEY: "human stool",
1805
+ "empty": False,
1806
+ "is_phi": False,
1807
+ "required": True,
1808
+ "type": "string"
1809
+ },
1810
+ "dna_extracted": {
1811
+ "allowed": ["true"],
1812
+ DEFAULT_KEY: "true",
1813
+ "empty": False,
1814
+ "is_phi": False,
1815
+ "required": True,
1816
+ "type": "string"
1817
+ },
1818
+ "elevation": {
1819
+ "anyof": [
1820
+ {
1821
+ "allowed": [
1822
+ "not collected",
1823
+ "not provided",
1824
+ "restricted access"],
1825
+ "type": "string"
1826
+ },
1827
+ {
1828
+ "min": -413.0,
1829
+ "type": "number"
1830
+ }],
1831
+ DEFAULT_KEY: 14,
1832
+ "empty": False,
1833
+ "is_phi": False,
1834
+ "required": True,
1835
+ "type": "number"
1836
+ },
1837
+ "geo_loc_name": {
1838
+ "allowed": ["USA:CA:San Diego"],
1839
+ DEFAULT_KEY: "USA:CA:San Diego",
1840
+ "empty": False,
1841
+ "is_phi": False,
1842
+ "required": True,
1843
+ "type": "string"
1844
+ },
1845
+ "host_type": {
1846
+ "allowed": ["dude"],
1847
+ DEFAULT_KEY: "dude",
1848
+ "empty": False,
1849
+ "is_phi": False,
1850
+ "required": True,
1851
+ "type": "string"
1852
+ },
1853
+ "physical_specimen_location": {
1854
+ "allowed": ["UCSDST"],
1855
+ DEFAULT_KEY: "UCSDST",
1856
+ "empty": False,
1857
+ "is_phi": False,
1858
+ "required": True,
1859
+ "type": "string"
1860
+ },
1861
+ "physical_specimen_remaining": {
1862
+ "allowed": ["false"],
1863
+ DEFAULT_KEY: "false",
1864
+ "empty": False,
1865
+ "is_phi": False,
1866
+ "required": True,
1867
+ "type": "string"
1868
+ },
1869
+ QIITA_SAMPLE_TYPE: {
1870
+ "allowed": ["stool"],
1871
+ DEFAULT_KEY: "stool",
1872
+ "type": "string"
1873
+ },
1874
+ SAMPLE_TYPE_KEY: {
1875
+ "allowed": ["stool"],
1876
+ DEFAULT_KEY: "stool",
1877
+ "type": "string"
1878
+ }
1879
+ }
942
1880
  },
943
1881
  "feces": {
944
- "alias": "stool"
1882
+ # Resolved alias to stool - gets stool's resolved fields
1883
+ METADATA_FIELDS_KEY: {
1884
+ "country": {
1885
+ "allowed": ["USA"],
1886
+ DEFAULT_KEY: "USA",
1887
+ "empty": False,
1888
+ "is_phi": False,
1889
+ "required": True,
1890
+ "type": "string"
1891
+ },
1892
+ "description": {
1893
+ "allowed": ["human stool"],
1894
+ DEFAULT_KEY: "human stool",
1895
+ "empty": False,
1896
+ "is_phi": False,
1897
+ "required": True,
1898
+ "type": "string"
1899
+ },
1900
+ "dna_extracted": {
1901
+ "allowed": ["true"],
1902
+ DEFAULT_KEY: "true",
1903
+ "empty": False,
1904
+ "is_phi": False,
1905
+ "required": True,
1906
+ "type": "string"
1907
+ },
1908
+ "elevation": {
1909
+ "anyof": [
1910
+ {
1911
+ "allowed": [
1912
+ "not collected",
1913
+ "not provided",
1914
+ "restricted access"],
1915
+ "type": "string"
1916
+ },
1917
+ {
1918
+ "min": -413.0,
1919
+ "type": "number"
1920
+ }],
1921
+ DEFAULT_KEY: 14,
1922
+ "empty": False,
1923
+ "is_phi": False,
1924
+ "required": True,
1925
+ "type": "number"
1926
+ },
1927
+ "geo_loc_name": {
1928
+ "allowed": ["USA:CA:San Diego"],
1929
+ DEFAULT_KEY: "USA:CA:San Diego",
1930
+ "empty": False,
1931
+ "is_phi": False,
1932
+ "required": True,
1933
+ "type": "string"
1934
+ },
1935
+ "host_type": {
1936
+ "allowed": ["dude"],
1937
+ DEFAULT_KEY: "dude",
1938
+ "empty": False,
1939
+ "is_phi": False,
1940
+ "required": True,
1941
+ "type": "string"
1942
+ },
1943
+ "physical_specimen_location": {
1944
+ "allowed": ["UCSDST"],
1945
+ DEFAULT_KEY: "UCSDST",
1946
+ "empty": False,
1947
+ "is_phi": False,
1948
+ "required": True,
1949
+ "type": "string"
1950
+ },
1951
+ "physical_specimen_remaining": {
1952
+ "allowed": ["false"],
1953
+ DEFAULT_KEY: "false",
1954
+ "empty": False,
1955
+ "is_phi": False,
1956
+ "required": True,
1957
+ "type": "string"
1958
+ },
1959
+ QIITA_SAMPLE_TYPE: {
1960
+ "allowed": ["stool"],
1961
+ DEFAULT_KEY: "stool",
1962
+ "type": "string"
1963
+ },
1964
+ SAMPLE_TYPE_KEY: {
1965
+ "allowed": ["stool"],
1966
+ DEFAULT_KEY: "stool",
1967
+ "type": "string"
1968
+ }
1969
+ }
945
1970
  },
946
1971
  "stool": {
947
1972
  METADATA_FIELDS_KEY: {
1973
+ # Host fields merged in
1974
+ "country": {
1975
+ "allowed": ["USA"],
1976
+ DEFAULT_KEY: "USA",
1977
+ "empty": False,
1978
+ "is_phi": False,
1979
+ "required": True,
1980
+ "type": "string"
1981
+ },
948
1982
  # from stds same level host + sample type
949
1983
  "description": {
950
1984
  "allowed": ["human stool"],
951
1985
  DEFAULT_KEY: "human stool",
1986
+ "empty": False,
1987
+ "is_phi": False,
1988
+ "required": True,
1989
+ "type": "string"
1990
+ },
1991
+ "dna_extracted": {
1992
+ "allowed": ["true"],
1993
+ DEFAULT_KEY: "true",
1994
+ "empty": False,
1995
+ "is_phi": False,
1996
+ "required": True,
952
1997
  "type": "string"
953
1998
  },
954
1999
  # from stds same level host + sample type
955
2000
  "elevation": {
2001
+ "anyof": [
2002
+ {
2003
+ "allowed": [
2004
+ "not collected",
2005
+ "not provided",
2006
+ "restricted access"],
2007
+ "type": "string"
2008
+ },
2009
+ {
2010
+ "min": -413.0,
2011
+ "type": "number"
2012
+ }],
956
2013
  DEFAULT_KEY: 14,
2014
+ "empty": False,
2015
+ "is_phi": False,
2016
+ "required": True,
957
2017
  "type": "number"
958
2018
  },
2019
+ "geo_loc_name": {
2020
+ "allowed": ["USA:CA:San Diego"],
2021
+ DEFAULT_KEY: "USA:CA:San Diego",
2022
+ "empty": False,
2023
+ "is_phi": False,
2024
+ "required": True,
2025
+ "type": "string"
2026
+ },
2027
+ "host_type": {
2028
+ "allowed": ["dude"],
2029
+ DEFAULT_KEY: "dude",
2030
+ "empty": False,
2031
+ "is_phi": False,
2032
+ "required": True,
2033
+ "type": "string"
2034
+ },
959
2035
  # from stds parent level host + sample type
960
2036
  "physical_specimen_location": {
961
2037
  "allowed": ["UCSDST"],
@@ -973,6 +2049,16 @@ class TestMetadataConfigurator(TestCase):
973
2049
  "is_phi": False,
974
2050
  "required": True,
975
2051
  "type": "string"
2052
+ },
2053
+ QIITA_SAMPLE_TYPE: {
2054
+ "allowed": ["stool"],
2055
+ DEFAULT_KEY: "stool",
2056
+ "type": "string"
2057
+ },
2058
+ SAMPLE_TYPE_KEY: {
2059
+ "allowed": ["stool"],
2060
+ DEFAULT_KEY: "stool",
2061
+ "type": "string"
976
2062
  }
977
2063
  }
978
2064
  }
@@ -1105,7 +2191,6 @@ class TestMetadataConfigurator(TestCase):
1105
2191
  self.NESTED_STDS_W_STUDY_DICT[HOST_TYPE_SPECIFIC_METADATA_KEY],
1106
2192
  out_nested_dict)
1107
2193
 
1108
-
1109
2194
  def test_flatten_nested_stds_dict(self):
1110
2195
  """Test flattening a nested standards dictionary."""
1111
2196
  out_flattened_dict = flatten_nested_stds_dict(
@@ -1167,7 +2252,48 @@ class TestMetadataConfigurator(TestCase):
1167
2252
  }
1168
2253
  }
1169
2254
 
1170
- expected = input_dict[HOST_TYPE_SPECIFIC_METADATA_KEY]
2255
+ # After resolution, sample types have host metadata merged in
2256
+ # plus sample_type and qiita_sample_type fields
2257
+ expected = {
2258
+ "host_a": {
2259
+ DEFAULT_KEY: "not provided",
2260
+ METADATA_FIELDS_KEY: {
2261
+ "field1": {
2262
+ TYPE_KEY: "string",
2263
+ DEFAULT_KEY: "value1"
2264
+ }
2265
+ },
2266
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2267
+ "sample1": {
2268
+ METADATA_FIELDS_KEY: {
2269
+ "field1": {
2270
+ TYPE_KEY: "string",
2271
+ DEFAULT_KEY: "value1"
2272
+ },
2273
+ "sample_field": {TYPE_KEY: "string"},
2274
+ SAMPLE_TYPE_KEY: {
2275
+ ALLOWED_KEY: ["sample1"],
2276
+ DEFAULT_KEY: "sample1",
2277
+ TYPE_KEY: "string"
2278
+ },
2279
+ QIITA_SAMPLE_TYPE: {
2280
+ ALLOWED_KEY: ["sample1"],
2281
+ DEFAULT_KEY: "sample1",
2282
+ TYPE_KEY: "string"
2283
+ }
2284
+ }
2285
+ }
2286
+ }
2287
+ },
2288
+ "host_b": {
2289
+ DEFAULT_KEY: "not collected",
2290
+ METADATA_FIELDS_KEY: {
2291
+ "field2": {
2292
+ TYPE_KEY: "integer"
2293
+ }
2294
+ }
2295
+ }
2296
+ }
1171
2297
 
1172
2298
  result = flatten_nested_stds_dict(input_dict, None)
1173
2299
 
@@ -1261,8 +2387,8 @@ class TestMetadataConfigurator(TestCase):
1261
2387
  "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
1262
2388
  }
1263
2389
  },
1264
- "saliva": {
1265
- ALIAS_KEY: "oral"
2390
+ "fe": {
2391
+ ALIAS_KEY: "stool"
1266
2392
  }
1267
2393
  },
1268
2394
  HOST_TYPE_SPECIFIC_METADATA_KEY: {
@@ -1285,17 +2411,41 @@ class TestMetadataConfigurator(TestCase):
1285
2411
  }
1286
2412
  }
1287
2413
 
2414
+ # After resolution, each sample type has resolved metadata_fields
2415
+ # with host metadata merged in plus sample_type and qiita_sample_type
1288
2416
  expected = {
1289
2417
  "parent_host": {
1290
2418
  DEFAULT_KEY: "not provided",
1291
2419
  SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
1292
2420
  "stool": {
1293
2421
  METADATA_FIELDS_KEY: {
1294
- "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"}
2422
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2423
+ SAMPLE_TYPE_KEY: {
2424
+ ALLOWED_KEY: ["stool"],
2425
+ DEFAULT_KEY: "stool",
2426
+ TYPE_KEY: "string"
2427
+ },
2428
+ QIITA_SAMPLE_TYPE: {
2429
+ ALLOWED_KEY: ["stool"],
2430
+ DEFAULT_KEY: "stool",
2431
+ TYPE_KEY: "string"
2432
+ }
1295
2433
  }
1296
2434
  },
1297
- "saliva": {
1298
- ALIAS_KEY: "oral"
2435
+ "fe": {
2436
+ METADATA_FIELDS_KEY: {
2437
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2438
+ SAMPLE_TYPE_KEY: {
2439
+ ALLOWED_KEY: ["stool"],
2440
+ DEFAULT_KEY: "stool",
2441
+ TYPE_KEY: "string"
2442
+ },
2443
+ QIITA_SAMPLE_TYPE: {
2444
+ ALLOWED_KEY: ["stool"],
2445
+ DEFAULT_KEY: "stool",
2446
+ TYPE_KEY: "string"
2447
+ }
2448
+ }
1299
2449
  }
1300
2450
  }
1301
2451
  },
@@ -1305,15 +2455,48 @@ class TestMetadataConfigurator(TestCase):
1305
2455
  "stool": {
1306
2456
  METADATA_FIELDS_KEY: {
1307
2457
  "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
1308
- "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"}
2458
+ "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
2459
+ SAMPLE_TYPE_KEY: {
2460
+ ALLOWED_KEY: ["stool"],
2461
+ DEFAULT_KEY: "stool",
2462
+ TYPE_KEY: "string"
2463
+ },
2464
+ QIITA_SAMPLE_TYPE: {
2465
+ ALLOWED_KEY: ["stool"],
2466
+ DEFAULT_KEY: "stool",
2467
+ TYPE_KEY: "string"
2468
+ }
1309
2469
  }
1310
2470
  },
1311
- "saliva": {
1312
- ALIAS_KEY: "oral"
2471
+ "fe": {
2472
+ METADATA_FIELDS_KEY: {
2473
+ "parent_field": {TYPE_KEY: "string", DEFAULT_KEY: "parent"},
2474
+ "child_field": {TYPE_KEY: "string", DEFAULT_KEY: "child"},
2475
+ SAMPLE_TYPE_KEY: {
2476
+ ALLOWED_KEY: ["stool"],
2477
+ DEFAULT_KEY: "stool",
2478
+ TYPE_KEY: "string"
2479
+ },
2480
+ QIITA_SAMPLE_TYPE: {
2481
+ ALLOWED_KEY: ["stool"],
2482
+ DEFAULT_KEY: "stool",
2483
+ TYPE_KEY: "string"
2484
+ }
2485
+ }
1313
2486
  },
1314
2487
  "blood": {
1315
2488
  METADATA_FIELDS_KEY: {
1316
- "blood_field": {TYPE_KEY: "string"}
2489
+ "blood_field": {TYPE_KEY: "string"},
2490
+ SAMPLE_TYPE_KEY: {
2491
+ ALLOWED_KEY: ["blood"],
2492
+ DEFAULT_KEY: "blood",
2493
+ TYPE_KEY: "string"
2494
+ },
2495
+ QIITA_SAMPLE_TYPE: {
2496
+ ALLOWED_KEY: ["blood"],
2497
+ DEFAULT_KEY: "blood",
2498
+ TYPE_KEY: "string"
2499
+ }
1317
2500
  }
1318
2501
  }
1319
2502
  }
@@ -1509,7 +2692,7 @@ class TestMetadataConfigurator(TestCase):
1509
2692
  }
1510
2693
  }
1511
2694
  }
1512
-
2695
+
1513
2696
  expected = {
1514
2697
  "field1": {
1515
2698
  "allowed": ["value2"],
@@ -1523,7 +2706,7 @@ class TestMetadataConfigurator(TestCase):
1523
2706
  "allowed": ["valueX"]
1524
2707
  }
1525
2708
  }
1526
-
2709
+
1527
2710
  result = _combine_base_and_added_metadata_fields(base_dict, add_dict)
1528
2711
  self.assertDictEqual(expected, result)
1529
2712
 
@@ -1676,8 +2859,8 @@ class TestMetadataConfigurator(TestCase):
1676
2859
  "location": {TYPE_KEY: "string", DEFAULT_KEY: "UCSD"}
1677
2860
  }
1678
2861
  },
1679
- "saliva": {
1680
- ALIAS_KEY: "oral"
2862
+ "fe": {
2863
+ ALIAS_KEY: "stool"
1681
2864
  }
1682
2865
  }
1683
2866
  }
@@ -1737,8 +2920,8 @@ class TestMetadataConfigurator(TestCase):
1737
2920
  }
1738
2921
  },
1739
2922
  # Preserved from base
1740
- "saliva": {
1741
- ALIAS_KEY: "oral"
2923
+ "fe": {
2924
+ ALIAS_KEY: "stool"
1742
2925
  },
1743
2926
  # New from add
1744
2927
  "blood": {
@@ -1847,7 +3030,7 @@ class TestMetadataConfigurator(TestCase):
1847
3030
  }
1848
3031
  }
1849
3032
  }
1850
-
3033
+
1851
3034
  expected = {
1852
3035
  "sample_type1": {
1853
3036
  "alias": "sample_type2"
@@ -1878,7 +3061,7 @@ class TestMetadataConfigurator(TestCase):
1878
3061
  }
1879
3062
  }
1880
3063
  }
1881
-
3064
+
1882
3065
  result = _combine_base_and_added_sample_type_specific_metadata(base_dict, add_dict)
1883
3066
  self.assertDictEqual(expected, result)
1884
3067
 
@@ -2150,175 +3333,461 @@ class TestMetadataConfigurator(TestCase):
2150
3333
  with self.assertRaisesRegex(ValueError, "Sample type 'test_sample' has neither 'alias' nor 'metadata_fields' keys"):
2151
3334
  _id_sample_type_definition("test_sample", sample_dict)
2152
3335
 
2153
- # Tests for build_full_flat_config_dict
2154
-
2155
- def test_build_full_flat_config_dict_no_inputs(self):
2156
- """Test build_full_flat_config_dict with no arguments uses all defaults."""
2157
- result = build_full_flat_config_dict()
2158
-
2159
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2160
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2161
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2162
- self.assertIsInstance(hosts_dict, dict)
3336
+ # Tests for _resolve_sample_type_aliases_and_bases
2163
3337
 
2164
- # Should have "base" host type with sample_name metadata field
2165
- self.assertIn("base", hosts_dict)
2166
- base_host = hosts_dict["base"]
2167
- self.assertIn(METADATA_FIELDS_KEY, base_host)
2168
- self.assertIn("sample_name", base_host[METADATA_FIELDS_KEY])
3338
+ def test__resolve_sample_type_aliases_and_bases_simple(self):
3339
+ """Test basic resolution with no aliases or bases.
2169
3340
 
2170
- # Should have "human" host type with host_common_name defaulting to "human"
2171
- self.assertIn("human", hosts_dict)
2172
- human_host = hosts_dict["human"]
2173
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2174
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2175
- self.assertEqual(
2176
- "human",
2177
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3341
+ Input: Single sample type with metadata fields, empty host metadata.
3342
+ Expected: Sample type has its metadata fields plus sample_type and qiita_sample_type added.
3343
+ """
3344
+ sample_types_dict = {
3345
+ "stool": {
3346
+ METADATA_FIELDS_KEY: {
3347
+ "body_site": {
3348
+ DEFAULT_KEY: "gut",
3349
+ TYPE_KEY: "string"
3350
+ }
3351
+ }
3352
+ }
3353
+ }
3354
+ host_metadata_fields_dict = {}
2178
3355
 
2179
- # Should have default software config keys with expected default value
2180
- self.assertIn(DEFAULT_KEY, result)
2181
- self.assertEqual("not applicable", result[DEFAULT_KEY])
3356
+ result = _resolve_sample_type_aliases_and_bases(
3357
+ sample_types_dict, host_metadata_fields_dict)
2182
3358
 
2183
- def test_build_full_flat_config_dict_with_study_config(self):
2184
- """Test build_full_flat_config_dict with study config merges correctly."""
2185
- software_config = {
2186
- DEFAULT_KEY: "software_default",
2187
- LEAVE_REQUIREDS_BLANK_KEY: True,
2188
- OVERWRITE_NON_NANS_KEY: False
2189
- }
2190
- study_config = {
2191
- STUDY_SPECIFIC_METADATA_KEY: {
2192
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2193
- "human": {
2194
- METADATA_FIELDS_KEY: {
2195
- "custom_field": {
2196
- DEFAULT_KEY: "custom_value",
2197
- TYPE_KEY: "string"
2198
- }
2199
- },
2200
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2201
- "stool": {
2202
- METADATA_FIELDS_KEY: {}
2203
- }
2204
- }
3359
+ expected = {
3360
+ "stool": {
3361
+ METADATA_FIELDS_KEY: {
3362
+ "body_site": {
3363
+ DEFAULT_KEY: "gut",
3364
+ TYPE_KEY: "string"
3365
+ },
3366
+ # sample_type field added by resolution
3367
+ SAMPLE_TYPE_KEY: {
3368
+ ALLOWED_KEY: ["stool"],
3369
+ DEFAULT_KEY: "stool",
3370
+ TYPE_KEY: "string"
3371
+ },
3372
+ # qiita_sample_type field added by resolution (same as sample_type)
3373
+ QIITA_SAMPLE_TYPE: {
3374
+ ALLOWED_KEY: ["stool"],
3375
+ DEFAULT_KEY: "stool",
3376
+ TYPE_KEY: "string"
2205
3377
  }
2206
3378
  }
2207
3379
  }
2208
3380
  }
3381
+ self.assertDictEqual(expected, result)
2209
3382
 
2210
- result = build_full_flat_config_dict(
2211
- study_config, software_config, self.TEST_STDS_FP)
3383
+ def test__resolve_sample_type_aliases_and_bases_with_alias(self):
3384
+ """Test that alias is resolved to target sample type's metadata.
2212
3385
 
2213
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2214
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2215
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2216
- self.assertIsInstance(hosts_dict, dict)
3386
+ Input: 'feces' is alias to 'stool', 'stool' has metadata.
3387
+ Expected: Both 'feces' and 'stool' are resolved with same metadata,
3388
+ but sample_type field uses the alias target name ('stool').
3389
+ """
3390
+ sample_types_dict = {
3391
+ "feces": {
3392
+ ALIAS_KEY: "stool"
3393
+ },
3394
+ "stool": {
3395
+ METADATA_FIELDS_KEY: {
3396
+ "stool_field": {
3397
+ DEFAULT_KEY: "stool_value",
3398
+ TYPE_KEY: "string"
3399
+ }
3400
+ }
3401
+ }
3402
+ }
3403
+ host_metadata_fields_dict = {}
2217
3404
 
2218
- # Should have "human" host type with host_common_name defaulting to "human"
2219
- self.assertIn("human", hosts_dict)
2220
- human_host = hosts_dict["human"]
2221
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2222
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2223
- self.assertEqual(
2224
- "human",
2225
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3405
+ result = _resolve_sample_type_aliases_and_bases(
3406
+ sample_types_dict, host_metadata_fields_dict)
2226
3407
 
2227
- # Should have custom_field from study config
2228
- self.assertIn("custom_field", human_host[METADATA_FIELDS_KEY])
2229
- self.assertEqual(
2230
- "custom_value",
2231
- human_host[METADATA_FIELDS_KEY]["custom_field"][DEFAULT_KEY])
3408
+ # Both entries resolve to same metadata, sample_type uses alias target name
3409
+ stool_resolved_metadata = {
3410
+ "stool_field": {
3411
+ DEFAULT_KEY: "stool_value",
3412
+ TYPE_KEY: "string"
3413
+ },
3414
+ SAMPLE_TYPE_KEY: {
3415
+ ALLOWED_KEY: ["stool"],
3416
+ DEFAULT_KEY: "stool",
3417
+ TYPE_KEY: "string"
3418
+ },
3419
+ QIITA_SAMPLE_TYPE: {
3420
+ ALLOWED_KEY: ["stool"],
3421
+ DEFAULT_KEY: "stool",
3422
+ TYPE_KEY: "string"
3423
+ }
3424
+ }
3425
+ expected = {
3426
+ # Alias entry resolves to same metadata as target (sample_type="stool")
3427
+ "feces": {
3428
+ METADATA_FIELDS_KEY: stool_resolved_metadata
3429
+ },
3430
+ # Target sample type is fully resolved
3431
+ "stool": {
3432
+ METADATA_FIELDS_KEY: stool_resolved_metadata
3433
+ }
3434
+ }
3435
+ self.assertDictEqual(expected, result)
2232
3436
 
2233
- # Should have software config default value
2234
- self.assertIn(DEFAULT_KEY, result)
2235
- self.assertEqual("software_default", result[DEFAULT_KEY])
3437
+ def test__resolve_sample_type_aliases_and_bases_chained_alias_raises(self):
3438
+ """Test that chained aliases raise ValueError.
2236
3439
 
2237
- def test_build_full_flat_config_dict_without_study_config(self):
2238
- """Test build_full_flat_config_dict with no study config uses standards only."""
2239
- software_config = {
2240
- DEFAULT_KEY: "software_default",
2241
- LEAVE_REQUIREDS_BLANK_KEY: True,
2242
- OVERWRITE_NON_NANS_KEY: False
3440
+ Input: 'feces' aliases to 'stool', 'stool' aliases to 'poop'.
3441
+ Expected: ValueError because chained aliases are not allowed.
3442
+ """
3443
+ sample_types_dict = {
3444
+ "feces": {
3445
+ ALIAS_KEY: "stool"
3446
+ },
3447
+ "stool": {
3448
+ ALIAS_KEY: "poop"
3449
+ },
3450
+ "poop": {
3451
+ METADATA_FIELDS_KEY: {}
3452
+ }
2243
3453
  }
3454
+ host_metadata_fields_dict = {}
2244
3455
 
2245
- result = build_full_flat_config_dict(
2246
- None, software_config, self.TEST_STDS_FP)
3456
+ with self.assertRaisesRegex(ValueError, "May not chain aliases"):
3457
+ _resolve_sample_type_aliases_and_bases(
3458
+ sample_types_dict, host_metadata_fields_dict)
2247
3459
 
2248
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2249
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2250
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2251
- self.assertIsInstance(hosts_dict, dict)
3460
+ def test__resolve_sample_type_aliases_and_bases_with_base_type(self):
3461
+ """Test that base type fields are inherited and overlaid.
2252
3462
 
2253
- # Should have "human" host type with host_common_name defaulting to "human"
2254
- self.assertIn("human", hosts_dict)
2255
- human_host = hosts_dict["human"]
2256
- self.assertIn(METADATA_FIELDS_KEY, human_host)
2257
- self.assertIn("host_common_name", human_host[METADATA_FIELDS_KEY])
2258
- self.assertEqual(
2259
- "human",
2260
- human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
3463
+ Input: 'derived_sample' has base_type 'base_sample'.
3464
+ Expected: 'derived_sample' inherits base fields, adds own, base_type key removed.
3465
+ """
3466
+ sample_types_dict = {
3467
+ "base_sample": {
3468
+ METADATA_FIELDS_KEY: {
3469
+ "base_field": {
3470
+ DEFAULT_KEY: "base_value",
3471
+ TYPE_KEY: "string"
3472
+ }
3473
+ }
3474
+ },
3475
+ "derived_sample": {
3476
+ BASE_TYPE_KEY: "base_sample",
3477
+ METADATA_FIELDS_KEY: {
3478
+ "derived_field": {
3479
+ DEFAULT_KEY: "derived_value",
3480
+ TYPE_KEY: "string"
3481
+ }
3482
+ }
3483
+ }
3484
+ }
3485
+ host_metadata_fields_dict = {}
2261
3486
 
2262
- # Should preserve software config settings
2263
- self.assertEqual("software_default", result[DEFAULT_KEY])
3487
+ result = _resolve_sample_type_aliases_and_bases(
3488
+ sample_types_dict, host_metadata_fields_dict)
2264
3489
 
2265
- def test_build_full_flat_config_dict_merges_software_and_study(self):
2266
- """Test that study config values override software config values."""
2267
- software_config = {
2268
- DEFAULT_KEY: "software_default",
2269
- LEAVE_REQUIREDS_BLANK_KEY: False,
2270
- OVERWRITE_NON_NANS_KEY: True
2271
- }
2272
- study_config = {
2273
- DEFAULT_KEY: "study_default",
2274
- LEAVE_REQUIREDS_BLANK_KEY: True,
2275
- STUDY_SPECIFIC_METADATA_KEY: {
2276
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2277
- "human": {
2278
- METADATA_FIELDS_KEY: {},
2279
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2280
- "stool": {
2281
- METADATA_FIELDS_KEY: {}
2282
- }
2283
- }
3490
+ expected = {
3491
+ # Base sample type is fully resolved
3492
+ "base_sample": {
3493
+ METADATA_FIELDS_KEY: {
3494
+ "base_field": {
3495
+ DEFAULT_KEY: "base_value",
3496
+ TYPE_KEY: "string"
3497
+ },
3498
+ SAMPLE_TYPE_KEY: {
3499
+ ALLOWED_KEY: ["base_sample"],
3500
+ DEFAULT_KEY: "base_sample",
3501
+ TYPE_KEY: "string"
3502
+ },
3503
+ QIITA_SAMPLE_TYPE: {
3504
+ ALLOWED_KEY: ["base_sample"],
3505
+ DEFAULT_KEY: "base_sample",
3506
+ TYPE_KEY: "string"
3507
+ }
3508
+ }
3509
+ },
3510
+ # Derived sample type inherits base fields, base_type key removed
3511
+ "derived_sample": {
3512
+ METADATA_FIELDS_KEY: {
3513
+ # Inherited from base
3514
+ "base_field": {
3515
+ DEFAULT_KEY: "base_value",
3516
+ TYPE_KEY: "string"
3517
+ },
3518
+ # Own field
3519
+ "derived_field": {
3520
+ DEFAULT_KEY: "derived_value",
3521
+ TYPE_KEY: "string"
3522
+ },
3523
+ SAMPLE_TYPE_KEY: {
3524
+ ALLOWED_KEY: ["derived_sample"],
3525
+ DEFAULT_KEY: "derived_sample",
3526
+ TYPE_KEY: "string"
3527
+ },
3528
+ QIITA_SAMPLE_TYPE: {
3529
+ ALLOWED_KEY: ["derived_sample"],
3530
+ DEFAULT_KEY: "derived_sample",
3531
+ TYPE_KEY: "string"
2284
3532
  }
2285
3533
  }
2286
3534
  }
2287
3535
  }
3536
+ self.assertDictEqual(expected, result)
2288
3537
 
2289
- result = build_full_flat_config_dict(
2290
- study_config, software_config, self.TEST_STDS_FP)
3538
+ def test__resolve_sample_type_aliases_and_bases_base_type_invalid_raises(self):
3539
+ """Test that base type with non-metadata-fields keys raises ValueError.
2291
3540
 
2292
- # Study config should override software config
2293
- self.assertEqual("study_default", result[DEFAULT_KEY])
2294
- self.assertTrue(result[LEAVE_REQUIREDS_BLANK_KEY])
2295
- # Software config value should be preserved when not overridden
2296
- self.assertTrue(result[OVERWRITE_NON_NANS_KEY])
3541
+ Input: Base sample type has extra keys beyond metadata_fields.
3542
+ Expected: ValueError because base must only have metadata_fields.
3543
+ """
3544
+ sample_types_dict = {
3545
+ "base_sample": {
3546
+ METADATA_FIELDS_KEY: {
3547
+ "base_field": {DEFAULT_KEY: "value", TYPE_KEY: "string"}
3548
+ },
3549
+ "extra_key": "not_allowed"
3550
+ },
3551
+ "derived_sample": {
3552
+ BASE_TYPE_KEY: "base_sample",
3553
+ METADATA_FIELDS_KEY: {}
3554
+ }
3555
+ }
3556
+ host_metadata_fields_dict = {}
2297
3557
 
2298
- def test_build_full_flat_config_dict_none_software_config(self):
2299
- """Test that None software_config loads defaults from config.yml."""
2300
- study_config = {
2301
- STUDY_SPECIFIC_METADATA_KEY: {
2302
- HOST_TYPE_SPECIFIC_METADATA_KEY: {
2303
- "human": {
2304
- METADATA_FIELDS_KEY: {},
2305
- SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
2306
- "stool": {
2307
- METADATA_FIELDS_KEY: {}
2308
- }
2309
- }
3558
+ with self.assertRaisesRegex(ValueError, "must only have metadata fields"):
3559
+ _resolve_sample_type_aliases_and_bases(
3560
+ sample_types_dict, host_metadata_fields_dict)
3561
+
3562
+ def test__resolve_sample_type_aliases_and_bases_sets_sample_type(self):
3563
+ """Test that sample_type field is added with correct allowed/default.
3564
+
3565
+ Input: Sample type without sample_type field.
3566
+ Expected: sample_type field added with allowed=[sample_type_name], default=sample_type_name.
3567
+ """
3568
+ sample_types_dict = {
3569
+ "blood": {
3570
+ METADATA_FIELDS_KEY: {
3571
+ "body_site": {
3572
+ DEFAULT_KEY: "blood",
3573
+ TYPE_KEY: "string"
2310
3574
  }
2311
3575
  }
2312
3576
  }
2313
3577
  }
3578
+ host_metadata_fields_dict = {}
2314
3579
 
2315
- result = build_full_flat_config_dict(
2316
- study_config, None, self.TEST_STDS_FP)
3580
+ result = _resolve_sample_type_aliases_and_bases(
3581
+ sample_types_dict, host_metadata_fields_dict)
2317
3582
 
2318
- # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
2319
- self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
2320
- hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
2321
- self.assertIsInstance(hosts_dict, dict)
3583
+ expected = {
3584
+ "blood": {
3585
+ METADATA_FIELDS_KEY: {
3586
+ "body_site": {
3587
+ DEFAULT_KEY: "blood",
3588
+ TYPE_KEY: "string"
3589
+ },
3590
+ SAMPLE_TYPE_KEY: {
3591
+ ALLOWED_KEY: ["blood"],
3592
+ DEFAULT_KEY: "blood",
3593
+ TYPE_KEY: "string"
3594
+ },
3595
+ QIITA_SAMPLE_TYPE: {
3596
+ ALLOWED_KEY: ["blood"],
3597
+ DEFAULT_KEY: "blood",
3598
+ TYPE_KEY: "string"
3599
+ }
3600
+ }
3601
+ }
3602
+ }
3603
+ self.assertDictEqual(expected, result)
3604
+
3605
+ def test__resolve_sample_type_aliases_and_bases_preserves_existing_qiita_sample_type(self):
3606
+ """Test that existing qiita_sample_type is not overwritten.
3607
+
3608
+ Input: Sample type already has qiita_sample_type defined with very different value.
3609
+ Expected: Existing qiita_sample_type preserved exactly, sample_type still added.
3610
+ """
3611
+ sample_types_dict = {
3612
+ "stool": {
3613
+ METADATA_FIELDS_KEY: {
3614
+ "body_site": {
3615
+ DEFAULT_KEY: "gut",
3616
+ TYPE_KEY: "string"
3617
+ },
3618
+ # Pre-existing qiita_sample_type with VERY different value
3619
+ # to make it clear it's preserved, not overwritten
3620
+ QIITA_SAMPLE_TYPE: {
3621
+ ALLOWED_KEY: ["CUSTOM_QIITA_VALUE_12345"],
3622
+ DEFAULT_KEY: "CUSTOM_QIITA_VALUE_12345",
3623
+ TYPE_KEY: "string"
3624
+ }
3625
+ }
3626
+ }
3627
+ }
3628
+ host_metadata_fields_dict = {}
3629
+
3630
+ result = _resolve_sample_type_aliases_and_bases(
3631
+ sample_types_dict, host_metadata_fields_dict)
3632
+
3633
+ expected = {
3634
+ "stool": {
3635
+ METADATA_FIELDS_KEY: {
3636
+ "body_site": {
3637
+ DEFAULT_KEY: "gut",
3638
+ TYPE_KEY: "string"
3639
+ },
3640
+ # sample_type added (would be "stool")
3641
+ SAMPLE_TYPE_KEY: {
3642
+ ALLOWED_KEY: ["stool"],
3643
+ DEFAULT_KEY: "stool",
3644
+ TYPE_KEY: "string"
3645
+ },
3646
+ # Pre-existing qiita_sample_type preserved exactly (NOT "stool")
3647
+ QIITA_SAMPLE_TYPE: {
3648
+ ALLOWED_KEY: ["CUSTOM_QIITA_VALUE_12345"],
3649
+ DEFAULT_KEY: "CUSTOM_QIITA_VALUE_12345",
3650
+ TYPE_KEY: "string"
3651
+ }
3652
+ }
3653
+ }
3654
+ }
3655
+ self.assertDictEqual(expected, result)
3656
+
3657
+ def test__resolve_sample_type_aliases_and_bases_merges_with_host_metadata(self):
3658
+ """Test that host-level metadata fields are merged with sample-type fields.
3659
+
3660
+ Input: Host has host_common_name field, sample type has body_site field.
3661
+ Expected: Resolved sample type has both fields merged.
3662
+ """
3663
+ sample_types_dict = {
3664
+ "stool": {
3665
+ METADATA_FIELDS_KEY: {
3666
+ "body_site": {
3667
+ DEFAULT_KEY: "gut",
3668
+ TYPE_KEY: "string"
3669
+ }
3670
+ }
3671
+ }
3672
+ }
3673
+ host_metadata_fields_dict = {
3674
+ "host_common_name": {
3675
+ DEFAULT_KEY: "human",
3676
+ TYPE_KEY: "string"
3677
+ }
3678
+ }
3679
+
3680
+ result = _resolve_sample_type_aliases_and_bases(
3681
+ sample_types_dict, host_metadata_fields_dict)
3682
+
3683
+ expected = {
3684
+ "stool": {
3685
+ METADATA_FIELDS_KEY: {
3686
+ # Host-level field merged in
3687
+ "host_common_name": {
3688
+ DEFAULT_KEY: "human",
3689
+ TYPE_KEY: "string"
3690
+ },
3691
+ # Sample-type field
3692
+ "body_site": {
3693
+ DEFAULT_KEY: "gut",
3694
+ TYPE_KEY: "string"
3695
+ },
3696
+ SAMPLE_TYPE_KEY: {
3697
+ ALLOWED_KEY: ["stool"],
3698
+ DEFAULT_KEY: "stool",
3699
+ TYPE_KEY: "string"
3700
+ },
3701
+ QIITA_SAMPLE_TYPE: {
3702
+ ALLOWED_KEY: ["stool"],
3703
+ DEFAULT_KEY: "stool",
3704
+ TYPE_KEY: "string"
3705
+ }
3706
+ }
3707
+ }
3708
+ }
3709
+ self.assertDictEqual(expected, result)
3710
+
3711
+ def test__resolve_sample_type_aliases_and_bases_sample_overrides_host(self):
3712
+ """Test that sample-level field overrides host-level field with same name.
3713
+
3714
+ Input: Host has description="host description", sample type also has description="sample description".
3715
+ Expected: Sample-level description value wins.
3716
+ """
3717
+ sample_types_dict = {
3718
+ "stool": {
3719
+ METADATA_FIELDS_KEY: {
3720
+ # Sample-level description should override host-level
3721
+ "description": {
3722
+ DEFAULT_KEY: "sample-level description value",
3723
+ TYPE_KEY: "string"
3724
+ }
3725
+ }
3726
+ }
3727
+ }
3728
+ host_metadata_fields_dict = {
3729
+ # Host-level description should be overridden
3730
+ "description": {
3731
+ DEFAULT_KEY: "host-level description value",
3732
+ TYPE_KEY: "string"
3733
+ },
3734
+ "host_common_name": {
3735
+ DEFAULT_KEY: "human",
3736
+ TYPE_KEY: "string"
3737
+ }
3738
+ }
3739
+
3740
+ result = _resolve_sample_type_aliases_and_bases(
3741
+ sample_types_dict, host_metadata_fields_dict)
3742
+
3743
+ expected = {
3744
+ "stool": {
3745
+ METADATA_FIELDS_KEY: {
3746
+ # Host-level field that wasn't overridden
3747
+ "host_common_name": {
3748
+ DEFAULT_KEY: "human",
3749
+ TYPE_KEY: "string"
3750
+ },
3751
+ # Description: sample-level value wins over host-level
3752
+ "description": {
3753
+ DEFAULT_KEY: "sample-level description value",
3754
+ TYPE_KEY: "string"
3755
+ },
3756
+ SAMPLE_TYPE_KEY: {
3757
+ ALLOWED_KEY: ["stool"],
3758
+ DEFAULT_KEY: "stool",
3759
+ TYPE_KEY: "string"
3760
+ },
3761
+ QIITA_SAMPLE_TYPE: {
3762
+ ALLOWED_KEY: ["stool"],
3763
+ DEFAULT_KEY: "stool",
3764
+ TYPE_KEY: "string"
3765
+ }
3766
+ }
3767
+ }
3768
+ }
3769
+ self.assertDictEqual(expected, result)
3770
+
3771
+ # Tests for build_full_flat_config_dict
3772
+
3773
+ def test_build_full_flat_config_dict_no_inputs(self):
3774
+ """Test build_full_flat_config_dict with no arguments uses all defaults."""
3775
+ result = build_full_flat_config_dict()
3776
+
3777
+ # These tests are less specific because they depend on the actual contents
3778
+ # of the default standards file, which may change over time, so
3779
+ # we just verify the presence of key structures rather than exact contents.
3780
+
3781
+ # Should have HOST_TYPE_SPECIFIC_METADATA_KEY
3782
+ self.assertIn(HOST_TYPE_SPECIFIC_METADATA_KEY, result)
3783
+ hosts_dict = result[HOST_TYPE_SPECIFIC_METADATA_KEY]
3784
+ self.assertIsInstance(hosts_dict, dict)
3785
+
3786
+ # Should have "base" host type with sample_name metadata field
3787
+ self.assertIn("base", hosts_dict)
3788
+ base_host = hosts_dict["base"]
3789
+ self.assertIn(METADATA_FIELDS_KEY, base_host)
3790
+ self.assertIn("sample_name", base_host[METADATA_FIELDS_KEY])
2322
3791
 
2323
3792
  # Should have "human" host type with host_common_name defaulting to "human"
2324
3793
  self.assertIn("human", hosts_dict)
@@ -2329,6 +3798,1073 @@ class TestMetadataConfigurator(TestCase):
2329
3798
  "human",
2330
3799
  human_host[METADATA_FIELDS_KEY]["host_common_name"][DEFAULT_KEY])
2331
3800
 
2332
- # Should have loaded default software config (which includes DEFAULT_KEY)
3801
+ # Should have default software config keys with expected default value
2333
3802
  self.assertIn(DEFAULT_KEY, result)
2334
3803
  self.assertEqual("not applicable", result[DEFAULT_KEY])
3804
+
3805
+ def test_build_full_flat_config_dict_with_study_config(self):
3806
+ """Test build_full_flat_config_dict with study config merges correctly.
3807
+
3808
+ test_standards.yml structure: base -> host_associated -> human/mouse
3809
+ This tests that:
3810
+ 1. Fields are inherited through the nesting hierarchy
3811
+ 2. Study-specific fields are merged into the flattened output
3812
+ """
3813
+ software_config = {
3814
+ DEFAULT_KEY: "software_default",
3815
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3816
+ OVERWRITE_NON_NANS_KEY: False
3817
+ }
3818
+ study_config = {
3819
+ STUDY_SPECIFIC_METADATA_KEY: {
3820
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
3821
+ "human": {
3822
+ METADATA_FIELDS_KEY: {
3823
+ "custom_field": {
3824
+ DEFAULT_KEY: "custom_value",
3825
+ TYPE_KEY: "string"
3826
+ }
3827
+ },
3828
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
3829
+ "stool": {
3830
+ METADATA_FIELDS_KEY: {}
3831
+ }
3832
+ }
3833
+ }
3834
+ }
3835
+ }
3836
+ }
3837
+
3838
+ result = build_full_flat_config_dict(
3839
+ study_config, software_config, self.TEST_STDS_FP)
3840
+
3841
+ expected = {
3842
+ # Top-level keys from software_config
3843
+ DEFAULT_KEY: "software_default",
3844
+ LEAVE_REQUIREDS_BLANK_KEY: True,
3845
+ OVERWRITE_NON_NANS_KEY: False,
3846
+ # Flattened host types from standards + study
3847
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
3848
+ # base: top level in test_standards.yml, no default
3849
+ "base": {
3850
+ METADATA_FIELDS_KEY: {
3851
+ # sample_name defined at base level
3852
+ "sample_name": {
3853
+ REQUIRED_KEY: True,
3854
+ TYPE_KEY: "string",
3855
+ "unique": True
3856
+ },
3857
+ # sample_type defined at base level
3858
+ "sample_type": {
3859
+ REQUIRED_KEY: True,
3860
+ TYPE_KEY: "string"
3861
+ }
3862
+ }
3863
+ },
3864
+ # host_associated: nested under base, inherits sample_name/sample_type
3865
+ "host_associated": {
3866
+ # default defined at host_associated level
3867
+ DEFAULT_KEY: "not provided",
3868
+ METADATA_FIELDS_KEY: {
3869
+ # description defined at host_associated level
3870
+ "description": {
3871
+ DEFAULT_KEY: "host associated sample",
3872
+ TYPE_KEY: "string"
3873
+ },
3874
+ # sample_name inherited from base
3875
+ "sample_name": {
3876
+ REQUIRED_KEY: True,
3877
+ TYPE_KEY: "string",
3878
+ "unique": True
3879
+ },
3880
+ # sample_type inherited from base
3881
+ "sample_type": {
3882
+ REQUIRED_KEY: True,
3883
+ TYPE_KEY: "string"
3884
+ }
3885
+ },
3886
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
3887
+ # stool defined at host_associated level
3888
+ "stool": {
3889
+ METADATA_FIELDS_KEY: {
3890
+ "body_site": {
3891
+ DEFAULT_KEY: "gut",
3892
+ TYPE_KEY: "string"
3893
+ },
3894
+ "description": {
3895
+ DEFAULT_KEY: "host associated sample",
3896
+ TYPE_KEY: "string"
3897
+ },
3898
+ QIITA_SAMPLE_TYPE: {
3899
+ ALLOWED_KEY: ["stool"],
3900
+ DEFAULT_KEY: "stool",
3901
+ TYPE_KEY: "string"
3902
+ },
3903
+ "sample_name": {
3904
+ REQUIRED_KEY: True,
3905
+ TYPE_KEY: "string",
3906
+ "unique": True
3907
+ },
3908
+ SAMPLE_TYPE_KEY: {
3909
+ ALLOWED_KEY: ["stool"],
3910
+ DEFAULT_KEY: "stool",
3911
+ REQUIRED_KEY: True,
3912
+ TYPE_KEY: "string"
3913
+ }
3914
+ }
3915
+ }
3916
+ }
3917
+ },
3918
+ # human: nested under host_associated
3919
+ "human": {
3920
+ # default inherited from host_associated
3921
+ DEFAULT_KEY: "not provided",
3922
+ METADATA_FIELDS_KEY: {
3923
+ # custom_field added from study_specific_metadata
3924
+ "custom_field": {
3925
+ DEFAULT_KEY: "custom_value",
3926
+ TYPE_KEY: "string"
3927
+ },
3928
+ # description overrides host_associated value at human level
3929
+ "description": {
3930
+ DEFAULT_KEY: "human sample",
3931
+ TYPE_KEY: "string"
3932
+ },
3933
+ # host_common_name defined at human level
3934
+ "host_common_name": {
3935
+ DEFAULT_KEY: "human",
3936
+ TYPE_KEY: "string"
3937
+ },
3938
+ # sample_name inherited from base -> host_associated -> human
3939
+ "sample_name": {
3940
+ REQUIRED_KEY: True,
3941
+ TYPE_KEY: "string",
3942
+ "unique": True
3943
+ },
3944
+ # sample_type inherited from base -> host_associated -> human
3945
+ "sample_type": {
3946
+ REQUIRED_KEY: True,
3947
+ TYPE_KEY: "string"
3948
+ }
3949
+ },
3950
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
3951
+ # blood defined only at human level
3952
+ "blood": {
3953
+ METADATA_FIELDS_KEY: {
3954
+ "body_product": {
3955
+ DEFAULT_KEY: "UBERON:blood",
3956
+ TYPE_KEY: "string"
3957
+ },
3958
+ "body_site": {
3959
+ DEFAULT_KEY: "blood",
3960
+ TYPE_KEY: "string"
3961
+ },
3962
+ "custom_field": {
3963
+ DEFAULT_KEY: "custom_value",
3964
+ TYPE_KEY: "string"
3965
+ },
3966
+ "description": {
3967
+ DEFAULT_KEY: "human sample",
3968
+ TYPE_KEY: "string"
3969
+ },
3970
+ "host_common_name": {
3971
+ DEFAULT_KEY: "human",
3972
+ TYPE_KEY: "string"
3973
+ },
3974
+ QIITA_SAMPLE_TYPE: {
3975
+ ALLOWED_KEY: ["blood"],
3976
+ DEFAULT_KEY: "blood",
3977
+ TYPE_KEY: "string"
3978
+ },
3979
+ "sample_name": {
3980
+ REQUIRED_KEY: True,
3981
+ TYPE_KEY: "string",
3982
+ "unique": True
3983
+ },
3984
+ SAMPLE_TYPE_KEY: {
3985
+ ALLOWED_KEY: ["blood"],
3986
+ DEFAULT_KEY: "blood",
3987
+ REQUIRED_KEY: True,
3988
+ TYPE_KEY: "string"
3989
+ }
3990
+ }
3991
+ },
3992
+ # stool: body_site inherited from host_associated,
3993
+ # body_product added at human level
3994
+ "stool": {
3995
+ METADATA_FIELDS_KEY: {
3996
+ "body_product": {
3997
+ DEFAULT_KEY: "UBERON:feces",
3998
+ TYPE_KEY: "string"
3999
+ },
4000
+ "body_site": {
4001
+ DEFAULT_KEY: "gut",
4002
+ TYPE_KEY: "string"
4003
+ },
4004
+ "custom_field": {
4005
+ DEFAULT_KEY: "custom_value",
4006
+ TYPE_KEY: "string"
4007
+ },
4008
+ "description": {
4009
+ DEFAULT_KEY: "human sample",
4010
+ TYPE_KEY: "string"
4011
+ },
4012
+ "host_common_name": {
4013
+ DEFAULT_KEY: "human",
4014
+ TYPE_KEY: "string"
4015
+ },
4016
+ QIITA_SAMPLE_TYPE: {
4017
+ ALLOWED_KEY: ["stool"],
4018
+ DEFAULT_KEY: "stool",
4019
+ TYPE_KEY: "string"
4020
+ },
4021
+ "sample_name": {
4022
+ REQUIRED_KEY: True,
4023
+ TYPE_KEY: "string",
4024
+ "unique": True
4025
+ },
4026
+ SAMPLE_TYPE_KEY: {
4027
+ ALLOWED_KEY: ["stool"],
4028
+ DEFAULT_KEY: "stool",
4029
+ REQUIRED_KEY: True,
4030
+ TYPE_KEY: "string"
4031
+ }
4032
+ }
4033
+ }
4034
+ }
4035
+ },
4036
+ # mouse: nested under host_associated (not in study config)
4037
+ "mouse": {
4038
+ # default inherited from host_associated
4039
+ DEFAULT_KEY: "not provided",
4040
+ METADATA_FIELDS_KEY: {
4041
+ # description inherited from host_associated (not overridden)
4042
+ "description": {
4043
+ DEFAULT_KEY: "host associated sample",
4044
+ TYPE_KEY: "string"
4045
+ },
4046
+ # host_common_name defined at mouse level
4047
+ "host_common_name": {
4048
+ DEFAULT_KEY: "mouse",
4049
+ TYPE_KEY: "string"
4050
+ },
4051
+ # sample_name inherited from base -> host_associated -> mouse
4052
+ "sample_name": {
4053
+ REQUIRED_KEY: True,
4054
+ TYPE_KEY: "string",
4055
+ "unique": True
4056
+ },
4057
+ # sample_type inherited from base -> host_associated -> mouse
4058
+ "sample_type": {
4059
+ REQUIRED_KEY: True,
4060
+ TYPE_KEY: "string"
4061
+ }
4062
+ },
4063
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4064
+ # stool: body_site inherited from host_associated,
4065
+ # cage_id added at mouse level
4066
+ "stool": {
4067
+ METADATA_FIELDS_KEY: {
4068
+ "body_site": {
4069
+ DEFAULT_KEY: "gut",
4070
+ TYPE_KEY: "string"
4071
+ },
4072
+ "cage_id": {
4073
+ REQUIRED_KEY: False,
4074
+ TYPE_KEY: "string"
4075
+ },
4076
+ "description": {
4077
+ DEFAULT_KEY: "host associated sample",
4078
+ TYPE_KEY: "string"
4079
+ },
4080
+ "host_common_name": {
4081
+ DEFAULT_KEY: "mouse",
4082
+ TYPE_KEY: "string"
4083
+ },
4084
+ QIITA_SAMPLE_TYPE: {
4085
+ ALLOWED_KEY: ["stool"],
4086
+ DEFAULT_KEY: "stool",
4087
+ TYPE_KEY: "string"
4088
+ },
4089
+ "sample_name": {
4090
+ REQUIRED_KEY: True,
4091
+ TYPE_KEY: "string",
4092
+ "unique": True
4093
+ },
4094
+ SAMPLE_TYPE_KEY: {
4095
+ ALLOWED_KEY: ["stool"],
4096
+ DEFAULT_KEY: "stool",
4097
+ REQUIRED_KEY: True,
4098
+ TYPE_KEY: "string"
4099
+ }
4100
+ }
4101
+ }
4102
+ }
4103
+ }
4104
+ }
4105
+ }
4106
+ self.assertEqual(expected, result)
4107
+
4108
+ def test_build_full_flat_config_dict_without_study_config(self):
4109
+ """Test build_full_flat_config_dict with no study config uses standards only.
4110
+
4111
+ test_standards.yml structure: base -> host_associated -> human/mouse
4112
+ With no study config, output is pure flattened standards.
4113
+ """
4114
+ software_config = {
4115
+ DEFAULT_KEY: "software_default",
4116
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4117
+ OVERWRITE_NON_NANS_KEY: False
4118
+ }
4119
+
4120
+ result = build_full_flat_config_dict(
4121
+ None, software_config, self.TEST_STDS_FP)
4122
+
4123
+ expected = {
4124
+ # Top-level keys from software_config
4125
+ DEFAULT_KEY: "software_default",
4126
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4127
+ OVERWRITE_NON_NANS_KEY: False,
4128
+ # No STUDY_SPECIFIC_METADATA_KEY since no study config provided
4129
+ # Flattened host types from standards only
4130
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4131
+ # base: top level, no default, just sample_name/sample_type
4132
+ "base": {
4133
+ METADATA_FIELDS_KEY: {
4134
+ "sample_name": {
4135
+ REQUIRED_KEY: True,
4136
+ TYPE_KEY: "string",
4137
+ "unique": True
4138
+ },
4139
+ "sample_type": {
4140
+ REQUIRED_KEY: True,
4141
+ TYPE_KEY: "string"
4142
+ }
4143
+ }
4144
+ },
4145
+ # host_associated: inherits from base, adds default and description
4146
+ "host_associated": {
4147
+ DEFAULT_KEY: "not provided",
4148
+ METADATA_FIELDS_KEY: {
4149
+ "description": {
4150
+ DEFAULT_KEY: "host associated sample",
4151
+ TYPE_KEY: "string"
4152
+ },
4153
+ "sample_name": {
4154
+ REQUIRED_KEY: True,
4155
+ TYPE_KEY: "string",
4156
+ "unique": True
4157
+ },
4158
+ "sample_type": {
4159
+ REQUIRED_KEY: True,
4160
+ TYPE_KEY: "string"
4161
+ }
4162
+ },
4163
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4164
+ "stool": {
4165
+ METADATA_FIELDS_KEY: {
4166
+ "body_site": {
4167
+ DEFAULT_KEY: "gut",
4168
+ TYPE_KEY: "string"
4169
+ },
4170
+ "description": {
4171
+ DEFAULT_KEY: "host associated sample",
4172
+ TYPE_KEY: "string"
4173
+ },
4174
+ QIITA_SAMPLE_TYPE: {
4175
+ ALLOWED_KEY: ["stool"],
4176
+ DEFAULT_KEY: "stool",
4177
+ TYPE_KEY: "string"
4178
+ },
4179
+ "sample_name": {
4180
+ REQUIRED_KEY: True,
4181
+ TYPE_KEY: "string",
4182
+ "unique": True
4183
+ },
4184
+ SAMPLE_TYPE_KEY: {
4185
+ ALLOWED_KEY: ["stool"],
4186
+ DEFAULT_KEY: "stool",
4187
+ REQUIRED_KEY: True,
4188
+ TYPE_KEY: "string"
4189
+ }
4190
+ }
4191
+ }
4192
+ }
4193
+ },
4194
+ # human: inherits from host_associated, overrides description
4195
+ "human": {
4196
+ DEFAULT_KEY: "not provided",
4197
+ METADATA_FIELDS_KEY: {
4198
+ "description": {
4199
+ DEFAULT_KEY: "human sample",
4200
+ TYPE_KEY: "string"
4201
+ },
4202
+ "host_common_name": {
4203
+ DEFAULT_KEY: "human",
4204
+ TYPE_KEY: "string"
4205
+ },
4206
+ "sample_name": {
4207
+ REQUIRED_KEY: True,
4208
+ TYPE_KEY: "string",
4209
+ "unique": True
4210
+ },
4211
+ "sample_type": {
4212
+ REQUIRED_KEY: True,
4213
+ TYPE_KEY: "string"
4214
+ }
4215
+ },
4216
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4217
+ "blood": {
4218
+ METADATA_FIELDS_KEY: {
4219
+ "body_product": {
4220
+ DEFAULT_KEY: "UBERON:blood",
4221
+ TYPE_KEY: "string"
4222
+ },
4223
+ "body_site": {
4224
+ DEFAULT_KEY: "blood",
4225
+ TYPE_KEY: "string"
4226
+ },
4227
+ "description": {
4228
+ DEFAULT_KEY: "human sample",
4229
+ TYPE_KEY: "string"
4230
+ },
4231
+ "host_common_name": {
4232
+ DEFAULT_KEY: "human",
4233
+ TYPE_KEY: "string"
4234
+ },
4235
+ QIITA_SAMPLE_TYPE: {
4236
+ ALLOWED_KEY: ["blood"],
4237
+ DEFAULT_KEY: "blood",
4238
+ TYPE_KEY: "string"
4239
+ },
4240
+ "sample_name": {
4241
+ REQUIRED_KEY: True,
4242
+ TYPE_KEY: "string",
4243
+ "unique": True
4244
+ },
4245
+ SAMPLE_TYPE_KEY: {
4246
+ ALLOWED_KEY: ["blood"],
4247
+ DEFAULT_KEY: "blood",
4248
+ REQUIRED_KEY: True,
4249
+ TYPE_KEY: "string"
4250
+ }
4251
+ }
4252
+ },
4253
+ "stool": {
4254
+ METADATA_FIELDS_KEY: {
4255
+ "body_product": {
4256
+ DEFAULT_KEY: "UBERON:feces",
4257
+ TYPE_KEY: "string"
4258
+ },
4259
+ "body_site": {
4260
+ DEFAULT_KEY: "gut",
4261
+ TYPE_KEY: "string"
4262
+ },
4263
+ "description": {
4264
+ DEFAULT_KEY: "human sample",
4265
+ TYPE_KEY: "string"
4266
+ },
4267
+ "host_common_name": {
4268
+ DEFAULT_KEY: "human",
4269
+ TYPE_KEY: "string"
4270
+ },
4271
+ QIITA_SAMPLE_TYPE: {
4272
+ ALLOWED_KEY: ["stool"],
4273
+ DEFAULT_KEY: "stool",
4274
+ TYPE_KEY: "string"
4275
+ },
4276
+ "sample_name": {
4277
+ REQUIRED_KEY: True,
4278
+ TYPE_KEY: "string",
4279
+ "unique": True
4280
+ },
4281
+ SAMPLE_TYPE_KEY: {
4282
+ ALLOWED_KEY: ["stool"],
4283
+ DEFAULT_KEY: "stool",
4284
+ REQUIRED_KEY: True,
4285
+ TYPE_KEY: "string"
4286
+ }
4287
+ }
4288
+ }
4289
+ }
4290
+ },
4291
+ # mouse: inherits from host_associated, keeps parent description
4292
+ "mouse": {
4293
+ DEFAULT_KEY: "not provided",
4294
+ METADATA_FIELDS_KEY: {
4295
+ "description": {
4296
+ DEFAULT_KEY: "host associated sample",
4297
+ TYPE_KEY: "string"
4298
+ },
4299
+ "host_common_name": {
4300
+ DEFAULT_KEY: "mouse",
4301
+ TYPE_KEY: "string"
4302
+ },
4303
+ "sample_name": {
4304
+ REQUIRED_KEY: True,
4305
+ TYPE_KEY: "string",
4306
+ "unique": True
4307
+ },
4308
+ "sample_type": {
4309
+ REQUIRED_KEY: True,
4310
+ TYPE_KEY: "string"
4311
+ }
4312
+ },
4313
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4314
+ "stool": {
4315
+ METADATA_FIELDS_KEY: {
4316
+ "body_site": {
4317
+ DEFAULT_KEY: "gut",
4318
+ TYPE_KEY: "string"
4319
+ },
4320
+ "cage_id": {
4321
+ REQUIRED_KEY: False,
4322
+ TYPE_KEY: "string"
4323
+ },
4324
+ "description": {
4325
+ DEFAULT_KEY: "host associated sample",
4326
+ TYPE_KEY: "string"
4327
+ },
4328
+ "host_common_name": {
4329
+ DEFAULT_KEY: "mouse",
4330
+ TYPE_KEY: "string"
4331
+ },
4332
+ QIITA_SAMPLE_TYPE: {
4333
+ ALLOWED_KEY: ["stool"],
4334
+ DEFAULT_KEY: "stool",
4335
+ TYPE_KEY: "string"
4336
+ },
4337
+ "sample_name": {
4338
+ REQUIRED_KEY: True,
4339
+ TYPE_KEY: "string",
4340
+ "unique": True
4341
+ },
4342
+ SAMPLE_TYPE_KEY: {
4343
+ ALLOWED_KEY: ["stool"],
4344
+ DEFAULT_KEY: "stool",
4345
+ REQUIRED_KEY: True,
4346
+ TYPE_KEY: "string"
4347
+ }
4348
+ }
4349
+ }
4350
+ }
4351
+ }
4352
+ }
4353
+ }
4354
+ self.assertEqual(expected, result)
4355
+
4356
+ def test_build_full_flat_config_dict_merges_software_and_study(self):
4357
+ """Test that study config values override software config values.
4358
+
4359
+ Tests that top-level config keys (default, leave_requireds_blank, etc.)
4360
+ from study_config override matching keys from software_config.
4361
+ """
4362
+ software_config = {
4363
+ DEFAULT_KEY: "software_default",
4364
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4365
+ OVERWRITE_NON_NANS_KEY: True
4366
+ }
4367
+ study_config = {
4368
+ # These override software_config values
4369
+ DEFAULT_KEY: "study_default",
4370
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4371
+ STUDY_SPECIFIC_METADATA_KEY: {
4372
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4373
+ "human": {
4374
+ METADATA_FIELDS_KEY: {},
4375
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4376
+ "stool": {
4377
+ METADATA_FIELDS_KEY: {}
4378
+ }
4379
+ }
4380
+ }
4381
+ }
4382
+ }
4383
+ }
4384
+
4385
+ result = build_full_flat_config_dict(
4386
+ study_config, software_config, self.TEST_STDS_FP)
4387
+
4388
+ expected = {
4389
+ # default from study_config overrides software_config
4390
+ DEFAULT_KEY: "study_default",
4391
+ # leave_requireds_blank from study_config overrides software_config
4392
+ LEAVE_REQUIREDS_BLANK_KEY: True,
4393
+ # overwrite_non_nans from software_config (not overridden by study)
4394
+ OVERWRITE_NON_NANS_KEY: True,
4395
+ # Flattened host types
4396
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4397
+ "base": {
4398
+ METADATA_FIELDS_KEY: {
4399
+ "sample_name": {
4400
+ REQUIRED_KEY: True,
4401
+ TYPE_KEY: "string",
4402
+ "unique": True
4403
+ },
4404
+ "sample_type": {
4405
+ REQUIRED_KEY: True,
4406
+ TYPE_KEY: "string"
4407
+ }
4408
+ }
4409
+ },
4410
+ "host_associated": {
4411
+ DEFAULT_KEY: "not provided",
4412
+ METADATA_FIELDS_KEY: {
4413
+ "description": {
4414
+ DEFAULT_KEY: "host associated sample",
4415
+ TYPE_KEY: "string"
4416
+ },
4417
+ "sample_name": {
4418
+ REQUIRED_KEY: True,
4419
+ TYPE_KEY: "string",
4420
+ "unique": True
4421
+ },
4422
+ "sample_type": {
4423
+ REQUIRED_KEY: True,
4424
+ TYPE_KEY: "string"
4425
+ }
4426
+ },
4427
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4428
+ "stool": {
4429
+ METADATA_FIELDS_KEY: {
4430
+ "body_site": {
4431
+ DEFAULT_KEY: "gut",
4432
+ TYPE_KEY: "string"
4433
+ },
4434
+ "description": {
4435
+ DEFAULT_KEY: "host associated sample",
4436
+ TYPE_KEY: "string"
4437
+ },
4438
+ QIITA_SAMPLE_TYPE: {
4439
+ ALLOWED_KEY: ["stool"],
4440
+ DEFAULT_KEY: "stool",
4441
+ TYPE_KEY: "string"
4442
+ },
4443
+ "sample_name": {
4444
+ REQUIRED_KEY: True,
4445
+ TYPE_KEY: "string",
4446
+ "unique": True
4447
+ },
4448
+ SAMPLE_TYPE_KEY: {
4449
+ ALLOWED_KEY: ["stool"],
4450
+ DEFAULT_KEY: "stool",
4451
+ REQUIRED_KEY: True,
4452
+ TYPE_KEY: "string"
4453
+ }
4454
+ }
4455
+ }
4456
+ }
4457
+ },
4458
+ "human": {
4459
+ DEFAULT_KEY: "not provided",
4460
+ METADATA_FIELDS_KEY: {
4461
+ "description": {
4462
+ DEFAULT_KEY: "human sample",
4463
+ TYPE_KEY: "string"
4464
+ },
4465
+ "host_common_name": {
4466
+ DEFAULT_KEY: "human",
4467
+ TYPE_KEY: "string"
4468
+ },
4469
+ "sample_name": {
4470
+ REQUIRED_KEY: True,
4471
+ TYPE_KEY: "string",
4472
+ "unique": True
4473
+ },
4474
+ "sample_type": {
4475
+ REQUIRED_KEY: True,
4476
+ TYPE_KEY: "string"
4477
+ }
4478
+ },
4479
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4480
+ "blood": {
4481
+ METADATA_FIELDS_KEY: {
4482
+ "body_product": {
4483
+ DEFAULT_KEY: "UBERON:blood",
4484
+ TYPE_KEY: "string"
4485
+ },
4486
+ "body_site": {
4487
+ DEFAULT_KEY: "blood",
4488
+ TYPE_KEY: "string"
4489
+ },
4490
+ "description": {
4491
+ DEFAULT_KEY: "human sample",
4492
+ TYPE_KEY: "string"
4493
+ },
4494
+ "host_common_name": {
4495
+ DEFAULT_KEY: "human",
4496
+ TYPE_KEY: "string"
4497
+ },
4498
+ QIITA_SAMPLE_TYPE: {
4499
+ ALLOWED_KEY: ["blood"],
4500
+ DEFAULT_KEY: "blood",
4501
+ TYPE_KEY: "string"
4502
+ },
4503
+ "sample_name": {
4504
+ REQUIRED_KEY: True,
4505
+ TYPE_KEY: "string",
4506
+ "unique": True
4507
+ },
4508
+ SAMPLE_TYPE_KEY: {
4509
+ ALLOWED_KEY: ["blood"],
4510
+ DEFAULT_KEY: "blood",
4511
+ REQUIRED_KEY: True,
4512
+ TYPE_KEY: "string"
4513
+ }
4514
+ }
4515
+ },
4516
+ "stool": {
4517
+ METADATA_FIELDS_KEY: {
4518
+ "body_product": {
4519
+ DEFAULT_KEY: "UBERON:feces",
4520
+ TYPE_KEY: "string"
4521
+ },
4522
+ "body_site": {
4523
+ DEFAULT_KEY: "gut",
4524
+ TYPE_KEY: "string"
4525
+ },
4526
+ "description": {
4527
+ DEFAULT_KEY: "human sample",
4528
+ TYPE_KEY: "string"
4529
+ },
4530
+ "host_common_name": {
4531
+ DEFAULT_KEY: "human",
4532
+ TYPE_KEY: "string"
4533
+ },
4534
+ QIITA_SAMPLE_TYPE: {
4535
+ ALLOWED_KEY: ["stool"],
4536
+ DEFAULT_KEY: "stool",
4537
+ TYPE_KEY: "string"
4538
+ },
4539
+ "sample_name": {
4540
+ REQUIRED_KEY: True,
4541
+ TYPE_KEY: "string",
4542
+ "unique": True
4543
+ },
4544
+ SAMPLE_TYPE_KEY: {
4545
+ ALLOWED_KEY: ["stool"],
4546
+ DEFAULT_KEY: "stool",
4547
+ REQUIRED_KEY: True,
4548
+ TYPE_KEY: "string"
4549
+ }
4550
+ }
4551
+ }
4552
+ }
4553
+ },
4554
+ "mouse": {
4555
+ DEFAULT_KEY: "not provided",
4556
+ METADATA_FIELDS_KEY: {
4557
+ "description": {
4558
+ DEFAULT_KEY: "host associated sample",
4559
+ TYPE_KEY: "string"
4560
+ },
4561
+ "host_common_name": {
4562
+ DEFAULT_KEY: "mouse",
4563
+ TYPE_KEY: "string"
4564
+ },
4565
+ "sample_name": {
4566
+ REQUIRED_KEY: True,
4567
+ TYPE_KEY: "string",
4568
+ "unique": True
4569
+ },
4570
+ "sample_type": {
4571
+ REQUIRED_KEY: True,
4572
+ TYPE_KEY: "string"
4573
+ }
4574
+ },
4575
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4576
+ "stool": {
4577
+ METADATA_FIELDS_KEY: {
4578
+ "body_site": {
4579
+ DEFAULT_KEY: "gut",
4580
+ TYPE_KEY: "string"
4581
+ },
4582
+ "cage_id": {
4583
+ REQUIRED_KEY: False,
4584
+ TYPE_KEY: "string"
4585
+ },
4586
+ "description": {
4587
+ DEFAULT_KEY: "host associated sample",
4588
+ TYPE_KEY: "string"
4589
+ },
4590
+ "host_common_name": {
4591
+ DEFAULT_KEY: "mouse",
4592
+ TYPE_KEY: "string"
4593
+ },
4594
+ QIITA_SAMPLE_TYPE: {
4595
+ ALLOWED_KEY: ["stool"],
4596
+ DEFAULT_KEY: "stool",
4597
+ TYPE_KEY: "string"
4598
+ },
4599
+ "sample_name": {
4600
+ REQUIRED_KEY: True,
4601
+ TYPE_KEY: "string",
4602
+ "unique": True
4603
+ },
4604
+ SAMPLE_TYPE_KEY: {
4605
+ ALLOWED_KEY: ["stool"],
4606
+ DEFAULT_KEY: "stool",
4607
+ REQUIRED_KEY: True,
4608
+ TYPE_KEY: "string"
4609
+ }
4610
+ }
4611
+ }
4612
+ }
4613
+ }
4614
+ }
4615
+ }
4616
+ self.assertEqual(expected, result)
4617
+
4618
+ def test_build_full_flat_config_dict_none_software_config(self):
4619
+ """Test that None software_config loads defaults from config.yml.
4620
+
4621
+ When software_config is None, the function loads defaults from the
4622
+ software's config.yml file (default="not applicable", etc.).
4623
+ """
4624
+ study_config = {
4625
+ STUDY_SPECIFIC_METADATA_KEY: {
4626
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4627
+ "human": {
4628
+ METADATA_FIELDS_KEY: {},
4629
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4630
+ "stool": {
4631
+ METADATA_FIELDS_KEY: {}
4632
+ }
4633
+ }
4634
+ }
4635
+ }
4636
+ }
4637
+ }
4638
+
4639
+ result = build_full_flat_config_dict(
4640
+ study_config, None, self.TEST_STDS_FP)
4641
+
4642
+ expected = {
4643
+ # Top-level keys loaded from software's config.yml defaults
4644
+ DEFAULT_KEY: "not applicable",
4645
+ LEAVE_REQUIREDS_BLANK_KEY: False,
4646
+ OVERWRITE_NON_NANS_KEY: False,
4647
+ HOSTTYPE_COL_OPTIONS_KEY: ["host_common_name"],
4648
+ SAMPLETYPE_COL_OPTIONS_KEY: ["sample_type"],
4649
+ # Flattened host types
4650
+ HOST_TYPE_SPECIFIC_METADATA_KEY: {
4651
+ "base": {
4652
+ METADATA_FIELDS_KEY: {
4653
+ "sample_name": {
4654
+ REQUIRED_KEY: True,
4655
+ TYPE_KEY: "string",
4656
+ "unique": True
4657
+ },
4658
+ "sample_type": {
4659
+ REQUIRED_KEY: True,
4660
+ TYPE_KEY: "string"
4661
+ }
4662
+ }
4663
+ },
4664
+ "host_associated": {
4665
+ DEFAULT_KEY: "not provided",
4666
+ METADATA_FIELDS_KEY: {
4667
+ "description": {
4668
+ DEFAULT_KEY: "host associated sample",
4669
+ TYPE_KEY: "string"
4670
+ },
4671
+ "sample_name": {
4672
+ REQUIRED_KEY: True,
4673
+ TYPE_KEY: "string",
4674
+ "unique": True
4675
+ },
4676
+ "sample_type": {
4677
+ REQUIRED_KEY: True,
4678
+ TYPE_KEY: "string"
4679
+ }
4680
+ },
4681
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4682
+ "stool": {
4683
+ METADATA_FIELDS_KEY: {
4684
+ "body_site": {
4685
+ DEFAULT_KEY: "gut",
4686
+ TYPE_KEY: "string"
4687
+ },
4688
+ "description": {
4689
+ DEFAULT_KEY: "host associated sample",
4690
+ TYPE_KEY: "string"
4691
+ },
4692
+ QIITA_SAMPLE_TYPE: {
4693
+ ALLOWED_KEY: ["stool"],
4694
+ DEFAULT_KEY: "stool",
4695
+ TYPE_KEY: "string"
4696
+ },
4697
+ "sample_name": {
4698
+ REQUIRED_KEY: True,
4699
+ TYPE_KEY: "string",
4700
+ "unique": True
4701
+ },
4702
+ SAMPLE_TYPE_KEY: {
4703
+ ALLOWED_KEY: ["stool"],
4704
+ DEFAULT_KEY: "stool",
4705
+ REQUIRED_KEY: True,
4706
+ TYPE_KEY: "string"
4707
+ }
4708
+ }
4709
+ }
4710
+ }
4711
+ },
4712
+ "human": {
4713
+ DEFAULT_KEY: "not provided",
4714
+ METADATA_FIELDS_KEY: {
4715
+ "description": {
4716
+ DEFAULT_KEY: "human sample",
4717
+ TYPE_KEY: "string"
4718
+ },
4719
+ "host_common_name": {
4720
+ DEFAULT_KEY: "human",
4721
+ TYPE_KEY: "string"
4722
+ },
4723
+ "sample_name": {
4724
+ REQUIRED_KEY: True,
4725
+ TYPE_KEY: "string",
4726
+ "unique": True
4727
+ },
4728
+ "sample_type": {
4729
+ REQUIRED_KEY: True,
4730
+ TYPE_KEY: "string"
4731
+ }
4732
+ },
4733
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4734
+ "blood": {
4735
+ METADATA_FIELDS_KEY: {
4736
+ "body_product": {
4737
+ DEFAULT_KEY: "UBERON:blood",
4738
+ TYPE_KEY: "string"
4739
+ },
4740
+ "body_site": {
4741
+ DEFAULT_KEY: "blood",
4742
+ TYPE_KEY: "string"
4743
+ },
4744
+ "description": {
4745
+ DEFAULT_KEY: "human sample",
4746
+ TYPE_KEY: "string"
4747
+ },
4748
+ "host_common_name": {
4749
+ DEFAULT_KEY: "human",
4750
+ TYPE_KEY: "string"
4751
+ },
4752
+ QIITA_SAMPLE_TYPE: {
4753
+ ALLOWED_KEY: ["blood"],
4754
+ DEFAULT_KEY: "blood",
4755
+ TYPE_KEY: "string"
4756
+ },
4757
+ "sample_name": {
4758
+ REQUIRED_KEY: True,
4759
+ TYPE_KEY: "string",
4760
+ "unique": True
4761
+ },
4762
+ SAMPLE_TYPE_KEY: {
4763
+ ALLOWED_KEY: ["blood"],
4764
+ DEFAULT_KEY: "blood",
4765
+ REQUIRED_KEY: True,
4766
+ TYPE_KEY: "string"
4767
+ }
4768
+ }
4769
+ },
4770
+ "stool": {
4771
+ METADATA_FIELDS_KEY: {
4772
+ "body_product": {
4773
+ DEFAULT_KEY: "UBERON:feces",
4774
+ TYPE_KEY: "string"
4775
+ },
4776
+ "body_site": {
4777
+ DEFAULT_KEY: "gut",
4778
+ TYPE_KEY: "string"
4779
+ },
4780
+ "description": {
4781
+ DEFAULT_KEY: "human sample",
4782
+ TYPE_KEY: "string"
4783
+ },
4784
+ "host_common_name": {
4785
+ DEFAULT_KEY: "human",
4786
+ TYPE_KEY: "string"
4787
+ },
4788
+ QIITA_SAMPLE_TYPE: {
4789
+ ALLOWED_KEY: ["stool"],
4790
+ DEFAULT_KEY: "stool",
4791
+ TYPE_KEY: "string"
4792
+ },
4793
+ "sample_name": {
4794
+ REQUIRED_KEY: True,
4795
+ TYPE_KEY: "string",
4796
+ "unique": True
4797
+ },
4798
+ SAMPLE_TYPE_KEY: {
4799
+ ALLOWED_KEY: ["stool"],
4800
+ DEFAULT_KEY: "stool",
4801
+ REQUIRED_KEY: True,
4802
+ TYPE_KEY: "string"
4803
+ }
4804
+ }
4805
+ }
4806
+ }
4807
+ },
4808
+ "mouse": {
4809
+ DEFAULT_KEY: "not provided",
4810
+ METADATA_FIELDS_KEY: {
4811
+ "description": {
4812
+ DEFAULT_KEY: "host associated sample",
4813
+ TYPE_KEY: "string"
4814
+ },
4815
+ "host_common_name": {
4816
+ DEFAULT_KEY: "mouse",
4817
+ TYPE_KEY: "string"
4818
+ },
4819
+ "sample_name": {
4820
+ REQUIRED_KEY: True,
4821
+ TYPE_KEY: "string",
4822
+ "unique": True
4823
+ },
4824
+ "sample_type": {
4825
+ REQUIRED_KEY: True,
4826
+ TYPE_KEY: "string"
4827
+ }
4828
+ },
4829
+ SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
4830
+ "stool": {
4831
+ METADATA_FIELDS_KEY: {
4832
+ "body_site": {
4833
+ DEFAULT_KEY: "gut",
4834
+ TYPE_KEY: "string"
4835
+ },
4836
+ "cage_id": {
4837
+ REQUIRED_KEY: False,
4838
+ TYPE_KEY: "string"
4839
+ },
4840
+ "description": {
4841
+ DEFAULT_KEY: "host associated sample",
4842
+ TYPE_KEY: "string"
4843
+ },
4844
+ "host_common_name": {
4845
+ DEFAULT_KEY: "mouse",
4846
+ TYPE_KEY: "string"
4847
+ },
4848
+ QIITA_SAMPLE_TYPE: {
4849
+ ALLOWED_KEY: ["stool"],
4850
+ DEFAULT_KEY: "stool",
4851
+ TYPE_KEY: "string"
4852
+ },
4853
+ "sample_name": {
4854
+ REQUIRED_KEY: True,
4855
+ TYPE_KEY: "string",
4856
+ "unique": True
4857
+ },
4858
+ SAMPLE_TYPE_KEY: {
4859
+ ALLOWED_KEY: ["stool"],
4860
+ DEFAULT_KEY: "stool",
4861
+ REQUIRED_KEY: True,
4862
+ TYPE_KEY: "string"
4863
+ }
4864
+ }
4865
+ }
4866
+ }
4867
+ }
4868
+ }
4869
+ }
4870
+ self.assertEqual(expected, result)