metameq 2026.2.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metameq/__init__.py +3 -2
- metameq/_version.py +3 -3
- metameq/src/metadata_configurator.py +53 -6
- metameq/src/metadata_extender.py +16 -38
- metameq/src/util.py +7 -0
- metameq/tests/test_metadata_configurator.py +184 -1
- metameq/tests/test_metadata_extender.py +306 -117
- metameq/tests/test_metadata_validator.py +2 -2
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/METADATA +2 -1
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/RECORD +13 -13
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/WHEEL +0 -0
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/entry_points.txt +0 -0
- {metameq-2026.2.1.dist-info → metameq-2026.2.3.dist-info}/top_level.txt +0 -0
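The recurring change in metameq/tests/test_metadata_extender.py is that every pandas.read_csv call on generated output files now passes dtype=str together with keep_default_na=False. The short sketch below is an illustration only, not code from the package, and the sample CSV text is invented; it shows the pandas behavior this guards against: without dtype=str a column of TRUE/FALSE is parsed as booleans and empty cells become NaN, whereas with both arguments the values stay as literal strings and empty cells stay as empty strings.

import io
import pandas

csv_text = "sample_name,dna_extracted,notes\nsample1,TRUE,\nsample2,FALSE,\n"

# Default parsing: the dna_extracted column becomes dtype bool and the
# empty "notes" cells become NaN.
default_df = pandas.read_csv(io.StringIO(csv_text))
print(default_df["dna_extracted"].dtype)   # bool
print(default_df.loc[0, "notes"])          # nan

# With dtype=str and keep_default_na=False, values stay as the literal
# strings written to the file and empty cells stay as "".
string_df = pandas.read_csv(io.StringIO(csv_text), dtype=str, keep_default_na=False)
print(string_df.loc[0, "dna_extracted"])   # TRUE
print(repr(string_df.loc[0, "notes"]))     # ''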
metameq/tests/test_metadata_extender.py

@@ -475,7 +475,7 @@ class TestMetadataExtender(TestCase):
 
             # Verify metadata file contents - includes failed row when remove_internals=False
             result_df = pandas.read_csv(
-                metadata_files[0], sep="\t", keep_default_na=False)
+                metadata_files[0], sep="\t", dtype=str, keep_default_na=False)
             assert_frame_equal(metadata_df, result_df)
 
             # Find the validation errors file (uses comma separator)
@@ -484,7 +484,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(validation_files))
 
             # Verify validation errors file contents
-            result_validation_df = pandas.read_csv(validation_files[0], sep=",")
+            result_validation_df = pandas.read_csv(validation_files[0], sep=",", dtype=str, keep_default_na=False)
             assert_frame_equal(validation_msgs_df, result_validation_df)
 
             # No fails file should be created when remove_internals=False
@@ -513,7 +513,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(metadata_files))
 
             # Verify metadata has internal cols removed and no failures
-            result_df = pandas.read_csv(metadata_files[0], sep="\t")
+            result_df = pandas.read_csv(metadata_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample3"],
                 "field_a": ["a1", "a3"]
@@ -526,7 +526,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(fails_files))
 
             # Verify fails file contains the failed row
-            fails_df = pandas.read_csv(fails_files[0], sep=",")
+            fails_df = pandas.read_csv(fails_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_fails_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample2"],
                 "field_a": ["a2"],
@@ -593,7 +593,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(metadata_files))
 
             # Verify custom internal cols are removed
-            result_df = pandas.read_csv(metadata_files[0], sep="\t")
+            result_df = pandas.read_csv(metadata_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "field_a": ["a1", "a2"]
@@ -748,16 +748,15 @@ class TestMetadataExtender(TestCase):
 
     # Tests for _fill_na_if_default
 
-    def
+    def test__fill_na_if_default_has_default_in_settings(self):
         """Test that specific_dict default takes precedence over settings_dict."""
         input_df = pandas.DataFrame({
             "field1": ["value1", np.nan, "value3"],
             "field2": [np.nan, "value2", np.nan]
         })
-
-        settings_dict = {DEFAULT_KEY: "unused"}
+        settings_dict = {DEFAULT_KEY: "filled"}
 
-        result = _fill_na_if_default(input_df,
+        result = _fill_na_if_default(input_df, settings_dict)
 
         expected = pandas.DataFrame({
             "field1": ["value1", "filled", "value3"],
@@ -765,18 +764,19 @@ class TestMetadataExtender(TestCase):
         })
         assert_frame_equal(expected, result)
 
-    def
-        """Test that
+    def test__fill_na_if_default_no_default_in_settings(self):
+        """Test that NaN values are unchanged when no default is in settings."""
         input_df = pandas.DataFrame({
-            "field1": [np.nan]
+            "field1": ["value1", np.nan, "value3"],
+            "field2": [np.nan, "value2", np.nan]
         })
-
-        settings_dict = {DEFAULT_KEY: "settings_default"}
+        settings_dict = {}
 
-        result = _fill_na_if_default(input_df,
+        result = _fill_na_if_default(input_df, settings_dict)
 
         expected = pandas.DataFrame({
-            "field1": ["
+            "field1": ["value1", np.nan, "value3"],
+            "field2": [np.nan, "value2", np.nan]
         })
         assert_frame_equal(expected, result)
 
@@ -1273,14 +1273,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
             QC_NOTE_KEY: ["", ""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
+
         # Config is pre-resolved: sample type's metadata_fields already includes
         # host fields merged in, plus sample_type and qiita_sample_type
         host_type_config_dict = {
+            OVERWRITE_NON_NANS_KEY: False,
+            LEAVE_REQUIREDS_BLANK_KEY: False,
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {
                 "host_field": {
                     DEFAULT_KEY: "host_default",
@@ -1314,7 +1313,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1337,12 +1336,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["unknown_type"],
             QC_NOTE_KEY: [""]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1352,7 +1350,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "unknown_type",
+            input_df, "unknown_type", host_type_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1371,12 +1369,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "blood", "stool"],
             QC_NOTE_KEY: ["", "", ""]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1394,7 +1391,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         # Should only have the two stool samples
         self.assertEqual(2, len(result_df))
@@ -1409,12 +1406,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: True,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1429,7 +1425,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         self.assertEqual(LEAVE_BLANK_VAL, result_df["required_field"].iloc[0])
 
@@ -1441,12 +1437,11 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "global_default",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1461,7 +1456,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         # When leave_requireds_blank is False, NaN values get filled with global default
         self.assertEqual("global_default", result_df["required_field"].iloc[0])
@@ -1475,12 +1470,11 @@ class TestMetadataExtender(TestCase):
             QC_NOTE_KEY: [""],
             "existing_field": ["original_value"]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: True,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1495,7 +1489,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         self.assertEqual("new_value", result_df["existing_field"].iloc[0])
 
@@ -1508,12 +1502,11 @@ class TestMetadataExtender(TestCase):
             QC_NOTE_KEY: [""],
             "existing_field": ["original_value"]
         })
-
+
+        host_type_config_dict = {
             OVERWRITE_NON_NANS_KEY: False,
             LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
-        host_type_config_dict = {
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "stool": {
@@ -1528,7 +1521,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "stool",
+            input_df, "stool", host_type_config_dict)
 
         self.assertEqual("original_value", result_df["existing_field"].iloc[0])
 
@@ -1540,14 +1533,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["feces"],
             QC_NOTE_KEY: [""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "not provided"
-        }
+
         # Config is pre-resolved: alias "feces" has its own metadata_fields
         # that is a copy of "stool"'s resolved fields with sample_type="stool"
         host_type_config_dict = {
+            OVERWRITE_NON_NANS_KEY: False,
+            LEAVE_REQUIREDS_BLANK_KEY: False,
+            DEFAULT_KEY: "not provided",
             METADATA_FIELDS_KEY: {},
             SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                 "feces": {
@@ -1590,7 +1582,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_sample_type_in_a_host_type(
-            input_df, "feces",
+            input_df, "feces", host_type_config_dict)
 
         self.assertEqual("stool_value", result_df["stool_field"].iloc[0])
         # sample_type should be set to the resolved type "stool"
@@ -1606,17 +1598,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
             QC_NOTE_KEY: ["", ""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
+
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
                     DEFAULT_KEY: "human_default",
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -1652,7 +1642,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human",
+            input_df, "human", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample2"],
@@ -1675,14 +1665,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
+
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -1690,7 +1679,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "unknown_host",
+            input_df, "unknown_host", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1709,14 +1698,13 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["unknown_sample"],
             QC_NOTE_KEY: [""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
+
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1728,7 +1716,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human",
+            input_df, "human", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1747,16 +1735,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool", "stool"],
             QC_NOTE_KEY: ["", "", ""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
+
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {
                         "human_field": {
                             DEFAULT_KEY: "human_value",
@@ -1785,6 +1772,9 @@ class TestMetadataExtender(TestCase):
                     }
                 },
                 "mouse": {
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -1792,7 +1782,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human",
+            input_df, "human", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1", "sample3"],
@@ -1813,17 +1803,15 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
+
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
                     DEFAULT_KEY: "human_specific_default",
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1850,7 +1838,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human",
+            input_df, "human", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1871,17 +1859,14 @@ class TestMetadataExtender(TestCase):
             SAMPLETYPE_SHORTHAND_KEY: ["stool"],
             QC_NOTE_KEY: [""]
         })
-        settings_dict = {
-            OVERWRITE_NON_NANS_KEY: False,
-            LEAVE_REQUIREDS_BLANK_KEY: False,
-            DEFAULT_KEY: "global_default"
-        }
         # Config is pre-resolved: sample type's metadata_fields includes
         # host fields merged in, plus sample_type and qiita_sample_type
         full_flat_config_dict = {
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
-
+                    OVERWRITE_NON_NANS_KEY: False,
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    DEFAULT_KEY: "global_default",
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -1908,7 +1893,7 @@ class TestMetadataExtender(TestCase):
         }
 
         result_df, validation_msgs = _generate_metadata_for_a_host_type(
-            input_df, "human",
+            input_df, "human", full_flat_config_dict)
 
         expected_df = pandas.DataFrame({
             SAMPLE_NAME_KEY: ["sample1"],
@@ -1939,6 +1924,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -2005,6 +1993,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "human_field": {
                             DEFAULT_KEY: "human_value",
@@ -2051,6 +2042,9 @@ class TestMetadataExtender(TestCase):
                     }
                 },
                 "mouse": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "mouse_field": {
                             DEFAULT_KEY: "mouse_value",
@@ -2182,6 +2176,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "global_default",
+                    LEAVE_REQUIREDS_BLANK_KEY: True,  # This causes required fields to get LEAVE_BLANK_VAL
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2506,6 +2503,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {
                         "host_field": {
                             DEFAULT_KEY: "host_value",
@@ -2580,6 +2580,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2639,6 +2642,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2687,6 +2693,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {}
                 }
@@ -2721,6 +2730,9 @@ class TestMetadataExtender(TestCase):
             OVERWRITE_NON_NANS_KEY: False,
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -2781,6 +2793,9 @@ class TestMetadataExtender(TestCase):
             },
             HOST_TYPE_SPECIFIC_METADATA_KEY: {
                 "human": {
+                    DEFAULT_KEY: "not provided",
+                    LEAVE_REQUIREDS_BLANK_KEY: False,
+                    OVERWRITE_NON_NANS_KEY: False,
                     METADATA_FIELDS_KEY: {},
                     SAMPLE_TYPE_SPECIFIC_METADATA_KEY: {
                         "stool": {
@@ -3319,7 +3334,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(output_files))
 
             # Read and verify contents (keep_default_na=False preserves empty strings)
-            result_df = pandas.read_csv(output_files[0], sep="\t", keep_default_na=False)
+            result_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_df = input_df
             assert_frame_equal(expected_df, result_df)
 
@@ -3343,7 +3358,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(output_files))
 
             # Verify main output has internal cols removed and no failures
-            result_df = pandas.read_csv(output_files[0], sep="\t")
+            result_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample3"],
                 "field_a": ["a1", "a3"]
@@ -3355,7 +3370,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(fails_files))
 
             # Verify fails file contains the failed row
-            fails_df = pandas.read_csv(fails_files[0], sep=",")
+            fails_df = pandas.read_csv(fails_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_fails_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample2"],
                 "field_a": ["a2"],
@@ -3432,7 +3447,7 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(output_files))
 
             # Read and verify contents (keep_default_na=False preserves empty strings)
-            result_df = pandas.read_csv(output_files[0], sep=",", keep_default_na=False)
+            result_df = pandas.read_csv(output_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_df = input_df
             assert_frame_equal(expected_df, result_df)
 
@@ -3454,14 +3469,14 @@ class TestMetadataExtender(TestCase):
             # Main output file should have only headers (empty data)
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            result_df = pandas.read_csv(output_files[0], sep="\t")
+            result_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             self.assertTrue(result_df.empty)
             self.assertEqual([SAMPLE_NAME_KEY, "field_a"], list(result_df.columns))
 
             # Fails file should have both rows
             fails_files = glob.glob(os.path.join(tmpdir, "*_test_output_fails.csv"))
             self.assertEqual(1, len(fails_files))
-            fails_df = pandas.read_csv(fails_files[0], sep=",")
+            fails_df = pandas.read_csv(fails_files[0], sep=",", dtype=str, keep_default_na=False)
             self.assertEqual(2, len(fails_df))
 
     # Tests for get_extended_metadata_from_df_and_yaml
@@ -3606,7 +3621,7 @@ class TestMetadataExtender(TestCase):
             # Verify main output file was created (internal cols removed by default)
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
@@ -3679,7 +3694,7 @@ class TestMetadataExtender(TestCase):
             # Verify main output file excludes failure rows
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample3"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
@@ -3694,7 +3709,7 @@ class TestMetadataExtender(TestCase):
             # Verify fails file contains the failed row
             fails_files = glob.glob(os.path.join(tmpdir, "*_test_output_fails.csv"))
             self.assertEqual(1, len(fails_files))
-            fails_df = pandas.read_csv(fails_files[0], sep=",")
+            fails_df = pandas.read_csv(fails_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_fails_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample2"],
                 "body_product": ["not provided"],
@@ -3765,7 +3780,7 @@ class TestMetadataExtender(TestCase):
             validation_files = glob.glob(
                 os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
             self.assertEqual(1, len(validation_files))
-            validation_df = pandas.read_csv(validation_files[0], sep=",")
+            validation_df = pandas.read_csv(validation_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_validation_df = pandas.DataFrame({
                 "sample_name": ["sample1"],
                 "field_name": ["restricted_field"],
@@ -3806,7 +3821,7 @@ class TestMetadataExtender(TestCase):
             # Verify main output file includes internal columns
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t", keep_default_na=False)
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1"],
                 "body_product": ["UBERON:feces"],
@@ -3829,6 +3844,7 @@ class TestMetadataExtender(TestCase):
 
     TEST_METADATA_CSV_FP = path.join(TEST_DIR, "data/test_metadata.csv")
     TEST_METADATA_TXT_FP = path.join(TEST_DIR, "data/test_metadata.txt")
+    TEST_METADATA_XLSX_FP = path.join(TEST_DIR, "data/test_metadata.xlsx")
    TEST_METADATA_WITH_ERRORS_FP = path.join(
         TEST_DIR, "data/test_metadata_with_errors.csv")
     TEST_STUDY_CONFIG_WITH_VALIDATION_FP = path.join(
@@ -3847,6 +3863,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -3861,12 +3878,13 @@ class TestMetadataExtender(TestCase):
             # Verify main output file was created (internal cols removed by default)
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -3899,6 +3917,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -3913,12 +3932,13 @@ class TestMetadataExtender(TestCase):
             # Verify main output file was created
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -3927,6 +3947,60 @@ class TestMetadataExtender(TestCase):
             })
             assert_frame_equal(expected_output_df, output_df)
 
+    def test_write_extended_metadata_xlsx_input(self):
+        """Test writing extended metadata from an Excel XLSX input file."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            result_df = write_extended_metadata(
+                self.TEST_METADATA_XLSX_FP, self.TEST_STUDY_CONFIG_FP,
+                tmpdir, "test_output", stds_fp=self.TEST_STDS_FP)
+
+            # Verify returned DataFrame
+            expected_result_df = pandas.DataFrame({
+                SAMPLE_NAME_KEY: ["sample1", "sample2"],
+                "body_product": ["UBERON:feces", "UBERON:feces"],
+                "body_site": ["gut", "gut"],
+                "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
+                "host_common_name": ["human", "human"],
+                QIITA_SAMPLE_TYPE: ["stool", "stool"],
+                SAMPLE_TYPE_KEY: ["stool", "stool"],
+                "study_custom_field": ["custom_value", "custom_value"],
+                "study_stool_field": ["stool_custom", "stool_custom"],
+                HOSTTYPE_SHORTHAND_KEY: ["human", "human"],
+                SAMPLETYPE_SHORTHAND_KEY: ["stool", "stool"],
+                QC_NOTE_KEY: ["", ""]
+            })
+            assert_frame_equal(expected_result_df, result_df)
+
+            # Verify main output file was created
+            output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
+            self.assertEqual(1, len(output_files))
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
+            expected_output_df = pandas.DataFrame({
+                SAMPLE_NAME_KEY: ["sample1", "sample2"],
+                "body_product": ["UBERON:feces", "UBERON:feces"],
+                "body_site": ["gut", "gut"],
+                "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
+                "host_common_name": ["human", "human"],
+                QIITA_SAMPLE_TYPE: ["stool", "stool"],
+                SAMPLE_TYPE_KEY: ["stool", "stool"],
+                "study_custom_field": ["custom_value", "custom_value"],
+                "study_stool_field": ["stool_custom", "stool_custom"]
+            })
+            assert_frame_equal(expected_output_df, output_df)
+
+            # Verify empty fails file was created
+            fails_files = glob.glob(os.path.join(tmpdir, "*_test_output_fails.csv"))
+            self.assertEqual(1, len(fails_files))
+            self.assertEqual(0, os.path.getsize(fails_files[0]))
+
+            # Verify empty validation errors file was created
+            validation_files = glob.glob(
+                os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
+            self.assertEqual(1, len(validation_files))
+            self.assertEqual(0, os.path.getsize(validation_files[0]))
+
     def test_write_extended_metadata_with_validation_errors(self):
         """Test writing extended metadata when validation errors occur."""
         with tempfile.TemporaryDirectory() as tmpdir:
@@ -3941,6 +4015,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 "restricted_field": ["invalid_value", "allowed_value"],
@@ -3954,12 +4029,13 @@ class TestMetadataExtender(TestCase):
             # Verify main output file was created
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 "restricted_field": ["invalid_value", "allowed_value"],
@@ -3971,7 +4047,7 @@ class TestMetadataExtender(TestCase):
             validation_files = glob.glob(
                 os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
             self.assertEqual(1, len(validation_files))
-            validation_df = pandas.read_csv(validation_files[0], sep=",")
+            validation_df = pandas.read_csv(validation_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_validation_df = pandas.DataFrame({
                 "sample_name": ["sample1"],
                 "field_name": ["restricted_field"],
@@ -4006,6 +4082,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4020,12 +4097,13 @@ class TestMetadataExtender(TestCase):
             # Verify output file has .csv extension
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.csv"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep=",")
+            output_df = pandas.read_csv(output_files[0], sep=",", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4048,6 +4126,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4062,12 +4141,13 @@ class TestMetadataExtender(TestCase):
             # Verify main output file includes internal columns
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t", keep_default_na=False)
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4097,6 +4177,7 @@ class TestMetadataExtender(TestCase):
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4111,12 +4192,13 @@ class TestMetadataExtender(TestCase):
             # Verify main output file was created
             output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
             self.assertEqual(1, len(output_files))
-            output_df = pandas.read_csv(output_files[0], sep="\t")
+            output_df = pandas.read_csv(output_files[0], sep="\t", dtype=str, keep_default_na=False)
             expected_output_df = pandas.DataFrame({
                 SAMPLE_NAME_KEY: ["sample1", "sample2"],
                 "body_product": ["UBERON:feces", "UBERON:feces"],
                 "body_site": ["gut", "gut"],
                 "description": ["human sample", "human sample"],
+                "dna_extracted": ["TRUE", "FALSE"],
                 "host_common_name": ["human", "human"],
                 QIITA_SAMPLE_TYPE: ["stool", "stool"],
                 SAMPLE_TYPE_KEY: ["stool", "stool"],
@@ -4134,6 +4216,64 @@ class TestMetadataExtender(TestCase):
                 os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
             self.assertEqual(0, len(validation_files))
 
+    def test_write_extended_metadata_preserves_string_booleans(self):
+        """Test that TRUE/FALSE string values are not converted to booleans.
+
+        This tests for a bug where loading a CSV without dtype=str causes
+        pandas to convert 'TRUE'/'FALSE' strings to boolean True/False,
+        which then fail validation against allowed string values.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create a CSV file with TRUE/FALSE string values
+            csv_content = (
+                "sample_name,hosttype_shorthand,sampletype_shorthand,dna_extracted\n"
+                "sample1,human,stool,TRUE\n"
+                "sample2,human,stool,FALSE\n"
+            )
+            csv_fp = path.join(tmpdir, "test_bool_strings.csv")
+            with open(csv_fp, "w") as f:
+                f.write(csv_content)
+
+            # Create a config that defines TRUE/FALSE as allowed string values
+            config_content = """
+default: "not provided"
+leave_requireds_blank: false
+overwrite_non_nans: false
+study_specific_metadata:
+  host_type_specific_metadata:
+    human:
+      default: "not provided"
+      leave_requireds_blank: false
+      overwrite_non_nans: false
+      sample_type_specific_metadata:
+        stool:
+          metadata_fields:
+            dna_extracted:
+              type: string
+              allowed:
+                - "TRUE"
+                - "FALSE"
+"""
+            config_fp = path.join(tmpdir, "test_bool_config.yml")
+            with open(config_fp, "w") as f:
+                f.write(config_content)
+
+            # Call write_extended_metadata
+            result_df = write_extended_metadata(
+                csv_fp, config_fp, tmpdir, "test_output",
+                stds_fp=self.TEST_STDS_FP)
+
+            # Verify the dna_extracted values are preserved as strings
+            self.assertEqual("TRUE", result_df.loc[0, "dna_extracted"])
+            self.assertEqual("FALSE", result_df.loc[1, "dna_extracted"])
+
+            # Verify no validation errors occurred
+            validation_files = glob.glob(
+                os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
+            self.assertEqual(1, len(validation_files))
+            # The validation errors file should be empty (0 bytes)
+            self.assertEqual(0, os.path.getsize(validation_files[0]))
+
     # Integration tests
 
     TEST_PROJECT1_METADATA_FP = path.join(TEST_DIR, "data/test_project1_input_metadata.csv")
@@ -4142,6 +4282,7 @@ class TestMetadataExtender(TestCase):
         TEST_DIR, "data/test_project1_output_metadata.txt")
     TEST_PROJECT1_EXPECTED_FAILS_FP = path.join(
         TEST_DIR, "data/test_project1_output_fails.csv")
+
     def test_write_extended_metadata_from_df_project1_integration(self):
         """Integration test using project1 test data files."""
 
@@ -4153,12 +4294,8 @@ class TestMetadataExtender(TestCase):
             with open(path.join(debug_dir, f"UNMATCHED_2_{file_name}"), 'w') as debug_actual_file:
                 debug_actual_file.write(actual_content)
 
-
         # Load input metadata CSV
         input_df = pandas.read_csv(self.TEST_PROJECT1_METADATA_FP, dtype=str)
-        # for the columns "plating_notes" and "notes", fill NaN with empty string
-        input_df["plating_notes"] = input_df["plating_notes"].fillna("")
-        input_df["notes"] = input_df["notes"].fillna("")
 
         # Load study config
         study_config = _get_study_specific_config(self.TEST_PROJECT1_CONFIG_FP)
@@ -4204,6 +4341,58 @@ class TestMetadataExtender(TestCase):
             self.assertEqual(1, len(validation_files))
             self.assertEqual(0, os.path.getsize(validation_files[0]))
 
+    def test_write_extended_metadata_project1_integration(self):
+        """Integration test for write_extended_metadata using project1 test data files."""
+
+        def write_mismatched_debug_files(expected_content, actual_content, file_name):
+            """Write debug files to Desktop for unmatched content."""
+            debug_dir = path.join(path.expanduser("~"), "Desktop")
+            with open(path.join(debug_dir, f"UNMATCHED_1_{file_name}"), 'w') as debug_expected_file:
+                debug_expected_file.write(expected_content)
+            with open(path.join(debug_dir, f"UNMATCHED_2_{file_name}"), 'w') as debug_actual_file:
+                debug_actual_file.write(actual_content)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            write_extended_metadata(
+                self.TEST_PROJECT1_METADATA_FP, self.TEST_PROJECT1_CONFIG_FP,
+                tmpdir, "test_output", remove_internals=True)
+
+            # Compare main output file directly to expected file
+            output_files = glob.glob(os.path.join(tmpdir, "*_test_output.txt"))
+            self.assertEqual(1, len(output_files))
+            with open(output_files[0], 'r') as actual_file:
+                actual_content = actual_file.read()
+            with open(self.TEST_PROJECT1_EXPECTED_OUTPUT_FP, 'r') as expected_file:
+                expected_content = expected_file.read()
+            try:
+                self.assertEqual(expected_content, actual_content)
+            except AssertionError:
+                write_mismatched_debug_files(
+                    expected_content, actual_content,
+                    "project1_output.txt")
+                raise
+
+            # Compare fails file directly to expected file
+            fails_files = glob.glob(os.path.join(tmpdir, "*_test_output_fails.csv"))
+            self.assertEqual(1, len(fails_files))
+            with open(fails_files[0], 'r') as actual_file:
+                actual_fails_content = actual_file.read()
+            with open(self.TEST_PROJECT1_EXPECTED_FAILS_FP, 'r') as expected_file:
+                expected_fails_content = expected_file.read()
+            try:
+                self.assertEqual(expected_fails_content, actual_fails_content)
+            except AssertionError:
+                write_mismatched_debug_files(
+                    expected_fails_content, actual_fails_content,
+                    "project1_fails.csv")
+                raise
+
+            # Verify validation errors file is empty
+            validation_files = glob.glob(
+                os.path.join(tmpdir, "*_test_output_validation_errors.csv"))
+            self.assertEqual(1, len(validation_files))
+            self.assertEqual(0, os.path.getsize(validation_files[0]))
+
     # Tests for _get_specified_column_name
 
     def test__get_specified_column_name_finds_column(self):