bio-grumpy 0.2.7__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bio-grumpy might be problematic. Click here for more details.
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.gitignore +1 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/Cargo.lock +7 -7
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/Cargo.toml +1 -1
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/PKG-INFO +1 -1
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/common.rs +6 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/genome.rs +39 -2
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/vcf.rs +101 -0
- bio_grumpy-1.0.0/test/complex.vcf +14 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.github/workflows/release.yaml +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.github/workflows/test.yaml +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/LICENSE +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/README-python.md +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/README.md +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/pyproject.toml +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/MN908947.3.gb +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/NC_000962.3.gbk +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/TEST-DNA.gbk +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/difference.rs +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/gene.rs +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/lib.rs +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/main.rs +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-4-minor.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-4.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-misc-indel.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/dummy.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/minor-populations-revcomp-AD.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/minor-populations-revcomp.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/revcomp-del-first-pos.vcf +0 -0
- {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/tb-snp-after-del.vcf +0 -0
|
@@ -96,7 +96,7 @@ dependencies = [
|
|
|
96
96
|
|
|
97
97
|
[[package]]
|
|
98
98
|
name = "grumpy"
|
|
99
|
-
version = "0.2.7"
|
|
99
|
+
version = "1.0.0"
|
|
100
100
|
dependencies = [
|
|
101
101
|
"gb-io",
|
|
102
102
|
"ordered-float",
|
|
@@ -136,9 +136,9 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
|
|
|
136
136
|
|
|
137
137
|
[[package]]
|
|
138
138
|
name = "lock_api"
|
|
139
|
-
version = "0.4.
|
|
139
|
+
version = "0.4.13"
|
|
140
140
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
141
|
-
checksum = "
|
|
141
|
+
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
|
|
142
142
|
dependencies = [
|
|
143
143
|
"autocfg",
|
|
144
144
|
"scopeguard",
|
|
@@ -223,9 +223,9 @@ dependencies = [
|
|
|
223
223
|
|
|
224
224
|
[[package]]
|
|
225
225
|
name = "parking_lot"
|
|
226
|
-
version = "0.12.
|
|
226
|
+
version = "0.12.4"
|
|
227
227
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
228
|
-
checksum = "
|
|
228
|
+
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
|
|
229
229
|
dependencies = [
|
|
230
230
|
"lock_api",
|
|
231
231
|
"parking_lot_core",
|
|
@@ -233,9 +233,9 @@ dependencies = [
|
|
|
233
233
|
|
|
234
234
|
[[package]]
|
|
235
235
|
name = "parking_lot_core"
|
|
236
|
-
version = "0.9.
|
|
236
|
+
version = "0.9.11"
|
|
237
237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
238
|
-
checksum = "
|
|
238
|
+
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
|
|
239
239
|
dependencies = [
|
|
240
240
|
"cfg-if",
|
|
241
241
|
"libc",
|
|
@@ -51,6 +51,12 @@ pub struct VCFRow {
|
|
|
51
51
|
#[pyo3(get, set)]
|
|
52
52
|
/// True if the filter column passes
|
|
53
53
|
pub is_filter_pass: bool,
|
|
54
|
+
|
|
55
|
+
#[pyo3(get, set)]
|
|
56
|
+
/// Whether this VCF row is complex
|
|
57
|
+
/// Complex VCF rows are edge cases where a row has a large ref (>1,000 bases) as well as >1 alt
|
|
58
|
+
/// This is used to flag when mutations' evidence isn't worth fetching due to size.
|
|
59
|
+
pub is_complex: bool,
|
|
54
60
|
}
|
|
55
61
|
|
|
56
62
|
#[pyclass(eq)]
|
|
@@ -861,6 +861,7 @@ mod tests {
|
|
|
861
861
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
862
862
|
]),
|
|
863
863
|
is_filter_pass: true,
|
|
864
|
+
is_complex: false,
|
|
864
865
|
},
|
|
865
866
|
VCFRow {
|
|
866
867
|
// 1
|
|
@@ -875,6 +876,7 @@ mod tests {
|
|
|
875
876
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
876
877
|
]),
|
|
877
878
|
is_filter_pass: false,
|
|
879
|
+
is_complex: false,
|
|
878
880
|
},
|
|
879
881
|
VCFRow {
|
|
880
882
|
// 2
|
|
@@ -892,6 +894,7 @@ mod tests {
|
|
|
892
894
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
893
895
|
]),
|
|
894
896
|
is_filter_pass: true,
|
|
897
|
+
is_complex: false,
|
|
895
898
|
},
|
|
896
899
|
VCFRow {
|
|
897
900
|
// 3
|
|
@@ -906,6 +909,7 @@ mod tests {
|
|
|
906
909
|
("GT_CONF".to_string(), vec!["63.77".to_string()]),
|
|
907
910
|
]),
|
|
908
911
|
is_filter_pass: true,
|
|
912
|
+
is_complex: false,
|
|
909
913
|
},
|
|
910
914
|
VCFRow {
|
|
911
915
|
// 4
|
|
@@ -920,6 +924,7 @@ mod tests {
|
|
|
920
924
|
("GT_CONF".to_string(), vec!["63.77".to_string()]),
|
|
921
925
|
]),
|
|
922
926
|
is_filter_pass: false,
|
|
927
|
+
is_complex: false,
|
|
923
928
|
},
|
|
924
929
|
VCFRow {
|
|
925
930
|
// 5
|
|
@@ -934,6 +939,7 @@ mod tests {
|
|
|
934
939
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
935
940
|
]),
|
|
936
941
|
is_filter_pass: false,
|
|
942
|
+
is_complex: false,
|
|
937
943
|
},
|
|
938
944
|
VCFRow {
|
|
939
945
|
// 6
|
|
@@ -948,6 +954,7 @@ mod tests {
|
|
|
948
954
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
949
955
|
]),
|
|
950
956
|
is_filter_pass: true,
|
|
957
|
+
is_complex: false,
|
|
951
958
|
},
|
|
952
959
|
VCFRow {
|
|
953
960
|
// 7
|
|
@@ -962,6 +969,7 @@ mod tests {
|
|
|
962
969
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
963
970
|
]),
|
|
964
971
|
is_filter_pass: true,
|
|
972
|
+
is_complex: false,
|
|
965
973
|
},
|
|
966
974
|
VCFRow {
|
|
967
975
|
// 8
|
|
@@ -980,6 +988,7 @@ mod tests {
|
|
|
980
988
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
981
989
|
]),
|
|
982
990
|
is_filter_pass: false,
|
|
991
|
+
is_complex: false,
|
|
983
992
|
},
|
|
984
993
|
VCFRow {
|
|
985
994
|
// 9
|
|
@@ -998,6 +1007,7 @@ mod tests {
|
|
|
998
1007
|
("GT_CONF".to_string(), vec!["3.77".to_string()]),
|
|
999
1008
|
]),
|
|
1000
1009
|
is_filter_pass: false,
|
|
1010
|
+
is_complex: false,
|
|
1001
1011
|
},
|
|
1002
1012
|
VCFRow {
|
|
1003
1013
|
// 10
|
|
@@ -1014,6 +1024,7 @@ mod tests {
|
|
|
1014
1024
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
1015
1025
|
]),
|
|
1016
1026
|
is_filter_pass: false,
|
|
1027
|
+
is_complex: false,
|
|
1017
1028
|
},
|
|
1018
1029
|
VCFRow {
|
|
1019
1030
|
// 11
|
|
@@ -1029,6 +1040,7 @@ mod tests {
|
|
|
1029
1040
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
1030
1041
|
]),
|
|
1031
1042
|
is_filter_pass: false,
|
|
1043
|
+
is_complex: false,
|
|
1032
1044
|
},
|
|
1033
1045
|
];
|
|
1034
1046
|
|
|
@@ -1456,6 +1468,7 @@ mod tests {
|
|
|
1456
1468
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
1457
1469
|
]),
|
|
1458
1470
|
is_filter_pass: true,
|
|
1471
|
+
is_complex: false,
|
|
1459
1472
|
},
|
|
1460
1473
|
VCFRow {
|
|
1461
1474
|
// 1
|
|
@@ -1477,6 +1490,7 @@ mod tests {
|
|
|
1477
1490
|
("GT_CONF".to_string(), vec!["3.77".to_string()]),
|
|
1478
1491
|
]),
|
|
1479
1492
|
is_filter_pass: false,
|
|
1493
|
+
is_complex: false,
|
|
1480
1494
|
},
|
|
1481
1495
|
VCFRow {
|
|
1482
1496
|
// 2
|
|
@@ -1499,6 +1513,7 @@ mod tests {
|
|
|
1499
1513
|
("GT_CONF".to_string(), vec!["2.76".to_string()]),
|
|
1500
1514
|
]),
|
|
1501
1515
|
is_filter_pass: true,
|
|
1516
|
+
is_complex: false,
|
|
1502
1517
|
},
|
|
1503
1518
|
VCFRow {
|
|
1504
1519
|
// 3
|
|
@@ -1513,6 +1528,7 @@ mod tests {
|
|
|
1513
1528
|
("GT_CONF".to_string(), vec!["200.58".to_string()]),
|
|
1514
1529
|
]),
|
|
1515
1530
|
is_filter_pass: true,
|
|
1531
|
+
is_complex: false,
|
|
1516
1532
|
},
|
|
1517
1533
|
VCFRow {
|
|
1518
1534
|
// 4
|
|
@@ -1530,6 +1546,7 @@ mod tests {
|
|
|
1530
1546
|
("GT_CONF".to_string(), vec!["155.58".to_string()]),
|
|
1531
1547
|
]),
|
|
1532
1548
|
is_filter_pass: true,
|
|
1549
|
+
is_complex: false,
|
|
1533
1550
|
},
|
|
1534
1551
|
VCFRow {
|
|
1535
1552
|
// 5
|
|
@@ -1547,6 +1564,7 @@ mod tests {
|
|
|
1547
1564
|
("GT_CONF".to_string(), vec!["300.25".to_string()]),
|
|
1548
1565
|
]),
|
|
1549
1566
|
is_filter_pass: true,
|
|
1567
|
+
is_complex: false,
|
|
1550
1568
|
},
|
|
1551
1569
|
VCFRow {
|
|
1552
1570
|
// 6
|
|
@@ -1569,6 +1587,7 @@ mod tests {
|
|
|
1569
1587
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1570
1588
|
]),
|
|
1571
1589
|
is_filter_pass: true,
|
|
1590
|
+
is_complex: false,
|
|
1572
1591
|
},
|
|
1573
1592
|
VCFRow {
|
|
1574
1593
|
// 7
|
|
@@ -1591,6 +1610,7 @@ mod tests {
|
|
|
1591
1610
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1592
1611
|
]),
|
|
1593
1612
|
is_filter_pass: true,
|
|
1613
|
+
is_complex: false,
|
|
1594
1614
|
},
|
|
1595
1615
|
VCFRow {
|
|
1596
1616
|
// 8
|
|
@@ -1613,6 +1633,7 @@ mod tests {
|
|
|
1613
1633
|
("GT_CONF".to_string(), vec!["475.54".to_string()]),
|
|
1614
1634
|
]),
|
|
1615
1635
|
is_filter_pass: true,
|
|
1636
|
+
is_complex: false,
|
|
1616
1637
|
},
|
|
1617
1638
|
VCFRow {
|
|
1618
1639
|
// 9
|
|
@@ -1635,6 +1656,7 @@ mod tests {
|
|
|
1635
1656
|
("GT_CONF".to_string(), vec!["315.11".to_string()]),
|
|
1636
1657
|
]),
|
|
1637
1658
|
is_filter_pass: true,
|
|
1659
|
+
is_complex: false,
|
|
1638
1660
|
},
|
|
1639
1661
|
VCFRow {
|
|
1640
1662
|
// 10
|
|
@@ -1649,6 +1671,7 @@ mod tests {
|
|
|
1649
1671
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1650
1672
|
]),
|
|
1651
1673
|
is_filter_pass: true,
|
|
1674
|
+
is_complex: false,
|
|
1652
1675
|
},
|
|
1653
1676
|
VCFRow {
|
|
1654
1677
|
// 11
|
|
@@ -1663,6 +1686,7 @@ mod tests {
|
|
|
1663
1686
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1664
1687
|
]),
|
|
1665
1688
|
is_filter_pass: true,
|
|
1689
|
+
is_complex: false,
|
|
1666
1690
|
},
|
|
1667
1691
|
VCFRow {
|
|
1668
1692
|
// 12
|
|
@@ -1677,6 +1701,7 @@ mod tests {
|
|
|
1677
1701
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1678
1702
|
]),
|
|
1679
1703
|
is_filter_pass: true,
|
|
1704
|
+
is_complex: false,
|
|
1680
1705
|
},
|
|
1681
1706
|
VCFRow {
|
|
1682
1707
|
// 13
|
|
@@ -1691,6 +1716,7 @@ mod tests {
|
|
|
1691
1716
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1692
1717
|
]),
|
|
1693
1718
|
is_filter_pass: true,
|
|
1719
|
+
is_complex: false,
|
|
1694
1720
|
},
|
|
1695
1721
|
VCFRow {
|
|
1696
1722
|
// 14
|
|
@@ -1705,6 +1731,7 @@ mod tests {
|
|
|
1705
1731
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1706
1732
|
]),
|
|
1707
1733
|
is_filter_pass: true,
|
|
1734
|
+
is_complex: false,
|
|
1708
1735
|
},
|
|
1709
1736
|
VCFRow {
|
|
1710
1737
|
// 15
|
|
@@ -1722,6 +1749,7 @@ mod tests {
|
|
|
1722
1749
|
("GT_CONF".to_string(), vec!["145.21".to_string()]),
|
|
1723
1750
|
]),
|
|
1724
1751
|
is_filter_pass: true,
|
|
1752
|
+
is_complex: false,
|
|
1725
1753
|
},
|
|
1726
1754
|
];
|
|
1727
1755
|
|
|
@@ -3444,7 +3472,7 @@ mod tests {
|
|
|
3444
3472
|
("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
|
|
3445
3473
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
3446
3474
|
]),
|
|
3447
|
-
is_filter_pass: true,
|
|
3475
|
+
is_filter_pass: true, is_complex: false,
|
|
3448
3476
|
}
|
|
3449
3477
|
];
|
|
3450
3478
|
|
|
@@ -3670,7 +3698,7 @@ mod tests {
|
|
|
3670
3698
|
("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
|
|
3671
3699
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
3672
3700
|
]),
|
|
3673
|
-
is_filter_pass: true,
|
|
3701
|
+
is_filter_pass: true, is_complex: false,
|
|
3674
3702
|
}
|
|
3675
3703
|
|
|
3676
3704
|
];
|
|
@@ -3969,6 +3997,7 @@ mod tests {
|
|
|
3969
3997
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
3970
3998
|
]),
|
|
3971
3999
|
is_filter_pass: false,
|
|
4000
|
+
is_complex: false,
|
|
3972
4001
|
},
|
|
3973
4002
|
VCFRow {
|
|
3974
4003
|
// 1
|
|
@@ -3990,6 +4019,7 @@ mod tests {
|
|
|
3990
4019
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
3991
4020
|
]),
|
|
3992
4021
|
is_filter_pass: false,
|
|
4022
|
+
is_complex: false,
|
|
3993
4023
|
},
|
|
3994
4024
|
];
|
|
3995
4025
|
|
|
@@ -4117,6 +4147,7 @@ mod tests {
|
|
|
4117
4147
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
4118
4148
|
]),
|
|
4119
4149
|
is_filter_pass: false,
|
|
4150
|
+
is_complex: false,
|
|
4120
4151
|
},
|
|
4121
4152
|
VCFRow {
|
|
4122
4153
|
// 1
|
|
@@ -4139,6 +4170,7 @@ mod tests {
|
|
|
4139
4170
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
4140
4171
|
]),
|
|
4141
4172
|
is_filter_pass: false,
|
|
4173
|
+
is_complex: false,
|
|
4142
4174
|
},
|
|
4143
4175
|
];
|
|
4144
4176
|
|
|
@@ -4257,6 +4289,7 @@ mod tests {
|
|
|
4257
4289
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
4258
4290
|
]),
|
|
4259
4291
|
is_filter_pass: false,
|
|
4292
|
+
is_complex: false,
|
|
4260
4293
|
},
|
|
4261
4294
|
VCFRow {
|
|
4262
4295
|
// 1
|
|
@@ -4278,6 +4311,7 @@ mod tests {
|
|
|
4278
4311
|
("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
|
|
4279
4312
|
]),
|
|
4280
4313
|
is_filter_pass: false,
|
|
4314
|
+
is_complex: false,
|
|
4281
4315
|
},
|
|
4282
4316
|
];
|
|
4283
4317
|
|
|
@@ -4483,6 +4517,7 @@ mod tests {
|
|
|
4483
4517
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
4484
4518
|
]),
|
|
4485
4519
|
is_filter_pass: true,
|
|
4520
|
+
is_complex: false,
|
|
4486
4521
|
},
|
|
4487
4522
|
VCFRow {
|
|
4488
4523
|
// 1
|
|
@@ -4497,6 +4532,7 @@ mod tests {
|
|
|
4497
4532
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
4498
4533
|
]),
|
|
4499
4534
|
is_filter_pass: true,
|
|
4535
|
+
is_complex: false,
|
|
4500
4536
|
},
|
|
4501
4537
|
VCFRow {
|
|
4502
4538
|
// 2
|
|
@@ -4511,6 +4547,7 @@ mod tests {
|
|
|
4511
4547
|
("GT_CONF".to_string(), vec!["2.05".to_string()]),
|
|
4512
4548
|
]),
|
|
4513
4549
|
is_filter_pass: true,
|
|
4550
|
+
is_complex: false,
|
|
4514
4551
|
},
|
|
4515
4552
|
];
|
|
4516
4553
|
|
|
@@ -266,6 +266,8 @@ impl VCFFile {
|
|
|
266
266
|
passed = true;
|
|
267
267
|
}
|
|
268
268
|
|
|
269
|
+
let is_complex = record.reference.len() > 1000 && alts.len() > 1;
|
|
270
|
+
|
|
269
271
|
let row = VCFRow {
|
|
270
272
|
position: record.position as i64,
|
|
271
273
|
reference: String::from_utf8_lossy(&record.reference)
|
|
@@ -275,6 +277,7 @@ impl VCFFile {
|
|
|
275
277
|
filter: filters.clone(),
|
|
276
278
|
fields: fields.clone(),
|
|
277
279
|
is_filter_pass: passed,
|
|
280
|
+
is_complex,
|
|
278
281
|
};
|
|
279
282
|
|
|
280
283
|
let (record_calls, record_minor_calls) =
|
|
@@ -831,6 +834,7 @@ mod tests {
|
|
|
831
834
|
("COV".to_string(), vec!["1".to_string()]),
|
|
832
835
|
]),
|
|
833
836
|
is_filter_pass: true,
|
|
837
|
+
is_complex: false,
|
|
834
838
|
};
|
|
835
839
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
|
|
836
840
|
assert_eq!(calls.len(), 1);
|
|
@@ -858,6 +862,7 @@ mod tests {
|
|
|
858
862
|
("COV".to_string(), vec!["1".to_string(), "2".to_string()]),
|
|
859
863
|
]),
|
|
860
864
|
is_filter_pass: true,
|
|
865
|
+
is_complex: false,
|
|
861
866
|
};
|
|
862
867
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
|
|
863
868
|
assert_eq!(calls.len(), 1);
|
|
@@ -885,6 +890,7 @@ mod tests {
|
|
|
885
890
|
("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
|
|
886
891
|
]),
|
|
887
892
|
is_filter_pass: true,
|
|
893
|
+
is_complex: false,
|
|
888
894
|
};
|
|
889
895
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
|
|
890
896
|
assert_eq!(calls.len(), 1);
|
|
@@ -924,6 +930,7 @@ mod tests {
|
|
|
924
930
|
("COV".to_string(), vec!["1".to_string()]),
|
|
925
931
|
]),
|
|
926
932
|
is_filter_pass: true,
|
|
933
|
+
is_complex: false,
|
|
927
934
|
};
|
|
928
935
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
|
|
929
936
|
assert_eq!(calls.len(), 1);
|
|
@@ -951,6 +958,7 @@ mod tests {
|
|
|
951
958
|
("COV".to_string(), vec!["1".to_string()]),
|
|
952
959
|
]),
|
|
953
960
|
is_filter_pass: true,
|
|
961
|
+
is_complex: false,
|
|
954
962
|
};
|
|
955
963
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
|
|
956
964
|
assert_eq!(calls.len(), 1);
|
|
@@ -978,6 +986,7 @@ mod tests {
|
|
|
978
986
|
("COV".to_string(), vec!["1".to_string()]),
|
|
979
987
|
]),
|
|
980
988
|
is_filter_pass: false,
|
|
989
|
+
is_complex: false,
|
|
981
990
|
};
|
|
982
991
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
|
|
983
992
|
assert_eq!(calls.len(), 1);
|
|
@@ -1005,6 +1014,7 @@ mod tests {
|
|
|
1005
1014
|
("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
|
|
1006
1015
|
]),
|
|
1007
1016
|
is_filter_pass: true,
|
|
1017
|
+
is_complex: false,
|
|
1008
1018
|
};
|
|
1009
1019
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 123);
|
|
1010
1020
|
assert_eq!(calls.len(), 1);
|
|
@@ -1032,6 +1042,7 @@ mod tests {
|
|
|
1032
1042
|
("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
|
|
1033
1043
|
]),
|
|
1034
1044
|
is_filter_pass: true,
|
|
1045
|
+
is_complex: false,
|
|
1035
1046
|
};
|
|
1036
1047
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
|
|
1037
1048
|
assert_eq!(calls.len(), 1);
|
|
@@ -1059,6 +1070,7 @@ mod tests {
|
|
|
1059
1070
|
("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
|
|
1060
1071
|
]),
|
|
1061
1072
|
is_filter_pass: true,
|
|
1073
|
+
is_complex: false,
|
|
1062
1074
|
};
|
|
1063
1075
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
|
|
1064
1076
|
assert_eq!(calls.len(), 2);
|
|
@@ -1098,6 +1110,7 @@ mod tests {
|
|
|
1098
1110
|
),
|
|
1099
1111
|
]),
|
|
1100
1112
|
is_filter_pass: true,
|
|
1113
|
+
is_complex: false,
|
|
1101
1114
|
};
|
|
1102
1115
|
let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
|
|
1103
1116
|
assert_eq!(calls.len(), 2);
|
|
@@ -1180,6 +1193,7 @@ mod tests {
|
|
|
1180
1193
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1181
1194
|
]),
|
|
1182
1195
|
is_filter_pass: true,
|
|
1196
|
+
is_complex: false,
|
|
1183
1197
|
},
|
|
1184
1198
|
VCFRow {
|
|
1185
1199
|
position: 4725,
|
|
@@ -1193,6 +1207,7 @@ mod tests {
|
|
|
1193
1207
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1194
1208
|
]),
|
|
1195
1209
|
is_filter_pass: false,
|
|
1210
|
+
is_complex: false,
|
|
1196
1211
|
},
|
|
1197
1212
|
VCFRow {
|
|
1198
1213
|
position: 4730,
|
|
@@ -1209,6 +1224,7 @@ mod tests {
|
|
|
1209
1224
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1210
1225
|
]),
|
|
1211
1226
|
is_filter_pass: true,
|
|
1227
|
+
is_complex: false,
|
|
1212
1228
|
},
|
|
1213
1229
|
VCFRow {
|
|
1214
1230
|
position: 4735,
|
|
@@ -1222,6 +1238,7 @@ mod tests {
|
|
|
1222
1238
|
("GT_CONF".to_string(), vec!["63.77".to_string()]),
|
|
1223
1239
|
]),
|
|
1224
1240
|
is_filter_pass: true,
|
|
1241
|
+
is_complex: false,
|
|
1225
1242
|
},
|
|
1226
1243
|
VCFRow {
|
|
1227
1244
|
position: 4740,
|
|
@@ -1235,6 +1252,7 @@ mod tests {
|
|
|
1235
1252
|
("GT_CONF".to_string(), vec!["63.77".to_string()]),
|
|
1236
1253
|
]),
|
|
1237
1254
|
is_filter_pass: false,
|
|
1255
|
+
is_complex: false,
|
|
1238
1256
|
},
|
|
1239
1257
|
VCFRow {
|
|
1240
1258
|
position: 13148,
|
|
@@ -1248,6 +1266,7 @@ mod tests {
|
|
|
1248
1266
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1249
1267
|
]),
|
|
1250
1268
|
is_filter_pass: false,
|
|
1269
|
+
is_complex: false,
|
|
1251
1270
|
},
|
|
1252
1271
|
VCFRow {
|
|
1253
1272
|
position: 13149,
|
|
@@ -1261,6 +1280,7 @@ mod tests {
|
|
|
1261
1280
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1262
1281
|
]),
|
|
1263
1282
|
is_filter_pass: true,
|
|
1283
|
+
is_complex: false,
|
|
1264
1284
|
},
|
|
1265
1285
|
VCFRow {
|
|
1266
1286
|
position: 13150,
|
|
@@ -1274,6 +1294,7 @@ mod tests {
|
|
|
1274
1294
|
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1275
1295
|
]),
|
|
1276
1296
|
is_filter_pass: true,
|
|
1297
|
+
is_complex: false,
|
|
1277
1298
|
},
|
|
1278
1299
|
VCFRow {
|
|
1279
1300
|
position: 13333,
|
|
@@ -1290,6 +1311,7 @@ mod tests {
|
|
|
1290
1311
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
1291
1312
|
]),
|
|
1292
1313
|
is_filter_pass: false,
|
|
1314
|
+
is_complex: false,
|
|
1293
1315
|
},
|
|
1294
1316
|
VCFRow {
|
|
1295
1317
|
// Edge case of using `RO` and `AO` for coverage
|
|
@@ -1306,6 +1328,7 @@ mod tests {
|
|
|
1306
1328
|
("GT_CONF".to_string(), vec!["3.77".to_string()]),
|
|
1307
1329
|
]),
|
|
1308
1330
|
is_filter_pass: false,
|
|
1331
|
+
is_complex: false,
|
|
1309
1332
|
},
|
|
1310
1333
|
VCFRow {
|
|
1311
1334
|
// Odd edge case which is a valid VCF row where it's a het GT but single COV value
|
|
@@ -1320,6 +1343,7 @@ mod tests {
|
|
|
1320
1343
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
1321
1344
|
]),
|
|
1322
1345
|
is_filter_pass: false,
|
|
1346
|
+
is_complex: false,
|
|
1323
1347
|
},
|
|
1324
1348
|
VCFRow {
|
|
1325
1349
|
// Null call by virtue of failing specific filter
|
|
@@ -1334,6 +1358,7 @@ mod tests {
|
|
|
1334
1358
|
("GT_CONF".to_string(), vec!["613".to_string()]),
|
|
1335
1359
|
]),
|
|
1336
1360
|
is_filter_pass: false,
|
|
1361
|
+
is_complex: false,
|
|
1337
1362
|
},
|
|
1338
1363
|
];
|
|
1339
1364
|
for (idx, record) in expected_records.iter().enumerate() {
|
|
@@ -1506,4 +1531,80 @@ mod tests {
|
|
|
1506
1531
|
}
|
|
1507
1532
|
}
|
|
1508
1533
|
}
|
|
1534
|
+
|
|
1535
|
+
#[test]
|
|
1536
|
+
fn test_complex_is_detected() {
|
|
1537
|
+
// Check that the is_complex flag gets set if a VCF has a complex row in it
|
|
1538
|
+
// This VCF has 1 standard row, then 2 almost complex rows, then a complex row
|
|
1539
|
+
// Standard SNP at 4687
|
|
1540
|
+
// Ref of length 1000 with 2 alts at 4730
|
|
1541
|
+
// Ref of length 1001 with 1 alt at 5730
|
|
1542
|
+
// Ref of length 1001 with 2 alts at 7030 <-- this is the complex row
|
|
1543
|
+
|
|
1544
|
+
let vcf = VCFFile::new("test/complex.vcf".to_string(), false, 3);
|
|
1545
|
+
|
|
1546
|
+
let expected_records = vec![
|
|
1547
|
+
VCFRow {
|
|
1548
|
+
position: 4687,
|
|
1549
|
+
reference: "t".to_string(),
|
|
1550
|
+
alternative: vec!["c".to_string()],
|
|
1551
|
+
filter: vec!["PASS".to_string()],
|
|
1552
|
+
fields: HashMap::from([
|
|
1553
|
+
("GT".to_string(), vec!["1/1".to_string()]),
|
|
1554
|
+
("DP".to_string(), vec!["68".to_string()]),
|
|
1555
|
+
("COV".to_string(), vec!["0".to_string(), "68".to_string()]),
|
|
1556
|
+
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1557
|
+
]),
|
|
1558
|
+
is_filter_pass: true,is_complex: false,
|
|
1559
|
+
},
|
|
1560
|
+
VCFRow {
|
|
1561
|
+
position: 4730,
|
|
1562
|
+
reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
|
|
1563
|
+
alternative: vec!["t".to_string(), "g".to_string()],
|
|
1564
|
+
filter: vec!["PASS".to_string()],
|
|
1565
|
+
fields: HashMap::from([
|
|
1566
|
+
("GT".to_string(), vec!["1/1".to_string()]),
|
|
1567
|
+
("DP".to_string(), vec!["200".to_string()]),
|
|
1568
|
+
("COV".to_string(), vec!["1".to_string(), "99".to_string(), "100".to_string()]),
|
|
1569
|
+
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1570
|
+
]),
|
|
1571
|
+
is_filter_pass: true,is_complex: false,
|
|
1572
|
+
},
|
|
1573
|
+
VCFRow {
|
|
1574
|
+
position: 5730,
|
|
1575
|
+
reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
|
|
1576
|
+
alternative: vec!["t".to_string()],
|
|
1577
|
+
filter: vec!["PASS".to_string()],
|
|
1578
|
+
fields: HashMap::from([
|
|
1579
|
+
("GT".to_string(), vec!["1/1".to_string()]),
|
|
1580
|
+
("DP".to_string(), vec!["200".to_string()]),
|
|
1581
|
+
("COV".to_string(), vec!["1".to_string(), "99".to_string()]),
|
|
1582
|
+
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1583
|
+
]),
|
|
1584
|
+
is_filter_pass: true,is_complex: false,
|
|
1585
|
+
},
|
|
1586
|
+
VCFRow {
|
|
1587
|
+
position: 7030,
|
|
1588
|
+
reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
|
|
1589
|
+
alternative: vec!["t".to_string(), "g".to_string()],
|
|
1590
|
+
filter: vec!["PASS".to_string()],
|
|
1591
|
+
fields: HashMap::from([
|
|
1592
|
+
("GT".to_string(), vec!["2/2".to_string()]),
|
|
1593
|
+
("DP".to_string(), vec!["200".to_string()]),
|
|
1594
|
+
("COV".to_string(), vec!["1".to_string(), "99".to_string(), "100".to_string()]),
|
|
1595
|
+
("GT_CONF".to_string(), vec!["613.77".to_string()]),
|
|
1596
|
+
]),
|
|
1597
|
+
is_filter_pass: true,is_complex: true,
|
|
1598
|
+
},
|
|
1599
|
+
];
|
|
1600
|
+
|
|
1601
|
+
for (idx, record) in expected_records.iter().enumerate() {
|
|
1602
|
+
assert_eq!(record.position, vcf.records[idx].position);
|
|
1603
|
+
assert_eq!(record.reference, vcf.records[idx].reference);
|
|
1604
|
+
assert_eq!(record.alternative, vcf.records[idx].alternative);
|
|
1605
|
+
assert_eq!(record.filter, vcf.records[idx].filter);
|
|
1606
|
+
assert_eq!(record.fields, vcf.records[idx].fields);
|
|
1607
|
+
assert_eq!(record.is_filter_pass, vcf.records[idx].is_filter_pass);
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1509
1610
|
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
##fileformat=VCFv4.2
|
|
2
|
+
##source=minos, version 0.4.1
|
|
3
|
+
##fileDate=2018-04-26
|
|
4
|
+
##FORMAT=<ID=COV,Number=R,Type=Integer,Description="Number of reads on ref and alt alleles">
|
|
5
|
+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
|
6
|
+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="total kmer depth from gramtools",Source="minos">
|
|
7
|
+
##FORMAT=<ID=GT_CONF,Number=1,Type=Float,Description="Genotype confidence. Difference in log likelihood of most likely and next most likely genotype">
|
|
8
|
+
##INFO=<ID=KMER,Number=1,Type=Integer,Description="Kmer size at which variant was discovered (kmer-size used by gramtools build)">
|
|
9
|
+
##minos_max_read_length=200
|
|
10
|
+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 03.vcf
|
|
11
|
+
NC_004148.2 4687 . T C . PASS KMER=15 GT:DP:COV:GT_CONF 1/1:68:0,68:613.77
|
|
12
|
+

|
|
13
|
+

|
|
14
|
+

|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|