bio-grumpy 0.2.7__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bio-grumpy might be problematic. Click here for more details.

Files changed (30) hide show
  1. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.gitignore +1 -0
  2. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/Cargo.lock +7 -7
  3. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/Cargo.toml +1 -1
  4. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/PKG-INFO +1 -1
  5. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/common.rs +6 -0
  6. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/genome.rs +39 -2
  7. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/vcf.rs +101 -0
  8. bio_grumpy-1.0.0/test/complex.vcf +14 -0
  9. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.github/workflows/release.yaml +0 -0
  10. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/.github/workflows/test.yaml +0 -0
  11. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/LICENSE +0 -0
  12. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/README-python.md +0 -0
  13. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/README.md +0 -0
  14. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/pyproject.toml +0 -0
  15. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/MN908947.3.gb +0 -0
  16. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/NC_000962.3.gbk +0 -0
  17. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/reference/TEST-DNA.gbk +0 -0
  18. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/difference.rs +0 -0
  19. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/gene.rs +0 -0
  20. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/lib.rs +0 -0
  21. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/src/main.rs +0 -0
  22. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-4-minor.vcf +0 -0
  23. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-4.vcf +0 -0
  24. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA-misc-indel.vcf +0 -0
  25. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/TEST-DNA.vcf +0 -0
  26. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/dummy.vcf +0 -0
  27. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/minor-populations-revcomp-AD.vcf +0 -0
  28. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/minor-populations-revcomp.vcf +0 -0
  29. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/revcomp-del-first-pos.vcf +0 -0
  30. {bio_grumpy-0.2.7 → bio_grumpy-1.0.0}/test/tb-snp-after-del.vcf +0 -0
@@ -14,6 +14,7 @@ Cargo.lock
14
14
  *.pdb
15
15
 
16
16
  massif.out.*
17
+ mprofile*
17
18
  perf.data*
18
19
  tarpaulin-*.html
19
20
  edge-cases.md
@@ -96,7 +96,7 @@ dependencies = [
96
96
 
97
97
  [[package]]
98
98
  name = "grumpy"
99
- version = "0.2.7"
99
+ version = "1.0.0"
100
100
  dependencies = [
101
101
  "gb-io",
102
102
  "ordered-float",
@@ -136,9 +136,9 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
136
136
 
137
137
  [[package]]
138
138
  name = "lock_api"
139
- version = "0.4.12"
139
+ version = "0.4.13"
140
140
  source = "registry+https://github.com/rust-lang/crates.io-index"
141
- checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
141
+ checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
142
142
  dependencies = [
143
143
  "autocfg",
144
144
  "scopeguard",
@@ -223,9 +223,9 @@ dependencies = [
223
223
 
224
224
  [[package]]
225
225
  name = "parking_lot"
226
- version = "0.12.3"
226
+ version = "0.12.4"
227
227
  source = "registry+https://github.com/rust-lang/crates.io-index"
228
- checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
228
+ checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
229
229
  dependencies = [
230
230
  "lock_api",
231
231
  "parking_lot_core",
@@ -233,9 +233,9 @@ dependencies = [
233
233
 
234
234
  [[package]]
235
235
  name = "parking_lot_core"
236
- version = "0.9.10"
236
+ version = "0.9.11"
237
237
  source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
238
+ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
239
239
  dependencies = [
240
240
  "cfg-if",
241
241
  "libc",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "grumpy"
3
- version = "0.2.7"
3
+ version = "1.0.0"
4
4
  edition = "2021"
5
5
  description = "Genetic analysis in Rust."
6
6
  license-file = "LICENSE"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bio-grumpy
3
- Version: 0.2.7
3
+ Version: 1.0.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Programming Language :: Rust
6
6
  Classifier: Programming Language :: Python :: Implementation :: CPython
@@ -51,6 +51,12 @@ pub struct VCFRow {
51
51
  #[pyo3(get, set)]
52
52
  /// True if the filter column passes
53
53
  pub is_filter_pass: bool,
54
+
55
+ #[pyo3(get, set)]
56
+ /// Whether this VCF row is complex
57
+ /// Complex VCF rows are edge cases where a row has a large ref (>1,000 bases) as well as more than one alt.
58
+ /// This is used to flag when mutations' evidence isn't worth fetching due to size.
59
+ pub is_complex: bool,
54
60
  }
55
61
 
56
62
  #[pyclass(eq)]
@@ -861,6 +861,7 @@ mod tests {
861
861
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
862
862
  ]),
863
863
  is_filter_pass: true,
864
+ is_complex: false,
864
865
  },
865
866
  VCFRow {
866
867
  // 1
@@ -875,6 +876,7 @@ mod tests {
875
876
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
876
877
  ]),
877
878
  is_filter_pass: false,
879
+ is_complex: false,
878
880
  },
879
881
  VCFRow {
880
882
  // 2
@@ -892,6 +894,7 @@ mod tests {
892
894
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
893
895
  ]),
894
896
  is_filter_pass: true,
897
+ is_complex: false,
895
898
  },
896
899
  VCFRow {
897
900
  // 3
@@ -906,6 +909,7 @@ mod tests {
906
909
  ("GT_CONF".to_string(), vec!["63.77".to_string()]),
907
910
  ]),
908
911
  is_filter_pass: true,
912
+ is_complex: false,
909
913
  },
910
914
  VCFRow {
911
915
  // 4
@@ -920,6 +924,7 @@ mod tests {
920
924
  ("GT_CONF".to_string(), vec!["63.77".to_string()]),
921
925
  ]),
922
926
  is_filter_pass: false,
927
+ is_complex: false,
923
928
  },
924
929
  VCFRow {
925
930
  // 5
@@ -934,6 +939,7 @@ mod tests {
934
939
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
935
940
  ]),
936
941
  is_filter_pass: false,
942
+ is_complex: false,
937
943
  },
938
944
  VCFRow {
939
945
  // 6
@@ -948,6 +954,7 @@ mod tests {
948
954
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
949
955
  ]),
950
956
  is_filter_pass: true,
957
+ is_complex: false,
951
958
  },
952
959
  VCFRow {
953
960
  // 7
@@ -962,6 +969,7 @@ mod tests {
962
969
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
963
970
  ]),
964
971
  is_filter_pass: true,
972
+ is_complex: false,
965
973
  },
966
974
  VCFRow {
967
975
  // 8
@@ -980,6 +988,7 @@ mod tests {
980
988
  ("GT_CONF".to_string(), vec!["613".to_string()]),
981
989
  ]),
982
990
  is_filter_pass: false,
991
+ is_complex: false,
983
992
  },
984
993
  VCFRow {
985
994
  // 9
@@ -998,6 +1007,7 @@ mod tests {
998
1007
  ("GT_CONF".to_string(), vec!["3.77".to_string()]),
999
1008
  ]),
1000
1009
  is_filter_pass: false,
1010
+ is_complex: false,
1001
1011
  },
1002
1012
  VCFRow {
1003
1013
  // 10
@@ -1014,6 +1024,7 @@ mod tests {
1014
1024
  ("GT_CONF".to_string(), vec!["613".to_string()]),
1015
1025
  ]),
1016
1026
  is_filter_pass: false,
1027
+ is_complex: false,
1017
1028
  },
1018
1029
  VCFRow {
1019
1030
  // 11
@@ -1029,6 +1040,7 @@ mod tests {
1029
1040
  ("GT_CONF".to_string(), vec!["613".to_string()]),
1030
1041
  ]),
1031
1042
  is_filter_pass: false,
1043
+ is_complex: false,
1032
1044
  },
1033
1045
  ];
1034
1046
 
@@ -1456,6 +1468,7 @@ mod tests {
1456
1468
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
1457
1469
  ]),
1458
1470
  is_filter_pass: true,
1471
+ is_complex: false,
1459
1472
  },
1460
1473
  VCFRow {
1461
1474
  // 1
@@ -1477,6 +1490,7 @@ mod tests {
1477
1490
  ("GT_CONF".to_string(), vec!["3.77".to_string()]),
1478
1491
  ]),
1479
1492
  is_filter_pass: false,
1493
+ is_complex: false,
1480
1494
  },
1481
1495
  VCFRow {
1482
1496
  // 2
@@ -1499,6 +1513,7 @@ mod tests {
1499
1513
  ("GT_CONF".to_string(), vec!["2.76".to_string()]),
1500
1514
  ]),
1501
1515
  is_filter_pass: true,
1516
+ is_complex: false,
1502
1517
  },
1503
1518
  VCFRow {
1504
1519
  // 3
@@ -1513,6 +1528,7 @@ mod tests {
1513
1528
  ("GT_CONF".to_string(), vec!["200.58".to_string()]),
1514
1529
  ]),
1515
1530
  is_filter_pass: true,
1531
+ is_complex: false,
1516
1532
  },
1517
1533
  VCFRow {
1518
1534
  // 4
@@ -1530,6 +1546,7 @@ mod tests {
1530
1546
  ("GT_CONF".to_string(), vec!["155.58".to_string()]),
1531
1547
  ]),
1532
1548
  is_filter_pass: true,
1549
+ is_complex: false,
1533
1550
  },
1534
1551
  VCFRow {
1535
1552
  // 5
@@ -1547,6 +1564,7 @@ mod tests {
1547
1564
  ("GT_CONF".to_string(), vec!["300.25".to_string()]),
1548
1565
  ]),
1549
1566
  is_filter_pass: true,
1567
+ is_complex: false,
1550
1568
  },
1551
1569
  VCFRow {
1552
1570
  // 6
@@ -1569,6 +1587,7 @@ mod tests {
1569
1587
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1570
1588
  ]),
1571
1589
  is_filter_pass: true,
1590
+ is_complex: false,
1572
1591
  },
1573
1592
  VCFRow {
1574
1593
  // 7
@@ -1591,6 +1610,7 @@ mod tests {
1591
1610
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1592
1611
  ]),
1593
1612
  is_filter_pass: true,
1613
+ is_complex: false,
1594
1614
  },
1595
1615
  VCFRow {
1596
1616
  // 8
@@ -1613,6 +1633,7 @@ mod tests {
1613
1633
  ("GT_CONF".to_string(), vec!["475.54".to_string()]),
1614
1634
  ]),
1615
1635
  is_filter_pass: true,
1636
+ is_complex: false,
1616
1637
  },
1617
1638
  VCFRow {
1618
1639
  // 9
@@ -1635,6 +1656,7 @@ mod tests {
1635
1656
  ("GT_CONF".to_string(), vec!["315.11".to_string()]),
1636
1657
  ]),
1637
1658
  is_filter_pass: true,
1659
+ is_complex: false,
1638
1660
  },
1639
1661
  VCFRow {
1640
1662
  // 10
@@ -1649,6 +1671,7 @@ mod tests {
1649
1671
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1650
1672
  ]),
1651
1673
  is_filter_pass: true,
1674
+ is_complex: false,
1652
1675
  },
1653
1676
  VCFRow {
1654
1677
  // 11
@@ -1663,6 +1686,7 @@ mod tests {
1663
1686
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1664
1687
  ]),
1665
1688
  is_filter_pass: true,
1689
+ is_complex: false,
1666
1690
  },
1667
1691
  VCFRow {
1668
1692
  // 12
@@ -1677,6 +1701,7 @@ mod tests {
1677
1701
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1678
1702
  ]),
1679
1703
  is_filter_pass: true,
1704
+ is_complex: false,
1680
1705
  },
1681
1706
  VCFRow {
1682
1707
  // 13
@@ -1691,6 +1716,7 @@ mod tests {
1691
1716
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1692
1717
  ]),
1693
1718
  is_filter_pass: true,
1719
+ is_complex: false,
1694
1720
  },
1695
1721
  VCFRow {
1696
1722
  // 14
@@ -1705,6 +1731,7 @@ mod tests {
1705
1731
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1706
1732
  ]),
1707
1733
  is_filter_pass: true,
1734
+ is_complex: false,
1708
1735
  },
1709
1736
  VCFRow {
1710
1737
  // 15
@@ -1722,6 +1749,7 @@ mod tests {
1722
1749
  ("GT_CONF".to_string(), vec!["145.21".to_string()]),
1723
1750
  ]),
1724
1751
  is_filter_pass: true,
1752
+ is_complex: false,
1725
1753
  },
1726
1754
  ];
1727
1755
 
@@ -3444,7 +3472,7 @@ mod tests {
3444
3472
  ("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
3445
3473
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
3446
3474
  ]),
3447
- is_filter_pass: true,
3475
+ is_filter_pass: true, is_complex: false,
3448
3476
  }
3449
3477
  ];
3450
3478
 
@@ -3670,7 +3698,7 @@ mod tests {
3670
3698
  ("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
3671
3699
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
3672
3700
  ]),
3673
- is_filter_pass: true,
3701
+ is_filter_pass: true, is_complex: false,
3674
3702
  }
3675
3703
 
3676
3704
  ];
@@ -3969,6 +3997,7 @@ mod tests {
3969
3997
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
3970
3998
  ]),
3971
3999
  is_filter_pass: false,
4000
+ is_complex: false,
3972
4001
  },
3973
4002
  VCFRow {
3974
4003
  // 1
@@ -3990,6 +4019,7 @@ mod tests {
3990
4019
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
3991
4020
  ]),
3992
4021
  is_filter_pass: false,
4022
+ is_complex: false,
3993
4023
  },
3994
4024
  ];
3995
4025
 
@@ -4117,6 +4147,7 @@ mod tests {
4117
4147
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
4118
4148
  ]),
4119
4149
  is_filter_pass: false,
4150
+ is_complex: false,
4120
4151
  },
4121
4152
  VCFRow {
4122
4153
  // 1
@@ -4139,6 +4170,7 @@ mod tests {
4139
4170
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
4140
4171
  ]),
4141
4172
  is_filter_pass: false,
4173
+ is_complex: false,
4142
4174
  },
4143
4175
  ];
4144
4176
 
@@ -4257,6 +4289,7 @@ mod tests {
4257
4289
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
4258
4290
  ]),
4259
4291
  is_filter_pass: false,
4292
+ is_complex: false,
4260
4293
  },
4261
4294
  VCFRow {
4262
4295
  // 1
@@ -4278,6 +4311,7 @@ mod tests {
4278
4311
  ("GT_CONF_PERCENTILE".to_string(), vec!["77.89".to_string()]),
4279
4312
  ]),
4280
4313
  is_filter_pass: false,
4314
+ is_complex: false,
4281
4315
  },
4282
4316
  ];
4283
4317
 
@@ -4483,6 +4517,7 @@ mod tests {
4483
4517
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
4484
4518
  ]),
4485
4519
  is_filter_pass: true,
4520
+ is_complex: false,
4486
4521
  },
4487
4522
  VCFRow {
4488
4523
  // 1
@@ -4497,6 +4532,7 @@ mod tests {
4497
4532
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
4498
4533
  ]),
4499
4534
  is_filter_pass: true,
4535
+ is_complex: false,
4500
4536
  },
4501
4537
  VCFRow {
4502
4538
  // 2
@@ -4511,6 +4547,7 @@ mod tests {
4511
4547
  ("GT_CONF".to_string(), vec!["2.05".to_string()]),
4512
4548
  ]),
4513
4549
  is_filter_pass: true,
4550
+ is_complex: false,
4514
4551
  },
4515
4552
  ];
4516
4553
 
@@ -266,6 +266,8 @@ impl VCFFile {
266
266
  passed = true;
267
267
  }
268
268
 
269
+ let is_complex = record.reference.len() > 1000 && alts.len() > 1;
270
+
269
271
  let row = VCFRow {
270
272
  position: record.position as i64,
271
273
  reference: String::from_utf8_lossy(&record.reference)
@@ -275,6 +277,7 @@ impl VCFFile {
275
277
  filter: filters.clone(),
276
278
  fields: fields.clone(),
277
279
  is_filter_pass: passed,
280
+ is_complex,
278
281
  };
279
282
 
280
283
  let (record_calls, record_minor_calls) =
@@ -831,6 +834,7 @@ mod tests {
831
834
  ("COV".to_string(), vec!["1".to_string()]),
832
835
  ]),
833
836
  is_filter_pass: true,
837
+ is_complex: false,
834
838
  };
835
839
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
836
840
  assert_eq!(calls.len(), 1);
@@ -858,6 +862,7 @@ mod tests {
858
862
  ("COV".to_string(), vec!["1".to_string(), "2".to_string()]),
859
863
  ]),
860
864
  is_filter_pass: true,
865
+ is_complex: false,
861
866
  };
862
867
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
863
868
  assert_eq!(calls.len(), 1);
@@ -885,6 +890,7 @@ mod tests {
885
890
  ("COV".to_string(), vec!["1".to_string(), "3".to_string()]),
886
891
  ]),
887
892
  is_filter_pass: true,
893
+ is_complex: false,
888
894
  };
889
895
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
890
896
  assert_eq!(calls.len(), 1);
@@ -924,6 +930,7 @@ mod tests {
924
930
  ("COV".to_string(), vec!["1".to_string()]),
925
931
  ]),
926
932
  is_filter_pass: true,
933
+ is_complex: false,
927
934
  };
928
935
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
929
936
  assert_eq!(calls.len(), 1);
@@ -951,6 +958,7 @@ mod tests {
951
958
  ("COV".to_string(), vec!["1".to_string()]),
952
959
  ]),
953
960
  is_filter_pass: true,
961
+ is_complex: false,
954
962
  };
955
963
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 1, 0);
956
964
  assert_eq!(calls.len(), 1);
@@ -978,6 +986,7 @@ mod tests {
978
986
  ("COV".to_string(), vec!["1".to_string()]),
979
987
  ]),
980
988
  is_filter_pass: false,
989
+ is_complex: false,
981
990
  };
982
991
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 2, 0);
983
992
  assert_eq!(calls.len(), 1);
@@ -1005,6 +1014,7 @@ mod tests {
1005
1014
  ("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
1006
1015
  ]),
1007
1016
  is_filter_pass: true,
1017
+ is_complex: false,
1008
1018
  };
1009
1019
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 123);
1010
1020
  assert_eq!(calls.len(), 1);
@@ -1032,6 +1042,7 @@ mod tests {
1032
1042
  ("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
1033
1043
  ]),
1034
1044
  is_filter_pass: true,
1045
+ is_complex: false,
1035
1046
  };
1036
1047
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
1037
1048
  assert_eq!(calls.len(), 1);
@@ -1059,6 +1070,7 @@ mod tests {
1059
1070
  ("COV".to_string(), vec!["1".to_string(), "5".to_string()]),
1060
1071
  ]),
1061
1072
  is_filter_pass: true,
1073
+ is_complex: false,
1062
1074
  };
1063
1075
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
1064
1076
  assert_eq!(calls.len(), 2);
@@ -1098,6 +1110,7 @@ mod tests {
1098
1110
  ),
1099
1111
  ]),
1100
1112
  is_filter_pass: true,
1113
+ is_complex: false,
1101
1114
  };
1102
1115
  let (calls, minor_calls) = VCFFile::parse_record_for_calls(record, 3, 0);
1103
1116
  assert_eq!(calls.len(), 2);
@@ -1180,6 +1193,7 @@ mod tests {
1180
1193
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1181
1194
  ]),
1182
1195
  is_filter_pass: true,
1196
+ is_complex: false,
1183
1197
  },
1184
1198
  VCFRow {
1185
1199
  position: 4725,
@@ -1193,6 +1207,7 @@ mod tests {
1193
1207
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1194
1208
  ]),
1195
1209
  is_filter_pass: false,
1210
+ is_complex: false,
1196
1211
  },
1197
1212
  VCFRow {
1198
1213
  position: 4730,
@@ -1209,6 +1224,7 @@ mod tests {
1209
1224
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1210
1225
  ]),
1211
1226
  is_filter_pass: true,
1227
+ is_complex: false,
1212
1228
  },
1213
1229
  VCFRow {
1214
1230
  position: 4735,
@@ -1222,6 +1238,7 @@ mod tests {
1222
1238
  ("GT_CONF".to_string(), vec!["63.77".to_string()]),
1223
1239
  ]),
1224
1240
  is_filter_pass: true,
1241
+ is_complex: false,
1225
1242
  },
1226
1243
  VCFRow {
1227
1244
  position: 4740,
@@ -1235,6 +1252,7 @@ mod tests {
1235
1252
  ("GT_CONF".to_string(), vec!["63.77".to_string()]),
1236
1253
  ]),
1237
1254
  is_filter_pass: false,
1255
+ is_complex: false,
1238
1256
  },
1239
1257
  VCFRow {
1240
1258
  position: 13148,
@@ -1248,6 +1266,7 @@ mod tests {
1248
1266
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1249
1267
  ]),
1250
1268
  is_filter_pass: false,
1269
+ is_complex: false,
1251
1270
  },
1252
1271
  VCFRow {
1253
1272
  position: 13149,
@@ -1261,6 +1280,7 @@ mod tests {
1261
1280
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1262
1281
  ]),
1263
1282
  is_filter_pass: true,
1283
+ is_complex: false,
1264
1284
  },
1265
1285
  VCFRow {
1266
1286
  position: 13150,
@@ -1274,6 +1294,7 @@ mod tests {
1274
1294
  ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1275
1295
  ]),
1276
1296
  is_filter_pass: true,
1297
+ is_complex: false,
1277
1298
  },
1278
1299
  VCFRow {
1279
1300
  position: 13333,
@@ -1290,6 +1311,7 @@ mod tests {
1290
1311
  ("GT_CONF".to_string(), vec!["613".to_string()]),
1291
1312
  ]),
1292
1313
  is_filter_pass: false,
1314
+ is_complex: false,
1293
1315
  },
1294
1316
  VCFRow {
1295
1317
  // Edge case of using `RO` and `AO` for coverage
@@ -1306,6 +1328,7 @@ mod tests {
1306
1328
  ("GT_CONF".to_string(), vec!["3.77".to_string()]),
1307
1329
  ]),
1308
1330
  is_filter_pass: false,
1331
+ is_complex: false,
1309
1332
  },
1310
1333
  VCFRow {
1311
1334
  // Odd edge case which is a valid VCF row where it's a het GT but single COV value
@@ -1320,6 +1343,7 @@ mod tests {
1320
1343
  ("GT_CONF".to_string(), vec!["613".to_string()]),
1321
1344
  ]),
1322
1345
  is_filter_pass: false,
1346
+ is_complex: false,
1323
1347
  },
1324
1348
  VCFRow {
1325
1349
  // Null call by virtue of failing specific filter
@@ -1334,6 +1358,7 @@ mod tests {
1334
1358
  ("GT_CONF".to_string(), vec!["613".to_string()]),
1335
1359
  ]),
1336
1360
  is_filter_pass: false,
1361
+ is_complex: false,
1337
1362
  },
1338
1363
  ];
1339
1364
  for (idx, record) in expected_records.iter().enumerate() {
@@ -1506,4 +1531,80 @@ mod tests {
1506
1531
  }
1507
1532
  }
1508
1533
  }
1534
+
1535
+ #[test]
1536
+ fn test_complex_is_detected() {
1537
+ // Check that the is_complex flag gets set if a VCF has a complex row in it
1538
+ // This VCF has 1 standard row, then 2 almost complex rows, then a complex row
1539
+ // Standard SNP at 4687
1540
+ // Ref of length 1000 with 2 alts at 4730
1541
+ // Ref of length 1001 with 1 alt at 5730
1542
+ // Ref of length 1001 with 2 alts at 7030 <-- this is the complex row
1543
+
1544
+ let vcf = VCFFile::new("test/complex.vcf".to_string(), false, 3);
1545
+
1546
+ let expected_records = vec![
1547
+ VCFRow {
1548
+ position: 4687,
1549
+ reference: "t".to_string(),
1550
+ alternative: vec!["c".to_string()],
1551
+ filter: vec!["PASS".to_string()],
1552
+ fields: HashMap::from([
1553
+ ("GT".to_string(), vec!["1/1".to_string()]),
1554
+ ("DP".to_string(), vec!["68".to_string()]),
1555
+ ("COV".to_string(), vec!["0".to_string(), "68".to_string()]),
1556
+ ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1557
+ ]),
1558
+ is_filter_pass: true,is_complex: false,
1559
+ },
1560
+ VCFRow {
1561
+ position: 4730,
1562
+ reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
1563
+ alternative: vec!["t".to_string(), "g".to_string()],
1564
+ filter: vec!["PASS".to_string()],
1565
+ fields: HashMap::from([
1566
+ ("GT".to_string(), vec!["1/1".to_string()]),
1567
+ ("DP".to_string(), vec!["200".to_string()]),
1568
+ ("COV".to_string(), vec!["1".to_string(), "99".to_string(), "100".to_string()]),
1569
+ ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1570
+ ]),
1571
+ is_filter_pass: true,is_complex: false,
1572
+ },
1573
+ VCFRow {
1574
+ position: 5730,
1575
+ reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
1576
+ alternative: vec!["t".to_string()],
1577
+ filter: vec!["PASS".to_string()],
1578
+ fields: HashMap::from([
1579
+ ("GT".to_string(), vec!["1/1".to_string()]),
1580
+ ("DP".to_string(), vec!["200".to_string()]),
1581
+ ("COV".to_string(), vec!["1".to_string(), "99".to_string()]),
1582
+ ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1583
+ ]),
1584
+ is_filter_pass: true,is_complex: false,
1585
+ },
1586
+ VCFRow {
1587
+ position: 7030,
1588
+ reference: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
1589
+ alternative: vec!["t".to_string(), "g".to_string()],
1590
+ filter: vec!["PASS".to_string()],
1591
+ fields: HashMap::from([
1592
+ ("GT".to_string(), vec!["2/2".to_string()]),
1593
+ ("DP".to_string(), vec!["200".to_string()]),
1594
+ ("COV".to_string(), vec!["1".to_string(), "99".to_string(), "100".to_string()]),
1595
+ ("GT_CONF".to_string(), vec!["613.77".to_string()]),
1596
+ ]),
1597
+ is_filter_pass: true,is_complex: true,
1598
+ },
1599
+ ];
1600
+
1601
+ for (idx, record) in expected_records.iter().enumerate() {
1602
+ assert_eq!(record.position, vcf.records[idx].position);
1603
+ assert_eq!(record.reference, vcf.records[idx].reference);
1604
+ assert_eq!(record.alternative, vcf.records[idx].alternative);
1605
+ assert_eq!(record.filter, vcf.records[idx].filter);
1606
+ assert_eq!(record.fields, vcf.records[idx].fields);
1607
+ assert_eq!(record.is_filter_pass, vcf.records[idx].is_filter_pass);
1608
+ }
1609
+ }
1509
1610
  }
@@ -0,0 +1,14 @@
1
+ ##fileformat=VCFv4.2
2
+ ##source=minos, version 0.4.1
3
+ ##fileDate=2018-04-26
4
+ ##FORMAT=<ID=COV,Number=R,Type=Integer,Description="Number of reads on ref and alt alleles">
5
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
6
+ ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="total kmer depth from gramtools",Source="minos">
7
+ ##FORMAT=<ID=GT_CONF,Number=1,Type=Float,Description="Genotype confidence. Difference in log likelihood of most likely and next most likely genotype">
8
+ ##INFO=<ID=KMER,Number=1,Type=Integer,Description="Kmer size at which variant was discovered (kmer-size used by gramtools build)">
9
+ ##minos_max_read_length=200
10
+ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 03.vcf
11
+ NC_004148.2 4687 . T C . PASS KMER=15 GT:DP:COV:GT_CONF 1/1:68:0,68:613.77
12

13

14

File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes