bio-sam-mutation 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+ class MutationArrayTest < Test::Unit::TestCase
3
+ def test_hgvs
4
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
5
+ # Using MutationArray collection method:
6
+ assert_equal "ENST00000366794:c.[996T>C;1064delA;1080_1081insG]", sub_del_ins.mutations.to_hgvs("c")
7
+ end
8
+
9
+ def test_vep
10
+ del = Bio::DB::Alignment.new "MLF7W:00035:00652\t0\tENST00000366794\t361\t70\t141M3D68M8S\t*\t0\t0\tGGGTTCTCTGAGCTTCGGTGGGATGACCAGCAGAAAGTCAAGAAGACAGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAAGAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGCTGAGG\tEE?E?@@CCBB??>@CCACCC?CBCAA<?ACDCDE?CD@.)-/8/--;;;A6;=???DADD@BACCCDCCACCC?CACA>?>?CC@CAC@C@CCCBC@CACCCBCC@??>CCBBC=B@ACCC?CCCDDCCCCCCADDAD<??BCCCCADDCCDCDCCACCC:CCCCCCACCC@ACA:::::.:C;CACCACCDDB>BCCC@CACCA???CACCCCC@\tPG:Z:novoalign\tAS:i:135\tUQ:i:135\tNM:i:3\tMD:Z:141^TCC68"
11
+ assert_equal "TCC/-", del.mutations(1,nil,145).vep[0]["allele_string"]
12
+ end
13
+ end
@@ -0,0 +1,160 @@
1
+ require 'helper'
2
+ class SAMTest < Test::Unit::TestCase
3
+ def test_split_types
4
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 112767204 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
5
+ assert(sam.qname.is_a? String)
6
+ assert(sam.flag.is_a? Integer) # DB::Alignment only supports integer FLAG
7
+ assert(sam.rname.is_a? String)
8
+ assert(sam.pos.is_a? Integer)
9
+ assert(sam.mapq.is_a? Integer)
10
+ assert(sam.cigar.is_a? String)
11
+ assert(sam.mrnm.is_a?(String) || sam.mrnm.nil?)
12
+ assert(sam.mpos.is_a?(Integer) || sam.mpos.nil?)
13
+ assert(sam.isize.is_a?(Integer) || sam.isize.nil?)
14
+ assert(sam.seq.is_a? String)
15
+ assert(sam.qual.is_a? String)
16
+ assert(sam.tags.is_a? Hash)
17
+
18
+ end
19
+ def test_split
20
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 112767204 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
21
+ assert_equal(sam.qname,"DKNQZ:00025:00303","ID not as expected")
22
+ assert_equal(sam.flag,0,"Flag not as expected")
23
+ assert_equal(sam.rname,"5","Chr not as expected")
24
+ assert_equal(sam.pos,112767204,"Position not as expected")
25
+ assert_equal(sam.mapq,37,"Quality not as expected")
26
+ assert_equal("60M1D7M2I6M", sam.cigar)
27
+ assert_equal("*",sam.mrnm)
28
+ assert_equal(0,sam.mpos)
29
+ assert_equal(0,sam.isize)
30
+ assert_equal(sam.seq,"GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA")
31
+ assert_equal(sam.qual,"CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112","Base quality string not as expected")
32
+ assert_equal("60^G13",sam.tags["MD"].value)
33
+
34
+ end
35
+
36
+ def test_aliases
37
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
38
+ assert_equal(sam.chr,sam.rname)
39
+ assert_equal(sam.tags,sam.opt)
40
+ end
41
+
42
+ def test_insertion_mutation
43
+ insertion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 112839854 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
44
+ assert_equal 112839916, insertion.mutations[0].position
45
+ assert_nil insertion.mutations[0].reference
46
+ assert_equal "AT", insertion.mutations[0].mutant
47
+ assert_equal :insertion, insertion.mutations[0].type
48
+ assert_equal 112839916, insertion.mutations(60,10)[0].position
49
+ end
50
+ def test_ins_with_offset
51
+ insertion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 112839854 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
52
+ assert_equal 112839916, insertion.mutations(60,10)[0].position
53
+ end
54
+ def test_sorting
55
+ insertion_and_deletion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 1 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
56
+ assert_equal :insertion, insertion_and_deletion.mutations[0].type
57
+ assert_equal :deletion, insertion_and_deletion.mutations[1].type
58
+ end
59
+
60
+ def test_deletion_mutation
61
+ deletion = Bio::DB::Alignment.new("I2M5K:00271:01406 0 5 112839854 70 55M12D162M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTTATTACATTTTGCCACGGAAAGTACTGCTGAGG ACHECCC@???ACCCCCCDC>CC@CDCC>C>>?CC=>?ACADCCCCACCCCC:AAA<=CCC??<>CDCDE?C@C>=;CC=>>>@@@>:::::+:::/:@@@<>>?CCCD=>>=7:D???AAAAAB8;;>??;?@@=?;;;???@@@:@B;;;;GBB?BBAAAA9??=@@;?<?A>B?C@@@@@CBBB?;;;4;C?BB;;;:1:B>BBB=AA;A@@???A>?::2 PG:Z:novoalign AS:i:194 UQ:i:194 NM:i:12 MD:Z:55^CCTCCACCACCT162")
62
+ assert_equal deletion.mutations[0].type, :deletion
63
+ assert_equal deletion.mutations[0].reference, "CCTCCACCACCT"
64
+ assert_equal deletion.mutations[0].mutant, nil
65
+ assert_equal deletion.mutations(50,40)[0].position, 112839909
66
+ assert_equal "5", deletion.mutations[0].seqname
67
+ end
68
+ def test_substitution_mutation
69
+ substitution = Bio::DB::Alignment.new("OR1FQ:00462:02257 0 ENST00000366794 936 70 193M43S * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTGAGG @BCACC@@@@DAFFADCCCCDEID@@?@ACCCDD:@??;<8<1..=>=<1111@@CD??@@CC@C@CFDCACCCADDABCCD?DD@CACD?CC??>C6;6;>>???E?C@??CCDACCC@CD@CCC><?A>>7;<<7<<<??;BBBBB/;;;;;BBBCC@CCACC=@7;;;;;;;6;@@7?@CCCCCC111<6<<+00>>>=CCD;??C@CCCC?????CCAC????CCECDDCDB PG:Z:novoalign AS:i:328 UQ:i:328 NM:i:1 MD:Z:60T132")
70
+ assert_equal substitution.mutations[0].type, :substitution
71
+ assert_equal substitution.mutations[0].reference, "T"
72
+ assert_equal substitution.mutations[0].mutant, "C"
73
+ assert_equal 996, substitution.mutations[0].position
74
+ assert_equal "ENST00000366794", substitution.mutations[0].seqname
75
+ end
76
+ def test_substitution_mutation_with_translation_pos
77
+ substitution = Bio::DB::Alignment.new("OR1FQ:00462:02257 0 ENST00000366794 936 70 193M43S * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTGAGG @BCACC@@@@DAFFADCCCCDEID@@?@ACCCDD:@??;<8<1..=>=<1111@@CD??@@CC@C@CFDCACCCADDABCCD?DD@CACD?CC??>C6;6;>>???E?C@??CCDACCC@CD@CCC><?A>>7;<<7<<<??;BBBBB/;;;;;BBBCC@CCACC=@7;;;;;;;6;@@7?@CCCCCC111<6<<+00>>>=CCD;??C@CCCC?????CCAC????CCECDDCDB PG:Z:novoalign AS:i:328 UQ:i:328 NM:i:1 MD:Z:60T132")
78
+ # offset, length, reference start, translation start
79
+ assert_equal 852, substitution.mutations(55,20,145)[0].position
80
+ assert_equal "ENST00000366794", substitution.mutations[0].seqname
81
+ end
82
+
83
+ def test_cdna_mutation
84
+ parp1 = Bio::DB::Alignment.new("OR1FQ:00028:00030 0 ENST00000366794 342 70 66M1D88M6D70M8S * 0 0 CCCTGACGTTGAGGTGGATGGGTTCTCTGAGCTTCGGTGGGATGATCAGCAGAAAGTCAAGAAGACGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGCTGAGG ;;1;;;;6606660;B?A<<<1?ACCDC?@;;;A<;7;<<16B==BDB@@@;;;1;@@@:;/*/;/0--)-)-C660>B@=?@D?;;;7;;;1;7;@;;7;;;64.4.4.454;;6;=@CFDCC@?>;@A;;>:;CACCC>CCCCCCCCCCCCCC@C>@@CCACCCCECC@@6:::.::::>D>?>CACEC?>1<<(00*0*0/6777?A??C??6;?;;6;@::;?CDCD> PG:Z:novoalign AS:i:234 UQ:i:234 NM:i:8 MD:Z:45C20^A88^CCAAGT70")
85
+ response = parp1.mutations(140,40,145).vep("human","c")
86
+ assert_equal "ENST00000366794:c.353_358delCCAAGT", parp1.mutations(140,40,145).first.to_hgvs("c")
87
+ assert_equal "CCAAGT/-", response.first["allele_string"]
88
+ assert_equal 226392243, response.first["start"]
89
+ end
90
+
91
+ def test_nil_if_no_mutation
92
+ no_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 75M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
93
+ assert_nil no_mut.mutations
94
+ end
95
+
96
+ def test_query_no_mutation
97
+ no_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 75M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
98
+ assert_equal "TTTCCCTGGA", no_mut.query(8,10)
99
+ end
100
+ def test_query_deletion
101
+ del_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 9M1D65M * 0 0 GCAGTAATTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:9^T65")
102
+ assert_equal "TT-CCCTGGA", del_mut.query(8,10)
103
+ end
104
+ def test_query_insertion
105
+ ins_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 10M2I65M * 0 0 GCAGTAATTTGGCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
106
+ assert_equal "TTT_gg_CCCTGGA", ins_mut.query(8,10)
107
+ end
108
+ def test_query_substitution
109
+ sub_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 75M * 0 0 GCAGTAATTTCGCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:11C63")
110
+ assert_equal "TTTCgCTGGA", sub_mut.query(8,10)
111
+ end
112
+
113
+ def test_query_complex
114
+ complex = Bio::DB::Alignment.new "MLF7W:00161:00415 0 1 226064342 70 121M30I32M1D52M7S * 0 0 TCTCTGTACCATGGCTCGTACAAAGCAGACTGCCCGCAAATCGACCGGTGGTAAAGCACCCAGGAAGCAACTGGCTACAAAAGCCGCTCGCAAGAGTGCGCCCTCTACTGGAGTGGTGAAGAAACCTCATCGTTAAAAAAACCTCATCGTTAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAATGGGACAAAGTCTCTCTTGTATGTATCCATATAATTTAACAAAAAGAATGGAT CC???ECCCACCCACBBCD>?@D=@@DCEEDDCC=CCDD;??;;;=@=;;0;66,65;5;1;@?C@CCCACCC=CCCCCCB.;;6;;;C;=:@:9:=2::>>52:BBCBCAD?>=ADCC?==:/:><<<=8884:CCCCC(B288::::C3=@8C?@:8888/*//0-'--.'-:;:;,><=288888)6BE?DCCC?DC@@>@;;7;00000<<DB@@@DC@CE?CA00000(0/*/;+;0 PG:Z:novoalign AS:i:379 UQ:i:379 NM:i:33 MD:Z:113G39^A35C16 YH:m:1:g.[226064455G>T;226064462_226064463insAAACCTCATCGTTAAAAAAACCTCATCGTT;226064495delA]"
115
+ # offset 100, length 70
116
+ # MD:Z:113G39^A35C16 YH:m:1:g.[226064455G>T;226064462_226064463insAAACCTCATCGTTAAAAAAACCTCATCGTT;226064495delA]
117
+ assert_equal "GCCCTCTACTGGAGtGGTGAAG_aaacctcatcgttaaaaaaacctcatcgtt_AAACCTCATCGTTACAGGTATTAAAAAACAGG-", complex.query(100,70)
118
+ end
119
+
120
+ def test_query_with_preceding
121
+ pre = Bio::DB::Alignment.new "MLF7W:00741:00261 0 1 226064342 70 32M1I127M2D33M1D34M1I14M36S * 0 0 TCTCTGTACCATGGCTCGTACAAAGCAGACTGCCCCGCAAATCGACCGGTGGTAAAGCACCCAGGAAGCAACTGGCTACAAAAGCCGCTCGCAAGAGTGCGCCCTCTACTGGAGTGGTGAAGAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAATGGACAAAGTCTCTCTTGTATGTATCCATATAATTAACAAAAAGATGGATAACAGGAAAACTTTTTGTCTTTAGAGAAACTTCATTTTGAACACTTAAACTTTACTGCTTAGCTGAGG CC?>=CB@B>?@C@CCCDBB<66,<AA;;;;;;;;,6=CC,>>>>>@C@BC@;;;1;>;>C????@???DA???ACCCCCDC8@@ACCCFC??>?CCCCCCCC<CCCCCCE@CCCCACDCACCE:>=;:80:00*////)/??288888)8005:::::(0.).-61)133::BBC;????<<<882888?D20*/*/////(..66-.-------999-9;;99,0...8-:6:/88,8<7>?ABB1;:9888<288,888,88=88/*//0008:* PG:Z:novoalign AS:i:534 UQ:i:534 NM:i:8 MD:Z:113G45^AT0G27C4^T48 YH:m:1:g.[226064455G>T;226064501_226064502delAT;226064503G>T]"
122
+ # MD:Z:113G45^AT0G27C4^T48 YH:m:1:g.[226064455G>T;226064501_226064502delAT;226064503G>T]
123
+ assert_equal "GCCCTCTACTGGAGtGGTGAAGAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAA--tGGACAAA", pre.query(100,70)
124
+ end
125
+
126
+
127
+ def test_complex_mutation
128
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
129
+ # MD:Z:60T67^A54
130
+ # Old "manual" method:
131
+ assert_equal "996T>C;1064delA;1080_1081insG", sub_del_ins.mutations.map{|m| m.to_hgvs}.join(";")
132
+ end
133
+
134
+ def test_really_complex_mutation
135
+ omg = Bio::DB::Alignment.new "MLF7W:01389:01808 0 ENST00000366794 957 70 153M45D46M1I83M1I13M4S * 0 0 AGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAAGCAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTCGAACTCCTCTGCTGAGG 0666606066666,66@//*//----)-)--)666@6666B@@<666660---)--)----)660606C<65>5;;6;4;BCCCCD>CCC??>B7;;C7;;@AAAA<<<<<7<<<7<;DA;;;;---%-------6BB6??=;D<@7;;;;;;A6;7;7;7;;<6D:6<B<<7;;7;;;7;;;1<><;7;@C7;;;;;+;BCCA?><<;BB;;7;;;;;+;;;;160666.-)-.)--6)-4.;6;;AB>;;7;;;1;C7;;5;;0666066<0555<?6;;/.-.).8==CCCCCA;:/6 PG:Z:novoalign AS:i:504 UQ:i:504 NM:i:48 MD:Z:39T113^GGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA142"
136
+ # MDZ MD:Z:39T113^GGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA142
137
+ # translation start 145
138
+ assert_equal "ENST00000366794:c.[852T>C;966_1010delGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA;1056_1057insG;1139_1140insC]", omg.mutations(1,nil,145).to_hgvs
139
+
140
+ #This one (wrong mutant base retrieved for substitution) seems to be a problem with novoalign? Mutation is CT>TC but aligned as a single T substitution
141
+ #wtf = Bio::DB::Alignment.new "MLF7W:00090:00609\t0\tENST00000366794\t957\t70\t25M1I314M4S\t*\t0\t0\tAGTGCCTTCTGGGGAGTCGGCGATCTTTGGACCGAGTAGTCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAAGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAGCAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCGAGG\t?AA<<7;7>>DDC;@@@DCAB;;;;;;1;7;@?;;;;;;;6;;;@@C;<<7<<;7<?7<CBB:<;7;7;;1;;B;;;7;=@BCCCCC1>>;;>BC@CCDACDACC;;<<<<<7?FEADCDGDDA;?;;/;<<;;CCB@CA@@>?DAC???DCCFDAD??>?CCCDDCD@??C9@<B;>=?@C@CFCCD>CC6;;;/;C;CAD>?>?ACCCDC?DDCCCACCACCCA@@@:>6;E>A@C<;;;;-;CCCADD<;;C>??ACGGGE+;<<<1;>ACCDDADC7<;B7/0+0+////8;;7?AA9<<66<6;<66CC?C?><;<<;;;;>;ACA>??A>>;;>>;;7\tPG:Z:novoalign\tAS:i:164\tUQ:i:164\tNM:i:3\tMD:Z:39T203A95"
142
+ # MD:Z:39T203A95
143
+ #assert_equal "ENST00000366794:c.[981_982insT;996T>T;1200A>A]", wtf.mutations.to_hgvs
144
+
145
+ end
146
+
147
+ def test_add_tag
148
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
149
+ sub_del_ins.add_tag!("TE:s:TTAG")
150
+ assert_equal "TTAG", sub_del_ins.tags["TE"].value
151
+
152
+ tag_obj = Bio::DB::Tag.new
153
+ tag_obj.set("TA:g:Object")
154
+ sub_del_ins.add_tag!(tag_obj)
155
+ assert_equal "Object", sub_del_ins.tags["TA"].value
156
+
157
+ assert_match "TA:g:Object", sub_del_ins.sam_string
158
+ end
159
+
160
+ end
@@ -0,0 +1,9 @@
1
+ require 'helper'
2
+ class VepHgvsTest < Test::Unit::TestCase
3
+ def test_vep_parsing
4
+ json_obj = JSON.parse "[{\"assembly_name\":\"GRCh38\",\"end\":226392243,\"seq_region_name\":\"1\",\"transcript_consequences\":[{\"gene_id\":\"ENSG00000143799\",\"distance\":15,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"downstream_gene_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"transcript_id\":\"ENST00000366792\",\"impact\":\"MODIFIER\"},{\"gene_id\":\"ENSG00000143799\",\"distance\":697,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"downstream_gene_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"transcript_id\":\"ENST00000629232\",\"impact\":\"MODIFIER\"},{\"gene_id\":\"ENSG00000143799\",\"cdna_end\":227,\"biotype\":\"processed_transcript\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"non_coding_transcript_exon_variant\",\"non_coding_transcript_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"cdna_start\":225,\"transcript_id\":\"ENST00000469663\",\"impact\":\"MODIFIER\"},{\"cdna_end\":504,\"codons\":\"TCC/-\",\"protein_end\":120,\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"amino_acids\":\"S/-\",\"gene_symbol\":\"PARP1\",\"cdna_start\":502,\"transcript_id\":\"ENST00000366794\",\"cds_start\":358,\"gene_id\":\"ENSG00000143799\",\"protein_start\":120,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"cds_end\":360,\"consequence_terms\":[\"inframe_deletion\"],\"impact\":\"MODERATE\"}],\"strand\":-1,\"id\":\"ENST00000366794:c.358_360delTCC\",\"allele_string\":\"TCC/-\",\"most_severe_consequence\":\"inframe_deletion\",\"start\":226392241}]"
5
+ #test result from GRCh38 (hg38)
6
+ assert_equal [{"Allele"=>"TCC/-", "CDS position"=>358, "Protein start"=>120, "Mutation"=>"S/-", "Consequence"=>["inframe_deletion"]}], VepHgvs.consequences_for_transcript(json_obj,"ENST00000366794")
7
+
8
+ end
9
+ end
metadata ADDED
@@ -0,0 +1,247 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-sam-mutation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ platform: ruby
6
+ authors:
7
+ - Stephen Pettitt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bio
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.4.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: bio-samtools
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 2.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: oj
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bio-ensembl-rest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: trollop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.9'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.9'
97
+ - !ruby/object:Gem::Dependency
98
+ name: shoulda
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rdoc
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.12'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.12'
125
+ - !ruby/object:Gem::Dependency
126
+ name: simplecov
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: jeweler
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '2.0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '2.0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: bundler
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: test-unit
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.0'
181
+ description: Simple classes for parsing SAM, CIGAR and MD:Z strings, including slices.
182
+ Methods for calling mutations in HGVS format and looking up consequences using Ensembl
183
+ VEP REST API. Developed for calling mutations at an expected position in an alignment
184
+ - e.g. Amplicon sequencing of CRISPR-induced mutations.
185
+ email: spettitt@gmail.com
186
+ executables:
187
+ - mutations
188
+ - sam-mutation
189
+ extensions: []
190
+ extra_rdoc_files:
191
+ - LICENSE.txt
192
+ - README.md
193
+ - README.rdoc
194
+ files:
195
+ - ".document"
196
+ - ".travis.yml"
197
+ - Gemfile
198
+ - LICENSE.txt
199
+ - README.md
200
+ - README.rdoc
201
+ - Rakefile
202
+ - bin/mutations
203
+ - bin/sam-mutation
204
+ - lib/bio-sam-mutation.rb
205
+ - lib/bio-sam-mutation/bio/alignment/cigar.rb
206
+ - lib/bio-sam-mutation/bio/alignment/iterate_pairs.rb
207
+ - lib/bio-sam-mutation/bio/db/alignment.rb
208
+ - lib/bio-sam-mutation/bio/db/tag.rb
209
+ - lib/bio-sam-mutation/bio/db/tag/md.rb
210
+ - lib/bio-sam-mutation/bio/mutantallele.rb
211
+ - lib/bio-sam-mutation/bio/mutation.rb
212
+ - lib/bio-sam-mutation/bio/mutation_array.rb
213
+ - lib/bio-sam-mutation/bio/vephgvs.rb
214
+ - lib/bio-sam-mutation/mutationscli.rb
215
+ - test/helper.rb
216
+ - test/test_cigar.rb
217
+ - test/test_mdtag.rb
218
+ - test/test_mutant_allele.rb
219
+ - test/test_mutation.rb
220
+ - test/test_mutation_array.rb
221
+ - test/test_sam.rb
222
+ - test/test_vep_hgvs.rb
223
+ homepage: http://github.com/stveep/bioruby-sam-mutation
224
+ licenses:
225
+ - MIT
226
+ metadata: {}
227
+ post_install_message:
228
+ rdoc_options: []
229
+ require_paths:
230
+ - lib
231
+ required_ruby_version: !ruby/object:Gem::Requirement
232
+ requirements:
233
+ - - ">="
234
+ - !ruby/object:Gem::Version
235
+ version: '0'
236
+ required_rubygems_version: !ruby/object:Gem::Requirement
237
+ requirements:
238
+ - - ">="
239
+ - !ruby/object:Gem::Version
240
+ version: '0'
241
+ requirements: []
242
+ rubyforge_project:
243
+ rubygems_version: 2.4.5
244
+ signing_key:
245
+ specification_version: 4
246
+ summary: Parsing and mutation calling from SAM, CIGAR and MD:Z.
247
+ test_files: []