bio-sam-mutation 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+ class MutationArrayTest < Test::Unit::TestCase
3
+ def test_hgvs
4
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
5
+ # Using MutationArray collection method:
6
+ assert_equal "ENST00000366794:c.[996T>C;1064delA;1080_1081insG]", sub_del_ins.mutations.to_hgvs("c")
7
+ end
8
+
9
+ def test_vep
10
+ del = Bio::DB::Alignment.new "MLF7W:00035:00652\t0\tENST00000366794\t361\t70\t141M3D68M8S\t*\t0\t0\tGGGTTCTCTGAGCTTCGGTGGGATGACCAGCAGAAAGTCAAGAAGACAGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAAGAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGCTGAGG\tEE?E?@@CCBB??>@CCACCC?CBCAA<?ACDCDE?CD@.)-/8/--;;;A6;=???DADD@BACCCDCCACCC?CACA>?>?CC@CAC@C@CCCBC@CACCCBCC@??>CCBBC=B@ACCC?CCCDDCCCCCCADDAD<??BCCCCADDCCDCDCCACCC:CCCCCCACCC@ACA:::::.:C;CACCACCDDB>BCCC@CACCA???CACCCCC@\tPG:Z:novoalign\tAS:i:135\tUQ:i:135\tNM:i:3\tMD:Z:141^TCC68"
11
+ assert_equal "TCC/-", del.mutations(1,nil,145).vep[0]["allele_string"]
12
+ end
13
+ end
@@ -0,0 +1,160 @@
1
+ require 'helper'
2
+ class SAMTest < Test::Unit::TestCase
3
+ def test_split_types
4
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 112767204 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
5
+ assert(sam.qname.is_a? String)
6
+ assert(sam.flag.is_a? Integer) # DB::Alignment only supports integer FLAG
7
+ assert(sam.rname.is_a? String)
8
+ assert(sam.pos.is_a? Integer)
9
+ assert(sam.mapq.is_a? Integer)
10
+ assert(sam.cigar.is_a? String)
11
+ assert(sam.mrnm.is_a?(String) || sam.mrnm.nil?)
12
+ assert(sam.mpos.is_a?(Integer) || sam.mpos.nil?)
13
+ assert(sam.isize.is_a?(Integer) || sam.isize.nil?)
14
+ assert(sam.seq.is_a? String)
15
+ assert(sam.qual.is_a? String)
16
+ assert(sam.tags.is_a? Hash)
17
+
18
+ end
19
+ def test_split
20
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 112767204 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
21
+ assert_equal(sam.qname,"DKNQZ:00025:00303","ID not as expected")
22
+ assert_equal(sam.flag,0,"Flag not as expected")
23
+ assert_equal(sam.rname,"5","Chr not as expected")
24
+ assert_equal(sam.pos,112767204,"Position not as expected")
25
+ assert_equal(sam.mapq,37,"Quality not as expected")
26
+ assert_equal("60M1D7M2I6M", sam.cigar)
27
+ assert_equal("*",sam.mrnm)
28
+ assert_equal(0,sam.mpos)
29
+ assert_equal(0,sam.isize)
30
+ assert_equal(sam.seq,"GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA")
31
+ assert_equal(sam.qual,"CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112","Base quality string not as expected")
32
+ assert_equal("60^G13",sam.tags["MD"].value)
33
+
34
+ end
35
+
36
+ def test_aliases
37
+ sam = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 60M1D7M2I6M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:60^G13")
38
+ assert_equal(sam.chr,sam.rname)
39
+ assert_equal(sam.tags,sam.opt)
40
+ end
41
+
42
+ def test_insertion_mutation
43
+ insertion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 112839854 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
44
+ assert_equal 112839916, insertion.mutations[0].position
45
+ assert_nil insertion.mutations[0].reference
46
+ assert_equal "AT", insertion.mutations[0].mutant
47
+ assert_equal :insertion, insertion.mutations[0].type
48
+ assert_equal 112839916, insertion.mutations(60,10)[0].position
49
+ end
50
+ def test_ins_with_offset
51
+ insertion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 112839854 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
52
+ assert_equal 112839916, insertion.mutations(60,10)[0].position
53
+ end
54
+ def test_sorting
55
+ insertion_and_deletion = Bio::DB::Alignment.new("I2M5K:00253:00406 0 5 1 70 63M2I138M1D27M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCCACCATACCTCCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTATTACATTTTGCCACGGAAAGTACTGCTGAGG @CDDDCCCCACACCCCCCCC?CCACCCC>A6;;;;7;;6;6;BC;;6;;;;;.;;>ADDA??;;;;;?CCACCCD>C??@CCCC>C@C;>?CCCC@C=::@:::::+:::/:CCC?>>>>CCCCDDD9CCCC@AB????=AB>??;?BB>@@@AA???CC<@@?????BB>??;;;B<BC;??8;6:A=@=@BBB;;;?<77//*08*088888*8=9=?B7;;4;??????????< PG:Z:novoalign AS:i:183 UQ:i:183 NM:i:3 MD:Z:201^T27")
56
+ assert_equal :insertion, insertion_and_deletion.mutations[0].type
57
+ assert_equal :deletion, insertion_and_deletion.mutations[1].type
58
+ end
59
+
60
+ def test_deletion_mutation
61
+ deletion = Bio::DB::Alignment.new("I2M5K:00271:01406 0 5 112839854 70 55M12D162M7S * 0 0 CAGTGATCTTCCAGATAGCCCTGGACAAACCATGCCACCAAGCAGAAGTAAAACACCTCAAACAGCTCAAACCAAGCGAGAAGTACCTAAAAATAAAGCACCTACTGCTGAAAAGAGAGAGAGTGGACCTAAGCAAGCTGCAGTAAATGCTGCAGTTCAGAGGGTCCAGGTTCTTCCAGATGCTGATACTTTATTACATTTTGCCACGGAAAGTACTGCTGAGG ACHECCC@???ACCCCCCDC>CC@CDCC>C>>?CC=>?ACADCCCCACCCCC:AAA<=CCC??<>CDCDE?C@C>=;CC=>>>@@@>:::::+:::/:@@@<>>?CCCD=>>=7:D???AAAAAB8;;>??;?@@=?;;;???@@@:@B;;;;GBB?BBAAAA9??=@@;?<?A>B?C@@@@@CBBB?;;;4;C?BB;;;:1:B>BBB=AA;A@@???A>?::2 PG:Z:novoalign AS:i:194 UQ:i:194 NM:i:12 MD:Z:55^CCTCCACCACCT162")
62
+ assert_equal deletion.mutations[0].type, :deletion
63
+ assert_equal deletion.mutations[0].reference, "CCTCCACCACCT"
64
+ assert_equal deletion.mutations[0].mutant, nil
65
+ assert_equal deletion.mutations(50,40)[0].position, 112839909
66
+ assert_equal "5", deletion.mutations[0].seqname
67
+ end
68
+ def test_substitution_mutation
69
+ substitution = Bio::DB::Alignment.new("OR1FQ:00462:02257 0 ENST00000366794 936 70 193M43S * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTGAGG @BCACC@@@@DAFFADCCCCDEID@@?@ACCCDD:@??;<8<1..=>=<1111@@CD??@@CC@C@CFDCACCCADDABCCD?DD@CACD?CC??>C6;6;>>???E?C@??CCDACCC@CD@CCC><?A>>7;<<7<<<??;BBBBB/;;;;;BBBCC@CCACC=@7;;;;;;;6;@@7?@CCCCCC111<6<<+00>>>=CCD;??C@CCCC?????CCAC????CCECDDCDB PG:Z:novoalign AS:i:328 UQ:i:328 NM:i:1 MD:Z:60T132")
70
+ assert_equal substitution.mutations[0].type, :substitution
71
+ assert_equal substitution.mutations[0].reference, "T"
72
+ assert_equal substitution.mutations[0].mutant, "C"
73
+ assert_equal 996, substitution.mutations[0].position
74
+ assert_equal "ENST00000366794", substitution.mutations[0].seqname
75
+ end
76
+ def test_substitution_mutation_with_translation_pos
77
+ substitution = Bio::DB::Alignment.new("OR1FQ:00462:02257 0 ENST00000366794 936 70 193M43S * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCTGAGG @BCACC@@@@DAFFADCCCCDEID@@?@ACCCDD:@??;<8<1..=>=<1111@@CD??@@CC@C@CFDCACCCADDABCCD?DD@CACD?CC??>C6;6;>>???E?C@??CCDACCC@CD@CCC><?A>>7;<<7<<<??;BBBBB/;;;;;BBBCC@CCACC=@7;;;;;;;6;@@7?@CCCCCC111<6<<+00>>>=CCD;??C@CCCC?????CCAC????CCECDDCDB PG:Z:novoalign AS:i:328 UQ:i:328 NM:i:1 MD:Z:60T132")
78
+ # offset, length, reference start, translation start
79
+ assert_equal 852, substitution.mutations(55,20,145)[0].position
80
+ assert_equal "ENST00000366794", substitution.mutations[0].seqname
81
+ end
82
+
83
+ def test_cdna_mutation
84
+ parp1 = Bio::DB::Alignment.new("OR1FQ:00028:00030 0 ENST00000366794 342 70 66M1D88M6D70M8S * 0 0 CCCTGACGTTGAGGTGGATGGGTTCTCTGAGCTTCGGTGGGATGATCAGCAGAAAGTCAAGAAGACGCGGAAGCTGGAGGAGTGACAGGCAAAGGCCAGGATGGAATTGGTAGCAAGGCAGAGAAGACTCTGGGTGACTTTGCAGCAGAGTATGCCAACAGAAGTACGTGCAAGGGGTGTATGGAGAAGATAGAAAAGGGCCAGGTGCGCCTGTCCAAGAAGATGGCTGAGG ;;1;;;;6606660;B?A<<<1?ACCDC?@;;;A<;7;<<16B==BDB@@@;;;1;@@@:;/*/;/0--)-)-C660>B@=?@D?;;;7;;;1;7;@;;7;;;64.4.4.454;;6;=@CFDCC@?>;@A;;>:;CACCC>CCCCCCCCCCCCCC@C>@@CCACCCCECC@@6:::.::::>D>?>CACEC?>1<<(00*0*0/6777?A??C??6;?;;6;@::;?CDCD> PG:Z:novoalign AS:i:234 UQ:i:234 NM:i:8 MD:Z:45C20^A88^CCAAGT70")
85
+ response = parp1.mutations(140,40,145).vep("human","c")
86
+ assert_equal "ENST00000366794:c.353_358delCCAAGT", parp1.mutations(140,40,145).first.to_hgvs("c")
87
+ assert_equal "CCAAGT/-", response.first["allele_string"]
88
+ assert_equal 226392243, response.first["start"]
89
+ end
90
+
91
+ def test_nil_if_no_mutation
92
+ no_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 75M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
93
+ assert_nil no_mut.mutations
94
+ end
95
+
96
+ def test_query_no_mutation
97
+ no_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 1 37 75M * 0 0 GCAGTAATTTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
98
+ assert_equal "TTTCCCTGGA", no_mut.query(8,10)
99
+ end
100
+ def test_query_deletion
101
+ del_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 9M1D65M * 0 0 GCAGTAATTCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:9^T65")
102
+ assert_equal "TT-CCCTGGA", del_mut.query(8,10)
103
+ end
104
+ def test_query_insertion
105
+ ins_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 10M2I65M * 0 0 GCAGTAATTTGGCCCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:75")
106
+ assert_equal "TTT_gg_CCCTGGA", ins_mut.query(8,10)
107
+ end
108
+ def test_query_substitution
109
+ sub_mut = Bio::DB::Alignment.new("DKNQZ:00025:00303 0 5 100000 37 75M * 0 0 GCAGTAATTTCGCTGGAGTAAAACTGCGGTCAAAAATGTCCCTCCGTTCTTATGGAAGCCGGAAGGAAGTCTGTA CCCCCC@CE>CC<CC@CB;;;;.;;;;;AC;::::+:92A:=CCAEE=?>;=:@<B?:<6<*/*/*/*/911112 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:11C63")
110
+ assert_equal "TTTCgCTGGA", sub_mut.query(8,10)
111
+ end
112
+
113
+ def test_query_complex
114
+ complex = Bio::DB::Alignment.new "MLF7W:00161:00415 0 1 226064342 70 121M30I32M1D52M7S * 0 0 TCTCTGTACCATGGCTCGTACAAAGCAGACTGCCCGCAAATCGACCGGTGGTAAAGCACCCAGGAAGCAACTGGCTACAAAAGCCGCTCGCAAGAGTGCGCCCTCTACTGGAGTGGTGAAGAAACCTCATCGTTAAAAAAACCTCATCGTTAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAATGGGACAAAGTCTCTCTTGTATGTATCCATATAATTTAACAAAAAGAATGGAT CC???ECCCACCCACBBCD>?@D=@@DCEEDDCC=CCDD;??;;;=@=;;0;66,65;5;1;@?C@CCCACCC=CCCCCCB.;;6;;;C;=:@:9:=2::>>52:BBCBCAD?>=ADCC?==:/:><<<=8884:CCCCC(B288::::C3=@8C?@:8888/*//0-'--.'-:;:;,><=288888)6BE?DCCC?DC@@>@;;7;00000<<DB@@@DC@CE?CA00000(0/*/;+;0 PG:Z:novoalign AS:i:379 UQ:i:379 NM:i:33 MD:Z:113G39^A35C16 YH:m:1:g.[226064455G>T;226064462_226064463insAAACCTCATCGTTAAAAAAACCTCATCGTT;226064495delA]"
115
+ # offset 100, length 70
116
+ # MD:Z:113G39^A35C16 YH:m:1:g.[226064455G>T;226064462_226064463insAAACCTCATCGTTAAAAAAACCTCATCGTT;226064495delA]
117
+ assert_equal "GCCCTCTACTGGAGtGGTGAAG_aaacctcatcgttaaaaaaacctcatcgtt_AAACCTCATCGTTACAGGTATTAAAAAACAGG-", complex.query(100,70)
118
+ end
119
+
120
+ def test_query_with_preceding
121
+ pre = Bio::DB::Alignment.new "MLF7W:00741:00261 0 1 226064342 70 32M1I127M2D33M1D34M1I14M36S * 0 0 TCTCTGTACCATGGCTCGTACAAAGCAGACTGCCCCGCAAATCGACCGGTGGTAAAGCACCCAGGAAGCAACTGGCTACAAAAGCCGCTCGCAAGAGTGCGCCCTCTACTGGAGTGGTGAAGAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAATGGACAAAGTCTCTCTTGTATGTATCCATATAATTAACAAAAAGATGGATAACAGGAAAACTTTTTGTCTTTAGAGAAACTTCATTTTGAACACTTAAACTTTACTGCTTAGCTGAGG CC?>=CB@B>?@C@CCCDBB<66,<AA;;;;;;;;,6=CC,>>>>>@C@BC@;;;1;>;>C????@???DA???ACCCCCDC8@@ACCCFC??>?CCCCCCCC<CCCCCCE@CCCCACDCACCE:>=;:80:00*////)/??288888)8005:::::(0.).-61)133::BBC;????<<<882888?D20*/*/////(..66-.-------999-9;;99,0...8-:6:/88,8<7>?ABB1;:9888<288,888,88=88/*//0008:* PG:Z:novoalign AS:i:534 UQ:i:534 NM:i:8 MD:Z:113G45^AT0G27C4^T48 YH:m:1:g.[226064455G>T;226064501_226064502delAT;226064503G>T]"
122
+ # MD:Z:113G45^AT0G27C4^T48 YH:m:1:g.[226064455G>T;226064501_226064502delAT;226064503G>T]
123
+ assert_equal "GCCCTCTACTGGAGtGGTGAAGAAACCTCATCGTTACAGGTATTAAAAAACAGGAAAAAA--tGGACAAA", pre.query(100,70)
124
+ end
125
+
126
+
127
+ def test_complex_mutation
128
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
129
+ # MD:Z:60T67^A54
130
+ # Old "manual" method:
131
+ assert_equal "996T>C;1064delA;1080_1081insG", sub_del_ins.mutations.map{|m| m.to_hgvs}.join(";")
132
+ end
133
+
134
+ def test_really_complex_mutation
135
+ omg = Bio::DB::Alignment.new "MLF7W:01389:01808 0 ENST00000366794 957 70 153M45D46M1I83M1I13M4S * 0 0 AGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAAGCAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTCGAACTCCTCTGCTGAGG 0666606066666,66@//*//----)-)--)666@6666B@@<666660---)--)----)660606C<65>5;;6;4;BCCCCD>CCC??>B7;;C7;;@AAAA<<<<<7<<<7<;DA;;;;---%-------6BB6??=;D<@7;;;;;;A6;7;7;7;;<6D:6<B<<7;;7;;;7;;;1<><;7;@C7;;;;;+;BCCA?><<;BB;;7;;;;;+;;;;160666.-)-.)--6)-4.;6;;AB>;;7;;;1;C7;;5;;0666066<0555<?6;;/.-.).8==CCCCCA;:/6 PG:Z:novoalign AS:i:504 UQ:i:504 NM:i:48 MD:Z:39T113^GGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA142"
136
+ # MDZ MD:Z:39T113^GGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA142
137
+ # translation start 145
138
+ assert_equal "ENST00000366794:c.[852T>C;966_1010delGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAA;1056_1057insG;1139_1140insC]", omg.mutations(1,nil,145).to_hgvs
139
+
140
+ # This one (wrong mutant base retrieved for the substitution) seems to be a problem with novoalign? The mutation is CT>TC but it is aligned as a single T substitution.
141
+ #wtf = Bio::DB::Alignment.new "MLF7W:00090:00609\t0\tENST00000366794\t957\t70\t25M1I314M4S\t*\t0\t0\tAGTGCCTTCTGGGGAGTCGGCGATCTTTGGACCGAGTAGTCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGATGCCTATTACTGCACTGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGACACAGACACCCAACCGGAAGGAGTGGGTAACCCCAAAGGAATTCCGAGAAATCTCTTACCTCAAGAAATTGAAGGTTAAAAAGCAGGACCGTATATTCCCCCCAGAAACCAGCGCCTCCGTGGCGGCCACGCCTCCGCCCTCCACAGCCTCGGCTCCTGCTGCTGTGAACTCCTCTGCGAGG\t?AA<<7;7>>DDC;@@@DCAB;;;;;;1;7;@?;;;;;;;6;;;@@C;<<7<<;7<?7<CBB:<;7;7;;1;;B;;;7;=@BCCCCC1>>;;>BC@CCDACDACC;;<<<<<7?FEADCDGDDA;?;;/;<<;;CCB@CA@@>?DAC???DCCFDAD??>?CCCDDCD@??C9@<B;>=?@C@CFCCD>CC6;;;/;C;CAD>?>?ACCCDC?DDCCCACCACCCA@@@:>6;E>A@C<;;;;-;CCCADD<;;C>??ACGGGE+;<<<1;>ACCDDADC7<;B7/0+0+////8;;7?AA9<<66<6;<66CC?C?><;<<;;;;>;ACA>??A>>;;>>;;7\tPG:Z:novoalign\tAS:i:164\tUQ:i:164\tNM:i:3\tMD:Z:39T203A95"
142
+ # MD:Z:39T203A95
143
+ #assert_equal "ENST00000366794:c.[981_982insT;996T>T;1200A>A]", wtf.mutations.to_hgvs
144
+
145
+ end
146
+
147
+ def test_add_tag
148
+ sub_del_ins = Bio::DB::Alignment.new "OR1FQ:00021:00043 0 ENST00000366794 936 70 128M1D16M1I38M * 0 0 ACTCATCTTCAACAAGCAGCAAGTGCCTTCTGGGGAGTCGGCGATCTTGGACCGAGTAGCCGATGGCATGGTGTTCGGTGCCCTCCTTCCCTGCGAGGAATGCTCGGGTCAGCTGGTCTTCAAGAGCGTGCCTATTACTGCACTGGGGGACGTCACTGCCTGGACCAAGTGTATGGTCAAGAC CCCCCCCDB@?=<B7;;<<<A7?@FCDDABBCCD;C???BAB?@@?CAC@CC>??C@;;;7;;;/8/000+.;;8/@7<;;;1;;7;7;;1>BBCCDAD;???;;;C:;;C;;;?@@BC=;;7;;<00...)-55357DC<<6;;;;;,66;;;;;;6606606<:@@;5;44--)--.)--- PG:Z:novoalign AS:i:121 UQ:i:121 NM:i:3 MD:Z:60T67^A54"
149
+ sub_del_ins.add_tag!("TE:s:TTAG")
150
+ assert_equal "TTAG", sub_del_ins.tags["TE"].value
151
+
152
+ tag_obj = Bio::DB::Tag.new
153
+ tag_obj.set("TA:g:Object")
154
+ sub_del_ins.add_tag!(tag_obj)
155
+ assert_equal "Object", sub_del_ins.tags["TA"].value
156
+
157
+ assert_match "TA:g:Object", sub_del_ins.sam_string
158
+ end
159
+
160
+ end
@@ -0,0 +1,9 @@
1
+ require 'helper'
2
+ class VepHgvsTest < Test::Unit::TestCase
3
+ def test_vep_parsing
4
+ json_obj = JSON.parse "[{\"assembly_name\":\"GRCh38\",\"end\":226392243,\"seq_region_name\":\"1\",\"transcript_consequences\":[{\"gene_id\":\"ENSG00000143799\",\"distance\":15,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"downstream_gene_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"transcript_id\":\"ENST00000366792\",\"impact\":\"MODIFIER\"},{\"gene_id\":\"ENSG00000143799\",\"distance\":697,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"downstream_gene_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"transcript_id\":\"ENST00000629232\",\"impact\":\"MODIFIER\"},{\"gene_id\":\"ENSG00000143799\",\"cdna_end\":227,\"biotype\":\"processed_transcript\",\"gene_symbol_source\":\"HGNC\",\"consequence_terms\":[\"non_coding_transcript_exon_variant\",\"non_coding_transcript_variant\"],\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"gene_symbol\":\"PARP1\",\"cdna_start\":225,\"transcript_id\":\"ENST00000469663\",\"impact\":\"MODIFIER\"},{\"cdna_end\":504,\"codons\":\"TCC/-\",\"protein_end\":120,\"strand\":-1,\"hgnc_id\":\"HGNC:270\",\"amino_acids\":\"S/-\",\"gene_symbol\":\"PARP1\",\"cdna_start\":502,\"transcript_id\":\"ENST00000366794\",\"cds_start\":358,\"gene_id\":\"ENSG00000143799\",\"protein_start\":120,\"biotype\":\"protein_coding\",\"gene_symbol_source\":\"HGNC\",\"cds_end\":360,\"consequence_terms\":[\"inframe_deletion\"],\"impact\":\"MODERATE\"}],\"strand\":-1,\"id\":\"ENST00000366794:c.358_360delTCC\",\"allele_string\":\"TCC/-\",\"most_severe_consequence\":\"inframe_deletion\",\"start\":226392241}]"
5
+ # Test result from GRCh38 (hg38), per the "assembly_name" field in the JSON above
6
+ assert_equal [{"Allele"=>"TCC/-", "CDS position"=>358, "Protein start"=>120, "Mutation"=>"S/-", "Consequence"=>["inframe_deletion"]}], VepHgvs.consequences_for_transcript(json_obj,"ENST00000366794")
7
+
8
+ end
9
+ end
metadata ADDED
@@ -0,0 +1,247 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-sam-mutation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ platform: ruby
6
+ authors:
7
+ - Stephen Pettitt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bio
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.4.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: bio-samtools
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 2.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: oj
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bio-ensembl-rest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: trollop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.9'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.9'
97
+ - !ruby/object:Gem::Dependency
98
+ name: shoulda
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rdoc
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.12'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.12'
125
+ - !ruby/object:Gem::Dependency
126
+ name: simplecov
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: jeweler
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '2.0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '2.0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: bundler
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: test-unit
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.0'
181
+ description: Simple classes for parsing SAM, CIGAR and MD:Z strings, including slices.
182
+ Methods for calling mutations in HGVS format and looking up consequences using Ensembl
183
+ VEP REST API. Developed for calling mutations at an expected position in an alignment
184
+ - e.g. Amplicon sequencing of CRISPR-induced mutations.
185
+ email: spettitt@gmail.com
186
+ executables:
187
+ - mutations
188
+ - sam-mutation
189
+ extensions: []
190
+ extra_rdoc_files:
191
+ - LICENSE.txt
192
+ - README.md
193
+ - README.rdoc
194
+ files:
195
+ - ".document"
196
+ - ".travis.yml"
197
+ - Gemfile
198
+ - LICENSE.txt
199
+ - README.md
200
+ - README.rdoc
201
+ - Rakefile
202
+ - bin/mutations
203
+ - bin/sam-mutation
204
+ - lib/bio-sam-mutation.rb
205
+ - lib/bio-sam-mutation/bio/alignment/cigar.rb
206
+ - lib/bio-sam-mutation/bio/alignment/iterate_pairs.rb
207
+ - lib/bio-sam-mutation/bio/db/alignment.rb
208
+ - lib/bio-sam-mutation/bio/db/tag.rb
209
+ - lib/bio-sam-mutation/bio/db/tag/md.rb
210
+ - lib/bio-sam-mutation/bio/mutantallele.rb
211
+ - lib/bio-sam-mutation/bio/mutation.rb
212
+ - lib/bio-sam-mutation/bio/mutation_array.rb
213
+ - lib/bio-sam-mutation/bio/vephgvs.rb
214
+ - lib/bio-sam-mutation/mutationscli.rb
215
+ - test/helper.rb
216
+ - test/test_cigar.rb
217
+ - test/test_mdtag.rb
218
+ - test/test_mutant_allele.rb
219
+ - test/test_mutation.rb
220
+ - test/test_mutation_array.rb
221
+ - test/test_sam.rb
222
+ - test/test_vep_hgvs.rb
223
+ homepage: http://github.com/stveep/bioruby-sam-mutation
224
+ licenses:
225
+ - MIT
226
+ metadata: {}
227
+ post_install_message:
228
+ rdoc_options: []
229
+ require_paths:
230
+ - lib
231
+ required_ruby_version: !ruby/object:Gem::Requirement
232
+ requirements:
233
+ - - ">="
234
+ - !ruby/object:Gem::Version
235
+ version: '0'
236
+ required_rubygems_version: !ruby/object:Gem::Requirement
237
+ requirements:
238
+ - - ">="
239
+ - !ruby/object:Gem::Version
240
+ version: '0'
241
+ requirements: []
242
+ rubyforge_project:
243
+ rubygems_version: 2.4.5
244
+ signing_key:
245
+ specification_version: 4
246
+ summary: Parsing and mutation calling from SAM, CIGAR and MD:Z.
247
+ test_files: []