bio-cigar 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/bio-cigar/cigar.rb +26 -13
- data/spec/bio-cigar_spec.rb +44 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04021bbb9a499a88095d7dbb0da06a82ffbd1cfb
|
4
|
+
data.tar.gz: 2c6cd7124a76a38a6f49e602c007241fde0384f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a366678809c45fe01b25b41fcfacd96c76a7f8239c1c2d700669fdfaad2f4d7c412c0dee0fef8e2d9d605277d31f85ceafe2d0c94bc33e0a1c086b6f2d935d6b
|
7
|
+
data.tar.gz: 2fc5b1e96f0ca742cfcbbac73804ee63db968df54e75ce893a6a78a4889926f4555dd32d6ea89c7f1aec897a3fe89ca8d32ced777bac25651a0509a3a8fe6517
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.1.0
|
data/lib/bio-cigar/cigar.rb
CHANGED
@@ -13,17 +13,11 @@ module Bio
|
|
13
13
|
ref_index = 0
|
14
14
|
query_index = 0
|
15
15
|
each_alignment_chunk do |type, count|
|
16
|
+
# puts "ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
|
17
|
+
# puts "#{type} #{count}"
|
18
|
+
# puts "ref=#{reference_sequence_string[ref_index...(reference_sequence_string.length)] }"
|
19
|
+
# puts "query=#{query_sequence_string[query_index...(query_sequence_string.length)] }"
|
16
20
|
case type
|
17
|
-
when 'M'
|
18
|
-
(0...count).each do |i|
|
19
|
-
if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
|
20
|
-
num_match += 1
|
21
|
-
else
|
22
|
-
num_mismatch += 1
|
23
|
-
end
|
24
|
-
end
|
25
|
-
ref_index += count
|
26
|
-
query_index += count
|
27
21
|
when 'I'
|
28
22
|
# Extra characters in the query sequence
|
29
23
|
num_mismatch += count
|
@@ -36,12 +30,31 @@ module Bio
|
|
36
30
|
query_index += count
|
37
31
|
when 'H'
|
38
32
|
query_index += count
|
33
|
+
when 'P'
|
34
|
+
# Do nothing
|
35
|
+
when 'N'
|
36
|
+
# long skip on the reference sequence
|
37
|
+
ref_index += count
|
39
38
|
else
|
40
|
-
|
39
|
+
if %w(M = X).include?(type)
|
40
|
+
# For = and X, ignore these and recalculate, for ease of programming this method.
|
41
|
+
(0...count).each do |i|
|
42
|
+
if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
|
43
|
+
num_match += 1
|
44
|
+
else
|
45
|
+
num_mismatch += 1
|
46
|
+
end
|
47
|
+
end
|
48
|
+
ref_index += count
|
49
|
+
query_index += count
|
50
|
+
else
|
51
|
+
raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
|
52
|
+
end
|
41
53
|
end
|
54
|
+
#puts "after, ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
|
42
55
|
end
|
43
56
|
|
44
|
-
percent = num_match.to_f/(num_match+num_mismatch)*100
|
57
|
+
percent = num_match.to_f / (num_match+num_mismatch)*100
|
45
58
|
return percent, num_match, num_mismatch
|
46
59
|
end
|
47
60
|
|
@@ -55,7 +68,7 @@ module Bio
|
|
55
68
|
# end
|
56
69
|
def each_alignment_chunk
|
57
70
|
leftover = @cigar_string
|
58
|
-
while matches = leftover.match(/^(\d+)([
|
71
|
+
while matches = leftover.match(/^(\d+)([MSIHNDP\=X])(.*)/)
|
59
72
|
yield matches[2], matches[1].to_i
|
60
73
|
leftover = matches[3]
|
61
74
|
end
|
data/spec/bio-cigar_spec.rb
CHANGED
@@ -65,4 +65,48 @@ describe "BioCigar" do
|
|
65
65
|
sam.seq = 'TCAGAGCTACAAGAGTTTGATCGTGGCTCAGAAGGAACGCTAGCTATATGCTTAACACATGCAAGTCGAACGTTGTTTTCGGGGAGCTGGGCAGAAGGAAAAGAGGCTCCTAGCGTGAAGGTAGCTTGTCTCGCCCAGGAGGTGGGAACAGTTGAAAACAAAGTGGCGAACGGGTGCGTAATGCGTGGGAATCTGCCGAACAGTTCGGGCCAAATCCTGAAGAAAGCTAAAAAGCGCTGTTTGATGAGCC'
|
66
66
|
sam.percent_identity(ref)[0].should == 99.58333333333333
|
67
67
|
end
|
68
|
+
|
69
|
+
it 'should work with padded reference seqs' do
|
70
|
+
ref = 'CCGG'
|
71
|
+
sam = Bio::DB::Alignment.new
|
72
|
+
sam.cigar = '2M1P1I1P3I2M'
|
73
|
+
sam.pos = 1
|
74
|
+
sam.seq = 'CCAGGTGG'
|
75
|
+
sam.percent_identity(ref).should == [
|
76
|
+
50.0,
|
77
|
+
4,
|
78
|
+
4,
|
79
|
+
]
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'should work with X and =' do
|
83
|
+
# SAM:
|
84
|
+
# 790 16 2303416 1150 1 196M54S * 0 0 ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT * AS:i:184
|
85
|
+
ref = 'GAGCGAACGTTAGCGGCGGGCTTAACACATGCAAGTCGAACGAGAATGAAGGAGCAATCCTTCTAGTAAAGTGGCGGACGGGTGCGTAACACGTGGATAATCTACCTTCCGGCGGGGGACAACAGTTCGAAAGGACTGCTAATACCGCGTACGTCGGCGAGAGCTCAGGCTCTTGTCGGGAAAGATGGCCAATCCTTGGAAGCTGTCACCGGAAGATGAATCCGCGGCCCATCAGGTAGTTGGTGAGGTAATGGCTCACCAAGCCTAAGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGCGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGGCGAAAGCCTGACCCAGCCACGCCGCGTGAGTGATGAAGGCCTTCGGGTCGTAAAGCTCTGTGGGGAGGGACGAACAAGTGCGTATCGAATAAATACGTGCCCTGACGGTACCTCCTTAGCAAGCACCGGCTAACCATGTGCCAGCAGCCGCGGTAATACATGGGGTGCAAACGTTGCTCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTCGCTTAAGTCGGATGTGAAATCCCTCGGCTTAACTGAGGAAGTGCATCCGAGACTGAATGGCTAGAGTACGAAAGAGGGTCGNNNNNTTCCCGGTGTAGAGGTGAAATTCGTAGATATCGGGAGGAACACCGGCGGCGAAGGCGGCGACCTGGTTCGAGACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGGATGCTAGATGTTTCTGGTATTGACCCCGGAGGCGTCGTAGCTAACGCGATAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTCAATTTGACGCAACGCGAAGAACCTTACCTGGGTTGGAACCCTCCAGAAGTCCGCAGAGATGTGGATGTGCTCGCAAGAGAACTGGATGTCCAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCGTTAGTTGCTAACAGTTCGGCTGAGCACTCTAACGAGACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGTCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCTAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCGCTAGTAATCCCTGATCAGCAGGCAGGGGTGAATACGTTCCCGGGCC'
|
86
|
+
query = 'ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT'
|
87
|
+
pos = 1150
|
88
|
+
|
89
|
+
ref_seq = ref[pos-1...ref.length]
|
90
|
+
Bio::Cigar.new('100X96=54S').percent_identity(ref_seq, query).should == [ #This example is a little fake because X and = are not true, but it is re-calculated in the code so should not matter
|
91
|
+
98.46938775510205,
|
92
|
+
193,
|
93
|
+
3
|
94
|
+
]
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should work with N' do
|
98
|
+
ref = 'GTGTCGCCCGTCTAGCATACGCATGATCGACTGTCAGCTAGTCAGACTA'
|
99
|
+
query = 'GTGTAACCC'+ 'TCAGAATA'
|
100
|
+
sam = Bio::DB::Alignment.new
|
101
|
+
sam.cigar = '9M32N8M'
|
102
|
+
sam.pos = 1
|
103
|
+
sam.seq = query
|
104
|
+
expected_matches = 4+3+4+3
|
105
|
+
expected_mismatches = 2+1
|
106
|
+
sam.percent_identity(ref).should == [
|
107
|
+
expected_matches.to_f/ (expected_matches+expected_mismatches)*100,
|
108
|
+
expected_matches,
|
109
|
+
expected_mismatches,
|
110
|
+
]
|
111
|
+
end
|
68
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cigar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben J Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio-samtools
|