bio-cigar 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/bio-cigar/cigar.rb +26 -13
- data/spec/bio-cigar_spec.rb +44 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04021bbb9a499a88095d7dbb0da06a82ffbd1cfb
|
4
|
+
data.tar.gz: 2c6cd7124a76a38a6f49e602c007241fde0384f5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a366678809c45fe01b25b41fcfacd96c76a7f8239c1c2d700669fdfaad2f4d7c412c0dee0fef8e2d9d605277d31f85ceafe2d0c94bc33e0a1c086b6f2d935d6b
|
7
|
+
data.tar.gz: 2fc5b1e96f0ca742cfcbbac73804ee63db968df54e75ce893a6a78a4889926f4555dd32d6ea89c7f1aec897a3fe89ca8d32ced777bac25651a0509a3a8fe6517
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.1.0
|
data/lib/bio-cigar/cigar.rb
CHANGED
@@ -13,17 +13,11 @@ module Bio
|
|
13
13
|
ref_index = 0
|
14
14
|
query_index = 0
|
15
15
|
each_alignment_chunk do |type, count|
|
16
|
+
# puts "ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
|
17
|
+
# puts "#{type} #{count}"
|
18
|
+
# puts "ref=#{reference_sequence_string[ref_index...(reference_sequence_string.length)] }"
|
19
|
+
# puts "query=#{query_sequence_string[query_index...(query_sequence_string.length)] }"
|
16
20
|
case type
|
17
|
-
when 'M'
|
18
|
-
(0...count).each do |i|
|
19
|
-
if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
|
20
|
-
num_match += 1
|
21
|
-
else
|
22
|
-
num_mismatch += 1
|
23
|
-
end
|
24
|
-
end
|
25
|
-
ref_index += count
|
26
|
-
query_index += count
|
27
21
|
when 'I'
|
28
22
|
# Extra characters in the query sequence
|
29
23
|
num_mismatch += count
|
@@ -36,12 +30,31 @@ module Bio
|
|
36
30
|
query_index += count
|
37
31
|
when 'H'
|
38
32
|
query_index += count
|
33
|
+
when 'P'
|
34
|
+
# Do nothing
|
35
|
+
when 'N'
|
36
|
+
# long skip on the reference sequence
|
37
|
+
ref_index += count
|
39
38
|
else
|
40
|
-
|
39
|
+
if %w(M = X).include?(type)
|
40
|
+
# For = and X, ignore these and recalculate, for ease of programming this method.
|
41
|
+
(0...count).each do |i|
|
42
|
+
if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
|
43
|
+
num_match += 1
|
44
|
+
else
|
45
|
+
num_mismatch += 1
|
46
|
+
end
|
47
|
+
end
|
48
|
+
ref_index += count
|
49
|
+
query_index += count
|
50
|
+
else
|
51
|
+
raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
|
52
|
+
end
|
41
53
|
end
|
54
|
+
#puts "after, ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
|
42
55
|
end
|
43
56
|
|
44
|
-
percent = num_match.to_f/(num_match+num_mismatch)*100
|
57
|
+
percent = num_match.to_f / (num_match+num_mismatch)*100
|
45
58
|
return percent, num_match, num_mismatch
|
46
59
|
end
|
47
60
|
|
@@ -55,7 +68,7 @@ module Bio
|
|
55
68
|
# end
|
56
69
|
def each_alignment_chunk
|
57
70
|
leftover = @cigar_string
|
58
|
-
while matches = leftover.match(/^(\d+)([
|
71
|
+
while matches = leftover.match(/^(\d+)([MSIHNDP\=X])(.*)/)
|
59
72
|
yield matches[2], matches[1].to_i
|
60
73
|
leftover = matches[3]
|
61
74
|
end
|
data/spec/bio-cigar_spec.rb
CHANGED
@@ -65,4 +65,48 @@ describe "BioCigar" do
|
|
65
65
|
sam.seq = 'TCAGAGCTACAAGAGTTTGATCGTGGCTCAGAAGGAACGCTAGCTATATGCTTAACACATGCAAGTCGAACGTTGTTTTCGGGGAGCTGGGCAGAAGGAAAAGAGGCTCCTAGCGTGAAGGTAGCTTGTCTCGCCCAGGAGGTGGGAACAGTTGAAAACAAAGTGGCGAACGGGTGCGTAATGCGTGGGAATCTGCCGAACAGTTCGGGCCAAATCCTGAAGAAAGCTAAAAAGCGCTGTTTGATGAGCC'
|
66
66
|
sam.percent_identity(ref)[0].should == 99.58333333333333
|
67
67
|
end
|
68
|
+
|
69
|
+
it 'should work with padded reference seqs' do
|
70
|
+
ref = 'CCGG'
|
71
|
+
sam = Bio::DB::Alignment.new
|
72
|
+
sam.cigar = '2M1P1I1P3I2M'
|
73
|
+
sam.pos = 1
|
74
|
+
sam.seq = 'CCAGGTGG'
|
75
|
+
sam.percent_identity(ref).should == [
|
76
|
+
50.0,
|
77
|
+
4,
|
78
|
+
4,
|
79
|
+
]
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'should work with X and =' do
|
83
|
+
# SAM:
|
84
|
+
# 790 16 2303416 1150 1 196M54S * 0 0 ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT * AS:i:184
|
85
|
+
ref = 'GAGCGAACGTTAGCGGCGGGCTTAACACATGCAAGTCGAACGAGAATGAAGGAGCAATCCTTCTAGTAAAGTGGCGGACGGGTGCGTAACACGTGGATAATCTACCTTCCGGCGGGGGACAACAGTTCGAAAGGACTGCTAATACCGCGTACGTCGGCGAGAGCTCAGGCTCTTGTCGGGAAAGATGGCCAATCCTTGGAAGCTGTCACCGGAAGATGAATCCGCGGCCCATCAGGTAGTTGGTGAGGTAATGGCTCACCAAGCCTAAGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGCGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGGCGAAAGCCTGACCCAGCCACGCCGCGTGAGTGATGAAGGCCTTCGGGTCGTAAAGCTCTGTGGGGAGGGACGAACAAGTGCGTATCGAATAAATACGTGCCCTGACGGTACCTCCTTAGCAAGCACCGGCTAACCATGTGCCAGCAGCCGCGGTAATACATGGGGTGCAAACGTTGCTCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTCGCTTAAGTCGGATGTGAAATCCCTCGGCTTAACTGAGGAAGTGCATCCGAGACTGAATGGCTAGAGTACGAAAGAGGGTCGNNNNNTTCCCGGTGTAGAGGTGAAATTCGTAGATATCGGGAGGAACACCGGCGGCGAAGGCGGCGACCTGGTTCGAGACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGGATGCTAGATGTTTCTGGTATTGACCCCGGAGGCGTCGTAGCTAACGCGATAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTCAATTTGACGCAACGCGAAGAACCTTACCTGGGTTGGAACCCTCCAGAAGTCCGCAGAGATGTGGATGTGCTCGCAAGAGAACTGGATGTCCAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCGTTAGTTGCTAACAGTTCGGCTGAGCACTCTAACGAGACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGTCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCTAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCGCTAGTAATCCCTGATCAGCAGGCAGGGGTGAATACGTTCCCGGGCC'
|
86
|
+
query = 'ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT'
|
87
|
+
pos = 1150
|
88
|
+
|
89
|
+
ref_seq = ref[pos-1...ref.length]
|
90
|
+
Bio::Cigar.new('100X96=54S').percent_identity(ref_seq, query).should == [ #This example is a little fake because X and = are not true, but it is re-calculated in the code so should not matter
|
91
|
+
98.46938775510205,
|
92
|
+
193,
|
93
|
+
3
|
94
|
+
]
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should work with N' do
|
98
|
+
ref = 'GTGTCGCCCGTCTAGCATACGCATGATCGACTGTCAGCTAGTCAGACTA'
|
99
|
+
query = 'GTGTAACCC'+ 'TCAGAATA'
|
100
|
+
sam = Bio::DB::Alignment.new
|
101
|
+
sam.cigar = '9M32N8M'
|
102
|
+
sam.pos = 1
|
103
|
+
sam.seq = query
|
104
|
+
expected_matches = 4+3+4+3
|
105
|
+
expected_mismatches = 2+1
|
106
|
+
sam.percent_identity(ref).should == [
|
107
|
+
expected_matches.to_f/ (expected_matches+expected_mismatches)*100,
|
108
|
+
expected_matches,
|
109
|
+
expected_mismatches,
|
110
|
+
]
|
111
|
+
end
|
68
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cigar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben J Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio-samtools
|