bio-cigar 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95056409b5f06d3c60a738740f27c4835a414335
4
- data.tar.gz: 7882ac6d35a76b43ac78af2218faf7a46a7542eb
3
+ metadata.gz: 04021bbb9a499a88095d7dbb0da06a82ffbd1cfb
4
+ data.tar.gz: 2c6cd7124a76a38a6f49e602c007241fde0384f5
5
5
  SHA512:
6
- metadata.gz: 5fca372c6030c01121adddbb273bc9fdca7eb25579ea15c5957842a592c7ce337c1b2eab749f511aa73c100d815c3dd02101285daf346629c36625e893c1cac0
7
- data.tar.gz: d5e72661f94fb60bb8c1afec10d8cce76cac22e0e6edf3483a1003cacfb6cef70f006c0fd5dd185f110b67384603e9154881c845a836a4b0c299a6fb70492ee7
6
+ metadata.gz: a366678809c45fe01b25b41fcfacd96c76a7f8239c1c2d700669fdfaad2f4d7c412c0dee0fef8e2d9d605277d31f85ceafe2d0c94bc33e0a1c086b6f2d935d6b
7
+ data.tar.gz: 2fc5b1e96f0ca742cfcbbac73804ee63db968df54e75ce893a6a78a4889926f4555dd32d6ea89c7f1aec897a3fe89ca8d32ced777bac25651a0509a3a8fe6517
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.1.0
@@ -13,17 +13,11 @@ module Bio
13
13
  ref_index = 0
14
14
  query_index = 0
15
15
  each_alignment_chunk do |type, count|
16
+ # puts "ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
17
+ # puts "#{type} #{count}"
18
+ # puts "ref=#{reference_sequence_string[ref_index...(reference_sequence_string.length)] }"
19
+ # puts "query=#{query_sequence_string[query_index...(query_sequence_string.length)] }"
16
20
  case type
17
- when 'M'
18
- (0...count).each do |i|
19
- if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
20
- num_match += 1
21
- else
22
- num_mismatch += 1
23
- end
24
- end
25
- ref_index += count
26
- query_index += count
27
21
  when 'I'
28
22
  # Extra characters in the query sequence
29
23
  num_mismatch += count
@@ -36,12 +30,31 @@ module Bio
36
30
  query_index += count
37
31
  when 'H'
38
32
  query_index += count
33
+ when 'P'
34
+ # Do nothing
35
+ when 'N'
36
+ # long skip on the reference sequence
37
+ ref_index += count
39
38
  else
40
- raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
39
+ if %w(M = X).include?(type)
40
+ # For = and X, ignore these and recalculate, for ease of programming this method.
41
+ (0...count).each do |i|
42
+ if reference_sequence_string[ref_index+i] == query_sequence_string[query_index+i]
43
+ num_match += 1
44
+ else
45
+ num_mismatch += 1
46
+ end
47
+ end
48
+ ref_index += count
49
+ query_index += count
50
+ else
51
+ raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
52
+ end
41
53
  end
54
+ #puts "after, ref_i=#{ref_index}, query_index=#{query_index}, num_match=#{num_match}, num_mismatch=#{num_mismatch}"
42
55
  end
43
56
 
44
- percent = num_match.to_f/(num_match+num_mismatch)*100
57
+ percent = num_match.to_f / (num_match+num_mismatch)*100
45
58
  return percent, num_match, num_mismatch
46
59
  end
47
60
 
@@ -55,7 +68,7 @@ module Bio
55
68
  # end
56
69
  def each_alignment_chunk
57
70
  leftover = @cigar_string
58
- while matches = leftover.match(/^(\d+)([MSIHD])(.*)/)
71
+ while matches = leftover.match(/^(\d+)([MSIHNDP\=X])(.*)/)
59
72
  yield matches[2], matches[1].to_i
60
73
  leftover = matches[3]
61
74
  end
@@ -65,4 +65,48 @@ describe "BioCigar" do
65
65
  sam.seq = 'TCAGAGCTACAAGAGTTTGATCGTGGCTCAGAAGGAACGCTAGCTATATGCTTAACACATGCAAGTCGAACGTTGTTTTCGGGGAGCTGGGCAGAAGGAAAAGAGGCTCCTAGCGTGAAGGTAGCTTGTCTCGCCCAGGAGGTGGGAACAGTTGAAAACAAAGTGGCGAACGGGTGCGTAATGCGTGGGAATCTGCCGAACAGTTCGGGCCAAATCCTGAAGAAAGCTAAAAAGCGCTGTTTGATGAGCC'
66
66
  sam.percent_identity(ref)[0].should == 99.58333333333333
67
67
  end
68
+
69
+ it 'should work with padded reference seqs' do
70
+ ref = 'CCGG'
71
+ sam = Bio::DB::Alignment.new
72
+ sam.cigar = '2M1P1I1P3I2M'
73
+ sam.pos = 1
74
+ sam.seq = 'CCAGGTGG'
75
+ sam.percent_identity(ref).should == [
76
+ 50.0,
77
+ 4,
78
+ 4,
79
+ ]
80
+ end
81
+
82
+ it 'should work with X and =' do
83
+ # SAM:
84
+ # 790 16 2303416 1150 1 196M54S * 0 0 ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT * AS:i:184
85
+ ref = 'GAGCGAACGTTAGCGGCGGGCTTAACACATGCAAGTCGAACGAGAATGAAGGAGCAATCCTTCTAGTAAAGTGGCGGACGGGTGCGTAACACGTGGATAATCTACCTTCCGGCGGGGGACAACAGTTCGAAAGGACTGCTAATACCGCGTACGTCGGCGAGAGCTCAGGCTCTTGTCGGGAAAGATGGCCAATCCTTGGAAGCTGTCACCGGAAGATGAATCCGCGGCCCATCAGGTAGTTGGTGAGGTAATGGCTCACCAAGCCTAAGACGGGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGCGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGGCGAAAGCCTGACCCAGCCACGCCGCGTGAGTGATGAAGGCCTTCGGGTCGTAAAGCTCTGTGGGGAGGGACGAACAAGTGCGTATCGAATAAATACGTGCCCTGACGGTACCTCCTTAGCAAGCACCGGCTAACCATGTGCCAGCAGCCGCGGTAATACATGGGGTGCAAACGTTGCTCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTCGCTTAAGTCGGATGTGAAATCCCTCGGCTTAACTGAGGAAGTGCATCCGAGACTGAATGGCTAGAGTACGAAAGAGGGTCGNNNNNTTCCCGGTGTAGAGGTGAAATTCGTAGATATCGGGAGGAACACCGGCGGCGAAGGCGGCGACCTGGTTCGAGACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGGATGCTAGATGTTTCTGGTATTGACCCCGGAGGCGTCGTAGCTAACGCGATAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTCAATTTGACGCAACGCGAAGAACCTTACCTGGGTTGGAACCCTCCAGAAGTCCGCAGAGATGTGGATGTGCTCGCAAGAGAACTGGATGTCCAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCGTTAGTTGCTAACAGTTCGGCTGAGCACTCTAACGAGACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGTCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCTAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCGCTAGTAATCCCTGATCAGCAGGCAGGGGTGAATACGTTCCCGGGCC'
86
+ query = 'ACTGCCGGTGTTAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGCCCAGGGCTACACACGTGCTACAATGGCCGTTACAAAGCGTCGCTAACCCGCGAGGGGGAGCCAATCGCAAAAAAGCGGCCTCAGTTCAGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCCCTAGTAATCGCGTGTCATTAGCGCGCGGTGAATACGTCCCTGCTCCTTGCACTCACCGCCCGT'
87
+ pos = 1150
88
+
89
+ ref_seq = ref[pos-1...ref.length]
90
+ Bio::Cigar.new('100X96=54S').percent_identity(ref_seq, query).should == [ #This example is a little fake because X and = are not true, but it is re-calculated in the code so should not matter
91
+ 98.46938775510205,
92
+ 193,
93
+ 3
94
+ ]
95
+ end
96
+
97
+ it 'should work with N' do
98
+ ref = 'GTGTCGCCCGTCTAGCATACGCATGATCGACTGTCAGCTAGTCAGACTA'
99
+ query = 'GTGTAACCC'+ 'TCAGAATA'
100
+ sam = Bio::DB::Alignment.new
101
+ sam.cigar = '9M32N8M'
102
+ sam.pos = 1
103
+ sam.seq = query
104
+ expected_matches = 4+3+4+3
105
+ expected_mismatches = 2+1
106
+ sam.percent_identity(ref).should == [
107
+ expected_matches.to_f/ (expected_matches+expected_mismatches)*100,
108
+ expected_matches,
109
+ expected_mismatches,
110
+ ]
111
+ end
68
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cigar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben J Woodcroft
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-27 00:00:00.000000000 Z
11
+ date: 2014-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-samtools