pasv_lib 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 752ca72e197ff3f4249ba50b67cbe8ecc09a00aa8e4f4b050943437c303bbc42
4
- data.tar.gz: 35c1ed983ba5643ea84e31fa3c7ca6c0ecb031597ea2d8058664725f443e336a
3
+ metadata.gz: 9c784b906c7e31bdcd730c366169b973d47a2d42684515bcb80bdc07a1885deb
4
+ data.tar.gz: 6e8d255293a94a02fcaf4cee373632c387c753db7f71b8be6515c75ea84e22f6
5
5
  SHA512:
6
- metadata.gz: 5436d6c890346eb6560d6f00d4801ca5f59f61e0d517404429553b23750f4516f117a86fb8236cc1cabd7f1a07251a2ce6f41cc2df8c3210620ac0204e686e5b
7
- data.tar.gz: e92b04401c6b118b7906eca7e750f42dcfe720d4d42c3d533de33d6a8f46babb6601bdeeb2ed8e80bd4bce9407e084f8a8d60c0e6215a8e254aa96ab79442511
6
+ metadata.gz: ed50ff3cf8856cf568bb20f38f0adcd045699244b468909e88452a5fbc930477e26618dc0e18b0a7c6e332a6f8de1a48803b44ba228812eac53b1b35e92a38a4
7
+ data.tar.gz: d73b1d2bcbc5b28a951017d6074b7ce5cdc7435684c86be4df4911c5d01924164606cce8c679b638206b2c951ac7506567ef97a29dee6e848d3f8f051b3f7c73
@@ -3,6 +3,7 @@ require "set"
3
3
 
4
4
  module PasvLib
5
5
  module Alignment
6
+ # If you need to check if a residue is a gap, use this Set.
6
7
  GAP_CHARS = Set.new %w[- .]
7
8
 
8
9
  # If the overall min of the scoring matrix is < 0, then this scales it so that the overall min becomes zero.
@@ -44,7 +45,7 @@ module PasvLib
44
45
  # klass.alignment_columns ["AA-", "A-A"] #=> [["A", "A"], ["A", "-"], ["-", "A"]]
45
46
  def alignment_columns seqs
46
47
  seqs_no_spaces = seqs.map { |seq| seq.tr " ", "" }
47
- len = seqs_no_spaces.first.length
48
+ len = seqs_no_spaces.first.length
48
49
 
49
50
  seqs_no_spaces.map do |seq|
50
51
  unless seq.length == len
@@ -55,6 +56,10 @@ module PasvLib
55
56
  end.transpose
56
57
  end
57
58
 
59
+ def gap? residue
60
+ GAP_CHARS.include? residue
61
+ end
62
+
58
63
  # Calculate the geometric index for an alignment.
59
64
  #
60
65
  # Basically, you change all residues to 0 and all gaps to 1. Then you take the permutations of the sequences and then the residues and XOR each of the permutations. Then you add it up and take averages and you'll get the score. That is a pretty bad explanation, so see http://merenlab.org/2016/11/08/pangenomics-v2/#geometric-homogeneity-index for more information.
@@ -72,8 +77,8 @@ module PasvLib
72
77
  binary_aln = binary_aln.transpose
73
78
  end
74
79
 
75
- num_rows = binary_aln.length
76
- max_differences_per_row = binary_aln.first.length
80
+ num_rows = binary_aln.length
81
+ max_differences_per_row = binary_aln.first.length
77
82
  num_comparisions_per_row = num_rows - 1
78
83
 
79
84
  diff_score = binary_aln.permutation(2).map do |(row1, row2)|
@@ -91,13 +96,21 @@ module PasvLib
91
96
  #
92
97
  # @return [Float] a score between 0 and 1, with 1 being very homogeneous and 0 being very heterogeneous.
93
98
  def geometric_index aln
94
- by_seq_score = geometric_score aln, "sequence"
99
+ by_seq_score = geometric_score aln, "sequence"
95
100
  by_residue_score = geometric_score aln, "residue"
96
101
 
97
102
  (by_seq_score + by_residue_score) / 2.0
98
103
  end
99
104
 
100
- # @note Technically you could get a negative score if you don't have enough high scoring residue pairs to keep the total score above zero. If this is the case, you're alignment probably isn't very good.
105
+ # Returns the similarity score for the alignment.
106
+ #
107
+ # For each column, each pair of residues is scored using the similarity matrix to get points accrued / max_points. Then all column scores are averaged.
108
+ #
109
+ # @raise PasvLib::Error if any residue is not present in the scoring matrix.
110
+ # @raise PasvLib::Error if seqs is empty
111
+ # @raise PasvLib::Error if max_points is zero. Could happen if one of the seqs has all gaps, or no comparisons could be made.
112
+ #
113
+ # @note Technically you could get a negative score if you don't have enough high scoring residue pairs to keep the total score above zero. If this is the case, your alignment probably isn't very good. Alternatively, you could use #adjust_scoring_matrix to avoid this issue.
101
114
  def similarity_score seqs, scoring_matrix = Blosum::BLOSUM62
102
115
  raise PasvLib::Error if seqs.empty?
103
116
  return 1.0 if seqs.count == 1
@@ -109,7 +122,15 @@ module PasvLib
109
122
 
110
123
  aln_cols.each do |residues|
111
124
  residues.map(&:upcase).combination(2).each do |r1, r2|
112
- unless GAP_CHARS.include?(r1) || GAP_CHARS.include?(r2)
125
+ unless gap?(r1) || gap?(r2)
126
+ # Check that scoring matrix has the residues.
127
+ [r1, r2].each do |res|
128
+ unless scoring_matrix.has_key? res
129
+ raise PasvLib::Error, "Residue '#{res}' is missing from the " \
130
+ "scoring matrix."
131
+ end
132
+ end
133
+
113
134
  r1_max_score = scoring_matrix[r1].values.max
114
135
  r2_max_score = scoring_matrix[r2].values.max
115
136
  pair_max = [r1_max_score, r2_max_score].max
@@ -139,7 +160,7 @@ module PasvLib
139
160
  aln.map do |seq|
140
161
  seq.chars.map do |char|
141
162
  # Gaps turn to 1, residues turn to 0.
142
- GAP_CHARS.include?(char) ? 1 : 0
163
+ gap?(char) ? 1 : 0
143
164
  end
144
165
  end
145
166
  end
@@ -1,3 +1,3 @@
1
1
  module PasvLib
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pasv_lib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore