bioinform 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -104,37 +104,31 @@ module Bioinform
104
104
  end
105
105
 
106
106
  def background!(new_background)
107
- clear_cache
108
107
  @background = new_background
109
108
  self
110
109
  end
111
110
 
112
111
  def self.zero_column
113
- [0.0, 0.0, 0.0, 0.0]
112
+ [0, 0, 0, 0]
114
113
  end
115
114
 
116
- def reverse_complement!
117
- clear_cache
115
+ def reverse_complement!
118
116
  @matrix.reverse!.map!(&:reverse!)
119
117
  self
120
118
  end
121
119
  def left_augment!(n)
122
- clear_cache
123
120
  n.times{ @matrix.unshift(self.class.zero_column) }
124
121
  self
125
122
  end
126
123
  def right_augment!(n)
127
- clear_cache
128
124
  n.times{ @matrix.push(self.class.zero_column) }
129
125
  self
130
126
  end
131
127
  def shift_to_zero! # make worst score == 0 by shifting scores of each column
132
- clear_cache
133
128
  @matrix.map!{|position| min = position.min; position.map{|element| element - min}}
134
129
  self
135
130
  end
136
131
  def discrete!(rate)
137
- clear_cache
138
132
  @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
139
133
  self
140
134
  end
@@ -161,23 +155,19 @@ module Bioinform
161
155
  #end
162
156
 
163
157
  def best_score
164
- @best_score ||= @matrix.inject(0.0){|sum, col| sum + col.max}
158
+ @matrix.inject(0.0){|sum, col| sum + col.max}
165
159
  end
166
160
  def worst_score
167
- @worst_score ||= @matrix.inject(0.0){|sum, col| sum + col.min}
161
+ @matrix.inject(0.0){|sum, col| sum + col.min}
168
162
  end
169
163
 
170
164
  # best score of suffix s[i..l]
171
- def best_suffix
172
- @best_suffix ||= Array.new(length + 1) {|i| @matrix[i...length].map(&:max).inject(0.0, &:+) }
165
+ def best_suffix(i)
166
+ @matrix[i...length].map(&:max).inject(0.0, &:+)
173
167
  end
174
168
 
175
- def worst_suffix
176
- @worst_suffix ||= Array.new(length + 1) {|i| @matrix[i...length].map(&:min).inject(0.0, &:+) }
177
- end
178
-
179
- def clear_cache
180
- @best_score, @worst_score, @best_suffix, @worst_suffix = nil,nil,nil,nil
169
+ def worst_suffix(i)
170
+ @matrix[i...length].map(&:min).inject(0.0, &:+)
181
171
  end
182
172
 
183
173
  def reverse_complement
@@ -11,4 +11,7 @@ require 'bioinform/support/yaml_dump_file'
11
11
  require 'bioinform/support/inverf'
12
12
  require 'bioinform/support/deep_dup'
13
13
 
14
- require 'bioinform/support/partial_sums'
14
+ require 'bioinform/support/partial_sums'
15
+
16
+ require 'bioinform/support/array_zip'
17
+ require 'bioinform/support/array_product'
@@ -0,0 +1,6 @@
1
+ class Array
2
+ def self.product(*arrays)
3
+ return [] if arrays.empty?
4
+ arrays.first.product(*arrays[1..-1])
5
+ end
6
+ end
@@ -0,0 +1,6 @@
1
+ class Array
2
+ def self.zip(*arrays)
3
+ return [] if arrays.empty?
4
+ arrays.first.zip(*arrays[1..-1])
5
+ end
6
+ end
@@ -4,3 +4,11 @@ class Array
4
4
  map{|el| sums += el}
5
5
  end
6
6
  end
7
+
8
+ class Hash
9
+ # {1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
10
+ def partial_sums(initial = 0.0)
11
+ sums = initial
12
+ sort.collect_hash{|k,v| [k, sums += v]}
13
+ end
14
+ end
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
@@ -318,17 +318,23 @@ module Bioinform
318
318
  end
319
319
 
320
320
  describe '#best_suffix' do
321
- it 'should be an array of best suffices from start of string and to empty suffix inclusive' do
321
+ it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
322
322
  @pm = PM.new
323
323
  @pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
324
- @pm.best_suffix.should == [(4.9 + 7.13 - 1.0), (7.13 - 1.0), (-1.0), (0.0) ]
324
+ @pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
325
+ @pm.best_suffix(1).should == (7.13 - 1.0)
326
+ @pm.best_suffix(2).should == (-1.0)
327
+ @pm.best_suffix(3).should == (0.0)
325
328
  end
326
329
  end
327
330
  describe '#worst_suffix' do
328
- it 'should be an array of worst suffices from start of string and to empty suffix inclusive' do
331
+ it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
329
332
  @pm = PM.new
330
333
  @pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
331
- @pm.worst_suffix.should == [(1.3 + 3.25 - 1.5), (3.25 - 1.5), (- 1.5), (0.0) ]
334
+ @pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
335
+ @pm.worst_suffix(1).should == (3.25 - 1.5)
336
+ @pm.worst_suffix(2).should == (- 1.5)
337
+ @pm.worst_suffix(3).should == (0.0)
332
338
  end
333
339
  end
334
340
 
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/array_product'
3
+
4
+ describe Array do
5
+ context '::product' do
6
+ it 'should take any number of arrays and product them as if #product was made to first and others' do
7
+ Array.product([1,2,3]).should == [1,2,3].product()
8
+ Array.product([1,2,3],[4,5,6]).should == [1,2,3].product([4,5,6])
9
+ Array.product([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].product([4,5,6],[7,8,9])
10
+ end
11
+ it 'should return empty array if no arrays\'re given' do
12
+ Array.product().should == []
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/array_zip'
3
+
4
+ describe Array do
5
+ context '::zip' do
6
+ it 'should take any number of arrays and zip them as if #zip was made to first and others' do
7
+ Array.zip([1,2,3]).should == [1,2,3].zip()
8
+ Array.zip([1,2,3],[4,5,6]).should == [1,2,3].zip([4,5,6])
9
+ Array.zip([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].zip([4,5,6],[7,8,9])
10
+ end
11
+ it 'should return empty array if no arrays\'re given' do
12
+ Array.zip().should == []
13
+ end
14
+ end
15
+ end
@@ -1,9 +1,27 @@
1
1
  describe 'Array#partial_sums' do
2
- it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
3
- [2,3,4,5].partial_sums.should == [2, 5, 9, 14]
4
- [2,3,4,5].partial_sums.last.should be_kind_of(Float)
2
+ context 'when no initial value given' do
3
+ it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
4
+ [2,3,4,5].partial_sums.should == [2, 5, 9, 14]
5
+ [2,3,4,5].partial_sums.last.should be_kind_of(Float)
6
+ end
5
7
  end
6
- it 'should start counting from argument when it\'s given' do
8
+ it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
7
9
  [2,3,4,5].partial_sums(100).should == [102,105,109,114]
10
+ [2,3,4,5].partial_sums(100).last.should be_kind_of(Integer)
8
11
  end
9
- end
12
+ end
13
+
14
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
15
+
16
+ describe 'Hash#partial_sums' do
17
+ context 'when no initial value given' do
18
+ it 'should return a hash with float values of the same size with partial sums of elements that has keys <= than argument' do
19
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums.should == {1=>5, 3=>7, 4=>10}
20
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums.values.last.should be_kind_of(Float)
21
+ end
22
+ end
23
+ it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
24
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums(100).should == {1=>105, 3=>107, 4=>110}
25
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums(100).values.last.should be_kind_of(Integer)
26
+ end
27
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-07 00:00:00.000000000 Z
12
+ date: 2012-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -59,12 +59,6 @@ files:
59
59
  - lib/bioinform.rb
60
60
  - lib/bioinform/data_models.rb
61
61
  - lib/bioinform/data_models/collection.rb
62
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb
63
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb
64
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb
65
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb
66
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
67
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb
68
62
  - lib/bioinform/data_models/parser.rb
69
63
  - lib/bioinform/data_models/parsers.rb
70
64
  - lib/bioinform/data_models/parsers/array_parser.rb
@@ -76,6 +70,8 @@ files:
76
70
  - lib/bioinform/data_models/ppm.rb
77
71
  - lib/bioinform/data_models/pwm.rb
78
72
  - lib/bioinform/support.rb
73
+ - lib/bioinform/support/array_product.rb
74
+ - lib/bioinform/support/array_zip.rb
79
75
  - lib/bioinform/support/callable_symbol.rb
80
76
  - lib/bioinform/support/collect_hash.rb
81
77
  - lib/bioinform/support/deep_dup.rb
@@ -95,6 +91,8 @@ files:
95
91
  - spec/data_models/pm_spec.rb
96
92
  - spec/data_models/pwm_spec.rb
97
93
  - spec/spec_helper.rb
94
+ - spec/support/array_product_spec.rb
95
+ - spec/support/array_zip_spec.rb
98
96
  - spec/support/callable_symbol_spec.rb
99
97
  - spec/support/collect_hash_spec.rb
100
98
  - spec/support/delete_many_spec.rb
@@ -139,6 +137,8 @@ test_files:
139
137
  - spec/data_models/pm_spec.rb
140
138
  - spec/data_models/pwm_spec.rb
141
139
  - spec/spec_helper.rb
140
+ - spec/support/array_product_spec.rb
141
+ - spec/support/array_zip_spec.rb
142
142
  - spec/support/callable_symbol_spec.rb
143
143
  - spec/support/collect_hash_spec.rb
144
144
  - spec/support/delete_many_spec.rb
@@ -1,22 +0,0 @@
1
- class Iupac
2
- attr_reader :words
3
- def initialize(input)
4
- case input
5
- when Array
6
- @words = input.map{|word| IupacWord.new word}
7
- when String
8
- @words = input.gsub("\r\n","\n").split("\n").map{|word| IupacWord.new(word)}
9
- when IupacWord
10
- @words = [input]
11
- else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
12
- end
13
- raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
14
- end
15
-
16
- def to_pcm
17
- @words.map(&:to_pcm).inject(:+)
18
- end
19
- def to_pwm
20
- to_pcm.to_pwm
21
- end
22
- end
@@ -1,27 +0,0 @@
1
- class IupacWord
2
- IupacLetters = %w{A C G T R Y K M S W B D H V N}
3
- Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
4
- "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
5
- "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
6
- Decode = Code.invert
7
- LetterCode = Hash[Decode.map{|k,v|
8
- [k, %w{A C G T}.map{|chr| (v.include?(chr) ? 1.0 : 0.0) / v.size} ]
9
- }]
10
-
11
- attr_reader :word
12
- attr_accessor :weight
13
- def initialize(word)
14
- raise "Non-IUPAC letter in a word #{word}" unless word.each_char.all?{|letter| IupacLetters.include? letter}
15
- @word = word
16
- @weight = 1
17
- end
18
-
19
- def length
20
- word.length
21
- end
22
-
23
- def to_pcm
24
- matrix = @word.each_char.map{|letter| LetterCode[letter]}
25
- PositionalCountMatrix.new(matrix)
26
- end
27
- end
@@ -1,26 +0,0 @@
1
- class PositionalCountMatrix < PositionalMatrix
2
- attr_accessor :pseudocount, :background
3
- def initialize(*args)
4
- super
5
- raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
6
- raise ArgumentError, 'PCM summary count is zero or negative' unless count>=0
7
- # summary counts can slightly differ from each other due to floating point precision
8
- unless @matrix.all?{|position| (position.inject(:+) - count).abs < 0.01*count }
9
- raise ArgumentError, 'PCM has different summary count at each position'
10
- end
11
- @background = [1.0, 1.0, 1.0, 1.0]
12
- @pseudocount = 1.0
13
- end
14
- def count
15
- @count ||= @matrix.first.inject(&:+)
16
- end
17
- def to_pwm
18
- normalize_coef = background.inject(&:+)
19
- bckgr = @background.map{|el| el*1.0/normalize_coef}
20
- PositionalWeightMatrix.new @matrix.map{|pos| pos.map.with_index{|el,ind| Math.log(el+bckgr[ind]*@pseudocount /(bckgr[ind]*(count + @pseudocount))) }}
21
- end
22
- def +(another_pcm)
23
- raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
24
- PositionalCountMatrix.new matrix.map.with_index {|pos, i| pos.map.with_index{|el,j| el+another_pcm.matrix[i][j] }}
25
- end
26
- end
@@ -1,78 +0,0 @@
1
- require 'active_support/core_ext/hash/indifferent_access'
2
- require 'bioinform/support'
3
-
4
- class PositionalMatrix
5
- module DefaultParser
6
- number_pat = '[+-]?\d+(\.\d+)?'
7
- row_pat = "(#{number_pat} )*#{number_pat}"
8
- name_pat = '>? ?(?<name>[\w.-]+)\n'
9
- matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
10
- Pattern = /\A(#{name_pat})?#{matrix_pat}\z/
11
- def self.parse(input)
12
- input.multiline_squish.match(Pattern)
13
- end
14
- end
15
-
16
- module FantomParser
17
- number_pat = '[+-]?\d+(\.\d+)?'
18
- row_pat = "(#{number_pat} )*#{number_pat}"
19
- matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
20
- Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/
21
- def self.trim_first_position(input)
22
- inp = input.split("\n")
23
- ([inp[0]] + inp[1..-1].map{|x| x.split(' ')[1..4].join(' ') }).join("\n")
24
- end
25
- def self.parse(input)
26
- trim_first_position(input.multiline_squish).match(Pattern)
27
- end
28
- end
29
-
30
-
31
-
32
- attr_reader :name, :matrix
33
- def initialize(input,parser = DefaultParser)
34
- case input
35
- when String
36
- match = parser.parse(input)
37
- raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
38
- @name = match[:name]
39
- @matrix = match[:matrix].split("\n").map{|row| row.split.map(&:to_f)}
40
- when Hash
41
- inp = input.with_indifferent_access
42
- @matrix = [inp[:A],inp[:C], inp[:G], inp[:T]]
43
- when Array
44
- @matrix = input.map do |pos|
45
- case pos
46
- when Array then pos
47
- when Hash then [pos[:A], pos[:C], pos[:G], pos[:T]]
48
- else raise ArgumentError, 'Unknown type of argument inner dimension'
49
- end
50
- end
51
- else
52
- raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
53
- end
54
- raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
55
- raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
56
- @matrix = @matrix.transpose if @matrix.first.size != 4
57
- end
58
-
59
- def size
60
- @matrix.size
61
- end
62
- alias_method :length, :size
63
-
64
- def to_s(with_name = true)
65
- mat_str = @matrix.map(&:join.("\t")).join("\n")
66
- (with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
67
- end
68
-
69
- def pretty_string(with_name = true)
70
- header = " A C G T \n"
71
- mat_str = @matrix.map{|position| position.map{|el| el.round(3).to_s.rjust(6)}.join(' ') }.join("\n")
72
- (with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
73
- end
74
-
75
- def to_hash
76
- {A: @matrix.map{|pos| pos[0]}, C: @matrix.map{|pos| pos[1]}, G: @matrix.map{|pos| pos[2]}, T: @matrix.map{|pos| pos[3]}}.with_indifferent_access
77
- end
78
- end
@@ -1,17 +0,0 @@
1
- class PositionalProbabilityMatrix < PositionalMatrix
2
- attr_accessor :count
3
- def initialize(input_string)
4
- super(input_string)
5
- raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
6
- # summary counts can slightly differ from each other due to floating point precision
7
- unless @matrix.all?{|position| (position.inject(:+) - 1.0).abs < 0.01 }
8
- raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
9
- end
10
- end
11
- def to_pcm
12
- PositionalCountMatrix.new @matrix.map{|pos| pos.map{|el| el*@count}}
13
- end
14
- def to_pwm
15
- to_pcm.to_pwm
16
- end
17
- end
@@ -1,3 +0,0 @@
1
- class PositionalWeightMatrix < PositionalMatrix
2
-
3
- end