bioinform 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -104,37 +104,31 @@ module Bioinform
104
104
  end
105
105
 
106
106
  def background!(new_background)
107
- clear_cache
108
107
  @background = new_background
109
108
  self
110
109
  end
111
110
 
112
111
  def self.zero_column
113
- [0.0, 0.0, 0.0, 0.0]
112
+ [0, 0, 0, 0]
114
113
  end
115
114
 
116
- def reverse_complement!
117
- clear_cache
115
+ def reverse_complement!
118
116
  @matrix.reverse!.map!(&:reverse!)
119
117
  self
120
118
  end
121
119
  def left_augment!(n)
122
- clear_cache
123
120
  n.times{ @matrix.unshift(self.class.zero_column) }
124
121
  self
125
122
  end
126
123
  def right_augment!(n)
127
- clear_cache
128
124
  n.times{ @matrix.push(self.class.zero_column) }
129
125
  self
130
126
  end
131
127
  def shift_to_zero! # make worst score == 0 by shifting scores of each column
132
- clear_cache
133
128
  @matrix.map!{|position| min = position.min; position.map{|element| element - min}}
134
129
  self
135
130
  end
136
131
  def discrete!(rate)
137
- clear_cache
138
132
  @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
139
133
  self
140
134
  end
@@ -161,23 +155,19 @@ module Bioinform
161
155
  #end
162
156
 
163
157
  def best_score
164
- @best_score ||= @matrix.inject(0.0){|sum, col| sum + col.max}
158
+ @matrix.inject(0.0){|sum, col| sum + col.max}
165
159
  end
166
160
  def worst_score
167
- @worst_score ||= @matrix.inject(0.0){|sum, col| sum + col.min}
161
+ @matrix.inject(0.0){|sum, col| sum + col.min}
168
162
  end
169
163
 
170
164
  # best score of suffix s[i..l]
171
- def best_suffix
172
- @best_suffix ||= Array.new(length + 1) {|i| @matrix[i...length].map(&:max).inject(0.0, &:+) }
165
+ def best_suffix(i)
166
+ @matrix[i...length].map(&:max).inject(0.0, &:+)
173
167
  end
174
168
 
175
- def worst_suffix
176
- @worst_suffix ||= Array.new(length + 1) {|i| @matrix[i...length].map(&:min).inject(0.0, &:+) }
177
- end
178
-
179
- def clear_cache
180
- @best_score, @worst_score, @best_suffix, @worst_suffix = nil,nil,nil,nil
169
+ def worst_suffix(i)
170
+ @matrix[i...length].map(&:min).inject(0.0, &:+)
181
171
  end
182
172
 
183
173
  def reverse_complement
@@ -11,4 +11,7 @@ require 'bioinform/support/yaml_dump_file'
11
11
  require 'bioinform/support/inverf'
12
12
  require 'bioinform/support/deep_dup'
13
13
 
14
- require 'bioinform/support/partial_sums'
14
+ require 'bioinform/support/partial_sums'
15
+
16
+ require 'bioinform/support/array_zip'
17
+ require 'bioinform/support/array_product'
@@ -0,0 +1,6 @@
1
+ class Array
2
+ def self.product(*arrays)
3
+ return [] if arrays.empty?
4
+ arrays.first.product(*arrays[1..-1])
5
+ end
6
+ end
@@ -0,0 +1,6 @@
1
+ class Array
2
+ def self.zip(*arrays)
3
+ return [] if arrays.empty?
4
+ arrays.first.zip(*arrays[1..-1])
5
+ end
6
+ end
@@ -4,3 +4,11 @@ class Array
4
4
  map{|el| sums += el}
5
5
  end
6
6
  end
7
+
8
+ class Hash
9
+ # {1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
10
+ def partial_sums(initial = 0.0)
11
+ sums = initial
12
+ sort.collect_hash{|k,v| [k, sums += v]}
13
+ end
14
+ end
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
@@ -318,17 +318,23 @@ module Bioinform
318
318
  end
319
319
 
320
320
  describe '#best_suffix' do
321
- it 'should be an array of best suffices from start of string and to empty suffix inclusive' do
321
+ it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
322
322
  @pm = PM.new
323
323
  @pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
324
- @pm.best_suffix.should == [(4.9 + 7.13 - 1.0), (7.13 - 1.0), (-1.0), (0.0) ]
324
+ @pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
325
+ @pm.best_suffix(1).should == (7.13 - 1.0)
326
+ @pm.best_suffix(2).should == (-1.0)
327
+ @pm.best_suffix(3).should == (0.0)
325
328
  end
326
329
  end
327
330
  describe '#worst_suffix' do
328
- it 'should be an array of worst suffices from start of string and to empty suffix inclusive' do
331
+ it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
329
332
  @pm = PM.new
330
333
  @pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
331
- @pm.worst_suffix.should == [(1.3 + 3.25 - 1.5), (3.25 - 1.5), (- 1.5), (0.0) ]
334
+ @pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
335
+ @pm.worst_suffix(1).should == (3.25 - 1.5)
336
+ @pm.worst_suffix(2).should == (- 1.5)
337
+ @pm.worst_suffix(3).should == (0.0)
332
338
  end
333
339
  end
334
340
 
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/array_product'
3
+
4
+ describe Array do
5
+ context '::product' do
6
+ it 'should take any number of arrays and product them as if #product was made to first and others' do
7
+ Array.product([1,2,3]).should == [1,2,3].product()
8
+ Array.product([1,2,3],[4,5,6]).should == [1,2,3].product([4,5,6])
9
+ Array.product([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].product([4,5,6],[7,8,9])
10
+ end
11
+ it 'should return empty array if no arrays\'re given' do
12
+ Array.product().should == []
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/array_zip'
3
+
4
+ describe Array do
5
+ context '::zip' do
6
+ it 'should take any number of arrays and zip them as if #zip was made to first and others' do
7
+ Array.zip([1,2,3]).should == [1,2,3].zip()
8
+ Array.zip([1,2,3],[4,5,6]).should == [1,2,3].zip([4,5,6])
9
+ Array.zip([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].zip([4,5,6],[7,8,9])
10
+ end
11
+ it 'should return empty array if no arrays\'re given' do
12
+ Array.zip().should == []
13
+ end
14
+ end
15
+ end
@@ -1,9 +1,27 @@
1
1
  describe 'Array#partial_sums' do
2
- it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
3
- [2,3,4,5].partial_sums.should == [2, 5, 9, 14]
4
- [2,3,4,5].partial_sums.last.should be_kind_of(Float)
2
+ context 'when no initial value given' do
3
+ it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
4
+ [2,3,4,5].partial_sums.should == [2, 5, 9, 14]
5
+ [2,3,4,5].partial_sums.last.should be_kind_of(Float)
6
+ end
5
7
  end
6
- it 'should start counting from argument when it\'s given' do
8
+ it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
7
9
  [2,3,4,5].partial_sums(100).should == [102,105,109,114]
10
+ [2,3,4,5].partial_sums(100).last.should be_kind_of(Integer)
8
11
  end
9
- end
12
+ end
13
+
14
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
15
+
16
+ describe 'Hash#partial_sums' do
17
+ context 'when no initial value given' do
18
+ it 'should return a hash with float values of the same size with partial sums of elements that has keys <= than argument' do
19
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums.should == {1=>5, 3=>7, 4=>10}
20
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums.values.last.should be_kind_of(Float)
21
+ end
22
+ end
23
+ it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
24
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums(100).should == {1=>105, 3=>107, 4=>110}
25
+ {1 => 5, 4 => 3, 3 => 2}.partial_sums(100).values.last.should be_kind_of(Integer)
26
+ end
27
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-07 00:00:00.000000000 Z
12
+ date: 2012-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -59,12 +59,6 @@ files:
59
59
  - lib/bioinform.rb
60
60
  - lib/bioinform/data_models.rb
61
61
  - lib/bioinform/data_models/collection.rb
62
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb
63
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb
64
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb
65
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb
66
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
67
- - lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb
68
62
  - lib/bioinform/data_models/parser.rb
69
63
  - lib/bioinform/data_models/parsers.rb
70
64
  - lib/bioinform/data_models/parsers/array_parser.rb
@@ -76,6 +70,8 @@ files:
76
70
  - lib/bioinform/data_models/ppm.rb
77
71
  - lib/bioinform/data_models/pwm.rb
78
72
  - lib/bioinform/support.rb
73
+ - lib/bioinform/support/array_product.rb
74
+ - lib/bioinform/support/array_zip.rb
79
75
  - lib/bioinform/support/callable_symbol.rb
80
76
  - lib/bioinform/support/collect_hash.rb
81
77
  - lib/bioinform/support/deep_dup.rb
@@ -95,6 +91,8 @@ files:
95
91
  - spec/data_models/pm_spec.rb
96
92
  - spec/data_models/pwm_spec.rb
97
93
  - spec/spec_helper.rb
94
+ - spec/support/array_product_spec.rb
95
+ - spec/support/array_zip_spec.rb
98
96
  - spec/support/callable_symbol_spec.rb
99
97
  - spec/support/collect_hash_spec.rb
100
98
  - spec/support/delete_many_spec.rb
@@ -139,6 +137,8 @@ test_files:
139
137
  - spec/data_models/pm_spec.rb
140
138
  - spec/data_models/pwm_spec.rb
141
139
  - spec/spec_helper.rb
140
+ - spec/support/array_product_spec.rb
141
+ - spec/support/array_zip_spec.rb
142
142
  - spec/support/callable_symbol_spec.rb
143
143
  - spec/support/collect_hash_spec.rb
144
144
  - spec/support/delete_many_spec.rb
@@ -1,22 +0,0 @@
1
- class Iupac
2
- attr_reader :words
3
- def initialize(input)
4
- case input
5
- when Array
6
- @words = input.map{|word| IupacWord.new word}
7
- when String
8
- @words = input.gsub("\r\n","\n").split("\n").map{|word| IupacWord.new(word)}
9
- when IupacWord
10
- @words = [input]
11
- else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
12
- end
13
- raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
14
- end
15
-
16
- def to_pcm
17
- @words.map(&:to_pcm).inject(:+)
18
- end
19
- def to_pwm
20
- to_pcm.to_pwm
21
- end
22
- end
@@ -1,27 +0,0 @@
1
- class IupacWord
2
- IupacLetters = %w{A C G T R Y K M S W B D H V N}
3
- Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
4
- "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
5
- "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
6
- Decode = Code.invert
7
- LetterCode = Hash[Decode.map{|k,v|
8
- [k, %w{A C G T}.map{|chr| (v.include?(chr) ? 1.0 : 0.0) / v.size} ]
9
- }]
10
-
11
- attr_reader :word
12
- attr_accessor :weight
13
- def initialize(word)
14
- raise "Non-IUPAC letter in a word #{word}" unless word.each_char.all?{|letter| IupacLetters.include? letter}
15
- @word = word
16
- @weight = 1
17
- end
18
-
19
- def length
20
- word.length
21
- end
22
-
23
- def to_pcm
24
- matrix = @word.each_char.map{|letter| LetterCode[letter]}
25
- PositionalCountMatrix.new(matrix)
26
- end
27
- end
@@ -1,26 +0,0 @@
1
- class PositionalCountMatrix < PositionalMatrix
2
- attr_accessor :pseudocount, :background
3
- def initialize(*args)
4
- super
5
- raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
6
- raise ArgumentError, 'PCM summary count is zero or negative' unless count>=0
7
- # summary counts can slightly differ from each other due to floating point precision
8
- unless @matrix.all?{|position| (position.inject(:+) - count).abs < 0.01*count }
9
- raise ArgumentError, 'PCM has different summary count at each position'
10
- end
11
- @background = [1.0, 1.0, 1.0, 1.0]
12
- @pseudocount = 1.0
13
- end
14
- def count
15
- @count ||= @matrix.first.inject(&:+)
16
- end
17
- def to_pwm
18
- normalize_coef = background.inject(&:+)
19
- bckgr = @background.map{|el| el*1.0/normalize_coef}
20
- PositionalWeightMatrix.new @matrix.map{|pos| pos.map.with_index{|el,ind| Math.log(el+bckgr[ind]*@pseudocount /(bckgr[ind]*(count + @pseudocount))) }}
21
- end
22
- def +(another_pcm)
23
- raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
24
- PositionalCountMatrix.new matrix.map.with_index {|pos, i| pos.map.with_index{|el,j| el+another_pcm.matrix[i][j] }}
25
- end
26
- end
@@ -1,78 +0,0 @@
1
- require 'active_support/core_ext/hash/indifferent_access'
2
- require 'bioinform/support'
3
-
4
- class PositionalMatrix
5
- module DefaultParser
6
- number_pat = '[+-]?\d+(\.\d+)?'
7
- row_pat = "(#{number_pat} )*#{number_pat}"
8
- name_pat = '>? ?(?<name>[\w.-]+)\n'
9
- matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
10
- Pattern = /\A(#{name_pat})?#{matrix_pat}\z/
11
- def self.parse(input)
12
- input.multiline_squish.match(Pattern)
13
- end
14
- end
15
-
16
- module FantomParser
17
- number_pat = '[+-]?\d+(\.\d+)?'
18
- row_pat = "(#{number_pat} )*#{number_pat}"
19
- matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
20
- Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/
21
- def self.trim_first_position(input)
22
- inp = input.split("\n")
23
- ([inp[0]] + inp[1..-1].map{|x| x.split(' ')[1..4].join(' ') }).join("\n")
24
- end
25
- def self.parse(input)
26
- trim_first_position(input.multiline_squish).match(Pattern)
27
- end
28
- end
29
-
30
-
31
-
32
- attr_reader :name, :matrix
33
- def initialize(input,parser = DefaultParser)
34
- case input
35
- when String
36
- match = parser.parse(input)
37
- raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
38
- @name = match[:name]
39
- @matrix = match[:matrix].split("\n").map{|row| row.split.map(&:to_f)}
40
- when Hash
41
- inp = input.with_indifferent_access
42
- @matrix = [inp[:A],inp[:C], inp[:G], inp[:T]]
43
- when Array
44
- @matrix = input.map do |pos|
45
- case pos
46
- when Array then pos
47
- when Hash then [pos[:A], pos[:C], pos[:G], pos[:T]]
48
- else raise ArgumentError, 'Unknown type of argument inner dimension'
49
- end
50
- end
51
- else
52
- raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
53
- end
54
- raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
55
- raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
56
- @matrix = @matrix.transpose if @matrix.first.size != 4
57
- end
58
-
59
- def size
60
- @matrix.size
61
- end
62
- alias_method :length, :size
63
-
64
- def to_s(with_name = true)
65
- mat_str = @matrix.map(&:join.("\t")).join("\n")
66
- (with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
67
- end
68
-
69
- def pretty_string(with_name = true)
70
- header = " A C G T \n"
71
- mat_str = @matrix.map{|position| position.map{|el| el.round(3).to_s.rjust(6)}.join(' ') }.join("\n")
72
- (with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
73
- end
74
-
75
- def to_hash
76
- {A: @matrix.map{|pos| pos[0]}, C: @matrix.map{|pos| pos[1]}, G: @matrix.map{|pos| pos[2]}, T: @matrix.map{|pos| pos[3]}}.with_indifferent_access
77
- end
78
- end
@@ -1,17 +0,0 @@
1
- class PositionalProbabilityMatrix < PositionalMatrix
2
- attr_accessor :count
3
- def initialize(input_string)
4
- super(input_string)
5
- raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
6
- # summary counts can slightly differ from each other due to floating point precision
7
- unless @matrix.all?{|position| (position.inject(:+) - 1.0).abs < 0.01 }
8
- raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
9
- end
10
- end
11
- def to_pcm
12
- PositionalCountMatrix.new @matrix.map{|pos| pos.map{|el| el*@count}}
13
- end
14
- def to_pwm
15
- to_pcm.to_pwm
16
- end
17
- end
@@ -1,3 +0,0 @@
1
- class PositionalWeightMatrix < PositionalMatrix
2
-
3
- end