bioinform 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bioinform/data_models/pm.rb +8 -18
- data/lib/bioinform/support.rb +4 -1
- data/lib/bioinform/support/array_product.rb +6 -0
- data/lib/bioinform/support/array_zip.rb +6 -0
- data/lib/bioinform/support/partial_sums.rb +8 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/data_models/pm_spec.rb +10 -4
- data/spec/support/array_product_spec.rb +15 -0
- data/spec/support/array_zip_spec.rb +15 -0
- data/spec/support/partial_sums_spec.rb +23 -5
- metadata +8 -8
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb +0 -22
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb +0 -27
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb +0 -26
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb +0 -78
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb +0 -17
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb +0 -3
@@ -104,37 +104,31 @@ module Bioinform
|
|
104
104
|
end
|
105
105
|
|
106
106
|
def background!(new_background)
|
107
|
-
clear_cache
|
108
107
|
@background = new_background
|
109
108
|
self
|
110
109
|
end
|
111
110
|
|
112
111
|
def self.zero_column
|
113
|
-
[0
|
112
|
+
[0, 0, 0, 0]
|
114
113
|
end
|
115
114
|
|
116
|
-
def reverse_complement!
|
117
|
-
clear_cache
|
115
|
+
def reverse_complement!
|
118
116
|
@matrix.reverse!.map!(&:reverse!)
|
119
117
|
self
|
120
118
|
end
|
121
119
|
def left_augment!(n)
|
122
|
-
clear_cache
|
123
120
|
n.times{ @matrix.unshift(self.class.zero_column) }
|
124
121
|
self
|
125
122
|
end
|
126
123
|
def right_augment!(n)
|
127
|
-
clear_cache
|
128
124
|
n.times{ @matrix.push(self.class.zero_column) }
|
129
125
|
self
|
130
126
|
end
|
131
127
|
def shift_to_zero! # make worst score == 0 by shifting scores of each column
|
132
|
-
clear_cache
|
133
128
|
@matrix.map!{|position| min = position.min; position.map{|element| element - min}}
|
134
129
|
self
|
135
130
|
end
|
136
131
|
def discrete!(rate)
|
137
|
-
clear_cache
|
138
132
|
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
139
133
|
self
|
140
134
|
end
|
@@ -161,23 +155,19 @@ module Bioinform
|
|
161
155
|
#end
|
162
156
|
|
163
157
|
def best_score
|
164
|
-
@
|
158
|
+
@matrix.inject(0.0){|sum, col| sum + col.max}
|
165
159
|
end
|
166
160
|
def worst_score
|
167
|
-
@
|
161
|
+
@matrix.inject(0.0){|sum, col| sum + col.min}
|
168
162
|
end
|
169
163
|
|
170
164
|
# best score of suffix s[i..l]
|
171
|
-
def best_suffix
|
172
|
-
@
|
165
|
+
def best_suffix(i)
|
166
|
+
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
173
167
|
end
|
174
168
|
|
175
|
-
def worst_suffix
|
176
|
-
@
|
177
|
-
end
|
178
|
-
|
179
|
-
def clear_cache
|
180
|
-
@best_score, @worst_score, @best_suffix, @worst_suffix = nil,nil,nil,nil
|
169
|
+
def worst_suffix(i)
|
170
|
+
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
181
171
|
end
|
182
172
|
|
183
173
|
def reverse_complement
|
data/lib/bioinform/support.rb
CHANGED
@@ -11,4 +11,7 @@ require 'bioinform/support/yaml_dump_file'
|
|
11
11
|
require 'bioinform/support/inverf'
|
12
12
|
require 'bioinform/support/deep_dup'
|
13
13
|
|
14
|
-
require 'bioinform/support/partial_sums'
|
14
|
+
require 'bioinform/support/partial_sums'
|
15
|
+
|
16
|
+
require 'bioinform/support/array_zip'
|
17
|
+
require 'bioinform/support/array_product'
|
data/lib/bioinform/version.rb
CHANGED
data/spec/data_models/pm_spec.rb
CHANGED
@@ -318,17 +318,23 @@ module Bioinform
|
|
318
318
|
end
|
319
319
|
|
320
320
|
describe '#best_suffix' do
|
321
|
-
it 'should
|
321
|
+
it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
|
322
322
|
@pm = PM.new
|
323
323
|
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
324
|
-
@pm.best_suffix.should ==
|
324
|
+
@pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
|
325
|
+
@pm.best_suffix(1).should == (7.13 - 1.0)
|
326
|
+
@pm.best_suffix(2).should == (-1.0)
|
327
|
+
@pm.best_suffix(3).should == (0.0)
|
325
328
|
end
|
326
329
|
end
|
327
330
|
describe '#worst_suffix' do
|
328
|
-
it 'should
|
331
|
+
it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
|
329
332
|
@pm = PM.new
|
330
333
|
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
331
|
-
@pm.worst_suffix.should ==
|
334
|
+
@pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
|
335
|
+
@pm.worst_suffix(1).should == (3.25 - 1.5)
|
336
|
+
@pm.worst_suffix(2).should == (- 1.5)
|
337
|
+
@pm.worst_suffix(3).should == (0.0)
|
332
338
|
end
|
333
339
|
end
|
334
340
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/support/array_product'
|
3
|
+
|
4
|
+
describe Array do
|
5
|
+
context '::product' do
|
6
|
+
it 'should take any number of arrays and product them as if #product was made to first and others' do
|
7
|
+
Array.product([1,2,3]).should == [1,2,3].product()
|
8
|
+
Array.product([1,2,3],[4,5,6]).should == [1,2,3].product([4,5,6])
|
9
|
+
Array.product([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].product([4,5,6],[7,8,9])
|
10
|
+
end
|
11
|
+
it 'should return empty array if no arrays\'re given' do
|
12
|
+
Array.product().should == []
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/support/array_zip'
|
3
|
+
|
4
|
+
describe Array do
|
5
|
+
context '::zip' do
|
6
|
+
it 'should take any number of arrays and zip them as if #zip was made to first and others' do
|
7
|
+
Array.zip([1,2,3]).should == [1,2,3].zip()
|
8
|
+
Array.zip([1,2,3],[4,5,6]).should == [1,2,3].zip([4,5,6])
|
9
|
+
Array.zip([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].zip([4,5,6],[7,8,9])
|
10
|
+
end
|
11
|
+
it 'should return empty array if no arrays\'re given' do
|
12
|
+
Array.zip().should == []
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -1,9 +1,27 @@
|
|
1
1
|
describe 'Array#partial_sums' do
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
context 'when no initial value given' do
|
3
|
+
it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
|
4
|
+
[2,3,4,5].partial_sums.should == [2, 5, 9, 14]
|
5
|
+
[2,3,4,5].partial_sums.last.should be_kind_of(Float)
|
6
|
+
end
|
5
7
|
end
|
6
|
-
it 'should start counting from argument when it\'s given' do
|
8
|
+
it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
|
7
9
|
[2,3,4,5].partial_sums(100).should == [102,105,109,114]
|
10
|
+
[2,3,4,5].partial_sums(100).last.should be_kind_of(Integer)
|
8
11
|
end
|
9
|
-
end
|
12
|
+
end
|
13
|
+
|
14
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
|
15
|
+
|
16
|
+
describe 'Hash#partial_sums' do
|
17
|
+
context 'when no initial value given' do
|
18
|
+
it 'should return a hash with float values of the same size with partial sums of elements that has keys <= than argument' do
|
19
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums.should == {1=>5, 3=>7, 4=>10}
|
20
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums.values.last.should be_kind_of(Float)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
|
24
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums(100).should == {1=>105, 3=>107, 4=>110}
|
25
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums(100).values.last.should be_kind_of(Integer)
|
26
|
+
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -59,12 +59,6 @@ files:
|
|
59
59
|
- lib/bioinform.rb
|
60
60
|
- lib/bioinform/data_models.rb
|
61
61
|
- lib/bioinform/data_models/collection.rb
|
62
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb
|
63
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb
|
64
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb
|
65
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb
|
66
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
|
67
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb
|
68
62
|
- lib/bioinform/data_models/parser.rb
|
69
63
|
- lib/bioinform/data_models/parsers.rb
|
70
64
|
- lib/bioinform/data_models/parsers/array_parser.rb
|
@@ -76,6 +70,8 @@ files:
|
|
76
70
|
- lib/bioinform/data_models/ppm.rb
|
77
71
|
- lib/bioinform/data_models/pwm.rb
|
78
72
|
- lib/bioinform/support.rb
|
73
|
+
- lib/bioinform/support/array_product.rb
|
74
|
+
- lib/bioinform/support/array_zip.rb
|
79
75
|
- lib/bioinform/support/callable_symbol.rb
|
80
76
|
- lib/bioinform/support/collect_hash.rb
|
81
77
|
- lib/bioinform/support/deep_dup.rb
|
@@ -95,6 +91,8 @@ files:
|
|
95
91
|
- spec/data_models/pm_spec.rb
|
96
92
|
- spec/data_models/pwm_spec.rb
|
97
93
|
- spec/spec_helper.rb
|
94
|
+
- spec/support/array_product_spec.rb
|
95
|
+
- spec/support/array_zip_spec.rb
|
98
96
|
- spec/support/callable_symbol_spec.rb
|
99
97
|
- spec/support/collect_hash_spec.rb
|
100
98
|
- spec/support/delete_many_spec.rb
|
@@ -139,6 +137,8 @@ test_files:
|
|
139
137
|
- spec/data_models/pm_spec.rb
|
140
138
|
- spec/data_models/pwm_spec.rb
|
141
139
|
- spec/spec_helper.rb
|
140
|
+
- spec/support/array_product_spec.rb
|
141
|
+
- spec/support/array_zip_spec.rb
|
142
142
|
- spec/support/callable_symbol_spec.rb
|
143
143
|
- spec/support/collect_hash_spec.rb
|
144
144
|
- spec/support/delete_many_spec.rb
|
@@ -1,22 +0,0 @@
|
|
1
|
-
class Iupac
|
2
|
-
attr_reader :words
|
3
|
-
def initialize(input)
|
4
|
-
case input
|
5
|
-
when Array
|
6
|
-
@words = input.map{|word| IupacWord.new word}
|
7
|
-
when String
|
8
|
-
@words = input.gsub("\r\n","\n").split("\n").map{|word| IupacWord.new(word)}
|
9
|
-
when IupacWord
|
10
|
-
@words = [input]
|
11
|
-
else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
|
12
|
-
end
|
13
|
-
raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_pcm
|
17
|
-
@words.map(&:to_pcm).inject(:+)
|
18
|
-
end
|
19
|
-
def to_pwm
|
20
|
-
to_pcm.to_pwm
|
21
|
-
end
|
22
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
class IupacWord
|
2
|
-
IupacLetters = %w{A C G T R Y K M S W B D H V N}
|
3
|
-
Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
|
4
|
-
"AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
|
5
|
-
"CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
|
6
|
-
Decode = Code.invert
|
7
|
-
LetterCode = Hash[Decode.map{|k,v|
|
8
|
-
[k, %w{A C G T}.map{|chr| (v.include?(chr) ? 1.0 : 0.0) / v.size} ]
|
9
|
-
}]
|
10
|
-
|
11
|
-
attr_reader :word
|
12
|
-
attr_accessor :weight
|
13
|
-
def initialize(word)
|
14
|
-
raise "Non-IUPAC letter in a word #{word}" unless word.each_char.all?{|letter| IupacLetters.include? letter}
|
15
|
-
@word = word
|
16
|
-
@weight = 1
|
17
|
-
end
|
18
|
-
|
19
|
-
def length
|
20
|
-
word.length
|
21
|
-
end
|
22
|
-
|
23
|
-
def to_pcm
|
24
|
-
matrix = @word.each_char.map{|letter| LetterCode[letter]}
|
25
|
-
PositionalCountMatrix.new(matrix)
|
26
|
-
end
|
27
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
class PositionalCountMatrix < PositionalMatrix
|
2
|
-
attr_accessor :pseudocount, :background
|
3
|
-
def initialize(*args)
|
4
|
-
super
|
5
|
-
raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
|
6
|
-
raise ArgumentError, 'PCM summary count is zero or negative' unless count>=0
|
7
|
-
# summary counts can slightly differ from each other due to floating point precision
|
8
|
-
unless @matrix.all?{|position| (position.inject(:+) - count).abs < 0.01*count }
|
9
|
-
raise ArgumentError, 'PCM has different summary count at each position'
|
10
|
-
end
|
11
|
-
@background = [1.0, 1.0, 1.0, 1.0]
|
12
|
-
@pseudocount = 1.0
|
13
|
-
end
|
14
|
-
def count
|
15
|
-
@count ||= @matrix.first.inject(&:+)
|
16
|
-
end
|
17
|
-
def to_pwm
|
18
|
-
normalize_coef = background.inject(&:+)
|
19
|
-
bckgr = @background.map{|el| el*1.0/normalize_coef}
|
20
|
-
PositionalWeightMatrix.new @matrix.map{|pos| pos.map.with_index{|el,ind| Math.log(el+bckgr[ind]*@pseudocount /(bckgr[ind]*(count + @pseudocount))) }}
|
21
|
-
end
|
22
|
-
def +(another_pcm)
|
23
|
-
raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
|
24
|
-
PositionalCountMatrix.new matrix.map.with_index {|pos, i| pos.map.with_index{|el,j| el+another_pcm.matrix[i][j] }}
|
25
|
-
end
|
26
|
-
end
|
@@ -1,78 +0,0 @@
|
|
1
|
-
require 'active_support/core_ext/hash/indifferent_access'
|
2
|
-
require 'bioinform/support'
|
3
|
-
|
4
|
-
class PositionalMatrix
|
5
|
-
module DefaultParser
|
6
|
-
number_pat = '[+-]?\d+(\.\d+)?'
|
7
|
-
row_pat = "(#{number_pat} )*#{number_pat}"
|
8
|
-
name_pat = '>? ?(?<name>[\w.-]+)\n'
|
9
|
-
matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
10
|
-
Pattern = /\A(#{name_pat})?#{matrix_pat}\z/
|
11
|
-
def self.parse(input)
|
12
|
-
input.multiline_squish.match(Pattern)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
module FantomParser
|
17
|
-
number_pat = '[+-]?\d+(\.\d+)?'
|
18
|
-
row_pat = "(#{number_pat} )*#{number_pat}"
|
19
|
-
matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
20
|
-
Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/
|
21
|
-
def self.trim_first_position(input)
|
22
|
-
inp = input.split("\n")
|
23
|
-
([inp[0]] + inp[1..-1].map{|x| x.split(' ')[1..4].join(' ') }).join("\n")
|
24
|
-
end
|
25
|
-
def self.parse(input)
|
26
|
-
trim_first_position(input.multiline_squish).match(Pattern)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
attr_reader :name, :matrix
|
33
|
-
def initialize(input,parser = DefaultParser)
|
34
|
-
case input
|
35
|
-
when String
|
36
|
-
match = parser.parse(input)
|
37
|
-
raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
|
38
|
-
@name = match[:name]
|
39
|
-
@matrix = match[:matrix].split("\n").map{|row| row.split.map(&:to_f)}
|
40
|
-
when Hash
|
41
|
-
inp = input.with_indifferent_access
|
42
|
-
@matrix = [inp[:A],inp[:C], inp[:G], inp[:T]]
|
43
|
-
when Array
|
44
|
-
@matrix = input.map do |pos|
|
45
|
-
case pos
|
46
|
-
when Array then pos
|
47
|
-
when Hash then [pos[:A], pos[:C], pos[:G], pos[:T]]
|
48
|
-
else raise ArgumentError, 'Unknown type of argument inner dimension'
|
49
|
-
end
|
50
|
-
end
|
51
|
-
else
|
52
|
-
raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
|
53
|
-
end
|
54
|
-
raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
|
55
|
-
raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
|
56
|
-
@matrix = @matrix.transpose if @matrix.first.size != 4
|
57
|
-
end
|
58
|
-
|
59
|
-
def size
|
60
|
-
@matrix.size
|
61
|
-
end
|
62
|
-
alias_method :length, :size
|
63
|
-
|
64
|
-
def to_s(with_name = true)
|
65
|
-
mat_str = @matrix.map(&:join.("\t")).join("\n")
|
66
|
-
(with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
|
67
|
-
end
|
68
|
-
|
69
|
-
def pretty_string(with_name = true)
|
70
|
-
header = " A C G T \n"
|
71
|
-
mat_str = @matrix.map{|position| position.map{|el| el.round(3).to_s.rjust(6)}.join(' ') }.join("\n")
|
72
|
-
(with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_hash
|
76
|
-
{A: @matrix.map{|pos| pos[0]}, C: @matrix.map{|pos| pos[1]}, G: @matrix.map{|pos| pos[2]}, T: @matrix.map{|pos| pos[3]}}.with_indifferent_access
|
77
|
-
end
|
78
|
-
end
|
data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
class PositionalProbabilityMatrix < PositionalMatrix
|
2
|
-
attr_accessor :count
|
3
|
-
def initialize(input_string)
|
4
|
-
super(input_string)
|
5
|
-
raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
|
6
|
-
# summary counts can slightly differ from each other due to floating point precision
|
7
|
-
unless @matrix.all?{|position| (position.inject(:+) - 1.0).abs < 0.01 }
|
8
|
-
raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
|
9
|
-
end
|
10
|
-
end
|
11
|
-
def to_pcm
|
12
|
-
PositionalCountMatrix.new @matrix.map{|pos| pos.map{|el| el*@count}}
|
13
|
-
end
|
14
|
-
def to_pwm
|
15
|
-
to_pcm.to_pwm
|
16
|
-
end
|
17
|
-
end
|