bioinform 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bioinform/data_models/pm.rb +8 -18
- data/lib/bioinform/support.rb +4 -1
- data/lib/bioinform/support/array_product.rb +6 -0
- data/lib/bioinform/support/array_zip.rb +6 -0
- data/lib/bioinform/support/partial_sums.rb +8 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/data_models/pm_spec.rb +10 -4
- data/spec/support/array_product_spec.rb +15 -0
- data/spec/support/array_zip_spec.rb +15 -0
- data/spec/support/partial_sums_spec.rb +23 -5
- metadata +8 -8
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb +0 -22
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb +0 -27
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb +0 -26
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb +0 -78
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb +0 -17
- data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb +0 -3
@@ -104,37 +104,31 @@ module Bioinform
|
|
104
104
|
end
|
105
105
|
|
106
106
|
def background!(new_background)
|
107
|
-
clear_cache
|
108
107
|
@background = new_background
|
109
108
|
self
|
110
109
|
end
|
111
110
|
|
112
111
|
def self.zero_column
|
113
|
-
[0
|
112
|
+
[0, 0, 0, 0]
|
114
113
|
end
|
115
114
|
|
116
|
-
def reverse_complement!
|
117
|
-
clear_cache
|
115
|
+
def reverse_complement!
|
118
116
|
@matrix.reverse!.map!(&:reverse!)
|
119
117
|
self
|
120
118
|
end
|
121
119
|
def left_augment!(n)
|
122
|
-
clear_cache
|
123
120
|
n.times{ @matrix.unshift(self.class.zero_column) }
|
124
121
|
self
|
125
122
|
end
|
126
123
|
def right_augment!(n)
|
127
|
-
clear_cache
|
128
124
|
n.times{ @matrix.push(self.class.zero_column) }
|
129
125
|
self
|
130
126
|
end
|
131
127
|
def shift_to_zero! # make worst score == 0 by shifting scores of each column
|
132
|
-
clear_cache
|
133
128
|
@matrix.map!{|position| min = position.min; position.map{|element| element - min}}
|
134
129
|
self
|
135
130
|
end
|
136
131
|
def discrete!(rate)
|
137
|
-
clear_cache
|
138
132
|
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
139
133
|
self
|
140
134
|
end
|
@@ -161,23 +155,19 @@ module Bioinform
|
|
161
155
|
#end
|
162
156
|
|
163
157
|
def best_score
|
164
|
-
@
|
158
|
+
@matrix.inject(0.0){|sum, col| sum + col.max}
|
165
159
|
end
|
166
160
|
def worst_score
|
167
|
-
@
|
161
|
+
@matrix.inject(0.0){|sum, col| sum + col.min}
|
168
162
|
end
|
169
163
|
|
170
164
|
# best score of suffix s[i..l]
|
171
|
-
def best_suffix
|
172
|
-
@
|
165
|
+
def best_suffix(i)
|
166
|
+
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
173
167
|
end
|
174
168
|
|
175
|
-
def worst_suffix
|
176
|
-
@
|
177
|
-
end
|
178
|
-
|
179
|
-
def clear_cache
|
180
|
-
@best_score, @worst_score, @best_suffix, @worst_suffix = nil,nil,nil,nil
|
169
|
+
def worst_suffix(i)
|
170
|
+
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
181
171
|
end
|
182
172
|
|
183
173
|
def reverse_complement
|
data/lib/bioinform/support.rb
CHANGED
@@ -11,4 +11,7 @@ require 'bioinform/support/yaml_dump_file'
|
|
11
11
|
require 'bioinform/support/inverf'
|
12
12
|
require 'bioinform/support/deep_dup'
|
13
13
|
|
14
|
-
require 'bioinform/support/partial_sums'
|
14
|
+
require 'bioinform/support/partial_sums'
|
15
|
+
|
16
|
+
require 'bioinform/support/array_zip'
|
17
|
+
require 'bioinform/support/array_product'
|
data/lib/bioinform/version.rb
CHANGED
data/spec/data_models/pm_spec.rb
CHANGED
@@ -318,17 +318,23 @@ module Bioinform
|
|
318
318
|
end
|
319
319
|
|
320
320
|
describe '#best_suffix' do
|
321
|
-
it 'should
|
321
|
+
it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
|
322
322
|
@pm = PM.new
|
323
323
|
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
324
|
-
@pm.best_suffix.should ==
|
324
|
+
@pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
|
325
|
+
@pm.best_suffix(1).should == (7.13 - 1.0)
|
326
|
+
@pm.best_suffix(2).should == (-1.0)
|
327
|
+
@pm.best_suffix(3).should == (0.0)
|
325
328
|
end
|
326
329
|
end
|
327
330
|
describe '#worst_suffix' do
|
328
|
-
it 'should
|
331
|
+
it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
|
329
332
|
@pm = PM.new
|
330
333
|
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
331
|
-
@pm.worst_suffix.should ==
|
334
|
+
@pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
|
335
|
+
@pm.worst_suffix(1).should == (3.25 - 1.5)
|
336
|
+
@pm.worst_suffix(2).should == (- 1.5)
|
337
|
+
@pm.worst_suffix(3).should == (0.0)
|
332
338
|
end
|
333
339
|
end
|
334
340
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/support/array_product'
|
3
|
+
|
4
|
+
describe Array do
|
5
|
+
context '::product' do
|
6
|
+
it 'should take any number of arrays and product them as if #product was made to first and others' do
|
7
|
+
Array.product([1,2,3]).should == [1,2,3].product()
|
8
|
+
Array.product([1,2,3],[4,5,6]).should == [1,2,3].product([4,5,6])
|
9
|
+
Array.product([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].product([4,5,6],[7,8,9])
|
10
|
+
end
|
11
|
+
it 'should return empty array if no arrays\'re given' do
|
12
|
+
Array.product().should == []
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/support/array_zip'
|
3
|
+
|
4
|
+
describe Array do
|
5
|
+
context '::zip' do
|
6
|
+
it 'should take any number of arrays and zip them as if #zip was made to first and others' do
|
7
|
+
Array.zip([1,2,3]).should == [1,2,3].zip()
|
8
|
+
Array.zip([1,2,3],[4,5,6]).should == [1,2,3].zip([4,5,6])
|
9
|
+
Array.zip([1,2,3],[4,5,6],[7,8,9]).should == [1,2,3].zip([4,5,6],[7,8,9])
|
10
|
+
end
|
11
|
+
it 'should return empty array if no arrays\'re given' do
|
12
|
+
Array.zip().should == []
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -1,9 +1,27 @@
|
|
1
1
|
describe 'Array#partial_sums' do
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
context 'when no initial value given' do
|
3
|
+
it 'should return an array of the same size with partial sums of elements 0..ind inclusive with float elements' do
|
4
|
+
[2,3,4,5].partial_sums.should == [2, 5, 9, 14]
|
5
|
+
[2,3,4,5].partial_sums.last.should be_kind_of(Float)
|
6
|
+
end
|
5
7
|
end
|
6
|
-
it 'should start counting from argument when it\'s given' do
|
8
|
+
it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
|
7
9
|
[2,3,4,5].partial_sums(100).should == [102,105,109,114]
|
10
|
+
[2,3,4,5].partial_sums(100).last.should be_kind_of(Integer)
|
8
11
|
end
|
9
|
-
end
|
12
|
+
end
|
13
|
+
|
14
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums == {1=>5, 3=>7, 4=>10}
|
15
|
+
|
16
|
+
describe 'Hash#partial_sums' do
|
17
|
+
context 'when no initial value given' do
|
18
|
+
it 'should return a hash with float values of the same size with partial sums of elements that has keys <= than argument' do
|
19
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums.should == {1=>5, 3=>7, 4=>10}
|
20
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums.values.last.should be_kind_of(Float)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
it 'should start counting from argument when it\'s given. Type of values depends on type of initial value' do
|
24
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums(100).should == {1=>105, 3=>107, 4=>110}
|
25
|
+
{1 => 5, 4 => 3, 3 => 2}.partial_sums(100).values.last.should be_kind_of(Integer)
|
26
|
+
end
|
27
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -59,12 +59,6 @@ files:
|
|
59
59
|
- lib/bioinform.rb
|
60
60
|
- lib/bioinform/data_models.rb
|
61
61
|
- lib/bioinform/data_models/collection.rb
|
62
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac.rb
|
63
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/iupac_word.rb
|
64
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_count_matrix.rb
|
65
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_matrix.rb
|
66
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
|
67
|
-
- lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb
|
68
62
|
- lib/bioinform/data_models/parser.rb
|
69
63
|
- lib/bioinform/data_models/parsers.rb
|
70
64
|
- lib/bioinform/data_models/parsers/array_parser.rb
|
@@ -76,6 +70,8 @@ files:
|
|
76
70
|
- lib/bioinform/data_models/ppm.rb
|
77
71
|
- lib/bioinform/data_models/pwm.rb
|
78
72
|
- lib/bioinform/support.rb
|
73
|
+
- lib/bioinform/support/array_product.rb
|
74
|
+
- lib/bioinform/support/array_zip.rb
|
79
75
|
- lib/bioinform/support/callable_symbol.rb
|
80
76
|
- lib/bioinform/support/collect_hash.rb
|
81
77
|
- lib/bioinform/support/deep_dup.rb
|
@@ -95,6 +91,8 @@ files:
|
|
95
91
|
- spec/data_models/pm_spec.rb
|
96
92
|
- spec/data_models/pwm_spec.rb
|
97
93
|
- spec/spec_helper.rb
|
94
|
+
- spec/support/array_product_spec.rb
|
95
|
+
- spec/support/array_zip_spec.rb
|
98
96
|
- spec/support/callable_symbol_spec.rb
|
99
97
|
- spec/support/collect_hash_spec.rb
|
100
98
|
- spec/support/delete_many_spec.rb
|
@@ -139,6 +137,8 @@ test_files:
|
|
139
137
|
- spec/data_models/pm_spec.rb
|
140
138
|
- spec/data_models/pwm_spec.rb
|
141
139
|
- spec/spec_helper.rb
|
140
|
+
- spec/support/array_product_spec.rb
|
141
|
+
- spec/support/array_zip_spec.rb
|
142
142
|
- spec/support/callable_symbol_spec.rb
|
143
143
|
- spec/support/collect_hash_spec.rb
|
144
144
|
- spec/support/delete_many_spec.rb
|
@@ -1,22 +0,0 @@
|
|
1
|
-
class Iupac
|
2
|
-
attr_reader :words
|
3
|
-
def initialize(input)
|
4
|
-
case input
|
5
|
-
when Array
|
6
|
-
@words = input.map{|word| IupacWord.new word}
|
7
|
-
when String
|
8
|
-
@words = input.gsub("\r\n","\n").split("\n").map{|word| IupacWord.new(word)}
|
9
|
-
when IupacWord
|
10
|
-
@words = [input]
|
11
|
-
else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
|
12
|
-
end
|
13
|
-
raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_pcm
|
17
|
-
@words.map(&:to_pcm).inject(:+)
|
18
|
-
end
|
19
|
-
def to_pwm
|
20
|
-
to_pcm.to_pwm
|
21
|
-
end
|
22
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
class IupacWord
|
2
|
-
IupacLetters = %w{A C G T R Y K M S W B D H V N}
|
3
|
-
Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
|
4
|
-
"AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
|
5
|
-
"CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
|
6
|
-
Decode = Code.invert
|
7
|
-
LetterCode = Hash[Decode.map{|k,v|
|
8
|
-
[k, %w{A C G T}.map{|chr| (v.include?(chr) ? 1.0 : 0.0) / v.size} ]
|
9
|
-
}]
|
10
|
-
|
11
|
-
attr_reader :word
|
12
|
-
attr_accessor :weight
|
13
|
-
def initialize(word)
|
14
|
-
raise "Non-IUPAC letter in a word #{word}" unless word.each_char.all?{|letter| IupacLetters.include? letter}
|
15
|
-
@word = word
|
16
|
-
@weight = 1
|
17
|
-
end
|
18
|
-
|
19
|
-
def length
|
20
|
-
word.length
|
21
|
-
end
|
22
|
-
|
23
|
-
def to_pcm
|
24
|
-
matrix = @word.each_char.map{|letter| LetterCode[letter]}
|
25
|
-
PositionalCountMatrix.new(matrix)
|
26
|
-
end
|
27
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
class PositionalCountMatrix < PositionalMatrix
|
2
|
-
attr_accessor :pseudocount, :background
|
3
|
-
def initialize(*args)
|
4
|
-
super
|
5
|
-
raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
|
6
|
-
raise ArgumentError, 'PCM summary count is zero or negative' unless count>=0
|
7
|
-
# summary counts can slightly differ from each other due to floating point precision
|
8
|
-
unless @matrix.all?{|position| (position.inject(:+) - count).abs < 0.01*count }
|
9
|
-
raise ArgumentError, 'PCM has different summary count at each position'
|
10
|
-
end
|
11
|
-
@background = [1.0, 1.0, 1.0, 1.0]
|
12
|
-
@pseudocount = 1.0
|
13
|
-
end
|
14
|
-
def count
|
15
|
-
@count ||= @matrix.first.inject(&:+)
|
16
|
-
end
|
17
|
-
def to_pwm
|
18
|
-
normalize_coef = background.inject(&:+)
|
19
|
-
bckgr = @background.map{|el| el*1.0/normalize_coef}
|
20
|
-
PositionalWeightMatrix.new @matrix.map{|pos| pos.map.with_index{|el,ind| Math.log(el+bckgr[ind]*@pseudocount /(bckgr[ind]*(count + @pseudocount))) }}
|
21
|
-
end
|
22
|
-
def +(another_pcm)
|
23
|
-
raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
|
24
|
-
PositionalCountMatrix.new matrix.map.with_index {|pos, i| pos.map.with_index{|el,j| el+another_pcm.matrix[i][j] }}
|
25
|
-
end
|
26
|
-
end
|
@@ -1,78 +0,0 @@
|
|
1
|
-
require 'active_support/core_ext/hash/indifferent_access'
|
2
|
-
require 'bioinform/support'
|
3
|
-
|
4
|
-
class PositionalMatrix
|
5
|
-
module DefaultParser
|
6
|
-
number_pat = '[+-]?\d+(\.\d+)?'
|
7
|
-
row_pat = "(#{number_pat} )*#{number_pat}"
|
8
|
-
name_pat = '>? ?(?<name>[\w.-]+)\n'
|
9
|
-
matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
10
|
-
Pattern = /\A(#{name_pat})?#{matrix_pat}\z/
|
11
|
-
def self.parse(input)
|
12
|
-
input.multiline_squish.match(Pattern)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
module FantomParser
|
17
|
-
number_pat = '[+-]?\d+(\.\d+)?'
|
18
|
-
row_pat = "(#{number_pat} )*#{number_pat}"
|
19
|
-
matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
20
|
-
Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/
|
21
|
-
def self.trim_first_position(input)
|
22
|
-
inp = input.split("\n")
|
23
|
-
([inp[0]] + inp[1..-1].map{|x| x.split(' ')[1..4].join(' ') }).join("\n")
|
24
|
-
end
|
25
|
-
def self.parse(input)
|
26
|
-
trim_first_position(input.multiline_squish).match(Pattern)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
attr_reader :name, :matrix
|
33
|
-
def initialize(input,parser = DefaultParser)
|
34
|
-
case input
|
35
|
-
when String
|
36
|
-
match = parser.parse(input)
|
37
|
-
raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
|
38
|
-
@name = match[:name]
|
39
|
-
@matrix = match[:matrix].split("\n").map{|row| row.split.map(&:to_f)}
|
40
|
-
when Hash
|
41
|
-
inp = input.with_indifferent_access
|
42
|
-
@matrix = [inp[:A],inp[:C], inp[:G], inp[:T]]
|
43
|
-
when Array
|
44
|
-
@matrix = input.map do |pos|
|
45
|
-
case pos
|
46
|
-
when Array then pos
|
47
|
-
when Hash then [pos[:A], pos[:C], pos[:G], pos[:T]]
|
48
|
-
else raise ArgumentError, 'Unknown type of argument inner dimension'
|
49
|
-
end
|
50
|
-
end
|
51
|
-
else
|
52
|
-
raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
|
53
|
-
end
|
54
|
-
raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
|
55
|
-
raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
|
56
|
-
@matrix = @matrix.transpose if @matrix.first.size != 4
|
57
|
-
end
|
58
|
-
|
59
|
-
def size
|
60
|
-
@matrix.size
|
61
|
-
end
|
62
|
-
alias_method :length, :size
|
63
|
-
|
64
|
-
def to_s(with_name = true)
|
65
|
-
mat_str = @matrix.map(&:join.("\t")).join("\n")
|
66
|
-
(with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
|
67
|
-
end
|
68
|
-
|
69
|
-
def pretty_string(with_name = true)
|
70
|
-
header = " A C G T \n"
|
71
|
-
mat_str = @matrix.map{|position| position.map{|el| el.round(3).to_s.rjust(6)}.join(' ') }.join("\n")
|
72
|
-
(with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
|
73
|
-
end
|
74
|
-
|
75
|
-
def to_hash
|
76
|
-
{A: @matrix.map{|pos| pos[0]}, C: @matrix.map{|pos| pos[1]}, G: @matrix.map{|pos| pos[2]}, T: @matrix.map{|pos| pos[3]}}.with_indifferent_access
|
77
|
-
end
|
78
|
-
end
|
data/lib/bioinform/data_models/old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
class PositionalProbabilityMatrix < PositionalMatrix
|
2
|
-
attr_accessor :count
|
3
|
-
def initialize(input_string)
|
4
|
-
super(input_string)
|
5
|
-
raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
|
6
|
-
# summary counts can slightly differ from each other due to floating point precision
|
7
|
-
unless @matrix.all?{|position| (position.inject(:+) - 1.0).abs < 0.01 }
|
8
|
-
raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
|
9
|
-
end
|
10
|
-
end
|
11
|
-
def to_pcm
|
12
|
-
PositionalCountMatrix.new @matrix.map{|pos| pos.map{|el| el*@count}}
|
13
|
-
end
|
14
|
-
def to_pwm
|
15
|
-
to_pcm.to_pwm
|
16
|
-
end
|
17
|
-
end
|