bioinform 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bioinform/data_models/parser.rb +29 -32
- data/lib/bioinform/data_models/parsers/array_parser.rb +1 -1
- data/lib/bioinform/data_models/parsers/hash_parser.rb +1 -1
- data/lib/bioinform/data_models/parsers/string_parser.rb +3 -3
- data/lib/bioinform/data_models/parsers.rb +2 -0
- data/lib/bioinform/data_models/pm.rb +4 -1
- data/lib/bioinform/data_models/pwm.rb +7 -2
- data/lib/bioinform/data_models.rb +2 -2
- data/lib/bioinform/support/callable_symbol.rb +14 -4
- data/lib/bioinform/support.rb +0 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/data_models/parser_spec.rb +6 -6
- data/spec/data_models/parsers/string_parser_spec.rb +46 -2
- data/spec/data_models/pm_spec.rb +1 -1
- data/spec/data_models/pwm_spec.rb +20 -0
- data/spec/spec_helper.rb +17 -19
- metadata +2 -5
- data/lib/bioinform/support/curry_except_self.rb +0 -5
- data/spec/support/curry_except_self_spec.rb +0 -9
@@ -2,40 +2,37 @@ require 'bioinform/support'
|
|
2
2
|
require 'bioinform/data_models/pm'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
class
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
PM::Parser.subclasses << subclass
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def initialize(input)
|
20
|
-
@input = input
|
21
|
-
end
|
22
|
-
|
23
|
-
def parse_core
|
24
|
-
raise NotImplemented
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
def parse
|
29
|
-
parse_core
|
5
|
+
class Parser
|
6
|
+
attr_reader :input
|
7
|
+
|
8
|
+
@subclasses ||= []
|
9
|
+
class << self
|
10
|
+
def subclasses
|
11
|
+
@subclasses
|
12
|
+
end
|
13
|
+
def inherited(subclass)
|
14
|
+
Parser.subclasses << subclass
|
30
15
|
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize(input)
|
19
|
+
@input = input
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse_core
|
23
|
+
raise NotImplemented
|
24
|
+
end
|
25
|
+
|
31
26
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
def parse
|
28
|
+
parse_core
|
29
|
+
end
|
30
|
+
|
31
|
+
def can_parse?
|
32
|
+
parse_core
|
33
|
+
true
|
34
|
+
rescue
|
35
|
+
false
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
@@ -2,7 +2,7 @@ require 'bioinform/support'
|
|
2
2
|
require 'bioinform/data_models/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
class ArrayParser <
|
5
|
+
class ArrayParser < Parser
|
6
6
|
def parse_core
|
7
7
|
raise ArgumentError unless input.is_a?(Array) && input.all?(&:is_a?.(Array)) && input.same_by?(&:length) && (input.size == 4 || input.sample.size == 4)
|
8
8
|
if input.all?{|line| line.size == 4}
|
@@ -3,15 +3,15 @@ require 'bioinform/data_models/parser'
|
|
3
3
|
require 'bioinform/data_models/parsers/array_parser'
|
4
4
|
|
5
5
|
module Bioinform
|
6
|
-
class StringParser <
|
6
|
+
class StringParser < Parser
|
7
7
|
def number_pat
|
8
|
-
'[+-]?\d+(\.\d+)?'
|
8
|
+
'[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
|
9
9
|
end
|
10
10
|
def row_pat
|
11
11
|
"(#{number_pat} )*#{number_pat}"
|
12
12
|
end
|
13
13
|
def name_pat
|
14
|
-
'
|
14
|
+
'(>\s*)?(?<name>\S+)'
|
15
15
|
end
|
16
16
|
def matrix_pat
|
17
17
|
"(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'bioinform/support'
|
2
2
|
|
3
3
|
module Bioinform
|
4
|
+
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
|
5
|
+
LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
|
6
|
+
|
4
7
|
class PM
|
5
8
|
attr_reader :matrix, :background
|
6
9
|
attr_accessor :name
|
@@ -22,7 +25,7 @@ module Bioinform
|
|
22
25
|
if @parser
|
23
26
|
raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
|
24
27
|
else
|
25
|
-
@parser =
|
28
|
+
@parser = Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
|
26
29
|
raise ArgumentError, 'No one parser can parse specified input' unless @parser
|
27
30
|
end
|
28
31
|
end
|
@@ -1,12 +1,10 @@
|
|
1
1
|
require 'bioinform/support'
|
2
2
|
require 'bioinform/data_models/pm'
|
3
3
|
module Bioinform
|
4
|
-
|
5
4
|
class PWM < PM
|
6
5
|
def score_mean
|
7
6
|
matrix.inject(0.0){ |mean, position| mean + position.each_index.inject(0.0){|sum, letter| sum + position[letter] * probability[letter]} }
|
8
7
|
end
|
9
|
-
|
10
8
|
def score_variance
|
11
9
|
matrix.inject(0.0) do |variance, position|
|
12
10
|
variance + position.each_index.inject(0.0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
@@ -19,5 +17,12 @@ module Bioinform
|
|
19
17
|
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
20
18
|
score_mean + n_ * sigma
|
21
19
|
end
|
20
|
+
|
21
|
+
def score(word)
|
22
|
+
word = word.upcase
|
23
|
+
raise ArgumentError unless word.length == length
|
24
|
+
raise ArgumentError unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
25
|
+
word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(0.0, &:+)
|
26
|
+
end
|
22
27
|
end
|
23
28
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'bioinform/data_models/pm'
|
2
|
-
|
2
|
+
require 'bioinform/data_models/pcm'
|
3
3
|
require 'bioinform/data_models/pwm'
|
4
|
-
|
4
|
+
require 'bioinform/data_models/ppm'
|
5
5
|
#require 'bioinform/data_models/iupac_word'
|
6
6
|
#require 'bioinform/data_models/iupac_wordset'
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'bioinform/support/curry_except_self'
|
2
|
-
|
3
1
|
# Useful extension for &:symbol - syntax to make it possible to pass arguments for method in block
|
4
2
|
# ['abc','','','def','ghi'].tap(&:delete.('')) # ==> ['abc','def','ghi']
|
5
3
|
# [1,2,3].map(&:to_s.(2)) # ==> ['1','10','11']
|
@@ -8,7 +6,7 @@ require 'bioinform/support/curry_except_self'
|
|
8
6
|
# [%w{1 2 3 4 5},%w{6 7 8 9}].map(&:join.().length) # ==> [5,4]
|
9
7
|
class Symbol
|
10
8
|
def call(*args, &block)
|
11
|
-
obj=BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
|
9
|
+
obj = BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
|
12
10
|
@postprocess_meth = [meth]
|
13
11
|
@postprocess_args = [params]
|
14
12
|
@postprocess_block = [block]
|
@@ -37,4 +35,16 @@ class Symbol
|
|
37
35
|
|
38
36
|
obj
|
39
37
|
end
|
40
|
-
end
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
=begin
|
42
|
+
# Much simpler but ['abc','cdef','xy','z','wwww'].select(&:size.() == 4) wouldn't work
|
43
|
+
class Symbol
|
44
|
+
def call(*args, &block)
|
45
|
+
proc do |recv|
|
46
|
+
recv.__send__(self, *args, &block)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
=end
|
data/lib/bioinform/support.rb
CHANGED
@@ -3,7 +3,6 @@ require 'active_support/core_ext/hash/indifferent_access'
|
|
3
3
|
|
4
4
|
require 'bioinform/support/callable_symbol'
|
5
5
|
require 'bioinform/support/collect_hash'
|
6
|
-
require 'bioinform/support/curry_except_self'
|
7
6
|
require 'bioinform/support/delete_many'
|
8
7
|
require 'bioinform/support/has_keys'
|
9
8
|
require 'bioinform/support/multiline_squish'
|
data/lib/bioinform/version.rb
CHANGED
@@ -2,8 +2,8 @@ require 'spec_helper'
|
|
2
2
|
require 'bioinform/data_models/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
describe
|
6
|
-
include
|
5
|
+
describe Parser do
|
6
|
+
include Parser::Helpers
|
7
7
|
|
8
8
|
before :each do
|
9
9
|
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
@@ -14,9 +14,9 @@ module Bioinform
|
|
14
14
|
end
|
15
15
|
|
16
16
|
context 'when subklass created' do
|
17
|
-
it '
|
18
|
-
|
19
|
-
|
17
|
+
it 'Parser.subclasses should contain all subclasses' do
|
18
|
+
Parser.subclasses.should include ParserBad
|
19
|
+
Parser.subclasses.should include ParserGood
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -29,7 +29,7 @@ module Bioinform
|
|
29
29
|
|
30
30
|
describe '#parse' do
|
31
31
|
it 'should raise an error unless reimplemented' do
|
32
|
-
parser =
|
32
|
+
parser = Parser.new('my stub input')
|
33
33
|
expect{ parser.parse }.to raise_error
|
34
34
|
end
|
35
35
|
|
@@ -48,6 +48,31 @@ module Bioinform
|
|
48
48
|
9 -8.7 6.54 -3210
|
49
49
|
EOS
|
50
50
|
|
51
|
+
@input_with_exponent = <<-EOS
|
52
|
+
1.23 4.56 7.8 9.0
|
53
|
+
9 -8.7 6.54 -3.210e3
|
54
|
+
EOS
|
55
|
+
|
56
|
+
@input_with_plus_exponent = <<-EOS
|
57
|
+
1.23 4.56 7.8 9.0
|
58
|
+
9 -8.7 6.54 -3.210e+3
|
59
|
+
EOS
|
60
|
+
|
61
|
+
@input_with_minus_exponent = <<-EOS
|
62
|
+
1.23 4.56 7.8 9.0
|
63
|
+
9 -87e-1 6.54 -3210
|
64
|
+
EOS
|
65
|
+
|
66
|
+
@input_with_upcase_exponent = <<-EOS
|
67
|
+
1.23 4.56 7.8 9.0
|
68
|
+
9 -8.7 6.54 -3.210E3
|
69
|
+
EOS
|
70
|
+
|
71
|
+
@input_with_manydigit_exponent = <<-EOS
|
72
|
+
1.23 4.56 7.8 9.0
|
73
|
+
9 -8.7 6.54 -0.0000003210e10
|
74
|
+
EOS
|
75
|
+
|
51
76
|
@input_transposed = <<-EOS
|
52
77
|
1.23 9
|
53
78
|
4.56 -8.7
|
@@ -55,14 +80,15 @@ module Bioinform
|
|
55
80
|
9.0 -3210
|
56
81
|
EOS
|
57
82
|
|
83
|
+
|
58
84
|
@bad_input_not_numeric = <<-EOS
|
59
85
|
1.23 4.56 aaa 9.0
|
60
86
|
9 -8.7 6.54 -3210
|
61
87
|
EOS
|
62
88
|
|
63
89
|
@bad_input_different_row_size = <<-EOS
|
64
|
-
1.23 4.56 7.8
|
65
|
-
9 -8.7 6.54
|
90
|
+
1.23 4.56 7.8 9
|
91
|
+
9 -8.7 6.54
|
66
92
|
EOS
|
67
93
|
|
68
94
|
@bad_input_not_4_rows_and_cols = <<-EOS
|
@@ -72,6 +98,11 @@ module Bioinform
|
|
72
98
|
4 5 6
|
73
99
|
1 1 1
|
74
100
|
EOS
|
101
|
+
|
102
|
+
@bad_input_with_empty_exponent = <<-EOS
|
103
|
+
1.23 4.56 7.8 9.0
|
104
|
+
9e -8.7 6.54 3210
|
105
|
+
EOS
|
75
106
|
end
|
76
107
|
|
77
108
|
describe '#can_parse?' do
|
@@ -84,11 +115,18 @@ module Bioinform
|
|
84
115
|
StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).can_parse?.should be_true
|
85
116
|
StringParser.new(@input_without_name).can_parse?.should be_true
|
86
117
|
StringParser.new(@input_transposed).can_parse?.should be_true
|
118
|
+
StringParser.new(@input_with_exponent).can_parse?.should be_true
|
119
|
+
StringParser.new(@input_with_plus_exponent).can_parse?.should be_true
|
120
|
+
StringParser.new(@input_with_minus_exponent).can_parse?.should be_true
|
121
|
+
StringParser.new(@input_with_upcase_exponent).can_parse?.should be_true
|
122
|
+
StringParser.new(@input_with_manydigit_exponent).can_parse?.should be_true
|
123
|
+
|
87
124
|
end
|
88
125
|
it 'should return false for invalid input string' do
|
89
126
|
StringParser.new(@bad_input_not_numeric).can_parse?.should be_false
|
90
127
|
StringParser.new(@bad_input_different_row_size).can_parse?.should be_false
|
91
128
|
StringParser.new(@bad_input_not_4_rows_and_cols).can_parse?.should be_false
|
129
|
+
StringParser.new(@bad_input_with_empty_exponent).can_parse?.should be_false
|
92
130
|
end
|
93
131
|
end
|
94
132
|
describe '#parse' do
|
@@ -101,11 +139,17 @@ module Bioinform
|
|
101
139
|
StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).parse.should == {matrix: @matrix}
|
102
140
|
StringParser.new(@input_without_name).parse.should == {matrix: @matrix}
|
103
141
|
StringParser.new(@input_transposed).parse.should == {matrix: @matrix}
|
142
|
+
StringParser.new(@input_with_exponent).parse.should == {matrix: @matrix}
|
143
|
+
StringParser.new(@input_with_plus_exponent).parse.should == {matrix: @matrix}
|
144
|
+
StringParser.new(@input_with_minus_exponent).parse.should == {matrix: @matrix}
|
145
|
+
StringParser.new(@input_with_upcase_exponent).parse.should == {matrix: @matrix}
|
146
|
+
StringParser.new(@input_with_manydigit_exponent).parse.should == {matrix: @matrix}
|
104
147
|
end
|
105
148
|
it 'should raise an error for invalid input string' do
|
106
149
|
expect{ StringParser.new(@bad_input_not_numeric).parse }.to raise_error ArgumentError
|
107
150
|
expect{ StringParser.new(@bad_input_different_row_size).parse }.to raise_error ArgumentError
|
108
151
|
expect{ StringParser.new(@bad_input_not_4_rows_and_cols).parse }.to raise_error ArgumentError
|
152
|
+
expect{ StringParser.new(@bad_input_with_empty_exponent).parse }.to raise_error ArgumentError
|
109
153
|
end
|
110
154
|
end
|
111
155
|
end
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -21,5 +21,25 @@ module Bioinform
|
|
21
21
|
|
22
22
|
describe '#gauss_estimation' do
|
23
23
|
end
|
24
|
+
|
25
|
+
describe '#score' do
|
26
|
+
let(:pwm) do
|
27
|
+
pwm = PWM.new
|
28
|
+
pwm.matrix = [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]]
|
29
|
+
pwm
|
30
|
+
end
|
31
|
+
it 'should evaluate to score of given word' do
|
32
|
+
pwm.score('aAAAA').should == 11111
|
33
|
+
pwm.score('agata').should == 13141
|
34
|
+
pwm.score('CCGCT').should == 22324
|
35
|
+
end
|
36
|
+
it 'should raise an ArgumentError if word contain bad letter' do
|
37
|
+
expect{ pwm.score('AAAAV') }.to raise_error(ArgumentError)
|
38
|
+
end
|
39
|
+
it 'should raise an ArgumentError if word has size different than size of matrix' do
|
40
|
+
expect{ pwm.score('AAA') }.to raise_error(ArgumentError)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
24
44
|
end
|
25
45
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,27 +3,25 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
|
|
3
3
|
|
4
4
|
require 'rspec'
|
5
5
|
module Bioinform
|
6
|
-
class
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
define_method :parse do result end
|
13
|
-
end
|
14
|
-
#class_levels = class_name.to_s.split('::')
|
15
|
-
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
16
|
-
Bioinform.const_set(class_name.to_s.split('::').last, klass)
|
6
|
+
class Parser
|
7
|
+
module Helpers
|
8
|
+
def parser_stub(class_name, can_parse, result)
|
9
|
+
klass = Class.new(Parser) do
|
10
|
+
define_method :can_parse? do can_parse end
|
11
|
+
define_method :parse do result end
|
17
12
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
13
|
+
#class_levels = class_name.to_s.split('::')
|
14
|
+
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
15
|
+
Bioinform.const_set(class_name.to_s.split('::').last, klass)
|
16
|
+
end
|
17
|
+
def parser_subclasses_cleanup
|
18
|
+
Parser.subclasses.each do |klass|
|
19
|
+
#class_levels = klass.to_s.split('::')
|
20
|
+
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
21
|
+
|
22
|
+
Bioinform.send :remove_const, klass.name.split('::').last
|
26
23
|
end
|
24
|
+
Parser.subclasses.clear
|
27
25
|
end
|
28
26
|
end
|
29
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -78,7 +78,6 @@ files:
|
|
78
78
|
- lib/bioinform/support.rb
|
79
79
|
- lib/bioinform/support/callable_symbol.rb
|
80
80
|
- lib/bioinform/support/collect_hash.rb
|
81
|
-
- lib/bioinform/support/curry_except_self.rb
|
82
81
|
- lib/bioinform/support/deep_dup.rb
|
83
82
|
- lib/bioinform/support/delete_many.rb
|
84
83
|
- lib/bioinform/support/has_keys.rb
|
@@ -98,7 +97,6 @@ files:
|
|
98
97
|
- spec/spec_helper.rb
|
99
98
|
- spec/support/callable_symbol_spec.rb
|
100
99
|
- spec/support/collect_hash_spec.rb
|
101
|
-
- spec/support/curry_except_self_spec.rb
|
102
100
|
- spec/support/delete_many_spec.rb
|
103
101
|
- spec/support/has_keys_spec.rb
|
104
102
|
- spec/support/inverf_spec.rb
|
@@ -143,7 +141,6 @@ test_files:
|
|
143
141
|
- spec/spec_helper.rb
|
144
142
|
- spec/support/callable_symbol_spec.rb
|
145
143
|
- spec/support/collect_hash_spec.rb
|
146
|
-
- spec/support/curry_except_self_spec.rb
|
147
144
|
- spec/support/delete_many_spec.rb
|
148
145
|
- spec/support/has_keys_spec.rb
|
149
146
|
- spec/support/inverf_spec.rb
|