bioinform 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bioinform/data_models/parser.rb +29 -32
- data/lib/bioinform/data_models/parsers/array_parser.rb +1 -1
- data/lib/bioinform/data_models/parsers/hash_parser.rb +1 -1
- data/lib/bioinform/data_models/parsers/string_parser.rb +3 -3
- data/lib/bioinform/data_models/parsers.rb +2 -0
- data/lib/bioinform/data_models/pm.rb +4 -1
- data/lib/bioinform/data_models/pwm.rb +7 -2
- data/lib/bioinform/data_models.rb +2 -2
- data/lib/bioinform/support/callable_symbol.rb +14 -4
- data/lib/bioinform/support.rb +0 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/data_models/parser_spec.rb +6 -6
- data/spec/data_models/parsers/string_parser_spec.rb +46 -2
- data/spec/data_models/pm_spec.rb +1 -1
- data/spec/data_models/pwm_spec.rb +20 -0
- data/spec/spec_helper.rb +17 -19
- metadata +2 -5
- data/lib/bioinform/support/curry_except_self.rb +0 -5
- data/spec/support/curry_except_self_spec.rb +0 -9
@@ -2,40 +2,37 @@ require 'bioinform/support'
|
|
2
2
|
require 'bioinform/data_models/pm'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
class
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
PM::Parser.subclasses << subclass
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def initialize(input)
|
20
|
-
@input = input
|
21
|
-
end
|
22
|
-
|
23
|
-
def parse_core
|
24
|
-
raise NotImplemented
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
def parse
|
29
|
-
parse_core
|
5
|
+
class Parser
|
6
|
+
attr_reader :input
|
7
|
+
|
8
|
+
@subclasses ||= []
|
9
|
+
class << self
|
10
|
+
def subclasses
|
11
|
+
@subclasses
|
12
|
+
end
|
13
|
+
def inherited(subclass)
|
14
|
+
Parser.subclasses << subclass
|
30
15
|
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize(input)
|
19
|
+
@input = input
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse_core
|
23
|
+
raise NotImplemented
|
24
|
+
end
|
25
|
+
|
31
26
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
def parse
|
28
|
+
parse_core
|
29
|
+
end
|
30
|
+
|
31
|
+
def can_parse?
|
32
|
+
parse_core
|
33
|
+
true
|
34
|
+
rescue
|
35
|
+
false
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
@@ -2,7 +2,7 @@ require 'bioinform/support'
|
|
2
2
|
require 'bioinform/data_models/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
class ArrayParser <
|
5
|
+
class ArrayParser < Parser
|
6
6
|
def parse_core
|
7
7
|
raise ArgumentError unless input.is_a?(Array) && input.all?(&:is_a?.(Array)) && input.same_by?(&:length) && (input.size == 4 || input.sample.size == 4)
|
8
8
|
if input.all?{|line| line.size == 4}
|
@@ -3,15 +3,15 @@ require 'bioinform/data_models/parser'
|
|
3
3
|
require 'bioinform/data_models/parsers/array_parser'
|
4
4
|
|
5
5
|
module Bioinform
|
6
|
-
class StringParser <
|
6
|
+
class StringParser < Parser
|
7
7
|
def number_pat
|
8
|
-
'[+-]?\d+(\.\d+)?'
|
8
|
+
'[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
|
9
9
|
end
|
10
10
|
def row_pat
|
11
11
|
"(#{number_pat} )*#{number_pat}"
|
12
12
|
end
|
13
13
|
def name_pat
|
14
|
-
'
|
14
|
+
'(>\s*)?(?<name>\S+)'
|
15
15
|
end
|
16
16
|
def matrix_pat
|
17
17
|
"(?<matrix>(#{row_pat}\n)*#{row_pat})"
|
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'bioinform/support'
|
2
2
|
|
3
3
|
module Bioinform
|
4
|
+
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
|
5
|
+
LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
|
6
|
+
|
4
7
|
class PM
|
5
8
|
attr_reader :matrix, :background
|
6
9
|
attr_accessor :name
|
@@ -22,7 +25,7 @@ module Bioinform
|
|
22
25
|
if @parser
|
23
26
|
raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
|
24
27
|
else
|
25
|
-
@parser =
|
28
|
+
@parser = Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
|
26
29
|
raise ArgumentError, 'No one parser can parse specified input' unless @parser
|
27
30
|
end
|
28
31
|
end
|
@@ -1,12 +1,10 @@
|
|
1
1
|
require 'bioinform/support'
|
2
2
|
require 'bioinform/data_models/pm'
|
3
3
|
module Bioinform
|
4
|
-
|
5
4
|
class PWM < PM
|
6
5
|
def score_mean
|
7
6
|
matrix.inject(0.0){ |mean, position| mean + position.each_index.inject(0.0){|sum, letter| sum + position[letter] * probability[letter]} }
|
8
7
|
end
|
9
|
-
|
10
8
|
def score_variance
|
11
9
|
matrix.inject(0.0) do |variance, position|
|
12
10
|
variance + position.each_index.inject(0.0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
@@ -19,5 +17,12 @@ module Bioinform
|
|
19
17
|
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
20
18
|
score_mean + n_ * sigma
|
21
19
|
end
|
20
|
+
|
21
|
+
def score(word)
|
22
|
+
word = word.upcase
|
23
|
+
raise ArgumentError unless word.length == length
|
24
|
+
raise ArgumentError unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
25
|
+
word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(0.0, &:+)
|
26
|
+
end
|
22
27
|
end
|
23
28
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'bioinform/data_models/pm'
|
2
|
-
|
2
|
+
require 'bioinform/data_models/pcm'
|
3
3
|
require 'bioinform/data_models/pwm'
|
4
|
-
|
4
|
+
require 'bioinform/data_models/ppm'
|
5
5
|
#require 'bioinform/data_models/iupac_word'
|
6
6
|
#require 'bioinform/data_models/iupac_wordset'
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'bioinform/support/curry_except_self'
|
2
|
-
|
3
1
|
# Useful extension for &:symbol - syntax to make it possible to pass arguments for method in block
|
4
2
|
# ['abc','','','def','ghi'].tap(&:delete.('')) # ==> ['abc','def','ghi']
|
5
3
|
# [1,2,3].map(&:to_s.(2)) # ==> ['1','10','11']
|
@@ -8,7 +6,7 @@ require 'bioinform/support/curry_except_self'
|
|
8
6
|
# [%w{1 2 3 4 5},%w{6 7 8 9}].map(&:join.().length) # ==> [5,4]
|
9
7
|
class Symbol
|
10
8
|
def call(*args, &block)
|
11
|
-
obj=BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
|
9
|
+
obj = BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
|
12
10
|
@postprocess_meth = [meth]
|
13
11
|
@postprocess_args = [params]
|
14
12
|
@postprocess_block = [block]
|
@@ -37,4 +35,16 @@ class Symbol
|
|
37
35
|
|
38
36
|
obj
|
39
37
|
end
|
40
|
-
end
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
=begin
|
42
|
+
# Much simplier but ['abc','cdef','xy','z','wwww'].select(&:size.() == 4) wouldn't work
|
43
|
+
class Symbol
|
44
|
+
def call(*args, &block)
|
45
|
+
proc do |recv|
|
46
|
+
recv.__send__(self, *args, &block)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
=end
|
data/lib/bioinform/support.rb
CHANGED
@@ -3,7 +3,6 @@ require 'active_support/core_ext/hash/indifferent_access'
|
|
3
3
|
|
4
4
|
require 'bioinform/support/callable_symbol'
|
5
5
|
require 'bioinform/support/collect_hash'
|
6
|
-
require 'bioinform/support/curry_except_self'
|
7
6
|
require 'bioinform/support/delete_many'
|
8
7
|
require 'bioinform/support/has_keys'
|
9
8
|
require 'bioinform/support/multiline_squish'
|
data/lib/bioinform/version.rb
CHANGED
@@ -2,8 +2,8 @@ require 'spec_helper'
|
|
2
2
|
require 'bioinform/data_models/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
describe
|
6
|
-
include
|
5
|
+
describe Parser do
|
6
|
+
include Parser::Helpers
|
7
7
|
|
8
8
|
before :each do
|
9
9
|
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
@@ -14,9 +14,9 @@ module Bioinform
|
|
14
14
|
end
|
15
15
|
|
16
16
|
context 'when subklass created' do
|
17
|
-
it '
|
18
|
-
|
19
|
-
|
17
|
+
it 'Parser.subclasses should contain all subclasses' do
|
18
|
+
Parser.subclasses.should include ParserBad
|
19
|
+
Parser.subclasses.should include ParserGood
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -29,7 +29,7 @@ module Bioinform
|
|
29
29
|
|
30
30
|
describe '#parse' do
|
31
31
|
it 'should raise an error unless reimplemented' do
|
32
|
-
parser =
|
32
|
+
parser = Parser.new('my stub input')
|
33
33
|
expect{ parser.parse }.to raise_error
|
34
34
|
end
|
35
35
|
|
@@ -48,6 +48,31 @@ module Bioinform
|
|
48
48
|
9 -8.7 6.54 -3210
|
49
49
|
EOS
|
50
50
|
|
51
|
+
@input_with_exponent = <<-EOS
|
52
|
+
1.23 4.56 7.8 9.0
|
53
|
+
9 -8.7 6.54 -3.210e3
|
54
|
+
EOS
|
55
|
+
|
56
|
+
@input_with_plus_exponent = <<-EOS
|
57
|
+
1.23 4.56 7.8 9.0
|
58
|
+
9 -8.7 6.54 -3.210e+3
|
59
|
+
EOS
|
60
|
+
|
61
|
+
@input_with_minus_exponent = <<-EOS
|
62
|
+
1.23 4.56 7.8 9.0
|
63
|
+
9 -87e-1 6.54 -3210
|
64
|
+
EOS
|
65
|
+
|
66
|
+
@input_with_upcase_exponent = <<-EOS
|
67
|
+
1.23 4.56 7.8 9.0
|
68
|
+
9 -8.7 6.54 -3.210E3
|
69
|
+
EOS
|
70
|
+
|
71
|
+
@input_with_manydigit_exponent = <<-EOS
|
72
|
+
1.23 4.56 7.8 9.0
|
73
|
+
9 -8.7 6.54 -0.0000003210e10
|
74
|
+
EOS
|
75
|
+
|
51
76
|
@input_transposed = <<-EOS
|
52
77
|
1.23 9
|
53
78
|
4.56 -8.7
|
@@ -55,14 +80,15 @@ module Bioinform
|
|
55
80
|
9.0 -3210
|
56
81
|
EOS
|
57
82
|
|
83
|
+
|
58
84
|
@bad_input_not_numeric = <<-EOS
|
59
85
|
1.23 4.56 aaa 9.0
|
60
86
|
9 -8.7 6.54 -3210
|
61
87
|
EOS
|
62
88
|
|
63
89
|
@bad_input_different_row_size = <<-EOS
|
64
|
-
1.23 4.56 7.8
|
65
|
-
9 -8.7 6.54
|
90
|
+
1.23 4.56 7.8 9
|
91
|
+
9 -8.7 6.54
|
66
92
|
EOS
|
67
93
|
|
68
94
|
@bad_input_not_4_rows_and_cols = <<-EOS
|
@@ -72,6 +98,11 @@ module Bioinform
|
|
72
98
|
4 5 6
|
73
99
|
1 1 1
|
74
100
|
EOS
|
101
|
+
|
102
|
+
@bad_input_with_empty_exponent = <<-EOS
|
103
|
+
1.23 4.56 7.8 9.0
|
104
|
+
9e -8.7 6.54 3210
|
105
|
+
EOS
|
75
106
|
end
|
76
107
|
|
77
108
|
describe '#can_parse?' do
|
@@ -84,11 +115,18 @@ module Bioinform
|
|
84
115
|
StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).can_parse?.should be_true
|
85
116
|
StringParser.new(@input_without_name).can_parse?.should be_true
|
86
117
|
StringParser.new(@input_transposed).can_parse?.should be_true
|
118
|
+
StringParser.new(@input_with_exponent).can_parse?.should be_true
|
119
|
+
StringParser.new(@input_with_plus_exponent).can_parse?.should be_true
|
120
|
+
StringParser.new(@input_with_minus_exponent).can_parse?.should be_true
|
121
|
+
StringParser.new(@input_with_upcase_exponent).can_parse?.should be_true
|
122
|
+
StringParser.new(@input_with_manydigit_exponent).can_parse?.should be_true
|
123
|
+
|
87
124
|
end
|
88
125
|
it 'should return false for invalid input string' do
|
89
126
|
StringParser.new(@bad_input_not_numeric).can_parse?.should be_false
|
90
127
|
StringParser.new(@bad_input_different_row_size).can_parse?.should be_false
|
91
128
|
StringParser.new(@bad_input_not_4_rows_and_cols).can_parse?.should be_false
|
129
|
+
StringParser.new(@bad_input_with_empty_exponent).can_parse?.should be_false
|
92
130
|
end
|
93
131
|
end
|
94
132
|
describe '#parse' do
|
@@ -101,11 +139,17 @@ module Bioinform
|
|
101
139
|
StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).parse.should == {matrix: @matrix}
|
102
140
|
StringParser.new(@input_without_name).parse.should == {matrix: @matrix}
|
103
141
|
StringParser.new(@input_transposed).parse.should == {matrix: @matrix}
|
142
|
+
StringParser.new(@input_with_exponent).parse.should == {matrix: @matrix}
|
143
|
+
StringParser.new(@input_with_plus_exponent).parse.should == {matrix: @matrix}
|
144
|
+
StringParser.new(@input_with_minus_exponent).parse.should == {matrix: @matrix}
|
145
|
+
StringParser.new(@input_with_upcase_exponent).parse.should == {matrix: @matrix}
|
146
|
+
StringParser.new(@input_with_manydigit_exponent).parse.should == {matrix: @matrix}
|
104
147
|
end
|
105
148
|
it 'should raise an error for invalid input string' do
|
106
149
|
expect{ StringParser.new(@bad_input_not_numeric).parse }.to raise_error ArgumentError
|
107
150
|
expect{ StringParser.new(@bad_input_different_row_size).parse }.to raise_error ArgumentError
|
108
151
|
expect{ StringParser.new(@bad_input_not_4_rows_and_cols).parse }.to raise_error ArgumentError
|
152
|
+
expect{ StringParser.new(@bad_input_with_empty_exponent).parse }.to raise_error ArgumentError
|
109
153
|
end
|
110
154
|
end
|
111
155
|
end
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -21,5 +21,25 @@ module Bioinform
|
|
21
21
|
|
22
22
|
describe '#gauss_estimation' do
|
23
23
|
end
|
24
|
+
|
25
|
+
describe '#score' do
|
26
|
+
let(:pwm) do
|
27
|
+
pwm = PWM.new
|
28
|
+
pwm.matrix = [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]]
|
29
|
+
pwm
|
30
|
+
end
|
31
|
+
it 'should evaluate to score of given word' do
|
32
|
+
pwm.score('aAAAA').should == 11111
|
33
|
+
pwm.score('agata').should == 13141
|
34
|
+
pwm.score('CCGCT').should == 22324
|
35
|
+
end
|
36
|
+
it 'should raise an ArgumentError if word contain bad letter' do
|
37
|
+
expect{ pwm.score('AAAAV') }.to raise_error(ArgumentError)
|
38
|
+
end
|
39
|
+
it 'should raise an ArgumentError if word has size different than size of matrix' do
|
40
|
+
expect{ pwm.score('AAA') }.to raise_error(ArgumentError)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
24
44
|
end
|
25
45
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,27 +3,25 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
|
|
3
3
|
|
4
4
|
require 'rspec'
|
5
5
|
module Bioinform
|
6
|
-
class
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
define_method :parse do result end
|
13
|
-
end
|
14
|
-
#class_levels = class_name.to_s.split('::')
|
15
|
-
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
16
|
-
Bioinform.const_set(class_name.to_s.split('::').last, klass)
|
6
|
+
class Parser
|
7
|
+
module Helpers
|
8
|
+
def parser_stub(class_name, can_parse, result)
|
9
|
+
klass = Class.new(Parser) do
|
10
|
+
define_method :can_parse? do can_parse end
|
11
|
+
define_method :parse do result end
|
17
12
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
13
|
+
#class_levels = class_name.to_s.split('::')
|
14
|
+
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
15
|
+
Bioinform.const_set(class_name.to_s.split('::').last, klass)
|
16
|
+
end
|
17
|
+
def parser_subclasses_cleanup
|
18
|
+
Parser.subclasses.each do |klass|
|
19
|
+
#class_levels = klass.to_s.split('::')
|
20
|
+
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
21
|
+
|
22
|
+
Bioinform.send :remove_const, klass.name.split('::').last
|
26
23
|
end
|
24
|
+
Parser.subclasses.clear
|
27
25
|
end
|
28
26
|
end
|
29
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -78,7 +78,6 @@ files:
|
|
78
78
|
- lib/bioinform/support.rb
|
79
79
|
- lib/bioinform/support/callable_symbol.rb
|
80
80
|
- lib/bioinform/support/collect_hash.rb
|
81
|
-
- lib/bioinform/support/curry_except_self.rb
|
82
81
|
- lib/bioinform/support/deep_dup.rb
|
83
82
|
- lib/bioinform/support/delete_many.rb
|
84
83
|
- lib/bioinform/support/has_keys.rb
|
@@ -98,7 +97,6 @@ files:
|
|
98
97
|
- spec/spec_helper.rb
|
99
98
|
- spec/support/callable_symbol_spec.rb
|
100
99
|
- spec/support/collect_hash_spec.rb
|
101
|
-
- spec/support/curry_except_self_spec.rb
|
102
100
|
- spec/support/delete_many_spec.rb
|
103
101
|
- spec/support/has_keys_spec.rb
|
104
102
|
- spec/support/inverf_spec.rb
|
@@ -143,7 +141,6 @@ test_files:
|
|
143
141
|
- spec/spec_helper.rb
|
144
142
|
- spec/support/callable_symbol_spec.rb
|
145
143
|
- spec/support/collect_hash_spec.rb
|
146
|
-
- spec/support/curry_except_self_spec.rb
|
147
144
|
- spec/support/delete_many_spec.rb
|
148
145
|
- spec/support/has_keys_spec.rb
|
149
146
|
- spec/support/inverf_spec.rb
|