bioinform 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,40 +2,37 @@ require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
- class PM
6
- class Parser
7
- attr_reader :input
8
-
9
- @subclasses ||= []
10
- class << self
11
- def subclasses
12
- @subclasses
13
- end
14
- def inherited(subclass)
15
- PM::Parser.subclasses << subclass
16
- end
17
- end
18
-
19
- def initialize(input)
20
- @input = input
21
- end
22
-
23
- def parse_core
24
- raise NotImplemented
25
- end
26
-
27
-
28
- def parse
29
- parse_core
5
+ class Parser
6
+ attr_reader :input
7
+
8
+ @subclasses ||= []
9
+ class << self
10
+ def subclasses
11
+ @subclasses
12
+ end
13
+ def inherited(subclass)
14
+ Parser.subclasses << subclass
30
15
  end
16
+ end
17
+
18
+ def initialize(input)
19
+ @input = input
20
+ end
21
+
22
+ def parse_core
23
+ raise NotImplemented
24
+ end
25
+
31
26
 
32
- def can_parse?
33
- parse_core
34
- true
35
- rescue
36
- false
37
- end
38
-
27
+ def parse
28
+ parse_core
29
+ end
30
+
31
+ def can_parse?
32
+ parse_core
33
+ true
34
+ rescue
35
+ false
39
36
  end
40
37
  end
41
38
  end
@@ -2,7 +2,7 @@ require 'bioinform/support'
2
2
  require 'bioinform/data_models/parser'
3
3
 
4
4
  module Bioinform
5
- class ArrayParser < PM::Parser
5
+ class ArrayParser < Parser
6
6
  def parse_core
7
7
  raise ArgumentError unless input.is_a?(Array) && input.all?(&:is_a?.(Array)) && input.same_by?(&:length) && (input.size == 4 || input.sample.size == 4)
8
8
  if input.all?{|line| line.size == 4}
@@ -2,7 +2,7 @@ require 'bioinform/support'
2
2
  require 'bioinform/data_models/parser'
3
3
 
4
4
  module Bioinform
5
- class HashParser < PM::Parser
5
+ class HashParser < Parser
6
6
  def parse_core
7
7
  case input
8
8
  when Hash
@@ -3,15 +3,15 @@ require 'bioinform/data_models/parser'
3
3
  require 'bioinform/data_models/parsers/array_parser'
4
4
 
5
5
  module Bioinform
6
- class StringParser < PM::Parser
6
+ class StringParser < Parser
7
7
  def number_pat
8
- '[+-]?\d+(\.\d+)?'
8
+ '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
9
9
  end
10
10
  def row_pat
11
11
  "(#{number_pat} )*#{number_pat}"
12
12
  end
13
13
  def name_pat
14
- '>? ?(?<name>[\w.+:-]+)'
14
+ '(>\s*)?(?<name>\S+)'
15
15
  end
16
16
  def matrix_pat
17
17
  "(?<matrix>(#{row_pat}\n)*#{row_pat})"
@@ -1,3 +1,5 @@
1
+ # Parsers should be included after PM class defined - in order it could catch definitions of new classes
2
+
1
3
  require 'bioinform/data_models/parsers/array_parser'
2
4
  require 'bioinform/data_models/parsers/hash_parser'
3
5
  require 'bioinform/data_models/parsers/string_parser'
@@ -1,6 +1,9 @@
1
1
  require 'bioinform/support'
2
2
 
3
3
  module Bioinform
4
+ IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
5
+ LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
6
+
4
7
  class PM
5
8
  attr_reader :matrix, :background
6
9
  attr_accessor :name
@@ -22,7 +25,7 @@ module Bioinform
22
25
  if @parser
23
26
  raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
24
27
  else
25
- @parser = PM::Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
28
+ @parser = Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
26
29
  raise ArgumentError, 'No one parser can parse specified input' unless @parser
27
30
  end
28
31
  end
@@ -1,12 +1,10 @@
1
1
  require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
3
  module Bioinform
4
-
5
4
  class PWM < PM
6
5
  def score_mean
7
6
  matrix.inject(0.0){ |mean, position| mean + position.each_index.inject(0.0){|sum, letter| sum + position[letter] * probability[letter]} }
8
7
  end
9
-
10
8
  def score_variance
11
9
  matrix.inject(0.0) do |variance, position|
12
10
  variance + position.each_index.inject(0.0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
@@ -19,5 +17,12 @@ module Bioinform
19
17
  n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
20
18
  score_mean + n_ * sigma
21
19
  end
20
+
21
+ def score(word)
22
+ word = word.upcase
23
+ raise ArgumentError unless word.length == length
24
+ raise ArgumentError unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
+ word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(0.0, &:+)
26
+ end
22
27
  end
23
28
  end
@@ -1,6 +1,6 @@
1
1
  require 'bioinform/data_models/pm'
2
- #require 'bioinform/data_models/pcm'
2
+ require 'bioinform/data_models/pcm'
3
3
  require 'bioinform/data_models/pwm'
4
- #require 'bioinform/data_models/ppm'
4
+ require 'bioinform/data_models/ppm'
5
5
  #require 'bioinform/data_models/iupac_word'
6
6
  #require 'bioinform/data_models/iupac_wordset'
@@ -1,5 +1,3 @@
1
- require 'bioinform/support/curry_except_self'
2
-
3
1
  # Useful extension for &:symbol - syntax to make it possible to pass arguments for method in block
4
2
  # ['abc','','','def','ghi'].tap(&:delete.('')) # ==> ['abc','def','ghi']
5
3
  # [1,2,3].map(&:to_s.(2)) # ==> ['1','10','11']
@@ -8,7 +6,7 @@ require 'bioinform/support/curry_except_self'
8
6
  # [%w{1 2 3 4 5},%w{6 7 8 9}].map(&:join.().length) # ==> [5,4]
9
7
  class Symbol
10
8
  def call(*args, &block)
11
- obj=BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
9
+ obj = BasicObject.new.instance_exec(self,args,block) do |meth,params,block|
12
10
  @postprocess_meth = [meth]
13
11
  @postprocess_args = [params]
14
12
  @postprocess_block = [block]
@@ -37,4 +35,16 @@ class Symbol
37
35
 
38
36
  obj
39
37
  end
40
- end
38
+ end
39
+
40
+
41
+ =begin
42
+ # Much simplier but ['abc','cdef','xy','z','wwww'].select(&:size.() == 4) wouldn't work
43
+ class Symbol
44
+ def call(*args, &block)
45
+ proc do |recv|
46
+ recv.__send__(self, *args, &block)
47
+ end
48
+ end
49
+ end
50
+ =end
@@ -3,7 +3,6 @@ require 'active_support/core_ext/hash/indifferent_access'
3
3
 
4
4
  require 'bioinform/support/callable_symbol'
5
5
  require 'bioinform/support/collect_hash'
6
- require 'bioinform/support/curry_except_self'
7
6
  require 'bioinform/support/delete_many'
8
7
  require 'bioinform/support/has_keys'
9
8
  require 'bioinform/support/multiline_squish'
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -2,8 +2,8 @@ require 'spec_helper'
2
2
  require 'bioinform/data_models/parser'
3
3
 
4
4
  module Bioinform
5
- describe PM::Parser do
6
- include PM::Parser::Helpers
5
+ describe Parser do
6
+ include Parser::Helpers
7
7
 
8
8
  before :each do
9
9
  parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
@@ -14,9 +14,9 @@ module Bioinform
14
14
  end
15
15
 
16
16
  context 'when subklass created' do
17
- it 'PM::Parser.subclasses should contain all subclasses' do
18
- PM::Parser.subclasses.should include ParserBad
19
- PM::Parser.subclasses.should include ParserGood
17
+ it 'Parser.subclasses should contain all subclasses' do
18
+ Parser.subclasses.should include ParserBad
19
+ Parser.subclasses.should include ParserGood
20
20
  end
21
21
  end
22
22
 
@@ -29,7 +29,7 @@ module Bioinform
29
29
 
30
30
  describe '#parse' do
31
31
  it 'should raise an error unless reimplemented' do
32
- parser = PM::Parser.new('my stub input')
32
+ parser = Parser.new('my stub input')
33
33
  expect{ parser.parse }.to raise_error
34
34
  end
35
35
 
@@ -48,6 +48,31 @@ module Bioinform
48
48
  9 -8.7 6.54 -3210
49
49
  EOS
50
50
 
51
+ @input_with_exponent = <<-EOS
52
+ 1.23 4.56 7.8 9.0
53
+ 9 -8.7 6.54 -3.210e3
54
+ EOS
55
+
56
+ @input_with_plus_exponent = <<-EOS
57
+ 1.23 4.56 7.8 9.0
58
+ 9 -8.7 6.54 -3.210e+3
59
+ EOS
60
+
61
+ @input_with_minus_exponent = <<-EOS
62
+ 1.23 4.56 7.8 9.0
63
+ 9 -87e-1 6.54 -3210
64
+ EOS
65
+
66
+ @input_with_upcase_exponent = <<-EOS
67
+ 1.23 4.56 7.8 9.0
68
+ 9 -8.7 6.54 -3.210E3
69
+ EOS
70
+
71
+ @input_with_manydigit_exponent = <<-EOS
72
+ 1.23 4.56 7.8 9.0
73
+ 9 -8.7 6.54 -0.0000003210e10
74
+ EOS
75
+
51
76
  @input_transposed = <<-EOS
52
77
  1.23 9
53
78
  4.56 -8.7
@@ -55,14 +80,15 @@ module Bioinform
55
80
  9.0 -3210
56
81
  EOS
57
82
 
83
+
58
84
  @bad_input_not_numeric = <<-EOS
59
85
  1.23 4.56 aaa 9.0
60
86
  9 -8.7 6.54 -3210
61
87
  EOS
62
88
 
63
89
  @bad_input_different_row_size = <<-EOS
64
- 1.23 4.56 7.8
65
- 9 -8.7 6.54 -3210
90
+ 1.23 4.56 7.8 9
91
+ 9 -8.7 6.54
66
92
  EOS
67
93
 
68
94
  @bad_input_not_4_rows_and_cols = <<-EOS
@@ -72,6 +98,11 @@ module Bioinform
72
98
  4 5 6
73
99
  1 1 1
74
100
  EOS
101
+
102
+ @bad_input_with_empty_exponent = <<-EOS
103
+ 1.23 4.56 7.8 9.0
104
+ 9e -8.7 6.54 3210
105
+ EOS
75
106
  end
76
107
 
77
108
  describe '#can_parse?' do
@@ -84,11 +115,18 @@ module Bioinform
84
115
  StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).can_parse?.should be_true
85
116
  StringParser.new(@input_without_name).can_parse?.should be_true
86
117
  StringParser.new(@input_transposed).can_parse?.should be_true
118
+ StringParser.new(@input_with_exponent).can_parse?.should be_true
119
+ StringParser.new(@input_with_plus_exponent).can_parse?.should be_true
120
+ StringParser.new(@input_with_minus_exponent).can_parse?.should be_true
121
+ StringParser.new(@input_with_upcase_exponent).can_parse?.should be_true
122
+ StringParser.new(@input_with_manydigit_exponent).can_parse?.should be_true
123
+
87
124
  end
88
125
  it 'should return false for invalid input string' do
89
126
  StringParser.new(@bad_input_not_numeric).can_parse?.should be_false
90
127
  StringParser.new(@bad_input_different_row_size).can_parse?.should be_false
91
128
  StringParser.new(@bad_input_not_4_rows_and_cols).can_parse?.should be_false
129
+ StringParser.new(@bad_input_with_empty_exponent).can_parse?.should be_false
92
130
  end
93
131
  end
94
132
  describe '#parse' do
@@ -101,11 +139,17 @@ module Bioinform
101
139
  StringParser.new(@input_with_leading_and_finishing_spaces_and_newlines).parse.should == {matrix: @matrix}
102
140
  StringParser.new(@input_without_name).parse.should == {matrix: @matrix}
103
141
  StringParser.new(@input_transposed).parse.should == {matrix: @matrix}
142
+ StringParser.new(@input_with_exponent).parse.should == {matrix: @matrix}
143
+ StringParser.new(@input_with_plus_exponent).parse.should == {matrix: @matrix}
144
+ StringParser.new(@input_with_minus_exponent).parse.should == {matrix: @matrix}
145
+ StringParser.new(@input_with_upcase_exponent).parse.should == {matrix: @matrix}
146
+ StringParser.new(@input_with_manydigit_exponent).parse.should == {matrix: @matrix}
104
147
  end
105
148
  it 'should raise an error for invalid input string' do
106
149
  expect{ StringParser.new(@bad_input_not_numeric).parse }.to raise_error ArgumentError
107
150
  expect{ StringParser.new(@bad_input_different_row_size).parse }.to raise_error ArgumentError
108
151
  expect{ StringParser.new(@bad_input_not_4_rows_and_cols).parse }.to raise_error ArgumentError
152
+ expect{ StringParser.new(@bad_input_with_empty_exponent).parse }.to raise_error ArgumentError
109
153
  end
110
154
  end
111
155
  end
@@ -3,7 +3,7 @@ require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
5
  describe PM do
6
- include PM::Parser::Helpers
6
+ include Parser::Helpers
7
7
 
8
8
  describe '#valid?' do
9
9
  it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
@@ -21,5 +21,25 @@ module Bioinform
21
21
 
22
22
  describe '#gauss_estimation' do
23
23
  end
24
+
25
+ describe '#score' do
26
+ let(:pwm) do
27
+ pwm = PWM.new
28
+ pwm.matrix = [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]]
29
+ pwm
30
+ end
31
+ it 'should evaluate to score of given word' do
32
+ pwm.score('aAAAA').should == 11111
33
+ pwm.score('agata').should == 13141
34
+ pwm.score('CCGCT').should == 22324
35
+ end
36
+ it 'should raise an ArgumentError if word contain bad letter' do
37
+ expect{ pwm.score('AAAAV') }.to raise_error(ArgumentError)
38
+ end
39
+ it 'should raise an ArgumentError if word has size different than size of matrix' do
40
+ expect{ pwm.score('AAA') }.to raise_error(ArgumentError)
41
+ end
42
+ end
43
+
24
44
  end
25
45
  end
data/spec/spec_helper.rb CHANGED
@@ -3,27 +3,25 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
3
3
 
4
4
  require 'rspec'
5
5
  module Bioinform
6
- class PM
7
- class Parser
8
- module Helpers
9
- def parser_stub(class_name, can_parse, result)
10
- klass = Class.new(PM::Parser) do
11
- define_method :can_parse? do can_parse end
12
- define_method :parse do result end
13
- end
14
- #class_levels = class_name.to_s.split('::')
15
- #class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
16
- Bioinform.const_set(class_name.to_s.split('::').last, klass)
6
+ class Parser
7
+ module Helpers
8
+ def parser_stub(class_name, can_parse, result)
9
+ klass = Class.new(Parser) do
10
+ define_method :can_parse? do can_parse end
11
+ define_method :parse do result end
17
12
  end
18
- def parser_subclasses_cleanup
19
- PM::Parser.subclasses.each do |klass|
20
- #class_levels = klass.to_s.split('::')
21
- #class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
22
-
23
- Bioinform.send :remove_const, klass.name.split('::').last
24
- end
25
- PM::Parser.subclasses.clear
13
+ #class_levels = class_name.to_s.split('::')
14
+ #class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
15
+ Bioinform.const_set(class_name.to_s.split('::').last, klass)
16
+ end
17
+ def parser_subclasses_cleanup
18
+ Parser.subclasses.each do |klass|
19
+ #class_levels = klass.to_s.split('::')
20
+ #class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
21
+
22
+ Bioinform.send :remove_const, klass.name.split('::').last
26
23
  end
24
+ Parser.subclasses.clear
27
25
  end
28
26
  end
29
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-27 00:00:00.000000000 Z
12
+ date: 2012-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -78,7 +78,6 @@ files:
78
78
  - lib/bioinform/support.rb
79
79
  - lib/bioinform/support/callable_symbol.rb
80
80
  - lib/bioinform/support/collect_hash.rb
81
- - lib/bioinform/support/curry_except_self.rb
82
81
  - lib/bioinform/support/deep_dup.rb
83
82
  - lib/bioinform/support/delete_many.rb
84
83
  - lib/bioinform/support/has_keys.rb
@@ -98,7 +97,6 @@ files:
98
97
  - spec/spec_helper.rb
99
98
  - spec/support/callable_symbol_spec.rb
100
99
  - spec/support/collect_hash_spec.rb
101
- - spec/support/curry_except_self_spec.rb
102
100
  - spec/support/delete_many_spec.rb
103
101
  - spec/support/has_keys_spec.rb
104
102
  - spec/support/inverf_spec.rb
@@ -143,7 +141,6 @@ test_files:
143
141
  - spec/spec_helper.rb
144
142
  - spec/support/callable_symbol_spec.rb
145
143
  - spec/support/collect_hash_spec.rb
146
- - spec/support/curry_except_self_spec.rb
147
144
  - spec/support/delete_many_spec.rb
148
145
  - spec/support/has_keys_spec.rb
149
146
  - spec/support/inverf_spec.rb
@@ -1,5 +0,0 @@
1
- class Proc
2
- def curry_except_self(*args, &block)
3
- Proc.new{|slf| curry.call(slf, *args, &block) }
4
- end
5
- end
@@ -1,9 +0,0 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/curry_except_self'
3
-
4
- describe Proc do
5
- describe '#curry_except_self' do
6
- it 'should return proc'
7
- it 'should behave like a proc where all arguments except first are curried'
8
- end
9
- end