bioinform 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,6 +2,18 @@ require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
3
  module Bioinform
4
4
  class PCM < PM
5
+ def count
6
+ matrix.first.inject(&:+)
7
+ end
5
8
 
9
+ def to_pwm(pseudocount = Math.log(count))
10
+ mat = each_position.map do |pos|
11
+ pos.each_index.map do |ind|
12
+ Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
13
+ end
14
+ end
15
+ PWM.new(mat)
16
+ end
17
+
6
18
  end
7
19
  end
@@ -1,87 +1,75 @@
1
1
  require 'bioinform/support'
2
+ require 'bioinform/parsers'
2
3
 
3
4
  module Bioinform
4
5
  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
5
6
  LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
6
7
 
7
8
  class PM
8
- attr_reader :matrix, :background
9
- attr_accessor :name
9
+ attr_reader :matrix
10
+ attr_accessor :background, :name
10
11
 
11
- def initialize(input = nil, parser = nil)
12
+ def choose_parser(input)
13
+ input.is_a?(String) ? StringParser : Parser
14
+ [Parser, StringParser, StringFantomParser].find do |parser|
15
+ self.class.new(input, parser) rescue nil
16
+ end
17
+ end
18
+
19
+ def initialize(input, parser = nil)
20
+ parser ||= choose_parser(input)
21
+ result = parser.new(input).parse
22
+ @matrix = result[:matrix]
23
+ @name = result[:name]
12
24
  @background = [1, 1, 1, 1]
13
- @input = input
14
- @parser = parser
15
- return unless @input
16
- parser_init
17
- matrix_init
25
+ raise 'matrix not valid' unless valid?
18
26
  end
19
27
 
20
28
  def ==(other)
21
29
  @matrix == other.matrix && @background == other.background
22
30
  end
23
31
 
24
- def parser_init
25
- if @parser
26
- raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
27
- else
28
- @parser = Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
29
- raise ArgumentError, 'No one parser can parse specified input' unless @parser
30
- end
31
- end
32
-
33
- def matrix_init
34
- parse_result = @parser.new(@input).parse
35
- raise ArgumentError, 'Used parser result has no `matrix` key' unless parse_result.has_key? :matrix
36
-
37
- configure_from_hash(parse_result)
38
- end
39
-
40
32
  def valid?
41
- @matrix.is_a?(Array) &&
33
+ @matrix.is_a?(Array) &&
42
34
  @matrix.all?(&:is_a?.(Array)) &&
43
- @matrix.all?(&:all?.(&:is_a?.(Numeric))) &&
44
- @matrix.all?{|pos| pos.size == 4}
35
+ @matrix.all?{|pos| pos.size == 4} &&
36
+ @matrix.all?(&:all?.(&:is_a?.(Numeric)))
45
37
  rescue
46
38
  false
47
39
  end
48
40
 
49
- def configure_from_hash(parse_result)
50
- parse_result.each{|key, value| send("#{key}=", value) if respond_to? "#{key}=" }
51
- end
52
-
53
- def matrix=(new_matrix)
54
- old_matrix, @matrix = matrix, new_matrix
55
- raise ArgumentError, 'Matrix has invalid format:' unless valid?
56
- rescue
57
- @matrix = old_matrix
58
- raise
41
+ def each_position
42
+ if block_given?
43
+ matrix.each{|pos| yield pos}
44
+ else
45
+ Enumerator.new(self, :each_position)
46
+ end
59
47
  end
60
-
61
- def length;
62
- @matrix.length;
48
+
49
+ def length
50
+ @matrix.length
63
51
  end
64
52
  alias_method :size, :length
65
53
 
66
54
  def to_s(with_name = true)
67
- matrix = @matrix.map(&:join.("\t")).join("\n")
55
+ matrix_str = each_position.map(&:join.("\t")).join("\n")
68
56
  if with_name && @name
69
- "#{@name}\n#{matrix}"
57
+ "#{@name}\n#{matrix_str}"
70
58
  else
71
- matrix
59
+ matrix_str
72
60
  end
73
61
  end
74
62
 
75
63
  def pretty_string(with_name = true)
76
64
  header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
77
- matrix_rows = @matrix.map do |position|
65
+ matrix_rows = each_position.map do |position|
78
66
  position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
79
67
  end
80
- matrix = matrix_rows.join("\n")
68
+ matrix_str = matrix_rows.join("\n")
81
69
  if with_name && @name
82
- @name + "\n" + header + matrix
70
+ @name + "\n" + header + matrix_str
83
71
  else
84
- header + matrix
72
+ header + matrix_str
85
73
  end
86
74
  end
87
75
 
@@ -133,17 +121,13 @@ module Bioinform
133
121
  self
134
122
  end
135
123
 
136
- def background_sum
137
- @background.inject(0.0, &:+)
138
- end
139
-
140
124
  def vocabulary_volume
141
- background_sum ** length
125
+ background.inject(&:+) ** length
142
126
  end
143
127
 
144
128
  def probability
145
- sum = background_sum
146
- @background.map{|element| element.to_f / sum}
129
+ sum = background.inject(0.0, &:+)
130
+ background.map{|element| element.to_f / sum}
147
131
  end
148
132
 
149
133
 
@@ -3,12 +3,12 @@ require 'bioinform/data_models/pm'
3
3
  module Bioinform
4
4
  class PWM < PM
5
5
  def score_mean
6
- matrix.inject(0.0){ |mean, position| mean + position.each_index.inject(0.0){|sum, letter| sum + position[letter] * probability[letter]} }
6
+ each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
7
7
  end
8
8
  def score_variance
9
- matrix.inject(0.0) do |variance, position|
10
- variance + position.each_index.inject(0.0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
- position.each_index.inject(0.0) { |sum,letter| sum + position[letter] * probability[letter] }**2
9
+ each_position.inject(0) do |variance, position|
10
+ variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
+ position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
12
12
  end
13
13
  end
14
14
 
@@ -20,9 +20,9 @@ module Bioinform
20
20
 
21
21
  def score(word)
22
22
  word = word.upcase
23
- raise ArgumentError unless word.length == length
24
- raise ArgumentError unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(0.0, &:+)
23
+ raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
+ raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
+ word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(&:+)
26
26
  end
27
27
  end
28
28
  end
@@ -1,3 +1,5 @@
1
+ require 'bioinform/parsers'
2
+
1
3
  require 'bioinform/data_models/pm'
2
4
  require 'bioinform/data_models/pcm'
3
5
  require 'bioinform/data_models/pwm'
@@ -0,0 +1,40 @@
1
+ require 'bioinform/support'
2
+
3
+ module Bioinform
4
+ class Parser
5
+ attr_reader :input, :matrix
6
+
7
+ def initialize(input)
8
+ @input = input
9
+ end
10
+
11
+ def parse
12
+ inp = input
13
+ transpose = inp.is_a?(Hash)
14
+ inp = ClassMethods.try_convert_to_array(inp)
15
+ inp.map!{|x| ClassMethods.try_convert_to_array(x)}
16
+ transpose = true if (not inp.all?{|x| x.size == 4}) && inp.size == 4 && inp.same_by?(&:size)
17
+ @matrix = transpose ? inp.transpose : inp
18
+ result
19
+ rescue
20
+ {}
21
+ end
22
+
23
+ def result(options={})
24
+ raise 'Parsing Error' unless matrix.is_a?(Array) && matrix.all?(&:is_a?.(Array)) && matrix.all?{|pos| pos.size == 4} && matrix.all?(&:all?.(&:is_a?.(Numeric)))
25
+ options.merge(matrix: @matrix)
26
+ end
27
+
28
+ class ClassMethods
29
+ def self.array_from_acgt_hash(hsh)
30
+ hsh = hsh.collect_hash{|key,value| [key.to_s.upcase, value] }
31
+ raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == %w[A C G T]
32
+ %w[A C G T].collect{|letter| hsh[letter] }
33
+ end
34
+ def self.try_convert_to_array(inp)
35
+ return inp if inp.is_a? Array
36
+ array_from_acgt_hash(inp)
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,6 +1,5 @@
1
1
  require 'bioinform/support'
2
- require 'bioinform/data_models/parser'
3
- require 'bioinform/data_models/parsers/string_parser'
2
+ require 'bioinform/parsers/string_parser'
4
3
 
5
4
  module Bioinform
6
5
  class StringFantomParser < StringParser
@@ -1,8 +1,7 @@
1
1
  require 'bioinform/support'
2
- require 'bioinform/data_models/parser'
3
- require 'bioinform/data_models/parsers/array_parser'
2
+ require 'bioinform/parsers/parser'
4
3
 
5
- module Bioinform
4
+ module Bioinform
6
5
  class StringParser < Parser
7
6
  def number_pat
8
7
  '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
@@ -28,18 +27,19 @@ module Bioinform
28
27
  matrix.split("\n").map{|line| line.split.map(&:to_f)}
29
28
  end
30
29
 
31
- def parse_core
30
+ def parse
32
31
  case input
33
- when String
32
+ when String
34
33
  match = input.multiline_squish.match(pattern)
35
34
  raise ArgumentError unless match
36
35
  matrix = matrix_preprocess( match[:matrix] )
37
36
  raise ArgumentError unless matrix
38
- result = ArrayParser.new(matrix).parse
39
- match[:name] ? result.merge(name: match[:name]) : result
37
+ Parser.new(matrix).parse.merge(name: match[:name])
40
38
  else
41
39
  raise ArgumentError
42
40
  end
41
+ rescue
42
+ {}
43
43
  end
44
44
  end
45
45
  end
@@ -0,0 +1,3 @@
1
+ require 'bioinform/parsers/parser'
2
+ require 'bioinform/parsers/string_parser'
3
+ require 'bioinform/parsers/string_fantom_parser'
@@ -1,3 +1,5 @@
1
+ require 'bioinform/support/collect_hash'
2
+
1
3
  class Array
2
4
  def partial_sums(initial = 0.0)
3
5
  sums = initial
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
data/lib/bioinform.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'bioinform/version'
2
2
  require 'bioinform/support'
3
3
  require 'bioinform/data_models'
4
- require 'bioinform/data_models/parsers'
5
4
 
6
5
  module Bioinform
7
6
  # Your code goes here...
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/data_models/pcm'
3
+
4
+ module Bioinform
5
+ describe PCM do
6
+ describe '#count' do
7
+ it 'should be equal to sum of elements at position' do
8
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).count.should == 7
9
+ PCM.new([[1, 2.3, 3.2, 1],[4.4, 1.1, 1, 2]]).count.should == 7.5
10
+ end
11
+ end
12
+
13
+ describe '#to_pwm' do
14
+ it 'should return PWM' do
15
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should be_kind_of(PWM)
16
+ end
17
+ it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
18
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118,0.486,-0.47],[0.754,-0.47,-0.47,0.118]]
19
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057,0.258,-0.194],[0.425,-0.194,-0.194,0.057]]
20
+ end
21
+ it 'should use default pseudocount equal to log(count)' do
22
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(Math.log(7))
23
+ end
24
+ end
25
+
26
+ end
27
+ end