bioinform 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,18 @@ require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
3
  module Bioinform
4
4
  class PCM < PM
5
+ def count
6
+ matrix.first.inject(&:+)
7
+ end
5
8
 
9
+ def to_pwm(pseudocount = Math.log(count))
10
+ mat = each_position.map do |pos|
11
+ pos.each_index.map do |ind|
12
+ Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
13
+ end
14
+ end
15
+ PWM.new(mat)
16
+ end
17
+
6
18
  end
7
19
  end
@@ -1,87 +1,75 @@
1
1
  require 'bioinform/support'
2
+ require 'bioinform/parsers'
2
3
 
3
4
  module Bioinform
4
5
  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
5
6
  LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
6
7
 
7
8
  class PM
8
- attr_reader :matrix, :background
9
- attr_accessor :name
9
+ attr_reader :matrix
10
+ attr_accessor :background, :name
10
11
 
11
- def initialize(input = nil, parser = nil)
12
+ def choose_parser(input)
13
+ input.is_a?(String) ? StringParser : Parser
14
+ [Parser, StringParser, StringFantomParser].find do |parser|
15
+ self.class.new(input, parser) rescue nil
16
+ end
17
+ end
18
+
19
+ def initialize(input, parser = nil)
20
+ parser ||= choose_parser(input)
21
+ result = parser.new(input).parse
22
+ @matrix = result[:matrix]
23
+ @name = result[:name]
12
24
  @background = [1, 1, 1, 1]
13
- @input = input
14
- @parser = parser
15
- return unless @input
16
- parser_init
17
- matrix_init
25
+ raise 'matrix not valid' unless valid?
18
26
  end
19
27
 
20
28
  def ==(other)
21
29
  @matrix == other.matrix && @background == other.background
22
30
  end
23
31
 
24
- def parser_init
25
- if @parser
26
- raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
27
- else
28
- @parser = Parser.subclasses.find{|parser_class| parser_class.new(@input).can_parse? }
29
- raise ArgumentError, 'No one parser can parse specified input' unless @parser
30
- end
31
- end
32
-
33
- def matrix_init
34
- parse_result = @parser.new(@input).parse
35
- raise ArgumentError, 'Used parser result has no `matrix` key' unless parse_result.has_key? :matrix
36
-
37
- configure_from_hash(parse_result)
38
- end
39
-
40
32
  def valid?
41
- @matrix.is_a?(Array) &&
33
+ @matrix.is_a?(Array) &&
42
34
  @matrix.all?(&:is_a?.(Array)) &&
43
- @matrix.all?(&:all?.(&:is_a?.(Numeric))) &&
44
- @matrix.all?{|pos| pos.size == 4}
35
+ @matrix.all?{|pos| pos.size == 4} &&
36
+ @matrix.all?(&:all?.(&:is_a?.(Numeric)))
45
37
  rescue
46
38
  false
47
39
  end
48
40
 
49
- def configure_from_hash(parse_result)
50
- parse_result.each{|key, value| send("#{key}=", value) if respond_to? "#{key}=" }
51
- end
52
-
53
- def matrix=(new_matrix)
54
- old_matrix, @matrix = matrix, new_matrix
55
- raise ArgumentError, 'Matrix has invalid format:' unless valid?
56
- rescue
57
- @matrix = old_matrix
58
- raise
41
+ def each_position
42
+ if block_given?
43
+ matrix.each{|pos| yield pos}
44
+ else
45
+ Enumerator.new(self, :each_position)
46
+ end
59
47
  end
60
-
61
- def length;
62
- @matrix.length;
48
+
49
+ def length
50
+ @matrix.length
63
51
  end
64
52
  alias_method :size, :length
65
53
 
66
54
  def to_s(with_name = true)
67
- matrix = @matrix.map(&:join.("\t")).join("\n")
55
+ matrix_str = each_position.map(&:join.("\t")).join("\n")
68
56
  if with_name && @name
69
- "#{@name}\n#{matrix}"
57
+ "#{@name}\n#{matrix_str}"
70
58
  else
71
- matrix
59
+ matrix_str
72
60
  end
73
61
  end
74
62
 
75
63
  def pretty_string(with_name = true)
76
64
  header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
77
- matrix_rows = @matrix.map do |position|
65
+ matrix_rows = each_position.map do |position|
78
66
  position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
79
67
  end
80
- matrix = matrix_rows.join("\n")
68
+ matrix_str = matrix_rows.join("\n")
81
69
  if with_name && @name
82
- @name + "\n" + header + matrix
70
+ @name + "\n" + header + matrix_str
83
71
  else
84
- header + matrix
72
+ header + matrix_str
85
73
  end
86
74
  end
87
75
 
@@ -133,17 +121,13 @@ module Bioinform
133
121
  self
134
122
  end
135
123
 
136
- def background_sum
137
- @background.inject(0.0, &:+)
138
- end
139
-
140
124
  def vocabulary_volume
141
- background_sum ** length
125
+ background.inject(&:+) ** length
142
126
  end
143
127
 
144
128
  def probability
145
- sum = background_sum
146
- @background.map{|element| element.to_f / sum}
129
+ sum = background.inject(0.0, &:+)
130
+ background.map{|element| element.to_f / sum}
147
131
  end
148
132
 
149
133
 
@@ -3,12 +3,12 @@ require 'bioinform/data_models/pm'
3
3
  module Bioinform
4
4
  class PWM < PM
5
5
  def score_mean
6
- matrix.inject(0.0){ |mean, position| mean + position.each_index.inject(0.0){|sum, letter| sum + position[letter] * probability[letter]} }
6
+ each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
7
7
  end
8
8
  def score_variance
9
- matrix.inject(0.0) do |variance, position|
10
- variance + position.each_index.inject(0.0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
- position.each_index.inject(0.0) { |sum,letter| sum + position[letter] * probability[letter] }**2
9
+ each_position.inject(0) do |variance, position|
10
+ variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
11
+ position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
12
12
  end
13
13
  end
14
14
 
@@ -20,9 +20,9 @@ module Bioinform
20
20
 
21
21
  def score(word)
22
22
  word = word.upcase
23
- raise ArgumentError unless word.length == length
24
- raise ArgumentError unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(0.0, &:+)
23
+ raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
+ raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
+ word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(&:+)
26
26
  end
27
27
  end
28
28
  end
@@ -1,3 +1,5 @@
1
+ require 'bioinform/parsers'
2
+
1
3
  require 'bioinform/data_models/pm'
2
4
  require 'bioinform/data_models/pcm'
3
5
  require 'bioinform/data_models/pwm'
@@ -0,0 +1,40 @@
1
+ require 'bioinform/support'
2
+
3
+ module Bioinform
4
+ class Parser
5
+ attr_reader :input, :matrix
6
+
7
+ def initialize(input)
8
+ @input = input
9
+ end
10
+
11
+ def parse
12
+ inp = input
13
+ transpose = inp.is_a?(Hash)
14
+ inp = ClassMethods.try_convert_to_array(inp)
15
+ inp.map!{|x| ClassMethods.try_convert_to_array(x)}
16
+ transpose = true if (not inp.all?{|x| x.size == 4}) && inp.size == 4 && inp.same_by?(&:size)
17
+ @matrix = transpose ? inp.transpose : inp
18
+ result
19
+ rescue
20
+ {}
21
+ end
22
+
23
+ def result(options={})
24
+ raise 'Parsing Error' unless matrix.is_a?(Array) && matrix.all?(&:is_a?.(Array)) && matrix.all?{|pos| pos.size == 4} && matrix.all?(&:all?.(&:is_a?.(Numeric)))
25
+ options.merge(matrix: @matrix)
26
+ end
27
+
28
+ class ClassMethods
29
+ def self.array_from_acgt_hash(hsh)
30
+ hsh = hsh.collect_hash{|key,value| [key.to_s.upcase, value] }
31
+ raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == %w[A C G T]
32
+ %w[A C G T].collect{|letter| hsh[letter] }
33
+ end
34
+ def self.try_convert_to_array(inp)
35
+ return inp if inp.is_a? Array
36
+ array_from_acgt_hash(inp)
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,6 +1,5 @@
1
1
  require 'bioinform/support'
2
- require 'bioinform/data_models/parser'
3
- require 'bioinform/data_models/parsers/string_parser'
2
+ require 'bioinform/parsers/string_parser'
4
3
 
5
4
  module Bioinform
6
5
  class StringFantomParser < StringParser
@@ -1,8 +1,7 @@
1
1
  require 'bioinform/support'
2
- require 'bioinform/data_models/parser'
3
- require 'bioinform/data_models/parsers/array_parser'
2
+ require 'bioinform/parsers/parser'
4
3
 
5
- module Bioinform
4
+ module Bioinform
6
5
  class StringParser < Parser
7
6
  def number_pat
8
7
  '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
@@ -28,18 +27,19 @@ module Bioinform
28
27
  matrix.split("\n").map{|line| line.split.map(&:to_f)}
29
28
  end
30
29
 
31
- def parse_core
30
+ def parse
32
31
  case input
33
- when String
32
+ when String
34
33
  match = input.multiline_squish.match(pattern)
35
34
  raise ArgumentError unless match
36
35
  matrix = matrix_preprocess( match[:matrix] )
37
36
  raise ArgumentError unless matrix
38
- result = ArrayParser.new(matrix).parse
39
- match[:name] ? result.merge(name: match[:name]) : result
37
+ Parser.new(matrix).parse.merge(name: match[:name])
40
38
  else
41
39
  raise ArgumentError
42
40
  end
41
+ rescue
42
+ {}
43
43
  end
44
44
  end
45
45
  end
@@ -0,0 +1,3 @@
1
+ require 'bioinform/parsers/parser'
2
+ require 'bioinform/parsers/string_parser'
3
+ require 'bioinform/parsers/string_fantom_parser'
@@ -1,3 +1,5 @@
1
+ require 'bioinform/support/collect_hash'
2
+
1
3
  class Array
2
4
  def partial_sums(initial = 0.0)
3
5
  sums = initial
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
data/lib/bioinform.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'bioinform/version'
2
2
  require 'bioinform/support'
3
3
  require 'bioinform/data_models'
4
- require 'bioinform/data_models/parsers'
5
4
 
6
5
  module Bioinform
7
6
  # Your code goes here...
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/data_models/pcm'
3
+
4
+ module Bioinform
5
+ describe PCM do
6
+ describe '#count' do
7
+ it 'should be equal to sum of elements at position' do
8
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).count.should == 7
9
+ PCM.new([[1, 2.3, 3.2, 1],[4.4, 1.1, 1, 2]]).count.should == 7.5
10
+ end
11
+ end
12
+
13
+ describe '#to_pwm' do
14
+ it 'should return PWM' do
15
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should be_kind_of(PWM)
16
+ end
17
+ it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
18
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118,0.486,-0.47],[0.754,-0.47,-0.47,0.118]]
19
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057,0.258,-0.194],[0.425,-0.194,-0.194,0.057]]
20
+ end
21
+ it 'should use default pseudocount equal to log(count)' do
22
+ PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(Math.log(7))
23
+ end
24
+ end
25
+
26
+ end
27
+ end