bioinform 0.1.15 → 0.1.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a22c28659b2ef2205e84ba7dcd9778d392ec54b
4
- data.tar.gz: f7280fc53cd3dbdd195278b9df06d91367d5baa0
3
+ metadata.gz: 58c71a402e76e3edcd2105b21dd19a84b37b4c80
4
+ data.tar.gz: f300eba436a2b06c5b5feef4bb3fb95c0397b0a7
5
5
  SHA512:
6
- metadata.gz: 08999aa322ad4c0119a92b2e41087312afcc2747a01e2cf11a772a59e6d47d2eb30f3bb232a1dfc0400ffd363359a01b61235abafcd6e2561aba7742aba4ca56
7
- data.tar.gz: 9a2662d63282b3362340006dea59e2da0f95869f673d4b80a9aa29433e49d0f7f85ec1464123a280b7053a6f502adeea9325f7aef9a2d73ad7b31d3c0931b26a
6
+ metadata.gz: 1ab6af7c582ca7de30a7b08c73c5979bda3b727cfd810d52114bbd9b657311dc7d634d0f991fd375b2efb86b730badbc48cc41ea512d05197f46f49b673b0ee5
7
+ data.tar.gz: f2162a51dec39a7c500474ac4739c40869c00be3b9f837224e35f21137436ea9a827b0d99d5e82d617f0aff30fa30da3dbfed3066d4684fe4cc629f96f3e8443
@@ -72,4 +72,4 @@ module Bioinform
72
72
  end
73
73
 
74
74
  end
75
- end
75
+ end
@@ -53,4 +53,4 @@ module Bioinform
53
53
  parameters.to_s
54
54
  end
55
55
  end
56
- end
56
+ end
@@ -5,6 +5,8 @@ require_relative '../conversion_algorithms/pcm2pwm_converter'
5
5
 
6
6
  module Bioinform
7
7
  class PCM < PM
8
+ make_parameters :pseudocount
9
+
8
10
  def count
9
11
  matrix.first.inject(&:+)
10
12
  end
@@ -31,4 +33,4 @@ module Bioinform
31
33
  super + validation_errors
32
34
  end
33
35
  end
34
- end
36
+ end
@@ -123,6 +123,13 @@ module Bioinform
123
123
  end
124
124
  end
125
125
 
126
# Builds the consensus string of the matrix: for each position the letter
# (columns are read in A, C, G, T order) holding the largest element is
# taken, and the letters are concatenated.
# On ties max_by decides which of the equal elements wins.
def consensus
  alphabet = %w{A C G T}
  letters = each_position.map do |position|
    _best_value, best_index = position.each_with_index.max_by { |value, _index| value }
    alphabet[best_index]
  end
  letters.join
end
131
+
132
+
126
133
  def to_hash
127
134
  hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
128
135
  [ letter, @matrix.map{|pos| pos[letter_index]} ]
@@ -187,4 +194,4 @@ module Bioinform
187
194
  PWM.new(get_parameters.merge(matrix: matrix))
188
195
  end
189
196
  end
190
- end
197
+ end
@@ -3,9 +3,19 @@ require_relative '../data_models'
3
3
 
4
4
  module Bioinform
5
5
  class PPM < PM
6
+ make_parameters :effective_count, :pseudocount
6
7
  def to_ppm
7
8
  self
8
9
  end
10
+
11
# Converts this matrix to a PCM by multiplying every element by
# effective_count. The resulting PCM receives this motif's name.
# When effective_count is not set the multiplication fails, so an error
# is raised (the accompanying spec relies on that).
def to_pcm
  counts = matrix.map do |position|
    position.map { |frequency| frequency * effective_count }
  end
  pcm = PCM.new(counts)
  pcm.name = name
  pcm
end
14
+
15
# Converts to PWM through the intermediate PCM (see #to_pcm).
# An explicitly set pseudocount is forwarded to PCM#to_pwm; otherwise
# PCM#to_pwm is called without arguments and picks its own default.
def to_pwm
  return to_pcm.to_pwm unless pseudocount

  to_pcm.to_pwm(pseudocount)
end
18
+
9
19
  def self.valid_matrix?(matrix, options = {})
10
20
  precision = options[:precision] || 0.01
11
21
  super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
@@ -18,4 +28,4 @@ module Bioinform
18
28
  super + validation_errors
19
29
  end
20
30
  end
21
- end
31
+ end
@@ -52,5 +52,15 @@ module Bioinform
52
52
  def worst_suffix(i)
53
53
  @matrix[i...length].map(&:min).inject(0.0, &:+)
54
54
  end
55
+
56
+
57
# Returns a new nested array in which every matrix element is rounded
# with #round(n). The matrix of the receiver is not modified.
def matrix_rounded(n)
  matrix.map do |position|
    position.map { |value| value.round(n) }
  end
end
private :matrix_rounded
61
+
62
# Returns a new PWM whose elements are rounded with #round(n) and whose
# name is copied from the receiver.
# NOTE(review): only the name is copied here; confirm whether other
# parameters of the motif should be carried over as well.
def round(n)
  rounded = PWM.new(matrix_rounded(n))
  rounded.name = name
  rounded
end
55
65
  end
56
- end
66
+ end
@@ -0,0 +1,40 @@
1
module Bioinform
  # Parses a whitespace-separated text matrix, optionally preceded by a name
  # line and/or a header row, and optionally carrying a header column.
  #
  # Options (all optional):
  #   :has_name          - first line holds the motif name (default: true)
  #   :name_pattern      - regexp with a `name` group applied to the name line;
  #                        the default accepts an optional leading '>' and takes
  #                        everything up to the first tab or line break
  #   :has_header_row    - drop the first line after the name (default: false)
  #   :has_header_column - drop the first token of every row (default: false)
  #   :nucleotides_in    - :columns (default) or :rows; with :rows the parsed
  #                        matrix is transposed
  class MatrixParser
    def initialize(options = {})
      @has_name = options.fetch(:has_name, true)
      @name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]+).*$/)
      @has_header_row = options.fetch(:has_header_row, false)
      @has_header_column = options.fetch(:has_header_column, false)
      @nucleotides_in = options.fetch(:nucleotides_in, :columns)

      raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
    end

    # Parses input and returns {matrix: [[Float, ...], ...], name: String or nil}.
    # name is nil when the parser was configured with has_name: false.
    #
    # Raises RuntimeError when the name line is missing or doesn't match the
    # pattern, ArgumentError when an element is not a number (Kernel#Float),
    # IndexError when rows of different length have to be transposed.
    def parse!(input)
      lines = input.lines.to_a
      name = take_name(lines) if @has_name
      lines.shift if @has_header_row

      rows = lines.map(&:strip).reject(&:empty?).map(&:split)
      rows = rows.map { |row| row.drop(1) } if @has_header_column
      matrix = rows.map { |row| row.map { |el| Float(el) } }
      matrix = matrix.transpose if @nucleotides_in == :rows

      # NOTE(review): the matrix shape (non-empty, 4 letters per position) is
      # not checked here; the original check was left commented out:
      # raise 'Matrix not valid' unless ! matrix.empty? && matrix.all?{|pos| pos.size == 4 }
      {matrix: matrix, name: name}
    end

    # Same as #parse! but returns nil instead of raising.
    def parse(input)
      parse!(input)
    rescue StandardError
      nil
    end

    # Returns true when input can be parsed by #parse!, false otherwise.
    def valid?(input)
      parse!(input)
      true
    rescue StandardError
      false
    end

    # Removes the first line from lines and extracts the motif name from it.
    # An absent line (empty input) is reported explicitly instead of failing
    # with NoMethodError on nil.
    def take_name(lines)
      header = lines.shift
      raise 'Input is empty: line with a name is missing' if header.nil?
      match = header.match(@name_pattern)
      raise 'Name pattern doesn\'t match' unless match
      match[:name]
    end
    private :take_name
  end
end
@@ -4,4 +4,6 @@ require_relative 'parsers/yaml_parser'
4
4
  require_relative 'parsers/string_parser'
5
5
  require_relative 'parsers/string_fantom_parser'
6
6
  require_relative 'parsers/splittable_parser'
7
- require_relative 'parsers/jaspar_parser'
7
+ require_relative 'parsers/jaspar_parser'
8
+
9
+ require_relative 'parsers/matrix_parser'
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.15"
2
+ VERSION = "0.1.16"
3
3
  end
data/lib/bioinform.rb CHANGED
@@ -6,5 +6,32 @@ require_relative 'bioinform/data_models'
6
6
  require_relative 'bioinform/cli'
7
7
 
8
8
  module Bioinform
9
- # Your code goes here...
9
# Library-specific error class.
class Error < StandardError
end

# Builds a motif of the given data model and converts it to a PWM.
#
# data_model      - name of a class in the Bioinform namespace (String or
#                   Symbol), e.g. :PCM
# matrix          - passed as is to the constructor of that class
# background      - stored with set_parameters(background: ...)
# pseudocount     - stored only for the :PCM and :PPM models, and only when
#                   it is neither nil/false nor blank
# effective_count - stored only for the :PPM model, when given
#
# Returns the result of #to_pwm of the constructed model.
# Raises RuntimeError "PWM creation failed (...)" wrapping any StandardError
# that occurs on the way.
def self.get_pwm(data_model, matrix, background, pseudocount, effective_count)
  pm = Bioinform.const_get(data_model).new(matrix)
  pm.set_parameters(background: background)
  model = data_model.to_sym

  # blank? is not core Ruby: use it when the object provides it, otherwise
  # fall back to a plain check so that e.g. numeric pseudocounts don't fail
  # with NoMethodError (which would surface as "PWM creation failed").
  pseudocount_blank =
    if pseudocount.respond_to?(:blank?)
      pseudocount.blank?
    else
      pseudocount.to_s.strip.empty?
    end

  if pseudocount && !pseudocount_blank && [:PCM, :PPM].include?(model)
    pm.set_parameters(pseudocount: pseudocount)
  end
  if effective_count && [:PPM].include?(model)
    pm.set_parameters(effective_count: effective_count)
  end
  pm.to_pwm
rescue => e
  raise "PWM creation failed (#{e})"
end

# Builds a motif of the given data model and converts it to a PCM.
# effective_count is stored only for the :PPM model, when given.
# Errors are not wrapped here; they propagate as raised.
def self.get_pcm(data_model, matrix, effective_count)
  pm = Bioinform.const_get(data_model).new(matrix)
  if effective_count && [:PPM].include?(data_model.to_sym)
    pm.set_parameters(effective_count: effective_count)
  end
  pm.to_pcm
end

# Builds a motif of the given data model and converts it to a PPM.
def self.get_ppm(data_model, matrix)
  Bioinform.const_get(data_model).new(matrix).to_ppm
end
10
37
  end
@@ -356,5 +356,12 @@ module Bioinform
356
356
  end
357
357
  end
358
358
  end
359
+
360
# The expected letters follow the A, C, G, T column order:
# 4.9 is in column G, 7.13 in column A, -0.5 in column C.
describe '#consensus' do
  let(:pm) do
    PM.new([
      [1.3, 2.0, 4.9, 3.2],
      [7.13, 6.5, 3.25, 4.633],
      [-1.0, -0.5, -1.5, -1.0]
    ])
  end

  it 'is a string of nucleotides of maximal weights' do
    pm.consensus.should == 'GAC'
  end
end
359
366
  end
360
367
  end
@@ -3,6 +3,52 @@ require_relative '../../lib/bioinform/data_models/pcm'
3
3
 
4
4
  module Bioinform
5
5
  describe PPM do
6
# A PPM converted to PPM is expected to compare equal to itself.
describe '#to_ppm' do
  let(:motif) { Fabricate(:ppm) }

  it 'returns self' do
    converted = motif.to_ppm
    converted.should eq motif
  end
end
6
12
 
13
# PPM -> PCM conversion needs an effective count; the fabricated PPM has
# none until an example assigns one.
describe '#to_pcm' do
  let(:ppm_motif) { Fabricate(:ppm_by_pcm) }
  let(:pcm_motif) { Fabricate(:pcm) }
  # Same object as ppm_motif, with the count taken from the reference PCM.
  let(:ppm_with_count) do
    ppm_motif.tap { |motif| motif.effective_count = pcm_motif.count }
  end

  it 'returns pcm using given effective_count' do
    ppm_with_count.to_pcm.should == pcm_motif
  end

  it 'without given count it raises an error' do
    expect { ppm_motif.to_pcm }.to raise_error
  end

  it 'returns pcm with the same name' do
    ppm_with_count.to_pcm.name.should == ppm_motif.name
  end
end
29
+
30
# PPM -> PWM conversion goes through the PCM built with effective_count.
describe '#to_pwm' do
  let(:ppm_motif_without_count) { Fabricate(:ppm_by_pcm) }
  # Same object as ppm_motif_without_count, but with a count assigned.
  # `let` is lazy, so the count is set only in examples that reference
  # ppm_motif (directly or through pcm_motif).
  let(:ppm_motif) { ppm_motif_without_count.tap { |ppm| ppm.effective_count = 137 } }
  let(:pcm_motif) { ppm_motif.to_pcm }

  it 'returns pwm the same as pwm of according pcm' do
    ppm_motif.to_pwm.should == pcm_motif.to_pwm
  end
  it 'uses pseudocount to transform according pcm to pwm' do
    ppm_motif.pseudocount = 10
    ppm_motif.to_pwm.should == pcm_motif.to_pwm(10)
  end
  it 'by default uses pseudocount equal to log of count' do
    ppm_motif.to_pwm.should == ppm_motif.to_pcm.to_pwm(Math.log(137))
  end
  it 'without given count it raises an error' do
    # ppm_motif is never referenced here, so no count has been assigned.
    expect { ppm_motif_without_count.to_pwm }.to raise_error
  end
  it 'returns pwm with the same name' do
    ppm_motif.to_pwm.name.should == ppm_motif.name
  end
end
7
53
  end
8
- end
54
+ end
@@ -79,5 +79,18 @@ module Bioinform
79
79
  end
80
80
  end
81
81
 
82
# PWM#round(n) is expected to give a new PWM with rounded elements and the
# name of the source motif.
describe '#round' do
  let(:matrix) do
    [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]]
  end
  let(:pm) do
    motif = PWM.new(matrix)
    motif.name = 'motif name'
    motif
  end
  let(:rounded) { pm.round(1) }

  it 'gives model with matrix elements rounded' do
    rounded.matrix.should == [[1.3, 2.0, 4.9, 3.2], [7.1, 6.5, 3.3, 4.6], [-1.0, -0.5, -1.5, -1.0]]
  end

  it 'gives PWM model' do
    rounded.should be_kind_of(PWM)
  end

  it 'gives model with the same name' do
    rounded.name.should == 'motif name'
  end
end
82
95
  end
83
- end
96
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.15
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: docopt
@@ -68,6 +68,7 @@ files:
68
68
  - lib/bioinform/formatters/transfac_formatter.rb
69
69
  - lib/bioinform/parsers.rb
70
70
  - lib/bioinform/parsers/jaspar_parser.rb
71
+ - lib/bioinform/parsers/matrix_parser.rb
71
72
  - lib/bioinform/parsers/parser.rb
72
73
  - lib/bioinform/parsers/splittable_parser.rb
73
74
  - lib/bioinform/parsers/string_fantom_parser.rb
@@ -180,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
181
  version: '0'
181
182
  requirements: []
182
183
  rubyforge_project:
183
- rubygems_version: 2.2.1
184
+ rubygems_version: 2.2.2
184
185
  signing_key:
185
186
  specification_version: 4
186
187
  summary: Classes for work with different input formats of positional matrices and