bioinform 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a22c28659b2ef2205e84ba7dcd9778d392ec54b
4
- data.tar.gz: f7280fc53cd3dbdd195278b9df06d91367d5baa0
3
+ metadata.gz: 58c71a402e76e3edcd2105b21dd19a84b37b4c80
4
+ data.tar.gz: f300eba436a2b06c5b5feef4bb3fb95c0397b0a7
5
5
  SHA512:
6
- metadata.gz: 08999aa322ad4c0119a92b2e41087312afcc2747a01e2cf11a772a59e6d47d2eb30f3bb232a1dfc0400ffd363359a01b61235abafcd6e2561aba7742aba4ca56
7
- data.tar.gz: 9a2662d63282b3362340006dea59e2da0f95869f673d4b80a9aa29433e49d0f7f85ec1464123a280b7053a6f502adeea9325f7aef9a2d73ad7b31d3c0931b26a
6
+ metadata.gz: 1ab6af7c582ca7de30a7b08c73c5979bda3b727cfd810d52114bbd9b657311dc7d634d0f991fd375b2efb86b730badbc48cc41ea512d05197f46f49b673b0ee5
7
+ data.tar.gz: f2162a51dec39a7c500474ac4739c40869c00be3b9f837224e35f21137436ea9a827b0d99d5e82d617f0aff30fa30da3dbfed3066d4684fe4cc629f96f3e8443
@@ -72,4 +72,4 @@ module Bioinform
72
72
  end
73
73
 
74
74
  end
75
- end
75
+ end
@@ -53,4 +53,4 @@ module Bioinform
53
53
  parameters.to_s
54
54
  end
55
55
  end
56
- end
56
+ end
@@ -5,6 +5,8 @@ require_relative '../conversion_algorithms/pcm2pwm_converter'
5
5
 
6
6
  module Bioinform
7
7
  class PCM < PM
8
+ make_parameters :pseudocount
9
+
8
10
  def count
9
11
  matrix.first.inject(&:+)
10
12
  end
@@ -31,4 +33,4 @@ module Bioinform
31
33
  super + validation_errors
32
34
  end
33
35
  end
34
- end
36
+ end
@@ -123,6 +123,13 @@ module Bioinform
123
123
  end
124
124
  end
125
125
 
126
# Consensus string of the matrix: for each position yielded by
# +each_position+, the letter whose element is the largest is taken, and the
# letters are joined in position order. Elements of a position are read in
# the order A, C, G, T.
def consensus
  letters = %w{A C G T}
  each_position.map do |position|
    _top_value, top_index = position.each_with_index.max_by { |value, _index| value }
    letters[top_index]
  end.join
end
131
+
132
+
126
133
  def to_hash
127
134
  hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
128
135
  [ letter, @matrix.map{|pos| pos[letter_index]} ]
@@ -187,4 +194,4 @@ module Bioinform
187
194
  PWM.new(get_parameters.merge(matrix: matrix))
188
195
  end
189
196
  end
190
- end
197
+ end
@@ -3,9 +3,19 @@ require_relative '../data_models'
3
3
 
4
4
  module Bioinform
5
5
  class PPM < PM
6
+ make_parameters :effective_count, :pseudocount
6
7
  def to_ppm
7
8
  self
8
9
  end
10
+
11
# Converts the matrix to a PCM by multiplying every element by
# +effective_count+. The resulting PCM gets the same name as the receiver.
def to_pcm
  counts = matrix.map do |position|
    position.map { |frequency| frequency * effective_count }
  end
  pcm = PCM.new(counts)
  pcm.name = name
  pcm
end
14
+
15
# Converts to a PWM by way of the PCM built by +to_pcm+. When a pseudocount is
# set it is passed on to the PCM's +to_pwm+; otherwise +to_pwm+ is called
# without arguments so the PCM applies its own default.
def to_pwm
  return to_pcm.to_pwm unless pseudocount
  to_pcm.to_pwm(pseudocount)
end
18
+
9
19
  def self.valid_matrix?(matrix, options = {})
10
20
  precision = options[:precision] || 0.01
11
21
  super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
@@ -18,4 +28,4 @@ module Bioinform
18
28
  super + validation_errors
19
29
  end
20
30
  end
21
- end
31
+ end
@@ -52,5 +52,15 @@ module Bioinform
52
52
  def worst_suffix(i)
53
53
  @matrix[i...length].map(&:min).inject(0.0, &:+)
54
54
  end
55
+
56
+
57
# Copy of the matrix (array of positions) with every element rounded to +n+
# decimal digits. The receiver's matrix is not modified.
def matrix_rounded(n)
  matrix.map do |position|
    position.map { |element| element.round(n) }
  end
end
private :matrix_rounded
61
+
62
# New PWM whose elements are rounded to +n+ decimal digits; it carries the
# same name as the receiver.
def round(n)
  rounded = PWM.new(matrix_rounded(n))
  rounded.name = name
  rounded
end
55
65
  end
56
- end
66
+ end
@@ -0,0 +1,40 @@
1
module Bioinform
  # Parses a whitespace-separated numeric matrix from text.
  #
  # Options (all optional):
  # * +:has_name+ - the first line carries the motif name (default: true)
  # * +:name_pattern+ - regexp applied to the name line; its +name+ named
  #   group becomes the motif name
  # * +:has_header_row+ - skip one line after the (optional) name line
  #   (default: false)
  # * +:has_header_column+ - drop the first token of every matrix line
  #   (default: false)
  # * +:nucleotides_in+ - +:columns+ (default) keeps the rows as read,
  #   +:rows+ transposes the matrix after parsing
  class MatrixParser
    # Raises RuntimeError when +:nucleotides_in+ is neither +:rows+ nor
    # +:columns+.
    def initialize(options = {})
      @has_name = options.fetch(:has_name, true)
      @name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]+).*$/)
      @has_header_row = options.fetch(:has_header_row, false)
      @has_header_column = options.fetch(:has_header_column, false)
      @nucleotides_in = options.fetch(:nucleotides_in, :columns)

      raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
    end

    # Parses +input+ and returns <tt>{matrix: [[Float, ...], ...], name: String or nil}</tt>.
    # +name+ is nil when the parser was built with <tt>has_name: false</tt>.
    #
    # Raises RuntimeError when a name line is expected but absent or does not
    # match the name pattern, ArgumentError when an element is not a number
    # (from Kernel#Float), and IndexError when rows of unequal length have to
    # be transposed.
    def parse!(input)
      # to_a: String#lines is not guaranteed to be an Array on older Rubies,
      # and the lines are consumed with #shift below.
      lines = input.lines.to_a
      name = extract_name(lines) if @has_name
      lines.shift if @has_header_row

      rows = lines.map(&:strip).reject(&:empty?).map(&:split)
      rows = rows.map { |row| row.drop(1) } if @has_header_column
      matrix = rows.map { |row| row.map { |element| Float(element) } }
      matrix = matrix.transpose if @nucleotides_in == :rows
      # raise 'Matrix not valid' unless ! matrix.empty? && matrix.all?{|pos| pos.size == 4 }
      {matrix: matrix, name: name}
    end

    # Non-raising variant of #parse!: returns the same hash, or nil when the
    # input cannot be parsed. Swallowing the error is the purpose of this
    # method; use #parse! to learn the reason of a failure.
    def parse(input)
      parse!(input)
    rescue StandardError
      nil
    end

    # True when +input+ can be parsed by #parse!, false otherwise.
    def valid?(input)
      parse!(input)
      true
    rescue StandardError
      false
    end

    private

    # Removes the name line from +lines+ and returns the motif name found in it.
    def extract_name(lines)
      name_line = lines.shift
      # Empty input would otherwise fail with NoMethodError on nil.
      raise 'Name line is missing' if name_line.nil?
      match = name_line.match(@name_pattern)
      raise 'Name pattern doesn\'t match' unless match
      match[:name]
    end
  end
end
@@ -4,4 +4,6 @@ require_relative 'parsers/yaml_parser'
4
4
  require_relative 'parsers/string_parser'
5
5
  require_relative 'parsers/string_fantom_parser'
6
6
  require_relative 'parsers/splittable_parser'
7
- require_relative 'parsers/jaspar_parser'
7
+ require_relative 'parsers/jaspar_parser'
8
+
9
+ require_relative 'parsers/matrix_parser'
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.15"
2
+ VERSION = "0.1.16"
3
3
  end
data/lib/bioinform.rb CHANGED
@@ -6,5 +6,32 @@ require_relative 'bioinform/data_models'
6
6
  require_relative 'bioinform/cli'
7
7
 
8
8
  module Bioinform
9
- # Your code goes here...
9
+ class Error < StandardError
10
+ end
11
+
12
+ def self.get_pwm(data_model, matrix, background, pseudocount, effective_count)
13
+ pm = Bioinform.const_get(data_model).new(matrix)
14
+ pm.set_parameters(background: background)
15
+ if pseudocount && ! pseudocount.blank? && [:PCM,:PPM].include?(data_model.to_sym)
16
+ pm.set_parameters(pseudocount: pseudocount)
17
+ end
18
+ if effective_count && [:PPM].include?(data_model.to_sym)
19
+ pm.set_parameters(effective_count: effective_count)
20
+ end
21
+ pm.to_pwm
22
+ rescue => e
23
+ raise "PWM creation failed (#{e})"
24
+ end
25
+
26
+ def self.get_pcm(data_model, matrix, effective_count)
27
+ pm = Bioinform.const_get(data_model).new(matrix)
28
+ if effective_count && [:PPM].include?(data_model.to_sym)
29
+ pm.set_parameters(effective_count: effective_count)
30
+ end
31
+ pm.to_pcm
32
+ end
33
+
34
+ def self.get_ppm(data_model, matrix)
35
+ Bioinform.const_get(data_model).new(matrix).to_ppm
36
+ end
10
37
  end
@@ -356,5 +356,12 @@ module Bioinform
356
356
  end
357
357
  end
358
358
  end
359
+
360
describe '#consensus' do
  # The largest elements of the three positions sit at indices 2, 0 and 1.
  let(:pm) do
    PM.new([
      [1.3, 2.0, 4.9, 3.2],
      [7.13, 6.5, 3.25, 4.633],
      [-1.0, -0.5, -1.5, -1.0]
    ])
  end

  it 'is a string of nucleotides of maximal weights' do
    pm.consensus.should == 'GAC'
  end
end
359
366
  end
360
367
  end
@@ -3,6 +3,52 @@ require_relative '../../lib/bioinform/data_models/pcm'
3
3
 
4
4
  module Bioinform
5
5
  describe PPM do
6
describe '#to_ppm' do
  let(:ppm_motif) { Fabricate(:ppm) }

  it 'returns self' do
    converted = ppm_motif.to_ppm
    converted.should eq ppm_motif
  end
end
6
12
 
13
describe '#to_pcm' do
  let(:ppm_motif) { Fabricate(:ppm_by_pcm) }
  let(:pcm_motif) { Fabricate(:pcm) }

  it 'returns pcm using given effective_count' do
    ppm_motif.effective_count = pcm_motif.count
    ppm_motif.to_pcm.should == pcm_motif
  end

  # effective_count is left unset here, so the conversion is expected to fail.
  it 'without given count it raises an error' do
    expect { ppm_motif.to_pcm }.to raise_error
  end

  it 'returns pcm with the same name' do
    ppm_motif.effective_count = pcm_motif.count
    ppm_motif.to_pcm.name.should == ppm_motif.name
  end
end
29
+
30
describe '#to_pwm' do
  let(:ppm_motif_without_count) { Fabricate(:ppm_by_pcm) }
  # Same object as ppm_motif_without_count, with effective_count set on first use.
  let(:ppm_motif) do
    ppm_motif_without_count.effective_count = 137
    ppm_motif_without_count
  end
  # NOTE(review): not referenced by any example in this group.
  let(:ppm_motif_with_log_pseudocount) do
    ppm_motif.effective_count = 137
    ppm_motif
  end
  let(:pcm_motif) { ppm_motif.to_pcm }

  it 'returns pwm the same as pwm of according pcm' do
    ppm_motif.to_pwm.should == pcm_motif.to_pwm
  end

  it 'uses pseudocount to transform according pcm to pwm' do
    ppm_motif.pseudocount = 10
    ppm_motif.to_pwm.should == pcm_motif.to_pwm(10)
  end

  it 'by default uses pseudocount equal to log of count' do
    expected = ppm_motif.to_pcm.to_pwm(Math.log(137))
    ppm_motif.to_pwm.should == expected
  end

  it 'without given count it raises an error' do
    expect { ppm_motif_without_count.to_pwm }.to raise_error
  end

  it 'returns pwm with the same name' do
    ppm_motif.to_pwm.name.should == ppm_motif.name
  end
end
7
53
  end
8
- end
54
+ end
@@ -79,5 +79,18 @@ module Bioinform
79
79
  end
80
80
  end
81
81
 
82
describe '#round' do
  let(:matrix) do
    [
      [1.3, 2.0, 4.9, 3.2],
      [7.13, 6.5, 3.25, 4.633],
      [-1.0, -0.5, -1.5, -1.0]
    ]
  end
  let(:pm) do
    model = PWM.new(matrix)
    model.name = 'motif name'
    model
  end

  it 'gives model with matrix elements rounded' do
    expected = [[1.3, 2.0, 4.9, 3.2], [7.1, 6.5, 3.3, 4.6], [-1.0, -0.5, -1.5, -1.0]]
    pm.round(1).matrix.should == expected
  end

  it 'gives PWM model' do
    pm.round(1).should be_kind_of(PWM)
  end

  it 'gives model with the same name' do
    pm.round(1).name.should == 'motif name'
  end
end
82
95
  end
83
- end
96
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.15
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: docopt
@@ -68,6 +68,7 @@ files:
68
68
  - lib/bioinform/formatters/transfac_formatter.rb
69
69
  - lib/bioinform/parsers.rb
70
70
  - lib/bioinform/parsers/jaspar_parser.rb
71
+ - lib/bioinform/parsers/matrix_parser.rb
71
72
  - lib/bioinform/parsers/parser.rb
72
73
  - lib/bioinform/parsers/splittable_parser.rb
73
74
  - lib/bioinform/parsers/string_fantom_parser.rb
@@ -180,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
181
  version: '0'
181
182
  requirements: []
182
183
  rubyforge_project:
183
- rubygems_version: 2.2.1
184
+ rubygems_version: 2.2.2
184
185
  signing_key:
185
186
  specification_version: 4
186
187
  summary: Classes for work with different input formats of positional matrices and