bioinform 0.1.15 → 0.1.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bioinform/data_models/collection.rb +1 -1
- data/lib/bioinform/data_models/motif.rb +1 -1
- data/lib/bioinform/data_models/pcm.rb +3 -1
- data/lib/bioinform/data_models/pm.rb +8 -1
- data/lib/bioinform/data_models/ppm.rb +11 -1
- data/lib/bioinform/data_models/pwm.rb +11 -1
- data/lib/bioinform/parsers/matrix_parser.rb +40 -0
- data/lib/bioinform/parsers.rb +3 -1
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +28 -1
- data/spec/data_models/pm_spec.rb +7 -0
- data/spec/data_models/ppm_spec.rb +47 -1
- data/spec/data_models/pwm_spec.rb +14 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58c71a402e76e3edcd2105b21dd19a84b37b4c80
|
4
|
+
data.tar.gz: f300eba436a2b06c5b5feef4bb3fb95c0397b0a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ab6af7c582ca7de30a7b08c73c5979bda3b727cfd810d52114bbd9b657311dc7d634d0f991fd375b2efb86b730badbc48cc41ea512d05197f46f49b673b0ee5
|
7
|
+
data.tar.gz: f2162a51dec39a7c500474ac4739c40869c00be3b9f837224e35f21137436ea9a827b0d99d5e82d617f0aff30fa30da3dbfed3066d4684fe4cc629f96f3e8443
|
data/lib/bioinform/data_models/pcm.rb
CHANGED
@@ -5,6 +5,8 @@ require_relative '../conversion_algorithms/pcm2pwm_converter'
|
|
5
5
|
|
6
6
|
module Bioinform
|
7
7
|
class PCM < PM
|
8
|
+
make_parameters :pseudocount
|
9
|
+
|
8
10
|
def count
|
9
11
|
matrix.first.inject(&:+)
|
10
12
|
end
|
@@ -31,4 +33,4 @@ module Bioinform
|
|
31
33
|
super + validation_errors
|
32
34
|
end
|
33
35
|
end
|
34
|
-
end
|
36
|
+
end
|
data/lib/bioinform/data_models/pm.rb
CHANGED
@@ -123,6 +123,13 @@ module Bioinform
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
+
def consensus
|
127
|
+
each_position.map{|pos|
|
128
|
+
pos.each_with_index.max_by{|el, letter_index| el}
|
129
|
+
}.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
|
130
|
+
end
|
131
|
+
|
132
|
+
|
126
133
|
def to_hash
|
127
134
|
hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
|
128
135
|
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
@@ -187,4 +194,4 @@ module Bioinform
|
|
187
194
|
PWM.new(get_parameters.merge(matrix: matrix))
|
188
195
|
end
|
189
196
|
end
|
190
|
-
end
|
197
|
+
end
|
data/lib/bioinform/data_models/ppm.rb
CHANGED
@@ -3,9 +3,19 @@ require_relative '../data_models'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
class PPM < PM
|
6
|
+
make_parameters :effective_count, :pseudocount
|
6
7
|
def to_ppm
|
7
8
|
self
|
8
9
|
end
|
10
|
+
|
11
|
+
def to_pcm
|
12
|
+
PCM.new(matrix.map{|pos| pos.map{|el| el * effective_count} }).tap{|pcm| pcm.name = name}
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_pwm
|
16
|
+
pseudocount ? to_pcm.to_pwm(pseudocount) : to_pcm.to_pwm
|
17
|
+
end
|
18
|
+
|
9
19
|
def self.valid_matrix?(matrix, options = {})
|
10
20
|
precision = options[:precision] || 0.01
|
11
21
|
super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
|
@@ -18,4 +28,4 @@ module Bioinform
|
|
18
28
|
super + validation_errors
|
19
29
|
end
|
20
30
|
end
|
21
|
-
end
|
31
|
+
end
|
data/lib/bioinform/data_models/pwm.rb
CHANGED
@@ -52,5 +52,15 @@ module Bioinform
|
|
52
52
|
def worst_suffix(i)
|
53
53
|
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
54
|
end
|
55
|
+
|
56
|
+
|
57
|
+
def matrix_rounded(n)
|
58
|
+
matrix.map{|pos| pos.map{|x| x.round(n) } }
|
59
|
+
end
|
60
|
+
private :matrix_rounded
|
61
|
+
|
62
|
+
def round(n)
|
63
|
+
PWM.new(matrix_rounded(n)).tap{|pm| pm.name = name}
|
64
|
+
end
|
55
65
|
end
|
56
|
-
end
|
66
|
+
end
|
data/lib/bioinform/parsers/matrix_parser.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
module Bioinform
|
2
|
+
class MatrixParser
|
3
|
+
def initialize(options = {})
|
4
|
+
@has_name = options.fetch(:has_name, true)
|
5
|
+
@name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]+).*$/)
|
6
|
+
@has_header_row = options.fetch(:has_header_row, false)
|
7
|
+
@has_header_column = options.fetch(:has_header_column, false)
|
8
|
+
@nucleotides_in = options.fetch(:nucleotides_in, :columns)
|
9
|
+
|
10
|
+
raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse!(input)
|
14
|
+
lines = input.lines
|
15
|
+
if @has_name
|
16
|
+
match = lines.shift.match(@name_pattern)
|
17
|
+
raise 'Name pattern doesn\'t match' unless match
|
18
|
+
name = match[:name]
|
19
|
+
end
|
20
|
+
lines.shift if @has_header_row
|
21
|
+
matrix = lines.map(&:strip).reject(&:empty?).map{|line| line.split }
|
22
|
+
matrix = matrix.map{|row| row.drop(1) } if @has_header_column
|
23
|
+
matrix = matrix.map{|row| row.map{|el| Float(el) } }
|
24
|
+
|
25
|
+
matrix = matrix.transpose if @nucleotides_in == :rows
|
26
|
+
# raise 'Matrix not valid' unless ! matrix.empty? && matrix.all?{|pos| pos.size == 4 }
|
27
|
+
{matrix: matrix, name: name}
|
28
|
+
end
|
29
|
+
|
30
|
+
def parse(input)
|
31
|
+
parse!(input) rescue nil
|
32
|
+
end
|
33
|
+
|
34
|
+
def valid?(input)
|
35
|
+
result = parse!(input)
|
36
|
+
rescue
|
37
|
+
false
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/bioinform/parsers.rb
CHANGED
@@ -4,4 +4,6 @@ require_relative 'parsers/yaml_parser'
|
|
4
4
|
require_relative 'parsers/string_parser'
|
5
5
|
require_relative 'parsers/string_fantom_parser'
|
6
6
|
require_relative 'parsers/splittable_parser'
|
7
|
-
require_relative 'parsers/jaspar_parser'
|
7
|
+
require_relative 'parsers/jaspar_parser'
|
8
|
+
|
9
|
+
require_relative 'parsers/matrix_parser'
|
data/lib/bioinform/version.rb
CHANGED
data/lib/bioinform.rb
CHANGED
@@ -6,5 +6,32 @@ require_relative 'bioinform/data_models'
|
|
6
6
|
require_relative 'bioinform/cli'
|
7
7
|
|
8
8
|
module Bioinform
|
9
|
-
|
9
|
+
class Error < StandardError
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.get_pwm(data_model, matrix, background, pseudocount, effective_count)
|
13
|
+
pm = Bioinform.const_get(data_model).new(matrix)
|
14
|
+
pm.set_parameters(background: background)
|
15
|
+
if pseudocount && ! pseudocount.blank? && [:PCM,:PPM].include?(data_model.to_sym)
|
16
|
+
pm.set_parameters(pseudocount: pseudocount)
|
17
|
+
end
|
18
|
+
if effective_count && [:PPM].include?(data_model.to_sym)
|
19
|
+
pm.set_parameters(effective_count: effective_count)
|
20
|
+
end
|
21
|
+
pm.to_pwm
|
22
|
+
rescue => e
|
23
|
+
raise "PWM creation failed (#{e})"
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.get_pcm(data_model, matrix, effective_count)
|
27
|
+
pm = Bioinform.const_get(data_model).new(matrix)
|
28
|
+
if effective_count && [:PPM].include?(data_model.to_sym)
|
29
|
+
pm.set_parameters(effective_count: effective_count)
|
30
|
+
end
|
31
|
+
pm.to_pcm
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.get_ppm(data_model, matrix)
|
35
|
+
Bioinform.const_get(data_model).new(matrix).to_ppm
|
36
|
+
end
|
10
37
|
end
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -356,5 +356,12 @@ module Bioinform
|
|
356
356
|
end
|
357
357
|
end
|
358
358
|
end
|
359
|
+
|
360
|
+
describe '#consensus' do
|
361
|
+
let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
|
362
|
+
it 'is a string of nucleotides of maximal weights' do
|
363
|
+
pm.consensus.should == 'GAC'
|
364
|
+
end
|
365
|
+
end
|
359
366
|
end
|
360
367
|
end
|
data/spec/data_models/ppm_spec.rb
CHANGED
@@ -3,6 +3,52 @@ require_relative '../../lib/bioinform/data_models/pcm'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe PPM do
|
6
|
+
describe '#to_ppm' do
|
7
|
+
let (:ppm_motif) { Fabricate(:ppm) }
|
8
|
+
it 'returns self' do
|
9
|
+
ppm_motif.to_ppm.should eq ppm_motif
|
10
|
+
end
|
11
|
+
end
|
6
12
|
|
13
|
+
describe '#to_pcm' do
|
14
|
+
let (:ppm_motif) { Fabricate(:ppm_by_pcm) }
|
15
|
+
let (:pcm_motif) { Fabricate(:pcm) }
|
16
|
+
|
17
|
+
it 'returns pcm using given effective_count' do
|
18
|
+
ppm = ppm_motif.tap{|ppm| ppm.effective_count = pcm_motif.count }
|
19
|
+
ppm.to_pcm.should == pcm_motif
|
20
|
+
end
|
21
|
+
it 'without given count it raises an error' do
|
22
|
+
expect{ ppm_motif.to_pcm }.to raise_error
|
23
|
+
end
|
24
|
+
it 'returns pcm with the same name' do
|
25
|
+
ppm = ppm_motif.tap{|ppm| ppm.effective_count = pcm_motif.count }
|
26
|
+
ppm.to_pcm.name.should == ppm_motif.name
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#to_pwm' do
|
31
|
+
let (:ppm_motif_without_count) { Fabricate(:ppm_by_pcm) }
|
32
|
+
let (:ppm_motif) { ppm_motif_without_count.tap{|ppm| ppm.effective_count = 137 } }
|
33
|
+
let (:ppm_motif_with_log_pseudocount) { ppm_motif.tap{|ppm| ppm.effective_count = 137 } }
|
34
|
+
let (:pcm_motif) { ppm_motif.to_pcm }
|
35
|
+
|
36
|
+
it 'returns pwm the same as pwm of according pcm' do
|
37
|
+
ppm_motif.to_pwm.should == pcm_motif.to_pwm
|
38
|
+
end
|
39
|
+
it 'uses pseudocount to transform according pcm to pwm' do
|
40
|
+
ppm = ppm_motif.tap{|ppm| ppm.pseudocount = 10}
|
41
|
+
ppm_motif.to_pwm.should == pcm_motif.to_pwm(10)
|
42
|
+
end
|
43
|
+
it 'by default uses pseudocount equal to log of count' do
|
44
|
+
ppm_motif.to_pwm.should == ppm_motif.to_pcm.to_pwm(Math.log(137))
|
45
|
+
end
|
46
|
+
it 'without given count it raises an error' do
|
47
|
+
expect{ ppm_motif_without_count.to_pwm }.to raise_error
|
48
|
+
end
|
49
|
+
it 'returns pwm with the same name' do
|
50
|
+
ppm_motif.to_pwm.name.should == ppm_motif.name
|
51
|
+
end
|
52
|
+
end
|
7
53
|
end
|
8
|
-
end
|
54
|
+
end
|
data/spec/data_models/pwm_spec.rb
CHANGED
@@ -79,5 +79,18 @@ module Bioinform
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
82
|
+
describe '#round' do
|
83
|
+
let(:matrix) { [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] }
|
84
|
+
let(:pm) { PWM.new( matrix ).tap{|pm| pm.name = 'motif name'} }
|
85
|
+
it 'gives model with matrix elements rounded' do
|
86
|
+
pm.round(1).matrix.should == [[1.3, 2.0, 4.9, 3.2], [7.1, 6.5, 3.3, 4.6], [-1.0, -0.5, -1.5, -1.0]]
|
87
|
+
end
|
88
|
+
it 'gives PWM model' do
|
89
|
+
pm.round(1).should be_kind_of(PWM)
|
90
|
+
end
|
91
|
+
it 'gives model with the same name' do
|
92
|
+
pm.round(1).name.should == 'motif name'
|
93
|
+
end
|
94
|
+
end
|
82
95
|
end
|
83
|
-
end
|
96
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.15
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: docopt
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- lib/bioinform/formatters/transfac_formatter.rb
|
69
69
|
- lib/bioinform/parsers.rb
|
70
70
|
- lib/bioinform/parsers/jaspar_parser.rb
|
71
|
+
- lib/bioinform/parsers/matrix_parser.rb
|
71
72
|
- lib/bioinform/parsers/parser.rb
|
72
73
|
- lib/bioinform/parsers/splittable_parser.rb
|
73
74
|
- lib/bioinform/parsers/string_fantom_parser.rb
|
@@ -180,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
181
|
version: '0'
|
181
182
|
requirements: []
|
182
183
|
rubyforge_project:
|
183
|
-
rubygems_version: 2.2.
|
184
|
+
rubygems_version: 2.2.2
|
184
185
|
signing_key:
|
185
186
|
specification_version: 4
|
186
187
|
summary: Classes for work with different input formats of positional matrices and
|