bioinform 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bioinform/data_models/collection.rb +1 -1
- data/lib/bioinform/data_models/motif.rb +1 -1
- data/lib/bioinform/data_models/pcm.rb +3 -1
- data/lib/bioinform/data_models/pm.rb +8 -1
- data/lib/bioinform/data_models/ppm.rb +11 -1
- data/lib/bioinform/data_models/pwm.rb +11 -1
- data/lib/bioinform/parsers/matrix_parser.rb +40 -0
- data/lib/bioinform/parsers.rb +3 -1
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +28 -1
- data/spec/data_models/pm_spec.rb +7 -0
- data/spec/data_models/ppm_spec.rb +47 -1
- data/spec/data_models/pwm_spec.rb +14 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58c71a402e76e3edcd2105b21dd19a84b37b4c80
|
4
|
+
data.tar.gz: f300eba436a2b06c5b5feef4bb3fb95c0397b0a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ab6af7c582ca7de30a7b08c73c5979bda3b727cfd810d52114bbd9b657311dc7d634d0f991fd375b2efb86b730badbc48cc41ea512d05197f46f49b673b0ee5
|
7
|
+
data.tar.gz: f2162a51dec39a7c500474ac4739c40869c00be3b9f837224e35f21137436ea9a827b0d99d5e82d617f0aff30fa30da3dbfed3066d4684fe4cc629f96f3e8443
|
data/lib/bioinform/data_models/pcm.rb
CHANGED
@@ -5,6 +5,8 @@ require_relative '../conversion_algorithms/pcm2pwm_converter'
|
|
5
5
|
|
6
6
|
module Bioinform
|
7
7
|
class PCM < PM
|
8
|
+
make_parameters :pseudocount
|
9
|
+
|
8
10
|
def count
|
9
11
|
matrix.first.inject(&:+)
|
10
12
|
end
|
@@ -31,4 +33,4 @@ module Bioinform
|
|
31
33
|
super + validation_errors
|
32
34
|
end
|
33
35
|
end
|
34
|
-
end
|
36
|
+
end
|
data/lib/bioinform/data_models/pm.rb
CHANGED
@@ -123,6 +123,13 @@ module Bioinform
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
+
def consensus
|
127
|
+
each_position.map{|pos|
|
128
|
+
pos.each_with_index.max_by{|el, letter_index| el}
|
129
|
+
}.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
|
130
|
+
end
|
131
|
+
|
132
|
+
|
126
133
|
def to_hash
|
127
134
|
hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
|
128
135
|
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
@@ -187,4 +194,4 @@ module Bioinform
|
|
187
194
|
PWM.new(get_parameters.merge(matrix: matrix))
|
188
195
|
end
|
189
196
|
end
|
190
|
-
end
|
197
|
+
end
|
data/lib/bioinform/data_models/ppm.rb
CHANGED
@@ -3,9 +3,19 @@ require_relative '../data_models'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
class PPM < PM
|
6
|
+
make_parameters :effective_count, :pseudocount
|
6
7
|
def to_ppm
|
7
8
|
self
|
8
9
|
end
|
10
|
+
|
11
|
+
def to_pcm
|
12
|
+
PCM.new(matrix.map{|pos| pos.map{|el| el * effective_count} }).tap{|pcm| pcm.name = name}
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_pwm
|
16
|
+
pseudocount ? to_pcm.to_pwm(pseudocount) : to_pcm.to_pwm
|
17
|
+
end
|
18
|
+
|
9
19
|
def self.valid_matrix?(matrix, options = {})
|
10
20
|
precision = options[:precision] || 0.01
|
11
21
|
super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
|
@@ -18,4 +28,4 @@ module Bioinform
|
|
18
28
|
super + validation_errors
|
19
29
|
end
|
20
30
|
end
|
21
|
-
end
|
31
|
+
end
|
data/lib/bioinform/data_models/pwm.rb
CHANGED
@@ -52,5 +52,15 @@ module Bioinform
|
|
52
52
|
def worst_suffix(i)
|
53
53
|
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
54
|
end
|
55
|
+
|
56
|
+
|
57
|
+
def matrix_rounded(n)
|
58
|
+
matrix.map{|pos| pos.map{|x| x.round(n) } }
|
59
|
+
end
|
60
|
+
private :matrix_rounded
|
61
|
+
|
62
|
+
def round(n)
|
63
|
+
PWM.new(matrix_rounded(n)).tap{|pm| pm.name = name}
|
64
|
+
end
|
55
65
|
end
|
56
|
-
end
|
66
|
+
end
|
data/lib/bioinform/parsers/matrix_parser.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
module Bioinform
|
2
|
+
class MatrixParser
|
3
|
+
def initialize(options = {})
|
4
|
+
@has_name = options.fetch(:has_name, true)
|
5
|
+
@name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]+).*$/)
|
6
|
+
@has_header_row = options.fetch(:has_header_row, false)
|
7
|
+
@has_header_column = options.fetch(:has_header_column, false)
|
8
|
+
@nucleotides_in = options.fetch(:nucleotides_in, :columns)
|
9
|
+
|
10
|
+
raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse!(input)
|
14
|
+
lines = input.lines
|
15
|
+
if @has_name
|
16
|
+
match = lines.shift.match(@name_pattern)
|
17
|
+
raise 'Name pattern doesn\'t match' unless match
|
18
|
+
name = match[:name]
|
19
|
+
end
|
20
|
+
lines.shift if @has_header_row
|
21
|
+
matrix = lines.map(&:strip).reject(&:empty?).map{|line| line.split }
|
22
|
+
matrix = matrix.map{|row| row.drop(1) } if @has_header_column
|
23
|
+
matrix = matrix.map{|row| row.map{|el| Float(el) } }
|
24
|
+
|
25
|
+
matrix = matrix.transpose if @nucleotides_in == :rows
|
26
|
+
# raise 'Matrix not valid' unless ! matrix.empty? && matrix.all?{|pos| pos.size == 4 }
|
27
|
+
{matrix: matrix, name: name}
|
28
|
+
end
|
29
|
+
|
30
|
+
def parse(input)
|
31
|
+
parse!(input) rescue nil
|
32
|
+
end
|
33
|
+
|
34
|
+
def valid?(input)
|
35
|
+
result = parse!(input)
|
36
|
+
rescue
|
37
|
+
false
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/bioinform/parsers.rb
CHANGED
@@ -4,4 +4,6 @@ require_relative 'parsers/yaml_parser'
|
|
4
4
|
require_relative 'parsers/string_parser'
|
5
5
|
require_relative 'parsers/string_fantom_parser'
|
6
6
|
require_relative 'parsers/splittable_parser'
|
7
|
-
require_relative 'parsers/jaspar_parser'
|
7
|
+
require_relative 'parsers/jaspar_parser'
|
8
|
+
|
9
|
+
require_relative 'parsers/matrix_parser'
|
data/lib/bioinform/version.rb
CHANGED
data/lib/bioinform.rb
CHANGED
@@ -6,5 +6,32 @@ require_relative 'bioinform/data_models'
|
|
6
6
|
require_relative 'bioinform/cli'
|
7
7
|
|
8
8
|
module Bioinform
|
9
|
-
|
9
|
+
class Error < StandardError
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.get_pwm(data_model, matrix, background, pseudocount, effective_count)
|
13
|
+
pm = Bioinform.const_get(data_model).new(matrix)
|
14
|
+
pm.set_parameters(background: background)
|
15
|
+
if pseudocount && ! pseudocount.blank? && [:PCM,:PPM].include?(data_model.to_sym)
|
16
|
+
pm.set_parameters(pseudocount: pseudocount)
|
17
|
+
end
|
18
|
+
if effective_count && [:PPM].include?(data_model.to_sym)
|
19
|
+
pm.set_parameters(effective_count: effective_count)
|
20
|
+
end
|
21
|
+
pm.to_pwm
|
22
|
+
rescue => e
|
23
|
+
raise "PWM creation failed (#{e})"
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.get_pcm(data_model, matrix, effective_count)
|
27
|
+
pm = Bioinform.const_get(data_model).new(matrix)
|
28
|
+
if effective_count && [:PPM].include?(data_model.to_sym)
|
29
|
+
pm.set_parameters(effective_count: effective_count)
|
30
|
+
end
|
31
|
+
pm.to_pcm
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.get_ppm(data_model, matrix)
|
35
|
+
Bioinform.const_get(data_model).new(matrix).to_ppm
|
36
|
+
end
|
10
37
|
end
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -356,5 +356,12 @@ module Bioinform
|
|
356
356
|
end
|
357
357
|
end
|
358
358
|
end
|
359
|
+
|
360
|
+
describe '#consensus' do
|
361
|
+
let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
|
362
|
+
it 'is a string of nucleotides of maximal weights' do
|
363
|
+
pm.consensus.should == 'GAC'
|
364
|
+
end
|
365
|
+
end
|
359
366
|
end
|
360
367
|
end
|
data/spec/data_models/ppm_spec.rb
CHANGED
@@ -3,6 +3,52 @@ require_relative '../../lib/bioinform/data_models/pcm'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe PPM do
|
6
|
+
describe '#to_ppm' do
|
7
|
+
let (:ppm_motif) { Fabricate(:ppm) }
|
8
|
+
it 'returns self' do
|
9
|
+
ppm_motif.to_ppm.should eq ppm_motif
|
10
|
+
end
|
11
|
+
end
|
6
12
|
|
13
|
+
describe '#to_pcm' do
|
14
|
+
let (:ppm_motif) { Fabricate(:ppm_by_pcm) }
|
15
|
+
let (:pcm_motif) { Fabricate(:pcm) }
|
16
|
+
|
17
|
+
it 'returns pcm using given effective_count' do
|
18
|
+
ppm = ppm_motif.tap{|ppm| ppm.effective_count = pcm_motif.count }
|
19
|
+
ppm.to_pcm.should == pcm_motif
|
20
|
+
end
|
21
|
+
it 'without given count it raises an error' do
|
22
|
+
expect{ ppm_motif.to_pcm }.to raise_error
|
23
|
+
end
|
24
|
+
it 'returns pcm with the same name' do
|
25
|
+
ppm = ppm_motif.tap{|ppm| ppm.effective_count = pcm_motif.count }
|
26
|
+
ppm.to_pcm.name.should == ppm_motif.name
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe '#to_pwm' do
|
31
|
+
let (:ppm_motif_without_count) { Fabricate(:ppm_by_pcm) }
|
32
|
+
let (:ppm_motif) { ppm_motif_without_count.tap{|ppm| ppm.effective_count = 137 } }
|
33
|
+
let (:ppm_motif_with_log_pseudocount) { ppm_motif.tap{|ppm| ppm.effective_count = 137 } }
|
34
|
+
let (:pcm_motif) { ppm_motif.to_pcm }
|
35
|
+
|
36
|
+
it 'returns pwm the same as pwm of according pcm' do
|
37
|
+
ppm_motif.to_pwm.should == pcm_motif.to_pwm
|
38
|
+
end
|
39
|
+
it 'uses pseudocount to transform according pcm to pwm' do
|
40
|
+
ppm = ppm_motif.tap{|ppm| ppm.pseudocount = 10}
|
41
|
+
ppm_motif.to_pwm.should == pcm_motif.to_pwm(10)
|
42
|
+
end
|
43
|
+
it 'by default uses pseudocount equal to log of count' do
|
44
|
+
ppm_motif.to_pwm.should == ppm_motif.to_pcm.to_pwm(Math.log(137))
|
45
|
+
end
|
46
|
+
it 'without given count it raises an error' do
|
47
|
+
expect{ ppm_motif_without_count.to_pwm }.to raise_error
|
48
|
+
end
|
49
|
+
it 'returns pwm with the same name' do
|
50
|
+
ppm_motif.to_pwm.name.should == ppm_motif.name
|
51
|
+
end
|
52
|
+
end
|
7
53
|
end
|
8
|
-
end
|
54
|
+
end
|
data/spec/data_models/pwm_spec.rb
CHANGED
@@ -79,5 +79,18 @@ module Bioinform
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
82
|
+
describe '#round' do
|
83
|
+
let(:matrix) { [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] }
|
84
|
+
let(:pm) { PWM.new( matrix ).tap{|pm| pm.name = 'motif name'} }
|
85
|
+
it 'gives model with matrix elements rounded' do
|
86
|
+
pm.round(1).matrix.should == [[1.3, 2.0, 4.9, 3.2], [7.1, 6.5, 3.3, 4.6], [-1.0, -0.5, -1.5, -1.0]]
|
87
|
+
end
|
88
|
+
it 'gives PWM model' do
|
89
|
+
pm.round(1).should be_kind_of(PWM)
|
90
|
+
end
|
91
|
+
it 'gives model with the same name' do
|
92
|
+
pm.round(1).name.should == 'motif name'
|
93
|
+
end
|
94
|
+
end
|
82
95
|
end
|
83
|
-
end
|
96
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.15
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: docopt
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- lib/bioinform/formatters/transfac_formatter.rb
|
69
69
|
- lib/bioinform/parsers.rb
|
70
70
|
- lib/bioinform/parsers/jaspar_parser.rb
|
71
|
+
- lib/bioinform/parsers/matrix_parser.rb
|
71
72
|
- lib/bioinform/parsers/parser.rb
|
72
73
|
- lib/bioinform/parsers/splittable_parser.rb
|
73
74
|
- lib/bioinform/parsers/string_fantom_parser.rb
|
@@ -180,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
181
|
version: '0'
|
181
182
|
requirements: []
|
182
183
|
rubyforge_project:
|
183
|
-
rubygems_version: 2.2.
|
184
|
+
rubygems_version: 2.2.2
|
184
185
|
signing_key:
|
185
186
|
specification_version: 4
|
186
187
|
summary: Classes for work with different input formats of positional matrices and
|