bioinform 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +29 -29
- data/Rakefile +5 -12
- data/bioinform.gemspec +21 -21
- data/lib/bioinform/data_models/collection.rb +2 -0
- data/lib/bioinform/data_models/{iupac.rb → old_style_models_TO_BE_REMOVED/iupac.rb} +1 -1
- data/lib/bioinform/data_models/{iupac_word.rb → old_style_models_TO_BE_REMOVED/iupac_word.rb} +0 -0
- data/lib/bioinform/data_models/{positional_count_matrix.rb → old_style_models_TO_BE_REMOVED/positional_count_matrix.rb} +0 -0
- data/lib/bioinform/data_models/{positional_matrix.rb → old_style_models_TO_BE_REMOVED/positional_matrix.rb} +3 -5
- data/lib/bioinform/data_models/{positional_probability_matrix.rb → old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb} +0 -0
- data/lib/bioinform/data_models/{positional_weight_matrix.rb → old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb} +0 -0
- data/lib/bioinform/data_models/parser.rb +41 -0
- data/lib/bioinform/data_models/parsers/array_parser.rb +17 -0
- data/lib/bioinform/data_models/parsers/hash_parser.rb +19 -0
- data/lib/bioinform/data_models/parsers/string_fantom_parser.rb +21 -0
- data/lib/bioinform/data_models/parsers/string_parser.rb +45 -0
- data/lib/bioinform/data_models/parsers.rb +4 -0
- data/lib/bioinform/data_models/pcm.rb +7 -0
- data/lib/bioinform/data_models/pm.rb +195 -0
- data/lib/bioinform/data_models/ppm.rb +8 -0
- data/lib/bioinform/data_models/pwm.rb +23 -0
- data/lib/bioinform/data_models.rb +5 -5
- data/lib/bioinform/support/callable_symbol.rb +33 -4
- data/lib/bioinform/support/collect_hash.rb +7 -0
- data/lib/bioinform/support/curry_except_self.rb +2 -2
- data/lib/bioinform/support/deep_dup.rb +5 -0
- data/lib/bioinform/support/delete_many.rb +14 -0
- data/lib/bioinform/support/has_keys.rb +14 -0
- data/lib/bioinform/support/inverf.rb +13 -0
- data/lib/bioinform/support/partial_sums.rb +6 -0
- data/lib/bioinform/support/{same.rb → same_by.rb} +1 -1
- data/lib/bioinform/support.rb +13 -5
- data/lib/bioinform/version.rb +3 -3
- data/lib/bioinform.rb +8 -7
- data/spec/data_models/parser_spec.rb +46 -0
- data/spec/data_models/parsers/array_parser_spec.rb +53 -0
- data/spec/data_models/parsers/hash_parser_spec.rb +60 -0
- data/spec/data_models/parsers/string_fantom_parser_spec.rb +38 -0
- data/spec/data_models/parsers/string_parser_spec.rb +112 -0
- data/spec/data_models/pm_spec.rb +369 -0
- data/spec/data_models/pwm_spec.rb +25 -0
- data/spec/spec_helper.rb +30 -0
- data/spec/support/callable_symbol_spec.rb +66 -0
- data/spec/support/collect_hash_spec.rb +15 -0
- data/spec/support/curry_except_self_spec.rb +9 -0
- data/spec/support/delete_many_spec.rb +44 -0
- data/spec/support/has_keys_spec.rb +48 -0
- data/spec/support/inverf_spec.rb +19 -0
- data/spec/support/multiline_squish_spec.rb +11 -0
- data/spec/support/partial_sums_spec.rb +9 -0
- data/spec/support/same_by_spec.rb +36 -0
- metadata +60 -21
- data/lib/bioinform/support/pmap.rb +0 -10
- data/lib/bioinform/support/ptap.rb +0 -7
- data/spec/callable_symbol_spec.rb +0 -37
- data/spec/pmap_test.rb +0 -24
- data/spec/positional_matrix_spec.rb +0 -169
- data/spec/ptap_spec.rb +0 -17
- data/spec/same_spec.rb +0 -19
data/.gitignore
CHANGED
@@ -1,17 +1,18 @@
|
|
1
|
-
*.gem
|
2
|
-
*.rbc
|
3
|
-
.bundle
|
4
|
-
.config
|
5
|
-
.yardoc
|
6
|
-
Gemfile.lock
|
7
|
-
InstalledFiles
|
8
|
-
_yardoc
|
9
|
-
coverage
|
10
|
-
doc/
|
11
|
-
lib/bundler/man
|
12
|
-
pkg
|
13
|
-
rdoc
|
14
|
-
spec/reports
|
15
|
-
test/tmp
|
16
|
-
test/version_tmp
|
17
|
-
tmp
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
TODO.txt
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in bioinform.gemspec
|
4
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in bioinform.gemspec
|
4
|
+
gemspec
|
data/LICENSE
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
Copyright (c) 2012 Ilya Vorontsov
|
2
|
-
|
3
|
-
MIT License
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
a copy of this software and associated documentation files (the
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be
|
14
|
-
included in all copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
1
|
+
Copyright (c) 2012 Ilya Vorontsov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
22
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,29 +1,29 @@
|
|
1
|
-
# Bioinform
|
2
|
-
|
3
|
-
Bioinform is a bunch of classes extracted from daily bioinformatics work. This classes is an attempt to encapsulate loading(parsing) logic for positional matrices in different formats and common transformations. It also includes several core classes extensions which are particularly useful on Enumerables
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'bioinform'
|
10
|
-
|
11
|
-
And then execute:
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
16
|
-
|
17
|
-
$ gem install bioinform
|
18
|
-
|
19
|
-
## Usage
|
20
|
-
|
21
|
-
Usage is under construction. I don't recommend use this gem for a while: syntax is on the way to change to more simple and concise. But stay tuned
|
22
|
-
|
23
|
-
## Contributing
|
24
|
-
|
25
|
-
1. Fork it
|
26
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
-
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
-
5. Create new Pull Request
|
1
|
+
# Bioinform
|
2
|
+
|
3
|
+
Bioinform is a bunch of classes extracted from daily bioinformatics work. This classes is an attempt to encapsulate loading(parsing) logic for positional matrices in different formats and common transformations. It also includes several core classes extensions which are particularly useful on Enumerables
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'bioinform'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install bioinform
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Usage is under construction. I don't recommend use this gem for a while: syntax is on the way to change to more simple and concise. But stay tuned
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
CHANGED
@@ -1,12 +1,5 @@
|
|
1
|
-
#!/usr/bin/env rake
|
2
|
-
require "bundler/gem_tasks"
|
3
|
-
require 'rspec/core/rake_task'
|
4
|
-
|
5
|
-
|
6
|
-
task :spec do
|
7
|
-
Dir.glob('spec/*_spec.rb') do |spec_file|
|
8
|
-
system("ruby #{spec_file}")
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
# RSpec::Core::RakeTask.new
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
RSpec::Core::RakeTask.new
|
data/bioinform.gemspec
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
require File.expand_path('../lib/bioinform/version', __FILE__)
|
3
|
-
|
4
|
-
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Ilya Vorontsov"]
|
6
|
-
gem.email = ["prijutme4ty@gmail.com"]
|
7
|
-
gem.description = %q{A bunch of useful classes for bioinformatics}
|
8
|
-
gem.summary = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
|
9
|
-
gem.homepage = ""
|
10
|
-
|
11
|
-
gem.files = `git ls-files`.split($\)
|
12
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
13
|
-
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
|
-
gem.name = "bioinform"
|
15
|
-
gem.require_paths = ["lib"]
|
16
|
-
gem.version = Bioinform::VERSION
|
17
|
-
|
18
|
-
gem.add_dependency('active_support', '~> 3.0.0')
|
19
|
-
|
20
|
-
gem.add_development_dependency "rspec", "~> 2.0"
|
21
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/bioinform/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.authors = ["Ilya Vorontsov"]
|
6
|
+
gem.email = ["prijutme4ty@gmail.com"]
|
7
|
+
gem.description = %q{A bunch of useful classes for bioinformatics}
|
8
|
+
gem.summary = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
|
9
|
+
gem.homepage = ""
|
10
|
+
|
11
|
+
gem.files = `git ls-files`.split($\)
|
12
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
13
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
|
+
gem.name = "bioinform"
|
15
|
+
gem.require_paths = ["lib"]
|
16
|
+
gem.version = Bioinform::VERSION
|
17
|
+
|
18
|
+
gem.add_dependency('active_support', '~> 3.0.0')
|
19
|
+
|
20
|
+
gem.add_development_dependency "rspec", "~> 2.0"
|
21
|
+
end
|
@@ -10,7 +10,7 @@ class Iupac
|
|
10
10
|
@words = [input]
|
11
11
|
else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
|
12
12
|
end
|
13
|
-
raise ArgumentError, 'IUPAC words should be of the same length' unless @words.
|
13
|
+
raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
|
14
14
|
end
|
15
15
|
|
16
16
|
def to_pcm
|
data/lib/bioinform/data_models/{iupac_word.rb → old_style_models_TO_BE_REMOVED/iupac_word.rb}
RENAMED
File without changes
|
File without changes
|
@@ -1,7 +1,5 @@
|
|
1
1
|
require 'active_support/core_ext/hash/indifferent_access'
|
2
|
-
require 'bioinform/support
|
3
|
-
require 'bioinform/support/same'
|
4
|
-
require 'bioinform/support/pmap'
|
2
|
+
require 'bioinform/support'
|
5
3
|
|
6
4
|
class PositionalMatrix
|
7
5
|
module DefaultParser
|
@@ -53,7 +51,7 @@ class PositionalMatrix
|
|
53
51
|
else
|
54
52
|
raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
|
55
53
|
end
|
56
|
-
raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.
|
54
|
+
raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
|
57
55
|
raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
|
58
56
|
@matrix = @matrix.transpose if @matrix.first.size != 4
|
59
57
|
end
|
@@ -64,7 +62,7 @@ class PositionalMatrix
|
|
64
62
|
alias_method :length, :size
|
65
63
|
|
66
64
|
def to_s(with_name = true)
|
67
|
-
mat_str = @matrix.
|
65
|
+
mat_str = @matrix.map(&:join.("\t")).join("\n")
|
68
66
|
(with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
|
69
67
|
end
|
70
68
|
|
File without changes
|
File without changes
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/pm'
|
3
|
+
|
4
|
+
module Bioinform
  class PM
    # Base class for positional-matrix input parsers.
    #
    # A concrete parser inherits from PM::Parser and overrides #parse_core so
    # that it returns a Hash describing the parsed input (the parsers in this
    # gem return at least a :matrix key) or raises when the input is not in a
    # format it understands. Every class that inherits from PM::Parser, directly
    # or through another parser, is recorded in PM::Parser.subclasses.
    class Parser
      # The raw object handed to the parser.
      attr_reader :input

      # Registry of parser classes, filled in by the `inherited` hook below.
      @subclasses ||= []

      class << self
        # Parser classes registered so far, in the order they were defined.
        # The list is stored on PM::Parser itself.
        attr_reader :subclasses

        # Ruby hook called whenever a class inherits from Parser or from one of
        # its descendants; the new class is appended to the shared registry.
        def inherited(subclass)
          super
          PM::Parser.subclasses << subclass
        end
      end

      # @param input the object to be parsed (String, Array, Hash, ...)
      def initialize(input)
        @input = input
      end

      # Format-specific parsing; must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in the base class
      def parse_core
        raise NotImplementedError, "#{self.class} must implement #parse_core"
      end

      # Parses the input and returns whatever #parse_core returns.
      # Errors raised by #parse_core propagate to the caller.
      def parse
        parse_core
      end

      # Tells whether #parse_core succeeds on the input.
      #
      # NotImplementedError is listed explicitly because it descends from
      # ScriptError, not StandardError, and a bare `rescue` would let it escape;
      # the abstract base class must simply answer "no" here.
      #
      # @return [Boolean]
      def can_parse?
        parse_core
        true
      rescue StandardError, NotImplementedError
        false
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/parser'
|
3
|
+
|
4
|
+
module Bioinform
  # Parser for matrices given as an Array of Arrays.
  #
  # Two layouts are accepted: one row per position (every row has 4 elements)
  # or one row per letter (exactly 4 rows), in which case the input is
  # transposed so that the result is always position-major.
  class ArrayParser < PM::Parser
    # @return [Hash] `{matrix: rows}` where every row has 4 elements
    # @raise [ArgumentError] if the input is not a non-empty rectangular Array
    #   of Arrays, or if neither dimension equals 4
    def parse_core
      # `&:is_a?.(Array)` and `same_by?` are helpers loaded via
      # 'bioinform/support'. The emptiness check keeps `input.first` from
      # being nil, so bad input always fails with ArgumentError.
      well_formed = input.is_a?(Array) && !input.empty? &&
                    input.all?(&:is_a?.(Array)) && input.same_by?(&:length)
      raise ArgumentError unless well_formed

      # All rows share one length (checked above), so the first row is
      # representative; there is no need to pick a random one.
      if input.first.size == 4
        { matrix: input }
      elsif input.size == 4
        { matrix: input.transpose }
      else
        raise ArgumentError
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/parser'
|
3
|
+
|
4
|
+
module Bioinform
  # Parser for matrices keyed by nucleotide letter.
  #
  # Accepts either a single Hash mapping A, C, G, T (String or Symbol keys) to
  # equally long columns, or an Array of per-position Hashes each holding
  # exactly the four letters.
  class HashParser < PM::Parser
    # @return [Hash] `{matrix: rows}` with one `[A, C, G, T]` row per position
    # @raise [ArgumentError] if the input is neither of the supported shapes
    def parse_core
      case input
      when Hash
        # Convert once; String and Symbol keys are then interchangeable.
        letters = input.with_indifferent_access
        raise ArgumentError unless letters.has_all_keys?(:A, :C, :G, :T)
        columns = letters.values_at(:A, :C, :G, :T)
        raise ArgumentError unless columns.same_by?(&:length)
        { matrix: columns.transpose }
      when Array
        raise ArgumentError unless input.all?(&:is_a?.(Hash)) && input.all? { |position| position.size == 4 }
        positions = input.map(&:with_indifferent_access)
        # Four entries are not enough: they must be exactly A, C, G and T,
        # otherwise values_at would silently produce nils in the matrix.
        raise ArgumentError unless positions.all? { |position| position.has_all_keys?(:A, :C, :G, :T) }
        { matrix: positions.map(&:values_at.(:A, :C, :G, :T)) }
      else
        raise ArgumentError
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/parser'
|
3
|
+
require 'bioinform/data_models/parsers/string_parser'
|
4
|
+
|
5
|
+
module Bioinform
  # StringParser variant for text whose first line is "NA <name>", whose second
  # line is a label followed by "A C G T", and whose matrix rows each start
  # with a label that is not part of the numeric data.
  class StringFantomParser < StringParser
    # Regexp source for one matrix row: a leading label, then the numbers.
    def row_pat
      label = '[\w\d]+ '
      numbers = "(#{number_pat} )*#{number_pat}"
      label + numbers
    end

    # Regexp source for the name line; the name lands in the `name` capture.
    def name_pat
      'NA (?<name>[\w.+:-]+)'
    end

    # Regexp source for the two mandatory header lines.
    def header_pat
      name_line = "#{name_pat}\n"
      column_titles = '[\w\d]+ ' + "A C G T\n"
      name_line + column_titles
    end

    # Turns the matched matrix text into rows of Floats, dropping the first
    # whitespace-separated field (the row label) of every line.
    def matrix_preprocess(matrix)
      matrix.split("\n").map do |line|
        fields = line.split
        fields[1..-1].map(&:to_f)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/parser'
|
3
|
+
require 'bioinform/data_models/parsers/array_parser'
|
4
|
+
|
5
|
+
module Bioinform
  # Parser for matrices written as plain text: an optional name line followed
  # by rows of space-separated numbers. The text is normalised with
  # String#multiline_squish (loaded via 'bioinform/support') before matching,
  # and the numeric rows are then handed to ArrayParser.
  #
  # The *_pat methods return regexp source strings so that subclasses can
  # override individual pieces of the grammar.
  class StringParser < PM::Parser
    # Regexp source for one number: optional sign, digits, optional fraction
    # and optional exponent (e.g. "1", "-0.25", "1.5e-03"). String#to_f, used
    # in #matrix_preprocess, understands all of these forms.
    def number_pat
      '[+-]?\d+(\.\d+)?([eE][+-]?\d+)?'
    end

    # Regexp source for one row: numbers separated by single spaces.
    def row_pat
      "(#{number_pat} )*#{number_pat}"
    end

    # Regexp source for the name line, optionally prefixed by ">" and a space;
    # the name itself lands in the `name` capture.
    def name_pat
      '>? ?(?<name>[\w.+:-]+)'
    end

    # Regexp source for the matrix body: rows separated by newlines, captured
    # as `matrix`.
    def matrix_pat
      "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    end

    # Regexp source for the header; in this parser the name line is optional.
    def header_pat
      "(#{name_pat}\n)?"
    end

    # Regexp for the whole input, anchored at both ends of the string.
    def pattern
      /\A#{header_pat}#{matrix_pat}\z/
    end

    # when matrix is extracted from the string it should be transformed to a matrix of numerics
    def matrix_preprocess(matrix)
      matrix.split("\n").map { |line| line.split.map(&:to_f) }
    end

    # @return [Hash] the ArrayParser result (`{matrix: ...}`), plus a `:name`
    #   entry when a name line was present
    # @raise [ArgumentError] if the input is not a String, does not match
    #   #pattern, or the extracted rows are rejected by ArrayParser
    def parse_core
      case input
      when String
        match = input.multiline_squish.match(pattern)
        raise ArgumentError unless match
        matrix = matrix_preprocess(match[:matrix])
        # Guards against an overridden #matrix_preprocess returning nil.
        raise ArgumentError unless matrix
        result = ArrayParser.new(matrix).parse
        match[:name] ? result.merge(name: match[:name]) : result
      else
        raise ArgumentError
      end
    end
  end
end
|
@@ -0,0 +1,195 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
|
3
|
+
module Bioinform
  # Positional matrix: an ordered list of positions, each holding four numbers
  # (one per nucleotide, in A, C, G, T order), together with an optional name
  # and a background vector.
  #
  # Score-related values (#best_score, #worst_score, #best_suffix,
  # #worst_suffix) are memoized; every method that changes the matrix or the
  # background resets them through #clear_cache.
  class PM
    attr_reader :matrix
    attr_accessor :name

    # @param input  anything one of the parsers understands; when nil the
    #   object is left without a matrix
    # @param parser [Class, nil] parser class to use; when nil the first class
    #   in PM::Parser.subclasses that can parse the input is chosen
    # @raise [ArgumentError] if the input cannot be parsed or the parsed
    #   matrix is invalid
    def initialize(input = nil, parser = nil)
      @background = [1, 1, 1, 1]
      @input = input
      @parser = parser
      return unless @input
      parser_init
      matrix_init
    end

    # Verifies the explicitly given parser, or looks one up in the registry.
    #
    # @raise [ArgumentError] if no suitable parser exists for the input
    def parser_init
      if @parser
        raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
      else
        @parser = PM::Parser.subclasses.find { |parser_class| parser_class.new(@input).can_parse? }
        raise ArgumentError, 'No one parser can parse specified input' unless @parser
      end
    end

    # Runs the chosen parser and applies its result to this object.
    #
    # @raise [ArgumentError] if the parser result has no :matrix key
    def matrix_init
      parse_result = @parser.new(@input).parse
      raise ArgumentError, 'Used parser result has no `matrix` key' unless parse_result.has_key? :matrix

      configure_from_hash(parse_result)
    end

    # A matrix is valid when it is an Array of 4-element Arrays of Numerics.
    # Any error raised while checking (e.g. a row that does not respond to
    # #size) counts as "invalid".
    #
    # @return [Boolean]
    def valid?
      @matrix.is_a?(Array) &&
        @matrix.all? { |position| position.is_a?(Array) } &&
        @matrix.all? { |position| position.all? { |element| element.is_a?(Numeric) } } &&
        @matrix.all? { |position| position.size == 4 }
    rescue StandardError
      false
    end

    # Calls the `key=` writer for every key of the hash that has one;
    # keys without a matching writer are ignored.
    def configure_from_hash(parse_result)
      parse_result.each { |key, value| send("#{key}=", value) if respond_to? "#{key}=" }
    end

    # Replaces the matrix. On an invalid matrix the previous one is restored
    # before the error is re-raised.
    #
    # @raise [ArgumentError] if new_matrix does not satisfy #valid?
    def matrix=(new_matrix)
      old_matrix, @matrix = matrix, new_matrix
      raise ArgumentError, 'Matrix has invalid format:' unless valid?
      # Memoized scores describe the previous matrix; drop them.
      clear_cache
    rescue
      @matrix = old_matrix
      raise
    end

    # Number of positions in the matrix.
    def length
      @matrix.length
    end
    alias_method :size, :length

    # Tab-separated rows, one position per line, optionally preceded by the
    # name line.
    #
    # @param with_name [Boolean] prepend the name when one is set
    def to_s(with_name = true)
      matrix = @matrix.map { |position| position.join("\t") }.join("\n")
      if with_name && @name
        "#{@name}\n#{matrix}"
      else
        matrix
      end
    end

    # Human-readable table with an "A C G T" header; values are rounded to
    # three decimals and right-aligned.
    #
    # @param with_name [Boolean] prepend the name when one is set
    def pretty_string(with_name = true)
      header = %w{A C G T}.map { |el| el.rjust(4).ljust(7) }.join + "\n"
      matrix_rows = @matrix.map do |position|
        position.map { |el| el.round(3).to_s.rjust(6) }.join(' ')
      end
      matrix = matrix_rows.join("\n")
      if with_name && @name
        @name + "\n" + header + matrix
      else
        header + matrix
      end
    end

    # Per-letter view of the matrix: maps each of A, C, G, T to the values of
    # that letter across all positions. `collect_hash` and
    # `with_indifferent_access` are loaded via 'bioinform/support'.
    def to_hash
      hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
        [letter, @matrix.map { |position| position.at(letter_index) }]
      end
      hsh.with_indifferent_access
    end

    # pm.background - returns a @background attribute
    # pm.background(new_background) - sets an attribute and returns pm itself
    # if more than one argument passed - raises an exception
    def background(*args)
      case args.size
      when 0 then @background
      when 1 then background!(args[0])
      else raise ArgumentError, '#background method can get 0 or 1 argument'
      end
    end

    # Sets the background, resets memoized values and returns self.
    def background!(new_background)
      clear_cache
      @background = new_background
      self
    end

    # A fresh all-zero position, used for padding by the augment methods.
    def self.zero_column
      [0.0, 0.0, 0.0, 0.0]
    end

    # In place: reverses the order of positions and the order of letters
    # within each position. Returns self.
    def reverse_complement!
      clear_cache
      @matrix.reverse!.map!(&:reverse!)
      self
    end

    # In place: prepends n zero positions. Returns self.
    def left_augment!(n)
      clear_cache
      n.times { @matrix.unshift(self.class.zero_column) }
      self
    end

    # In place: appends n zero positions. Returns self.
    def right_augment!(n)
      clear_cache
      n.times { @matrix.push(self.class.zero_column) }
      self
    end

    # In place: make worst score == 0 by shifting scores of each column.
    # Returns self.
    def shift_to_zero!
      clear_cache
      @matrix.map! { |position| min = position.min; position.map { |element| element - min } }
      self
    end

    # In place: multiplies every element by rate and rounds it up to an
    # integer. Returns self.
    def discrete!(rate)
      clear_cache
      @matrix.map! { |position| position.map { |element| (element * rate).ceil } }
      self
    end

    # Sum of the background vector, as a Float.
    def background_sum
      @background.inject(0.0, &:+)
    end

    # background_sum raised to the power of the matrix length.
    def vocabulary_volume
      background_sum ** length
    end

    # Background normalised so that its elements sum to 1.
    def probability
      sum = background_sum
      @background.map { |element| element.to_f / sum }
    end

    #def split(first_chunk_length)
    #  [@matrix.first(first_chunk_length), matrix.last(length - first_chunk_length)]
    #end
    #def permute_columns(permutation_index)
    #  @matrix.values_at(permutation_index)permutation_index.map{|col| matrix[col]}
    #end

    # Sum of the per-position maxima (memoized).
    def best_score
      @best_score ||= @matrix.inject(0.0) { |sum, col| sum + col.max }
    end

    # Sum of the per-position minima (memoized).
    def worst_score
      @worst_score ||= @matrix.inject(0.0) { |sum, col| sum + col.min }
    end

    # best score of suffix s[i..l]
    # Element i is the best score over positions i...length; the last element
    # (i == length) is 0.0. Memoized.
    def best_suffix
      @best_suffix ||= Array.new(length + 1) { |i| @matrix[i...length].map(&:max).inject(0.0, &:+) }
    end

    # Same as #best_suffix, but built from per-position minima. Memoized.
    def worst_suffix
      @worst_suffix ||= Array.new(length + 1) { |i| @matrix[i...length].map(&:min).inject(0.0, &:+) }
    end

    # Forgets all memoized score values.
    def clear_cache
      @best_score, @worst_score, @best_suffix, @worst_suffix = nil, nil, nil, nil
    end

    # Non-destructive counterparts: each works on a copy and returns it.
    def reverse_complement
      dup.reverse_complement!
    end

    def left_augment(n)
      dup.left_augment!(n)
    end

    def right_augment(n)
      dup.right_augment!(n)
    end

    def shift_to_zero
      dup.shift_to_zero!
    end

    def discrete(rate)
      dup.discrete!(rate)
    end

    # Copies via deep_dup (loaded via 'bioinform/support').
    # NOTE(review): presumably this copies the nested matrix arrays as well,
    # which the bang methods above rely on - confirm against support/deep_dup.
    def dup
      deep_dup
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/pm'
|
3
|
+
module Bioinform

  # Positional weight matrix: a PM whose elements are scores. Adds the mean
  # and variance of the score of a random word drawn letter by letter from
  # the background distribution (PM#probability), and a threshold estimate
  # based on a normal approximation.
  class PWM < PM
    # Expected score: sum over positions of the probability-weighted mean of
    # the four letter scores.
    #
    # @return [Float]
    def score_mean
      # PM#probability rebuilds its array on every call; compute it once.
      letter_probabilities = probability
      matrix.inject(0.0) do |mean, position|
        mean + position.each_index.inject(0.0) { |sum, letter| sum + position[letter] * letter_probabilities[letter] }
      end
    end

    # Score variance: sum over positions of E[x^2] - (E[x])^2.
    #
    # @return [Float]
    def score_variance
      letter_probabilities = probability
      matrix.inject(0.0) do |variance, position|
        mean_of_squares = position.each_index.inject(0.0) { |sum, letter| sum + position[letter]**2 * letter_probabilities[letter] }
        mean = position.each_index.inject(0.0) { |sum, letter| sum + position[letter] * letter_probabilities[letter] }
        variance + mean_of_squares - mean**2
      end
    end

    # Threshold estimate computed as mean + sqrt(2) * inverf(1 - 2p) * sigma,
    # i.e. the (1 - p) quantile of a normal distribution with the score mean
    # and variance. Math.inverf is not part of Ruby's core Math module; it is
    # expected to come from 'bioinform/support'.
    #
    # @param pvalue [Numeric]
    # @return [Float]
    def threshold_gauss_estimation(pvalue)
      sigma = Math.sqrt(score_variance)
      n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
      score_mean + n_ * sigma
    end
  end
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require 'bioinform/data_models/
|
2
|
-
require 'bioinform/data_models/
|
3
|
-
require 'bioinform/data_models/
|
4
|
-
require 'bioinform/data_models/
|
1
|
+
require 'bioinform/data_models/pm'
|
2
|
+
#require 'bioinform/data_models/pcm'
|
3
|
+
require 'bioinform/data_models/pwm'
|
4
|
+
#require 'bioinform/data_models/ppm'
|
5
5
|
#require 'bioinform/data_models/iupac_word'
|
6
|
-
#require 'bioinform/data_models/
|
6
|
+
#require 'bioinform/data_models/iupac_wordset'
|