bioinform 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/.gitignore +18 -17
  2. data/Gemfile +4 -4
  3. data/LICENSE +21 -21
  4. data/README.md +29 -29
  5. data/Rakefile +5 -12
  6. data/bioinform.gemspec +21 -21
  7. data/lib/bioinform/data_models/collection.rb +2 -0
  8. data/lib/bioinform/data_models/{iupac.rb → old_style_models_TO_BE_REMOVED/iupac.rb} +1 -1
  9. data/lib/bioinform/data_models/{iupac_word.rb → old_style_models_TO_BE_REMOVED/iupac_word.rb} +0 -0
  10. data/lib/bioinform/data_models/{positional_count_matrix.rb → old_style_models_TO_BE_REMOVED/positional_count_matrix.rb} +0 -0
  11. data/lib/bioinform/data_models/{positional_matrix.rb → old_style_models_TO_BE_REMOVED/positional_matrix.rb} +3 -5
  12. data/lib/bioinform/data_models/{positional_probability_matrix.rb → old_style_models_TO_BE_REMOVED/positional_probability_matrix.rb} +0 -0
  13. data/lib/bioinform/data_models/{positional_weight_matrix.rb → old_style_models_TO_BE_REMOVED/positional_weight_matrix.rb} +0 -0
  14. data/lib/bioinform/data_models/parser.rb +41 -0
  15. data/lib/bioinform/data_models/parsers/array_parser.rb +17 -0
  16. data/lib/bioinform/data_models/parsers/hash_parser.rb +19 -0
  17. data/lib/bioinform/data_models/parsers/string_fantom_parser.rb +21 -0
  18. data/lib/bioinform/data_models/parsers/string_parser.rb +45 -0
  19. data/lib/bioinform/data_models/parsers.rb +4 -0
  20. data/lib/bioinform/data_models/pcm.rb +7 -0
  21. data/lib/bioinform/data_models/pm.rb +195 -0
  22. data/lib/bioinform/data_models/ppm.rb +8 -0
  23. data/lib/bioinform/data_models/pwm.rb +23 -0
  24. data/lib/bioinform/data_models.rb +5 -5
  25. data/lib/bioinform/support/callable_symbol.rb +33 -4
  26. data/lib/bioinform/support/collect_hash.rb +7 -0
  27. data/lib/bioinform/support/curry_except_self.rb +2 -2
  28. data/lib/bioinform/support/deep_dup.rb +5 -0
  29. data/lib/bioinform/support/delete_many.rb +14 -0
  30. data/lib/bioinform/support/has_keys.rb +14 -0
  31. data/lib/bioinform/support/inverf.rb +13 -0
  32. data/lib/bioinform/support/partial_sums.rb +6 -0
  33. data/lib/bioinform/support/{same.rb → same_by.rb} +1 -1
  34. data/lib/bioinform/support.rb +13 -5
  35. data/lib/bioinform/version.rb +3 -3
  36. data/lib/bioinform.rb +8 -7
  37. data/spec/data_models/parser_spec.rb +46 -0
  38. data/spec/data_models/parsers/array_parser_spec.rb +53 -0
  39. data/spec/data_models/parsers/hash_parser_spec.rb +60 -0
  40. data/spec/data_models/parsers/string_fantom_parser_spec.rb +38 -0
  41. data/spec/data_models/parsers/string_parser_spec.rb +112 -0
  42. data/spec/data_models/pm_spec.rb +369 -0
  43. data/spec/data_models/pwm_spec.rb +25 -0
  44. data/spec/spec_helper.rb +30 -0
  45. data/spec/support/callable_symbol_spec.rb +66 -0
  46. data/spec/support/collect_hash_spec.rb +15 -0
  47. data/spec/support/curry_except_self_spec.rb +9 -0
  48. data/spec/support/delete_many_spec.rb +44 -0
  49. data/spec/support/has_keys_spec.rb +48 -0
  50. data/spec/support/inverf_spec.rb +19 -0
  51. data/spec/support/multiline_squish_spec.rb +11 -0
  52. data/spec/support/partial_sums_spec.rb +9 -0
  53. data/spec/support/same_by_spec.rb +36 -0
  54. metadata +60 -21
  55. data/lib/bioinform/support/pmap.rb +0 -10
  56. data/lib/bioinform/support/ptap.rb +0 -7
  57. data/spec/callable_symbol_spec.rb +0 -37
  58. data/spec/pmap_test.rb +0 -24
  59. data/spec/positional_matrix_spec.rb +0 -169
  60. data/spec/ptap_spec.rb +0 -17
  61. data/spec/same_spec.rb +0 -19
data/.gitignore CHANGED
@@ -1,17 +1,18 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- .yardoc
6
- Gemfile.lock
7
- InstalledFiles
8
- _yardoc
9
- coverage
10
- doc/
11
- lib/bundler/man
12
- pkg
13
- rdoc
14
- spec/reports
15
- test/tmp
16
- test/version_tmp
17
- tmp
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ TODO.txt
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'https://rubygems.org'
2
-
3
- # Specify your gem's dependencies in bioinform.gemspec
4
- gemspec
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in bioinform.gemspec
4
+ gemspec
data/LICENSE CHANGED
@@ -1,22 +1,22 @@
1
- Copyright (c) 2012 Ilya Vorontsov
2
-
3
- MIT License
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1
+ Copyright (c) 2012 Ilya Vorontsov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
22
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,29 +1,29 @@
1
- # Bioinform
2
-
3
- Bioinform is a bunch of classes extracted from daily bioinformatics work. This classes is an attempt to encapsulate loading(parsing) logic for positional matrices in different formats and common transformations. It also includes several core classes extensions which are particularly useful on Enumerables
4
-
5
- ## Installation
6
-
7
- Add this line to your application's Gemfile:
8
-
9
- gem 'bioinform'
10
-
11
- And then execute:
12
-
13
- $ bundle
14
-
15
- Or install it yourself as:
16
-
17
- $ gem install bioinform
18
-
19
- ## Usage
20
-
21
- Usage is under construction. I don't recommend use this gem for a while: syntax is on the way to change to more simple and concise. But stay tuned
22
-
23
- ## Contributing
24
-
25
- 1. Fork it
26
- 2. Create your feature branch (`git checkout -b my-new-feature`)
27
- 3. Commit your changes (`git commit -am 'Added some feature'`)
28
- 4. Push to the branch (`git push origin my-new-feature`)
29
- 5. Create new Pull Request
1
+ # Bioinform
2
+
3
+ Bioinform is a bunch of classes extracted from daily bioinformatics work. These classes are an attempt to encapsulate loading (parsing) logic for positional matrices in different formats and common transformations. It also includes several core class extensions which are particularly useful on Enumerables.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'bioinform'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install bioinform
18
+
19
+ ## Usage
20
+
21
+ Usage is under construction. I don't recommend using this gem for a while: the syntax is on its way to becoming simpler and more concise. But stay tuned.
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile CHANGED
@@ -1,12 +1,5 @@
1
- #!/usr/bin/env rake
2
- require "bundler/gem_tasks"
3
- require 'rspec/core/rake_task'
4
-
5
- desc 'Spec bioinform library'
6
- task :spec do
7
- Dir.glob('spec/*_spec.rb') do |spec_file|
8
- system("ruby #{spec_file}")
9
- end
10
- end
11
-
12
- # RSpec::Core::RakeTask.new
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new
data/bioinform.gemspec CHANGED
@@ -1,21 +1,21 @@
1
- # -*- encoding: utf-8 -*-
2
- require File.expand_path('../lib/bioinform/version', __FILE__)
3
-
4
- Gem::Specification.new do |gem|
5
- gem.authors = ["Ilya Vorontsov"]
6
- gem.email = ["prijutme4ty@gmail.com"]
7
- gem.description = %q{A bunch of useful classes for bioinformatics}
8
- gem.summary = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
9
- gem.homepage = ""
10
-
11
- gem.files = `git ls-files`.split($\)
12
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
- gem.name = "bioinform"
15
- gem.require_paths = ["lib"]
16
- gem.version = Bioinform::VERSION
17
-
18
- gem.add_dependency('active_support', '~> 3.0.0')
19
-
20
- gem.add_development_dependency "rspec", "~> 2.0"
21
- end
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/bioinform/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Ilya Vorontsov"]
6
+ gem.email = ["prijutme4ty@gmail.com"]
7
+ gem.description = %q{A bunch of useful classes for bioinformatics}
8
+ gem.summary = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
9
+ gem.homepage = ""
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "bioinform"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = Bioinform::VERSION
17
+
18
+ gem.add_dependency('active_support', '~> 3.0.0')
19
+
20
+ gem.add_development_dependency "rspec", "~> 2.0"
21
+ end
@@ -0,0 +1,2 @@
1
# Placeholder for a collection of positional matrices.
# The class has no behaviour yet: it declares no methods or state.
# NOTE(review): unlike the other data models shown in this diff it is declared
# at top level rather than inside the Bioinform module - confirm this is intended.
class PMCollection
end
@@ -10,7 +10,7 @@ class Iupac
10
10
  @words = [input]
11
11
  else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
12
12
  end
13
- raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same?(&:length)
13
+ raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same_by?(&:length)
14
14
  end
15
15
 
16
16
  def to_pcm
@@ -1,7 +1,5 @@
1
1
  require 'active_support/core_ext/hash/indifferent_access'
2
- require 'bioinform/support/multiline_squish'
3
- require 'bioinform/support/same'
4
- require 'bioinform/support/pmap'
2
+ require 'bioinform/support'
5
3
 
6
4
  class PositionalMatrix
7
5
  module DefaultParser
@@ -53,7 +51,7 @@ class PositionalMatrix
53
51
  else
54
52
  raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
55
53
  end
56
- raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same?(&:size)
54
+ raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same_by?(&:length)
57
55
  raise ArgumentError unless @matrix.size == 4 || @matrix.first.size == 4
58
56
  @matrix = @matrix.transpose if @matrix.first.size != 4
59
57
  end
@@ -64,7 +62,7 @@ class PositionalMatrix
64
62
  alias_method :length, :size
65
63
 
66
64
  def to_s(with_name = true)
67
- mat_str = @matrix.pmap("\t",&:join).join("\n")
65
+ mat_str = @matrix.map(&:join.("\t")).join("\n")
68
66
  (with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
69
67
  end
70
68
 
@@ -0,0 +1,41 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
3
+
4
module Bioinform
  class PM
    # Abstract base class for positional-matrix parsers.
    #
    # A concrete parser inherits from PM::Parser and overrides #parse_core so
    # that it returns the parse result for #input, or raises when the input is
    # not in a format it understands. Every class that inherits from PM::Parser,
    # directly or through another parser, is appended to PM::Parser.subclasses.
    class Parser
      # The raw object handed to the parser.
      attr_reader :input

      # Registry of parser classes; lives on PM::Parser itself.
      @subclasses ||= []

      class << self
        # @return [Array<Class>, nil] the registered parser classes when called
        #   on PM::Parser; nil on subclasses, which do not own a registry.
        attr_reader :subclasses

        # Hook run by Ruby whenever a class inherits from this one. The
        # subclass is always stored in the PM::Parser registry, even when it
        # inherits from an intermediate parser class.
        def inherited(subclass)
          super
          PM::Parser.subclasses << subclass
        end
      end

      # @param input [Object] the data to parse
      def initialize(input)
        @input = input
      end

      # Format-specific parsing; must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in the base class
      def parse_core
        raise NotImplementedError, "#{self.class} must implement #parse_core"
      end

      # Parses the input.
      #
      # @return [Object] whatever #parse_core returns
      def parse
        parse_core
      end

      # Tells whether the input can be parsed, by running #parse_core and
      # discarding its result.
      #
      # NotImplementedError is listed explicitly because it descends from
      # ScriptError and would not be caught by a bare `rescue`.
      #
      # @return [Boolean] true when #parse_core finishes without raising
      def can_parse?
        parse_core
        true
      rescue StandardError, NotImplementedError
        false
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/parser'
3
+
4
module Bioinform
  # Parses a matrix given as an Array of Arrays.
  #
  # The input must be a non-empty array of equally long arrays in which either
  # every row has 4 elements or there are exactly 4 rows.
  class ArrayParser < PM::Parser
    # @return [Hash] `{matrix: rows}` where every row has 4 elements. The input
    #   is returned as is when its rows already have 4 elements, and transposed
    #   when it consists of 4 rows of another length.
    # @raise [ArgumentError] when the input does not have one of the two shapes
    def parse_core
      raise ArgumentError unless rectangular?

      # Rows all share one length, so the first row is representative.
      if input.first.size == 4
        { matrix: input }
      elsif input.size == 4
        { matrix: input.transpose }
      else
        raise ArgumentError
      end
    end

    private

    # True when the input is a non-empty Array of Arrays of one common length.
    # `same_by?` is a project helper from bioinform/support.
    def rectangular?
      input.is_a?(Array) &&
        !input.empty? &&
        input.all? { |row| row.is_a?(Array) } &&
        input.same_by?(&:length)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/parser'
3
+
4
module Bioinform
  # Parses a matrix given either as a Hash keyed by the letters A, C, G, T
  # (each value being that letter's values along the matrix) or as an Array of
  # per-position Hashes with those four keys. Keys may be Strings or Symbols:
  # lookups go through `with_indifferent_access`.
  class HashParser < PM::Parser
    # @return [Hash] `{matrix: rows}` with one row per position, each row
    #   holding the A, C, G, T values in that order
    # @raise [ArgumentError] when the input is neither a Hash nor an Array, or
    #   does not provide the four letters consistently
    def parse_core
      case input
      when Hash then parse_letter_hash(input.with_indifferent_access)
      when Array then parse_position_hashes
      else raise ArgumentError
      end
    end

    private

    # Input of the form {A: [...], C: [...], G: [...], T: [...]}.
    # `has_all_keys?` and `same_by?` are project helpers from bioinform/support.
    def parse_letter_hash(letters)
      raise ArgumentError unless letters.has_all_keys?(:A, :C, :G, :T)

      rows = letters.values_at(:A, :C, :G, :T)
      raise ArgumentError unless rows.same_by?(&:length)

      { matrix: rows.transpose }
    end

    # Input of the form [{A: .., C: .., G: .., T: ..}, ...].
    def parse_position_hashes
      well_formed = input.all? { |position| position.is_a?(Hash) && position.size == 4 }
      raise ArgumentError unless well_formed

      positions = input.map(&:with_indifferent_access)
      # Four arbitrary keys are not enough: a missing letter would otherwise
      # end up as a nil cell in the matrix.
      raise ArgumentError unless positions.all? { |position| position.has_all_keys?(:A, :C, :G, :T) }

      { matrix: positions.map { |position| position.values_at(:A, :C, :G, :T) } }
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/parser'
3
+ require 'bioinform/data_models/parsers/string_parser'
4
+
5
module Bioinform
  # StringParser variant for text in which a mandatory "NA <name>" line is
  # followed by a "<token> A C G T" line, and every matrix row starts with an
  # extra leading token that is not part of the matrix values.
  # NOTE(review): presumably this is the FANTOM motif format - confirm.
  class StringFantomParser < StringParser
    # Regexp source for one row: a leading word token, then the numbers.
    def row_pat
      '[\w\d]+ ' + "(#{number_pat} )*#{number_pat}"
    end

    # Regexp source for the name line; the name is captured as `name`.
    def name_pat
      'NA (?<name>[\w.+:-]+)'
    end

    # Regexp source for the two header lines (name line and column titles).
    def header_pat
      "#{name_pat}\n" + '[\w\d]+ ' + "A C G T\n"
    end

    # Turns the matched matrix text into rows of Floats, dropping the leading
    # token of every line.
    def matrix_preprocess(matrix)
      matrix.split("\n").map { |line| line.split[1..-1].map(&:to_f) }
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/parser'
3
+ require 'bioinform/data_models/parsers/array_parser'
4
+
5
module Bioinform
  # Parses a matrix written as text: an optional name line followed by rows of
  # space-separated numbers. The *_pat methods return regexp sources and can be
  # overridden by subclasses to describe other textual layouts.
  class StringParser < PM::Parser
    # Regexp source for one number: optional sign, digits, optional fraction
    # and optional exponent. The exponent is accepted because Float#to_s emits
    # it for small or large values (e.g. "1.0e-05") and String#to_f reads it.
    def number_pat
      '[+-]?\d+(\.\d+)?([eE][+-]?\d+)?'
    end

    # Regexp source for one row of numbers separated by single spaces.
    def row_pat
      "(#{number_pat} )*#{number_pat}"
    end

    # Regexp source for the name, optionally preceded by ">" and a space;
    # the name is captured as `name`.
    def name_pat
      '>? ?(?<name>[\w.+:-]+)'
    end

    # Regexp source for the whole matrix body, captured as `matrix`.
    def matrix_pat
      "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    end

    # Regexp source for the optional name line.
    def header_pat
      "(#{name_pat}\n)?"
    end

    # @return [Regexp] pattern that must match the entire squished input
    def pattern
      /\A#{header_pat}#{matrix_pat}\z/
    end

    # when matrix is extracted from the string it should be transformed to a matrix of numerics
    def matrix_preprocess(matrix)
      matrix.split("\n").map { |line| line.split.map(&:to_f) }
    end

    # @return [Hash] the ArrayParser result for the numeric rows, with a
    #   `:name` entry added when the input had a name line
    # @raise [ArgumentError] when the input is not a String, does not match
    #   #pattern, or #matrix_preprocess yields nothing
    def parse_core
      raise ArgumentError unless input.is_a?(String)

      # `multiline_squish` is a project helper from bioinform/support.
      match = input.multiline_squish.match(pattern)
      raise ArgumentError unless match

      matrix = matrix_preprocess(match[:matrix])
      raise ArgumentError unless matrix

      result = ArrayParser.new(matrix).parse
      match[:name] ? result.merge(name: match[:name]) : result
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'bioinform/data_models/parsers/array_parser'
2
+ require 'bioinform/data_models/parsers/hash_parser'
3
+ require 'bioinform/data_models/parsers/string_parser'
4
+ require 'bioinform/data_models/parsers/string_fantom_parser'
@@ -0,0 +1,7 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
3
module Bioinform
  # Subclass of PM that currently adds no behaviour of its own.
  # NOTE(review): presumably the positional count matrix that replaces the
  # old-style positional_count_matrix model - confirm.
  class PCM < PM
  end
end
@@ -0,0 +1,195 @@
1
+ require 'bioinform/support'
2
+
3
module Bioinform
  # Positional matrix: an Array of positions, each position being an Array of
  # four Numerics for the letters A, C, G, T (in that order).
  #
  # A matrix can be built from raw input through a parser class, or created
  # empty and filled through #matrix=. Methods ending in `!` modify the
  # receiver and return it; their counterparts without `!` work on a copy.
  class PM
    attr_reader :matrix
    attr_accessor :name

    # @param input [Object, nil] raw data to parse; when nil (or false) the
    #   object is created without a matrix
    # @param parser [Class, nil] parser class to use; when nil the first class
    #   in PM::Parser.subclasses able to parse the input is chosen
    # @raise [ArgumentError] when the input cannot be parsed
    def initialize(input = nil, parser = nil)
      @background = [1, 1, 1, 1]
      @input = input
      @parser = parser
      return unless @input

      parser_init
      matrix_init
    end

    # Checks the explicitly given parser against the input, or picks the first
    # registered parser that accepts it.
    #
    # @raise [ArgumentError] when no suitable parser is available
    def parser_init
      if @parser
        raise ArgumentError, 'Input cannot be parsed by specified parser' unless @parser.new(@input).can_parse?
      else
        @parser = PM::Parser.subclasses.find { |parser_class| parser_class.new(@input).can_parse? }
        raise ArgumentError, 'No one parser can parse specified input' unless @parser
      end
    end

    # Parses the input with the selected parser and applies the result.
    #
    # @raise [ArgumentError] when the parse result has no `:matrix` key
    def matrix_init
      parse_result = @parser.new(@input).parse
      raise ArgumentError, 'Used parser result has no `matrix` key' unless parse_result.has_key? :matrix

      configure_from_hash(parse_result)
    end

    # @return [Boolean] true when the matrix is an Array of 4-element Arrays
    #   of Numerics; false otherwise, including when the check itself raises
    def valid?
      @matrix.is_a?(Array) &&
        @matrix.all? { |position| position.is_a?(Array) } &&
        @matrix.all? { |position| position.all? { |element| element.is_a?(Numeric) } } &&
        @matrix.all? { |position| position.size == 4 }
    rescue StandardError
      false
    end

    # Assigns every entry of the parse result through the matching writer
    # (`key=`); entries without a writer are ignored.
    def configure_from_hash(parse_result)
      parse_result.each { |key, value| send("#{key}=", value) if respond_to? "#{key}=" }
    end

    # Replaces the matrix. On invalid input the previous matrix is restored
    # and the error is re-raised.
    #
    # @raise [ArgumentError] when the new matrix is not valid (see #valid?)
    def matrix=(new_matrix)
      old_matrix, @matrix = matrix, new_matrix
      raise ArgumentError, 'Matrix has invalid format:' unless valid?

      # Memoised scores belong to the previous matrix.
      clear_cache
    rescue StandardError
      @matrix = old_matrix
      raise
    end

    # @return [Integer] number of positions
    def length
      @matrix.length
    end
    alias_method :size, :length

    # @param with_name [Boolean] whether to put the name on the first line
    # @return [String] positions one per line, values separated by tabs
    def to_s(with_name = true)
      matrix = @matrix.map { |position| position.join("\t") }.join("\n")
      if with_name && @name
        "#{@name}\n#{matrix}"
      else
        matrix
      end
    end

    # @param with_name [Boolean] whether to put the name on the first line
    # @return [String] an "A C G T" header followed by the positions, with
    #   values rounded to 3 digits and right-aligned in columns
    def pretty_string(with_name = true)
      header = %w{A C G T}.map { |el| el.rjust(4).ljust(7) }.join + "\n"
      matrix_rows = @matrix.map do |position|
        position.map { |el| el.round(3).to_s.rjust(6) }.join(' ')
      end
      matrix = matrix_rows.join("\n")
      if with_name && @name
        @name + "\n" + header + matrix
      else
        header + matrix
      end
    end

    # @return [Hash] letter => that letter's values along the matrix, readable
    #   with String or Symbol keys. `collect_hash` is a project helper from
    #   bioinform/support; it is expected to build a Hash from the pairs.
    def to_hash
      hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
        [letter, @matrix.map { |position| position.at(letter_index) }]
      end
      hsh.with_indifferent_access
    end

    # pm.background - returns a @background attribute
    # pm.background(new_background) - sets an attribute and returns pm itself
    # if more than one argument passed - raises an exception
    def background(*args)
      case args.size
      when 0 then @background
      when 1 then background!(args[0])
      else raise ArgumentError, '#background method can get 0 or 1 argument'
      end
    end

    # Sets the background and drops memoised scores.
    #
    # @return [PM] self
    def background!(new_background)
      clear_cache
      @background = new_background
      self
    end

    # @return [Array<Float>] a fresh all-zero position
    def self.zero_column
      [0.0, 0.0, 0.0, 0.0]
    end

    # Reverses the order of positions and the order of values inside every
    # position (A, C, G, T becomes T, G, C, A), in place.
    #
    # @return [PM] self
    def reverse_complement!
      clear_cache
      @matrix.reverse!.map!(&:reverse!)
      self
    end

    # Prepends n zero positions, in place.
    #
    # @return [PM] self
    def left_augment!(n)
      clear_cache
      n.times { @matrix.unshift(self.class.zero_column) }
      self
    end

    # Appends n zero positions, in place.
    #
    # @return [PM] self
    def right_augment!(n)
      clear_cache
      n.times { @matrix.push(self.class.zero_column) }
      self
    end

    # make worst score == 0 by shifting scores of each column
    #
    # @return [PM] self
    def shift_to_zero!
      clear_cache
      @matrix.map! do |position|
        min = position.min
        position.map { |element| element - min }
      end
      self
    end

    # Multiplies every element by rate and rounds it up, in place.
    #
    # @return [PM] self
    def discrete!(rate)
      clear_cache
      @matrix.map! { |position| position.map { |element| (element * rate).ceil } }
      self
    end

    # @return [Float] sum of the background values
    def background_sum
      @background.inject(0.0, &:+)
    end

    # @return [Float] background_sum raised to the power of the matrix length
    def vocabulary_volume
      background_sum ** length
    end

    # @return [Array<Float>] background values divided by their sum
    def probability
      sum = background_sum
      @background.map { |element| element.to_f / sum }
    end

    # Disabled drafts kept from the original source:
    #def split(first_chunk_length)
    #  [@matrix.first(first_chunk_length), matrix.last(length - first_chunk_length)]
    #end
    #def permute_columns(permutation_index)
    #  @matrix.values_at(permutation_index)permutation_index.map{|col| matrix[col]}
    #end

    # @return [Float] sum over positions of the largest value (memoised)
    def best_score
      @best_score ||= @matrix.inject(0.0) { |sum, col| sum + col.max }
    end

    # @return [Float] sum over positions of the smallest value (memoised)
    def worst_score
      @worst_score ||= @matrix.inject(0.0) { |sum, col| sum + col.min }
    end

    # best score of suffix s[i..l]
    #
    # @return [Array<Float>] length + 1 elements; element i is the sum of the
    #   largest values of positions i...length, so the last element is 0.0
    def best_suffix
      @best_suffix ||= Array.new(length + 1) { |i| @matrix[i...length].map(&:max).inject(0.0, &:+) }
    end

    # Same as #best_suffix, but built from the smallest value of each position.
    def worst_suffix
      @worst_suffix ||= Array.new(length + 1) { |i| @matrix[i...length].map(&:min).inject(0.0, &:+) }
    end

    # Forgets all memoised scores; called by everything that changes the
    # matrix or the background.
    def clear_cache
      @best_score, @worst_score, @best_suffix, @worst_suffix = nil, nil, nil, nil
    end

    # Non-destructive counterparts: each applies the `!` method to a copy.
    def reverse_complement
      dup.reverse_complement!
    end

    def left_augment(n)
      dup.left_augment!(n)
    end

    def right_augment(n)
      dup.right_augment!(n)
    end

    def shift_to_zero
      dup.shift_to_zero!
    end

    def discrete(rate)
      dup.discrete!(rate)
    end

    # Copies through `deep_dup` (a project helper from bioinform/support), so
    # the `!` methods applied to a copy are not meant to touch the original.
    def dup
      deep_dup
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
3
+
4
module Bioinform
  # Subclass of PM that currently adds no behaviour of its own.
  # NOTE(review): presumably the positional probability matrix that replaces
  # the old-style positional_probability_matrix model - confirm.
  class PPM < PM
  end
end
@@ -0,0 +1,23 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
3
module Bioinform
  # PM subclass adding score statistics. Every statistic weights the letters
  # of a position by #probability, i.e. the normalised background.
  class PWM < PM
    # @return [Float] sum over positions of the probability-weighted mean of
    #   the position's values
    def score_mean
      probs = probability # identical for every position, so computed once
      matrix.inject(0.0) do |mean, position|
        mean + position.each_index.inject(0.0) { |sum, letter| sum + position[letter] * probs[letter] }
      end
    end

    # @return [Float] sum over positions of the probability-weighted variance
    #   of the position's values (mean of squares minus squared mean)
    def score_variance
      probs = probability # identical for every position, so computed once
      matrix.inject(0.0) do |variance, position|
        mean_square = position.each_index.inject(0.0) { |sum, letter| sum + position[letter]**2 * probs[letter] }
        mean = position.each_index.inject(0.0) { |sum, letter| sum + position[letter] * probs[letter] }
        variance + mean_square - mean**2
      end
    end

    # Threshold estimated from a normal approximation of the score
    # distribution: score_mean plus sqrt(2) * inverf(1 - 2 * pvalue) standard
    # deviations. `Math.inverf` is a project extension from bioinform/support.
    #
    # @param pvalue [Numeric]
    # @return [Float]
    def threshold_gauss_estimation(pvalue)
      sigma = Math.sqrt(score_variance)
      n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
      score_mean + n_ * sigma
    end
  end
end
@@ -1,6 +1,6 @@
1
- require 'bioinform/data_models/positional_matrix'
2
- require 'bioinform/data_models/positional_count_matrix'
3
- require 'bioinform/data_models/positional_weight_matrix'
4
- require 'bioinform/data_models/positional_probability_matrix'
1
+ require 'bioinform/data_models/pm'
2
+ #require 'bioinform/data_models/pcm'
3
+ require 'bioinform/data_models/pwm'
4
+ #require 'bioinform/data_models/ppm'
5
5
  #require 'bioinform/data_models/iupac_word'
6
- #require 'bioinform/data_models/iupac' #require 'bioinform/data_models/iupac_vocabulary'
6
+ #require 'bioinform/data_models/iupac_wordset'