bioinform 0.1.17 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,38 @@
1
+ module Bioinform
2
+ module MotifModel
3
+ class NamedModel
4
+ attr_reader :model, :name
5
+ def initialize(model, name)
6
+ @model, @name = model, name
7
+ end
8
+
9
+ def motif_klasses
10
+ Bioinform::MotifModel.constants.map{|konst| Bioinform::MotifModel.const_get(konst) }.select{|konst| konst.is_a? Class }
11
+ end
12
+
13
+ def motif?(object)
14
+ motif_klasses.any?{|klass| object.is_a?(klass) }
15
+ end
16
+
17
+ private :motif_klasses, :motif?
18
+
19
+ def method_missing(meth, *args, &block)
20
+ result = model.public_send(meth, *args, &block)
21
+ if motif?(result) && ! result.is_a?(self.class)
22
+ self.class.new(result, name)
23
+ else
24
+ result
25
+ end
26
+ end
27
+
28
+ # should not be delegated to self (because in that case name won't be displayed)
29
+ def to_s
30
+ MotifFormatter.new.format(self)
31
+ end
32
+
33
+ def ==(other)
34
+ model == other.model && name == other.name
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,36 +1,26 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
3
- require_relative '../conversion_algorithms/pcm2ppm_converter'
4
- require_relative '../conversion_algorithms/pcm2pwm_converter'
1
+ require_relative 'pm'
5
2
 
6
3
  module Bioinform
7
- class PCM < PM
8
- make_parameters :pseudocount
9
-
10
- def count
11
- matrix.first.inject(&:+)
12
- end
13
-
14
- def to_pcm
15
- self
16
- end
17
-
18
- def to_pwm(pseudocount = Math.log(count))
19
- ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
20
- end
21
-
22
- def to_ppm
23
- ConversionAlgorithms::PCM2PPMConverter.convert(self)
4
+ module MotifModel
5
+ def self.acts_as_pcm?(pcm)
6
+ pcm.is_a?(MotifModel::PCM) || pcm.is_a?(MotifModel::NamedModel) && acts_as_pcm?(pcm.model)
24
7
  end
25
8
 
26
- def self.valid_matrix?(matrix, options = {})
27
- super && matrix.all?{|pos| pos.all?{|el| el >=0 } }
28
- end
9
+ class PCM < PM
10
+ def validation_errors
11
+ errors = super
12
+ errors << "elements of PCM should be non-negative" unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
13
+ errors
14
+ end
29
15
 
30
- def validation_errors(options = {})
31
- validation_errors = []
32
- validation_errors << "PCM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
33
- super + validation_errors
16
+ def count
17
+ counts = each_position.map{|pos| pos.inject(0.0, &:+)}
18
+ count = counts.first
19
+ diffs = counts.map{|pos_count| (pos_count - count).abs }
20
+ counts_are_same = (diffs.max < count * 1e-3)
21
+ raise Error, 'Different columns have different count' unless counts_are_same
22
+ count
23
+ end
34
24
  end
35
25
  end
36
26
  end
@@ -1,198 +1,101 @@
1
- require 'ostruct'
2
- require_relative '../support'
3
- require_relative '../parsers'
4
- require_relative '../formatters'
1
+ require_relative '../formatters/motif_formatter'
2
+ require_relative '../errors'
3
+ require_relative '../alphabet'
4
+ require_relative 'named_model'
5
5
 
6
6
  module Bioinform
7
- IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
8
- 'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
9
- LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
10
-
11
- class PM
12
- attr_accessor :matrix, :parameters
13
-
14
- include Parameters
15
- make_parameters :name, :background # , :tags
16
-
17
- # def mark(tag)
18
- # tags << tag
19
- # end
20
-
21
- # def tagged?(tag)
22
- # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
23
- # end
24
-
25
- def self.choose_parser(input)
26
- [TrivialParser, YAMLParser, Parser, StringParser, Bioinform::MatrixParser.new(has_name: false).wrapper, Bioinform::MatrixParser.new(has_name: true).wrapper, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
27
- self.new(input, parser) rescue nil
7
+ module MotifModel
8
+ class PM
9
+ attr_reader :matrix, :alphabet
10
+ def initialize(matrix, options = {})
11
+ @matrix = matrix
12
+ @alphabet = options.fetch(:alphabet, NucleotideAlphabet)
13
+ raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
28
14
  end
29
- end
30
-
31
- def self.split_on_motifs(input)
32
- parser = choose_parser(input)
33
- raise ParsingError, "No parser can parse given input" unless parser
34
- parser.split_on_motifs(input, self)
35
- end
36
-
37
- def initialize(input, parser = nil)
38
- @parameters = OpenStruct.new
39
- parser ||= self.class.choose_parser(input)
40
- raise 'No one parser can process input' unless parser
41
- result = parser.new(input).parse
42
- @matrix = result.matrix
43
- raise 'Non valid matrix' unless self.class.valid_matrix?(@matrix)
44
- self.name = result.name
45
- # self.tags = result.tags || []
46
- self.background = result.background || [1, 1, 1, 1]
47
- end
48
-
49
- def self.new_with_validation(input, parser = nil)
50
- obj = self.new(input, parser)
51
- raise 'matrix not valid' unless obj.valid?
52
- obj
53
- end
54
- def ==(other)
55
- @matrix == other.matrix && background == other.background && name == other.name
56
- rescue
57
- false
58
- end
59
-
60
- def self.valid_matrix?(matrix, options = {})
61
- matrix.is_a?(Array) &&
62
- ! matrix.empty? &&
63
- matrix.all?{|pos| pos.is_a?(Array)} &&
64
- matrix.all?{|pos| pos.size == 4} &&
65
- matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
66
- rescue
67
- false
68
- end
69
15
 
70
- def validation_errors(options = {})
71
- errors = []
72
- if !matrix.is_a?(Array)
73
- errors << 'Matrix is not an array'
74
- elsif matrix.empty?
75
- errors << 'Matrix is not an array'
76
- elsif ! matrix.all?{|pos| pos.is_a?(Array)}
77
- errors << 'Some of matrix positions aren\'t represented as arrays'
78
- elsif ! matrix.all?{|pos| pos.size == 4}
79
- errors << 'Some of matrix positions have number of columns other than 4'
80
- elsif ! matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
81
- errors << 'Some of matrix elements aren\'t represented by numbers'
16
+ def self.from_string(input, options = {})
17
+ parser = options.fetch(:parser, MatrixParser.new)
18
+ alphabet = options.fetch(:alphabet, NucleotideAlphabet)
19
+ info = parser.parse!(input)
20
+ self.new(info[:matrix], alphabet: alphabet).named( info[:name] )
82
21
  end
83
- errors
84
- end
85
-
86
- def valid?(options = {})
87
- self.class.valid_matrix?(@matrix, options)
88
- end
89
22
 
90
- def each_position
91
- if block_given?
92
- matrix.each{|pos| yield pos}
93
- else
94
- self.to_enum(:each_position)
23
+ def self.from_file(filename, options = {})
24
+ parser = options.fetch(:parser, MatrixParser.new)
25
+ alphabet = options.fetch(:alphabet, NucleotideAlphabet)
26
+ info = parser.parse!(File.read(filename))
27
+ name = (info[:name] && !info[:name].strip.empty?) ? info[:name] : File.basename(filename, File.extname(filename))
28
+ self.new(info[:matrix], alphabet: alphabet).named( name )
95
29
  end
96
- end
97
-
98
- def length
99
- @matrix.length
100
- end
101
- alias_method :size, :length
102
30
 
103
- def to_s(options = {}, formatter = RawFormatter)
104
- formatter.new(self, options).to_s
105
- end
31
+ def validation_errors
32
+ errors = []
33
+ errors << "matrix should be an Array" unless matrix.is_a? Array
34
+ errors << "matrix shouldn't be empty" unless matrix.size > 0
35
+ errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
36
+ errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all?{|pos| pos.size == alphabet.size }
37
+ errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
38
+ errors
39
+ end
40
+ private :validation_errors
106
41
 
107
- def pretty_string(options = {})
108
- default_options = {with_name: true, letters_as_rows: false}
42
+ def valid?
43
+ validation_errors.empty?
44
+ rescue
45
+ false
46
+ end
109
47
 
110
- return to_s(options) if options[:letters_as_rows]
48
+ private :valid?
111
49
 
112
- options = default_options.merge(options)
113
- header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
114
- matrix_rows = each_position.map do |position|
115
- position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
50
+ def length
51
+ matrix.size
116
52
  end
117
53
 
118
- matrix_str = matrix_rows.join("\n")
119
-
120
- if options[:with_name] && name
121
- name + "\n" + header + matrix_str
122
- else
123
- header + matrix_str
54
+ def to_s
55
+ MotifFormatter.new.format(self)
124
56
  end
125
- end
126
57
 
127
- def consensus
128
- each_position.map{|pos|
129
- pos.each_with_index.max_by{|el, letter_index| el}
130
- }.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
131
- end
58
+ def ==(other)
59
+ self.class == other.class && matrix == other.matrix && alphabet == other.alphabet
60
+ end
132
61
 
62
+ def each_position
63
+ if block_given?
64
+ matrix.each{|pos| yield pos}
65
+ else
66
+ self.to_enum(:each_position)
67
+ end
68
+ end
133
69
 
134
- def to_hash
135
- hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
136
- [ letter, @matrix.map{|pos| pos[letter_index]} ]
70
+ def reversed
71
+ self.class.new(matrix.reverse, alphabet: alphabet)
137
72
  end
138
- hsh.with_indifferent_access
139
- end
140
73
 
141
- def self.zero_column
142
- [0, 0, 0, 0]
143
- end
74
+ def complemented
75
+ self.class.new(complement_matrix, alphabet: alphabet)
76
+ end
144
77
 
145
- def reverse_complement!
146
- @matrix.reverse!.map!(&:reverse!)
147
- self
148
- end
149
- def left_augment!(n)
150
- n.times{ @matrix.unshift(self.class.zero_column) }
151
- self
152
- end
153
- def right_augment!(n)
154
- n.times{ @matrix.push(self.class.zero_column) }
155
- self
156
- end
78
+ def reverse_complemented
79
+ self.class.new(complement_matrix.reverse, alphabet: alphabet)
80
+ end
157
81
 
158
- def discrete!(rate)
159
- @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
160
- self
161
- end
82
+ alias_method :revcomp, :reverse_complemented
162
83
 
163
- def vocabulary_volume
164
- background.inject(&:+) ** length
165
- end
84
+ def complement_matrix
85
+ matrix.map{|pos|
86
+ alphabet.each_letter_index.map{|letter_index| pos[alphabet.complement_index(letter_index)]}
87
+ }
88
+ end
89
+ private :complement_matrix
166
90
 
167
- def probability
168
- sum = background.inject(0.0, &:+)
169
- background.map{|element| element.to_f / sum}
170
- end
91
+ # def consensus
92
+ # ConsensusFormatter.by_maximal_elements.format_string(self)
93
+ # end
171
94
 
172
- def reverse_complement
173
- dup.reverse_complement!
174
- end
175
- def left_augment(n)
176
- dup.left_augment!(n)
177
- end
178
- def right_augment(n)
179
- dup.right_augment!(n)
180
- end
181
- def discrete(rate)
182
- dup.discrete!(rate)
183
- end
184
- def dup
185
- deep_dup
186
- end
95
+ def named(name)
96
+ NamedModel.new(self, name)
97
+ end
187
98
 
188
- def as_pcm
189
- PCM.new(get_parameters.merge(matrix: matrix))
190
- end
191
- def as_ppm
192
- PPM.new(get_parameters.merge(matrix: matrix))
193
- end
194
- def as_pwm
195
- PWM.new(get_parameters.merge(matrix: matrix))
196
99
  end
197
100
  end
198
101
  end
@@ -1,31 +1,18 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
1
+ require_relative 'pm'
3
2
 
4
3
  module Bioinform
5
- class PPM < PM
6
- make_parameters :effective_count, :pseudocount
7
- def to_ppm
8
- self
4
+ module MotifModel
5
+ def self.acts_as_ppm?(ppm)
6
+ ppm.is_a?(MotifModel::PPM) || ppm.is_a?(MotifModel::NamedModel) && acts_as_ppm?(ppm.model)
9
7
  end
10
8
 
11
- def to_pcm
12
- PCM.new(matrix.map{|pos| pos.map{|el| el * effective_count} }).tap{|pcm| pcm.name = name}
13
- end
14
-
15
- def to_pwm
16
- pseudocount ? to_pcm.to_pwm(pseudocount) : to_pcm.to_pwm
17
- end
18
-
19
- def self.valid_matrix?(matrix, options = {})
20
- precision = options[:precision] || 0.01
21
- super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
22
- end
23
- def validation_errors(options = {})
24
- precision = options[:precision] || 0.01
25
- validation_errors = []
26
- validation_errors << "PPM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
27
- validation_errors << "Sum of PPM matrix elements for each position should equal to 1" unless matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) }
28
- super + validation_errors
9
+ class PPM < PM
10
+ def validation_errors
11
+ errors = super
12
+ errors << "elements of PPM should be non-negative" unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
13
+ errors << "each PPM position should be equal to 1.0 being summed" unless matrix.all?{|pos| (pos.inject(0.0, &:+) - 1.0).abs < 1e-3 }
14
+ errors
15
+ end
29
16
  end
30
17
  end
31
18
  end
@@ -1,67 +1,41 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
1
+ require_relative 'pm'
3
2
 
4
3
  module Bioinform
5
- class PWM < PM
6
- def score_mean
7
- each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
8
- end
9
- def score_variance
10
- each_position.inject(0) do |variance, position|
11
- variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
12
- position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
13
- end
14
- end
15
-
16
- def threshold_gauss_estimation(pvalue)
17
- sigma = Math.sqrt(score_variance)
18
- n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
19
- score_mean + n_ * sigma
20
- end
21
-
22
- def score(word)
23
- raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
- #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- (0...length).map do |pos|
26
- letter = word[pos]
27
- if IndexByLetter[letter]
28
- matrix[pos][IndexByLetter[letter]]
29
- elsif letter == 'N'
30
- matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
31
- else
32
- raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
33
- end
34
- end.inject(0, &:+).to_f
35
- end
36
-
37
- def to_pwm
38
- self
4
+ module MotifModel
5
+ def self.acts_as_pwm?(pwm)
6
+ pwm.is_a?(MotifModel::PWM) || pwm.is_a?(MotifModel::NamedModel) && acts_as_pwm?(pwm.model)
39
7
  end
40
8
 
41
- def best_score
42
- best_suffix(0)
43
- end
44
- def worst_score
45
- worst_suffix(0)
46
- end
47
-
48
- # best score of suffix s[i..l]
49
- def best_suffix(i)
50
- @matrix[i...length].map(&:max).inject(0.0, &:+)
51
- end
9
+ class PWM < PM
10
+ def score(word)
11
+ raise Error, 'Word length should be the same as PWM length' unless word.length == length
12
+ length.times.map do |pos|
13
+ matrix[pos][alphabet.index_by_letter(word[pos])]
14
+ end.inject(0.0, &:+)
15
+ end
52
16
 
53
- def worst_suffix(i)
54
- @matrix[i...length].map(&:min).inject(0.0, &:+)
55
- end
17
+ def discreted(rate, options = {})
18
+ rounding_method = options.fetch(:rounding_method, :ceil)
19
+ discreted_matrix = matrix.map{|position| position.map{|element| (element * rate).send(rounding_method) } }
20
+ self.class.new(discreted_matrix, alphabet: alphabet)
21
+ end
56
22
 
23
+ def zero_column
24
+ [0.0] * alphabet.size
25
+ end
26
+ private :zero_column
57
27
 
58
- def matrix_rounded(n)
59
- matrix.map{|pos| pos.map{|x| x.round(n) } }
60
- end
61
- private :matrix_rounded
28
+ def left_augmented(n)
29
+ raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
30
+ augmented_matrix = Array.new(n, zero_column) + matrix
31
+ self.class.new(augmented_matrix, alphabet: alphabet)
32
+ end
62
33
 
63
- def round(n)
64
- PWM.new(matrix_rounded(n)).tap{|pm| pm.name = name}
34
+ def right_augmented(n)
35
+ raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
36
+ augmented_matrix = matrix + Array.new(n, zero_column)
37
+ self.class.new(augmented_matrix, alphabet: alphabet)
38
+ end
65
39
  end
66
40
  end
67
41
  end