bioinform 0.1.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,38 @@
1
module Bioinform
  module MotifModel
    # Decorator that pairs a motif model with a name.
    #
    # Every message the decorator does not define itself is forwarded to the
    # wrapped model. When the forwarded call returns another motif model, the
    # result is wrapped again so that the name is carried along.
    class NamedModel
      attr_reader :model, :name

      # @param model [Object] the wrapped model that receives delegated calls
      # @param name [Object] the name attached to the model
      def initialize(model, name)
        @model, @name = model, name
      end

      # @return [Array<Class>] every class defined directly in Bioinform::MotifModel
      def motif_klasses
        Bioinform::MotifModel.constants
                             .map { |konst| Bioinform::MotifModel.const_get(konst) }
                             .select { |konst| konst.is_a?(Class) }
      end

      # @return [Boolean] whether +object+ is an instance of any MotifModel class
      def motif?(object)
        motif_klasses.any? { |klass| object.is_a?(klass) }
      end

      private :motif_klasses, :motif?

      # Forwards unknown public messages to the wrapped model. A motif result
      # that is not already a NamedModel is re-wrapped with the same name.
      def method_missing(meth, *args, &block)
        result = model.public_send(meth, *args, &block)
        if motif?(result) && !result.is_a?(self.class)
          self.class.new(result, name)
        else
          result
        end
      end

      # Keeps +respond_to?+ truthful for messages handled by +method_missing+.
      # Only public methods of the model are reported, because delegation goes
      # through +public_send+.
      def respond_to_missing?(meth, include_private = false)
        model.respond_to?(meth) || super
      end

      # Defined explicitly instead of being delegated to the model, because a
      # delegated call would not display the name.
      def to_s
        MotifFormatter.new.format(self)
      end

      # Two named models are equal when both model and name are equal.
      # Objects that expose no +model+ or +name+ are simply unequal rather
      # than causing a NoMethodError.
      def ==(other)
        other.respond_to?(:model) && other.respond_to?(:name) &&
          model == other.model && name == other.name
      end
    end
  end
end
@@ -1,36 +1,26 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
3
- require_relative '../conversion_algorithms/pcm2ppm_converter'
4
- require_relative '../conversion_algorithms/pcm2pwm_converter'
1
+ require_relative 'pm'
5
2
 
6
3
  module Bioinform
7
- class PCM < PM
8
- make_parameters :pseudocount
9
-
10
- def count
11
- matrix.first.inject(&:+)
12
- end
13
-
14
- def to_pcm
15
- self
16
- end
17
-
18
- def to_pwm(pseudocount = Math.log(count))
19
- ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
20
- end
21
-
22
- def to_ppm
23
- ConversionAlgorithms::PCM2PPMConverter.convert(self)
4
+ module MotifModel
5
+ def self.acts_as_pcm?(pcm)
6
+ pcm.is_a?(MotifModel::PCM) || pcm.is_a?(MotifModel::NamedModel) && acts_as_pcm?(pcm.model)
24
7
  end
25
8
 
26
- def self.valid_matrix?(matrix, options = {})
27
- super && matrix.all?{|pos| pos.all?{|el| el >=0 } }
28
- end
9
+ class PCM < PM
10
# Extends the inherited validation with the PCM-specific rule that every
# element must be non-negative.
#
# The sign check only runs when the matrix is an Array of Arrays of real
# numbers. For any other shape the comparison with zero would itself raise,
# so only the errors collected by +super+ are returned.
#
# @return [Array<String>] human-readable validation problems (empty when valid)
def validation_errors
  errors = super
  comparable = matrix.is_a?(Array) && matrix.all? { |pos|
    pos.is_a?(Array) && pos.all? { |el| el.is_a?(Numeric) && el.real? }
  }
  return errors unless comparable
  errors << "elements of PCM should be non-negative" unless matrix.all? { |pos| pos.all? { |el| el >= 0 } }
  errors
end
29
15
 
30
- def validation_errors(options = {})
31
- validation_errors = []
32
- validation_errors << "PCM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
33
- super + validation_errors
16
# Total count shared by all positions of the matrix.
#
# Each position is summed as a Float; the sum of the first position is the
# reference value. All positions must agree with it within a relative
# tolerance of 1e-3. The comparison is inclusive so that an all-zero matrix
# (reference 0.0, every difference 0.0) yields 0.0 instead of being reported
# as having different counts.
#
# @return [Float] the per-position count
# @raise [Error] when positions sum to noticeably different values
def count
  counts = each_position.map { |pos| pos.inject(0.0, &:+) }
  reference = counts.first
  max_diff = counts.map { |pos_count| (pos_count - reference).abs }.max
  raise Error, 'Different columns have different count' unless max_diff <= reference * 1e-3
  reference
end
34
24
  end
35
25
  end
36
26
  end
@@ -1,198 +1,101 @@
1
- require 'ostruct'
2
- require_relative '../support'
3
- require_relative '../parsers'
4
- require_relative '../formatters'
1
+ require_relative '../formatters/motif_formatter'
2
+ require_relative '../errors'
3
+ require_relative '../alphabet'
4
+ require_relative 'named_model'
5
5
 
6
6
  module Bioinform
7
- IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
8
- 'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
9
- LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
10
-
11
- class PM
12
- attr_accessor :matrix, :parameters
13
-
14
- include Parameters
15
- make_parameters :name, :background # , :tags
16
-
17
- # def mark(tag)
18
- # tags << tag
19
- # end
20
-
21
- # def tagged?(tag)
22
- # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
23
- # end
24
-
25
- def self.choose_parser(input)
26
- [TrivialParser, YAMLParser, Parser, StringParser, Bioinform::MatrixParser.new(has_name: false).wrapper, Bioinform::MatrixParser.new(has_name: true).wrapper, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
27
- self.new(input, parser) rescue nil
7
+ module MotifModel
8
+ class PM
9
+ attr_reader :matrix, :alphabet
10
+ def initialize(matrix, options = {})
11
+ @matrix = matrix
12
+ @alphabet = options.fetch(:alphabet, NucleotideAlphabet)
13
+ raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
28
14
  end
29
- end
30
-
31
- def self.split_on_motifs(input)
32
- parser = choose_parser(input)
33
- raise ParsingError, "No parser can parse given input" unless parser
34
- parser.split_on_motifs(input, self)
35
- end
36
-
37
- def initialize(input, parser = nil)
38
- @parameters = OpenStruct.new
39
- parser ||= self.class.choose_parser(input)
40
- raise 'No one parser can process input' unless parser
41
- result = parser.new(input).parse
42
- @matrix = result.matrix
43
- raise 'Non valid matrix' unless self.class.valid_matrix?(@matrix)
44
- self.name = result.name
45
- # self.tags = result.tags || []
46
- self.background = result.background || [1, 1, 1, 1]
47
- end
48
-
49
- def self.new_with_validation(input, parser = nil)
50
- obj = self.new(input, parser)
51
- raise 'matrix not valid' unless obj.valid?
52
- obj
53
- end
54
- def ==(other)
55
- @matrix == other.matrix && background == other.background && name == other.name
56
- rescue
57
- false
58
- end
59
-
60
- def self.valid_matrix?(matrix, options = {})
61
- matrix.is_a?(Array) &&
62
- ! matrix.empty? &&
63
- matrix.all?{|pos| pos.is_a?(Array)} &&
64
- matrix.all?{|pos| pos.size == 4} &&
65
- matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
66
- rescue
67
- false
68
- end
69
15
 
70
- def validation_errors(options = {})
71
- errors = []
72
- if !matrix.is_a?(Array)
73
- errors << 'Matrix is not an array'
74
- elsif matrix.empty?
75
- errors << 'Matrix is not an array'
76
- elsif ! matrix.all?{|pos| pos.is_a?(Array)}
77
- errors << 'Some of matrix positions aren\'t represented as arrays'
78
- elsif ! matrix.all?{|pos| pos.size == 4}
79
- errors << 'Some of matrix positions have number of columns other than 4'
80
- elsif ! matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
81
- errors << 'Some of matrix elements aren\'t represented by numbers'
16
+ def self.from_string(input, options = {})
17
+ parser = options.fetch(:parser, MatrixParser.new)
18
+ alphabet = options.fetch(:alphabet, NucleotideAlphabet)
19
+ info = parser.parse!(input)
20
+ self.new(info[:matrix], alphabet: alphabet).named( info[:name] )
82
21
  end
83
- errors
84
- end
85
-
86
- def valid?(options = {})
87
- self.class.valid_matrix?(@matrix, options)
88
- end
89
22
 
90
- def each_position
91
- if block_given?
92
- matrix.each{|pos| yield pos}
93
- else
94
- self.to_enum(:each_position)
23
+ def self.from_file(filename, options = {})
24
+ parser = options.fetch(:parser, MatrixParser.new)
25
+ alphabet = options.fetch(:alphabet, NucleotideAlphabet)
26
+ info = parser.parse!(File.read(filename))
27
+ name = (info[:name] && !info[:name].strip.empty?) ? info[:name] : File.basename(filename, File.extname(filename))
28
+ self.new(info[:matrix], alphabet: alphabet).named( name )
95
29
  end
96
- end
97
-
98
- def length
99
- @matrix.length
100
- end
101
- alias_method :size, :length
102
30
 
103
- def to_s(options = {}, formatter = RawFormatter)
104
- formatter.new(self, options).to_s
105
- end
31
# Collects structural problems of the matrix as human-readable messages.
#
# Checks are ordered from outermost to innermost structure. When the matrix
# is not an Array, or some position is not an Array, the remaining checks
# cannot be evaluated without raising, so the list gathered so far is
# returned. This keeps the method usable for building a ValidationError
# from arbitrary input.
#
# @return [Array<String>] validation problems (empty when the matrix is valid)
def validation_errors
  errors = []
  unless matrix.is_a?(Array)
    errors << "matrix should be an Array"
    return errors
  end
  errors << "matrix shouldn't be empty" unless matrix.size > 0
  unless matrix.all? { |pos| pos.is_a?(Array) }
    errors << "each matrix position should be an Array"
    return errors
  end
  errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all? { |pos| pos.size == alphabet.size }
  errors << "each matrix element should be Numeric" unless matrix.all? { |pos| pos.all? { |el| el.is_a?(Numeric) } }
  errors
end
40
+ private :validation_errors
106
41
 
107
- def pretty_string(options = {})
108
- default_options = {with_name: true, letters_as_rows: false}
42
+ def valid?
43
+ validation_errors.empty?
44
+ rescue
45
+ false
46
+ end
109
47
 
110
- return to_s(options) if options[:letters_as_rows]
48
+ private :valid?
111
49
 
112
- options = default_options.merge(options)
113
- header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
114
- matrix_rows = each_position.map do |position|
115
- position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
50
+ def length
51
+ matrix.size
116
52
  end
117
53
 
118
- matrix_str = matrix_rows.join("\n")
119
-
120
- if options[:with_name] && name
121
- name + "\n" + header + matrix_str
122
- else
123
- header + matrix_str
54
+ def to_s
55
+ MotifFormatter.new.format(self)
124
56
  end
125
- end
126
57
 
127
- def consensus
128
- each_position.map{|pos|
129
- pos.each_with_index.max_by{|el, letter_index| el}
130
- }.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
131
- end
58
+ def ==(other)
59
+ self.class == other.class && matrix == other.matrix && alphabet == other.alphabet
60
+ end
132
61
 
62
+ def each_position
63
+ if block_given?
64
+ matrix.each{|pos| yield pos}
65
+ else
66
+ self.to_enum(:each_position)
67
+ end
68
+ end
133
69
 
134
- def to_hash
135
- hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
136
- [ letter, @matrix.map{|pos| pos[letter_index]} ]
70
+ def reversed
71
+ self.class.new(matrix.reverse, alphabet: alphabet)
137
72
  end
138
- hsh.with_indifferent_access
139
- end
140
73
 
141
- def self.zero_column
142
- [0, 0, 0, 0]
143
- end
74
+ def complemented
75
+ self.class.new(complement_matrix, alphabet: alphabet)
76
+ end
144
77
 
145
- def reverse_complement!
146
- @matrix.reverse!.map!(&:reverse!)
147
- self
148
- end
149
- def left_augment!(n)
150
- n.times{ @matrix.unshift(self.class.zero_column) }
151
- self
152
- end
153
- def right_augment!(n)
154
- n.times{ @matrix.push(self.class.zero_column) }
155
- self
156
- end
78
+ def reverse_complemented
79
+ self.class.new(complement_matrix.reverse, alphabet: alphabet)
80
+ end
157
81
 
158
- def discrete!(rate)
159
- @matrix.map!{|position| position.map{|element| (element * rate).ceil}}
160
- self
161
- end
82
+ alias_method :revcomp, :reverse_complemented
162
83
 
163
- def vocabulary_volume
164
- background.inject(&:+) ** length
165
- end
84
+ def complement_matrix
85
+ matrix.map{|pos|
86
+ alphabet.each_letter_index.map{|letter_index| pos[alphabet.complement_index(letter_index)]}
87
+ }
88
+ end
89
+ private :complement_matrix
166
90
 
167
- def probability
168
- sum = background.inject(0.0, &:+)
169
- background.map{|element| element.to_f / sum}
170
- end
91
+ # def consensus
92
+ # ConsensusFormatter.by_maximal_elements.format_string(self)
93
+ # end
171
94
 
172
- def reverse_complement
173
- dup.reverse_complement!
174
- end
175
- def left_augment(n)
176
- dup.left_augment!(n)
177
- end
178
- def right_augment(n)
179
- dup.right_augment!(n)
180
- end
181
- def discrete(rate)
182
- dup.discrete!(rate)
183
- end
184
- def dup
185
- deep_dup
186
- end
95
+ def named(name)
96
+ NamedModel.new(self, name)
97
+ end
187
98
 
188
- def as_pcm
189
- PCM.new(get_parameters.merge(matrix: matrix))
190
- end
191
- def as_ppm
192
- PPM.new(get_parameters.merge(matrix: matrix))
193
- end
194
- def as_pwm
195
- PWM.new(get_parameters.merge(matrix: matrix))
196
99
  end
197
100
  end
198
101
  end
@@ -1,31 +1,18 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
1
+ require_relative 'pm'
3
2
 
4
3
  module Bioinform
5
- class PPM < PM
6
- make_parameters :effective_count, :pseudocount
7
- def to_ppm
8
- self
4
+ module MotifModel
5
+ def self.acts_as_ppm?(ppm)
6
+ ppm.is_a?(MotifModel::PPM) || ppm.is_a?(MotifModel::NamedModel) && acts_as_ppm?(ppm.model)
9
7
  end
10
8
 
11
- def to_pcm
12
- PCM.new(matrix.map{|pos| pos.map{|el| el * effective_count} }).tap{|pcm| pcm.name = name}
13
- end
14
-
15
- def to_pwm
16
- pseudocount ? to_pcm.to_pwm(pseudocount) : to_pcm.to_pwm
17
- end
18
-
19
- def self.valid_matrix?(matrix, options = {})
20
- precision = options[:precision] || 0.01
21
- super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
22
- end
23
- def validation_errors(options = {})
24
- precision = options[:precision] || 0.01
25
- validation_errors = []
26
- validation_errors << "PPM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
27
- validation_errors << "Sum of PPM matrix elements for each position should equal to 1" unless matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) }
28
- super + validation_errors
9
+ class PPM < PM
10
# Extends the inherited validation with the PPM-specific rules: elements
# must be non-negative and every position must sum to 1.0 (within 1e-3).
#
# Both rules need arithmetic on the elements, so they only run when the
# matrix is an Array of Arrays of real numbers. For any other shape only
# the errors collected by +super+ are returned, instead of raising.
#
# @return [Array<String>] human-readable validation problems (empty when valid)
def validation_errors
  errors = super
  computable = matrix.is_a?(Array) && matrix.all? { |pos|
    pos.is_a?(Array) && pos.all? { |el| el.is_a?(Numeric) && el.real? }
  }
  return errors unless computable
  errors << "elements of PPM should be non-negative" unless matrix.all? { |pos| pos.all? { |el| el >= 0 } }
  errors << "each PPM position should be equal to 1.0 being summed" unless matrix.all? { |pos| (pos.inject(0.0, &:+) - 1.0).abs < 1e-3 }
  errors
end
29
16
  end
30
17
  end
31
18
  end
@@ -1,67 +1,41 @@
1
- require_relative '../support'
2
- require_relative '../data_models'
1
+ require_relative 'pm'
3
2
 
4
3
  module Bioinform
5
- class PWM < PM
6
- def score_mean
7
- each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
8
- end
9
- def score_variance
10
- each_position.inject(0) do |variance, position|
11
- variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
12
- position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
13
- end
14
- end
15
-
16
- def threshold_gauss_estimation(pvalue)
17
- sigma = Math.sqrt(score_variance)
18
- n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
19
- score_mean + n_ * sigma
20
- end
21
-
22
- def score(word)
23
- raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
- #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- (0...length).map do |pos|
26
- letter = word[pos]
27
- if IndexByLetter[letter]
28
- matrix[pos][IndexByLetter[letter]]
29
- elsif letter == 'N'
30
- matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
31
- else
32
- raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
33
- end
34
- end.inject(0, &:+).to_f
35
- end
36
-
37
- def to_pwm
38
- self
4
+ module MotifModel
5
+ def self.acts_as_pwm?(pwm)
6
+ pwm.is_a?(MotifModel::PWM) || pwm.is_a?(MotifModel::NamedModel) && acts_as_pwm?(pwm.model)
39
7
  end
40
8
 
41
- def best_score
42
- best_suffix(0)
43
- end
44
- def worst_score
45
- worst_suffix(0)
46
- end
47
-
48
- # best score of suffix s[i..l]
49
- def best_suffix(i)
50
- @matrix[i...length].map(&:max).inject(0.0, &:+)
51
- end
9
+ class PWM < PM
10
+ def score(word)
11
+ raise Error, 'Word length should be the same as PWM length' unless word.length == length
12
+ length.times.map do |pos|
13
+ matrix[pos][alphabet.index_by_letter(word[pos])]
14
+ end.inject(0.0, &:+)
15
+ end
52
16
 
53
- def worst_suffix(i)
54
- @matrix[i...length].map(&:min).inject(0.0, &:+)
55
- end
17
# Builds a new model of the same class whose elements are multiplied by
# +rate+ and then rounded.
#
# The rounding method is invoked with +public_send+, so only public methods
# of the scaled number can be selected; a name such as :exit, which is a
# private Kernel method, raises NoMethodError instead of being executed.
#
# @param rate [Numeric] multiplier applied to every element
# @param options [Hash] :rounding_method names the method called on each
#   scaled element (defaults to :ceil)
# @return [Object] a new instance of the receiver's class with the same alphabet
def discreted(rate, options = {})
  rounding_method = options.fetch(:rounding_method, :ceil)
  discreted_matrix = matrix.map do |position|
    position.map { |element| (element * rate).public_send(rounding_method) }
  end
  self.class.new(discreted_matrix, alphabet: alphabet)
end
56
22
 
23
+ def zero_column
24
+ [0.0] * alphabet.size
25
+ end
26
+ private :zero_column
57
27
 
58
- def matrix_rounded(n)
59
- matrix.map{|pos| pos.map{|x| x.round(n) } }
60
- end
61
- private :matrix_rounded
28
# Builds a new model of the same class with +n+ zero columns prepended to
# the matrix.
#
# Each padding column is created by its own call to +zero_column+, so the
# added positions are distinct Array objects; changing one of them later
# cannot affect the others.
#
# @param n [Integer] number of columns to add on the left
# @return [Object] a new instance of the receiver's class with the same alphabet
# @raise [Error] when +n+ is negative
def left_augmented(n)
  raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
  padding = Array.new(n) { zero_column }
  self.class.new(padding + matrix, alphabet: alphabet)
end
62
33
 
63
- def round(n)
64
- PWM.new(matrix_rounded(n)).tap{|pm| pm.name = name}
34
# Builds a new model of the same class with +n+ zero columns appended to
# the matrix.
#
# Each padding column is created by its own call to +zero_column+, so the
# added positions are distinct Array objects; changing one of them later
# cannot affect the others.
#
# @param n [Integer] number of columns to add on the right
# @return [Object] a new instance of the receiver's class with the same alphabet
# @raise [Error] when +n+ is negative
def right_augmented(n)
  raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
  padding = Array.new(n) { zero_column }
  self.class.new(matrix + padding, alphabet: alphabet)
end
65
39
  end
66
40
  end
67
41
  end