bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,38 @@
|
|
1
|
+
module Bioinform
|
2
|
+
module MotifModel
|
3
|
+
class NamedModel
|
4
|
+
attr_reader :model, :name
|
5
|
+
def initialize(model, name)
|
6
|
+
@model, @name = model, name
|
7
|
+
end
|
8
|
+
|
9
|
+
def motif_klasses
|
10
|
+
Bioinform::MotifModel.constants.map{|konst| Bioinform::MotifModel.const_get(konst) }.select{|konst| konst.is_a? Class }
|
11
|
+
end
|
12
|
+
|
13
|
+
def motif?(object)
|
14
|
+
motif_klasses.any?{|klass| object.is_a?(klass) }
|
15
|
+
end
|
16
|
+
|
17
|
+
private :motif_klasses, :motif?
|
18
|
+
|
19
|
+
def method_missing(meth, *args, &block)
|
20
|
+
result = model.public_send(meth, *args, &block)
|
21
|
+
if motif?(result) && ! result.is_a?(self.class)
|
22
|
+
self.class.new(result, name)
|
23
|
+
else
|
24
|
+
result
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# should not be delegated to self (because in that case name won't be displayed)
|
29
|
+
def to_s
|
30
|
+
MotifFormatter.new.format(self)
|
31
|
+
end
|
32
|
+
|
33
|
+
def ==(other)
|
34
|
+
model == other.model && name == other.name
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -1,36 +1,26 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '../data_models'
|
3
|
-
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
4
|
-
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
1
|
+
require_relative 'pm'
|
5
2
|
|
6
3
|
module Bioinform
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def count
|
11
|
-
matrix.first.inject(&:+)
|
12
|
-
end
|
13
|
-
|
14
|
-
def to_pcm
|
15
|
-
self
|
16
|
-
end
|
17
|
-
|
18
|
-
def to_pwm(pseudocount = Math.log(count))
|
19
|
-
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
20
|
-
end
|
21
|
-
|
22
|
-
def to_ppm
|
23
|
-
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
4
|
+
module MotifModel
|
5
|
+
def self.acts_as_pcm?(pcm)
|
6
|
+
pcm.is_a?(MotifModel::PCM) || pcm.is_a?(MotifModel::NamedModel) && acts_as_pcm?(pcm.model)
|
24
7
|
end
|
25
8
|
|
26
|
-
|
27
|
-
|
28
|
-
|
9
|
+
class PCM < PM
|
10
|
+
def validation_errors
|
11
|
+
errors = super
|
12
|
+
errors << "elements of PCM should be non-negative" unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
13
|
+
errors
|
14
|
+
end
|
29
15
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
16
|
+
def count
|
17
|
+
counts = each_position.map{|pos| pos.inject(0.0, &:+)}
|
18
|
+
count = counts.first
|
19
|
+
diffs = counts.map{|pos_count| (pos_count - count).abs }
|
20
|
+
counts_are_same = (diffs.max < count * 1e-3)
|
21
|
+
raise Error, 'Different columns have different count' unless counts_are_same
|
22
|
+
count
|
23
|
+
end
|
34
24
|
end
|
35
25
|
end
|
36
26
|
end
|
@@ -1,198 +1,101 @@
|
|
1
|
-
|
2
|
-
require_relative '../
|
3
|
-
require_relative '../
|
4
|
-
require_relative '
|
1
|
+
require_relative '../formatters/motif_formatter'
|
2
|
+
require_relative '../errors'
|
3
|
+
require_relative '../alphabet'
|
4
|
+
require_relative 'named_model'
|
5
5
|
|
6
6
|
module Bioinform
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
include Parameters
|
15
|
-
make_parameters :name, :background # , :tags
|
16
|
-
|
17
|
-
# def mark(tag)
|
18
|
-
# tags << tag
|
19
|
-
# end
|
20
|
-
|
21
|
-
# def tagged?(tag)
|
22
|
-
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
23
|
-
# end
|
24
|
-
|
25
|
-
def self.choose_parser(input)
|
26
|
-
[TrivialParser, YAMLParser, Parser, StringParser, Bioinform::MatrixParser.new(has_name: false).wrapper, Bioinform::MatrixParser.new(has_name: true).wrapper, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
27
|
-
self.new(input, parser) rescue nil
|
7
|
+
module MotifModel
|
8
|
+
class PM
|
9
|
+
attr_reader :matrix, :alphabet
|
10
|
+
def initialize(matrix, options = {})
|
11
|
+
@matrix = matrix
|
12
|
+
@alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
13
|
+
raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
|
28
14
|
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.split_on_motifs(input)
|
32
|
-
parser = choose_parser(input)
|
33
|
-
raise ParsingError, "No parser can parse given input" unless parser
|
34
|
-
parser.split_on_motifs(input, self)
|
35
|
-
end
|
36
|
-
|
37
|
-
def initialize(input, parser = nil)
|
38
|
-
@parameters = OpenStruct.new
|
39
|
-
parser ||= self.class.choose_parser(input)
|
40
|
-
raise 'No one parser can process input' unless parser
|
41
|
-
result = parser.new(input).parse
|
42
|
-
@matrix = result.matrix
|
43
|
-
raise 'Non valid matrix' unless self.class.valid_matrix?(@matrix)
|
44
|
-
self.name = result.name
|
45
|
-
# self.tags = result.tags || []
|
46
|
-
self.background = result.background || [1, 1, 1, 1]
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.new_with_validation(input, parser = nil)
|
50
|
-
obj = self.new(input, parser)
|
51
|
-
raise 'matrix not valid' unless obj.valid?
|
52
|
-
obj
|
53
|
-
end
|
54
|
-
def ==(other)
|
55
|
-
@matrix == other.matrix && background == other.background && name == other.name
|
56
|
-
rescue
|
57
|
-
false
|
58
|
-
end
|
59
|
-
|
60
|
-
def self.valid_matrix?(matrix, options = {})
|
61
|
-
matrix.is_a?(Array) &&
|
62
|
-
! matrix.empty? &&
|
63
|
-
matrix.all?{|pos| pos.is_a?(Array)} &&
|
64
|
-
matrix.all?{|pos| pos.size == 4} &&
|
65
|
-
matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
66
|
-
rescue
|
67
|
-
false
|
68
|
-
end
|
69
15
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
errors << 'Matrix is not an array'
|
76
|
-
elsif ! matrix.all?{|pos| pos.is_a?(Array)}
|
77
|
-
errors << 'Some of matrix positions aren\'t represented as arrays'
|
78
|
-
elsif ! matrix.all?{|pos| pos.size == 4}
|
79
|
-
errors << 'Some of matrix positions have number of columns other than 4'
|
80
|
-
elsif ! matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
81
|
-
errors << 'Some of matrix elements aren\'t represented by numbers'
|
16
|
+
def self.from_string(input, options = {})
|
17
|
+
parser = options.fetch(:parser, MatrixParser.new)
|
18
|
+
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
19
|
+
info = parser.parse!(input)
|
20
|
+
self.new(info[:matrix], alphabet: alphabet).named( info[:name] )
|
82
21
|
end
|
83
|
-
errors
|
84
|
-
end
|
85
|
-
|
86
|
-
def valid?(options = {})
|
87
|
-
self.class.valid_matrix?(@matrix, options)
|
88
|
-
end
|
89
22
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
23
|
+
def self.from_file(filename, options = {})
|
24
|
+
parser = options.fetch(:parser, MatrixParser.new)
|
25
|
+
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
26
|
+
info = parser.parse!(File.read(filename))
|
27
|
+
name = (info[:name] && !info[:name].strip.empty?) ? info[:name] : File.basename(filename, File.extname(filename))
|
28
|
+
self.new(info[:matrix], alphabet: alphabet).named( name )
|
95
29
|
end
|
96
|
-
end
|
97
|
-
|
98
|
-
def length
|
99
|
-
@matrix.length
|
100
|
-
end
|
101
|
-
alias_method :size, :length
|
102
30
|
|
103
|
-
|
104
|
-
|
105
|
-
|
31
|
+
def validation_errors
|
32
|
+
errors = []
|
33
|
+
errors << "matrix should be an Array" unless matrix.is_a? Array
|
34
|
+
errors << "matrix shouldn't be empty" unless matrix.size > 0
|
35
|
+
errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
|
36
|
+
errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all?{|pos| pos.size == alphabet.size }
|
37
|
+
errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
38
|
+
errors
|
39
|
+
end
|
40
|
+
private :validation_errors
|
106
41
|
|
107
|
-
|
108
|
-
|
42
|
+
def valid?
|
43
|
+
validation_errors.empty?
|
44
|
+
rescue
|
45
|
+
false
|
46
|
+
end
|
109
47
|
|
110
|
-
|
48
|
+
private :valid?
|
111
49
|
|
112
|
-
|
113
|
-
|
114
|
-
matrix_rows = each_position.map do |position|
|
115
|
-
position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
|
50
|
+
def length
|
51
|
+
matrix.size
|
116
52
|
end
|
117
53
|
|
118
|
-
|
119
|
-
|
120
|
-
if options[:with_name] && name
|
121
|
-
name + "\n" + header + matrix_str
|
122
|
-
else
|
123
|
-
header + matrix_str
|
54
|
+
def to_s
|
55
|
+
MotifFormatter.new.format(self)
|
124
56
|
end
|
125
|
-
end
|
126
57
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
}.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
|
131
|
-
end
|
58
|
+
def ==(other)
|
59
|
+
self.class == other.class && matrix == other.matrix && alphabet == other.alphabet
|
60
|
+
end
|
132
61
|
|
62
|
+
def each_position
|
63
|
+
if block_given?
|
64
|
+
matrix.each{|pos| yield pos}
|
65
|
+
else
|
66
|
+
self.to_enum(:each_position)
|
67
|
+
end
|
68
|
+
end
|
133
69
|
|
134
|
-
|
135
|
-
|
136
|
-
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
70
|
+
def reversed
|
71
|
+
self.class.new(matrix.reverse, alphabet: alphabet)
|
137
72
|
end
|
138
|
-
hsh.with_indifferent_access
|
139
|
-
end
|
140
73
|
|
141
|
-
|
142
|
-
|
143
|
-
|
74
|
+
def complemented
|
75
|
+
self.class.new(complement_matrix, alphabet: alphabet)
|
76
|
+
end
|
144
77
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
end
|
149
|
-
def left_augment!(n)
|
150
|
-
n.times{ @matrix.unshift(self.class.zero_column) }
|
151
|
-
self
|
152
|
-
end
|
153
|
-
def right_augment!(n)
|
154
|
-
n.times{ @matrix.push(self.class.zero_column) }
|
155
|
-
self
|
156
|
-
end
|
78
|
+
def reverse_complemented
|
79
|
+
self.class.new(complement_matrix.reverse, alphabet: alphabet)
|
80
|
+
end
|
157
81
|
|
158
|
-
|
159
|
-
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
160
|
-
self
|
161
|
-
end
|
82
|
+
alias_method :revcomp, :reverse_complemented
|
162
83
|
|
163
|
-
|
164
|
-
|
165
|
-
|
84
|
+
def complement_matrix
|
85
|
+
matrix.map{|pos|
|
86
|
+
alphabet.each_letter_index.map{|letter_index| pos[alphabet.complement_index(letter_index)]}
|
87
|
+
}
|
88
|
+
end
|
89
|
+
private :complement_matrix
|
166
90
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
91
|
+
# def consensus
|
92
|
+
# ConsensusFormatter.by_maximal_elements.format_string(self)
|
93
|
+
# end
|
171
94
|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
def left_augment(n)
|
176
|
-
dup.left_augment!(n)
|
177
|
-
end
|
178
|
-
def right_augment(n)
|
179
|
-
dup.right_augment!(n)
|
180
|
-
end
|
181
|
-
def discrete(rate)
|
182
|
-
dup.discrete!(rate)
|
183
|
-
end
|
184
|
-
def dup
|
185
|
-
deep_dup
|
186
|
-
end
|
95
|
+
def named(name)
|
96
|
+
NamedModel.new(self, name)
|
97
|
+
end
|
187
98
|
|
188
|
-
def as_pcm
|
189
|
-
PCM.new(get_parameters.merge(matrix: matrix))
|
190
|
-
end
|
191
|
-
def as_ppm
|
192
|
-
PPM.new(get_parameters.merge(matrix: matrix))
|
193
|
-
end
|
194
|
-
def as_pwm
|
195
|
-
PWM.new(get_parameters.merge(matrix: matrix))
|
196
99
|
end
|
197
100
|
end
|
198
101
|
end
|
@@ -1,31 +1,18 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '../data_models'
|
1
|
+
require_relative 'pm'
|
3
2
|
|
4
3
|
module Bioinform
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
self
|
4
|
+
module MotifModel
|
5
|
+
def self.acts_as_ppm?(ppm)
|
6
|
+
ppm.is_a?(MotifModel::PPM) || ppm.is_a?(MotifModel::NamedModel) && acts_as_ppm?(ppm.model)
|
9
7
|
end
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
def self.valid_matrix?(matrix, options = {})
|
20
|
-
precision = options[:precision] || 0.01
|
21
|
-
super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
|
22
|
-
end
|
23
|
-
def validation_errors(options = {})
|
24
|
-
precision = options[:precision] || 0.01
|
25
|
-
validation_errors = []
|
26
|
-
validation_errors << "PPM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
|
27
|
-
validation_errors << "Sum of PPM matrix elements for each position should equal to 1" unless matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) }
|
28
|
-
super + validation_errors
|
9
|
+
class PPM < PM
|
10
|
+
def validation_errors
|
11
|
+
errors = super
|
12
|
+
errors << "elements of PPM should be non-negative" unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
13
|
+
errors << "each PPM position should be equal to 1.0 being summed" unless matrix.all?{|pos| (pos.inject(0.0, &:+) - 1.0).abs < 1e-3 }
|
14
|
+
errors
|
15
|
+
end
|
29
16
|
end
|
30
17
|
end
|
31
18
|
end
|
@@ -1,67 +1,41 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative '../data_models'
|
1
|
+
require_relative 'pm'
|
3
2
|
|
4
3
|
module Bioinform
|
5
|
-
|
6
|
-
def
|
7
|
-
|
8
|
-
end
|
9
|
-
def score_variance
|
10
|
-
each_position.inject(0) do |variance, position|
|
11
|
-
variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
12
|
-
position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def threshold_gauss_estimation(pvalue)
|
17
|
-
sigma = Math.sqrt(score_variance)
|
18
|
-
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
19
|
-
score_mean + n_ * sigma
|
20
|
-
end
|
21
|
-
|
22
|
-
def score(word)
|
23
|
-
raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
|
24
|
-
#raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
25
|
-
(0...length).map do |pos|
|
26
|
-
letter = word[pos]
|
27
|
-
if IndexByLetter[letter]
|
28
|
-
matrix[pos][IndexByLetter[letter]]
|
29
|
-
elsif letter == 'N'
|
30
|
-
matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
|
31
|
-
else
|
32
|
-
raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
|
33
|
-
end
|
34
|
-
end.inject(0, &:+).to_f
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_pwm
|
38
|
-
self
|
4
|
+
module MotifModel
|
5
|
+
def self.acts_as_pwm?(pwm)
|
6
|
+
pwm.is_a?(MotifModel::PWM) || pwm.is_a?(MotifModel::NamedModel) && acts_as_pwm?(pwm.model)
|
39
7
|
end
|
40
8
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
# best score of suffix s[i..l]
|
49
|
-
def best_suffix(i)
|
50
|
-
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
51
|
-
end
|
9
|
+
class PWM < PM
|
10
|
+
def score(word)
|
11
|
+
raise Error, 'Word length should be the same as PWM length' unless word.length == length
|
12
|
+
length.times.map do |pos|
|
13
|
+
matrix[pos][alphabet.index_by_letter(word[pos])]
|
14
|
+
end.inject(0.0, &:+)
|
15
|
+
end
|
52
16
|
|
53
|
-
|
54
|
-
|
55
|
-
|
17
|
+
def discreted(rate, options = {})
|
18
|
+
rounding_method = options.fetch(:rounding_method, :ceil)
|
19
|
+
discreted_matrix = matrix.map{|position| position.map{|element| (element * rate).send(rounding_method) } }
|
20
|
+
self.class.new(discreted_matrix, alphabet: alphabet)
|
21
|
+
end
|
56
22
|
|
23
|
+
def zero_column
|
24
|
+
[0.0] * alphabet.size
|
25
|
+
end
|
26
|
+
private :zero_column
|
57
27
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
28
|
+
def left_augmented(n)
|
29
|
+
raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
|
30
|
+
augmented_matrix = Array.new(n, zero_column) + matrix
|
31
|
+
self.class.new(augmented_matrix, alphabet: alphabet)
|
32
|
+
end
|
62
33
|
|
63
|
-
|
64
|
-
|
34
|
+
def right_augmented(n)
|
35
|
+
raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
|
36
|
+
augmented_matrix = matrix + Array.new(n, zero_column)
|
37
|
+
self.class.new(augmented_matrix, alphabet: alphabet)
|
38
|
+
end
|
65
39
|
end
|
66
40
|
end
|
67
41
|
end
|