bioinform 0.1.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Bioinform
  module MotifModel
    # Wrapper that pairs a motif model with a name.
    #
    # Calls the wrapper does not define itself are forwarded to the wrapped
    # model; when such a call returns another motif model, the result is
    # wrapped again under the same name.
    class NamedModel
      attr_reader :model, :name

      # @param model [Object] the wrapped motif model
      # @param name [Object] the name attached to the model
      def initialize(model, name)
        @model = model
        @name = name
      end

      # Every Class defined as a constant directly inside Bioinform::MotifModel.
      def motif_klasses
        namespace = Bioinform::MotifModel
        namespace.constants.map { |const_name| namespace.const_get(const_name) }.select { |const| const.is_a?(Class) }
      end

      # True when +object+ is an instance of any class listed by #motif_klasses.
      def motif?(object)
        motif_klasses.any? { |klass| object.is_a?(klass) }
      end

      private :motif_klasses, :motif?

      # Forwards the call to the wrapped model (public methods only).
      # A motif result that is not already a NamedModel is re-wrapped with
      # the current name; anything else is returned unchanged.
      def method_missing(meth, *args, &block)
        result = model.public_send(meth, *args, &block)
        if motif?(result) && !result.is_a?(self.class)
          self.class.new(result, name)
        else
          result
        end
      end

      # Keeps #respond_to? and #method consistent with #method_missing:
      # only the model's public methods are forwarded.
      def respond_to_missing?(meth, include_private = false)
        model.respond_to?(meth) || super
      end

      # Defined explicitly rather than forwarded to the wrapped model, so the
      # formatter receives the named wrapper and the name can be displayed.
      def to_s
        MotifFormatter.new.format(self)
      end

      # Two named models are equal when both the models and the names match.
      # Objects that expose no +model+/+name+ are simply unequal (no error).
      def ==(other)
        return false unless other.respond_to?(:model) && other.respond_to?(:name)
        model == other.model && name == other.name
      end
    end
  end
end
|
|
@@ -1,36 +1,26 @@
|
|
|
1
|
-
require_relative '
|
|
2
|
-
require_relative '../data_models'
|
|
3
|
-
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
|
4
|
-
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
|
1
|
+
require_relative 'pm'
|
|
5
2
|
|
|
6
3
|
module Bioinform
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def count
|
|
11
|
-
matrix.first.inject(&:+)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def to_pcm
|
|
15
|
-
self
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def to_pwm(pseudocount = Math.log(count))
|
|
19
|
-
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def to_ppm
|
|
23
|
-
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
|
4
|
+
module MotifModel
|
|
5
|
+
# Tells whether +pcm+ is a MotifModel::PCM, or a MotifModel::NamedModel
# whose wrapped model (checked recursively) is one.
def self.acts_as_pcm?(pcm)
  return true if pcm.is_a?(MotifModel::PCM)
  return false unless pcm.is_a?(MotifModel::NamedModel)
  acts_as_pcm?(pcm.model)
end
|
|
25
8
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
9
|
+
class PCM < PM
|
|
10
|
+
# Extends the inherited validation with a PCM-specific rule: every element
# must be non-negative.
#
# The rule is evaluated only when the matrix is an Array of Arrays of
# Numerics. On a malformed matrix the comparison `el >= 0` would raise
# instead of producing an error list; the structural problems are whatever
# the inherited method reported.
#
# @return [Array<String>] validation error messages (empty when valid)
def validation_errors
  errors = super
  comparable = matrix.is_a?(Array) &&
               matrix.all? { |pos| pos.is_a?(Array) && pos.all? { |el| el.is_a?(Numeric) } }
  return errors unless comparable
  errors << "elements of PCM should be non-negative" unless matrix.all? { |pos| pos.all? { |el| el >= 0 } }
  errors
end
|
|
29
15
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
16
|
+
# Total count of the matrix, taken as the sum of the first position.
#
# Sums of all positions must agree with the first one within a relative
# tolerance of 1e-3. The comparison is inclusive, so a matrix whose
# positions all sum to zero is accepted (a strict `<` would compare 0 < 0
# and report differing columns).
#
# @return [Float] sum of the elements of the first position
# @raise [Error] when position sums differ by more than the tolerance
def count
  position_sums = each_position.map { |pos| pos.inject(0.0, &:+) }
  reference = position_sums.first
  max_deviation = position_sums.map { |sum| (sum - reference).abs }.max
  raise Error, 'Different columns have different count' unless max_deviation <= reference * 1e-3
  reference
end
|
|
34
24
|
end
|
|
35
25
|
end
|
|
36
26
|
end
|
|
@@ -1,198 +1,101 @@
|
|
|
1
|
-
|
|
2
|
-
require_relative '../
|
|
3
|
-
require_relative '../
|
|
4
|
-
require_relative '
|
|
1
|
+
require_relative '../formatters/motif_formatter'
|
|
2
|
+
require_relative '../errors'
|
|
3
|
+
require_relative '../alphabet'
|
|
4
|
+
require_relative 'named_model'
|
|
5
5
|
|
|
6
6
|
module Bioinform
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
include Parameters
|
|
15
|
-
make_parameters :name, :background # , :tags
|
|
16
|
-
|
|
17
|
-
# def mark(tag)
|
|
18
|
-
# tags << tag
|
|
19
|
-
# end
|
|
20
|
-
|
|
21
|
-
# def tagged?(tag)
|
|
22
|
-
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
|
23
|
-
# end
|
|
24
|
-
|
|
25
|
-
def self.choose_parser(input)
|
|
26
|
-
[TrivialParser, YAMLParser, Parser, StringParser, Bioinform::MatrixParser.new(has_name: false).wrapper, Bioinform::MatrixParser.new(has_name: true).wrapper, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
|
27
|
-
self.new(input, parser) rescue nil
|
|
7
|
+
module MotifModel
|
|
8
|
+
class PM
|
|
9
|
+
attr_reader :matrix, :alphabet
|
|
10
|
+
# Stores the matrix and the alphabet, then validates the result.
#
# @param matrix [Object] the matrix; it is checked through #valid?
# @param options [Hash] +:alphabet+ overrides the default NucleotideAlphabet
# @raise [ValidationError] carrying #validation_errors when the matrix is invalid
def initialize(matrix, options = {})
  @matrix = matrix
  @alphabet = options.fetch(:alphabet, NucleotideAlphabet)
  return if valid?
  raise ValidationError.new('invalid matrix', validation_errors: validation_errors)
end
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def self.split_on_motifs(input)
|
|
32
|
-
parser = choose_parser(input)
|
|
33
|
-
raise ParsingError, "No parser can parse given input" unless parser
|
|
34
|
-
parser.split_on_motifs(input, self)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def initialize(input, parser = nil)
|
|
38
|
-
@parameters = OpenStruct.new
|
|
39
|
-
parser ||= self.class.choose_parser(input)
|
|
40
|
-
raise 'No one parser can process input' unless parser
|
|
41
|
-
result = parser.new(input).parse
|
|
42
|
-
@matrix = result.matrix
|
|
43
|
-
raise 'Non valid matrix' unless self.class.valid_matrix?(@matrix)
|
|
44
|
-
self.name = result.name
|
|
45
|
-
# self.tags = result.tags || []
|
|
46
|
-
self.background = result.background || [1, 1, 1, 1]
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def self.new_with_validation(input, parser = nil)
|
|
50
|
-
obj = self.new(input, parser)
|
|
51
|
-
raise 'matrix not valid' unless obj.valid?
|
|
52
|
-
obj
|
|
53
|
-
end
|
|
54
|
-
def ==(other)
|
|
55
|
-
@matrix == other.matrix && background == other.background && name == other.name
|
|
56
|
-
rescue
|
|
57
|
-
false
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def self.valid_matrix?(matrix, options = {})
|
|
61
|
-
matrix.is_a?(Array) &&
|
|
62
|
-
! matrix.empty? &&
|
|
63
|
-
matrix.all?{|pos| pos.is_a?(Array)} &&
|
|
64
|
-
matrix.all?{|pos| pos.size == 4} &&
|
|
65
|
-
matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
|
66
|
-
rescue
|
|
67
|
-
false
|
|
68
|
-
end
|
|
69
15
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
errors << 'Matrix is not an array'
|
|
76
|
-
elsif ! matrix.all?{|pos| pos.is_a?(Array)}
|
|
77
|
-
errors << 'Some of matrix positions aren\'t represented as arrays'
|
|
78
|
-
elsif ! matrix.all?{|pos| pos.size == 4}
|
|
79
|
-
errors << 'Some of matrix positions have number of columns other than 4'
|
|
80
|
-
elsif ! matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
|
81
|
-
errors << 'Some of matrix elements aren\'t represented by numbers'
|
|
16
|
+
# Builds a named model from text.
#
# @param input [String] text handed to the parser's +parse!+
# @param options [Hash] +:parser+ (an object responding to +parse!+; a new
#   MatrixParser by default) and +:alphabet+ (NucleotideAlphabet by default)
# @return the new model wrapped via +named+ with the parsed name
def self.from_string(input, options = {})
  # Block form of fetch: the default parser is built only when none is given.
  parser = options.fetch(:parser) { MatrixParser.new }
  alphabet = options.fetch(:alphabet, NucleotideAlphabet)
  info = parser.parse!(input)
  new(info[:matrix], alphabet: alphabet).named(info[:name])
end
|
|
83
|
-
errors
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def valid?(options = {})
|
|
87
|
-
self.class.valid_matrix?(@matrix, options)
|
|
88
|
-
end
|
|
89
22
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
23
|
+
# Builds a named model from the contents of a file.
#
# The name comes from the parsed data; when that name is missing or blank,
# the file's base name without its extension is used instead.
#
# @param filename [String] path of the file to read
# @param options [Hash] +:parser+ (an object responding to +parse!+; a new
#   MatrixParser by default) and +:alphabet+ (NucleotideAlphabet by default)
# @return the new model wrapped via +named+
def self.from_file(filename, options = {})
  # Block form of fetch: the default parser is built only when none is given.
  parser = options.fetch(:parser) { MatrixParser.new }
  alphabet = options.fetch(:alphabet, NucleotideAlphabet)
  info = parser.parse!(File.read(filename))
  name = info[:name]
  name = File.basename(filename, File.extname(filename)) if !name || name.strip.empty?
  new(info[:matrix], alphabet: alphabet).named(name)
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def length
|
|
99
|
-
@matrix.length
|
|
100
|
-
end
|
|
101
|
-
alias_method :size, :length
|
|
102
30
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
31
|
+
# Collects structural problems of the matrix as human-readable messages.
#
# Checks run in order and stop as soon as a later check could not be
# evaluated: a non-Array matrix, or a position that is not an Array, ends
# the validation there instead of raising NoMethodError from the following
# checks. This lets the caller always obtain a list of messages.
#
# @return [Array<String>] validation error messages (empty when valid)
def validation_errors
  errors = []
  unless matrix.is_a?(Array)
    errors << "matrix should be an Array"
    return errors
  end
  errors << "matrix shouldn't be empty" if matrix.empty?
  unless matrix.all? { |pos| pos.is_a?(Array) }
    errors << "each matrix position should be an Array"
    return errors
  end
  errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all? { |pos| pos.size == alphabet.size }
  errors << "each matrix element should be Numeric" unless matrix.all? { |pos| pos.all? { |el| el.is_a?(Numeric) } }
  errors
end
|
|
40
|
+
private :validation_errors
|
|
106
41
|
|
|
107
|
-
|
|
108
|
-
|
|
42
|
+
# True when #validation_errors reports nothing.
# Any StandardError raised while validating is treated as "not valid".
def valid?
  problems = validation_errors
  problems.empty?
rescue StandardError
  false
end
|
|
109
47
|
|
|
110
|
-
|
|
48
|
+
private :valid?
|
|
111
49
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
matrix_rows = each_position.map do |position|
|
|
115
|
-
position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
|
|
50
|
+
# Number of positions in the matrix.
def length
  positions = matrix
  positions.size
end
|
|
117
53
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
if options[:with_name] && name
|
|
121
|
-
name + "\n" + header + matrix_str
|
|
122
|
-
else
|
|
123
|
-
header + matrix_str
|
|
54
|
+
# String form of the model, produced by a freshly built MotifFormatter.
def to_s
  formatter = MotifFormatter.new
  formatter.format(self)
end
|
|
125
|
-
end
|
|
126
57
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
}.map{|el, letter_index| letter_index}.map{|letter_index| %w{A C G T}[letter_index] }.join
|
|
131
|
-
end
|
|
58
|
+
# Models are equal when they are of the same class and have equal
# matrices and equal alphabets.
def ==(other)
  return false unless self.class == other.class
  return false unless matrix == other.matrix
  alphabet == other.alphabet
end
|
|
132
61
|
|
|
62
|
+
# Yields each matrix position in order.
# Without a block, returns an enumerator over the positions.
def each_position
  return to_enum(:each_position) unless block_given?
  matrix.each { |position| yield position }
end
|
|
133
69
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
|
70
|
+
# New model of the same class and alphabet with positions in reverse order.
def reversed
  reversed_matrix = matrix.reverse
  self.class.new(reversed_matrix, alphabet: alphabet)
end
|
|
138
|
-
hsh.with_indifferent_access
|
|
139
|
-
end
|
|
140
73
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
74
|
+
# New model of the same class and alphabet built from #complement_matrix.
def complemented
  complemented_rows = complement_matrix
  self.class.new(complemented_rows, alphabet: alphabet)
end
|
|
144
77
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
end
|
|
149
|
-
def left_augment!(n)
|
|
150
|
-
n.times{ @matrix.unshift(self.class.zero_column) }
|
|
151
|
-
self
|
|
152
|
-
end
|
|
153
|
-
def right_augment!(n)
|
|
154
|
-
n.times{ @matrix.push(self.class.zero_column) }
|
|
155
|
-
self
|
|
156
|
-
end
|
|
78
|
+
# New model of the same class and alphabet: #complement_matrix with its
# positions taken in reverse order.
def reverse_complemented
  rows = complement_matrix.reverse
  self.class.new(rows, alphabet: alphabet)
end
|
|
157
81
|
|
|
158
|
-
|
|
159
|
-
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
|
160
|
-
self
|
|
161
|
-
end
|
|
82
|
+
alias_method :revcomp, :reverse_complemented
|
|
162
83
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
84
|
+
# Matrix in which, at every position, the value for each letter index is
# taken from the index the alphabet reports as its complement.
def complement_matrix
  complement_order = alphabet.each_letter_index.map { |letter_index| alphabet.complement_index(letter_index) }
  matrix.map { |position| position.values_at(*complement_order) }
end
|
|
89
|
+
private :complement_matrix
|
|
166
90
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
end
|
|
91
|
+
# def consensus
|
|
92
|
+
# ConsensusFormatter.by_maximal_elements.format_string(self)
|
|
93
|
+
# end
|
|
171
94
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def left_augment(n)
|
|
176
|
-
dup.left_augment!(n)
|
|
177
|
-
end
|
|
178
|
-
def right_augment(n)
|
|
179
|
-
dup.right_augment!(n)
|
|
180
|
-
end
|
|
181
|
-
def discrete(rate)
|
|
182
|
-
dup.discrete!(rate)
|
|
183
|
-
end
|
|
184
|
-
def dup
|
|
185
|
-
deep_dup
|
|
186
|
-
end
|
|
95
|
+
# Wraps this model in a NamedModel carrying +name+.
def named(name)
  wrapped = NamedModel.new(self, name)
  wrapped
end
|
|
187
98
|
|
|
188
|
-
def as_pcm
|
|
189
|
-
PCM.new(get_parameters.merge(matrix: matrix))
|
|
190
|
-
end
|
|
191
|
-
def as_ppm
|
|
192
|
-
PPM.new(get_parameters.merge(matrix: matrix))
|
|
193
|
-
end
|
|
194
|
-
def as_pwm
|
|
195
|
-
PWM.new(get_parameters.merge(matrix: matrix))
|
|
196
99
|
end
|
|
197
100
|
end
|
|
198
101
|
end
|
|
@@ -1,31 +1,18 @@
|
|
|
1
|
-
require_relative '
|
|
2
|
-
require_relative '../data_models'
|
|
1
|
+
require_relative 'pm'
|
|
3
2
|
|
|
4
3
|
module Bioinform
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
self
|
|
4
|
+
module MotifModel
|
|
5
|
+
# Tells whether +ppm+ is a MotifModel::PPM, or a MotifModel::NamedModel
# whose wrapped model (checked recursively) is one.
def self.acts_as_ppm?(ppm)
  return true if ppm.is_a?(MotifModel::PPM)
  return false unless ppm.is_a?(MotifModel::NamedModel)
  acts_as_ppm?(ppm.model)
end
|
|
10
8
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def self.valid_matrix?(matrix, options = {})
|
|
20
|
-
precision = options[:precision] || 0.01
|
|
21
|
-
super && matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) && pos.all?{|el| el >=0 } }
|
|
22
|
-
end
|
|
23
|
-
def validation_errors(options = {})
|
|
24
|
-
precision = options[:precision] || 0.01
|
|
25
|
-
validation_errors = []
|
|
26
|
-
validation_errors << "PPM matrix should contain only non-negative elements" unless matrix.all?{|pos| pos.all?{|el| el >=0 } }
|
|
27
|
-
validation_errors << "Sum of PPM matrix elements for each position should equal to 1" unless matrix.all?{|pos| ((pos.inject(0, &:+) - 1.0).abs <= precision) }
|
|
28
|
-
super + validation_errors
|
|
9
|
+
class PPM < PM
|
|
10
|
+
# Extends the inherited validation with PPM-specific rules: elements must
# be non-negative and each position must sum to 1.0 (absolute tolerance 1e-3).
#
# Both rules are evaluated only when the matrix is an Array of Arrays of
# Numerics. On a malformed matrix the comparisons and the summation would
# raise instead of producing an error list; the structural problems are
# whatever the inherited method reported.
#
# @return [Array<String>] validation error messages (empty when valid)
def validation_errors
  errors = super
  comparable = matrix.is_a?(Array) &&
               matrix.all? { |pos| pos.is_a?(Array) && pos.all? { |el| el.is_a?(Numeric) } }
  return errors unless comparable
  errors << "elements of PPM should be non-negative" unless matrix.all? { |pos| pos.all? { |el| el >= 0 } }
  errors << "each PPM position should be equal to 1.0 being summed" unless matrix.all? { |pos| (pos.inject(0.0, &:+) - 1.0).abs < 1e-3 }
  errors
end
|
|
29
16
|
end
|
|
30
17
|
end
|
|
31
18
|
end
|
|
@@ -1,67 +1,41 @@
|
|
|
1
|
-
require_relative '
|
|
2
|
-
require_relative '../data_models'
|
|
1
|
+
require_relative 'pm'
|
|
3
2
|
|
|
4
3
|
module Bioinform
|
|
5
|
-
|
|
6
|
-
def
|
|
7
|
-
|
|
8
|
-
end
|
|
9
|
-
def score_variance
|
|
10
|
-
each_position.inject(0) do |variance, position|
|
|
11
|
-
variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
|
12
|
-
position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
|
|
13
|
-
end
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def threshold_gauss_estimation(pvalue)
|
|
17
|
-
sigma = Math.sqrt(score_variance)
|
|
18
|
-
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
|
19
|
-
score_mean + n_ * sigma
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def score(word)
|
|
23
|
-
raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
|
|
24
|
-
#raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
|
25
|
-
(0...length).map do |pos|
|
|
26
|
-
letter = word[pos]
|
|
27
|
-
if IndexByLetter[letter]
|
|
28
|
-
matrix[pos][IndexByLetter[letter]]
|
|
29
|
-
elsif letter == 'N'
|
|
30
|
-
matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
|
|
31
|
-
else
|
|
32
|
-
raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
|
|
33
|
-
end
|
|
34
|
-
end.inject(0, &:+).to_f
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def to_pwm
|
|
38
|
-
self
|
|
4
|
+
module MotifModel
|
|
5
|
+
# Tells whether +pwm+ is a MotifModel::PWM, or a MotifModel::NamedModel
# whose wrapped model (checked recursively) is one.
def self.acts_as_pwm?(pwm)
  return true if pwm.is_a?(MotifModel::PWM)
  return false unless pwm.is_a?(MotifModel::NamedModel)
  acts_as_pwm?(pwm.model)
end
|
|
40
8
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# best score of suffix s[i..l]
|
|
49
|
-
def best_suffix(i)
|
|
50
|
-
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
|
51
|
-
end
|
|
9
|
+
class PWM < PM
|
|
10
|
+
# Sum, over all positions, of the matrix element selected by the letter of
# +word+ at that position (letter indices come from the alphabet).
#
# @param word [#length, #[]] word of the same length as the matrix
# @return [Float] accumulated score
# @raise [Error] when the word length differs from the matrix length
def score(word)
  raise Error, 'Word length should be the same as PWM length' if word.length != length
  (0...length).inject(0.0) do |total, pos|
    letter_index = alphabet.index_by_letter(word[pos])
    total + matrix[pos][letter_index]
  end
end
|
|
52
16
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
17
|
+
# New model of the same class and alphabet whose elements are multiplied
# by +rate+ and then rounded.
#
# @param rate [Numeric] multiplier applied to every element
# @param options [Hash] +:rounding_method+ names the method called on each
#   scaled element (+:ceil+ by default)
# @return a new model built from the rounded matrix
def discreted(rate, options = {})
  rounding_method = options.fetch(:rounding_method, :ceil)
  # public_send: a caller-supplied name must not reach private Kernel
  # methods that every number inherits.
  discreted_matrix = matrix.map { |position| position.map { |element| (element * rate).public_send(rounding_method) } }
  self.class.new(discreted_matrix, alphabet: alphabet)
end
|
|
56
22
|
|
|
23
|
+
# A position filled with 0.0, one entry per alphabet letter.
def zero_column
  Array.new(alphabet.size, 0.0)
end
|
|
26
|
+
private :zero_column
|
|
57
27
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
28
|
+
# New model of the same class and alphabet with +n+ zero columns prepended.
#
# @param n [Integer] number of zero columns to add on the left
# @return a new model built from the augmented matrix
# @raise [Error] when +n+ is negative
def left_augmented(n)
  raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
  # Block form: each padding column is a separate Array. Array.new(n, obj)
  # would repeat one shared object n times.
  padding = Array.new(n) { zero_column }
  self.class.new(padding + matrix, alphabet: alphabet)
end
|
|
62
33
|
|
|
63
|
-
|
|
64
|
-
|
|
34
|
+
# New model of the same class and alphabet with +n+ zero columns appended.
#
# @param n [Integer] number of zero columns to add on the right
# @return a new model built from the augmented matrix
# @raise [Error] when +n+ is negative
def right_augmented(n)
  raise Error, 'Augmenting with negative number of columns is impossible' if n < 0
  # Block form: each padding column is a separate Array. Array.new(n, obj)
  # would repeat one shared object n times.
  padding = Array.new(n) { zero_column }
  self.class.new(matrix + padding, alphabet: alphabet)
end
|
|
65
39
|
end
|
|
66
40
|
end
|
|
67
41
|
end
|