bioinform 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +16 -16
- data/LICENSE +21 -21
- data/README.md +35 -35
- data/Rakefile +4 -4
- data/TODO.txt +37 -37
- data/bin/merge_into_collection +3 -3
- data/bin/pcm2pwm +3 -3
- data/bin/split_motifs +3 -3
- data/bioinform.gemspec +19 -19
- data/lib/bioinform/cli/convert_motif.rb +107 -107
- data/lib/bioinform/cli/merge_into_collection.rb +79 -79
- data/lib/bioinform/cli/pcm2pwm.rb +46 -46
- data/lib/bioinform/cli/split_motifs.rb +46 -46
- data/lib/bioinform/cli.rb +29 -29
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
- data/lib/bioinform/data_models/collection.rb +74 -74
- data/lib/bioinform/data_models/motif.rb +55 -55
- data/lib/bioinform/data_models/pcm.rb +23 -23
- data/lib/bioinform/data_models/pm.rb +169 -169
- data/lib/bioinform/data_models/ppm.rb +9 -9
- data/lib/bioinform/data_models/pwm.rb +55 -55
- data/lib/bioinform/data_models.rb +10 -10
- data/lib/bioinform/formatters/raw_formatter.rb +40 -40
- data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
- data/lib/bioinform/formatters.rb +1 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
- data/lib/bioinform/parsers/parser.rb +87 -87
- data/lib/bioinform/parsers/splittable_parser.rb +56 -56
- data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
- data/lib/bioinform/parsers/string_parser.rb +71 -71
- data/lib/bioinform/parsers/trivial_parser.rb +33 -33
- data/lib/bioinform/parsers/yaml_parser.rb +34 -34
- data/lib/bioinform/parsers.rb +6 -6
- data/lib/bioinform/support/array_product.rb +5 -5
- data/lib/bioinform/support/array_zip.rb +5 -5
- data/lib/bioinform/support/collect_hash.rb +6 -6
- data/lib/bioinform/support/deep_dup.rb +4 -4
- data/lib/bioinform/support/delete_many.rb +13 -13
- data/lib/bioinform/support/inverf.rb +12 -12
- data/lib/bioinform/support/multiline_squish.rb +5 -5
- data/lib/bioinform/support/parameters.rb +27 -27
- data/lib/bioinform/support/partial_sums.rb +15 -15
- data/lib/bioinform/support/same_by.rb +12 -12
- data/lib/bioinform/support/strip_doc.rb +8 -8
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
- data/lib/bioinform/support.rb +17 -17
- data/lib/bioinform/version.rb +3 -3
- data/lib/bioinform.rb +10 -10
- data/spec/cli/cli_spec.rb +13 -13
- data/spec/cli/convert_motif_spec.rb +106 -106
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
- data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
- data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
- data/spec/cli/data/split_motifs/collection.yaml +188 -188
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
- data/spec/cli/merge_into_collection_spec.rb +99 -99
- data/spec/cli/pcm2pwm_spec.rb +79 -79
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +76 -76
- data/spec/data_models/collection_spec.rb +97 -97
- data/spec/data_models/motif_spec.rb +223 -223
- data/spec/data_models/pcm_spec.rb +55 -55
- data/spec/data_models/pm_spec.rb +359 -359
- data/spec/data_models/ppm_spec.rb +7 -7
- data/spec/data_models/pwm_spec.rb +82 -82
- data/spec/fabricators/collection_fabricator.rb +7 -7
- data/spec/fabricators/motif_fabricator.rb +32 -32
- data/spec/fabricators/motif_formats_fabricator.rb +124 -124
- data/spec/fabricators/pcm_fabricator.rb +24 -24
- data/spec/fabricators/pm_fabricator.rb +51 -51
- data/spec/fabricators/ppm_fabricator.rb +13 -13
- data/spec/fabricators/pwm_fabricator.rb +16 -16
- data/spec/parsers/parser_spec.rb +152 -152
- data/spec/parsers/string_fantom_parser_spec.rb +69 -69
- data/spec/parsers/string_parser_spec.rb +76 -76
- data/spec/parsers/trivial_parser_spec.rb +63 -63
- data/spec/parsers/yaml_parser_spec.rb +50 -50
- data/spec/spec_helper.rb +10 -10
- data/spec/spec_helper_source.rb +59 -59
- data/spec/support/advanced_scan_spec.rb +31 -31
- data/spec/support/array_product_spec.rb +14 -14
- data/spec/support/array_zip_spec.rb +14 -14
- data/spec/support/collect_hash_spec.rb +14 -14
- data/spec/support/delete_many_spec.rb +43 -43
- data/spec/support/inverf_spec.rb +18 -18
- data/spec/support/multiline_squish_spec.rb +24 -24
- data/spec/support/partial_sums_spec.rb +30 -30
- data/spec/support/same_by_spec.rb +35 -35
- metadata +3 -3
@@ -1,56 +1,56 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative '../support/third_part/active_support/core_ext/object/try'
|
3
|
-
require_relative '../support/parameters'
|
4
|
-
module Bioinform
|
5
|
-
class Motif
|
6
|
-
include Parameters
|
7
|
-
make_parameters :pcm, :pwm, :ppm, :name, :original_data_model
|
8
|
-
|
9
|
-
# 0)Motif.new()
|
10
|
-
# 1)Motif.new(pcm: ..., pwm: ..., name: ...,threshold: ...)
|
11
|
-
# 2)Motif.new(my_pcm)
|
12
|
-
# 3)Motif.new(pm: my_pcm, threshold: ...)
|
13
|
-
# 2) and 3) cases will automatically choose data model
|
14
|
-
#### What if pm already is a Motif
|
15
|
-
def initialize(parameters = {})
|
16
|
-
case parameters
|
17
|
-
when PM
|
18
|
-
pm = parameters
|
19
|
-
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
20
|
-
self.original_data_model = motif_type
|
21
|
-
set_parameters(motif_type => pm)
|
22
|
-
when Hash
|
23
|
-
if parameters.has_key?(:pm) && parameters[:pm].is_a?(PM)
|
24
|
-
pm = parameters.delete(:pm)
|
25
|
-
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
26
|
-
self.original_data_model = motif_type
|
27
|
-
set_parameters(motif_type => pm)
|
28
|
-
end
|
29
|
-
set_parameters(parameters)
|
30
|
-
else
|
31
|
-
raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
|
36
|
-
#def pcm; parameters.pcm; end
|
37
|
-
def pwm; parameters.pwm || pcm.try(:to_pwm); end
|
38
|
-
def ppm; parameters.ppm || pcm.try(:to_ppm); end
|
39
|
-
#def pcm=(pcm); parameters.pcm = pcm; end
|
40
|
-
#def pwm=(pwm); parameters.pwm = pwm; end
|
41
|
-
#def ppm=(ppm); parameters.ppm = ppm; end
|
42
|
-
def name; parameters.name || pm.name; end
|
43
|
-
|
44
|
-
def method_missing(meth, *args)
|
45
|
-
parameters.__send__(meth, *args)
|
46
|
-
end
|
47
|
-
|
48
|
-
def ==(other)
|
49
|
-
parameters == other.parameters
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_s
|
53
|
-
parameters.to_s
|
54
|
-
end
|
55
|
-
end
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative '../support/third_part/active_support/core_ext/object/try'
|
3
|
+
require_relative '../support/parameters'
|
4
|
+
module Bioinform
|
5
|
+
class Motif
|
6
|
+
include Parameters
|
7
|
+
make_parameters :pcm, :pwm, :ppm, :name, :original_data_model
|
8
|
+
|
9
|
+
# 0)Motif.new()
|
10
|
+
# 1)Motif.new(pcm: ..., pwm: ..., name: ...,threshold: ...)
|
11
|
+
# 2)Motif.new(my_pcm)
|
12
|
+
# 3)Motif.new(pm: my_pcm, threshold: ...)
|
13
|
+
# 2) and 3) cases will automatically choose data model
|
14
|
+
#### What if pm already is a Motif
|
15
|
+
def initialize(parameters = {})
|
16
|
+
case parameters
|
17
|
+
when PM
|
18
|
+
pm = parameters
|
19
|
+
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
20
|
+
self.original_data_model = motif_type
|
21
|
+
set_parameters(motif_type => pm)
|
22
|
+
when Hash
|
23
|
+
if parameters.has_key?(:pm) && parameters[:pm].is_a?(PM)
|
24
|
+
pm = parameters.delete(:pm)
|
25
|
+
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
26
|
+
self.original_data_model = motif_type
|
27
|
+
set_parameters(motif_type => pm)
|
28
|
+
end
|
29
|
+
set_parameters(parameters)
|
30
|
+
else
|
31
|
+
raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
|
36
|
+
#def pcm; parameters.pcm; end
|
37
|
+
def pwm; parameters.pwm || pcm.try(:to_pwm); end
|
38
|
+
def ppm; parameters.ppm || pcm.try(:to_ppm); end
|
39
|
+
#def pcm=(pcm); parameters.pcm = pcm; end
|
40
|
+
#def pwm=(pwm); parameters.pwm = pwm; end
|
41
|
+
#def ppm=(ppm); parameters.ppm = ppm; end
|
42
|
+
def name; parameters.name || pm.name; end
|
43
|
+
|
44
|
+
def method_missing(meth, *args)
|
45
|
+
parameters.__send__(meth, *args)
|
46
|
+
end
|
47
|
+
|
48
|
+
def ==(other)
|
49
|
+
parameters == other.parameters
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s
|
53
|
+
parameters.to_s
|
54
|
+
end
|
55
|
+
end
|
56
56
|
end
|
@@ -1,24 +1,24 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../data_models'
|
3
|
-
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
4
|
-
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
5
|
-
|
6
|
-
module Bioinform
|
7
|
-
class PCM < PM
|
8
|
-
def count
|
9
|
-
matrix.first.inject(&:+)
|
10
|
-
end
|
11
|
-
|
12
|
-
def to_pcm
|
13
|
-
self
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_pwm(pseudocount = Math.log(count))
|
17
|
-
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
18
|
-
end
|
19
|
-
|
20
|
-
def to_ppm
|
21
|
-
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
22
|
-
end
|
23
|
-
end
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../data_models'
|
3
|
+
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
4
|
+
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
class PCM < PM
|
8
|
+
def count
|
9
|
+
matrix.first.inject(&:+)
|
10
|
+
end
|
11
|
+
|
12
|
+
def to_pcm
|
13
|
+
self
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_pwm(pseudocount = Math.log(count))
|
17
|
+
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_ppm
|
21
|
+
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
22
|
+
end
|
23
|
+
end
|
24
24
|
end
|
@@ -1,170 +1,170 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative '../support'
|
3
|
-
require_relative '../parsers'
|
4
|
-
require_relative '../formatters'
|
5
|
-
|
6
|
-
module Bioinform
|
7
|
-
IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
|
8
|
-
'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
|
9
|
-
LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
|
10
|
-
|
11
|
-
class PM
|
12
|
-
attr_accessor :matrix, :parameters
|
13
|
-
|
14
|
-
include Parameters
|
15
|
-
make_parameters :name, :background # , :tags
|
16
|
-
|
17
|
-
# def mark(tag)
|
18
|
-
# tags << tag
|
19
|
-
# end
|
20
|
-
|
21
|
-
# def tagged?(tag)
|
22
|
-
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
23
|
-
# end
|
24
|
-
|
25
|
-
def self.choose_parser(input)
|
26
|
-
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
27
|
-
self.new(input, parser) rescue nil
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.split_on_motifs(input)
|
32
|
-
parser = choose_parser(input)
|
33
|
-
raise ParsingError, "No parser can parse given input" unless parser
|
34
|
-
parser.split_on_motifs(input, self)
|
35
|
-
end
|
36
|
-
|
37
|
-
def initialize(input, parser = nil)
|
38
|
-
@parameters = OpenStruct.new
|
39
|
-
parser ||= self.class.choose_parser(input)
|
40
|
-
raise 'No one parser can process input' unless parser
|
41
|
-
result = parser.new(input).parse
|
42
|
-
@matrix = result.matrix
|
43
|
-
self.name = result.name
|
44
|
-
# self.tags = result.tags || []
|
45
|
-
self.background = result.background || [1, 1, 1, 1]
|
46
|
-
raise 'matrix not valid' unless valid?
|
47
|
-
end
|
48
|
-
|
49
|
-
def ==(other)
|
50
|
-
@matrix == other.matrix && background == other.background && name == other.name
|
51
|
-
rescue
|
52
|
-
false
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.valid_matrix?(matrix)
|
56
|
-
matrix.is_a?(Array) &&
|
57
|
-
! matrix.empty? &&
|
58
|
-
matrix.all?{|pos| pos.is_a?(Array)} &&
|
59
|
-
matrix.all?{|pos| pos.size == 4} &&
|
60
|
-
matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
61
|
-
rescue
|
62
|
-
false
|
63
|
-
end
|
64
|
-
|
65
|
-
def valid?
|
66
|
-
self.class.valid_matrix?(@matrix)
|
67
|
-
end
|
68
|
-
|
69
|
-
def each_position
|
70
|
-
if block_given?
|
71
|
-
matrix.each{|pos| yield pos}
|
72
|
-
else
|
73
|
-
self.to_enum(:each_position)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
def length
|
78
|
-
@matrix.length
|
79
|
-
end
|
80
|
-
alias_method :size, :length
|
81
|
-
|
82
|
-
def to_s(options = {}, formatter = RawFormatter)
|
83
|
-
formatter.new(self, options).to_s
|
84
|
-
end
|
85
|
-
|
86
|
-
def pretty_string(options = {})
|
87
|
-
default_options = {with_name: true, letters_as_rows: false}
|
88
|
-
|
89
|
-
return to_s(options) if options[:letters_as_rows]
|
90
|
-
|
91
|
-
options = default_options.merge(options)
|
92
|
-
header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
|
93
|
-
matrix_rows = each_position.map do |position|
|
94
|
-
position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
|
95
|
-
end
|
96
|
-
|
97
|
-
matrix_str = matrix_rows.join("\n")
|
98
|
-
|
99
|
-
if options[:with_name] && name
|
100
|
-
name + "\n" + header + matrix_str
|
101
|
-
else
|
102
|
-
header + matrix_str
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
def to_hash
|
107
|
-
hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
|
108
|
-
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
109
|
-
end
|
110
|
-
hsh.with_indifferent_access
|
111
|
-
end
|
112
|
-
|
113
|
-
def self.zero_column
|
114
|
-
[0, 0, 0, 0]
|
115
|
-
end
|
116
|
-
|
117
|
-
def reverse_complement!
|
118
|
-
@matrix.reverse!.map!(&:reverse!)
|
119
|
-
self
|
120
|
-
end
|
121
|
-
def left_augment!(n)
|
122
|
-
n.times{ @matrix.unshift(self.class.zero_column) }
|
123
|
-
self
|
124
|
-
end
|
125
|
-
def right_augment!(n)
|
126
|
-
n.times{ @matrix.push(self.class.zero_column) }
|
127
|
-
self
|
128
|
-
end
|
129
|
-
|
130
|
-
def discrete!(rate)
|
131
|
-
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
132
|
-
self
|
133
|
-
end
|
134
|
-
|
135
|
-
def vocabulary_volume
|
136
|
-
background.inject(&:+) ** length
|
137
|
-
end
|
138
|
-
|
139
|
-
def probability
|
140
|
-
sum = background.inject(0.0, &:+)
|
141
|
-
background.map{|element| element.to_f / sum}
|
142
|
-
end
|
143
|
-
|
144
|
-
def reverse_complement
|
145
|
-
dup.reverse_complement!
|
146
|
-
end
|
147
|
-
def left_augment(n)
|
148
|
-
dup.left_augment!(n)
|
149
|
-
end
|
150
|
-
def right_augment(n)
|
151
|
-
dup.right_augment!(n)
|
152
|
-
end
|
153
|
-
def discrete(rate)
|
154
|
-
dup.discrete!(rate)
|
155
|
-
end
|
156
|
-
def dup
|
157
|
-
deep_dup
|
158
|
-
end
|
159
|
-
|
160
|
-
def as_pcm
|
161
|
-
PCM.new(get_parameters.merge(matrix: matrix))
|
162
|
-
end
|
163
|
-
def as_ppm
|
164
|
-
PPM.new(get_parameters.merge(matrix: matrix))
|
165
|
-
end
|
166
|
-
def as_pwm
|
167
|
-
PWM.new(get_parameters.merge(matrix: matrix))
|
168
|
-
end
|
169
|
-
end
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative '../support'
|
3
|
+
require_relative '../parsers'
|
4
|
+
require_relative '../formatters'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
IndexByLetter = { 'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3,
|
8
|
+
'a' => 0, 'c' => 1, 'g' => 2, 't' => 3, a: 0, c: 1, g: 2, t: 3}
|
9
|
+
LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
|
10
|
+
|
11
|
+
class PM
|
12
|
+
attr_accessor :matrix, :parameters
|
13
|
+
|
14
|
+
include Parameters
|
15
|
+
make_parameters :name, :background # , :tags
|
16
|
+
|
17
|
+
# def mark(tag)
|
18
|
+
# tags << tag
|
19
|
+
# end
|
20
|
+
|
21
|
+
# def tagged?(tag)
|
22
|
+
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
23
|
+
# end
|
24
|
+
|
25
|
+
def self.choose_parser(input)
|
26
|
+
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
27
|
+
self.new(input, parser) rescue nil
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.split_on_motifs(input)
|
32
|
+
parser = choose_parser(input)
|
33
|
+
raise ParsingError, "No parser can parse given input" unless parser
|
34
|
+
parser.split_on_motifs(input, self)
|
35
|
+
end
|
36
|
+
|
37
|
+
def initialize(input, parser = nil)
|
38
|
+
@parameters = OpenStruct.new
|
39
|
+
parser ||= self.class.choose_parser(input)
|
40
|
+
raise 'No one parser can process input' unless parser
|
41
|
+
result = parser.new(input).parse
|
42
|
+
@matrix = result.matrix
|
43
|
+
self.name = result.name
|
44
|
+
# self.tags = result.tags || []
|
45
|
+
self.background = result.background || [1, 1, 1, 1]
|
46
|
+
raise 'matrix not valid' unless valid?
|
47
|
+
end
|
48
|
+
|
49
|
+
def ==(other)
|
50
|
+
@matrix == other.matrix && background == other.background && name == other.name
|
51
|
+
rescue
|
52
|
+
false
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.valid_matrix?(matrix)
|
56
|
+
matrix.is_a?(Array) &&
|
57
|
+
! matrix.empty? &&
|
58
|
+
matrix.all?{|pos| pos.is_a?(Array)} &&
|
59
|
+
matrix.all?{|pos| pos.size == 4} &&
|
60
|
+
matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric)}}
|
61
|
+
rescue
|
62
|
+
false
|
63
|
+
end
|
64
|
+
|
65
|
+
def valid?
|
66
|
+
self.class.valid_matrix?(@matrix)
|
67
|
+
end
|
68
|
+
|
69
|
+
def each_position
|
70
|
+
if block_given?
|
71
|
+
matrix.each{|pos| yield pos}
|
72
|
+
else
|
73
|
+
self.to_enum(:each_position)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def length
|
78
|
+
@matrix.length
|
79
|
+
end
|
80
|
+
alias_method :size, :length
|
81
|
+
|
82
|
+
def to_s(options = {}, formatter = RawFormatter)
|
83
|
+
formatter.new(self, options).to_s
|
84
|
+
end
|
85
|
+
|
86
|
+
def pretty_string(options = {})
|
87
|
+
default_options = {with_name: true, letters_as_rows: false}
|
88
|
+
|
89
|
+
return to_s(options) if options[:letters_as_rows]
|
90
|
+
|
91
|
+
options = default_options.merge(options)
|
92
|
+
header = %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
|
93
|
+
matrix_rows = each_position.map do |position|
|
94
|
+
position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
|
95
|
+
end
|
96
|
+
|
97
|
+
matrix_str = matrix_rows.join("\n")
|
98
|
+
|
99
|
+
if options[:with_name] && name
|
100
|
+
name + "\n" + header + matrix_str
|
101
|
+
else
|
102
|
+
header + matrix_str
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def to_hash
|
107
|
+
hsh = %w{A C G T}.each_with_index.collect_hash do |letter, letter_index|
|
108
|
+
[ letter, @matrix.map{|pos| pos[letter_index]} ]
|
109
|
+
end
|
110
|
+
hsh.with_indifferent_access
|
111
|
+
end
|
112
|
+
|
113
|
+
def self.zero_column
|
114
|
+
[0, 0, 0, 0]
|
115
|
+
end
|
116
|
+
|
117
|
+
def reverse_complement!
|
118
|
+
@matrix.reverse!.map!(&:reverse!)
|
119
|
+
self
|
120
|
+
end
|
121
|
+
def left_augment!(n)
|
122
|
+
n.times{ @matrix.unshift(self.class.zero_column) }
|
123
|
+
self
|
124
|
+
end
|
125
|
+
def right_augment!(n)
|
126
|
+
n.times{ @matrix.push(self.class.zero_column) }
|
127
|
+
self
|
128
|
+
end
|
129
|
+
|
130
|
+
def discrete!(rate)
|
131
|
+
@matrix.map!{|position| position.map{|element| (element * rate).ceil}}
|
132
|
+
self
|
133
|
+
end
|
134
|
+
|
135
|
+
def vocabulary_volume
|
136
|
+
background.inject(&:+) ** length
|
137
|
+
end
|
138
|
+
|
139
|
+
def probability
|
140
|
+
sum = background.inject(0.0, &:+)
|
141
|
+
background.map{|element| element.to_f / sum}
|
142
|
+
end
|
143
|
+
|
144
|
+
def reverse_complement
|
145
|
+
dup.reverse_complement!
|
146
|
+
end
|
147
|
+
def left_augment(n)
|
148
|
+
dup.left_augment!(n)
|
149
|
+
end
|
150
|
+
def right_augment(n)
|
151
|
+
dup.right_augment!(n)
|
152
|
+
end
|
153
|
+
def discrete(rate)
|
154
|
+
dup.discrete!(rate)
|
155
|
+
end
|
156
|
+
def dup
|
157
|
+
deep_dup
|
158
|
+
end
|
159
|
+
|
160
|
+
def as_pcm
|
161
|
+
PCM.new(get_parameters.merge(matrix: matrix))
|
162
|
+
end
|
163
|
+
def as_ppm
|
164
|
+
PPM.new(get_parameters.merge(matrix: matrix))
|
165
|
+
end
|
166
|
+
def as_pwm
|
167
|
+
PWM.new(get_parameters.merge(matrix: matrix))
|
168
|
+
end
|
169
|
+
end
|
170
170
|
end
|
@@ -1,10 +1,10 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../data_models'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
class PPM < PM
|
6
|
-
def to_ppm
|
7
|
-
self
|
8
|
-
end
|
9
|
-
end
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../data_models'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
class PPM < PM
|
6
|
+
def to_ppm
|
7
|
+
self
|
8
|
+
end
|
9
|
+
end
|
10
10
|
end
|
@@ -1,56 +1,56 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../data_models'
|
3
|
-
module Bioinform
|
4
|
-
class PWM < PM
|
5
|
-
def score_mean
|
6
|
-
each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
|
7
|
-
end
|
8
|
-
def score_variance
|
9
|
-
each_position.inject(0) do |variance, position|
|
10
|
-
variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
11
|
-
position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def threshold_gauss_estimation(pvalue)
|
16
|
-
sigma = Math.sqrt(score_variance)
|
17
|
-
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
18
|
-
score_mean + n_ * sigma
|
19
|
-
end
|
20
|
-
|
21
|
-
def score(word)
|
22
|
-
raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
|
23
|
-
#raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
24
|
-
(0...length).map do |pos|
|
25
|
-
letter = word[pos]
|
26
|
-
if IndexByLetter[letter]
|
27
|
-
matrix[pos][IndexByLetter[letter]]
|
28
|
-
elsif letter == 'N'
|
29
|
-
matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
|
30
|
-
else
|
31
|
-
raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
|
32
|
-
end
|
33
|
-
end.inject(0, &:+).to_f
|
34
|
-
end
|
35
|
-
|
36
|
-
def to_pwm
|
37
|
-
self
|
38
|
-
end
|
39
|
-
|
40
|
-
def best_score
|
41
|
-
best_suffix(0)
|
42
|
-
end
|
43
|
-
def worst_score
|
44
|
-
worst_suffix(0)
|
45
|
-
end
|
46
|
-
|
47
|
-
# best score of suffix s[i..l]
|
48
|
-
def best_suffix(i)
|
49
|
-
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
50
|
-
end
|
51
|
-
|
52
|
-
def worst_suffix(i)
|
53
|
-
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
|
-
end
|
55
|
-
end
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../data_models'
|
3
|
+
module Bioinform
|
4
|
+
class PWM < PM
|
5
|
+
def score_mean
|
6
|
+
each_position.inject(0){ |mean, position| mean + position.each_index.inject(0){|sum, letter| sum + position[letter] * probability[letter]} }
|
7
|
+
end
|
8
|
+
def score_variance
|
9
|
+
each_position.inject(0) do |variance, position|
|
10
|
+
variance + position.each_index.inject(0) { |sum,letter| sum + position[letter]**2 * probability[letter] } -
|
11
|
+
position.each_index.inject(0) { |sum,letter| sum + position[letter] * probability[letter] }**2
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def threshold_gauss_estimation(pvalue)
|
16
|
+
sigma = Math.sqrt(score_variance)
|
17
|
+
n_ = Math.inverf(1 - 2 * pvalue) * Math.sqrt(2)
|
18
|
+
score_mean + n_ * sigma
|
19
|
+
end
|
20
|
+
|
21
|
+
def score(word)
|
22
|
+
raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
|
23
|
+
#raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
24
|
+
(0...length).map do |pos|
|
25
|
+
letter = word[pos]
|
26
|
+
if IndexByLetter[letter]
|
27
|
+
matrix[pos][IndexByLetter[letter]]
|
28
|
+
elsif letter == 'N'
|
29
|
+
matrix[pos].zip(probability).map{|el, p| el * p}.inject(0, &:+)
|
30
|
+
else
|
31
|
+
raise ArgumentError, "word in PWM#score(#{word}) should have only ACGT or N letters"
|
32
|
+
end
|
33
|
+
end.inject(0, &:+).to_f
|
34
|
+
end
|
35
|
+
|
36
|
+
def to_pwm
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
def best_score
|
41
|
+
best_suffix(0)
|
42
|
+
end
|
43
|
+
def worst_score
|
44
|
+
worst_suffix(0)
|
45
|
+
end
|
46
|
+
|
47
|
+
# best score of suffix s[i..l]
|
48
|
+
def best_suffix(i)
|
49
|
+
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
50
|
+
end
|
51
|
+
|
52
|
+
def worst_suffix(i)
|
53
|
+
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
|
+
end
|
55
|
+
end
|
56
56
|
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
require_relative 'parsers'
|
2
|
-
|
3
|
-
require_relative 'data_models/pm'
|
4
|
-
require_relative 'data_models/pcm'
|
5
|
-
require_relative 'data_models/ppm'
|
6
|
-
require_relative 'data_models/pwm'
|
7
|
-
|
8
|
-
require_relative 'data_models/collection'
|
9
|
-
|
10
|
-
#require_relative 'bioinform/data_models/iupac_word'
|
1
|
+
require_relative 'parsers'
|
2
|
+
|
3
|
+
require_relative 'data_models/pm'
|
4
|
+
require_relative 'data_models/pcm'
|
5
|
+
require_relative 'data_models/ppm'
|
6
|
+
require_relative 'data_models/pwm'
|
7
|
+
|
8
|
+
require_relative 'data_models/collection'
|
9
|
+
|
10
|
+
#require_relative 'bioinform/data_models/iupac_word'
|
11
11
|
#require_relative 'bioinform/data_models/iupac_wordset'
|