bioinform 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +12 -0
- data/Guardfile +9 -0
- data/README.md +7 -1
- data/TODO.txt +8 -0
- data/bioinform.gemspec +7 -5
- data/lib/bioinform.rb +1 -0
- data/lib/bioinform/cli.rb +12 -3
- data/lib/bioinform/cli/convert_motif.rb +108 -0
- data/lib/bioinform/cli/merge_into_collection.rb +6 -2
- data/lib/bioinform/cli/pcm2pwm.rb +1 -1
- data/lib/bioinform/cli/split_motifs.rb +1 -1
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +19 -0
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +20 -0
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +0 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +0 -0
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +21 -35
- data/lib/bioinform/data_models/motif.rb +56 -0
- data/lib/bioinform/data_models/pcm.rb +4 -8
- data/lib/bioinform/data_models/pm.rb +19 -48
- data/lib/bioinform/data_models/pwm.rb +16 -0
- data/lib/bioinform/formatters.rb +2 -0
- data/lib/bioinform/formatters/raw_formatter.rb +41 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +39 -0
- data/lib/bioinform/parsers.rb +2 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +35 -0
- data/lib/bioinform/parsers/string_parser.rb +1 -1
- data/lib/bioinform/parsers/trivial_parser.rb +2 -1
- data/lib/bioinform/parsers/yaml_parser.rb +1 -1
- data/lib/bioinform/support.rb +2 -1
- data/lib/bioinform/support/parameters.rb +27 -18
- data/lib/bioinform/support/strip_doc.rb +9 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/cli/convert_motif_spec.rb +107 -0
- data/spec/cli/data/merge_into_collection/collection.yaml.result +186 -183
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +186 -183
- data/spec/cli/data/split_motifs/collection.yaml +184 -193
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +18 -0
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -0
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +50 -0
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +5 -0
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +4 -0
- data/spec/data_models/collection_spec.rb +36 -34
- data/spec/data_models/motif_spec.rb +224 -0
- data/spec/data_models/pcm_spec.rb +28 -17
- data/spec/data_models/pm_spec.rb +83 -121
- data/spec/data_models/pwm_spec.rb +38 -0
- data/spec/fabricators/collection_fabricator.rb +2 -2
- data/spec/fabricators/motif_fabricator.rb +33 -0
- data/spec/fabricators/motif_formats_fabricator.rb +125 -0
- data/spec/fabricators/pcm_fabricator.rb +25 -0
- data/spec/fabricators/pm_fabricator.rb +10 -1
- data/spec/fabricators/ppm_fabricator.rb +14 -0
- data/spec/fabricators/pwm_fabricator.rb +16 -0
- data/spec/parsers/trivial_parser_spec.rb +12 -12
- data/spec/parsers/yaml_parser_spec.rb +11 -11
- data/spec/spec_helper.rb +19 -49
- data/spec/spec_helper_source.rb +59 -0
- metadata +78 -7
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'active_support/core_ext/object/try'
|
3
|
+
require_relative '../support/parameters'
|
4
|
+
module Bioinform
  # A motif that keeps its matrices (:pcm, :pwm, :ppm), its name and the kind
  # of matrix it was built from (:original_data_model) in the parameter store
  # provided by the Parameters mixin.
  class Motif
    include Parameters
    # Generates readers and writers for these parameters; the pwm, ppm and
    # name readers are redefined below with fallbacks.
    make_parameters :pcm, :pwm, :ppm, :name, :original_data_model

    # Supported call forms:
    #   0) Motif.new()
    #   1) Motif.new(pcm: ..., pwm: ..., name: ..., threshold: ...)
    #   2) Motif.new(my_pcm)
    #   3) Motif.new(pm: my_pcm, threshold: ...)
    # Forms 2) and 3) choose the data model automatically from the class of
    # the given matrix.
    # TODO: decide what should happen when pm already is a Motif.
    #
    # The hash passed in forms 1) and 3) is not modified.
    #
    # Raises ArgumentError when the argument is neither a PM nor a Hash.
    def initialize(parameters = {})
      case parameters
      when PM
        assign_data_model(parameters)
      when Hash
        # Work on a copy so that the caller's hash keeps its :pm entry.
        params = parameters.dup
        if params.has_key?(:pm) && params[:pm].is_a?(PM)
          assign_data_model(params.delete(:pm))
        end
        set_parameters(params)
      else
        raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
      end
    end

    # The matrix this motif was created from: the parameter named by
    # original_data_model, or the raw :pm parameter when no model was recorded.
    def pm
      model = original_data_model || :pm
      model == :pm ? parameters.pm : send(model)
    end

    # Stored PWM, or one derived from the PCM when a PCM is present.
    def pwm
      parameters.pwm || pcm.try(:to_pwm)
    end

    # Stored PPM, or one derived from the PCM when a PCM is present.
    def ppm
      parameters.ppm || pcm.try(:to_ppm)
    end

    # Explicitly assigned name, otherwise the name of the underlying matrix.
    # Returns nil (instead of raising on nil) when there is no matrix either.
    def name
      parameters.name || pm.try(:name)
    end

    # Any other message is forwarded to the parameter store, which is how
    # ad-hoc parameters such as `threshold` get their readers and writers.
    # NOTE(review): there is no respond_to_missing? counterpart, so
    # respond_to? does not report these forwarded methods.
    def method_missing(meth, *args)
      parameters.__send__(meth, *args)
    end

    # Two motifs are equal when their parameter stores are equal. Objects
    # that expose no `parameters` (e.g. nil) compare as not equal.
    def ==(other)
      other.respond_to?(:parameters) && parameters == other.parameters
    end

    def to_s
      parameters.to_s
    end

    private

    # Records which data model `pm` is (its downcased class name without the
    # namespace, e.g. :pcm) and stores `pm` under that parameter.
    def assign_data_model(pm)
      motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
      self.original_data_model = motif_type
      set_parameters(motif_type => pm)
    end
  end
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
require_relative '../support'
|
2
2
|
require_relative '../data_models'
|
3
|
+
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
4
|
+
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
3
5
|
|
4
6
|
module Bioinform
|
5
7
|
class PCM < PM
|
@@ -12,17 +14,11 @@ module Bioinform
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def to_pwm(pseudocount = Math.log(count))
|
15
|
-
|
16
|
-
pos.each_index.map do |ind|
|
17
|
-
Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
|
18
|
-
end
|
19
|
-
end
|
20
|
-
PWM.new(matrix: mat, name: name, tags: tags, background: background)
|
17
|
+
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
21
18
|
end
|
22
19
|
|
23
20
|
def to_ppm
|
24
|
-
|
25
|
-
PPM.new(matrix: mat, name: name, tags: tags, background: background)
|
21
|
+
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
26
22
|
end
|
27
23
|
end
|
28
24
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'ostruct'
|
2
2
|
require_relative '../support'
|
3
3
|
require_relative '../parsers'
|
4
|
+
require_relative '../formatters'
|
4
5
|
|
5
6
|
module Bioinform
|
6
7
|
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
|
@@ -10,22 +11,22 @@ module Bioinform
|
|
10
11
|
attr_accessor :matrix, :parameters
|
11
12
|
|
12
13
|
include Parameters
|
13
|
-
make_parameters
|
14
|
+
make_parameters :name, :background # , :tags
|
14
15
|
|
15
|
-
def mark(tag)
|
16
|
-
tags << tag
|
17
|
-
end
|
16
|
+
# def mark(tag)
|
17
|
+
# tags << tag
|
18
|
+
# end
|
18
19
|
|
19
|
-
def tagged?(tag)
|
20
|
-
tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
21
|
-
end
|
20
|
+
# def tagged?(tag)
|
21
|
+
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
22
|
+
# end
|
22
23
|
|
23
24
|
def self.choose_parser(input)
|
24
|
-
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
25
|
+
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
25
26
|
self.new(input, parser) rescue nil
|
26
27
|
end
|
27
28
|
end
|
28
|
-
|
29
|
+
|
29
30
|
def self.split_on_motifs(input)
|
30
31
|
parser = choose_parser(input)
|
31
32
|
raise ParsingError, "No parser can parse given input" unless parser
|
@@ -39,7 +40,7 @@ module Bioinform
|
|
39
40
|
result = parser.new(input).parse
|
40
41
|
@matrix = result.matrix
|
41
42
|
self.name = result.name
|
42
|
-
self.tags = result.tags || []
|
43
|
+
# self.tags = result.tags || []
|
43
44
|
self.background = result.background || [1, 1, 1, 1]
|
44
45
|
raise 'matrix not valid' unless valid?
|
45
46
|
end
|
@@ -77,21 +78,8 @@ module Bioinform
|
|
77
78
|
end
|
78
79
|
alias_method :size, :length
|
79
80
|
|
80
|
-
def to_s(options = {})
|
81
|
-
|
82
|
-
options = default_options.merge(options)
|
83
|
-
if options[:letters_as_rows]
|
84
|
-
hsh = to_hash
|
85
|
-
matrix_str = [:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
|
86
|
-
else
|
87
|
-
matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
|
88
|
-
end
|
89
|
-
|
90
|
-
if options[:with_name] && name
|
91
|
-
name + "\n" + matrix_str
|
92
|
-
else
|
93
|
-
matrix_str
|
94
|
-
end
|
81
|
+
def to_s(options = {}, formatter = RawFormatter)
|
82
|
+
formatter.new(self, options).to_s
|
95
83
|
end
|
96
84
|
|
97
85
|
def pretty_string(options = {})
|
@@ -152,22 +140,6 @@ module Bioinform
|
|
152
140
|
background.map{|element| element.to_f / sum}
|
153
141
|
end
|
154
142
|
|
155
|
-
def best_score
|
156
|
-
@matrix.inject(0.0){|sum, col| sum + col.max}
|
157
|
-
end
|
158
|
-
def worst_score
|
159
|
-
@matrix.inject(0.0){|sum, col| sum + col.min}
|
160
|
-
end
|
161
|
-
|
162
|
-
# best score of suffix s[i..l]
|
163
|
-
def best_suffix(i)
|
164
|
-
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
165
|
-
end
|
166
|
-
|
167
|
-
def worst_suffix(i)
|
168
|
-
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
169
|
-
end
|
170
|
-
|
171
143
|
def reverse_complement
|
172
144
|
dup.reverse_complement!
|
173
145
|
end
|
@@ -184,15 +156,14 @@ module Bioinform
|
|
184
156
|
deep_dup
|
185
157
|
end
|
186
158
|
|
187
|
-
def
|
188
|
-
PCM.new(matrix: matrix
|
159
|
+
def as_pcm
|
160
|
+
PCM.new(get_parameters.merge(matrix: matrix))
|
189
161
|
end
|
190
|
-
def
|
191
|
-
PPM.new(matrix: matrix
|
162
|
+
def as_ppm
|
163
|
+
PPM.new(get_parameters.merge(matrix: matrix))
|
192
164
|
end
|
193
|
-
def
|
194
|
-
PWM.new(matrix: matrix
|
165
|
+
def as_pwm
|
166
|
+
PWM.new(get_parameters.merge(matrix: matrix))
|
195
167
|
end
|
196
|
-
|
197
168
|
end
|
198
169
|
end
|
@@ -36,5 +36,21 @@ module Bioinform
|
|
36
36
|
def to_pwm
|
37
37
|
self
|
38
38
|
end
|
39
|
+
|
40
|
+
def best_score
|
41
|
+
@matrix.inject(0.0){|sum, col| sum + col.max}
|
42
|
+
end
|
43
|
+
def worst_score
|
44
|
+
@matrix.inject(0.0){|sum, col| sum + col.min}
|
45
|
+
end
|
46
|
+
|
47
|
+
# best score of suffix s[i..l]
|
48
|
+
def best_suffix(i)
|
49
|
+
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
50
|
+
end
|
51
|
+
|
52
|
+
def worst_suffix(i)
|
53
|
+
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
|
+
end
|
39
55
|
end
|
40
56
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Renders a motif as plain text: an optional name line followed by the matrix
# with tab-separated values.
#
# Options (merged over the defaults):
#   :with_name       - prepend the motif name as the first line (default true)
#   :letters_as_rows - print one "A|...", "C|...", "G|...", "T|..." row per
#                      letter instead of one row per position (default false)
class RawFormatter
  attr_accessor :motif, :options

  # motif   - object answering `name`, `each_position` and `to_hash`
  # options - hash overriding the defaults listed above
  def initialize(motif, options = {})
    @motif = motif

    default_options = {with_name: true, letters_as_rows: false}
    @options = default_options.merge(options)
  end

  def name
    motif.name
  end

  # Name line including the trailing newline, or an empty string when the name
  # is switched off or absent. Interpolation is used so that a name which is
  # not a String (for example a Symbol) is rendered instead of raising.
  def header
    return '' unless options[:with_name] && name

    "#{name}\n"
  end

  # Matrix body without a trailing newline.
  def matrix_string
    if options[:letters_as_rows]
      hsh = motif.to_hash
      [:A, :C, :G, :T].map { |letter| "#{letter}|" + hsh[letter].join("\t") }.join("\n")
    else
      motif.each_position.map { |pos| pos.join("\t") }.join("\n")
    end
  end

  # The raw format has no footer.
  def footer
    ''
  end

  def to_s
    header + matrix_string + footer
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Renders a motif as a TRANSFAC-style record: an ID/BF/P0 header, one numbered
# line per position and an "XX" / "//" terminator.
class TransfacFormatter
  attr_accessor :motif, :options

  # motif   - object answering `name` and `each_position`
  # options - hash merged over {with_name: true, letters_as_rows: false};
  #           only :with_name is read by this formatter
  def initialize(motif, options = {})
    @motif = motif
    @options = {with_name: true, letters_as_rows: false}.merge(options)
  end

  def name
    motif.name
  end

  # Header lines; the species line is a fixed stub value.
  # Raises RuntimeError when the name is switched off or absent.
  def header
    raise 'Transfac should have the name field' unless options[:with_name] && name

    "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
  end

  # One line per position: the zero-based index padded to at least two digits,
  # a space, then the tab-separated values.
  # NOTE(review): numbering starts at 00 because it comes from the zero-based
  # index; confirm that consumers do not expect TRANSFAC's 01-based numbering.
  def matrix_string
    rows = motif.each_position.map.with_index do |values, index|
      index.to_s.rjust(2, '0') + ' ' + values.join("\t")
    end
    rows.join("\n")
  end

  # Record terminator; starts with a newline because matrix_string has no
  # trailing one.
  def footer
    "\nXX\n//"
  end

  def to_s
    header + matrix_string + footer
  end
end
|
data/lib/bioinform/parsers.rb
CHANGED
@@ -3,4 +3,5 @@ require_relative 'parsers/trivial_parser'
|
|
3
3
|
require_relative 'parsers/yaml_parser'
|
4
4
|
require_relative 'parsers/string_parser'
|
5
5
|
require_relative 'parsers/string_fantom_parser'
|
6
|
-
require_relative 'parsers/splittable_parser'
|
6
|
+
require_relative 'parsers/splittable_parser'
|
7
|
+
require_relative 'parsers/jaspar_parser'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
  # String parser for matrices written as one bracketed row per nucleotide
  # letter, e.g. "A [ 1 2 3 ]".
  # NOTE(review): number_pat, scan_row, split_row, parse_name and
  # scan_any_spaces are not defined here; presumably they are inherited from
  # StringParser - confirm.
  class JasparParser < StringParser
    # Header pattern: matches the empty string, so the `name` group is empty.
    def header_pat
      /(?<name>)/
    end

    # A letter (A, C, G or T) followed by a bracketed list of
    # whitespace-separated numbers; the list is captured as `row`.
    def row_pat
      /[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
    end

    # Consumes one or more "//" separator lines at the scanner position.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Collects rows until scan_row returns nil/false, then transposes the
    # result (presumably from one row per letter to one row per position -
    # confirm).
    def parse_matrix
      rows = []
      while (row_string = scan_row)
        rows << split_row(row_string)
      end
      rows.transpose
    end

    # Skips leading spaces and separators, reads the name and the matrix (in
    # that order) and returns the result of Parser.parse! with the name set.
    def parse!
      scan_any_spaces
      scan_splitter
      motif_name = parse_name
      result = Parser.parse!(parse_matrix)
      result.name = motif_name
      result
    end
  end
end
|
@@ -8,7 +8,7 @@ module Bioinform
|
|
8
8
|
attr_reader :scanner, :row_acgt_markers
|
9
9
|
|
10
10
|
def initialize(input)
|
11
|
-
raise ArgumentError unless input.is_a?(String)
|
11
|
+
raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
|
12
12
|
super
|
13
13
|
@scanner = StringScanner.new(input.multiline_squish)
|
14
14
|
end
|
@@ -15,6 +15,7 @@ module Bioinform
|
|
15
15
|
def parse!
|
16
16
|
case input
|
17
17
|
when PM then input
|
18
|
+
when Motif then input.pm
|
18
19
|
when OpenStruct then input
|
19
20
|
when Hash then OpenStruct.new(input)
|
20
21
|
end
|
@@ -27,7 +28,7 @@ module Bioinform
|
|
27
28
|
@input = input
|
28
29
|
end
|
29
30
|
def parse!
|
30
|
-
input.
|
31
|
+
input.container.shift.pm
|
31
32
|
end
|
32
33
|
end
|
33
34
|
end
|
data/lib/bioinform/support.rb
CHANGED
@@ -1,19 +1,28 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
params
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
1
|
+
require 'ostruct'
|
2
|
+
module Bioinform
  # Mixin that keeps an object's named parameters in a lazily created
  # OpenStruct and offers a class-level macro generating accessors for them.
  module Parameters
    # Hook run on `include`: gives the including class the make_parameters
    # macro by extending it with ClassMethods.
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # For every given name defines a reader `name` and a writer `name=` that
      # delegate to the instance's parameter store.
      def make_parameters(*params)
        params.each do |param|
          define_method(param) { parameters.send(param) }
          define_method("#{param}=") { |new_value| parameters.send("#{param}=", new_value) }
        end
      end
    end

    # The parameter store; created on first access.
    def parameters
      @parameters ||= OpenStruct.new
    end

    # Assigns each key/value pair of `hsh` through the matching `key=` writer.
    # Returns self.
    def set_parameters(hsh)
      hsh.each { |k, v| send("#{k}=", v) }
      self
    end

    # Returns the hash of parameters. Goes through the `parameters` accessor
    # rather than the instance variable, so an object whose parameters were
    # never touched yields an empty hash instead of raising on nil.
    def get_parameters
      parameters.marshal_dump
    end

    # True when a parameter with the given name (a Symbol) has been assigned.
    def parameter_defined?(param_name)
      get_parameters.key?(param_name)
    end
  end
end
|