bioinform 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +12 -0
- data/Guardfile +9 -0
- data/README.md +7 -1
- data/TODO.txt +8 -0
- data/bioinform.gemspec +7 -5
- data/lib/bioinform.rb +1 -0
- data/lib/bioinform/cli.rb +12 -3
- data/lib/bioinform/cli/convert_motif.rb +108 -0
- data/lib/bioinform/cli/merge_into_collection.rb +6 -2
- data/lib/bioinform/cli/pcm2pwm.rb +1 -1
- data/lib/bioinform/cli/split_motifs.rb +1 -1
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +19 -0
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +20 -0
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +0 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +0 -0
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +21 -35
- data/lib/bioinform/data_models/motif.rb +56 -0
- data/lib/bioinform/data_models/pcm.rb +4 -8
- data/lib/bioinform/data_models/pm.rb +19 -48
- data/lib/bioinform/data_models/pwm.rb +16 -0
- data/lib/bioinform/formatters.rb +2 -0
- data/lib/bioinform/formatters/raw_formatter.rb +41 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +39 -0
- data/lib/bioinform/parsers.rb +2 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +35 -0
- data/lib/bioinform/parsers/string_parser.rb +1 -1
- data/lib/bioinform/parsers/trivial_parser.rb +2 -1
- data/lib/bioinform/parsers/yaml_parser.rb +1 -1
- data/lib/bioinform/support.rb +2 -1
- data/lib/bioinform/support/parameters.rb +27 -18
- data/lib/bioinform/support/strip_doc.rb +9 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/cli/convert_motif_spec.rb +107 -0
- data/spec/cli/data/merge_into_collection/collection.yaml.result +186 -183
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +186 -183
- data/spec/cli/data/split_motifs/collection.yaml +184 -193
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +18 -0
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -0
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +50 -0
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +5 -0
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +4 -0
- data/spec/data_models/collection_spec.rb +36 -34
- data/spec/data_models/motif_spec.rb +224 -0
- data/spec/data_models/pcm_spec.rb +28 -17
- data/spec/data_models/pm_spec.rb +83 -121
- data/spec/data_models/pwm_spec.rb +38 -0
- data/spec/fabricators/collection_fabricator.rb +2 -2
- data/spec/fabricators/motif_fabricator.rb +33 -0
- data/spec/fabricators/motif_formats_fabricator.rb +125 -0
- data/spec/fabricators/pcm_fabricator.rb +25 -0
- data/spec/fabricators/pm_fabricator.rb +10 -1
- data/spec/fabricators/ppm_fabricator.rb +14 -0
- data/spec/fabricators/pwm_fabricator.rb +16 -0
- data/spec/parsers/trivial_parser_spec.rb +12 -12
- data/spec/parsers/yaml_parser_spec.rb +11 -11
- data/spec/spec_helper.rb +19 -49
- data/spec/spec_helper_source.rb +59 -0
- metadata +78 -7
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'active_support/core_ext/object/try'
|
3
|
+
require_relative '../support/parameters'
|
4
|
+
module Bioinform
  # Groups the matrix representations of a single motif (pcm, pwm, ppm) with
  # its name and any additional free-form parameters.
  #
  # Accepted constructor forms:
  #   0) Motif.new
  #   1) Motif.new(pcm: ..., pwm: ..., name: ..., threshold: ...)
  #   2) Motif.new(my_pcm)
  #   3) Motif.new(pm: my_pcm, threshold: ...)
  # In forms 2) and 3) the data model is chosen automatically from the class
  # name of the given matrix (e.g. an instance of Bioinform::PCM is stored
  # under :pcm) and remembered in original_data_model.
  #
  # TODO: decide what should happen when the argument already is a Motif;
  # currently such an argument is rejected with ArgumentError.
  class Motif
    include Parameters
    make_parameters :pcm, :pwm, :ppm, :name, :original_data_model

    # @param parameters [PM, Hash] a matrix object, or a hash of parameters
    #   (optionally holding a matrix object under the :pm key)
    # @raise [ArgumentError] when the argument is neither a PM nor a Hash
    def initialize(parameters = {})
      case parameters
      when PM
        store_matrix(parameters)
      when Hash
        # Work on a copy: extracting :pm must not modify the caller's hash.
        parameters = parameters.dup
        store_matrix(parameters.delete(:pm)) if parameters.key?(:pm) && parameters[:pm].is_a?(PM)
        set_parameters(parameters)
      else
        raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
      end
    end

    # Matrix in the data model the motif was created from; falls back to the
    # generic :pm parameter when no original data model was recorded.
    def pm
      model = original_data_model || :pm
      model == :pm ? parameters.pm : send(model)
    end

    # Stored pwm, or one derived from the pcm when a pcm is available.
    def pwm
      parameters.pwm || pcm.try(:to_pwm)
    end

    # Stored ppm, or one derived from the pcm when a pcm is available.
    def ppm
      parameters.ppm || pcm.try(:to_ppm)
    end

    # Explicit motif name, or the name of the underlying matrix.
    # Returns nil (instead of raising) when neither is available.
    def name
      parameters.name || pm.try(:name)
    end

    # Any other message is forwarded to the underlying parameters object, so
    # extra parameters (e.g. threshold) can be read and written directly.
    def method_missing(meth, *args)
      parameters.__send__(meth, *args)
    end

    # Keeps respond_to? consistent with the delegation done by method_missing.
    def respond_to_missing?(meth, include_private = false)
      parameters.respond_to?(meth, include_private) || super
    end

    # Two motifs are equal when their parameter sets are equal; objects that
    # expose no parameters are never equal to a motif.
    def ==(other)
      other.respond_to?(:parameters) && parameters == other.parameters
    end

    def to_s
      parameters.to_s
    end

    private

    # Records which data model the matrix belongs to and stores the matrix
    # under the parameter of the same name.
    def store_matrix(matrix)
      motif_type = matrix.class.name.downcase.sub(/^.+::/,'').to_sym
      self.original_data_model = motif_type
      set_parameters(motif_type => matrix)
    end
  end
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
require_relative '../support'
|
2
2
|
require_relative '../data_models'
|
3
|
+
require_relative '../conversion_algorithms/pcm2ppm_converter'
|
4
|
+
require_relative '../conversion_algorithms/pcm2pwm_converter'
|
3
5
|
|
4
6
|
module Bioinform
|
5
7
|
class PCM < PM
|
@@ -12,17 +14,11 @@ module Bioinform
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def to_pwm(pseudocount = Math.log(count))
|
15
|
-
|
16
|
-
pos.each_index.map do |ind|
|
17
|
-
Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
|
18
|
-
end
|
19
|
-
end
|
20
|
-
PWM.new(matrix: mat, name: name, tags: tags, background: background)
|
17
|
+
ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
|
21
18
|
end
|
22
19
|
|
23
20
|
def to_ppm
|
24
|
-
|
25
|
-
PPM.new(matrix: mat, name: name, tags: tags, background: background)
|
21
|
+
ConversionAlgorithms::PCM2PPMConverter.convert(self)
|
26
22
|
end
|
27
23
|
end
|
28
24
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'ostruct'
|
2
2
|
require_relative '../support'
|
3
3
|
require_relative '../parsers'
|
4
|
+
require_relative '../formatters'
|
4
5
|
|
5
6
|
module Bioinform
|
6
7
|
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
|
@@ -10,22 +11,22 @@ module Bioinform
|
|
10
11
|
attr_accessor :matrix, :parameters
|
11
12
|
|
12
13
|
include Parameters
|
13
|
-
make_parameters
|
14
|
+
make_parameters :name, :background # , :tags
|
14
15
|
|
15
|
-
def mark(tag)
|
16
|
-
tags << tag
|
17
|
-
end
|
16
|
+
# def mark(tag)
|
17
|
+
# tags << tag
|
18
|
+
# end
|
18
19
|
|
19
|
-
def tagged?(tag)
|
20
|
-
tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
21
|
-
end
|
20
|
+
# def tagged?(tag)
|
21
|
+
# tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
|
22
|
+
# end
|
22
23
|
|
23
24
|
def self.choose_parser(input)
|
24
|
-
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
25
|
+
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
25
26
|
self.new(input, parser) rescue nil
|
26
27
|
end
|
27
28
|
end
|
28
|
-
|
29
|
+
|
29
30
|
def self.split_on_motifs(input)
|
30
31
|
parser = choose_parser(input)
|
31
32
|
raise ParsingError, "No parser can parse given input" unless parser
|
@@ -39,7 +40,7 @@ module Bioinform
|
|
39
40
|
result = parser.new(input).parse
|
40
41
|
@matrix = result.matrix
|
41
42
|
self.name = result.name
|
42
|
-
self.tags = result.tags || []
|
43
|
+
# self.tags = result.tags || []
|
43
44
|
self.background = result.background || [1, 1, 1, 1]
|
44
45
|
raise 'matrix not valid' unless valid?
|
45
46
|
end
|
@@ -77,21 +78,8 @@ module Bioinform
|
|
77
78
|
end
|
78
79
|
alias_method :size, :length
|
79
80
|
|
80
|
-
def to_s(options = {})
|
81
|
-
|
82
|
-
options = default_options.merge(options)
|
83
|
-
if options[:letters_as_rows]
|
84
|
-
hsh = to_hash
|
85
|
-
matrix_str = [:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
|
86
|
-
else
|
87
|
-
matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
|
88
|
-
end
|
89
|
-
|
90
|
-
if options[:with_name] && name
|
91
|
-
name + "\n" + matrix_str
|
92
|
-
else
|
93
|
-
matrix_str
|
94
|
-
end
|
81
|
+
def to_s(options = {}, formatter = RawFormatter)
|
82
|
+
formatter.new(self, options).to_s
|
95
83
|
end
|
96
84
|
|
97
85
|
def pretty_string(options = {})
|
@@ -152,22 +140,6 @@ module Bioinform
|
|
152
140
|
background.map{|element| element.to_f / sum}
|
153
141
|
end
|
154
142
|
|
155
|
-
def best_score
|
156
|
-
@matrix.inject(0.0){|sum, col| sum + col.max}
|
157
|
-
end
|
158
|
-
def worst_score
|
159
|
-
@matrix.inject(0.0){|sum, col| sum + col.min}
|
160
|
-
end
|
161
|
-
|
162
|
-
# best score of suffix s[i..l]
|
163
|
-
def best_suffix(i)
|
164
|
-
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
165
|
-
end
|
166
|
-
|
167
|
-
def worst_suffix(i)
|
168
|
-
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
169
|
-
end
|
170
|
-
|
171
143
|
def reverse_complement
|
172
144
|
dup.reverse_complement!
|
173
145
|
end
|
@@ -184,15 +156,14 @@ module Bioinform
|
|
184
156
|
deep_dup
|
185
157
|
end
|
186
158
|
|
187
|
-
def
|
188
|
-
PCM.new(matrix: matrix
|
159
|
+
def as_pcm
|
160
|
+
PCM.new(get_parameters.merge(matrix: matrix))
|
189
161
|
end
|
190
|
-
def
|
191
|
-
PPM.new(matrix: matrix
|
162
|
+
def as_ppm
|
163
|
+
PPM.new(get_parameters.merge(matrix: matrix))
|
192
164
|
end
|
193
|
-
def
|
194
|
-
PWM.new(matrix: matrix
|
165
|
+
def as_pwm
|
166
|
+
PWM.new(get_parameters.merge(matrix: matrix))
|
195
167
|
end
|
196
|
-
|
197
168
|
end
|
198
169
|
end
|
@@ -36,5 +36,21 @@ module Bioinform
|
|
36
36
|
def to_pwm
|
37
37
|
self
|
38
38
|
end
|
39
|
+
|
40
|
+
def best_score
|
41
|
+
@matrix.inject(0.0){|sum, col| sum + col.max}
|
42
|
+
end
|
43
|
+
def worst_score
|
44
|
+
@matrix.inject(0.0){|sum, col| sum + col.min}
|
45
|
+
end
|
46
|
+
|
47
|
+
# best score of suffix s[i..l]
|
48
|
+
def best_suffix(i)
|
49
|
+
@matrix[i...length].map(&:max).inject(0.0, &:+)
|
50
|
+
end
|
51
|
+
|
52
|
+
def worst_suffix(i)
|
53
|
+
@matrix[i...length].map(&:min).inject(0.0, &:+)
|
54
|
+
end
|
39
55
|
end
|
40
56
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Renders a motif as plain text: an optional name line followed by the matrix
# written as tab-separated values.
#
# Options:
#   :with_name       - prepend the motif name on its own line (default: true)
#   :letters_as_rows - write one row per nucleotide, each prefixed with
#                      "<letter>|", instead of one row per position
#                      (default: false)
class RawFormatter
  # Defaults that caller-supplied options are merged over.
  DEFAULT_OPTIONS = {with_name: true, letters_as_rows: false}.freeze
  # Row order used when the matrix is written letter by letter.
  LETTERS = [:A, :C, :G, :T].freeze

  attr_accessor :motif, :options

  # @param motif object responding to #name, #each_position and #to_hash
  # @param options [Hash] formatting options, see the class description
  def initialize(motif, options = {})
    @motif = motif
    @options = DEFAULT_OPTIONS.merge(options)
  end

  def name
    motif.name
  end

  # Name line, or an empty string when the name is disabled or missing.
  # Interpolation is used so that non-String names (e.g. Symbols) work too.
  def header
    return '' unless options[:with_name] && name
    "#{name}\n"
  end

  # Matrix body without header or footer.
  def matrix_string
    if options[:letters_as_rows]
      by_letter = motif.to_hash
      LETTERS.map { |letter| "#{letter}|" + by_letter[letter].join("\t") }.join("\n")
    else
      motif.each_position.map { |position| position.join("\t") }.join("\n")
    end
  end

  # The raw format has no trailer.
  def footer
    ''
  end

  def to_s
    header + matrix_string + footer
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Renders a motif in a TRANSFAC-like text layout: an ID/BF/P0 header, one
# numbered line per matrix position and an "XX" / "//" trailer.
class TransfacFormatter
  attr_accessor :motif, :options

  # @param motif object responding to #name and #each_position
  # @param options [Hash] merged over {with_name: true, letters_as_rows: false}
  def initialize(motif, options = {})
    @motif = motif
    @options = {with_name: true, letters_as_rows: false}.merge(options)
  end

  def name
    motif.name
  end

  # Header lines; a name is mandatory for this format.
  # Raises RuntimeError when the name is disabled via options or missing.
  def header
    raise 'Transfac should have the name field' unless options[:with_name] && name
    "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
  end

  # One line per position: a zero-based index padded to at least two digits,
  # a space, then the tab-separated values of that position.
  def matrix_string
    lines = motif.each_position.each_with_index.map do |position, index|
      index.to_s.rjust(2, '0') + ' ' + position.join("\t")
    end
    lines.join("\n")
  end

  # Trailer; it starts with a newline because the last matrix line has none.
  def footer
    "\nXX\n//"
  end

  def to_s
    [header, matrix_string, footer].join
  end
end
|
data/lib/bioinform/parsers.rb
CHANGED
@@ -3,4 +3,5 @@ require_relative 'parsers/trivial_parser'
|
|
3
3
|
require_relative 'parsers/yaml_parser'
|
4
4
|
require_relative 'parsers/string_parser'
|
5
5
|
require_relative 'parsers/string_fantom_parser'
|
6
|
-
require_relative 'parsers/splittable_parser'
|
6
|
+
require_relative 'parsers/splittable_parser'
|
7
|
+
require_relative 'parsers/jaspar_parser'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
  # StringParser variant for input whose rows look like
  # "<letter> [ n1 n2 ... ]", with letter being one of A, C, G, T.
  # Scanning helpers (scanner, scan_row, split_row, parse_name,
  # scan_any_spaces, number_pat) come from the parent class.
  class JasparParser < StringParser
    # Header pattern whose "name" group always captures an empty string.
    def header_pat
      /(?<name>)/
    end

    # One letter-prefixed, bracketed row of whitespace-separated numbers; the
    # numbers themselves are captured in the "row" group.
    def row_pat
      /[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
    end

    # Consumes a run of "//" separator lines at the current scanner position.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Collects rows for as long as scan_row yields one, then transposes the
    # collected rows.
    def parse_matrix
      rows = []
      while (row_text = scan_row)
        rows << split_row(row_text)
      end
      rows.transpose
    end

    # Skips leading whitespace and separators, reads the name, then the
    # matrix, and returns the result of Parser.parse! with the name attached.
    def parse!
      scan_any_spaces
      scan_splitter
      motif_name = parse_name
      result = Parser.parse!(parse_matrix)
      result.name = motif_name
      result
    end
  end
end
|
@@ -8,7 +8,7 @@ module Bioinform
|
|
8
8
|
attr_reader :scanner, :row_acgt_markers
|
9
9
|
|
10
10
|
def initialize(input)
|
11
|
-
raise ArgumentError unless input.is_a?(String)
|
11
|
+
raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
|
12
12
|
super
|
13
13
|
@scanner = StringScanner.new(input.multiline_squish)
|
14
14
|
end
|
@@ -15,6 +15,7 @@ module Bioinform
|
|
15
15
|
def parse!
|
16
16
|
case input
|
17
17
|
when PM then input
|
18
|
+
when Motif then input.pm
|
18
19
|
when OpenStruct then input
|
19
20
|
when Hash then OpenStruct.new(input)
|
20
21
|
end
|
@@ -27,7 +28,7 @@ module Bioinform
|
|
27
28
|
@input = input
|
28
29
|
end
|
29
30
|
def parse!
|
30
|
-
input.
|
31
|
+
input.container.shift.pm
|
31
32
|
end
|
32
33
|
end
|
33
34
|
end
|
data/lib/bioinform/support.rb
CHANGED
@@ -1,19 +1,28 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
params
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
1
|
+
require 'ostruct'
|
2
|
+
module Bioinform
  # Mixin that gives an object a bag of named parameters backed by an
  # OpenStruct, plus a class-level macro for declaring accessors to them.
  module Parameters
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Defines a reader and a writer for every given parameter name; both
      # delegate to the instance's parameters object.
      def make_parameters(*params)
        params.each do |param|
          define_method(param) { parameters.send(param) }
          define_method("#{param}=") { |new_value| parameters.send("#{param}=", new_value) }
        end
      end
    end

    # Parameter storage, created on first access.
    def parameters
      @parameters ||= OpenStruct.new
    end

    # Assigns every key/value pair through the matching "key=" writer.
    # Returns self so that calls can be chained.
    def set_parameters(hsh)
      hsh.each { |key, value| send("#{key}=", value) }
      self
    end

    # Returns the hash of parameters (empty when nothing has been set yet).
    # The storage is initialized here directly rather than through
    # #parameters, because a host class may override that reader with a plain
    # attr_accessor, which would return nil before the first assignment.
    def get_parameters
      (@parameters ||= OpenStruct.new).marshal_dump
    end

    # True when a value has been stored under +param_name+ (a Symbol).
    def parameter_defined?(param_name)
      get_parameters.has_key?(param_name)
    end
  end
end
|