bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
File without changes
|
@@ -1,75 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative 'motif'
|
3
|
-
|
4
|
-
module Bioinform
  # An ordered list of Motif objects together with collection-level
  # parameters (currently only +name+).
  class Collection
    attr_accessor :container

    include Parameters
    make_parameters :name

    # The collection name acts as a tag for every motif it holds; a motif may
    # belong to several collections and therefore carry several tags.
    #
    # parameters - Hash used to seed the OpenStruct of collection parameters.
    #
    # Yields the parameters OpenStruct when a block is given.
    def initialize(parameters = {})
      @container = []
      @parameters = OpenStruct.new(parameters)
      yield @parameters if block_given?
    end

    # Number of motifs stored in the collection.
    def size
      container.size
    end

    # Textual dump: optional "Collection: <name>" header followed by each
    # motif's own to_s, every motif terminated by a blank line.
    def to_s(with_name = true)
      text = with_name ? "Collection: #{name}\n" : ''
      each do |pm, _infos|
        text << pm.to_s << "\n\n"
      end
      text
    end

    # Builds a fresh collection (without parameters) that holds the motifs of
    # the receiver followed by the motifs of +other+.
    def +(other)
      merged = self.class.new
      [self, other].each do |source|
        source.container.each { |motif| merged.container << motif }
      end
      merged
    end

    # Wraps +pm+ and the fields of +info+ (an OpenStruct) into a Motif and
    # appends it. Returns the collection so calls can be chained.
    def add_pm(pm, info)
      # pm.mark(self)
      # NOTE(review): the case where pm already is a Motif is not handled.
      motif = Motif.new(info.marshal_dump.merge(pm: pm))
      container << motif
      self
    end

    # Appends +pm+ without any additional info.
    def <<(pm)
      add_pm(pm, OpenStruct.new)
    end

    # collection.each { |motif| ... }
    # collection.each(:pwm, :threshold) { |pwm, threshold| ... }
    #
    # With arguments, each named parameter is read from motif.parameters and
    # the values are yielded instead of the motif itself. Without a block an
    # enumerator is returned.
    def each(*args)
      return to_enum(:each, *args) unless block_given?

      if args.empty?
        container.each { |motif| yield motif }
      else
        container.each do |motif|
          values = args.map { |arg| motif.parameters.send(arg) }
          yield(*values)
        end
      end
    end

    include Enumerable

    # Collections are equal when both parameters and containers are equal;
    # any error raised during comparison is treated as "not equal".
    def ==(other)
      same_parameters = (parameters == other.parameters)
      same_parameters && (container == other.container)
    rescue StandardError
      false
    end
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative '../support/third_part/active_support/core_ext/object/try'
|
3
|
-
require_relative '../support/parameters'
|
4
|
-
module Bioinform
  # A motif: a bag of parameters that may hold several data models
  # (pcm / pwm / ppm) plus arbitrary extra fields. Unknown messages are
  # forwarded to the parameters object.
  class Motif
    include Parameters
    make_parameters :pcm, :pwm, :ppm, :name, :original_data_model

    # Accepted forms:
    #   0) Motif.new()
    #   1) Motif.new(pcm: ..., pwm: ..., name: ..., threshold: ...)
    #   2) Motif.new(my_pcm)
    #   3) Motif.new(pm: my_pcm, threshold: ...)
    # Forms 2) and 3) derive the data model from the matrix class name.
    # NOTE(review): passing something that already is a Motif is not handled.
    #
    # Raises ArgumentError for anything that is neither a PM nor a Hash.
    def initialize(parameters = {})
      case parameters
      when PM
        store_data_model(parameters)
      when Hash
        # Work on a copy: removing :pm must not alter the caller's hash.
        parameters = parameters.dup
        if parameters.key?(:pm) && parameters[:pm].is_a?(PM)
          store_data_model(parameters.delete(:pm))
        end
        set_parameters(parameters)
      else
        raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
      end
    end

    # The matrix in its original data model; falls back to parameters.pm when
    # no original data model was recorded.
    def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
    #def pcm; parameters.pcm; end
    # Stored pwm/ppm if present, otherwise converted from pcm (nil without pcm).
    def pwm; parameters.pwm || pcm.try(:to_pwm); end
    def ppm; parameters.ppm || pcm.try(:to_ppm); end
    #def pcm=(pcm); parameters.pcm = pcm; end
    #def pwm=(pwm); parameters.pwm = pwm; end
    #def ppm=(ppm); parameters.ppm = ppm; end
    # Explicit motif name, or the name of the underlying matrix.
    def name; parameters.name || pm.name; end

    # Any other message is delegated to the parameters object.
    def method_missing(meth, *args)
      parameters.__send__(meth, *args)
    end

    # Keeps respond_to? consistent with method_missing for everything the
    # parameters object itself reports as supported.
    def respond_to_missing?(meth, include_private = false)
      parameters.respond_to?(meth, include_private) || super
    end

    # Motifs are equal when their parameters are equal. Objects that expose
    # no +parameters+ are simply unequal instead of raising NoMethodError.
    def ==(other)
      other.respond_to?(:parameters) && parameters == other.parameters
    end

    def to_s
      parameters.to_s
    end

    # Records which data model +pm+ represents (derived from its class name
    # with the namespace stripped) and stores the matrix under that key.
    def store_data_model(pm)
      motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
      self.original_data_model = motif_type
      set_parameters(motif_type => pm)
    end
    private :store_data_model
  end
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
# Renders a motif as plain text: an optional name line followed by the
# matrix, either one position per line or one letter (A/C/G/T) per line.
class RawFormatter
  # Options applied when the caller does not override them.
  DEFAULT_OPTIONS = {with_name: true, letters_as_rows: false}.freeze
  # Row order used by the letters-as-rows layout.
  LETTERS = [:A, :C, :G, :T].freeze

  attr_accessor :motif, :options

  # motif   - object answering #name, #each_position and #to_hash.
  # options - Hash overriding DEFAULT_OPTIONS (:with_name, :letters_as_rows).
  def initialize(motif, options = {})
    @motif = motif
    @options = DEFAULT_OPTIONS.merge(options)
  end

  # Name of the wrapped motif.
  def name
    motif.name
  end

  # Name line, or an empty string when names are disabled or absent.
  # Interpolation is used so non-String names (e.g. Symbols) work as well.
  def header
    return '' unless options[:with_name] && name
    "#{name}\n"
  end

  # Tab-separated matrix body without a trailing newline.
  def matrix_string
    if options[:letters_as_rows]
      by_letter = motif.to_hash
      LETTERS.map { |letter| "#{letter}|" + by_letter[letter].join("\t") }.join("\n")
    else
      motif.each_position.map { |position| position.join("\t") }.join("\n")
    end
  end

  # Text appended after the matrix; intentionally empty.
  def footer
    # "\n"
    ''
  end

  # Complete textual representation of the motif.
  def to_s
    header + matrix_string + footer
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../parsers/string_parser'
|
3
|
-
|
4
|
-
module Bioinform
  # StringParser variant for matrices written one letter per line in the
  # form "A [ n n n ... ]"; blocks may be separated by "//" lines.
  class JasparParser < StringParser
    # Header pattern with an empty +name+ capture.
    def header_pat
      /(?<name>)/
    end

    # One letter-labelled row: letter, bracketed list of numbers.
    def row_pat
      /[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
    end

    # Consumes one or more "//" separator lines at the scanner position.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Reads rows until none is left and returns them transposed.
    def parse_matrix
      rows = []
      loop do
        line = scan_row
        break unless line
        rows << split_row(line)
      end
      rows.transpose
    end

    # Parses the next motif: skips whitespace and separators, reads the name
    # and the matrix, and hands the matrix to Parser.parse! for validation.
    def parse!
      scan_any_spaces
      scan_splitter
      motif_name = parse_name
      parsed = Parser.parse!(parse_matrix)
      parsed.name = motif_name
      parsed
    end
  end
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative '../support'
|
3
|
-
require_relative '../data_models/pm'
|
4
|
-
require_relative 'splittable_parser'
|
5
|
-
|
6
|
-
module Bioinform
  class Error < StandardError; end
  class ParsingError < Error; end
  class InvalidMatrix < Error; end

  # Turns array/hash descriptions of a matrix into a validated array of
  # positions, each position being an array ordered A, C, G, T.
  class Parser
    attr_reader :input

    # Accepts either a single argument or a list of rows:
    #   Parser.new([1,2,3,4])               -> input [[1,2,3,4]]
    #   Parser.new([[1,2,3,4],[5,6,7,8]])   -> input [[1,2,3,4],[5,6,7,8]]
    #   Parser.new([1,2,3,4], [5,6,7,8])    -> input [[1,2,3,4],[5,6,7,8]]
    def initialize(*input)
      lone_numeric_row = input.size == 1 &&
                         input.first.is_a?(Array) &&
                         input.first.all? { |el| el.is_a?(Numeric) }
      # A single non-row argument is the matrix itself; otherwise the
      # argument list is the list of rows.
      @input = (input.size == 1 && !lone_numeric_row) ? input.first : input
    end

    # Returns an OpenStruct with a +matrix+ field.
    # Raises InvalidMatrix when PM.valid_matrix? rejects the result; errors
    # raised while converting the input propagate unchanged.
    def parse!
      matrix = self.class.transform_input(input)
      raise InvalidMatrix unless self.class.valid_matrix?(matrix)
      OpenStruct.new(matrix: matrix)
    end

    # Same as #parse! but returns nil instead of raising a StandardError.
    def parse
      parse!
    rescue StandardError
      nil
    end

    module ClassMethods
      # Instantiates the parser class that +data_model+ selects for +input+.
      def choose(input, data_model = PM)
        data_model.choose_parser(input).new(input)
      end

      def parse!(*input)
        new(*input).parse!
      end

      def parse(*input)
        new(*input).parse
      end

      def valid_matrix?(matrix)
        PM.valid_matrix?(matrix)
      end

      # {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
      # {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
      #
      # Raises RuntimeError unless the normalized keys are exactly A, C, G, T.
      def array_from_acgt_hash(hsh)
        hsh = normalize_hash_keys(hsh)
        raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
        result = [:A,:C,:G,:T].map { |letter| hsh[letter] }
        result.all? { |el| el.is_a?(Array) } ? result.transpose : result
      end

      # {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
      def normalize_hash_keys(hsh)
        hsh.each_with_object({}) do |(key, value), normalized|
          normalized[key.to_s.upcase.to_sym] = value
        end
      end

      # [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
      # [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
      # {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
      #
      # Raises TypeError for anything that is neither an Array nor a Hash.
      def try_convert_to_array(input)
        case input
        when Array then input
        when Hash then array_from_acgt_hash(input)
        else raise TypeError, 'input of Bioinform::Parser::try_convert_to_array should be Array or Hash'
        end
      end

      # Converts the outer container and every row to arrays, then puts
      # positions on the first index if letters were there.
      def transform_input(input)
        result = try_convert_to_array(input).map { |el| try_convert_to_array(el) }
        need_tranpose?(result) ? result.transpose : result
      end

      # point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
      # [[1,3,5,7], [2,4,6,8]] --> false
      # [[1,2],[3,4],[5,6],[7,8]] --> true
      def need_tranpose?(input)
        (input.size == 4) && input.any? { |x| x.size != 4 }
      end
      # Correctly spelled name; the original spelling stays for existing callers.
      alias_method :need_transpose?, :need_tranpose?
    end

    extend ClassMethods
  end
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
module Bioinform
  class Parser
    # Behaviour for parsers whose input holds exactly one motif: iterating
    # such a parser yields the parser itself once.
    module SingleMotifParser
      # Enumerable and the +split+ alias live on the module itself so every
      # including class gets them, regardless of inclusion order.
      include Enumerable
      alias_method :split, :to_a

      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Returns a one-element array: +input+ itself when it already is a
        # +pm_klass+, otherwise a new +pm_klass+ built from the input and
        # this parser class.
        def split_on_motifs(input, pm_klass = PM)
          [ input.is_a?(pm_klass) ? input : pm_klass.new(input, self) ]
        end
      end

      # Yields the parser once; returns an enumerator without a block.
      def each
        return to_enum(:each) unless block_given?
        yield self
      end
    end
    include SingleMotifParser

    # Behaviour for parsers whose input may hold several motifs: iterating
    # yields successive results of #parse until it returns a falsy value.
    module MultipleMotifsParser
      include Enumerable
      alias_method :split, :to_a

      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Splits +input+ and wraps every piece that is not yet a +pm_klass+.
        def split_on_motifs(input, pm_klass = PM)
          split(input).map { |el| el.is_a?(pm_klass) ? el : pm_klass.new(el) }
        end

        # All parse results obtained from a fresh parser over +input+.
        def split(input)
          new(input).split
        end
        private :split
      end

      # Hook run before each iteration; does nothing by default.
      def scanner_reset
      end

      # Resets the scanner, then yields parse results one by one.
      # Returns an enumerator without a block.
      def each
        return to_enum(:each) unless block_given?

        scanner_reset
        while (result = parse)
          yield result
        end
      end

      private :scanner_reset
    end
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../parsers/string_parser'
|
3
|
-
|
4
|
-
module Bioinform
  # StringParser variant for input with an "NA <name>" line, a column-title
  # line containing "A C G T", and labelled rows of numbers; blocks may be
  # separated by "//" lines.
  class StringFantomParser < StringParser
    # Name line followed by the column-title line.
    def header_pat
      /NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T.*\n/
    end

    # Row label followed by space-separated numbers.
    def row_pat
      /[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
    end

    # Consumes one or more "//" separator lines at the scanner position.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Reads rows until none is left, keeps the first four numbers of each
    # row, and returns the result transposed.
    def parse_matrix
      rows = []
      loop do
        line = scan_row
        break unless line
        rows << split_row(line).first(4)
      end
      rows.transpose
    end

    # Parses the next motif: skips whitespace and separators, reads the name
    # and the matrix, and hands the matrix to Parser.parse! for validation.
    def parse!
      scan_any_spaces
      scan_splitter
      motif_name = parse_name
      parsed = Parser.parse!(parse_matrix)
      parsed.name = motif_name
      parsed
    end
  end
end
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'strscan'
|
2
|
-
require_relative '../support'
|
3
|
-
require_relative '../parsers/parser'
|
4
|
-
|
5
|
-
module Bioinform
  # Parses one or more motifs from plain text: an optional name line, an
  # optional "A C G T" title line, then rows of numbers that may be prefixed
  # with a letter marker.
  class StringParser < Parser
    include MultipleMotifsParser
    attr_reader :scanner, :row_acgt_markers

    # input - String with the text to parse.
    #
    # Raises ArgumentError for non-String input.
    def initialize(input)
      raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
      super
      # Blank characters are unified to plain spaces before squishing.
      # NOTE(review): multiline_squish is a String extension defined elsewhere.
      @scanner = StringScanner.new(input.gsub(/[[:blank:]]/,' ').multiline_squish)
    end

    # Optionally signed integer or decimal with an optional exponent.
    def number_pat
      /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
    end

    # Name line, optionally starting with ">".
    def header_pat
      />?\s*(?<name>\S+)\n/
    end

    # Row of space-separated numbers with an optional letter marker.
    def row_pat
      /([ACGT]\s*[:|]?\s*)?(?<row>(#{number_pat} )*#{number_pat})\n?/
    end

    # Returns the numeric part of the next row, or nil when no row follows.
    # NOTE(review): advanced_scan is a StringScanner extension defined elsewhere.
    def scan_row
      match = scanner.advanced_scan(row_pat)
      match && match[:row]
    end

    # "1 2.5 3" --> [1.0, 2.5, 3.0]
    def split_row(row_string)
      row_string.split.map(&:to_f)
    end

    def scan_any_spaces
      scanner.scan(/\s+/)
    end

    # Returns the motif name, or nil when no header is found.
    def parse_name
      match = scanner.advanced_scan(header_pat)
      match && match[:name]
    end

    # Reads all rows at the scanner position. The letter-marker flag is
    # recomputed for every matrix so a marked motif does not force the
    # transposition of unmarked motifs parsed later by the same parser.
    def parse_matrix
      @row_acgt_markers = scanner.check(/A.*\nC.*\nG.*\nT.*\n?/) ? true : false
      matrix = []
      while (row_string = scan_row)
        matrix << split_row(row_string)
      end
      matrix
    end

    # Consumes an "A C G T" title line if present.
    def parse_acgt_header
      scanner.scan(/A\s*C\s*G\s*T\s*\n/i)
    end

    # Parses the next motif and returns the Parser.parse! result with its
    # +name+ set; rows marked with letters are transposed first.
    def parse!
      scan_any_spaces
      name = parse_name
      parse_acgt_header
      matrix = parse_matrix
      matrix = matrix.transpose if row_acgt_markers
      Parser.parse!(matrix).tap { |result| result.name = name }
    end

    # Rewinds the scanner so iteration starts from the beginning.
    def scanner_reset
      scanner.reset
    end
  end
end
|