bioinform 0.1.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
|
File without changes
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
require 'ostruct'
|
|
2
|
-
require_relative 'motif'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
class Collection
|
|
6
|
-
attr_accessor :container
|
|
7
|
-
|
|
8
|
-
include Parameters
|
|
9
|
-
make_parameters :name
|
|
10
|
-
|
|
11
|
-
# collection name is a tag name for each motif in a collection. But motif can be included in several collections so have several tags
|
|
12
|
-
def initialize(parameters = {})
|
|
13
|
-
@container = []
|
|
14
|
-
@parameters = OpenStruct.new(parameters)
|
|
15
|
-
yield @parameters if block_given?
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def size
|
|
19
|
-
container.size
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def to_s(with_name = true)
|
|
23
|
-
result = (with_name) ? "Collection: #{name.to_s}\n" : ''
|
|
24
|
-
each do |pm, infos|
|
|
25
|
-
result << pm.to_s << "\n\n"
|
|
26
|
-
end
|
|
27
|
-
result
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def +(other)
|
|
31
|
-
result = self.class.new
|
|
32
|
-
container.each do |motif|
|
|
33
|
-
result.container << motif
|
|
34
|
-
end
|
|
35
|
-
other.container.each do |motif|
|
|
36
|
-
result.container << motif
|
|
37
|
-
end
|
|
38
|
-
result
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def add_pm(pm, info)
|
|
42
|
-
# pm.mark(self)
|
|
43
|
-
container << Motif.new(info.marshal_dump.merge(pm: pm))
|
|
44
|
-
#### What if pm already is a Motif
|
|
45
|
-
self
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def <<(pm)
|
|
49
|
-
add_pm(pm, OpenStruct.new)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# collection.each{|motif| ... }
|
|
53
|
-
# collection.each(:pwm, :threshold){|pwm,threshold| }
|
|
54
|
-
def each(*args)
|
|
55
|
-
if block_given?
|
|
56
|
-
if args.empty?
|
|
57
|
-
container.each{|motif| yield motif}
|
|
58
|
-
else
|
|
59
|
-
container.each{|motif| yield( *args.map{|arg| motif.parameters.send(arg)} ) }
|
|
60
|
-
end
|
|
61
|
-
else
|
|
62
|
-
self.to_enum(:each, *args)
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
include Enumerable
|
|
67
|
-
|
|
68
|
-
def ==(other)
|
|
69
|
-
(parameters == other.parameters) && (container == other.container)
|
|
70
|
-
rescue
|
|
71
|
-
false
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
end
|
|
75
|
-
end
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
require 'ostruct'
|
|
2
|
-
require_relative '../support/third_part/active_support/core_ext/object/try'
|
|
3
|
-
require_relative '../support/parameters'
|
|
4
|
-
module Bioinform
|
|
5
|
-
class Motif
|
|
6
|
-
include Parameters
|
|
7
|
-
make_parameters :pcm, :pwm, :ppm, :name, :original_data_model
|
|
8
|
-
|
|
9
|
-
# 0)Motif.new()
|
|
10
|
-
# 1)Motif.new(pcm: ..., pwm: ..., name: ...,threshold: ...)
|
|
11
|
-
# 2)Motif.new(my_pcm)
|
|
12
|
-
# 3)Motif.new(pm: my_pcm, threshold: ...)
|
|
13
|
-
# 2) and 3) cases will automatically choose data model
|
|
14
|
-
#### What if pm already is a Motif
|
|
15
|
-
def initialize(parameters = {})
|
|
16
|
-
case parameters
|
|
17
|
-
when PM
|
|
18
|
-
pm = parameters
|
|
19
|
-
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
|
20
|
-
self.original_data_model = motif_type
|
|
21
|
-
set_parameters(motif_type => pm)
|
|
22
|
-
when Hash
|
|
23
|
-
if parameters.has_key?(:pm) && parameters[:pm].is_a?(PM)
|
|
24
|
-
pm = parameters.delete(:pm)
|
|
25
|
-
motif_type = pm.class.name.downcase.sub(/^.+::/,'').to_sym
|
|
26
|
-
self.original_data_model = motif_type
|
|
27
|
-
set_parameters(motif_type => pm)
|
|
28
|
-
end
|
|
29
|
-
set_parameters(parameters)
|
|
30
|
-
else
|
|
31
|
-
raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
|
|
36
|
-
#def pcm; parameters.pcm; end
|
|
37
|
-
def pwm; parameters.pwm || pcm.try(:to_pwm); end
|
|
38
|
-
def ppm; parameters.ppm || pcm.try(:to_ppm); end
|
|
39
|
-
#def pcm=(pcm); parameters.pcm = pcm; end
|
|
40
|
-
#def pwm=(pwm); parameters.pwm = pwm; end
|
|
41
|
-
#def ppm=(ppm); parameters.ppm = ppm; end
|
|
42
|
-
def name; parameters.name || pm.name; end
|
|
43
|
-
|
|
44
|
-
def method_missing(meth, *args)
|
|
45
|
-
parameters.__send__(meth, *args)
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def ==(other)
|
|
49
|
-
parameters == other.parameters
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def to_s
|
|
53
|
-
parameters.to_s
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
class RawFormatter
|
|
2
|
-
attr_accessor :motif, :options
|
|
3
|
-
|
|
4
|
-
def initialize(motif, options = {})
|
|
5
|
-
@motif = motif
|
|
6
|
-
|
|
7
|
-
default_options = {with_name: true, letters_as_rows: false}
|
|
8
|
-
@options = default_options.merge(options)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
def name
|
|
12
|
-
motif.name
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
def header
|
|
16
|
-
if options[:with_name] && name
|
|
17
|
-
name + "\n"
|
|
18
|
-
else
|
|
19
|
-
''
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def matrix_string
|
|
24
|
-
if options[:letters_as_rows]
|
|
25
|
-
hsh = motif.to_hash
|
|
26
|
-
[:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
|
|
27
|
-
else
|
|
28
|
-
motif.each_position.map{|pos| pos.join("\t")}.join("\n")
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def footer
|
|
33
|
-
# "\n"
|
|
34
|
-
''
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def to_s
|
|
39
|
-
header + matrix_string + footer
|
|
40
|
-
end
|
|
41
|
-
end
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
require_relative '../support'
|
|
2
|
-
require_relative '../parsers/string_parser'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
class JasparParser < StringParser
|
|
6
|
-
def header_pat
|
|
7
|
-
/(?<name>)/
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def row_pat
|
|
11
|
-
/[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def scan_splitter
|
|
15
|
-
scanner.scan(/(\/\/\n)+/)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def parse_matrix
|
|
19
|
-
matrix = []
|
|
20
|
-
while row_string = scan_row
|
|
21
|
-
matrix << split_row(row_string)
|
|
22
|
-
end
|
|
23
|
-
matrix.transpose
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def parse!
|
|
27
|
-
scan_any_spaces
|
|
28
|
-
scan_splitter
|
|
29
|
-
name = parse_name
|
|
30
|
-
matrix = parse_matrix
|
|
31
|
-
Parser.parse!(matrix).tap{|result| result.name = name}
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
end
|
|
35
|
-
end
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
require 'ostruct'
|
|
2
|
-
require_relative '../support'
|
|
3
|
-
require_relative '../data_models/pm'
|
|
4
|
-
require_relative 'splittable_parser'
|
|
5
|
-
|
|
6
|
-
module Bioinform
|
|
7
|
-
class Error < StandardError; end
|
|
8
|
-
class ParsingError < Error; end
|
|
9
|
-
class InvalidMatrix < Error; end
|
|
10
|
-
|
|
11
|
-
class Parser
|
|
12
|
-
attr_reader :input
|
|
13
|
-
|
|
14
|
-
def initialize(*input)
|
|
15
|
-
if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
|
|
16
|
-
if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
|
|
17
|
-
@input = input
|
|
18
|
-
else # [ [[1,2,3,4],[5,6,7,8]] ]
|
|
19
|
-
@input = input.first
|
|
20
|
-
end
|
|
21
|
-
else #[ [1,2,3,4], [5,6,7,8] ], [ ]
|
|
22
|
-
@input = input
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def parse!
|
|
27
|
-
matrix = self.class.transform_input(input)
|
|
28
|
-
raise InvalidMatrix unless self.class.valid_matrix?(matrix)
|
|
29
|
-
OpenStruct.new(matrix: matrix)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def parse
|
|
33
|
-
parse! rescue nil
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
module ClassMethods
|
|
37
|
-
def choose(input, data_model = PM)
|
|
38
|
-
data_model.choose_parser(input).new(input)
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def parse!(*input)
|
|
42
|
-
new(*input).parse!
|
|
43
|
-
end
|
|
44
|
-
def parse(*input)
|
|
45
|
-
new(*input).parse
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def valid_matrix?(matrix)
|
|
49
|
-
PM.valid_matrix?(matrix)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
|
|
53
|
-
# {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
|
|
54
|
-
def array_from_acgt_hash(hsh)
|
|
55
|
-
hsh = normalize_hash_keys(hsh)
|
|
56
|
-
raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
|
|
57
|
-
result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
|
|
58
|
-
result.all?{|el| el.is_a?(Array)} ? result.transpose : result
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
|
|
62
|
-
def normalize_hash_keys(hsh)
|
|
63
|
-
hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
|
|
67
|
-
# [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
|
|
68
|
-
# {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
|
69
|
-
def try_convert_to_array(input)
|
|
70
|
-
case input
|
|
71
|
-
when Array then input
|
|
72
|
-
when Hash then array_from_acgt_hash(input)
|
|
73
|
-
else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def transform_input(input)
|
|
78
|
-
result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
|
|
79
|
-
need_tranpose?(result) ? result.transpose : result
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
# point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
|
|
83
|
-
# [[1,3,5,7], [2,4,6,8]] --> false
|
|
84
|
-
# [[1,2],[3,4],[5,6],[7,8]] --> true
|
|
85
|
-
def need_tranpose?(input)
|
|
86
|
-
(input.size == 4) && input.any?{|x| x.size != 4}
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
extend ClassMethods
|
|
91
|
-
end
|
|
92
|
-
end
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
module Bioinform
|
|
2
|
-
class Parser
|
|
3
|
-
module SingleMotifParser
|
|
4
|
-
def self.included(base)
|
|
5
|
-
base.class_eval { extend ClassMethods }
|
|
6
|
-
include Enumerable
|
|
7
|
-
alias_method :split, :to_a
|
|
8
|
-
end
|
|
9
|
-
module ClassMethods
|
|
10
|
-
def split_on_motifs(input, pm_klass = PM)
|
|
11
|
-
[ input.is_a?(pm_klass) ? self : pm_klass.new(input, self) ]
|
|
12
|
-
end
|
|
13
|
-
end
|
|
14
|
-
def each
|
|
15
|
-
if block_given?
|
|
16
|
-
yield self
|
|
17
|
-
else
|
|
18
|
-
self.to_enum(:each)
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
include SingleMotifParser
|
|
23
|
-
|
|
24
|
-
module MultipleMotifsParser
|
|
25
|
-
def self.included(base)
|
|
26
|
-
base.class_eval { extend ClassMethods }
|
|
27
|
-
include Enumerable
|
|
28
|
-
alias_method :split, :to_a
|
|
29
|
-
end
|
|
30
|
-
module ClassMethods
|
|
31
|
-
def split_on_motifs(input, pm_klass = PM)
|
|
32
|
-
split(input).map{|el| el.is_a?(pm_klass) ? el : pm_klass.new(el)}
|
|
33
|
-
end
|
|
34
|
-
def split(input)
|
|
35
|
-
self.new(input).split
|
|
36
|
-
end
|
|
37
|
-
private :split
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def scanner_reset
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def each
|
|
44
|
-
if block_given?
|
|
45
|
-
scanner_reset
|
|
46
|
-
while result = parse
|
|
47
|
-
yield result
|
|
48
|
-
end
|
|
49
|
-
else
|
|
50
|
-
self.to_enum(:each)
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
private :scanner_reset
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
require_relative '../support'
|
|
2
|
-
require_relative '../parsers/string_parser'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
class StringFantomParser < StringParser
|
|
6
|
-
def header_pat
|
|
7
|
-
/NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T.*\n/
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
def row_pat
|
|
11
|
-
/[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def scan_splitter
|
|
15
|
-
scanner.scan(/(\/\/\n)+/)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def parse_matrix
|
|
19
|
-
matrix = []
|
|
20
|
-
while row_string = scan_row
|
|
21
|
-
matrix << split_row(row_string)[0,4]
|
|
22
|
-
end
|
|
23
|
-
matrix.transpose
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def parse!
|
|
27
|
-
scan_any_spaces
|
|
28
|
-
scan_splitter
|
|
29
|
-
name = parse_name
|
|
30
|
-
matrix = parse_matrix
|
|
31
|
-
Parser.parse!(matrix).tap{|result| result.name = name}
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
end
|
|
35
|
-
end
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
require 'strscan'
|
|
2
|
-
require_relative '../support'
|
|
3
|
-
require_relative '../parsers/parser'
|
|
4
|
-
|
|
5
|
-
module Bioinform
|
|
6
|
-
class StringParser < Parser
|
|
7
|
-
include MultipleMotifsParser
|
|
8
|
-
attr_reader :scanner, :row_acgt_markers
|
|
9
|
-
|
|
10
|
-
def initialize(input)
|
|
11
|
-
raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
|
|
12
|
-
super
|
|
13
|
-
@scanner = StringScanner.new(input.gsub(/[[:blank:]]/,' ').multiline_squish)
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def number_pat
|
|
17
|
-
/[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def header_pat
|
|
21
|
-
/>?\s*(?<name>\S+)\n/
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
def row_pat
|
|
25
|
-
/([ACGT]\s*[:|]?\s*)?(?<row>(#{number_pat} )*#{number_pat})\n?/
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def scan_row
|
|
29
|
-
match = scanner.advanced_scan(row_pat)
|
|
30
|
-
match && match[:row]
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def split_row(row_string)
|
|
34
|
-
row_string.split.map(&:to_f)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def scan_any_spaces
|
|
38
|
-
scanner.scan(/\s+/)
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def parse_name
|
|
42
|
-
match = scanner.advanced_scan(header_pat)
|
|
43
|
-
match && match[:name]
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def parse_matrix
|
|
47
|
-
matrix = []
|
|
48
|
-
@row_acgt_markers = true if scanner.check(/A.*\nC.*\nG.*\nT.*\n?/)
|
|
49
|
-
while row_string = scan_row
|
|
50
|
-
matrix << split_row(row_string)
|
|
51
|
-
end
|
|
52
|
-
matrix
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def parse_acgt_header
|
|
56
|
-
scanner.scan(/A\s*C\s*G\s*T\s*\n/i)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
def parse!
|
|
60
|
-
scan_any_spaces
|
|
61
|
-
name = parse_name
|
|
62
|
-
parse_acgt_header
|
|
63
|
-
matrix = parse_matrix
|
|
64
|
-
matrix = matrix.transpose if row_acgt_markers
|
|
65
|
-
Parser.parse!(matrix).tap{|result| result.name = name}
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
def scanner_reset
|
|
69
|
-
scanner.reset
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
end
|