bioinform 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +16 -16
- data/LICENSE +21 -21
- data/README.md +35 -35
- data/Rakefile +4 -4
- data/TODO.txt +37 -37
- data/bin/merge_into_collection +3 -3
- data/bin/pcm2pwm +3 -3
- data/bin/split_motifs +3 -3
- data/bioinform.gemspec +19 -19
- data/lib/bioinform/cli/convert_motif.rb +107 -107
- data/lib/bioinform/cli/merge_into_collection.rb +79 -79
- data/lib/bioinform/cli/pcm2pwm.rb +46 -46
- data/lib/bioinform/cli/split_motifs.rb +46 -46
- data/lib/bioinform/cli.rb +29 -29
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
- data/lib/bioinform/data_models/collection.rb +74 -74
- data/lib/bioinform/data_models/motif.rb +55 -55
- data/lib/bioinform/data_models/pcm.rb +23 -23
- data/lib/bioinform/data_models/pm.rb +169 -169
- data/lib/bioinform/data_models/ppm.rb +9 -9
- data/lib/bioinform/data_models/pwm.rb +55 -55
- data/lib/bioinform/data_models.rb +10 -10
- data/lib/bioinform/formatters/raw_formatter.rb +40 -40
- data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
- data/lib/bioinform/formatters.rb +1 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
- data/lib/bioinform/parsers/parser.rb +87 -87
- data/lib/bioinform/parsers/splittable_parser.rb +56 -56
- data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
- data/lib/bioinform/parsers/string_parser.rb +71 -71
- data/lib/bioinform/parsers/trivial_parser.rb +33 -33
- data/lib/bioinform/parsers/yaml_parser.rb +34 -34
- data/lib/bioinform/parsers.rb +6 -6
- data/lib/bioinform/support/array_product.rb +5 -5
- data/lib/bioinform/support/array_zip.rb +5 -5
- data/lib/bioinform/support/collect_hash.rb +6 -6
- data/lib/bioinform/support/deep_dup.rb +4 -4
- data/lib/bioinform/support/delete_many.rb +13 -13
- data/lib/bioinform/support/inverf.rb +12 -12
- data/lib/bioinform/support/multiline_squish.rb +5 -5
- data/lib/bioinform/support/parameters.rb +27 -27
- data/lib/bioinform/support/partial_sums.rb +15 -15
- data/lib/bioinform/support/same_by.rb +12 -12
- data/lib/bioinform/support/strip_doc.rb +8 -8
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
- data/lib/bioinform/support.rb +17 -17
- data/lib/bioinform/version.rb +3 -3
- data/lib/bioinform.rb +10 -10
- data/spec/cli/cli_spec.rb +13 -13
- data/spec/cli/convert_motif_spec.rb +106 -106
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
- data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
- data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
- data/spec/cli/data/split_motifs/collection.yaml +188 -188
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
- data/spec/cli/merge_into_collection_spec.rb +99 -99
- data/spec/cli/pcm2pwm_spec.rb +79 -79
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +76 -76
- data/spec/data_models/collection_spec.rb +97 -97
- data/spec/data_models/motif_spec.rb +223 -223
- data/spec/data_models/pcm_spec.rb +55 -55
- data/spec/data_models/pm_spec.rb +359 -359
- data/spec/data_models/ppm_spec.rb +7 -7
- data/spec/data_models/pwm_spec.rb +82 -82
- data/spec/fabricators/collection_fabricator.rb +7 -7
- data/spec/fabricators/motif_fabricator.rb +32 -32
- data/spec/fabricators/motif_formats_fabricator.rb +124 -124
- data/spec/fabricators/pcm_fabricator.rb +24 -24
- data/spec/fabricators/pm_fabricator.rb +51 -51
- data/spec/fabricators/ppm_fabricator.rb +13 -13
- data/spec/fabricators/pwm_fabricator.rb +16 -16
- data/spec/parsers/parser_spec.rb +152 -152
- data/spec/parsers/string_fantom_parser_spec.rb +69 -69
- data/spec/parsers/string_parser_spec.rb +76 -76
- data/spec/parsers/trivial_parser_spec.rb +63 -63
- data/spec/parsers/yaml_parser_spec.rb +50 -50
- data/spec/spec_helper.rb +10 -10
- data/spec/spec_helper_source.rb +59 -59
- data/spec/support/advanced_scan_spec.rb +31 -31
- data/spec/support/array_product_spec.rb +14 -14
- data/spec/support/array_zip_spec.rb +14 -14
- data/spec/support/collect_hash_spec.rb +14 -14
- data/spec/support/delete_many_spec.rb +43 -43
- data/spec/support/inverf_spec.rb +18 -18
- data/spec/support/multiline_squish_spec.rb +24 -24
- data/spec/support/partial_sums_spec.rb +30 -30
- data/spec/support/same_by_spec.rb +35 -35
- metadata +3 -3
@@ -1,41 +1,41 @@
|
|
1
|
-
class RawFormatter
|
2
|
-
attr_accessor :motif, :options
|
3
|
-
|
4
|
-
def initialize(motif, options = {})
|
5
|
-
@motif = motif
|
6
|
-
|
7
|
-
default_options = {with_name: true, letters_as_rows: false}
|
8
|
-
@options = default_options.merge(options)
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
motif.name
|
13
|
-
end
|
14
|
-
|
15
|
-
def header
|
16
|
-
if options[:with_name] && name
|
17
|
-
name + "\n"
|
18
|
-
else
|
19
|
-
''
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def matrix_string
|
24
|
-
if options[:letters_as_rows]
|
25
|
-
hsh = motif.to_hash
|
26
|
-
[:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
|
27
|
-
else
|
28
|
-
motif.each_position.map{|pos| pos.join("\t")}.join("\n")
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def footer
|
33
|
-
# "\n"
|
34
|
-
''
|
35
|
-
end
|
36
|
-
|
37
|
-
|
38
|
-
def to_s
|
39
|
-
header + matrix_string + footer
|
40
|
-
end
|
1
|
+
class RawFormatter
|
2
|
+
attr_accessor :motif, :options
|
3
|
+
|
4
|
+
def initialize(motif, options = {})
|
5
|
+
@motif = motif
|
6
|
+
|
7
|
+
default_options = {with_name: true, letters_as_rows: false}
|
8
|
+
@options = default_options.merge(options)
|
9
|
+
end
|
10
|
+
|
11
|
+
def name
|
12
|
+
motif.name
|
13
|
+
end
|
14
|
+
|
15
|
+
def header
|
16
|
+
if options[:with_name] && name
|
17
|
+
name + "\n"
|
18
|
+
else
|
19
|
+
''
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def matrix_string
|
24
|
+
if options[:letters_as_rows]
|
25
|
+
hsh = motif.to_hash
|
26
|
+
[:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
|
27
|
+
else
|
28
|
+
motif.each_position.map{|pos| pos.join("\t")}.join("\n")
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def footer
|
33
|
+
# "\n"
|
34
|
+
''
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
def to_s
|
39
|
+
header + matrix_string + footer
|
40
|
+
end
|
41
41
|
end
|
@@ -1,39 +1,39 @@
|
|
1
|
-
class TransfacFormatter
|
2
|
-
attr_accessor :motif, :options
|
3
|
-
|
4
|
-
def initialize(motif, options = {})
|
5
|
-
@motif = motif
|
6
|
-
|
7
|
-
default_options = {with_name: true, letters_as_rows: false}
|
8
|
-
@options = default_options.merge(options)
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
motif.name
|
13
|
-
end
|
14
|
-
|
15
|
-
def header
|
16
|
-
if options[:with_name] && name
|
17
|
-
"ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
|
18
|
-
else
|
19
|
-
raise 'Transfac should have the name field'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def matrix_string
|
24
|
-
motif.each_position.map.with_index{|pos,ind|
|
25
|
-
line_number = ind.to_s
|
26
|
-
line_number = (line_number.size == 1) ? "0#{line_number}" : line_number
|
27
|
-
line_number + ' ' + pos.join("\t")
|
28
|
-
}.join("\n")
|
29
|
-
end
|
30
|
-
|
31
|
-
def footer
|
32
|
-
#"XX\n//\n"
|
33
|
-
"\nXX\n//"
|
34
|
-
end
|
35
|
-
|
36
|
-
def to_s
|
37
|
-
header + matrix_string + footer
|
38
|
-
end
|
1
|
+
class TransfacFormatter
|
2
|
+
attr_accessor :motif, :options
|
3
|
+
|
4
|
+
def initialize(motif, options = {})
|
5
|
+
@motif = motif
|
6
|
+
|
7
|
+
default_options = {with_name: true, letters_as_rows: false}
|
8
|
+
@options = default_options.merge(options)
|
9
|
+
end
|
10
|
+
|
11
|
+
def name
|
12
|
+
motif.name
|
13
|
+
end
|
14
|
+
|
15
|
+
def header
|
16
|
+
if options[:with_name] && name
|
17
|
+
"ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
|
18
|
+
else
|
19
|
+
raise 'Transfac should have the name field'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def matrix_string
|
24
|
+
motif.each_position.map.with_index{|pos,ind|
|
25
|
+
line_number = ind.to_s
|
26
|
+
line_number = (line_number.size == 1) ? "0#{line_number}" : line_number
|
27
|
+
line_number + ' ' + pos.join("\t")
|
28
|
+
}.join("\n")
|
29
|
+
end
|
30
|
+
|
31
|
+
def footer
|
32
|
+
#"XX\n//\n"
|
33
|
+
"\nXX\n//"
|
34
|
+
end
|
35
|
+
|
36
|
+
def to_s
|
37
|
+
header + matrix_string + footer
|
38
|
+
end
|
39
39
|
end
|
data/lib/bioinform/formatters.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
require_relative 'formatters/raw_formatter'
|
1
|
+
require_relative 'formatters/raw_formatter'
|
2
2
|
require_relative 'formatters/transfac_formatter'
|
@@ -1,35 +1,35 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../parsers/string_parser'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
class JasparParser < StringParser
|
6
|
-
def header_pat
|
7
|
-
/(?<name>)/
|
8
|
-
end
|
9
|
-
|
10
|
-
def row_pat
|
11
|
-
/[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
|
12
|
-
end
|
13
|
-
|
14
|
-
def scan_splitter
|
15
|
-
scanner.scan(/(\/\/\n)+/)
|
16
|
-
end
|
17
|
-
|
18
|
-
def parse_matrix
|
19
|
-
matrix = []
|
20
|
-
while row_string = scan_row
|
21
|
-
matrix << split_row(row_string)
|
22
|
-
end
|
23
|
-
matrix.transpose
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse!
|
27
|
-
scan_any_spaces
|
28
|
-
scan_splitter
|
29
|
-
name = parse_name
|
30
|
-
matrix = parse_matrix
|
31
|
-
Parser.parse!(matrix).tap{|result| result.name = name}
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
class JasparParser < StringParser
|
6
|
+
def header_pat
|
7
|
+
/(?<name>)/
|
8
|
+
end
|
9
|
+
|
10
|
+
def row_pat
|
11
|
+
/[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
|
12
|
+
end
|
13
|
+
|
14
|
+
def scan_splitter
|
15
|
+
scanner.scan(/(\/\/\n)+/)
|
16
|
+
end
|
17
|
+
|
18
|
+
def parse_matrix
|
19
|
+
matrix = []
|
20
|
+
while row_string = scan_row
|
21
|
+
matrix << split_row(row_string)
|
22
|
+
end
|
23
|
+
matrix.transpose
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse!
|
27
|
+
scan_any_spaces
|
28
|
+
scan_splitter
|
29
|
+
name = parse_name
|
30
|
+
matrix = parse_matrix
|
31
|
+
Parser.parse!(matrix).tap{|result| result.name = name}
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
35
|
end
|
@@ -1,88 +1,88 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative '../support'
|
3
|
-
require_relative '../data_models/pm'
|
4
|
-
require_relative 'splittable_parser'
|
5
|
-
|
6
|
-
module Bioinform
|
7
|
-
class Error < StandardError; end
|
8
|
-
class ParsingError < Error; end
|
9
|
-
class InvalidMatrix < Error; end
|
10
|
-
|
11
|
-
class Parser
|
12
|
-
attr_reader :input
|
13
|
-
|
14
|
-
def initialize(*input)
|
15
|
-
if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
|
16
|
-
if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
|
17
|
-
@input = input
|
18
|
-
else # [ [[1,2,3,4],[5,6,7,8]] ]
|
19
|
-
@input = input.first
|
20
|
-
end
|
21
|
-
else #[ [1,2,3,4], [5,6,7,8] ], [ ]
|
22
|
-
@input = input
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse!
|
27
|
-
matrix = self.class.transform_input(input)
|
28
|
-
raise InvalidMatrix unless self.class.valid_matrix?(matrix)
|
29
|
-
OpenStruct.new(matrix: matrix)
|
30
|
-
end
|
31
|
-
|
32
|
-
def parse
|
33
|
-
parse! rescue nil
|
34
|
-
end
|
35
|
-
|
36
|
-
def self.choose(input, data_model = PM)
|
37
|
-
data_model.choose_parser(input).new(input)
|
38
|
-
end
|
39
|
-
|
40
|
-
def self.parse!(*input)
|
41
|
-
self.new(*input).parse!
|
42
|
-
end
|
43
|
-
def self.parse(*input)
|
44
|
-
self.new(*input).parse
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.valid_matrix?(matrix)
|
48
|
-
PM.valid_matrix?(matrix)
|
49
|
-
end
|
50
|
-
|
51
|
-
# {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
|
52
|
-
# {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
|
53
|
-
def self.array_from_acgt_hash(hsh)
|
54
|
-
hsh = normalize_hash_keys(hsh)
|
55
|
-
raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
|
56
|
-
result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
|
57
|
-
result.all?{|el| el.is_a?(Array)} ? result.transpose : result
|
58
|
-
end
|
59
|
-
|
60
|
-
# {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
|
61
|
-
def self.normalize_hash_keys(hsh)
|
62
|
-
hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
|
63
|
-
end
|
64
|
-
|
65
|
-
# [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
|
66
|
-
# [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
|
67
|
-
# {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
68
|
-
def self.try_convert_to_array(input)
|
69
|
-
case input
|
70
|
-
when Array then input
|
71
|
-
when Hash then array_from_acgt_hash(input)
|
72
|
-
else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.transform_input(input)
|
77
|
-
result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
|
78
|
-
need_tranpose?(result) ? result.transpose : result
|
79
|
-
end
|
80
|
-
|
81
|
-
# point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
|
82
|
-
# [[1,3,5,7], [2,4,6,8]] --> false
|
83
|
-
# [[1,2],[3,4],[5,6],[7,8]] --> true
|
84
|
-
def self.need_tranpose?(input)
|
85
|
-
(input.size == 4) && input.any?{|x| x.size != 4}
|
86
|
-
end
|
87
|
-
end
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative '../support'
|
3
|
+
require_relative '../data_models/pm'
|
4
|
+
require_relative 'splittable_parser'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
class Error < StandardError; end
|
8
|
+
class ParsingError < Error; end
|
9
|
+
class InvalidMatrix < Error; end
|
10
|
+
|
11
|
+
class Parser
|
12
|
+
attr_reader :input
|
13
|
+
|
14
|
+
def initialize(*input)
|
15
|
+
if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
|
16
|
+
if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
|
17
|
+
@input = input
|
18
|
+
else # [ [[1,2,3,4],[5,6,7,8]] ]
|
19
|
+
@input = input.first
|
20
|
+
end
|
21
|
+
else #[ [1,2,3,4], [5,6,7,8] ], [ ]
|
22
|
+
@input = input
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse!
|
27
|
+
matrix = self.class.transform_input(input)
|
28
|
+
raise InvalidMatrix unless self.class.valid_matrix?(matrix)
|
29
|
+
OpenStruct.new(matrix: matrix)
|
30
|
+
end
|
31
|
+
|
32
|
+
def parse
|
33
|
+
parse! rescue nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.choose(input, data_model = PM)
|
37
|
+
data_model.choose_parser(input).new(input)
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.parse!(*input)
|
41
|
+
self.new(*input).parse!
|
42
|
+
end
|
43
|
+
def self.parse(*input)
|
44
|
+
self.new(*input).parse
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.valid_matrix?(matrix)
|
48
|
+
PM.valid_matrix?(matrix)
|
49
|
+
end
|
50
|
+
|
51
|
+
# {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
|
52
|
+
# {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
|
53
|
+
def self.array_from_acgt_hash(hsh)
|
54
|
+
hsh = normalize_hash_keys(hsh)
|
55
|
+
raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
|
56
|
+
result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
|
57
|
+
result.all?{|el| el.is_a?(Array)} ? result.transpose : result
|
58
|
+
end
|
59
|
+
|
60
|
+
# {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
|
61
|
+
def self.normalize_hash_keys(hsh)
|
62
|
+
hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
|
63
|
+
end
|
64
|
+
|
65
|
+
# [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
|
66
|
+
# [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
|
67
|
+
# {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
68
|
+
def self.try_convert_to_array(input)
|
69
|
+
case input
|
70
|
+
when Array then input
|
71
|
+
when Hash then array_from_acgt_hash(input)
|
72
|
+
else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def self.transform_input(input)
|
77
|
+
result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
|
78
|
+
need_tranpose?(result) ? result.transpose : result
|
79
|
+
end
|
80
|
+
|
81
|
+
# point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
|
82
|
+
# [[1,3,5,7], [2,4,6,8]] --> false
|
83
|
+
# [[1,2],[3,4],[5,6],[7,8]] --> true
|
84
|
+
def self.need_tranpose?(input)
|
85
|
+
(input.size == 4) && input.any?{|x| x.size != 4}
|
86
|
+
end
|
87
|
+
end
|
88
88
|
end
|
@@ -1,57 +1,57 @@
|
|
1
|
-
module Bioinform
|
2
|
-
class Parser
|
3
|
-
module SingleMotifParser
|
4
|
-
def self.included(base)
|
5
|
-
base.class_eval { extend ClassMethods }
|
6
|
-
include Enumerable
|
7
|
-
alias_method :split, :to_a
|
8
|
-
end
|
9
|
-
module ClassMethods
|
10
|
-
def split_on_motifs(input, pm_klass = PM)
|
11
|
-
[ input.is_a?(pm_klass) ? self : pm_klass.new(input, self) ]
|
12
|
-
end
|
13
|
-
end
|
14
|
-
def each
|
15
|
-
if block_given?
|
16
|
-
yield self
|
17
|
-
else
|
18
|
-
self.to_enum(:each)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
include SingleMotifParser
|
23
|
-
|
24
|
-
module MultipleMotifsParser
|
25
|
-
def self.included(base)
|
26
|
-
base.class_eval { extend ClassMethods }
|
27
|
-
include Enumerable
|
28
|
-
alias_method :split, :to_a
|
29
|
-
end
|
30
|
-
module ClassMethods
|
31
|
-
def split_on_motifs(input, pm_klass = PM)
|
32
|
-
split(input).map{|el| el.is_a?(pm_klass) ? el : pm_klass.new(el)}
|
33
|
-
end
|
34
|
-
def split(input)
|
35
|
-
self.new(input).split
|
36
|
-
end
|
37
|
-
private :split
|
38
|
-
end
|
39
|
-
|
40
|
-
def scanner_reset
|
41
|
-
end
|
42
|
-
|
43
|
-
def each
|
44
|
-
if block_given?
|
45
|
-
scanner_reset
|
46
|
-
while result = parse
|
47
|
-
yield result
|
48
|
-
end
|
49
|
-
else
|
50
|
-
self.to_enum(:each)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
private :scanner_reset
|
55
|
-
end
|
56
|
-
end
|
1
|
+
module Bioinform
|
2
|
+
class Parser
|
3
|
+
module SingleMotifParser
|
4
|
+
def self.included(base)
|
5
|
+
base.class_eval { extend ClassMethods }
|
6
|
+
include Enumerable
|
7
|
+
alias_method :split, :to_a
|
8
|
+
end
|
9
|
+
module ClassMethods
|
10
|
+
def split_on_motifs(input, pm_klass = PM)
|
11
|
+
[ input.is_a?(pm_klass) ? self : pm_klass.new(input, self) ]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
def each
|
15
|
+
if block_given?
|
16
|
+
yield self
|
17
|
+
else
|
18
|
+
self.to_enum(:each)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
include SingleMotifParser
|
23
|
+
|
24
|
+
module MultipleMotifsParser
|
25
|
+
def self.included(base)
|
26
|
+
base.class_eval { extend ClassMethods }
|
27
|
+
include Enumerable
|
28
|
+
alias_method :split, :to_a
|
29
|
+
end
|
30
|
+
module ClassMethods
|
31
|
+
def split_on_motifs(input, pm_klass = PM)
|
32
|
+
split(input).map{|el| el.is_a?(pm_klass) ? el : pm_klass.new(el)}
|
33
|
+
end
|
34
|
+
def split(input)
|
35
|
+
self.new(input).split
|
36
|
+
end
|
37
|
+
private :split
|
38
|
+
end
|
39
|
+
|
40
|
+
def scanner_reset
|
41
|
+
end
|
42
|
+
|
43
|
+
def each
|
44
|
+
if block_given?
|
45
|
+
scanner_reset
|
46
|
+
while result = parse
|
47
|
+
yield result
|
48
|
+
end
|
49
|
+
else
|
50
|
+
self.to_enum(:each)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
private :scanner_reset
|
55
|
+
end
|
56
|
+
end
|
57
57
|
end
|
@@ -1,35 +1,35 @@
|
|
1
|
-
require_relative '../support'
|
2
|
-
require_relative '../parsers/string_parser'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
class StringFantomParser < StringParser
|
6
|
-
def header_pat
|
7
|
-
/NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T.*\n/
|
8
|
-
end
|
9
|
-
|
10
|
-
def row_pat
|
11
|
-
/[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
|
12
|
-
end
|
13
|
-
|
14
|
-
def scan_splitter
|
15
|
-
scanner.scan(/(\/\/\n)+/)
|
16
|
-
end
|
17
|
-
|
18
|
-
def parse_matrix
|
19
|
-
matrix = []
|
20
|
-
while row_string = scan_row
|
21
|
-
matrix << split_row(row_string)[0,4]
|
22
|
-
end
|
23
|
-
matrix.transpose
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse!
|
27
|
-
scan_any_spaces
|
28
|
-
scan_splitter
|
29
|
-
name = parse_name
|
30
|
-
matrix = parse_matrix
|
31
|
-
Parser.parse!(matrix).tap{|result| result.name = name}
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
class StringFantomParser < StringParser
|
6
|
+
def header_pat
|
7
|
+
/NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T.*\n/
|
8
|
+
end
|
9
|
+
|
10
|
+
def row_pat
|
11
|
+
/[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
|
12
|
+
end
|
13
|
+
|
14
|
+
def scan_splitter
|
15
|
+
scanner.scan(/(\/\/\n)+/)
|
16
|
+
end
|
17
|
+
|
18
|
+
def parse_matrix
|
19
|
+
matrix = []
|
20
|
+
while row_string = scan_row
|
21
|
+
matrix << split_row(row_string)[0,4]
|
22
|
+
end
|
23
|
+
matrix.transpose
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse!
|
27
|
+
scan_any_spaces
|
28
|
+
scan_splitter
|
29
|
+
name = parse_name
|
30
|
+
matrix = parse_matrix
|
31
|
+
Parser.parse!(matrix).tap{|result| result.name = name}
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
35
|
end
|