bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,17 @@
|
|
1
|
+
module Bioinform
  # Base error class of the library; other library errors derive from it.
  class Error < ::StandardError
  end

  # Error that additionally carries a list of individual validation problems.
  class ValidationError < Error
    # List of validation problems given at construction (empty by default).
    attr_reader :validation_errors

    # msg     -- main error message
    # options -- :validation_errors => list of problems (default: [])
    def initialize(msg, options = {})
      super(msg)
      # An explicit nil is treated as "no problems" so #to_s can't fail on it.
      @validation_errors = options.fetch(:validation_errors, []) || []
    end

    # Returns the message followed by the problems in parentheses, joined
    # with '; '. When there are no problems, only the message is returned
    # (no dangling " ()").
    def to_s
      return super if @validation_errors.empty?
      "#{super} (#{@validation_errors.join('; ')})"
    end
  end
end
|
data/lib/bioinform/formatters.rb
CHANGED
@@ -1,2 +1,4 @@
|
|
1
|
-
require_relative 'formatters/
|
2
|
-
require_relative 'formatters/
|
1
|
+
require_relative 'formatters/motif_formatter'
|
2
|
+
require_relative 'formatters/pretty_matrix_formatter'
|
3
|
+
require_relative 'formatters/transfac_formatter'
|
4
|
+
require_relative 'formatters/consensus_formatter'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative '../alphabet'
|
2
|
+
|
3
|
+
module Bioinform
  # Builds a consensus string for a positional matrix: for every position
  # the stored block decides which nucleotide indices "count", and that set
  # of indices is looked up in IUPAC::IUPACLettersByNucleotideIndices.
  class ConsensusFormatter
    # The block receives (position, element, nucleotide_index) and returns
    # a truthy value for elements that should contribute to the consensus, e.g.
    #   ConsensusFormatter.new{|pos, el, nucleotide_index| el == pos.max }
    # Raises Error when no block is given.
    def initialize(&block)
      raise Error, 'block is necessary to create an instance of ConsensusFormatter' unless block
      @block = block
    end

    # Simplest consensus formatter: only the maximal elements of a position count.
    def self.by_maximal_elements
      new do |pos, el, _nucleotide_index|
        el == pos.max
      end
    end

    # Returns the consensus string, one looked-up letter per position of `pm`.
    def format_string(pm)
      letters = pm.each_position.map do |pos|
        iupac_letter_by_position(pos)
      end
      letters.join
    end

    private

    # Indices of the elements of `pos` accepted by the block.
    def nucleotide_indices_by_position(pos)
      pos.each_index.select do |nucleotide_index|
        element = pos[nucleotide_index]
        @block.call(pos, element, nucleotide_index)
      end
    end

    # Letter registered for the accepted index set of `pos`.
    def iupac_letter_by_position(pos)
      accepted_indices = nucleotide_indices_by_position(pos)
      Bioinform::IUPAC::IUPACLettersByNucleotideIndices[accepted_indices]
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Bioinform
  # Formats a motif as tab-separated text, optionally preceded by a
  # ">name" line and optionally decorated with nucleotide/position headers.
  #
  # Options:
  # * with_name -- true, false or :auto (default); :auto prints the name
  #   line only if the motif responds to #name and the name is non-blank
  # * nucleotides_in -- :columns (default) or :rows
  # * precision -- number of significant digits ("%.<precision>g");
  #   false (default) prints elements with #to_s
  # * with_nucleotide_header, with_position_header -- both false by default
  #
  # The motif is expected to respond to #each_position (returning an
  # enumerator when called without a block), #alphabet (with #each_letter),
  # #matrix and #length, as used below.
  class MotifFormatter
    attr_reader :with_name, :nucleotides_in, :precision, :with_nucleotide_header, :with_position_header

    # Raises Error for an unsupported `with_name` or `nucleotides_in` value.
    def initialize(options = {})
      @with_name = options.fetch(:with_name, :auto)
      @nucleotides_in = options.fetch(:nucleotides_in, :columns).to_sym
      @precision = options.fetch(:precision, false)
      @with_nucleotide_header = options.fetch(:with_nucleotide_header, false)
      @with_position_header = options.fetch(:with_position_header, false)
      raise Error, "`with_name` can be either `true` or `false` or `:auto` but was `#{@with_name}`" unless [true, false, :auto].include?(@with_name)
      raise Error, "`nucleotides_in` can be either `:rows` or `:columns` but was `#{@nucleotides_in}`" unless [:rows, :columns].include?(@nucleotides_in)
    end

    # Returns the ">name\n" line or an empty string, depending on `with_name`.
    # With `with_name: true` raises Error if the motif has no #name method.
    def format_name(motif)
      case @with_name
      when true
        raise Error, "Motif doesn't respond to #name" unless motif.respond_to?(:name)
        ">#{motif.name}\n"
      when false
        ""
      when :auto
        (motif.respond_to?(:name) && motif.name && !motif.name.strip.empty?) ? ">#{motif.name}\n" : ""
      end
    end

    # Single matrix element as text, honouring the `precision` option.
    def element_rounded(el)
      precision ? sprintf("%.#{precision}g", el) : el.to_s
    end

    # Position number zero-padded to at least two digits.
    def position_index_formatted(pos)
      sprintf('%02d', pos)
    end

    private :element_rounded, :position_index_formatted

    # Returns the matrix body (without the name line and without a trailing
    # newline). Positions are numbered from 1 in the position header.
    def format_matrix(motif)
      result = ""
      # Top-left corner cell, needed only when both headers are printed.
      result << "\t" if with_nucleotide_header && with_position_header

      case @nucleotides_in
      when :columns
        if with_nucleotide_header
          result << motif.alphabet.each_letter.to_a.join("\t") << "\n"
        end
        motif.each_position.with_index do |pos, pos_index|
          result << "\n" if pos_index != 0
          result << "#{position_index_formatted(pos_index + 1)}\t" if with_position_header
          result << pos.map{|el| element_rounded(el) }.join("\t")
        end
      when :rows
        if with_position_header
          result << (1..motif.length).map{|pos| position_index_formatted(pos) }.join("\t") << "\n"
        end
        # Transpose once instead of once per letter.
        letter_rows = motif.matrix.transpose
        motif.alphabet.each_letter.with_index do |letter, letter_index|
          result << "\n" if letter_index != 0
          result << "#{letter}\t" if with_nucleotide_header
          result << letter_rows[letter_index].map{|el| element_rounded(el) }.join("\t")
        end
      end
      result
    end

    # Returns the optional name line followed by the formatted matrix.
    def format(motif)
      format_name(motif) + format_matrix(motif)
    end

  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative 'motif_formatter'
|
2
|
+
|
3
|
+
module Bioinform
  # Formats a motif as a human-readable, column-aligned table.
  #
  # Options:
  # * with_name -- print the motif name on the first line (default: true)
  # * letters_as_rows -- delegate to MotifFormatter with nucleotides in
  #   rows instead of printing the aligned table (default: false)
  class PrettyMatrixFormatter
    attr_reader :with_name, :letters_as_rows

    def initialize(options = {})
      @with_name = options.fetch(:with_name, true)
      @letters_as_rows = options.fetch(:letters_as_rows, false)
    end

    # Header line of the aligned table. The letters A, C, G, T are hard-coded.
    def header
      %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
    end

    # Name line, or an empty string when names are disabled or the name is nil.
    def optional_name(motif)
      (@with_name && motif.name) ? (motif.name + "\n") : ''
    end

    # One line per position; elements are rounded to 3 digits and
    # right-aligned in 6-character cells separated by a space.
    def matrix_string(motif)
      matrix_rows = motif.each_position.map do |position|
        position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
      end
      matrix_rows.join("\n")
    end

    # Returns the formatted motif.
    # Raises Error when names are requested but the motif has no #name method.
    def format(motif)
      raise Error, "PM doesn't respond to #name. Use formatter with option `with_name: false`" if @with_name && !motif.respond_to?(:name)
      # This branch is reached only when letters are requested as rows,
      # so the orientation passed on is always :rows.
      return MotifFormatter.new(with_name: @with_name, nucleotides_in: :rows).format(motif) if @letters_as_rows
      optional_name(motif) + header + matrix_string(motif)
    end

    private :header, :optional_name, :matrix_string
  end
end
|
@@ -1,39 +1,31 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
default_options = {with_name: true, letters_as_rows: false}
|
8
|
-
@options = default_options.merge(options)
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
motif.name
|
13
|
-
end
|
14
|
-
|
15
|
-
def header
|
16
|
-
if options[:with_name] && name
|
17
|
-
"ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
|
18
|
-
else
|
19
|
-
raise 'Transfac should have the name field'
|
1
|
+
module Bioinform
|
2
|
+
# Formats a motif as a TRANSFAC-like record: an "ID"/"BF"/"P0" header,
# one numbered line per position and a closing "XX" / "//" footer.
class TransfacFormatter
  attr_accessor :with_name

  # options -- :with_name (default: true)
  def initialize(options = {})
    @with_name = options.fetch(:with_name, true)
  end

  # Header lines of the record. A name is mandatory: raises RuntimeError
  # when `with_name` is off, when the motif has no #name method, or when
  # the name is nil. The species line is a fixed stub.
  def header(motif)
    # Guarding with respond_to? gives nameless objects the intended
    # message instead of a NoMethodError.
    name = motif.name if @with_name && motif.respond_to?(:name)
    raise 'Transfac should have the name field' unless name
    "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
  end

  # One line per position: a zero-based index padded to two digits,
  # a space, then the tab-separated elements.
  def matrix_string(motif)
    motif.each_position.each_with_index.map{|pos, ind|
      "#{ind.to_s.rjust(2, '0')} #{pos.join("\t")}"
    }.join("\n")
  end

  # Returns the complete record (no trailing newline).
  def format(motif)
    header(motif) + matrix_string(motif) + "\nXX\n//"
  end

  private :header, :matrix_string
end
|
22
|
-
|
23
|
-
def matrix_string
|
24
|
-
motif.each_position.map.with_index{|pos,ind|
|
25
|
-
line_number = ind.to_s
|
26
|
-
line_number = (line_number.size == 1) ? "0#{line_number}" : line_number
|
27
|
-
line_number + ' ' + pos.join("\t")
|
28
|
-
}.join("\n")
|
29
|
-
end
|
30
|
-
|
31
|
-
def footer
|
32
|
-
#"XX\n//\n"
|
33
|
-
"\nXX\n//"
|
34
|
-
end
|
35
|
-
|
36
|
-
def to_s
|
37
|
-
header + matrix_string + footer
|
38
|
-
end
|
39
|
-
end
|
31
|
+
end
|
data/lib/bioinform/parsers.rb
CHANGED
@@ -1,9 +1,2 @@
|
|
1
|
-
require_relative 'parsers/parser'
|
2
|
-
require_relative 'parsers/trivial_parser'
|
3
|
-
require_relative 'parsers/yaml_parser'
|
4
|
-
require_relative 'parsers/string_parser'
|
5
|
-
require_relative 'parsers/string_fantom_parser'
|
6
|
-
require_relative 'parsers/splittable_parser'
|
7
|
-
require_relative 'parsers/jaspar_parser'
|
8
|
-
|
9
1
|
require_relative 'parsers/matrix_parser'
|
2
|
+
require_relative 'parsers/motif_splitter'
|
@@ -1,32 +1,63 @@
|
|
1
|
-
|
1
|
+
require_relative '../errors'
|
2
2
|
|
3
|
-
module Bioinform
|
3
|
+
module Bioinform
|
4
4
|
class MatrixParser
|
5
|
+
# fix_nucleotides_number -- raises if matrix has not enough nucleotide columns
|
6
|
+
attr_reader :has_name, :name_pattern, :has_header_row, :has_header_column, :nucleotides_in, :fix_nucleotides_number
|
5
7
|
def initialize(options = {})
|
6
|
-
@has_name = options.fetch(:has_name,
|
7
|
-
@name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]
|
8
|
+
@has_name = options.fetch(:has_name, :auto)
|
9
|
+
@name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^-+\d.\t\r\n][^\t\r\n]*).*$/)
|
8
10
|
@has_header_row = options.fetch(:has_header_row, false)
|
9
11
|
@has_header_column = options.fetch(:has_header_column, false)
|
10
|
-
@nucleotides_in = options.fetch(:nucleotides_in, :
|
12
|
+
@nucleotides_in = options.fetch(:nucleotides_in, :auto)
|
13
|
+
@fix_nucleotides_number = options.fetch(:fix_nucleotides_number, 4)
|
11
14
|
|
12
|
-
raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
|
15
|
+
raise Error, ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns, :auto].include?(@nucleotides_in)
|
13
16
|
end
|
14
17
|
|
18
|
+
# Heuristic for `nucleotides_in: :auto`: true when the matrix has exactly
# @fix_nucleotides_number rows but its first row has a different length,
# i.e. it looks like nucleotides are in rows. Both sides are compared with
# the configured nucleotide count (not a literal 4), so the check stays
# consistent when a non-default count is used.
def need_transpose?(matrix)
  (matrix.size == @fix_nucleotides_number) && (matrix.first.size != @fix_nucleotides_number)
end
|
21
|
+
private :need_transpose?
|
22
|
+
|
15
23
|
def parse!(input)
|
16
|
-
lines = input.lines
|
17
|
-
if @has_name
|
24
|
+
lines = input.strip.lines.to_a
|
25
|
+
if @has_name == :auto
|
26
|
+
match = lines.first.match(@name_pattern)
|
27
|
+
if match
|
28
|
+
lines.shift
|
29
|
+
name = match[:name]
|
30
|
+
end
|
31
|
+
elsif @has_name == false
|
32
|
+
name = nil
|
33
|
+
else
|
18
34
|
match = lines.shift.match(@name_pattern)
|
19
|
-
raise
|
35
|
+
raise Error, "Name pattern doesn't match" unless match
|
20
36
|
name = match[:name]
|
21
37
|
end
|
22
38
|
lines.shift if @has_header_row
|
23
|
-
matrix = lines.map(&:
|
39
|
+
matrix = lines.map(&:rstrip).reject(&:empty?).map{|line| line.split }
|
24
40
|
matrix = matrix.map{|row| row.drop(1) } if @has_header_column
|
25
41
|
matrix = matrix.map{|row| row.map{|el| Float(el) } }
|
26
42
|
|
27
|
-
|
28
|
-
|
29
|
-
|
43
|
+
case @nucleotides_in
|
44
|
+
when :columns
|
45
|
+
matrix = matrix
|
46
|
+
when :rows
|
47
|
+
matrix = matrix.transpose
|
48
|
+
when :auto
|
49
|
+
if @fix_nucleotides_number && need_transpose?(matrix)
|
50
|
+
matrix = matrix.transpose
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
if @fix_nucleotides_number
|
55
|
+
raise Error, 'Not enough nucleotides in a matrix' unless matrix.all?{|pos| pos.size >= @fix_nucleotides_number}
|
56
|
+
matrix = matrix.map{|pos| pos.first(@fix_nucleotides_number) }
|
57
|
+
end
|
58
|
+
{matrix: matrix, name: name}
|
59
|
+
rescue => e
|
60
|
+
raise Error, e.message
|
30
61
|
end
|
31
62
|
|
32
63
|
def parse(input)
|
@@ -38,28 +69,5 @@ module Bioinform
|
|
38
69
|
rescue
|
39
70
|
false
|
40
71
|
end
|
41
|
-
|
42
|
-
class TemporaryWrapper
|
43
|
-
attr_reader :input
|
44
|
-
include Bioinform::Parser::ClassMethods
|
45
|
-
include Bioinform::Parser::SingleMotifParser::ClassMethods
|
46
|
-
def initialize(parser)
|
47
|
-
@parser, input = parser, input
|
48
|
-
end
|
49
|
-
def parse
|
50
|
-
@parser.parse(@input)
|
51
|
-
end
|
52
|
-
def parse!
|
53
|
-
@parser.parse!(@input)
|
54
|
-
end
|
55
|
-
def new(input)
|
56
|
-
@input = input
|
57
|
-
self
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def wrapper
|
62
|
-
TemporaryWrapper.new(self)
|
63
|
-
end
|
64
72
|
end
|
65
73
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Bioinform
  #
  # MotifSplitter is designed to split text into chunks with separate motifs.
  # It enumerates input line by line.
  # One can supply two options:
  # * pattern for splitter `splitter_pattern`
  # * `start_motif_pattern` which can determine start of motif but doesn't
  #   match within motif
  # If specified pattern is nil, corresponding splitting is not applied.
  # Patterns are applied by `#===` operator, thus both a regexp and a Proc
  # are valid options. Proc accepts a line and should return a truthy value
  # if line is a splitter or is a motif start.
  #
  # Splitter method `#split` returns an array of strings. Each of returned
  # strings represents a motif. Motifs exclude splitter but include motif
  # start, thus one can divide input both by lines which will be dismissed
  # and by lines which will be retained.
  #
  class MotifSplitter
    attr_reader :start_motif_pattern, :splitter_pattern

    # Misspelled reader name kept so existing callers keep working;
    # prefer #splitter_pattern.
    alias_method :spliiter, :splitter_pattern

    # options:
    # * :start_motif_pattern -- default matches a line starting with `>` or
    #   with a character that can't begin a number
    # * :splitter_pattern -- default matches a blank line
    def initialize(options={})
      @start_motif_pattern = options.fetch(:start_motif_pattern, /^\s*([^-+\s\d.]+|>.*)/)
      @splitter_pattern = options.fetch(:splitter_pattern, /^\s*$/)
    end

    # Returns an array of text parts with splitter lines removed.
    # Without a splitter pattern the whole input is the only part.
    def parts_divided_by_splitter(input)
      return [input] unless @splitter_pattern
      # The chunk key is coerced to true/false: Enumerable#chunk silently
      # drops elements whose key is nil, which a Proc pattern may return.
      input.each_line
           .chunk{|line| (@splitter_pattern === line) ? true : false }
           .reject{|is_splitter, _lines| is_splitter }
           .map{|_is_splitter, lines| lines.join }
    end

    # Returns an array of text parts, each beginning at a motif start line
    # (the first part begins at the first line regardless).
    # Without a start pattern the whole input is the only part.
    def parts_divided_by_motif_starts(input)
      return [input] unless @start_motif_pattern
      input.each_line.slice_before(@start_motif_pattern).map{|motif_lines| motif_lines.join }
    end

    private :parts_divided_by_splitter, :parts_divided_by_motif_starts

    # Splits `input` into an array of stripped, non-empty motif strings.
    def split(input)
      parts_divided_by_splitter(input).flat_map{|chunk|
        parts_divided_by_motif_starts(chunk)
      }.map(&:strip).reject(&:empty?)
    end
  end
end
|
data/lib/bioinform/support.rb
CHANGED
@@ -1,18 +1,50 @@
|
|
1
|
-
require_relative 'support/
|
2
|
-
require_relative 'support/third_part/active_support/core_ext/hash/indifferent_access'
|
1
|
+
require_relative 'support/strip_doc'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
3
|
+
module Bioinform
  # Helpers for building lookup hashes (letter indices, permuted keys,
  # keys/values in various cases and types). Every helper returns a new
  # hash and leaves its argument untouched. Results are accumulated in a
  # single hash instead of merging into a fresh hash on every step.
  module Support
    # element_indices([:A,:C,:G,:T]) ==> {:A=>0, :C=>1, :G=>2, :T=>3}
    def self.element_indices(arr)
      arr.each_with_index.each_with_object({}) {|(letter, index), hsh| hsh[letter] = index }
    end

    # hash_keys_permuted([0,1], :A) ==> {[0,1] => :A, [1,0] => :A}
    def self.hash_keys_permuted(key, value)
      key.permutation.each_with_object({}) {|perm, hsh| hsh[perm] = value }
    end

    # with_key_permutations({[0,1] => :A, [0,2] => :T}) ==> {[0,1] => :A, [1,0] => :A, [0,2] => :T, [2,0]=>:T}
    def self.with_key_permutations(hash)
      hash.each_with_object({}) {|(indices, letter), h| h.update( hash_keys_permuted(indices, letter) ) }
    end


    # various_key_cases({'a' => 2, 'C' => 3, :g =>5, :T => 8}) ==> {'a' => 2, 'A' => 2, 'c' => 3, 'C' => 3, :g =>5, :G => 5, :T => 8, :t=>8}
    def self.various_key_cases(hash)
      hash.each_with_object({}) do |(k, v), h|
        h[k.downcase] = v
        h[k.upcase] = v
      end
    end

    # various_key_types({'a' => 2, 'C' => 3, :g =>5, :T => 8}) ==> {'a' => 2, :a => 2, 'C' => 3, :C => 3, :g =>5, 'g' => 5, :T => 8, 'T'=>8}
    def self.various_key_types(hash)
      hash.each_with_object({}) do |(k, v), h|
        h[k.to_s] = v
        h[k.to_sym] = v
      end
    end

    # Keys in both cases and as both strings and symbols.
    def self.various_key_case_types(hash)
      various_key_types(various_key_cases(hash))
    end


    # various_key_value_cases({:A => :T}) ==> {:A => :T, :a => :t}
    def self.various_key_value_cases(hash)
      hash.each_with_object({}) do |(k, v), h|
        h[k.upcase] = v.upcase
        h[k.downcase] = v.downcase
      end
    end

    # various_key_value_types({:A => :T}) ==> {:A => :T, 'A' => 'T'}
    def self.various_key_value_types(hash)
      hash.each_with_object({}) do |(k, v), h|
        h[k.to_s] = v.to_s
        h[k.to_sym] = v.to_sym
      end
    end

    # Key/value pairs in both cases and as both strings and symbols.
    def self.various_key_value_case_types(hash)
      various_key_value_types(various_key_value_cases(hash))
    end
  end
end
|