bioinform 0.1.17 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,17 @@
1
+ module Bioinform
2
+ class Error < ::StandardError
3
+ end
4
+
5
+ class ValidationError < Error
6
+ attr_reader :validation_errors
7
+
8
+ def initialize(msg, options = {})
9
+ super(msg)
10
+ @validation_errors = options.fetch(:validation_errors, [])
11
+ end
12
+
13
+ def to_s
14
+ "#{super} (#{@validation_errors.join('; ')})"
15
+ end
16
+ end
17
+ end
@@ -1,2 +1,4 @@
1
- require_relative 'formatters/raw_formatter'
2
- require_relative 'formatters/transfac_formatter'
1
+ require_relative 'formatters/motif_formatter'
2
+ require_relative 'formatters/pretty_matrix_formatter'
3
+ require_relative 'formatters/transfac_formatter'
4
+ require_relative 'formatters/consensus_formatter'
@@ -0,0 +1,35 @@
1
+ require_relative '../alphabet'
2
+
3
+ module Bioinform
4
+ class ConsensusFormatter
5
+
6
+ # ConsensusFormatter.new{|pos, el, nucleotide_index| el == pos.max }
7
+ def initialize(&block)
8
+ raise Error, 'block is necessary to create an instance of ConsensusFormatter' unless block_given?
9
+ @block = block
10
+ end
11
+
12
+ # Simplest consensus formatter which takes into account only maximal elements
13
+ def self.by_maximal_elements
14
+ self.new{|pos, el, nucleotide_index| el == pos.max }
15
+ end
16
+
17
+
18
+ def format_string(pm)
19
+ pm.each_position.map{|pos| iupac_letter_by_position(pos) }.join
20
+ end
21
+
22
+ def nucleotide_indices_by_position(pos)
23
+ pos.each_index.select{|nucleotide_index|
24
+ @block.call(pos, pos[nucleotide_index], nucleotide_index)
25
+ }
26
+ end
27
+
28
+ def iupac_letter_by_position(pos)
29
+ nucleotide_indices = nucleotide_indices_by_position(pos)
30
+ Bioinform::IUPAC::IUPACLettersByNucleotideIndices[nucleotide_indices]
31
+ end
32
+
33
+ private :nucleotide_indices_by_position, :iupac_letter_by_position
34
+ end
35
+ end
@@ -0,0 +1,69 @@
1
+ module Bioinform
2
+ class MotifFormatter
3
+ attr_reader :with_name, :nucleotides_in, :precision, :with_nucleotide_header, :with_position_header
4
+
5
+ def initialize(options = {})
6
+ @with_name = options.fetch(:with_name, :auto)
7
+ @nucleotides_in = options.fetch(:nucleotides_in, :columns).to_sym
8
+ @precision = options.fetch(:precision, false)
9
+ @with_nucleotide_header = options.fetch(:with_nucleotide_header, false)
10
+ @with_position_header = options.fetch(:with_position_header, false)
11
+ raise Error, "`with_name` can be either `true` or `false` or `:auto` but was `#{@with_name}`" unless [true, false, :auto].include?(@with_name)
12
+ raise Error, "`nucleotides_in` can be either `:rows` or `:columns` but was `#{@nucleotides_in}`" unless [:rows, :columns].include?(@nucleotides_in)
13
+ end
14
+
15
+ def format_name(motif)
16
+ case @with_name
17
+ when true
18
+ raise Error, "Motif doesn't respond to #name" unless motif.respond_to?(:name)
19
+ ">#{motif.name}\n"
20
+ when false
21
+ ""
22
+ when :auto
23
+ (motif.respond_to?(:name) && motif.name && !motif.name.strip.empty?) ? ">#{motif.name}\n" : ""
24
+ end
25
+ end
26
+
27
+ def element_rounded(el)
28
+ precision ? sprintf("%.#{precision}g", el) : el.to_s
29
+ end
30
+
31
+ def position_index_formatted(pos)
32
+ sprintf('%02d', pos)
33
+ end
34
+
35
+ private :element_rounded, :position_index_formatted
36
+
37
+ def format_matrix(motif)
38
+ result = ""
39
+ result << "\t" if with_nucleotide_header && with_position_header
40
+
41
+ case @nucleotides_in
42
+ when :columns
43
+ if with_nucleotide_header
44
+ result << motif.alphabet.each_letter.to_a.join("\t") << "\n"
45
+ end
46
+ motif.each_position.with_index do |pos, pos_index|
47
+ result << "\n" if pos_index != 0
48
+ result << "#{position_index_formatted(pos_index + 1)}\t" if with_position_header
49
+ result << pos.map{|el| element_rounded(el) }.join("\t")
50
+ end
51
+ when :rows
52
+ if with_position_header
53
+ result << (1..motif.length).map{|pos| position_index_formatted(pos) }.join("\t") << "\n"
54
+ end
55
+ motif.alphabet.each_letter.with_index do |letter, letter_index|
56
+ result << "\n" if letter_index != 0
57
+ result << "#{letter}\t" if with_nucleotide_header
58
+ result << motif.matrix.transpose[letter_index].map{|el| element_rounded(el) }.join("\t")
59
+ end
60
+ end
61
+ result
62
+ end
63
+
64
+ def format(motif)
65
+ format_name(motif) + format_matrix(motif)
66
+ end
67
+
68
+ end
69
+ end
@@ -0,0 +1,36 @@
1
+ require_relative 'motif_formatter'
2
+
3
+ module Bioinform
4
+ class PrettyMatrixFormatter
5
+ attr_reader :with_name, :letters_as_rows
6
+
7
+ def initialize(options = {})
8
+ @with_name = options.fetch(:with_name, true)
9
+ @letters_as_rows = options.fetch(:letters_as_rows, false)
10
+ end
11
+
12
+ def header
13
+ %w{A C G T}.map{|el| el.rjust(4).ljust(7)}.join + "\n"
14
+ end
15
+
16
+ def optional_name(motif)
17
+ (@with_name && motif.name) ? (motif.name + "\n") : ''
18
+ end
19
+
20
+ def matrix_string(motif)
21
+ matrix_rows = motif.each_position.map do |position|
22
+ position.map{|el| el.round(3).to_s.rjust(6)}.join(' ')
23
+ end
24
+
25
+ matrix_str = matrix_rows.join("\n")
26
+ end
27
+
28
+ def format(motif)
29
+ raise Error, "PM doesn't respond to #name. Use formatter with option `with_name: false`" if @with_name && !motif.respond_to?(:name)
30
+ return MotifFormatter.new(with_name: @with_name, nucleotides_in: (@letters_as_rows ? :rows : :columns)).format(motif) if @letters_as_rows
31
+ optional_name(motif) + header + matrix_string(motif)
32
+ end
33
+
34
+ private :header, :optional_name, :matrix_string
35
+ end
36
+ end
@@ -1,39 +1,31 @@
1
- class TransfacFormatter
2
- attr_accessor :motif, :options
3
-
4
- def initialize(motif, options = {})
5
- @motif = motif
6
-
7
- default_options = {with_name: true, letters_as_rows: false}
8
- @options = default_options.merge(options)
9
- end
10
-
11
- def name
12
- motif.name
13
- end
14
-
15
- def header
16
- if options[:with_name] && name
17
- "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
18
- else
19
- raise 'Transfac should have the name field'
1
+ module Bioinform
2
+ class TransfacFormatter
3
+ attr_accessor :with_name
4
+
5
+ def initialize(options = {})
6
+ @with_name = options.fetch(:with_name, true)
20
7
  end
8
+
9
+ def header(motif)
10
+ if @with_name && motif.name
11
+ "ID #{motif.name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
12
+ else
13
+ raise 'Transfac should have the name field'
14
+ end
15
+ end
16
+
17
+ def matrix_string(motif)
18
+ motif.each_position.map.with_index{|pos,ind|
19
+ line_number = ind.to_s
20
+ line_number = (line_number.size == 1) ? "0#{line_number}" : line_number
21
+ line_number + ' ' + pos.join("\t")
22
+ }.join("\n")
23
+ end
24
+
25
+ def format(motif)
26
+ header(motif) + matrix_string(motif) + "\nXX\n//"
27
+ end
28
+
29
+ private :header, :matrix_string
21
30
  end
22
-
23
- def matrix_string
24
- motif.each_position.map.with_index{|pos,ind|
25
- line_number = ind.to_s
26
- line_number = (line_number.size == 1) ? "0#{line_number}" : line_number
27
- line_number + ' ' + pos.join("\t")
28
- }.join("\n")
29
- end
30
-
31
- def footer
32
- #"XX\n//\n"
33
- "\nXX\n//"
34
- end
35
-
36
- def to_s
37
- header + matrix_string + footer
38
- end
39
- end
31
+ end
@@ -1,9 +1,2 @@
1
- require_relative 'parsers/parser'
2
- require_relative 'parsers/trivial_parser'
3
- require_relative 'parsers/yaml_parser'
4
- require_relative 'parsers/string_parser'
5
- require_relative 'parsers/string_fantom_parser'
6
- require_relative 'parsers/splittable_parser'
7
- require_relative 'parsers/jaspar_parser'
8
-
9
1
  require_relative 'parsers/matrix_parser'
2
+ require_relative 'parsers/motif_splitter'
@@ -1,32 +1,63 @@
1
- require 'ostruct'
1
+ require_relative '../errors'
2
2
 
3
- module Bioinform
3
+ module Bioinform
4
4
  class MatrixParser
5
+ # fix_nucleotides_number -- raises if the matrix does not have enough nucleotide columns
6
+ attr_reader :has_name, :name_pattern, :has_header_row, :has_header_column, :nucleotides_in, :fix_nucleotides_number
5
7
  def initialize(options = {})
6
- @has_name = options.fetch(:has_name, true)
7
- @name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^\t\r\n]+).*$/)
8
+ @has_name = options.fetch(:has_name, :auto)
9
+ @name_pattern = options.fetch(:name_pattern, /^>?\s*(?<name>[^-+\d.\t\r\n][^\t\r\n]*).*$/)
8
10
  @has_header_row = options.fetch(:has_header_row, false)
9
11
  @has_header_column = options.fetch(:has_header_column, false)
10
- @nucleotides_in = options.fetch(:nucleotides_in, :columns)
12
+ @nucleotides_in = options.fetch(:nucleotides_in, :auto)
13
+ @fix_nucleotides_number = options.fetch(:fix_nucleotides_number, 4)
11
14
 
12
- raise ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns].include?(@nucleotides_in)
15
+ raise Error, ':nucleotides_in option should be either :rows or :columns' unless [:rows, :columns, :auto].include?(@nucleotides_in)
13
16
  end
14
17
 
18
+ def need_transpose?(matrix)
19
+ (matrix.size == @fix_nucleotides_number) && (matrix.first.size != 4)
20
+ end
21
+ private :need_transpose?
22
+
15
23
  def parse!(input)
16
- lines = input.lines
17
- if @has_name
24
+ lines = input.strip.lines.to_a
25
+ if @has_name == :auto
26
+ match = lines.first.match(@name_pattern)
27
+ if match
28
+ lines.shift
29
+ name = match[:name]
30
+ end
31
+ elsif @has_name == false
32
+ name = nil
33
+ else
18
34
  match = lines.shift.match(@name_pattern)
19
- raise 'Name pattern doesn\'t match' unless match
35
+ raise Error, "Name pattern doesn't match" unless match
20
36
  name = match[:name]
21
37
  end
22
38
  lines.shift if @has_header_row
23
- matrix = lines.map(&:strip).reject(&:empty?).map{|line| line.split }
39
+ matrix = lines.map(&:rstrip).reject(&:empty?).map{|line| line.split }
24
40
  matrix = matrix.map{|row| row.drop(1) } if @has_header_column
25
41
  matrix = matrix.map{|row| row.map{|el| Float(el) } }
26
42
 
27
- matrix = matrix.transpose if @nucleotides_in == :rows
28
- # raise 'Matrix not valid' unless ! matrix.empty? && matrix.all?{|pos| pos.size == 4 }
29
- OpenStruct.new(matrix: matrix, name: name)
43
+ case @nucleotides_in
44
+ when :columns
45
+ matrix = matrix
46
+ when :rows
47
+ matrix = matrix.transpose
48
+ when :auto
49
+ if @fix_nucleotides_number && need_transpose?(matrix)
50
+ matrix = matrix.transpose
51
+ end
52
+ end
53
+
54
+ if @fix_nucleotides_number
55
+ raise Error, 'Not enough nucleotides in a matrix' unless matrix.all?{|pos| pos.size >= @fix_nucleotides_number}
56
+ matrix = matrix.map{|pos| pos.first(@fix_nucleotides_number) }
57
+ end
58
+ {matrix: matrix, name: name}
59
+ rescue => e
60
+ raise Error, e.message
30
61
  end
31
62
 
32
63
  def parse(input)
@@ -38,28 +69,5 @@ module Bioinform
38
69
  rescue
39
70
  false
40
71
  end
41
-
42
- class TemporaryWrapper
43
- attr_reader :input
44
- include Bioinform::Parser::ClassMethods
45
- include Bioinform::Parser::SingleMotifParser::ClassMethods
46
- def initialize(parser)
47
- @parser, input = parser, input
48
- end
49
- def parse
50
- @parser.parse(@input)
51
- end
52
- def parse!
53
- @parser.parse!(@input)
54
- end
55
- def new(input)
56
- @input = input
57
- self
58
- end
59
- end
60
-
61
- def wrapper
62
- TemporaryWrapper.new(self)
63
- end
64
72
  end
65
73
  end
@@ -0,0 +1,45 @@
1
+ module Bioinform
2
+ #
3
+ # MotifSplitter is designed to split text into chunks with separate motifs.
4
+ # It enumerates input line by line.
5
+ # One can supply two options:
6
+ # * pattern for splitter `splitter_pattern`
7
+ # * `start_motif_pattern` which can determine start of motif but doesn't
8
+ # match within motif
9
+ # If specified pattern is nil, corresponding splitting is not applied.
10
+ # Patterns are applied by the `#===` operator, thus both a Regexp and a Proc are
11
+ # valid options. Proc accepts a line and should return true if line is
12
+ # a splitter or is a motif start.
13
+ #
14
+ # Splitter method `#split` returns an array of strings. Each of returned
15
+ # strings represents a motif. Motifs exclude splitter but include motif
16
+ # start, thus one can divide input both by lines which will be dismissed
17
+ # and by lines which will be retained.
18
+ #
19
+ class MotifSplitter
20
+ attr_reader :start_motif_pattern, :spliiter
21
+
22
+ def initialize(options={})
23
+ @start_motif_pattern = options.fetch(:start_motif_pattern, /^\s*([^-+\s\d.]+|>.*)/)
24
+ @splitter_pattern = options.fetch(:splitter_pattern, /^\s*$/)
25
+ end
26
+
27
+ def parts_divided_by_splitter(input)
28
+ return input unless @splitter_pattern
29
+ input.each_line.chunk{|line| @splitter_pattern === line }.reject{|is_splitter, lines| is_splitter}.map{|is_splitter, lines| lines.join}
30
+ end
31
+
32
+ def parts_divided_by_motif_starts(input)
33
+ return input unless @start_motif_pattern
34
+ input.each_line.slice_before(@start_motif_pattern).map{|motif_lines| motif_lines.join }
35
+ end
36
+
37
+ private :parts_divided_by_splitter, :parts_divided_by_motif_starts
38
+
39
+ def split(input)
40
+ parts_divided_by_splitter(input).map{|chunk|
41
+ parts_divided_by_motif_starts(chunk)
42
+ }.flatten.map(&:strip).reject(&:empty?)
43
+ end
44
+ end
45
+ end
@@ -1,18 +1,50 @@
1
- require_relative 'support/third_part/active_support/core_ext/string/filters'
2
- require_relative 'support/third_part/active_support/core_ext/hash/indifferent_access'
1
+ require_relative 'support/strip_doc'
3
2
 
4
- require_relative 'support/collect_hash'
5
- require_relative 'support/delete_many'
6
- require_relative 'support/multiline_squish'
7
- require_relative 'support/same_by'
8
- require_relative 'support/inverf'
9
- require_relative 'support/deep_dup'
3
+ module Bioinform
4
+ module Support
5
+ # element_indices([:A,:C,:G,:T]) ==> {:A=>0, :C=>1, :G=>2, :T=>3}
6
+ def self.element_indices(arr)
7
+ arr.each_with_index.inject({}) {|hsh, (letter, index)| hsh.merge(letter => index) }
8
+ end
10
9
 
11
- require_relative 'support/partial_sums'
10
+ # hash_keys_permuted([0,1], :A) ==> {[0,1] => :A, [1,0] => :A}
11
+ def self.hash_keys_permuted(key, value)
12
+ key.permutation.inject({}){|hsh, perm| hsh.merge(perm => value) }
13
+ end
12
14
 
13
- require_relative 'support/array_zip'
14
- require_relative 'support/array_product'
15
+ # with_key_permutations({[0,1] => :A, [0,2] => :T}) ==> {[0,1] => :A, [1,0] => :A, [0,2] => :T, [2,0]=>:T}
16
+ def self.with_key_permutations(hash)
17
+ hash.inject({}) {|h, (indices, letter)| h.merge( hash_keys_permuted(indices, letter) ) }
18
+ end
15
19
 
16
- require_relative 'support/advanced_scan'
17
- require_relative 'support/parameters'
18
- require_relative 'support/strip_doc'
20
+
21
+ # various_key_cases({'a' => 2, 'C' => 3, :g =>5, :T => 8}) ==> {'a' => 2, 'A' => 2, 'c' => 3, 'C' => 3, :g =>5, :G => 5, :T => 8, :t=>8}
22
+ def self.various_key_cases(hash)
23
+ hash.inject({}){|h,(k,v)| h.merge(k.downcase => v, k.upcase => v) }
24
+ end
25
+
26
+ # various_key_types({'a' => 2, 'C' => 3, :g =>5, :T => 8}) ==> {'a' => 2, :a => 2, 'C' => 3, :C => 3, :g =>5, 'g' => 5, :T => 8, 'T'=>8}
27
+ def self.various_key_types(hash)
28
+ hash.inject({}){|h,(k,v)| h.merge(k.to_s => v, k.to_sym => v) }
29
+ end
30
+
31
+ def self.various_key_case_types(hash)
32
+ various_key_types(various_key_cases(hash))
33
+ end
34
+
35
+
36
+ # various_key_value_cases({:A => :T}) ==> {:A => :T, :a => :t}
37
+ def self.various_key_value_cases(hash)
38
+ hash.inject({}){|h,(k,v)| h.merge(k.upcase => v.upcase, k.downcase => v.downcase) }
39
+ end
40
+
41
+ # various_key_value_types({:A => :T}) ==> {:A => :T, 'A' => 'T'}
42
+ def self.various_key_value_types(hash)
43
+ hash.inject({}){|h,(k,v)| h.merge(k.to_s => v.to_s, k.to_sym => v.to_sym) }
44
+ end
45
+
46
+ def self.various_key_value_case_types(hash)
47
+ various_key_value_types(various_key_value_cases(hash))
48
+ end
49
+ end
50
+ end