bioinform 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. data/.gitignore +1 -0
  2. data/Gemfile +12 -0
  3. data/Guardfile +9 -0
  4. data/README.md +7 -1
  5. data/TODO.txt +8 -0
  6. data/bioinform.gemspec +7 -5
  7. data/lib/bioinform.rb +1 -0
  8. data/lib/bioinform/cli.rb +12 -3
  9. data/lib/bioinform/cli/convert_motif.rb +108 -0
  10. data/lib/bioinform/cli/merge_into_collection.rb +6 -2
  11. data/lib/bioinform/cli/pcm2pwm.rb +1 -1
  12. data/lib/bioinform/cli/split_motifs.rb +1 -1
  13. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +19 -0
  14. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +20 -0
  15. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +0 -0
  16. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +0 -0
  17. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  18. data/lib/bioinform/data_models/collection.rb +21 -35
  19. data/lib/bioinform/data_models/motif.rb +56 -0
  20. data/lib/bioinform/data_models/pcm.rb +4 -8
  21. data/lib/bioinform/data_models/pm.rb +19 -48
  22. data/lib/bioinform/data_models/pwm.rb +16 -0
  23. data/lib/bioinform/formatters.rb +2 -0
  24. data/lib/bioinform/formatters/raw_formatter.rb +41 -0
  25. data/lib/bioinform/formatters/transfac_formatter.rb +39 -0
  26. data/lib/bioinform/parsers.rb +2 -1
  27. data/lib/bioinform/parsers/jaspar_parser.rb +35 -0
  28. data/lib/bioinform/parsers/string_parser.rb +1 -1
  29. data/lib/bioinform/parsers/trivial_parser.rb +2 -1
  30. data/lib/bioinform/parsers/yaml_parser.rb +1 -1
  31. data/lib/bioinform/support.rb +2 -1
  32. data/lib/bioinform/support/parameters.rb +27 -18
  33. data/lib/bioinform/support/strip_doc.rb +9 -0
  34. data/lib/bioinform/version.rb +1 -1
  35. data/spec/cli/convert_motif_spec.rb +107 -0
  36. data/spec/cli/data/merge_into_collection/collection.yaml.result +186 -183
  37. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +186 -183
  38. data/spec/cli/data/split_motifs/collection.yaml +184 -193
  39. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +18 -0
  40. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -0
  41. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +50 -0
  42. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +5 -0
  43. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +4 -0
  44. data/spec/data_models/collection_spec.rb +36 -34
  45. data/spec/data_models/motif_spec.rb +224 -0
  46. data/spec/data_models/pcm_spec.rb +28 -17
  47. data/spec/data_models/pm_spec.rb +83 -121
  48. data/spec/data_models/pwm_spec.rb +38 -0
  49. data/spec/fabricators/collection_fabricator.rb +2 -2
  50. data/spec/fabricators/motif_fabricator.rb +33 -0
  51. data/spec/fabricators/motif_formats_fabricator.rb +125 -0
  52. data/spec/fabricators/pcm_fabricator.rb +25 -0
  53. data/spec/fabricators/pm_fabricator.rb +10 -1
  54. data/spec/fabricators/ppm_fabricator.rb +14 -0
  55. data/spec/fabricators/pwm_fabricator.rb +16 -0
  56. data/spec/parsers/trivial_parser_spec.rb +12 -12
  57. data/spec/parsers/yaml_parser_spec.rb +11 -11
  58. data/spec/spec_helper.rb +19 -49
  59. data/spec/spec_helper_source.rb +59 -0
  60. metadata +78 -7
@@ -0,0 +1,56 @@
1
+ require 'ostruct'
2
+ require 'active_support/core_ext/object/try'
3
+ require_relative '../support/parameters'
4
module Bioinform
  # A motif that can hold several data models (pcm, pwm, ppm) together with
  # a name and any other attributes. All values live in the OpenStruct
  # returned by +parameters+ (provided by the Parameters mixin).
  #
  # Supported constructor forms:
  #   0) Motif.new()
  #   1) Motif.new(pcm: ..., pwm: ..., name: ..., threshold: ...)
  #   2) Motif.new(my_pcm)
  #   3) Motif.new(pm: my_pcm, threshold: ...)
  # Forms 2) and 3) choose the data model automatically from the matrix class.
  #
  # NOTE(review): respond_to_missing? is intentionally not defined; making
  # respond_to? mirror the OpenStruct could change how serializers treat a
  # Motif - confirm before adding it.
  class Motif
    include Parameters
    make_parameters :pcm, :pwm, :ppm, :name, :original_data_model

    #### What if pm already is a Motif
    #
    # @param parameters [PM, Hash] a matrix object or a hash of attributes.
    #   A hash may carry the matrix under the +:pm+ key; all other keys are
    #   assigned through their "key=" writers (unknown keys end up in the
    #   OpenStruct via method_missing).
    # @raise [ArgumentError] if the argument is neither a PM nor a Hash
    def initialize(parameters = {})
      case parameters
      when PM
        register_matrix(parameters)
      when Hash
        # Work on a copy so the caller's hash is not modified by the delete.
        attributes = parameters.dup
        if attributes.has_key?(:pm) && attributes[:pm].is_a?(PM)
          register_matrix(attributes.delete(:pm))
        end
        # Applied after the matrix so explicitly given keys take precedence.
        set_parameters(attributes)
      else
        raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
      end
    end

    # The matrix this motif was created from: the stored +pm+ when no data
    # model was recorded, otherwise the matrix of the recorded data model.
    def pm; ((original_data_model || :pm) == :pm) ? parameters.pm : send(original_data_model); end
    #def pcm; parameters.pcm; end
    # Stored pwm, or one derived from the pcm when a pcm is present.
    def pwm; parameters.pwm || pcm.try(:to_pwm); end
    # Stored ppm, or one derived from the pcm when a pcm is present.
    def ppm; parameters.ppm || pcm.try(:to_ppm); end
    #def pcm=(pcm); parameters.pcm = pcm; end
    #def pwm=(pwm); parameters.pwm = pwm; end
    #def ppm=(ppm); parameters.ppm = ppm; end
    # Explicit name, falling back to the matrix name; nil when the motif has
    # neither (an empty motif has no matrix to ask).
    def name; parameters.name || pm.try(:name); end

    # Any message not defined here is forwarded to the parameters OpenStruct.
    def method_missing(meth, *args)
      parameters.__send__(meth, *args)
    end

    # Motifs are equal when their parameters are equal; objects that expose
    # no +parameters+ are never equal to a motif.
    def ==(other)
      other.respond_to?(:parameters) && parameters == other.parameters
    end

    def to_s
      parameters.to_s
    end

    private

    # Records +matrix+ as the original data model; the attribute name is the
    # downcased class name of the matrix with its namespace stripped.
    def register_matrix(matrix)
      motif_type = matrix.class.name.downcase.sub(/^.+::/,'').to_sym
      self.original_data_model = motif_type
      set_parameters(motif_type => matrix)
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  require_relative '../support'
2
2
  require_relative '../data_models'
3
+ require_relative '../conversion_algorithms/pcm2ppm_converter'
4
+ require_relative '../conversion_algorithms/pcm2pwm_converter'
3
5
 
4
6
  module Bioinform
5
7
  class PCM < PM
@@ -12,17 +14,11 @@ module Bioinform
12
14
  end
13
15
 
14
16
  def to_pwm(pseudocount = Math.log(count))
15
- mat = each_position.map do |pos|
16
- pos.each_index.map do |ind|
17
- Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
18
- end
19
- end
20
- PWM.new(matrix: mat, name: name, tags: tags, background: background)
17
+ ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
21
18
  end
22
19
 
23
20
  def to_ppm
24
- mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
25
- PPM.new(matrix: mat, name: name, tags: tags, background: background)
21
+ ConversionAlgorithms::PCM2PPMConverter.convert(self)
26
22
  end
27
23
  end
28
24
  end
@@ -1,6 +1,7 @@
1
1
  require 'ostruct'
2
2
  require_relative '../support'
3
3
  require_relative '../parsers'
4
+ require_relative '../formatters'
4
5
 
5
6
  module Bioinform
6
7
  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
@@ -10,22 +11,22 @@ module Bioinform
10
11
  attr_accessor :matrix, :parameters
11
12
 
12
13
  include Parameters
13
- make_parameters :tags, :name, :background
14
+ make_parameters :name, :background # , :tags
14
15
 
15
- def mark(tag)
16
- tags << tag
17
- end
16
+ # def mark(tag)
17
+ # tags << tag
18
+ # end
18
19
 
19
- def tagged?(tag)
20
- tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
21
- end
20
+ # def tagged?(tag)
21
+ # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
22
+ # end
22
23
 
23
24
  def self.choose_parser(input)
24
- [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
25
+ [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
25
26
  self.new(input, parser) rescue nil
26
27
  end
27
28
  end
28
-
29
+
29
30
  def self.split_on_motifs(input)
30
31
  parser = choose_parser(input)
31
32
  raise ParsingError, "No parser can parse given input" unless parser
@@ -39,7 +40,7 @@ module Bioinform
39
40
  result = parser.new(input).parse
40
41
  @matrix = result.matrix
41
42
  self.name = result.name
42
- self.tags = result.tags || []
43
+ # self.tags = result.tags || []
43
44
  self.background = result.background || [1, 1, 1, 1]
44
45
  raise 'matrix not valid' unless valid?
45
46
  end
@@ -77,21 +78,8 @@ module Bioinform
77
78
  end
78
79
  alias_method :size, :length
79
80
 
80
- def to_s(options = {})
81
- default_options = {with_name: true, letters_as_rows: false}
82
- options = default_options.merge(options)
83
- if options[:letters_as_rows]
84
- hsh = to_hash
85
- matrix_str = [:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
86
- else
87
- matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
88
- end
89
-
90
- if options[:with_name] && name
91
- name + "\n" + matrix_str
92
- else
93
- matrix_str
94
- end
81
+ def to_s(options = {}, formatter = RawFormatter)
82
+ formatter.new(self, options).to_s
95
83
  end
96
84
 
97
85
  def pretty_string(options = {})
@@ -152,22 +140,6 @@ module Bioinform
152
140
  background.map{|element| element.to_f / sum}
153
141
  end
154
142
 
155
- def best_score
156
- @matrix.inject(0.0){|sum, col| sum + col.max}
157
- end
158
- def worst_score
159
- @matrix.inject(0.0){|sum, col| sum + col.min}
160
- end
161
-
162
- # best score of suffix s[i..l]
163
- def best_suffix(i)
164
- @matrix[i...length].map(&:max).inject(0.0, &:+)
165
- end
166
-
167
- def worst_suffix(i)
168
- @matrix[i...length].map(&:min).inject(0.0, &:+)
169
- end
170
-
171
143
  def reverse_complement
172
144
  dup.reverse_complement!
173
145
  end
@@ -184,15 +156,14 @@ module Bioinform
184
156
  deep_dup
185
157
  end
186
158
 
187
- def to_pcm
188
- PCM.new(matrix: matrix, name: name, tags: tags, background: background)
159
+ def as_pcm
160
+ PCM.new(get_parameters.merge(matrix: matrix))
189
161
  end
190
- def to_ppm
191
- PPM.new(matrix: matrix, name: name, tags: tags, background: background)
162
+ def as_ppm
163
+ PPM.new(get_parameters.merge(matrix: matrix))
192
164
  end
193
- def to_pwm
194
- PWM.new(matrix: matrix, name: name, tags: tags, background: background)
165
+ def as_pwm
166
+ PWM.new(get_parameters.merge(matrix: matrix))
195
167
  end
196
-
197
168
  end
198
169
  end
@@ -36,5 +36,21 @@ module Bioinform
36
36
  def to_pwm
37
37
  self
38
38
  end
39
+
40
+ def best_score
41
+ @matrix.inject(0.0){|sum, col| sum + col.max}
42
+ end
43
# Sum of the smallest element of every matrix position (0.0 for an empty matrix).
def worst_score
  @matrix.map(&:min).inject(0.0) { |total, low| total + low }
end
46
+
47
+ # best score of suffix s[i..l]
48
# @param i [Integer] index of the first position included in the suffix
# @return [Float] sum of per-position maxima from position i to the end
def best_suffix(i)
  @matrix[i...length].inject(0.0) { |total, position| total + position.max }
end
51
+
52
# Worst score of the suffix starting at position i.
# @param i [Integer] index of the first position included in the suffix
# @return [Float] sum of per-position minima from position i to the end
def worst_suffix(i)
  @matrix[i...length].inject(0.0) { |total, position| total + position.min }
end
39
55
  end
40
56
  end
@@ -0,0 +1,2 @@
1
+ require_relative 'formatters/raw_formatter'
2
+ require_relative 'formatters/transfac_formatter'
@@ -0,0 +1,41 @@
1
# Renders a motif as plain tab-separated text: an optional name line
# followed by the matrix, either one position per line or one letter per line.
class RawFormatter
  attr_accessor :motif, :options

  # @param motif   object responding to +name+, +each_position+ and +to_hash+
  # @param options [Hash] :with_name (default true) and
  #   :letters_as_rows (default false)
  def initialize(motif, options = {})
    @motif = motif
    @options = {with_name: true, letters_as_rows: false}.merge(options)
  end

  def name
    motif.name
  end

  # The name followed by a newline, or an empty string when the name is
  # not requested or the motif has none.
  def header
    return '' unless options[:with_name]
    title = name
    title ? title + "\n" : ''
  end

  # Matrix body without a trailing newline.
  def matrix_string
    rows =
      if options[:letters_as_rows]
        by_letter = motif.to_hash
        [:A, :C, :G, :T].map { |letter| "#{letter}|#{by_letter[letter].join("\t")}" }
      else
        motif.each_position.map { |position| position.join("\t") }
      end
    rows.join("\n")
  end

  def footer
    # "\n"
    ''
  end

  def to_s
    [header, matrix_string, footer].join
  end
end
@@ -0,0 +1,39 @@
1
# Renders a motif as a Transfac-style record: ID / BF / P0 header lines,
# one numbered line per position, and an "XX" + "//" terminator.
class TransfacFormatter
  attr_accessor :motif, :options

  # @param motif   object responding to +name+ and +each_position+
  # @param options [Hash] merged over the defaults
  #   {with_name: true, letters_as_rows: false}
  def initialize(motif, options = {})
    @motif = motif
    @options = {with_name: true, letters_as_rows: false}.merge(options)
  end

  def name
    motif.name
  end

  # Header lines of the record.
  # @raise [RuntimeError] when the name is disabled via options or missing
  def header
    raise 'Transfac should have the name field' unless options[:with_name] && name
    "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
  end

  # One line per position: the zero-based position index padded to at least
  # two digits, a space, then the tab-separated values.
  def matrix_string
    lines = motif.each_position.each_with_index.map do |position, index|
      "#{index.to_s.rjust(2, '0')} #{position.join("\t")}"
    end
    lines.join("\n")
  end

  def footer
    #"XX\n//\n"
    "\nXX\n//"
  end

  def to_s
    [header, matrix_string, footer].join
  end
end
@@ -3,4 +3,5 @@ require_relative 'parsers/trivial_parser'
3
3
  require_relative 'parsers/yaml_parser'
4
4
  require_relative 'parsers/string_parser'
5
5
  require_relative 'parsers/string_fantom_parser'
6
- require_relative 'parsers/splittable_parser'
6
+ require_relative 'parsers/splittable_parser'
7
+ require_relative 'parsers/jaspar_parser'
@@ -0,0 +1,35 @@
1
+ require_relative '../support'
2
+ require_relative '../parsers/string_parser'
3
+
4
module Bioinform
  # StringParser variant for matrices written one letter per line, with the
  # numbers enclosed in square brackets, e.g. "A [ 1 2 3 ]".
  class JasparParser < StringParser
    # Header pattern with an empty +name+ capture group.
    def header_pat
      /(?<name>)/
    end

    # One row: a letter, then bracketed whitespace-separated numbers captured
    # as +row+, with an optional trailing newline.
    def row_pat
      /[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
    end

    # Consumes one or more "//" separator lines, if present.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Reads rows until none is left and transposes the collected rows.
    def parse_matrix
      rows = []
      while (row_string = scan_row)
        rows << split_row(row_string)
      end
      rows.transpose
    end

    # Parses one motif: skips leading spaces and separators, reads the name
    # and the matrix, then hands the matrix to Parser and sets the name on
    # its result.
    def parse!
      scan_any_spaces
      scan_splitter
      name = parse_name
      matrix = parse_matrix
      result = Parser.parse!(matrix)
      result.name = name
      result
    end

  end
end
@@ -8,7 +8,7 @@ module Bioinform
8
8
  attr_reader :scanner, :row_acgt_markers
9
9
 
10
10
  def initialize(input)
11
- raise ArgumentError unless input.is_a?(String)
11
+ raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
12
12
  super
13
13
  @scanner = StringScanner.new(input.multiline_squish)
14
14
  end
@@ -15,6 +15,7 @@ module Bioinform
15
15
  def parse!
16
16
  case input
17
17
  when PM then input
18
+ when Motif then input.pm
18
19
  when OpenStruct then input
19
20
  when Hash then OpenStruct.new(input)
20
21
  end
@@ -27,7 +28,7 @@ module Bioinform
27
28
  @input = input
28
29
  end
29
30
  def parse!
30
- input.collection.shift.first
31
+ input.container.shift.pm
31
32
  end
32
33
  end
33
34
  end
@@ -27,7 +27,7 @@ module Bioinform
27
27
  @collection ||= YAML.load(input)
28
28
  end
29
29
  def parse!
30
- collection.collection.shift.first
30
+ collection.container.shift.pm
31
31
  rescue Psych::SyntaxError
32
32
  raise 'parsing error'
33
33
  end
@@ -14,4 +14,5 @@ require_relative 'support/array_zip'
14
14
  require_relative 'support/array_product'
15
15
 
16
16
  require_relative 'support/advanced_scan'
17
- require_relative 'support/parameters'
17
+ require_relative 'support/parameters'
18
+ require_relative 'support/strip_doc'
@@ -1,19 +1,28 @@
1
- require 'ostruct'
2
- module Parameters
3
- def self.included(base)
4
- base.extend(ClassMethods)
5
- end
6
- module ClassMethods
7
- def make_parameters(*params)
8
- params.each do |param|
9
- define_method(param){ parameters.send(param) }
10
- define_method("#{param}="){|new_value| parameters.send("#{param}=", new_value) }
11
- end
12
- end
13
- end
14
- def parameters; @parameters ||= OpenStruct.new; end
15
- def set_parameters(hsh)
16
- hsh.each{|k,v| send("#{k}=", v) }
17
- self
18
- end
1
+ require 'ostruct'
2
module Bioinform
  # Mixin that keeps an object's attributes in a lazily created OpenStruct
  # and offers a class-level macro for declaring accessors backed by it.
  module Parameters
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Defines a reader and a writer for each given name; both delegate to
      # the instance's +parameters+ OpenStruct.
      def make_parameters(*params)
        params.each do |param|
          define_method(param){ parameters.send(param) }
          define_method("#{param}="){|new_value| parameters.send("#{param}=", new_value) }
        end
      end
    end

    # The OpenStruct holding all parameters, created on first access.
    def parameters; @parameters ||= OpenStruct.new; end

    # Assigns every key/value pair through the object's "key=" writer.
    # @param hsh [Hash] parameter names mapped to their values
    # @return [self]
    def set_parameters(hsh)
      hsh.each{|k,v| send("#{k}=", v) }
      self
    end

    # return hash of parameters
    # Goes through the +parameters+ accessor rather than the raw instance
    # variable, so an object whose parameters were never touched yields an
    # empty hash instead of failing on nil.
    def get_parameters
      parameters.marshal_dump
    end

    # @param param_name parameter key to look up
    #   (presumably a Symbol, as in the OpenStruct table - verify against callers)
    # @return [Boolean] whether the parameter has been set
    def parameter_defined?(param_name)
      get_parameters.has_key?(param_name)
    end
  end
end