bioinform 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. data/.gitignore +1 -0
  2. data/Gemfile +12 -0
  3. data/Guardfile +9 -0
  4. data/README.md +7 -1
  5. data/TODO.txt +8 -0
  6. data/bioinform.gemspec +7 -5
  7. data/lib/bioinform.rb +1 -0
  8. data/lib/bioinform/cli.rb +12 -3
  9. data/lib/bioinform/cli/convert_motif.rb +108 -0
  10. data/lib/bioinform/cli/merge_into_collection.rb +6 -2
  11. data/lib/bioinform/cli/pcm2pwm.rb +1 -1
  12. data/lib/bioinform/cli/split_motifs.rb +1 -1
  13. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +19 -0
  14. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +20 -0
  15. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +0 -0
  16. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +0 -0
  17. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  18. data/lib/bioinform/data_models/collection.rb +21 -35
  19. data/lib/bioinform/data_models/motif.rb +56 -0
  20. data/lib/bioinform/data_models/pcm.rb +4 -8
  21. data/lib/bioinform/data_models/pm.rb +19 -48
  22. data/lib/bioinform/data_models/pwm.rb +16 -0
  23. data/lib/bioinform/formatters.rb +2 -0
  24. data/lib/bioinform/formatters/raw_formatter.rb +41 -0
  25. data/lib/bioinform/formatters/transfac_formatter.rb +39 -0
  26. data/lib/bioinform/parsers.rb +2 -1
  27. data/lib/bioinform/parsers/jaspar_parser.rb +35 -0
  28. data/lib/bioinform/parsers/string_parser.rb +1 -1
  29. data/lib/bioinform/parsers/trivial_parser.rb +2 -1
  30. data/lib/bioinform/parsers/yaml_parser.rb +1 -1
  31. data/lib/bioinform/support.rb +2 -1
  32. data/lib/bioinform/support/parameters.rb +27 -18
  33. data/lib/bioinform/support/strip_doc.rb +9 -0
  34. data/lib/bioinform/version.rb +1 -1
  35. data/spec/cli/convert_motif_spec.rb +107 -0
  36. data/spec/cli/data/merge_into_collection/collection.yaml.result +186 -183
  37. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +186 -183
  38. data/spec/cli/data/split_motifs/collection.yaml +184 -193
  39. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +18 -0
  40. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -0
  41. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +50 -0
  42. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +5 -0
  43. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +4 -0
  44. data/spec/data_models/collection_spec.rb +36 -34
  45. data/spec/data_models/motif_spec.rb +224 -0
  46. data/spec/data_models/pcm_spec.rb +28 -17
  47. data/spec/data_models/pm_spec.rb +83 -121
  48. data/spec/data_models/pwm_spec.rb +38 -0
  49. data/spec/fabricators/collection_fabricator.rb +2 -2
  50. data/spec/fabricators/motif_fabricator.rb +33 -0
  51. data/spec/fabricators/motif_formats_fabricator.rb +125 -0
  52. data/spec/fabricators/pcm_fabricator.rb +25 -0
  53. data/spec/fabricators/pm_fabricator.rb +10 -1
  54. data/spec/fabricators/ppm_fabricator.rb +14 -0
  55. data/spec/fabricators/pwm_fabricator.rb +16 -0
  56. data/spec/parsers/trivial_parser_spec.rb +12 -12
  57. data/spec/parsers/yaml_parser_spec.rb +11 -11
  58. data/spec/spec_helper.rb +19 -49
  59. data/spec/spec_helper_source.rb +59 -0
  60. metadata +78 -7
@@ -0,0 +1,56 @@
1
+ require 'ostruct'
2
+ require 'active_support/core_ext/object/try'
3
+ require_relative '../support/parameters'
4
module Bioinform
  # Groups the matrix representations of a single motif (pcm, pwm, ppm) together
  # with its name and any additional free-form parameters.
  #
  # Accepted constructor forms:
  #   Motif.new                                           # empty motif
  #   Motif.new(pcm: ..., pwm: ..., name: ..., threshold: ...)
  #   Motif.new(my_pcm)                                   # a PM (or subclass) instance
  #   Motif.new(pm: my_pcm, threshold: ...)               # PM plus extra parameters
  # In the last two forms the data model (:pcm, :pwm, ...) is derived from the
  # class name of the given matrix and remembered in +original_data_model+.
  #
  # TODO: decide what should happen when the argument already is a Motif
  # (at the moment it is rejected with ArgumentError).
  class Motif
    include Parameters
    make_parameters :pcm, :pwm, :ppm, :name, :original_data_model

    # @param parameters [PM, Hash] a matrix object or a hash of parameters
    # @raise [ArgumentError] when the argument is neither a PM nor a Hash
    def initialize(parameters = {})
      case parameters
      when PM
        register_matrix(parameters)
      when Hash
        # Work on a copy: removing :pm must not mutate the caller's hash.
        remaining = parameters.dup
        register_matrix(remaining.delete(:pm)) if remaining[:pm].is_a?(PM)
        set_parameters(remaining)
      else
        raise ArgumentError, "Motif::new doesn't accept argument #{parameters} of class #{parameters.class}"
      end
    end

    # Matrix stored under the original data model; when no data model was
    # recorded, falls back to the raw :pm parameter.
    def pm
      model = original_data_model || :pm
      model == :pm ? parameters.pm : send(model)
    end

    # Explicitly stored PWM, otherwise one derived from the PCM (nil when there is no PCM).
    def pwm
      parameters.pwm || pcm.try(:to_pwm)
    end

    # Explicitly stored PPM, otherwise one derived from the PCM (nil when there is no PCM).
    def ppm
      parameters.ppm || pcm.try(:to_ppm)
    end

    # Explicitly stored name, otherwise the name of the underlying matrix
    # (nil when the motif holds no matrix at all).
    def name
      parameters.name || pm.try(:name)
    end

    # Any other message (e.g. readers/writers of ad-hoc parameters such as
    # +threshold+) is forwarded to the underlying parameters object.
    def method_missing(meth, *args, &block)
      parameters.__send__(meth, *args, &block)
    end

    # Two motifs are equal when their parameter sets are equal; objects that
    # do not expose #parameters are never equal to a motif.
    def ==(other)
      other.respond_to?(:parameters) && parameters == other.parameters
    end

    def to_s
      parameters.to_s
    end

    private

    # Report ad-hoc parameters that are actually stored so that respond_to?
    # agrees with method_missing for them. Deliberately limited to stored keys
    # rather than everything the parameters object itself responds to.
    def respond_to_missing?(meth, include_private = false)
      parameters.marshal_dump.key?(meth.to_s.chomp('=').to_sym) || super
    end

    # Stores +matrix+ under the parameter named after its class
    # (e.g. Bioinform::PCM -> :pcm) and remembers that name as the original data model.
    def register_matrix(matrix)
      motif_type = matrix.class.name.downcase.sub(/^.+::/, '').to_sym
      self.original_data_model = motif_type
      set_parameters(motif_type => matrix)
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  require_relative '../support'
2
2
  require_relative '../data_models'
3
+ require_relative '../conversion_algorithms/pcm2ppm_converter'
4
+ require_relative '../conversion_algorithms/pcm2pwm_converter'
3
5
 
4
6
  module Bioinform
5
7
  class PCM < PM
@@ -12,17 +14,11 @@ module Bioinform
12
14
  end
13
15
 
14
16
  def to_pwm(pseudocount = Math.log(count))
15
- mat = each_position.map do |pos|
16
- pos.each_index.map do |ind|
17
- Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
18
- end
19
- end
20
- PWM.new(matrix: mat, name: name, tags: tags, background: background)
17
+ ConversionAlgorithms::PCM2PWMConverter.convert(self, pseudocount: pseudocount)
21
18
  end
22
19
 
23
20
  def to_ppm
24
- mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
25
- PPM.new(matrix: mat, name: name, tags: tags, background: background)
21
+ ConversionAlgorithms::PCM2PPMConverter.convert(self)
26
22
  end
27
23
  end
28
24
  end
@@ -1,6 +1,7 @@
1
1
  require 'ostruct'
2
2
  require_relative '../support'
3
3
  require_relative '../parsers'
4
+ require_relative '../formatters'
4
5
 
5
6
  module Bioinform
6
7
  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
@@ -10,22 +11,22 @@ module Bioinform
10
11
  attr_accessor :matrix, :parameters
11
12
 
12
13
  include Parameters
13
- make_parameters :tags, :name, :background
14
+ make_parameters :name, :background # , :tags
14
15
 
15
- def mark(tag)
16
- tags << tag
17
- end
16
+ # def mark(tag)
17
+ # tags << tag
18
+ # end
18
19
 
19
- def tagged?(tag)
20
- tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
21
- end
20
+ # def tagged?(tag)
21
+ # tags.any?{|t| (t.eql? tag) || (t.respond_to?(:name) && t.name && (t.name == tag)) }
22
+ # end
22
23
 
23
24
  def self.choose_parser(input)
24
- [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
25
+ [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, JasparParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
25
26
  self.new(input, parser) rescue nil
26
27
  end
27
28
  end
28
-
29
+
29
30
  def self.split_on_motifs(input)
30
31
  parser = choose_parser(input)
31
32
  raise ParsingError, "No parser can parse given input" unless parser
@@ -39,7 +40,7 @@ module Bioinform
39
40
  result = parser.new(input).parse
40
41
  @matrix = result.matrix
41
42
  self.name = result.name
42
- self.tags = result.tags || []
43
+ # self.tags = result.tags || []
43
44
  self.background = result.background || [1, 1, 1, 1]
44
45
  raise 'matrix not valid' unless valid?
45
46
  end
@@ -77,21 +78,8 @@ module Bioinform
77
78
  end
78
79
  alias_method :size, :length
79
80
 
80
- def to_s(options = {})
81
- default_options = {with_name: true, letters_as_rows: false}
82
- options = default_options.merge(options)
83
- if options[:letters_as_rows]
84
- hsh = to_hash
85
- matrix_str = [:A,:C,:G,:T].collect{|letter| "#{letter}|" + hsh[letter].join("\t")}.join("\n")
86
- else
87
- matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
88
- end
89
-
90
- if options[:with_name] && name
91
- name + "\n" + matrix_str
92
- else
93
- matrix_str
94
- end
81
+ def to_s(options = {}, formatter = RawFormatter)
82
+ formatter.new(self, options).to_s
95
83
  end
96
84
 
97
85
  def pretty_string(options = {})
@@ -152,22 +140,6 @@ module Bioinform
152
140
  background.map{|element| element.to_f / sum}
153
141
  end
154
142
 
155
- def best_score
156
- @matrix.inject(0.0){|sum, col| sum + col.max}
157
- end
158
- def worst_score
159
- @matrix.inject(0.0){|sum, col| sum + col.min}
160
- end
161
-
162
- # best score of suffix s[i..l]
163
- def best_suffix(i)
164
- @matrix[i...length].map(&:max).inject(0.0, &:+)
165
- end
166
-
167
- def worst_suffix(i)
168
- @matrix[i...length].map(&:min).inject(0.0, &:+)
169
- end
170
-
171
143
  def reverse_complement
172
144
  dup.reverse_complement!
173
145
  end
@@ -184,15 +156,14 @@ module Bioinform
184
156
  deep_dup
185
157
  end
186
158
 
187
- def to_pcm
188
- PCM.new(matrix: matrix, name: name, tags: tags, background: background)
159
+ def as_pcm
160
+ PCM.new(get_parameters.merge(matrix: matrix))
189
161
  end
190
- def to_ppm
191
- PPM.new(matrix: matrix, name: name, tags: tags, background: background)
162
+ def as_ppm
163
+ PPM.new(get_parameters.merge(matrix: matrix))
192
164
  end
193
- def to_pwm
194
- PWM.new(matrix: matrix, name: name, tags: tags, background: background)
165
+ def as_pwm
166
+ PWM.new(get_parameters.merge(matrix: matrix))
195
167
  end
196
-
197
168
  end
198
169
  end
@@ -36,5 +36,21 @@ module Bioinform
36
36
  def to_pwm
37
37
  self
38
38
  end
39
+
40
+ def best_score
41
+ @matrix.inject(0.0){|sum, col| sum + col.max}
42
+ end
43
+ def worst_score
44
+ @matrix.inject(0.0){|sum, col| sum + col.min}
45
+ end
46
+
47
+ # best score of suffix s[i..l]
48
+ def best_suffix(i)
49
+ @matrix[i...length].map(&:max).inject(0.0, &:+)
50
+ end
51
+
52
+ def worst_suffix(i)
53
+ @matrix[i...length].map(&:min).inject(0.0, &:+)
54
+ end
39
55
  end
40
56
  end
@@ -0,0 +1,2 @@
1
+ require_relative 'formatters/raw_formatter'
2
+ require_relative 'formatters/transfac_formatter'
@@ -0,0 +1,41 @@
1
# Renders a motif as plain tab-separated text, optionally preceded by its name.
#
# Options:
#   :with_name       - put the motif name on its own line before the matrix (default: true)
#   :letters_as_rows - print one "A|...", "C|...", "G|...", "T|..." row per letter
#                      instead of one line per position (default: false)
#
# The motif must respond to #name and #each_position (returning an enumerator
# when called without a block) and, for :letters_as_rows, to #to_hash keyed by
# :A, :C, :G and :T.
class RawFormatter
  DEFAULT_OPTIONS = {with_name: true, letters_as_rows: false}.freeze
  LETTERS = [:A, :C, :G, :T].freeze

  attr_accessor :motif, :options

  # @param motif   the object to render
  # @param options [Hash, nil] overrides for DEFAULT_OPTIONS (nil means "no overrides")
  def initialize(motif, options = {})
    @motif = motif
    @options = DEFAULT_OPTIONS.merge(options || {})
  end

  def name
    motif.name
  end

  # Name line, or an empty string when the name is disabled or missing.
  # Interpolation is used so that non-String names (e.g. Symbols) work too.
  def header
    return '' unless options[:with_name] && name
    "#{name}\n"
  end

  # The matrix body without a trailing newline.
  def matrix_string
    lines =
      if options[:letters_as_rows]
        rows = motif.to_hash
        LETTERS.map { |letter| "#{letter}|#{rows[letter].join("\t")}" }
      else
        motif.each_position.map { |position| position.join("\t") }
      end
    lines.join("\n")
  end

  # Nothing is appended after the matrix.
  def footer
    ''
  end

  def to_s
    header + matrix_string + footer
  end
end
@@ -0,0 +1,39 @@
1
# Renders a motif in a TRANSFAC-like text layout:
#
#   ID <name>
#   BF StubSpeciesName
#   P0<TAB>A<TAB>C<TAB>G<TAB>T
#   00 <tab-separated values of position 0>
#   01 <tab-separated values of position 1>
#   ...
#   XX
#   //
#
# The motif must respond to #name and #each_position (returning an enumerator
# when called without a block).
#
# NOTE(review): positions are numbered from 00 here; published TRANSFAC
# examples appear to start at 01 - confirm which one consumers expect.
class TransfacFormatter
  # :letters_as_rows is accepted for parity with RawFormatter but is not used here.
  DEFAULT_OPTIONS = {with_name: true, letters_as_rows: false}.freeze

  attr_accessor :motif, :options

  # @param motif   the object to render
  # @param options [Hash, nil] overrides for DEFAULT_OPTIONS (nil means "no overrides")
  def initialize(motif, options = {})
    @motif = motif
    @options = DEFAULT_OPTIONS.merge(options || {})
  end

  def name
    motif.name
  end

  # Header lines, including the column caption.
  # @raise [RuntimeError] when the name is disabled via :with_name or missing
  def header
    raise 'Transfac should have the name field' unless options[:with_name] && name
    "ID #{name}\nBF StubSpeciesName\nP0\tA\tC\tG\tT\n"
  end

  # One line per position: the zero-based index padded to at least two digits,
  # a space, then the tab-separated values.
  def matrix_string
    rows = motif.each_position.each_with_index.map do |position, index|
      "#{index.to_s.rjust(2, '0')} #{position.join("\t")}"
    end
    rows.join("\n")
  end

  # Record terminator; starts with a newline because matrix_string has no trailing one.
  def footer
    "\nXX\n//"
  end

  def to_s
    header + matrix_string + footer
  end
end
@@ -3,4 +3,5 @@ require_relative 'parsers/trivial_parser'
3
3
  require_relative 'parsers/yaml_parser'
4
4
  require_relative 'parsers/string_parser'
5
5
  require_relative 'parsers/string_fantom_parser'
6
- require_relative 'parsers/splittable_parser'
6
+ require_relative 'parsers/splittable_parser'
7
+ require_relative 'parsers/jaspar_parser'
@@ -0,0 +1,35 @@
1
+ require_relative '../support'
2
+ require_relative '../parsers/string_parser'
3
+
4
module Bioinform
  # StringParser variant for matrices written one bracketed row per letter, e.g.
  #   A [ 1 2 3 ]
  # Each scanned row starts with one of A, C, G, T; the collected rows are
  # transposed before being handed to Parser.parse!.
  class JasparParser < StringParser
    # The name group matches only the empty string.
    def header_pat
      /(?<name>)/
    end

    # A letter, optional whitespace, then whitespace-separated numbers in square
    # brackets; the numbers are captured in the +row+ group.
    def row_pat
      /[ACGT]\s*\[\s*(?<row>(#{number_pat}\s+)*#{number_pat})\s*\]\n?/
    end

    # Consumes one or more "//" separator lines.
    def scan_splitter
      scanner.scan(/(\/\/\n)+/)
    end

    # Reads rows until no further row can be scanned, then transposes them.
    def parse_matrix
      rows = []
      row_string = scan_row
      while row_string
        rows << split_row(row_string)
        row_string = scan_row
      end
      rows.transpose
    end

    # Skips leading whitespace and separators, reads the name and the matrix,
    # and returns the result of Parser.parse! with the name assigned.
    def parse!
      scan_any_spaces
      scan_splitter
      motif_name = parse_name
      parsed = Parser.parse!(parse_matrix)
      parsed.name = motif_name
      parsed
    end
  end
end
@@ -8,7 +8,7 @@ module Bioinform
8
8
  attr_reader :scanner, :row_acgt_markers
9
9
 
10
10
  def initialize(input)
11
- raise ArgumentError unless input.is_a?(String)
11
+ raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)
12
12
  super
13
13
  @scanner = StringScanner.new(input.multiline_squish)
14
14
  end
@@ -15,6 +15,7 @@ module Bioinform
15
15
  def parse!
16
16
  case input
17
17
  when PM then input
18
+ when Motif then input.pm
18
19
  when OpenStruct then input
19
20
  when Hash then OpenStruct.new(input)
20
21
  end
@@ -27,7 +28,7 @@ module Bioinform
27
28
  @input = input
28
29
  end
29
30
  def parse!
30
- input.collection.shift.first
31
+ input.container.shift.pm
31
32
  end
32
33
  end
33
34
  end
@@ -27,7 +27,7 @@ module Bioinform
27
27
  @collection ||= YAML.load(input)
28
28
  end
29
29
  def parse!
30
- collection.collection.shift.first
30
+ collection.container.shift.pm
31
31
  rescue Psych::SyntaxError
32
32
  raise 'parsing error'
33
33
  end
@@ -14,4 +14,5 @@ require_relative 'support/array_zip'
14
14
  require_relative 'support/array_product'
15
15
 
16
16
  require_relative 'support/advanced_scan'
17
- require_relative 'support/parameters'
17
+ require_relative 'support/parameters'
18
+ require_relative 'support/strip_doc'
@@ -1,19 +1,28 @@
1
- require 'ostruct'
2
- module Parameters
3
- def self.included(base)
4
- base.extend(ClassMethods)
5
- end
6
- module ClassMethods
7
- def make_parameters(*params)
8
- params.each do |param|
9
- define_method(param){ parameters.send(param) }
10
- define_method("#{param}="){|new_value| parameters.send("#{param}=", new_value) }
11
- end
12
- end
13
- end
14
- def parameters; @parameters ||= OpenStruct.new; end
15
- def set_parameters(hsh)
16
- hsh.each{|k,v| send("#{k}=", v) }
17
- self
18
- end
1
+ require 'ostruct'
2
module Bioinform
  # Mixin that keeps an object's named parameters in an OpenStruct and offers a
  # class-level macro to generate accessors for them.
  #
  #   class Foo
  #     include Bioinform::Parameters
  #     make_parameters :name, :background
  #   end
  module Parameters
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Defines a reader and a writer for every given parameter name; both
      # delegate to the object's +parameters+ store.
      def make_parameters(*params)
        params.each do |param|
          define_method(param) { parameters.send(param) }
          define_method("#{param}=") { |new_value| parameters.send("#{param}=", new_value) }
        end
      end
    end

    # Lazily created parameter store.
    def parameters
      @parameters ||= OpenStruct.new
    end

    # Assigns every key/value pair through the corresponding writer.
    # @return [self] to allow chaining
    def set_parameters(hsh)
      hsh.each { |key, value| send("#{key}=", value) }
      self
    end

    # Hash of the parameters set so far.
    # Goes through the +parameters+ reader so that it works (returning {}) before
    # any parameter has been assigned, and returns a copy so that callers cannot
    # modify the store by accident.
    def get_parameters
      parameters.marshal_dump.dup
    end

    # Whether a parameter with the given name has been set.
    def parameter_defined?(param_name)
      get_parameters.key?(param_name)
    end
  end
end