bioinform 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/TODO.txt +7 -2
  2. data/bin/merge_into_collection +4 -0
  3. data/bin/pcm2pwm +1 -1
  4. data/bin/split_motifs +1 -1
  5. data/bioinform.gemspec +2 -0
  6. data/lib/bioinform/cli/merge_into_collection.rb +76 -0
  7. data/lib/bioinform/cli/pcm2pwm.rb +20 -20
  8. data/lib/bioinform/cli/split_motifs.rb +21 -20
  9. data/lib/bioinform/cli.rb +16 -2
  10. data/lib/bioinform/data_models/collection.rb +13 -10
  11. data/lib/bioinform/data_models/pcm.rb +2 -2
  12. data/lib/bioinform/data_models/pm.rb +24 -37
  13. data/lib/bioinform/data_models/ppm.rb +2 -2
  14. data/lib/bioinform/data_models/pwm.rb +2 -2
  15. data/lib/bioinform/data_models.rb +8 -8
  16. data/lib/bioinform/parsers/parser.rb +10 -5
  17. data/lib/bioinform/parsers/splittable_parser.rb +57 -0
  18. data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
  19. data/lib/bioinform/parsers/string_parser.rb +5 -24
  20. data/lib/bioinform/parsers/trivial_parser.rb +19 -3
  21. data/lib/bioinform/parsers/yaml_parser.rb +35 -0
  22. data/lib/bioinform/parsers.rb +6 -4
  23. data/lib/bioinform/support/parameters.rb +19 -0
  24. data/lib/bioinform/support/partial_sums.rb +1 -1
  25. data/lib/bioinform/support.rb +11 -10
  26. data/lib/bioinform/version.rb +1 -1
  27. data/lib/bioinform.rb +5 -5
  28. data/spec/cli/cli_spec.rb +8 -7
  29. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
  30. data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
  31. data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
  32. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
  33. data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
  34. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
  35. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
  36. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
  37. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
  38. data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
  39. data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
  40. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
  41. data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
  42. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
  43. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
  44. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
  45. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
  46. data/spec/cli/data/split_motifs/collection.yaml +197 -0
  47. data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
  48. data/spec/cli/merge_into_collection_spec.rb +100 -0
  49. data/spec/cli/pcm2pwm_spec.rb +3 -3
  50. data/spec/cli/split_motifs_spec.rb +74 -3
  51. data/spec/data_models/collection_spec.rb +2 -2
  52. data/spec/data_models/pcm_spec.rb +2 -2
  53. data/spec/data_models/pm_spec.rb +10 -27
  54. data/spec/data_models/ppm_spec.rb +2 -2
  55. data/spec/data_models/pwm_spec.rb +3 -3
  56. data/spec/fabricators/collection_fabricator.rb +8 -0
  57. data/spec/fabricators/pm_fabricator.rb +43 -0
  58. data/spec/parsers/parser_spec.rb +29 -37
  59. data/spec/parsers/string_fantom_parser_spec.rb +38 -35
  60. data/spec/parsers/string_parser_spec.rb +33 -66
  61. data/spec/parsers/trivial_parser_spec.rb +48 -6
  62. data/spec/parsers/yaml_parser_spec.rb +50 -0
  63. data/spec/spec_helper.rb +2 -6
  64. data/spec/support/advanced_scan_spec.rb +2 -2
  65. data/spec/support/array_product_spec.rb +2 -2
  66. data/spec/support/array_zip_spec.rb +2 -2
  67. data/spec/support/collect_hash_spec.rb +2 -2
  68. data/spec/support/delete_many_spec.rb +2 -2
  69. data/spec/support/inverf_spec.rb +2 -2
  70. data/spec/support/multiline_squish_spec.rb +2 -2
  71. data/spec/support/partial_sums_spec.rb +2 -2
  72. data/spec/support/same_by_spec.rb +2 -2
  73. metadata +86 -12
data/TODO.txt CHANGED
@@ -1,11 +1,14 @@
1
1
  ToDo:
2
2
  How to make PM#equal? and PM#hash so that using PMs in Sets wouldn't destroy comparability of Sets, and two sets with the same PMs (but different objects) would be equal? (Also applies to using a PM as a hash key.)
3
-
3
+ Make specs and fix code in such a way that Parser.split_on_motifs and so on return consistent results. E.g. Parser.parse! raised an error on multiple invocations
4
+
5
+ Refactor CLI::SplitMotifs at the place where it splits a collection file and either chooses real data models or makes a PM
6
+
4
7
  Make parser exception print out text where parsing was broken (processing line +- 2 nearest lines and command and line numbers)
5
8
  Prevent parser from going into an infinite loop
6
9
 
7
10
  Create CLI-apps:
8
- -- to merge many files(or whole folder) to a Collection
11
+ -- to merge many files (or a whole folder) into a Collection (in a way that makes it possible to give the collection a name)
9
12
 
10
13
  Make Parsers switchable at runtime so that one could parse a string composed of two motifs in different formats.
11
14
 
@@ -18,6 +21,8 @@ Decide:
18
21
  -- should background be in PM by default?
19
22
  -- refactor PM.new, #== and so on to make it possible to consistently introduce or remove a variable on a single line
20
23
  -- Make PCM#valid? and PPM#valid? more specific. This shouldn't destroy the ability to load arbitrary data as a matrix, but only in force mode (I don't understand yet where it should be: in a constructor or elsewhere? And which validation-"severity" levels should there be? Strong validation - size-only-validation - size-and-type-validation - no validation ??? Or maybe options: valid_strictness: 'strict', 'usual', 'strict_with_name' ??? It should be considered)
24
+ -- PM#to_pcm and friends have unintuitive behavior. E.g. pm.to_pcm.to_pwm != pm.to_pwm. The first is the matrix treated as a PCM and then converted, while the second is the matrix treated as a PWM from the start
25
+ -- Should the parser be reloadable or not? Maybe delete #reset_scanner?
21
26
 
22
27
  Specs
23
28
  -- PWM#probabilities, #score_variance, #gauss_estimation
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/bioinform/cli/merge_into_collection'
4
+ Bioinform::CLI::MergeIntoCollection.main(ARGV)
data/bin/pcm2pwm CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'bioinform/cli/pcm2pwm'
3
+ require_relative '../lib/bioinform/cli/pcm2pwm'
4
4
  Bioinform::CLI::PCM2PWM.main(ARGV)
data/bin/split_motifs CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'bioinform/cli/split_motifs'
3
+ require_relative '../lib/bioinform/cli/split_motifs'
4
4
  Bioinform::CLI::SplitMotifs.main(ARGV)
data/bioinform.gemspec CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |gem|
16
16
  gem.version = Bioinform::VERSION
17
17
 
18
18
  gem.add_dependency('activesupport', '>= 3.0.0')
19
+ gem.add_dependency('docopt', '>= 0.5.0')
19
20
 
20
21
  gem.add_development_dependency "rspec", ">= 2.0"
22
+ gem.add_development_dependency "fabrication", ">= 2.2.3"
21
23
  end
@@ -0,0 +1,76 @@
1
+ require_relative '../../bioinform'
2
+ require 'docopt'
3
+ require 'shellwords'
4
+ require 'yaml'
5
+
6
+ module Bioinform
7
+ module CLI
8
+ module MergeIntoCollection
9
+ extend Bioinform::CLI::Helpers
10
+ def self.main(argv)
11
+ doc = <<-DOCOPT
12
+ Tool for merging multiple motifs into a single collection file.
13
+ It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.
14
+
15
+ Usage:
16
+ #{__FILE__} [options] [<pm-files>...]
17
+
18
+ Options:
19
+ -h --help Show this screen.
20
+ -n --name NAME Specify name for a collection. Default filename is based on this parameter
21
+ -o --output-file FILE Output file for resulting collection
22
+ -m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
23
+ -p --plain-text Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
24
+ DOCOPT
25
+
26
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
27
+ options = Docopt::docopt(doc, argv: argv)
28
+
29
+ plain_text = options['--plain-text']
30
+ name = options['--name']
31
+ if options['--plain-text']
32
+ output_file = options['--output-file'] || set_extension(name || 'collection', 'txt')
33
+ else
34
+ output_file = options['--output-file'] || set_extension(name || 'collection', 'yaml')
35
+ end
36
+ data_model = Bioinform.const_get(options['--data-model'].upcase)
37
+
38
+ if options['<pm-files>'].empty?
39
+ filelist = $stdin.read.shellsplit
40
+ else
41
+ filelist = options['<pm-files>']
42
+ end
43
+
44
+ filelist = filelist.map do |data_source|
45
+ if File.directory? data_source
46
+ Dir.glob(File.join(data_source, '*'))
47
+ elsif File.file? data_source
48
+ data_source
49
+ else
50
+ raise "File or directory #{data_source} can't be found"
51
+ end
52
+ end.flatten
53
+
54
+ collection = Collection.new
55
+ collection.name = name if name
56
+
57
+ filelist.each do |filename|
58
+ data_model.split_on_motifs(File.read(filename)).each do |pm|
59
+ pm.name ||= File.basename(filename, File.extname(filename))
60
+ collection << pm
61
+ end
62
+ end
63
+
64
+ if plain_text
65
+ File.open(output_file, 'w'){|f| f.puts(collection.to_s(false)) }
66
+ else
67
+ File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
68
+ end
69
+
70
+ rescue Docopt::Exit => e
71
+ puts e.message
72
+ end
73
+
74
+ end
75
+ end
76
+ end
@@ -1,39 +1,39 @@
1
- require 'bioinform'
1
+ require_relative '../../bioinform'
2
2
  require 'docopt'
3
3
  require 'shellwords'
4
4
 
5
5
  module Bioinform
6
- module CLI
6
+ module CLI
7
7
  module PCM2PWM
8
+ extend Bioinform::CLI::Helpers
8
9
  def self.main(argv)
9
10
  doc = <<-DOCOPT
10
- PCM to PWM converter.
11
- It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
12
- When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
11
+ PCM to PWM converter.
12
+ It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
13
+ When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
13
14
 
14
- Usage:
15
- #{__FILE__} [options] [<pcm-files>...]
15
+ Usage:
16
+ #{__FILE__} [options] [<pcm-files>...]
16
17
 
17
- Options:
18
- -h --help Show this screen.
19
- -e --extension EXT Extension of output files [default: pwm]
20
- -f --folder FOLDER Where to save output files [default: .]
18
+ Options:
19
+ -h --help Show this screen.
20
+ -e --extension EXT Extension of output files [default: pwm]
21
+ -f --folder FOLDER Where to save output files [default: .]
21
22
  DOCOPT
22
23
 
24
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
23
25
  options = Docopt::docopt(doc, argv: argv)
24
26
 
25
- if options['<pcm-files>'].empty?
26
- filelist = $stdin.read.shellsplit
27
- else
28
- filelist = options['<pcm-files>']
29
- end
30
-
27
+ pcm_files = options['<pcm-files>']
31
28
  folder = options['--folder']
29
+ extension = options['--extension']
30
+
32
31
  Dir.mkdir(folder) unless Dir.exist?(folder)
32
+ filelist = (pcm_files.empty?) ? $stdin.read.shellsplit : pcm_files
33
33
 
34
- filelist.each do |pcm_filename|
35
- pwm = Bioinform::PCM.new( File.read(pcm_filename) ).to_pwm
36
- File.open(Bioinform::CLI.output_filename(pcm_filename, options['--extension'], folder), 'w') do |f|
34
+ filelist.each do |filename|
35
+ pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
36
+ File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
37
37
  f.puts pwm
38
38
  end
39
39
  end
@@ -1,40 +1,41 @@
1
- require 'bioinform'
1
+ require_relative '../../bioinform'
2
2
  require 'docopt'
3
3
 
4
4
  module Bioinform
5
5
  module CLI
6
6
  module SplitMotifs
7
-
7
+ extend Bioinform::CLI::Helpers
8
8
  def self.main(argv)
9
9
  doc = <<-DOCOPT
10
- Motif splitter.
11
- It get a file with a set of motifs and splits it into motifs according to their names.
10
+ Motif splitter.
11
+ It get a file with a set of motifs and splits it into motifs according to their names.
12
12
 
13
- Usage:
14
- #{__FILE__} [options] <collection-file>
13
+ Usage:
14
+ #{__FILE__} [options] <collection-file>
15
15
 
16
- Options:
17
- -h --help Show this screen.
18
- -m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
19
- -e --extension EXT Extension of output files (by default it's based on data model)
20
- -f --folder FOLDER Where to save output files [default: .]
16
+ Options:
17
+ -h --help Show this screen.
18
+ -e --extension EXT Extension of output files
19
+ -f --folder FOLDER Where to save output files [default: .]
21
20
  DOCOPT
22
21
 
22
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
23
23
  options = Docopt::docopt(doc, argv: argv)
24
24
 
25
25
  folder = options['--folder']
26
- Dir.mkdir(folder) unless Dir.exist?(folder)
27
-
28
- data_model = Bioinform.const_get(options['--data-model'].upcase)
29
- extension = options['--extension'] || options['--data-model'].downcase
30
-
26
+ extension = options['--extension']
31
27
  collection_filename = options['<collection-file>']
28
+
29
+ Dir.mkdir(folder) unless Dir.exist?(folder)
32
30
  raise "File #{collection_filename} not exist" unless File.exist? collection_filename
33
- input = File.read(collection_filename)
34
31
 
35
- data_model.choose_parser(input).split_on_motifs(input, data_model).each do |motif|
36
- File.open(File.join(folder, "#{motif.name}.#{extension}"), 'w') do |f|
37
- f.puts motif
32
+ input = File.read(collection_filename)
33
+ Parser.choose(input).split.each do |motif|
34
+ if motif.is_a? PM
35
+ File.open(set_folder(folder, set_extension(motif.name, extension || motif.class.name.gsub(/^.*::/,'').downcase)), 'w'){|f| f.puts motif}
36
+ else
37
+ motif = PM.new(motif)
38
+ File.open(set_folder(folder, set_extension(motif.name, extension || 'mat')), 'w'){|f| f.puts motif}
38
39
  end
39
40
  end
40
41
  rescue Docopt::Exit => e
data/lib/bioinform/cli.rb CHANGED
@@ -1,7 +1,21 @@
1
1
  module Bioinform
2
2
  module CLI
3
- def self.output_filename(input_filename, extension, folder)
4
- File.join(folder, File.basename(input_filename, File.extname(input_filename)) + ".#{extension}")
3
+ module Helpers
4
+ def basename_wo_extension(filename)
5
+ File.basename(filename, File.extname(filename))
6
+ end
7
+ def set_extension(filename, extension)
8
+ "#{filename}.#{extension}"
9
+ end
10
+ def set_folder(folder, filename)
11
+ File.join(folder, filename)
12
+ end
13
+ def change_extension(filename, extension)
14
+ set_extension(basename_wo_extension(filename), extension)
15
+ end
16
+ def change_folder_and_extension(input_filename, extension, folder)
17
+ set_folder(folder, change_extension(input_filename, extension))
18
+ end
5
19
  end
6
20
  end
7
21
  end
@@ -2,7 +2,10 @@ require 'ostruct'
2
2
 
3
3
  module Bioinform
4
4
  class Collection
5
- attr_reader :collection, :parameters
5
+ attr_reader :collection
6
+
7
+ include Parameters
8
+ make_parameters :name
6
9
 
7
10
  # collection name is a tag name for each motif in a collection. But a motif can be included in several collections, so it can have several tags
8
11
  def initialize(parameters = {})
@@ -15,14 +18,14 @@ module Bioinform
15
18
  collection.size
16
19
  end
17
20
 
18
- def name
19
- parameters.name
21
+ def to_s(with_name = true)
22
+ result = (with_name) ? "Collection: #{name.to_s}\n" : ''
23
+ each do |pm, infos|
24
+ result << pm.to_s << "\n\n"
25
+ end
26
+ result
20
27
  end
21
28
 
22
- def to_s
23
- "<Collection '#{name}'>"
24
- end
25
-
26
29
  def +(other)
27
30
  result = self.class.new
28
31
  each do |pm, infos|
@@ -39,7 +42,7 @@ module Bioinform
39
42
  collection << [pm, info]
40
43
  self
41
44
  end
42
-
45
+
43
46
  def <<(pm)
44
47
  add_pm(pm, OpenStruct.new)
45
48
  end
@@ -51,7 +54,7 @@ module Bioinform
51
54
  Enumerator.new(self, :each)
52
55
  end
53
56
  end
54
-
57
+
55
58
  def each_pm
56
59
  if block_given?
57
60
  each{|pm, infos| yield pm}
@@ -75,7 +78,7 @@ module Bioinform
75
78
  end # end
76
79
  end # end
77
80
  end
78
-
81
+
79
82
  def ==(other)
80
83
  (collection == other.collection) && (parameters == other.parameters)
81
84
  rescue
@@ -1,5 +1,5 @@
1
- require 'bioinform/support'
2
- require 'bioinform/data_models'
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
3
 
4
4
  module Bioinform
5
5
  class PCM < PM
@@ -1,13 +1,16 @@
1
- require 'bioinform/support'
2
- require 'bioinform/parsers'
1
+ require 'ostruct'
2
+ require_relative '../support'
3
+ require_relative '../parsers'
3
4
 
4
5
  module Bioinform
5
6
  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
6
7
  LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
7
8
 
8
9
  class PM
9
- attr_reader :matrix, :tags
10
- attr_accessor :background, :name
10
+ attr_accessor :matrix, :parameters
11
+
12
+ include Parameters
13
+ make_parameters :tags, :name, :background
11
14
 
12
15
  def mark(tag)
13
16
  tags << tag
@@ -18,24 +21,31 @@ module Bioinform
18
21
  end
19
22
 
20
23
  def self.choose_parser(input)
21
- [TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
24
+ [TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
22
25
  self.new(input, parser) rescue nil
23
26
  end
24
27
  end
28
+
29
+ def self.split_on_motifs(input)
30
+ parser = choose_parser(input)
31
+ raise ParsingError, "No parser can parse given input" unless parser
32
+ parser.split_on_motifs(input, self)
33
+ end
25
34
 
26
35
  def initialize(input, parser = nil)
36
+ @parameters = OpenStruct.new
27
37
  parser ||= self.class.choose_parser(input)
28
38
  raise 'No one parser can process input' unless parser
29
39
  result = parser.new(input).parse
30
- @matrix = result[:matrix]
31
- @name = result[:name]
32
- @tags = result[:tags] || []
33
- @background = result[:background] || [1, 1, 1, 1]
40
+ @matrix = result.matrix
41
+ self.name = result.name
42
+ self.tags = result.tags || []
43
+ self.background = result.background || [1, 1, 1, 1]
34
44
  raise 'matrix not valid' unless valid?
35
45
  end
36
46
 
37
47
  def ==(other)
38
- @matrix == other.matrix && @background == other.background && @name == other.name
48
+ @matrix == other.matrix && background == other.background && name == other.name
39
49
  rescue
40
50
  false
41
51
  end
@@ -77,8 +87,8 @@ module Bioinform
77
87
  matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
78
88
  end
79
89
 
80
- if options[:with_name] && @name
81
- @name + "\n" + matrix_str
90
+ if options[:with_name] && name
91
+ name + "\n" + matrix_str
82
92
  else
83
93
  matrix_str
84
94
  end
@@ -97,8 +107,8 @@ module Bioinform
97
107
 
98
108
  matrix_str = matrix_rows.join("\n")
99
109
 
100
- if options[:with_name] && @name
101
- @name + "\n" + header + matrix_str
110
+ if options[:with_name] && name
111
+ name + "\n" + header + matrix_str
102
112
  else
103
113
  header + matrix_str
104
114
  end
@@ -111,22 +121,6 @@ module Bioinform
111
121
  hsh.with_indifferent_access
112
122
  end
113
123
 
114
- # pm.background - returns a @background attribute
115
- # pm.background(new_background) - sets an attribute and returns pm itself
116
- # if more than one argument passed - raises an exception
117
- def background(*args)
118
- case args.size
119
- when 0 then @background
120
- when 1 then background!(args[0])
121
- else raise ArgumentError, '#background method can get 0 or 1 argument'
122
- end
123
- end
124
-
125
- def background!(new_background)
126
- @background = new_background
127
- self
128
- end
129
-
130
124
  def self.zero_column
131
125
  [0, 0, 0, 0]
132
126
  end
@@ -158,13 +152,6 @@ module Bioinform
158
152
  background.map{|element| element.to_f / sum}
159
153
  end
160
154
 
161
- #def split(first_chunk_length)
162
- # [@matrix.first(first_chunk_length), matrix.last(length - first_chunk_length)]
163
- #end
164
- #def permute_columns(permutation_index)
165
- # @matrix.values_at(permutation_index)permutation_index.map{|col| matrix[col]}
166
- #end
167
-
168
155
  def best_score
169
156
  @matrix.inject(0.0){|sum, col| sum + col.max}
170
157
  end
@@ -1,5 +1,5 @@
1
- require 'bioinform/support'
2
- require 'bioinform/data_models'
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
3
 
4
4
  module Bioinform
5
5
  class PPM < PM
@@ -1,5 +1,5 @@
1
- require 'bioinform/support'
2
- require 'bioinform/data_models'
1
+ require_relative '../support'
2
+ require_relative '../data_models'
3
3
  module Bioinform
4
4
  class PWM < PM
5
5
  def score_mean
@@ -1,11 +1,11 @@
1
- require 'bioinform/parsers'
1
+ require_relative 'parsers'
2
2
 
3
- require 'bioinform/data_models/pm'
4
- require 'bioinform/data_models/pcm'
5
- require 'bioinform/data_models/ppm'
6
- require 'bioinform/data_models/pwm'
3
+ require_relative 'data_models/pm'
4
+ require_relative 'data_models/pcm'
5
+ require_relative 'data_models/ppm'
6
+ require_relative 'data_models/pwm'
7
7
 
8
- require 'bioinform/data_models/collection'
8
+ require_relative 'data_models/collection'
9
9
 
10
- #require 'bioinform/data_models/iupac_word'
11
- #require 'bioinform/data_models/iupac_wordset'
10
+ #require_relative 'bioinform/data_models/iupac_word'
11
+ #require_relative 'bioinform/data_models/iupac_wordset'
@@ -1,7 +1,13 @@
1
- require 'bioinform/support'
2
- require 'bioinform/data_models/pm'
1
+ require 'ostruct'
2
+ require_relative '../support'
3
+ require_relative '../data_models/pm'
4
+ require_relative 'splittable_parser'
3
5
 
4
6
  module Bioinform
7
+ class Error < StandardError; end
8
+ class ParsingError < Error; end
9
+ class InvalidMatrix < Error; end
10
+
5
11
  class Parser
6
12
  attr_reader :input
7
13
 
@@ -19,8 +25,8 @@ module Bioinform
19
25
 
20
26
  def parse!
21
27
  matrix = self.class.transform_input(input)
22
- raise 'Parsing error' unless self.class.valid_matrix?(matrix)
23
- {matrix: matrix}
28
+ raise InvalidMatrix unless self.class.valid_matrix?(matrix)
29
+ OpenStruct.new(matrix: matrix)
24
30
  end
25
31
 
26
32
  def parse
@@ -78,6 +84,5 @@ module Bioinform
78
84
  def self.need_tranpose?(input)
79
85
  (input.size == 4) && input.any?{|x| x.size != 4}
80
86
  end
81
-
82
87
  end
83
88
  end
@@ -0,0 +1,57 @@
1
+ module Bioinform
2
+ class Parser
3
+ module SingleMotifParser
4
+ def self.included(base)
5
+ base.class_eval { extend ClassMethods }
6
+ include Enumerable
7
+ alias_method :split, :to_a
8
+ end
9
+ module ClassMethods
10
+ def split_on_motifs(input, pm_klass = PM)
11
+ [ input.is_a?(pm_klass) ? self : pm_klass.new(input, self) ]
12
+ end
13
+ end
14
+ def each
15
+ if block_given?
16
+ yield self
17
+ else
18
+ Enumerator.new(self, :each)
19
+ end
20
+ end
21
+ end
22
+ include SingleMotifParser
23
+
24
+ module MultipleMotifsParser
25
+ def self.included(base)
26
+ base.class_eval { extend ClassMethods }
27
+ include Enumerable
28
+ alias_method :split, :to_a
29
+ end
30
+ module ClassMethods
31
+ def split_on_motifs(input, pm_klass = PM)
32
+ split(input).map{|el| el.is_a?(pm_klass) ? el : pm_klass.new(el)}
33
+ end
34
+ def split(input)
35
+ self.new(input).split
36
+ end
37
+ private :split
38
+ end
39
+
40
+ def scanner_reset
41
+ end
42
+
43
+ def each
44
+ if block_given?
45
+ scanner_reset
46
+ while result = parse
47
+ yield result
48
+ end
49
+ else
50
+ Enumerator.new(self, :each)
51
+ end
52
+ end
53
+
54
+ private :scanner_reset
55
+ end
56
+ end
57
+ end
@@ -1,5 +1,5 @@
1
- require 'bioinform/support'
2
- require 'bioinform/parsers/string_parser'
1
+ require_relative '../support'
2
+ require_relative '../parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  class StringFantomParser < StringParser
@@ -28,7 +28,7 @@ module Bioinform
28
28
  scan_splitter
29
29
  name = parse_name
30
30
  matrix = parse_matrix
31
- Parser.parse!(matrix).merge(name: name)
31
+ Parser.parse!(matrix).tap{|result| result.name = name}
32
32
  end
33
33
 
34
34
  end
@@ -1,10 +1,12 @@
1
1
  require 'strscan'
2
- require 'bioinform/support'
3
- require 'bioinform/parsers/parser'
2
+ require_relative '../support'
3
+ require_relative '../parsers/parser'
4
4
 
5
5
  module Bioinform
6
6
  class StringParser < Parser
7
+ include MultipleMotifsParser
7
8
  attr_reader :scanner, :row_acgt_markers
9
+
8
10
  def initialize(input)
9
11
  raise ArgumentError unless input.is_a?(String)
10
12
  super
@@ -60,32 +62,11 @@ module Bioinform
60
62
  parse_acgt_header
61
63
  matrix = parse_matrix
62
64
  matrix = matrix.transpose if row_acgt_markers
63
- Parser.parse!(matrix).merge(name: name)
65
+ Parser.parse!(matrix).tap{|result| result.name = name}
64
66
  end
65
67
 
66
68
  def scanner_reset
67
69
  scanner.reset
68
70
  end
69
-
70
- def each
71
- if block_given?
72
- scanner_reset
73
- while result = parse
74
- yield result
75
- end
76
- else
77
- Enumerator.new(self, :each)
78
- end
79
- end
80
- include Enumerable
81
-
82
- alias_method :split, :to_a
83
- def self.split(input)
84
- self.new(input).split
85
- end
86
-
87
- def self.split_on_motifs(input, pm_klass = PM)
88
- split(input).map{|el| pm_klass.new(el)}
89
- end
90
71
  end
91
72
  end