bioinform 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO.txt +7 -2
- data/bin/merge_into_collection +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +2 -0
- data/lib/bioinform/cli/merge_into_collection.rb +76 -0
- data/lib/bioinform/cli/pcm2pwm.rb +20 -20
- data/lib/bioinform/cli/split_motifs.rb +21 -20
- data/lib/bioinform/cli.rb +16 -2
- data/lib/bioinform/data_models/collection.rb +13 -10
- data/lib/bioinform/data_models/pcm.rb +2 -2
- data/lib/bioinform/data_models/pm.rb +24 -37
- data/lib/bioinform/data_models/ppm.rb +2 -2
- data/lib/bioinform/data_models/pwm.rb +2 -2
- data/lib/bioinform/data_models.rb +8 -8
- data/lib/bioinform/parsers/parser.rb +10 -5
- data/lib/bioinform/parsers/splittable_parser.rb +57 -0
- data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
- data/lib/bioinform/parsers/string_parser.rb +5 -24
- data/lib/bioinform/parsers/trivial_parser.rb +19 -3
- data/lib/bioinform/parsers/yaml_parser.rb +35 -0
- data/lib/bioinform/parsers.rb +6 -4
- data/lib/bioinform/support/parameters.rb +19 -0
- data/lib/bioinform/support/partial_sums.rb +1 -1
- data/lib/bioinform/support.rb +11 -10
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +5 -5
- data/spec/cli/cli_spec.rb +8 -7
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
- data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
- data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
- data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
- data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
- data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
- data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
- data/spec/cli/data/split_motifs/collection.yaml +197 -0
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
- data/spec/cli/merge_into_collection_spec.rb +100 -0
- data/spec/cli/pcm2pwm_spec.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +74 -3
- data/spec/data_models/collection_spec.rb +2 -2
- data/spec/data_models/pcm_spec.rb +2 -2
- data/spec/data_models/pm_spec.rb +10 -27
- data/spec/data_models/ppm_spec.rb +2 -2
- data/spec/data_models/pwm_spec.rb +3 -3
- data/spec/fabricators/collection_fabricator.rb +8 -0
- data/spec/fabricators/pm_fabricator.rb +43 -0
- data/spec/parsers/parser_spec.rb +29 -37
- data/spec/parsers/string_fantom_parser_spec.rb +38 -35
- data/spec/parsers/string_parser_spec.rb +33 -66
- data/spec/parsers/trivial_parser_spec.rb +48 -6
- data/spec/parsers/yaml_parser_spec.rb +50 -0
- data/spec/spec_helper.rb +2 -6
- data/spec/support/advanced_scan_spec.rb +2 -2
- data/spec/support/array_product_spec.rb +2 -2
- data/spec/support/array_zip_spec.rb +2 -2
- data/spec/support/collect_hash_spec.rb +2 -2
- data/spec/support/delete_many_spec.rb +2 -2
- data/spec/support/inverf_spec.rb +2 -2
- data/spec/support/multiline_squish_spec.rb +2 -2
- data/spec/support/partial_sums_spec.rb +2 -2
- data/spec/support/same_by_spec.rb +2 -2
- metadata +86 -12
data/TODO.txt
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
ToDo:
|
2
2
|
Decide how to implement PM#equal? and PM#hash so that using PMs in Sets doesn't break Set comparison: two Sets containing the same PMs (but as different objects) should be equal. (The same applies to using a PM as a hash key.)
|
3
|
-
|
3
|
+
Write specs and fix the code so that Parser.split_on_motifs and related methods return consistent results. E.g. Parser.parse! raised an error when invoked multiple times.
|
4
|
+
|
5
|
+
Refactor CLI::SplitMotifs at the place where it splits a collection file and either chooses a real data model or builds a PM.
|
6
|
+
|
4
7
|
Make parser exceptions print the text where parsing broke (the line being processed plus/minus the 2 nearest lines, together with the command and line numbers)
|
5
8
|
Prevent the parser from going into an infinite loop
|
6
9
|
|
7
10
|
Create CLI-apps:
|
8
|
-
-- to merge many files(or whole folder) to a Collection
|
11
|
+
-- to merge many files (or a whole folder) into a Collection (in a way that makes it possible to give the collection a name)
|
9
12
|
|
10
13
|
Make Parsers switchable at runtime so that one can parse a string composed of two motifs in different formats.
|
11
14
|
|
@@ -18,6 +21,8 @@ Decide:
|
|
18
21
|
-- should background be in PM by default?
|
19
22
|
-- refactor PM.new, #== and so on so that a variable can be consistently introduced or removed by changing a single line
|
20
23
|
-- Make PCM#valid? and PPM#valid? more specific. This shouldn't remove the ability to load arbitrary data as a matrix, but that should only be possible in force mode. (I don't yet understand where this belongs: in the constructor, or elsewhere? And which validation "severity" levels should exist: strong validation, size-only validation, size-and-type validation, no validation? Or maybe an option such as valid_strictness: 'strict', 'usual', 'strict_with_name'? This needs to be considered.)
|
24
|
+
-- PM#to_pcm and friends have unintuitive behavior. E.g. pm.to_pcm.to_pwm != pm.to_pwm. In the first case the matrix is treated as a PCM and then converted, while in the second the matrix is treated as a PWM from the start.
|
25
|
+
-- Should the parser be reloadable or not? Maybe delete #reset_scanner?
|
21
26
|
|
22
27
|
Specs
|
23
28
|
-- PWM#probabilities, #score_variance, #gauss_estimation
|
data/bin/pcm2pwm
CHANGED
data/bin/split_motifs
CHANGED
data/bioinform.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = Bioinform::VERSION
|
17
17
|
|
18
18
|
gem.add_dependency('activesupport', '>= 3.0.0')
|
19
|
+
gem.add_dependency('docopt', '>= 0.5.0')
|
19
20
|
|
20
21
|
gem.add_development_dependency "rspec", ">= 2.0"
|
22
|
+
gem.add_development_dependency "fabrication", ">= 2.2.3"
|
21
23
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative '../../bioinform'
|
2
|
+
require 'docopt'
|
3
|
+
require 'shellwords'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
module CLI
|
8
|
+
module MergeIntoCollection
|
9
|
+
extend Bioinform::CLI::Helpers
|
10
|
+
def self.main(argv)
|
11
|
+
doc = <<-DOCOPT
|
12
|
+
Tool for merging multiple motifs into a single collection file.
|
13
|
+
It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.
|
14
|
+
|
15
|
+
Usage:
|
16
|
+
#{__FILE__} [options] [<pm-files>...]
|
17
|
+
|
18
|
+
Options:
|
19
|
+
-h --help Show this screen.
|
20
|
+
-n --name NAME Specify name for a collection. Default filename is based on this parameter
|
21
|
+
-o --output-file FILE Output file for resulting collection
|
22
|
+
-m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
|
23
|
+
-p --plain-text Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
|
24
|
+
DOCOPT
|
25
|
+
|
26
|
+
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
27
|
+
options = Docopt::docopt(doc, argv: argv)
|
28
|
+
|
29
|
+
plain_text = options['--plain-text']
|
30
|
+
name = options['--name']
|
31
|
+
if options['--plain-text']
|
32
|
+
output_file = options['--output-file'] || set_extension(name || 'collection', 'txt')
|
33
|
+
else
|
34
|
+
output_file = options['--output-file'] || set_extension(name || 'collection', 'yaml')
|
35
|
+
end
|
36
|
+
data_model = Bioinform.const_get(options['--data-model'].upcase)
|
37
|
+
|
38
|
+
if options['<pm-files>'].empty?
|
39
|
+
filelist = $stdin.read.shellsplit
|
40
|
+
else
|
41
|
+
filelist = options['<pm-files>']
|
42
|
+
end
|
43
|
+
|
44
|
+
filelist = filelist.map do |data_source|
|
45
|
+
if File.directory? data_source
|
46
|
+
Dir.glob(File.join(data_source, '*'))
|
47
|
+
elsif File.file? data_source
|
48
|
+
data_source
|
49
|
+
else
|
50
|
+
raise "File or directory #{data_source} can't be found"
|
51
|
+
end
|
52
|
+
end.flatten
|
53
|
+
|
54
|
+
collection = Collection.new
|
55
|
+
collection.name = name if name
|
56
|
+
|
57
|
+
filelist.each do |filename|
|
58
|
+
data_model.split_on_motifs(File.read(filename)).each do |pm|
|
59
|
+
pm.name ||= File.basename(filename, File.extname(filename))
|
60
|
+
collection << pm
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
if plain_text
|
65
|
+
File.open(output_file, 'w'){|f| f.puts(collection.to_s(false)) }
|
66
|
+
else
|
67
|
+
File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
|
68
|
+
end
|
69
|
+
|
70
|
+
rescue Docopt::Exit => e
|
71
|
+
puts e.message
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -1,39 +1,39 @@
|
|
1
|
-
|
1
|
+
require_relative '../../bioinform'
|
2
2
|
require 'docopt'
|
3
3
|
require 'shellwords'
|
4
4
|
|
5
5
|
module Bioinform
|
6
|
-
module CLI
|
6
|
+
module CLI
|
7
7
|
module PCM2PWM
|
8
|
+
extend Bioinform::CLI::Helpers
|
8
9
|
def self.main(argv)
|
9
10
|
doc = <<-DOCOPT
|
10
|
-
PCM to PWM converter.
|
11
|
-
It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
|
12
|
-
When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
|
11
|
+
PCM to PWM converter.
|
12
|
+
It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
|
13
|
+
When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
|
13
14
|
|
14
|
-
Usage:
|
15
|
-
|
15
|
+
Usage:
|
16
|
+
#{__FILE__} [options] [<pcm-files>...]
|
16
17
|
|
17
|
-
Options:
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
Options:
|
19
|
+
-h --help Show this screen.
|
20
|
+
-e --extension EXT Extension of output files [default: pwm]
|
21
|
+
-f --folder FOLDER Where to save output files [default: .]
|
21
22
|
DOCOPT
|
22
23
|
|
24
|
+
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
23
25
|
options = Docopt::docopt(doc, argv: argv)
|
24
26
|
|
25
|
-
|
26
|
-
filelist = $stdin.read.shellsplit
|
27
|
-
else
|
28
|
-
filelist = options['<pcm-files>']
|
29
|
-
end
|
30
|
-
|
27
|
+
pcm_files = options['<pcm-files>']
|
31
28
|
folder = options['--folder']
|
29
|
+
extension = options['--extension']
|
30
|
+
|
32
31
|
Dir.mkdir(folder) unless Dir.exist?(folder)
|
32
|
+
filelist = (pcm_files.empty?) ? $stdin.read.shellsplit : pcm_files
|
33
33
|
|
34
|
-
filelist.each do |
|
35
|
-
pwm = Bioinform::PCM.new( File.read(
|
36
|
-
File.open(
|
34
|
+
filelist.each do |filename|
|
35
|
+
pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
|
36
|
+
File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
|
37
37
|
f.puts pwm
|
38
38
|
end
|
39
39
|
end
|
@@ -1,40 +1,41 @@
|
|
1
|
-
|
1
|
+
require_relative '../../bioinform'
|
2
2
|
require 'docopt'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
module CLI
|
6
6
|
module SplitMotifs
|
7
|
-
|
7
|
+
extend Bioinform::CLI::Helpers
|
8
8
|
def self.main(argv)
|
9
9
|
doc = <<-DOCOPT
|
10
|
-
Motif splitter.
|
11
|
-
It get a file with a set of motifs and splits it into motifs according to their names.
|
10
|
+
Motif splitter.
|
11
|
+
It get a file with a set of motifs and splits it into motifs according to their names.
|
12
12
|
|
13
|
-
Usage:
|
14
|
-
|
13
|
+
Usage:
|
14
|
+
#{__FILE__} [options] <collection-file>
|
15
15
|
|
16
|
-
Options:
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
-f --folder FOLDER Where to save output files [default: .]
|
16
|
+
Options:
|
17
|
+
-h --help Show this screen.
|
18
|
+
-e --extension EXT Extension of output files
|
19
|
+
-f --folder FOLDER Where to save output files [default: .]
|
21
20
|
DOCOPT
|
22
21
|
|
22
|
+
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
23
23
|
options = Docopt::docopt(doc, argv: argv)
|
24
24
|
|
25
25
|
folder = options['--folder']
|
26
|
-
|
27
|
-
|
28
|
-
data_model = Bioinform.const_get(options['--data-model'].upcase)
|
29
|
-
extension = options['--extension'] || options['--data-model'].downcase
|
30
|
-
|
26
|
+
extension = options['--extension']
|
31
27
|
collection_filename = options['<collection-file>']
|
28
|
+
|
29
|
+
Dir.mkdir(folder) unless Dir.exist?(folder)
|
32
30
|
raise "File #{collection_filename} not exist" unless File.exist? collection_filename
|
33
|
-
input = File.read(collection_filename)
|
34
31
|
|
35
|
-
|
36
|
-
|
37
|
-
|
32
|
+
input = File.read(collection_filename)
|
33
|
+
Parser.choose(input).split.each do |motif|
|
34
|
+
if motif.is_a? PM
|
35
|
+
File.open(set_folder(folder, set_extension(motif.name, extension || motif.class.name.gsub(/^.*::/,'').downcase)), 'w'){|f| f.puts motif}
|
36
|
+
else
|
37
|
+
motif = PM.new(motif)
|
38
|
+
File.open(set_folder(folder, set_extension(motif.name, extension || 'mat')), 'w'){|f| f.puts motif}
|
38
39
|
end
|
39
40
|
end
|
40
41
|
rescue Docopt::Exit => e
|
data/lib/bioinform/cli.rb
CHANGED
@@ -1,7 +1,21 @@
|
|
1
1
|
module Bioinform
|
2
2
|
module CLI
|
3
|
-
|
4
|
-
|
3
|
+
module Helpers
|
4
|
+
def basename_wo_extension(filename)
|
5
|
+
File.basename(filename, File.extname(filename))
|
6
|
+
end
|
7
|
+
def set_extension(filename, extension)
|
8
|
+
"#{filename}.#{extension}"
|
9
|
+
end
|
10
|
+
def set_folder(folder, filename)
|
11
|
+
File.join(folder, filename)
|
12
|
+
end
|
13
|
+
def change_extension(filename, extension)
|
14
|
+
set_extension(basename_wo_extension(filename), extension)
|
15
|
+
end
|
16
|
+
def change_folder_and_extension(input_filename, extension, folder)
|
17
|
+
set_folder(folder, change_extension(input_filename, extension))
|
18
|
+
end
|
5
19
|
end
|
6
20
|
end
|
7
21
|
end
|
@@ -2,7 +2,10 @@ require 'ostruct'
|
|
2
2
|
|
3
3
|
module Bioinform
|
4
4
|
class Collection
|
5
|
-
attr_reader :collection
|
5
|
+
attr_reader :collection
|
6
|
+
|
7
|
+
include Parameters
|
8
|
+
make_parameters :name
|
6
9
|
|
7
10
|
# The collection name acts as a tag on each motif in the collection. A motif can be included in several collections and therefore have several tags.
|
8
11
|
def initialize(parameters = {})
|
@@ -15,14 +18,14 @@ module Bioinform
|
|
15
18
|
collection.size
|
16
19
|
end
|
17
20
|
|
18
|
-
def
|
19
|
-
|
21
|
+
def to_s(with_name = true)
|
22
|
+
result = (with_name) ? "Collection: #{name.to_s}\n" : ''
|
23
|
+
each do |pm, infos|
|
24
|
+
result << pm.to_s << "\n\n"
|
25
|
+
end
|
26
|
+
result
|
20
27
|
end
|
21
28
|
|
22
|
-
def to_s
|
23
|
-
"<Collection '#{name}'>"
|
24
|
-
end
|
25
|
-
|
26
29
|
def +(other)
|
27
30
|
result = self.class.new
|
28
31
|
each do |pm, infos|
|
@@ -39,7 +42,7 @@ module Bioinform
|
|
39
42
|
collection << [pm, info]
|
40
43
|
self
|
41
44
|
end
|
42
|
-
|
45
|
+
|
43
46
|
def <<(pm)
|
44
47
|
add_pm(pm, OpenStruct.new)
|
45
48
|
end
|
@@ -51,7 +54,7 @@ module Bioinform
|
|
51
54
|
Enumerator.new(self, :each)
|
52
55
|
end
|
53
56
|
end
|
54
|
-
|
57
|
+
|
55
58
|
def each_pm
|
56
59
|
if block_given?
|
57
60
|
each{|pm, infos| yield pm}
|
@@ -75,7 +78,7 @@ module Bioinform
|
|
75
78
|
end # end
|
76
79
|
end # end
|
77
80
|
end
|
78
|
-
|
81
|
+
|
79
82
|
def ==(other)
|
80
83
|
(collection == other.collection) && (parameters == other.parameters)
|
81
84
|
rescue
|
@@ -1,13 +1,16 @@
|
|
1
|
-
require '
|
2
|
-
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative '../support'
|
3
|
+
require_relative '../parsers'
|
3
4
|
|
4
5
|
module Bioinform
|
5
6
|
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
|
6
7
|
LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
|
7
8
|
|
8
9
|
class PM
|
9
|
-
|
10
|
-
|
10
|
+
attr_accessor :matrix, :parameters
|
11
|
+
|
12
|
+
include Parameters
|
13
|
+
make_parameters :tags, :name, :background
|
11
14
|
|
12
15
|
def mark(tag)
|
13
16
|
tags << tag
|
@@ -18,24 +21,31 @@ module Bioinform
|
|
18
21
|
end
|
19
22
|
|
20
23
|
def self.choose_parser(input)
|
21
|
-
[TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
|
24
|
+
[TrivialParser, YAMLParser, Parser, StringParser, StringFantomParser, TrivialCollectionParser, YAMLCollectionParser].find do |parser|
|
22
25
|
self.new(input, parser) rescue nil
|
23
26
|
end
|
24
27
|
end
|
28
|
+
|
29
|
+
def self.split_on_motifs(input)
|
30
|
+
parser = choose_parser(input)
|
31
|
+
raise ParsingError, "No parser can parse given input" unless parser
|
32
|
+
parser.split_on_motifs(input, self)
|
33
|
+
end
|
25
34
|
|
26
35
|
def initialize(input, parser = nil)
|
36
|
+
@parameters = OpenStruct.new
|
27
37
|
parser ||= self.class.choose_parser(input)
|
28
38
|
raise 'No one parser can process input' unless parser
|
29
39
|
result = parser.new(input).parse
|
30
|
-
@matrix = result
|
31
|
-
|
32
|
-
|
33
|
-
|
40
|
+
@matrix = result.matrix
|
41
|
+
self.name = result.name
|
42
|
+
self.tags = result.tags || []
|
43
|
+
self.background = result.background || [1, 1, 1, 1]
|
34
44
|
raise 'matrix not valid' unless valid?
|
35
45
|
end
|
36
46
|
|
37
47
|
def ==(other)
|
38
|
-
@matrix == other.matrix &&
|
48
|
+
@matrix == other.matrix && background == other.background && name == other.name
|
39
49
|
rescue
|
40
50
|
false
|
41
51
|
end
|
@@ -77,8 +87,8 @@ module Bioinform
|
|
77
87
|
matrix_str = each_position.map{|pos| pos.join("\t")}.join("\n")
|
78
88
|
end
|
79
89
|
|
80
|
-
if options[:with_name] &&
|
81
|
-
|
90
|
+
if options[:with_name] && name
|
91
|
+
name + "\n" + matrix_str
|
82
92
|
else
|
83
93
|
matrix_str
|
84
94
|
end
|
@@ -97,8 +107,8 @@ module Bioinform
|
|
97
107
|
|
98
108
|
matrix_str = matrix_rows.join("\n")
|
99
109
|
|
100
|
-
if options[:with_name] &&
|
101
|
-
|
110
|
+
if options[:with_name] && name
|
111
|
+
name + "\n" + header + matrix_str
|
102
112
|
else
|
103
113
|
header + matrix_str
|
104
114
|
end
|
@@ -111,22 +121,6 @@ module Bioinform
|
|
111
121
|
hsh.with_indifferent_access
|
112
122
|
end
|
113
123
|
|
114
|
-
# pm.background - returns a @background attribute
|
115
|
-
# pm.background(new_background) - sets an attribute and returns pm itself
|
116
|
-
# if more than one argument passed - raises an exception
|
117
|
-
def background(*args)
|
118
|
-
case args.size
|
119
|
-
when 0 then @background
|
120
|
-
when 1 then background!(args[0])
|
121
|
-
else raise ArgumentError, '#background method can get 0 or 1 argument'
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def background!(new_background)
|
126
|
-
@background = new_background
|
127
|
-
self
|
128
|
-
end
|
129
|
-
|
130
124
|
def self.zero_column
|
131
125
|
[0, 0, 0, 0]
|
132
126
|
end
|
@@ -158,13 +152,6 @@ module Bioinform
|
|
158
152
|
background.map{|element| element.to_f / sum}
|
159
153
|
end
|
160
154
|
|
161
|
-
#def split(first_chunk_length)
|
162
|
-
# [@matrix.first(first_chunk_length), matrix.last(length - first_chunk_length)]
|
163
|
-
#end
|
164
|
-
#def permute_columns(permutation_index)
|
165
|
-
# @matrix.values_at(permutation_index)permutation_index.map{|col| matrix[col]}
|
166
|
-
#end
|
167
|
-
|
168
155
|
def best_score
|
169
156
|
@matrix.inject(0.0){|sum, col| sum + col.max}
|
170
157
|
end
|
@@ -1,11 +1,11 @@
|
|
1
|
-
|
1
|
+
require_relative 'parsers'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
require_relative 'data_models/pm'
|
4
|
+
require_relative 'data_models/pcm'
|
5
|
+
require_relative 'data_models/ppm'
|
6
|
+
require_relative 'data_models/pwm'
|
7
7
|
|
8
|
-
|
8
|
+
require_relative 'data_models/collection'
|
9
9
|
|
10
|
-
#
|
11
|
-
#
|
10
|
+
#require_relative 'bioinform/data_models/iupac_word'
|
11
|
+
#require_relative 'bioinform/data_models/iupac_wordset'
|
@@ -1,7 +1,13 @@
|
|
1
|
-
require '
|
2
|
-
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative '../support'
|
3
|
+
require_relative '../data_models/pm'
|
4
|
+
require_relative 'splittable_parser'
|
3
5
|
|
4
6
|
module Bioinform
|
7
|
+
class Error < StandardError; end
|
8
|
+
class ParsingError < Error; end
|
9
|
+
class InvalidMatrix < Error; end
|
10
|
+
|
5
11
|
class Parser
|
6
12
|
attr_reader :input
|
7
13
|
|
@@ -19,8 +25,8 @@ module Bioinform
|
|
19
25
|
|
20
26
|
def parse!
|
21
27
|
matrix = self.class.transform_input(input)
|
22
|
-
raise
|
23
|
-
|
28
|
+
raise InvalidMatrix unless self.class.valid_matrix?(matrix)
|
29
|
+
OpenStruct.new(matrix: matrix)
|
24
30
|
end
|
25
31
|
|
26
32
|
def parse
|
@@ -78,6 +84,5 @@ module Bioinform
|
|
78
84
|
def self.need_tranpose?(input)
|
79
85
|
(input.size == 4) && input.any?{|x| x.size != 4}
|
80
86
|
end
|
81
|
-
|
82
87
|
end
|
83
88
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Bioinform
|
2
|
+
class Parser
|
3
|
+
module SingleMotifParser
|
4
|
+
def self.included(base)
|
5
|
+
base.class_eval { extend ClassMethods }
|
6
|
+
include Enumerable
|
7
|
+
alias_method :split, :to_a
|
8
|
+
end
|
9
|
+
module ClassMethods
|
10
|
+
def split_on_motifs(input, pm_klass = PM)
|
11
|
+
[ input.is_a?(pm_klass) ? self : pm_klass.new(input, self) ]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
def each
|
15
|
+
if block_given?
|
16
|
+
yield self
|
17
|
+
else
|
18
|
+
Enumerator.new(self, :each)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
include SingleMotifParser
|
23
|
+
|
24
|
+
module MultipleMotifsParser
|
25
|
+
def self.included(base)
|
26
|
+
base.class_eval { extend ClassMethods }
|
27
|
+
include Enumerable
|
28
|
+
alias_method :split, :to_a
|
29
|
+
end
|
30
|
+
module ClassMethods
|
31
|
+
def split_on_motifs(input, pm_klass = PM)
|
32
|
+
split(input).map{|el| el.is_a?(pm_klass) ? el : pm_klass.new(el)}
|
33
|
+
end
|
34
|
+
def split(input)
|
35
|
+
self.new(input).split
|
36
|
+
end
|
37
|
+
private :split
|
38
|
+
end
|
39
|
+
|
40
|
+
def scanner_reset
|
41
|
+
end
|
42
|
+
|
43
|
+
def each
|
44
|
+
if block_given?
|
45
|
+
scanner_reset
|
46
|
+
while result = parse
|
47
|
+
yield result
|
48
|
+
end
|
49
|
+
else
|
50
|
+
Enumerator.new(self, :each)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
private :scanner_reset
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../support'
|
2
|
+
require_relative '../parsers/string_parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
class StringFantomParser < StringParser
|
@@ -28,7 +28,7 @@ module Bioinform
|
|
28
28
|
scan_splitter
|
29
29
|
name = parse_name
|
30
30
|
matrix = parse_matrix
|
31
|
-
Parser.parse!(matrix).
|
31
|
+
Parser.parse!(matrix).tap{|result| result.name = name}
|
32
32
|
end
|
33
33
|
|
34
34
|
end
|
@@ -1,10 +1,12 @@
|
|
1
1
|
require 'strscan'
|
2
|
-
|
3
|
-
|
2
|
+
require_relative '../support'
|
3
|
+
require_relative '../parsers/parser'
|
4
4
|
|
5
5
|
module Bioinform
|
6
6
|
class StringParser < Parser
|
7
|
+
include MultipleMotifsParser
|
7
8
|
attr_reader :scanner, :row_acgt_markers
|
9
|
+
|
8
10
|
def initialize(input)
|
9
11
|
raise ArgumentError unless input.is_a?(String)
|
10
12
|
super
|
@@ -60,32 +62,11 @@ module Bioinform
|
|
60
62
|
parse_acgt_header
|
61
63
|
matrix = parse_matrix
|
62
64
|
matrix = matrix.transpose if row_acgt_markers
|
63
|
-
Parser.parse!(matrix).
|
65
|
+
Parser.parse!(matrix).tap{|result| result.name = name}
|
64
66
|
end
|
65
67
|
|
66
68
|
def scanner_reset
|
67
69
|
scanner.reset
|
68
70
|
end
|
69
|
-
|
70
|
-
def each
|
71
|
-
if block_given?
|
72
|
-
scanner_reset
|
73
|
-
while result = parse
|
74
|
-
yield result
|
75
|
-
end
|
76
|
-
else
|
77
|
-
Enumerator.new(self, :each)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
include Enumerable
|
81
|
-
|
82
|
-
alias_method :split, :to_a
|
83
|
-
def self.split(input)
|
84
|
-
self.new(input).split
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.split_on_motifs(input, pm_klass = PM)
|
88
|
-
split(input).map{|el| pm_klass.new(el)}
|
89
|
-
end
|
90
71
|
end
|
91
72
|
end
|