bioinform 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +16 -16
- data/LICENSE +21 -21
- data/README.md +35 -35
- data/Rakefile +4 -4
- data/TODO.txt +37 -37
- data/bin/merge_into_collection +3 -3
- data/bin/pcm2pwm +3 -3
- data/bin/split_motifs +3 -3
- data/bioinform.gemspec +19 -19
- data/lib/bioinform/cli/convert_motif.rb +107 -107
- data/lib/bioinform/cli/merge_into_collection.rb +79 -79
- data/lib/bioinform/cli/pcm2pwm.rb +46 -46
- data/lib/bioinform/cli/split_motifs.rb +46 -46
- data/lib/bioinform/cli.rb +29 -29
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
- data/lib/bioinform/data_models/collection.rb +74 -74
- data/lib/bioinform/data_models/motif.rb +55 -55
- data/lib/bioinform/data_models/pcm.rb +23 -23
- data/lib/bioinform/data_models/pm.rb +169 -169
- data/lib/bioinform/data_models/ppm.rb +9 -9
- data/lib/bioinform/data_models/pwm.rb +55 -55
- data/lib/bioinform/data_models.rb +10 -10
- data/lib/bioinform/formatters/raw_formatter.rb +40 -40
- data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
- data/lib/bioinform/formatters.rb +1 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
- data/lib/bioinform/parsers/parser.rb +87 -87
- data/lib/bioinform/parsers/splittable_parser.rb +56 -56
- data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
- data/lib/bioinform/parsers/string_parser.rb +71 -71
- data/lib/bioinform/parsers/trivial_parser.rb +33 -33
- data/lib/bioinform/parsers/yaml_parser.rb +34 -34
- data/lib/bioinform/parsers.rb +6 -6
- data/lib/bioinform/support/array_product.rb +5 -5
- data/lib/bioinform/support/array_zip.rb +5 -5
- data/lib/bioinform/support/collect_hash.rb +6 -6
- data/lib/bioinform/support/deep_dup.rb +4 -4
- data/lib/bioinform/support/delete_many.rb +13 -13
- data/lib/bioinform/support/inverf.rb +12 -12
- data/lib/bioinform/support/multiline_squish.rb +5 -5
- data/lib/bioinform/support/parameters.rb +27 -27
- data/lib/bioinform/support/partial_sums.rb +15 -15
- data/lib/bioinform/support/same_by.rb +12 -12
- data/lib/bioinform/support/strip_doc.rb +8 -8
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
- data/lib/bioinform/support.rb +17 -17
- data/lib/bioinform/version.rb +3 -3
- data/lib/bioinform.rb +10 -10
- data/spec/cli/cli_spec.rb +13 -13
- data/spec/cli/convert_motif_spec.rb +106 -106
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
- data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
- data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
- data/spec/cli/data/split_motifs/collection.yaml +188 -188
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
- data/spec/cli/merge_into_collection_spec.rb +99 -99
- data/spec/cli/pcm2pwm_spec.rb +79 -79
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +76 -76
- data/spec/data_models/collection_spec.rb +97 -97
- data/spec/data_models/motif_spec.rb +223 -223
- data/spec/data_models/pcm_spec.rb +55 -55
- data/spec/data_models/pm_spec.rb +359 -359
- data/spec/data_models/ppm_spec.rb +7 -7
- data/spec/data_models/pwm_spec.rb +82 -82
- data/spec/fabricators/collection_fabricator.rb +7 -7
- data/spec/fabricators/motif_fabricator.rb +32 -32
- data/spec/fabricators/motif_formats_fabricator.rb +124 -124
- data/spec/fabricators/pcm_fabricator.rb +24 -24
- data/spec/fabricators/pm_fabricator.rb +51 -51
- data/spec/fabricators/ppm_fabricator.rb +13 -13
- data/spec/fabricators/pwm_fabricator.rb +16 -16
- data/spec/parsers/parser_spec.rb +152 -152
- data/spec/parsers/string_fantom_parser_spec.rb +69 -69
- data/spec/parsers/string_parser_spec.rb +76 -76
- data/spec/parsers/trivial_parser_spec.rb +63 -63
- data/spec/parsers/yaml_parser_spec.rb +50 -50
- data/spec/spec_helper.rb +10 -10
- data/spec/spec_helper_source.rb +59 -59
- data/spec/support/advanced_scan_spec.rb +31 -31
- data/spec/support/array_product_spec.rb +14 -14
- data/spec/support/array_zip_spec.rb +14 -14
- data/spec/support/collect_hash_spec.rb +14 -14
- data/spec/support/delete_many_spec.rb +43 -43
- data/spec/support/inverf_spec.rb +18 -18
- data/spec/support/multiline_squish_spec.rb +24 -24
- data/spec/support/partial_sums_spec.rb +30 -30
- data/spec/support/same_by_spec.rb +35 -35
- metadata +3 -3
@@ -1,80 +1,80 @@
|
|
1
|
-
require_relative '../../bioinform'
|
2
|
-
require 'docopt'
|
3
|
-
require 'shellwords'
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
module Bioinform
|
7
|
-
module CLI
|
8
|
-
module MergeIntoCollection
|
9
|
-
extend Bioinform::CLI::Helpers
|
10
|
-
def self.main(argv)
|
11
|
-
doc = <<-DOCOPT
|
12
|
-
Tool for merging multiple motifs into a single collection file.
|
13
|
-
It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.
|
14
|
-
|
15
|
-
Usage:
|
16
|
-
merge_into_collection [options] [<pm-files>...]
|
17
|
-
|
18
|
-
Options:
|
19
|
-
-h --help Show this screen.
|
20
|
-
-n --name NAME Specify name for a collection. Default filename is based on this parameter
|
21
|
-
-o --output-file FILE Output file for resulting collection
|
22
|
-
-m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
|
23
|
-
-p --plain-text Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
|
24
|
-
DOCOPT
|
25
|
-
|
26
|
-
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
27
|
-
options = Docopt::docopt(doc, argv: argv)
|
28
|
-
|
29
|
-
plain_text = options['--plain-text']
|
30
|
-
name = options['--name']
|
31
|
-
if options['--plain-text']
|
32
|
-
output_file = options['--output-file'] || set_extension(name || 'collection', 'txt')
|
33
|
-
else
|
34
|
-
output_file = options['--output-file'] || set_extension(name || 'collection', 'yaml')
|
35
|
-
end
|
36
|
-
data_model = Bioinform.const_get(options['--data-model'].upcase)
|
37
|
-
|
38
|
-
if options['<pm-files>'].empty?
|
39
|
-
filelist = $stdin.read.shellsplit
|
40
|
-
else
|
41
|
-
filelist = options['<pm-files>']
|
42
|
-
end
|
43
|
-
|
44
|
-
filelist = filelist.map do |data_source|
|
45
|
-
if File.directory? data_source
|
46
|
-
Dir.glob(File.join(data_source, '*'))
|
47
|
-
elsif File.file? data_source
|
48
|
-
data_source
|
49
|
-
else
|
50
|
-
raise "File or directory #{data_source} can't be found"
|
51
|
-
end
|
52
|
-
end.flatten
|
53
|
-
|
54
|
-
collection = Collection.new
|
55
|
-
collection.name = name if name
|
56
|
-
|
57
|
-
filelist.each do |filename|
|
58
|
-
data_model.split_on_motifs(File.read(filename)).each do |pm|
|
59
|
-
pm.name ||= File.basename(filename, File.extname(filename))
|
60
|
-
collection << pm
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
if plain_text
|
65
|
-
File.open(output_file, 'w') do |f|
|
66
|
-
collection.each(options['--data-model'].downcase) do |pm|
|
67
|
-
f.puts(pm.to_s + "\n\n")
|
68
|
-
end
|
69
|
-
end
|
70
|
-
else
|
71
|
-
File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
|
72
|
-
end
|
73
|
-
|
74
|
-
rescue Docopt::Exit => e
|
75
|
-
puts e.message
|
76
|
-
end
|
77
|
-
|
78
|
-
end
|
79
|
-
end
|
1
|
+
require_relative '../../bioinform'
|
2
|
+
require 'docopt'
|
3
|
+
require 'shellwords'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
module Bioinform
  module CLI
    # Command-line tool that gathers motifs from files and/or directories into
    # one Collection and writes it out as YAML (default) or plain text.
    module MergeIntoCollection
      extend Bioinform::CLI::Helpers

      # Data models accepted by --data-model (the same list the help text shows).
      DATA_MODELS = %w[PM PCM PPM PWM].freeze

      # Entry point of the `merge_into_collection` tool.
      #
      # argv - Array of command-line arguments, parsed by Docopt.
      #
      # When no <pm-files> are given, the list of sources is read from STDIN
      # and split with shell quoting rules. Each source may be a file or a
      # directory; a directory contributes the regular files directly inside it.
      #
      # Raises RuntimeError when a source does not exist or when --data-model
      # names something other than one of DATA_MODELS.
      # A Docopt::Exit (help requested / bad usage) is caught and its message
      # printed.
      def self.main(argv)
        # NOTE: docopt separates an option from its description by two or more
        # spaces, so the column alignment below is significant.
        doc = <<-DOCOPT
          Tool for merging multiple motifs into a single collection file.
          It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.

          Usage:
            merge_into_collection [options] [<pm-files>...]

          Options:
            -h --help                 Show this screen.
            -n --name NAME            Specify name for a collection. Default filename is based on this parameter
            -o --output-file FILE     Output file for resulting collection
            -m --data-model MODEL     Data model: PM, PCM, PPM or PWM [default: PM]
            -p --plain-text           Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
        DOCOPT

        # Strip the heredoc's common leading indentation.
        doc.gsub!(/^#{doc[/\A +/]}/,'')
        options = Docopt::docopt(doc, argv: argv)

        plain_text = options['--plain-text']
        name = options['--name']
        output_file = options['--output-file'] ||
                      set_extension(name || 'collection', plain_text ? 'txt' : 'yaml')

        # The model name comes straight from the command line, so check it
        # against the supported list before resolving it to a constant.
        model_name = options['--data-model'].upcase
        unless DATA_MODELS.include?(model_name)
          raise "Unknown data model #{options['--data-model']}: expected one of #{DATA_MODELS.join(', ')}"
        end
        data_model = Bioinform.const_get(model_name)

        sources = options['<pm-files>'].empty? ? $stdin.read.shellsplit : options['<pm-files>']

        filelist = sources.flat_map do |data_source|
          if File.directory? data_source
            # Keep only regular files (a nested directory cannot be read as a
            # motif file) and sort them so the result does not depend on the
            # order the filesystem lists entries in.
            Dir.glob(File.join(data_source, '*')).select { |path| File.file?(path) }.sort
          elsif File.file? data_source
            [data_source]
          else
            raise "File or directory #{data_source} can't be found"
          end
        end

        collection = Collection.new
        collection.name = name if name

        filelist.each do |filename|
          data_model.split_on_motifs(File.read(filename)).each do |pm|
            # A motif without its own name is named after the file it came from.
            pm.name ||= basename_wo_extension(filename)
            collection << pm
          end
        end

        if plain_text
          File.open(output_file, 'w') do |f|
            collection.each(options['--data-model'].downcase) do |pm|
              f.puts(pm.to_s + "\n\n")
            end
          end
        else
          File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
        end

      rescue Docopt::Exit => e
        puts e.message
      end

    end
  end
end
|
@@ -1,47 +1,47 @@
|
|
1
|
-
require_relative '../../bioinform'
|
2
|
-
require 'docopt'
|
3
|
-
require 'shellwords'
|
4
|
-
|
5
|
-
module Bioinform
|
6
|
-
module CLI
|
7
|
-
module PCM2PWM
|
8
|
-
extend Bioinform::CLI::Helpers
|
9
|
-
def self.main(argv)
|
10
|
-
doc = <<-DOCOPT
|
11
|
-
PCM to PWM converter.
|
12
|
-
It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
|
13
|
-
When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
|
14
|
-
|
15
|
-
Usage:
|
16
|
-
pcm2pwm [options] [<pcm-files>...]
|
17
|
-
|
18
|
-
Options:
|
19
|
-
-h --help Show this screen.
|
20
|
-
-e --extension EXT Extension of output files [default: pwm]
|
21
|
-
-f --folder FOLDER Where to save output files [default: .]
|
22
|
-
DOCOPT
|
23
|
-
|
24
|
-
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
25
|
-
options = Docopt::docopt(doc, argv: argv)
|
26
|
-
|
27
|
-
pcm_files = options['<pcm-files>']
|
28
|
-
folder = options['--folder']
|
29
|
-
extension = options['--extension']
|
30
|
-
|
31
|
-
Dir.mkdir(folder) unless Dir.exist?(folder)
|
32
|
-
filelist = (pcm_files.empty?) ? $stdin.read.shellsplit : pcm_files
|
33
|
-
|
34
|
-
filelist.each do |filename|
|
35
|
-
pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
|
36
|
-
File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
|
37
|
-
f.puts pwm
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
rescue Docopt::Exit => e
|
42
|
-
puts e.message
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|
46
|
-
end
|
1
|
+
require_relative '../../bioinform'
|
2
|
+
require 'docopt'
|
3
|
+
require 'shellwords'
|
4
|
+
|
5
|
+
module Bioinform
  module CLI
    # Command-line tool that converts files containing PCMs into files
    # containing the corresponding PWMs.
    module PCM2PWM
      extend Bioinform::CLI::Helpers

      # Entry point of the `pcm2pwm` tool.
      #
      # argv - Array of command-line arguments, parsed by Docopt.
      #
      # For every input file a PCM is built from the file's content, converted
      # with #to_pwm and written to <folder>/<input basename>.<extension>.
      # When no files are given, the file list is read from STDIN and split
      # with shell quoting rules.
      # A Docopt::Exit (help requested / bad usage) is caught and its message
      # printed.
      def self.main(argv)
        # Needed only for creating the output folder; loaded here so the tool
        # does not depend on another file having required it.
        require 'fileutils'

        # NOTE: docopt separates an option from its description by two or more
        # spaces, so the column alignment below is significant.
        doc = <<-DOCOPT
          PCM to PWM converter.
          It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
          When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)

          Usage:
            pcm2pwm [options] [<pcm-files>...]

          Options:
            -h --help                 Show this screen.
            -e --extension EXT        Extension of output files [default: pwm]
            -f --folder FOLDER        Where to save output files [default: .]
        DOCOPT

        # Strip the heredoc's common leading indentation.
        doc.gsub!(/^#{doc[/\A +/]}/,'')
        options = Docopt::docopt(doc, argv: argv)

        pcm_files = options['<pcm-files>']
        folder = options['--folder']
        extension = options['--extension']

        # mkdir_p also creates missing parent directories (e.g. `-f out/pwms`)
        # and is a no-op when the folder already exists.
        FileUtils.mkdir_p(folder)
        filelist = pcm_files.empty? ? $stdin.read.shellsplit : pcm_files

        filelist.each do |filename|
          pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
          File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
            f.puts pwm
          end
        end

      rescue Docopt::Exit => e
        puts e.message
      end

    end
  end
end
|
@@ -1,47 +1,47 @@
|
|
1
|
-
require_relative '../../bioinform'
|
2
|
-
require 'docopt'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
module CLI
|
6
|
-
module SplitMotifs
|
7
|
-
extend Bioinform::CLI::Helpers
|
8
|
-
def self.main(argv)
|
9
|
-
doc = <<-DOCOPT
|
10
|
-
Motif splitter.
|
11
|
-
It get a file with a set of motifs and splits it into motifs according to their names.
|
12
|
-
|
13
|
-
Usage:
|
14
|
-
split_motifs [options] <collection-file>
|
15
|
-
|
16
|
-
Options:
|
17
|
-
-h --help Show this screen.
|
18
|
-
-e --extension EXT Extension of output files
|
19
|
-
-f --folder FOLDER Where to save output files [default: .]
|
20
|
-
DOCOPT
|
21
|
-
|
22
|
-
doc.gsub!(/^#{doc[/\A +/]}/,'')
|
23
|
-
options = Docopt::docopt(doc, argv: argv)
|
24
|
-
|
25
|
-
folder = options['--folder']
|
26
|
-
extension = options['--extension']
|
27
|
-
collection_filename = options['<collection-file>']
|
28
|
-
|
29
|
-
Dir.mkdir(folder) unless Dir.exist?(folder)
|
30
|
-
raise "File #{collection_filename} not exist" unless File.exist? collection_filename
|
31
|
-
|
32
|
-
input = File.read(collection_filename)
|
33
|
-
Parser.choose(input).split.each do |motif|
|
34
|
-
if motif.is_a? PM
|
35
|
-
File.open(set_folder(folder, set_extension(motif.name, extension || motif.class.name.gsub(/^.*::/,'').downcase)), 'w'){|f| f.puts motif}
|
36
|
-
else
|
37
|
-
motif = PM.new(motif)
|
38
|
-
File.open(set_folder(folder, set_extension(motif.name, extension || 'mat')), 'w'){|f| f.puts motif}
|
39
|
-
end
|
40
|
-
end
|
41
|
-
rescue Docopt::Exit => e
|
42
|
-
puts e.message
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|
46
|
-
end
|
1
|
+
require_relative '../../bioinform'
|
2
|
+
require 'docopt'
|
3
|
+
|
4
|
+
module Bioinform
  module CLI
    # Command-line tool that splits a file holding several motifs into one
    # file per motif, each named after the motif.
    module SplitMotifs
      extend Bioinform::CLI::Helpers

      # Entry point of the `split_motifs` tool.
      #
      # argv - Array of command-line arguments, parsed by Docopt.
      #
      # Each motif is written to <folder>/<motif name>.<extension>. Without
      # --extension, a motif that already is a PM gets its downcased class name
      # as extension; anything else is wrapped into a PM and gets `mat`.
      #
      # Raises RuntimeError when the collection file does not exist.
      # A Docopt::Exit (help requested / bad usage) is caught and its message
      # printed.
      def self.main(argv)
        # Needed only for creating the output folder; loaded here so the tool
        # does not depend on another file having required it.
        require 'fileutils'

        # NOTE: docopt separates an option from its description by two or more
        # spaces, so the column alignment below is significant.
        doc = <<-DOCOPT
          Motif splitter.
          It get a file with a set of motifs and splits it into motifs according to their names.

          Usage:
            split_motifs [options] <collection-file>

          Options:
            -h --help                 Show this screen.
            -e --extension EXT        Extension of output files
            -f --folder FOLDER        Where to save output files [default: .]
        DOCOPT

        # Strip the heredoc's common leading indentation.
        doc.gsub!(/^#{doc[/\A +/]}/,'')
        options = Docopt::docopt(doc, argv: argv)

        folder = options['--folder']
        extension = options['--extension']
        collection_filename = options['<collection-file>']

        # Validate the input first so a bad path does not leave an empty
        # output folder behind.
        raise "File #{collection_filename} not exist" unless File.exist? collection_filename

        # mkdir_p also creates missing parent directories and is a no-op when
        # the folder already exists.
        FileUtils.mkdir_p(folder)

        input = File.read(collection_filename)
        Parser.choose(input).split.each do |motif|
          if motif.is_a? PM
            # e.g. Bioinform::PWM -> "pwm"
            default_extension = motif.class.name.gsub(/^.*::/,'').downcase
          else
            motif = PM.new(motif)
            default_extension = 'mat'
          end
          output_path = set_folder(folder, set_extension(motif.name, extension || default_extension))
          File.open(output_path, 'w'){|f| f.puts motif}
        end
      rescue Docopt::Exit => e
        puts e.message
      end

    end
  end
end
|
data/lib/bioinform/cli.rb
CHANGED
@@ -1,30 +1,30 @@
|
|
1
|
-
require_relative 'support'
|
2
|
-
|
3
|
-
module Bioinform
|
4
|
-
module CLI
|
5
|
-
module Helpers
|
6
|
-
def name_wo_extension(filename)
|
7
|
-
File.join(File.dirname(filename), basename_wo_extension(filename))
|
8
|
-
end
|
9
|
-
def basename_wo_extension(filename)
|
10
|
-
File.basename(filename, File.extname(filename))
|
11
|
-
end
|
12
|
-
def set_extension(filename, extension)
|
13
|
-
"#{filename}.#{extension}"
|
14
|
-
end
|
15
|
-
def set_folder(folder, filename)
|
16
|
-
File.join(folder, filename)
|
17
|
-
end
|
18
|
-
def basename_changed_extension(filename, extension)
|
19
|
-
set_extension(basename_wo_extension(filename), extension)
|
20
|
-
end
|
21
|
-
def change_folder_and_extension(input_filename, extension, folder)
|
22
|
-
set_folder(folder, basename_changed_extension(input_filename, extension))
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
require_relative 'cli/merge_into_collection'
|
29
|
-
require_relative 'cli/pcm2pwm'
|
1
|
+
require_relative 'support'
|
2
|
+
|
3
|
+
module Bioinform
  module CLI
    # Small filename-manipulation helpers. They are plain instance methods, so
    # a module or object gains them through `extend`/`include`.
    module Helpers
      # Path of +filename+ without its last extension, directory part kept.
      # A bare filename gets the "." directory prefix File.dirname reports
      # ("a.txt" -> "./a").
      def name_wo_extension(filename)
        File.join(File.dirname(filename), basename_wo_extension(filename))
      end

      # Last path component of +filename+ without its last extension
      # ("dir/a.b.txt" -> "a.b").
      def basename_wo_extension(filename)
        File.basename(filename, File.extname(filename))
      end

      # Appends +extension+ to +filename+ (nothing is removed from +filename+).
      # One leading dot on +extension+ is dropped, so both "pwm" and ".pwm"
      # yield "name.pwm" rather than "name..pwm".
      def set_extension(filename, extension)
        "#{filename}.#{extension.to_s.sub(/\A\./, '')}"
      end

      # Joins +folder+ and +filename+ into a single path.
      def set_folder(folder, filename)
        File.join(folder, filename)
      end

      # Basename of +filename+ with its last extension replaced by +extension+.
      def basename_changed_extension(filename, extension)
        set_extension(basename_wo_extension(filename), extension)
      end

      # Path inside +folder+ that has the basename of +input_filename+ with its
      # extension replaced by +extension+.
      def change_folder_and_extension(input_filename, extension, folder)
        set_folder(folder, basename_changed_extension(input_filename, extension))
      end
    end
  end
end
|
27
|
+
|
28
|
+
require_relative 'cli/merge_into_collection'
|
29
|
+
require_relative 'cli/pcm2pwm'
|
30
30
|
require_relative 'cli/split_motifs'
|
@@ -1,19 +1,19 @@
|
|
1
|
-
module Bioinform
|
2
|
-
module ConversionAlgorithms
|
3
|
-
module PCM2PPMConverter
|
4
|
-
|
5
|
-
# parameters hash is ignored
|
6
|
-
def self.convert(pcm, parameters = {})
|
7
|
-
matrix = pcm.each_position.map do |pos|
|
8
|
-
pos.map do |el|
|
9
|
-
el.to_f / pcm.count
|
10
|
-
end
|
11
|
-
end
|
12
|
-
PPM.new(pcm.get_parameters.merge(matrix: matrix))
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
1
|
+
module Bioinform
  module ConversionAlgorithms
    # Converts a PCM into a PPM by dividing every cell by the PCM's count.
    module PCM2PPMConverter

      # pcm        - object responding to #each_position, #count and
      #              #get_parameters.
      # parameters - accepted for interface compatibility; the hash is ignored.
      #
      # Returns a PPM built from the PCM's parameters with :matrix replaced by
      # the normalized matrix.
      def self.convert(pcm, parameters = {})
        # The count does not change while iterating, so ask for it once
        # instead of once per matrix cell.
        count = pcm.count
        matrix = pcm.each_position.map do |pos|
          pos.map { |el| el.to_f / count }
        end
        PPM.new(pcm.get_parameters.merge(matrix: matrix))
      end
    end
  end
end
|
17
|
+
|
18
|
+
|
19
19
|
|
@@ -1,20 +1,20 @@
|
|
1
|
-
module Bioinform
|
2
|
-
module ConversionAlgorithms
|
3
|
-
module PCM2PWMConverter
|
4
|
-
def self.convert(pcm, parameters = {})
|
5
|
-
default_parameters = {pseudocount: Math.log(pcm.count),
|
6
|
-
probability: (pcm.probability || [0.25, 0.25, 0.25, 0.25])
|
7
|
-
}
|
8
|
-
parameters = default_parameters.merge(parameters)
|
9
|
-
probability = parameters[:probability]
|
10
|
-
pseudocount = parameters[:pseudocount]
|
11
|
-
matrix = pcm.each_position.map do |pos|
|
12
|
-
pos.each_index.map do |index|
|
13
|
-
Math.log((pos[index] + probability[index] * pseudocount) / (probability[index]*(pcm.count + pseudocount)) )
|
14
|
-
end
|
15
|
-
end
|
16
|
-
PWM.new(pcm.get_parameters.merge(matrix: matrix))
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
1
|
+
module Bioinform
  module ConversionAlgorithms
    # Converts a PCM into a PWM of log-odds scores against a background
    # distribution, smoothing the counts with a pseudocount.
    module PCM2PWMConverter
      # pcm        - object responding to #each_position, #count, #probability
      #              and #get_parameters.
      # parameters - optional overrides:
      #              :pseudocount - defaults to Math.log(pcm.count)
      #              :probability - background per column index; defaults to
      #                             pcm.probability, or 0.25 for each of the
      #                             four columns when that is nil.
      #
      # Each cell becomes
      #   log((cell + p * pseudocount) / (p * (count + pseudocount)))
      # where p is the background probability of the cell's column.
      #
      # Returns a PWM built from the PCM's parameters with :matrix replaced by
      # the computed matrix.
      def self.convert(pcm, parameters = {})
        # The count does not change while iterating, so ask for it once
        # instead of once per matrix cell.
        count = pcm.count
        default_parameters = {pseudocount: Math.log(count),
                              probability: (pcm.probability || [0.25, 0.25, 0.25, 0.25])
                             }
        parameters = default_parameters.merge(parameters)
        probability = parameters[:probability]
        pseudocount = parameters[:pseudocount]
        matrix = pcm.each_position.map do |pos|
          pos.each_index.map do |index|
            Math.log((pos[index] + probability[index] * pseudocount) / (probability[index] * (count + pseudocount)))
          end
        end
        PWM.new(pcm.get_parameters.merge(matrix: matrix))
      end
    end
  end
end
|
@@ -1,75 +1,75 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
require_relative 'motif'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
class Collection
|
6
|
-
attr_accessor :container
|
7
|
-
|
8
|
-
include Parameters
|
9
|
-
make_parameters :name
|
10
|
-
|
11
|
-
# collection name is a tag name for each motif in a collection. But motif can be included in several collections so have several tags
|
12
|
-
def initialize(parameters = {})
|
13
|
-
@container = []
|
14
|
-
@parameters = OpenStruct.new(parameters)
|
15
|
-
yield @parameters if block_given?
|
16
|
-
end
|
17
|
-
|
18
|
-
def size
|
19
|
-
container.size
|
20
|
-
end
|
21
|
-
|
22
|
-
def to_s(with_name = true)
|
23
|
-
result = (with_name) ? "Collection: #{name.to_s}\n" : ''
|
24
|
-
each do |pm, infos|
|
25
|
-
result << pm.to_s << "\n\n"
|
26
|
-
end
|
27
|
-
result
|
28
|
-
end
|
29
|
-
|
30
|
-
def +(other)
|
31
|
-
result = self.class.new
|
32
|
-
container.each do |motif|
|
33
|
-
result.container << motif
|
34
|
-
end
|
35
|
-
other.container.each do |motif|
|
36
|
-
result.container << motif
|
37
|
-
end
|
38
|
-
result
|
39
|
-
end
|
40
|
-
|
41
|
-
def add_pm(pm, info)
|
42
|
-
# pm.mark(self)
|
43
|
-
container << Motif.new(info.marshal_dump.merge(pm: pm))
|
44
|
-
#### What if pm already is a Motif
|
45
|
-
self
|
46
|
-
end
|
47
|
-
|
48
|
-
def <<(pm)
|
49
|
-
add_pm(pm, OpenStruct.new)
|
50
|
-
end
|
51
|
-
|
52
|
-
# collection.each{|motif| ... }
|
53
|
-
# collection.each(:pwm, :threshold){|pwm,threshold| }
|
54
|
-
def each(*args)
|
55
|
-
if block_given?
|
56
|
-
if args.empty?
|
57
|
-
container.each{|motif| yield motif}
|
58
|
-
else
|
59
|
-
container.each{|motif| yield( *args.map{|arg| motif.parameters.send(arg)} ) }
|
60
|
-
end
|
61
|
-
else
|
62
|
-
self.to_enum(:each, *args)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
include Enumerable
|
67
|
-
|
68
|
-
def ==(other)
|
69
|
-
(parameters == other.parameters) && (container == other.container)
|
70
|
-
rescue
|
71
|
-
false
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
1
|
+
require 'ostruct'
|
2
|
+
require_relative 'motif'
|
3
|
+
|
4
|
+
module Bioinform
  # An ordered list of Motif objects plus collection-level parameters
  # (currently only :name).
  class Collection
    attr_accessor :container

    include Parameters
    include Enumerable
    make_parameters :name

    # collection name is a tag name for each motif in a collection. But motif can be included in several collections so have several tags
    #
    # parameters - Hash of initial collection parameters (e.g. name: 'x').
    # Yields the parameters OpenStruct, when a block is given, for further setup.
    def initialize(parameters = {})
      @container = []
      @parameters = OpenStruct.new(parameters)
      yield @parameters if block_given?
    end

    # Number of motifs in the collection.
    def size
      container.size
    end

    # Text form: an optional "Collection: <name>" header line followed by every
    # motif's #to_s, each followed by a blank line.
    def to_s(with_name = true)
      result = (with_name) ? "Collection: #{name.to_s}\n" : ''
      each do |motif|
        result << motif.to_s << "\n\n"
      end
      result
    end

    # Returns a new collection holding the motifs of self followed by those of
    # +other+. The result is created with default parameters, so neither
    # operand's parameters are carried over.
    def +(other)
      result = self.class.new
      result.container.concat(container)
      result.container.concat(other.container)
      result
    end

    # Wraps +pm+ into a Motif together with the fields of +info+ (an object
    # responding to #marshal_dump, e.g. an OpenStruct) and appends it.
    # Returns self so calls can be chained.
    def add_pm(pm, info)
      # pm.mark(self)
      container << Motif.new(info.marshal_dump.merge(pm: pm))
      #### What if pm already is a Motif
      self
    end

    # Appends +pm+ without any additional info. Returns self.
    def <<(pm)
      add_pm(pm, OpenStruct.new)
    end

    # collection.each{|motif| ... }
    # collection.each(:pwm, :threshold){|pwm,threshold| }
    #
    # With arguments, yields the named entries of each motif's parameters
    # instead of the motif itself. Without a block, returns an Enumerator.
    def each(*args)
      return to_enum(:each, *args) unless block_given?

      if args.empty?
        container.each{|motif| yield motif}
      else
        # public_send: look the name up as a stored parameter only, never as a
        # private method such as Kernel#format or Kernel#test.
        container.each{|motif| yield( *args.map{|arg| motif.parameters.public_send(arg)} ) }
      end
    end

    # Two collections are equal when both their parameters and their motif
    # lists are equal. An object without that interface is simply not equal.
    def ==(other)
      (parameters == other.parameters) && (container == other.container)
    rescue NoMethodError
      false
    end

  end
end
|