bioinform 0.1.12 → 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +16 -16
  4. data/LICENSE +21 -21
  5. data/README.md +35 -35
  6. data/Rakefile +4 -4
  7. data/TODO.txt +37 -37
  8. data/bin/merge_into_collection +3 -3
  9. data/bin/pcm2pwm +3 -3
  10. data/bin/split_motifs +3 -3
  11. data/bioinform.gemspec +19 -19
  12. data/lib/bioinform/cli/convert_motif.rb +107 -107
  13. data/lib/bioinform/cli/merge_into_collection.rb +79 -79
  14. data/lib/bioinform/cli/pcm2pwm.rb +46 -46
  15. data/lib/bioinform/cli/split_motifs.rb +46 -46
  16. data/lib/bioinform/cli.rb +29 -29
  17. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
  18. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
  19. data/lib/bioinform/data_models/collection.rb +74 -74
  20. data/lib/bioinform/data_models/motif.rb +55 -55
  21. data/lib/bioinform/data_models/pcm.rb +23 -23
  22. data/lib/bioinform/data_models/pm.rb +169 -169
  23. data/lib/bioinform/data_models/ppm.rb +9 -9
  24. data/lib/bioinform/data_models/pwm.rb +55 -55
  25. data/lib/bioinform/data_models.rb +10 -10
  26. data/lib/bioinform/formatters/raw_formatter.rb +40 -40
  27. data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
  28. data/lib/bioinform/formatters.rb +1 -1
  29. data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
  30. data/lib/bioinform/parsers/parser.rb +87 -87
  31. data/lib/bioinform/parsers/splittable_parser.rb +56 -56
  32. data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
  33. data/lib/bioinform/parsers/string_parser.rb +71 -71
  34. data/lib/bioinform/parsers/trivial_parser.rb +33 -33
  35. data/lib/bioinform/parsers/yaml_parser.rb +34 -34
  36. data/lib/bioinform/parsers.rb +6 -6
  37. data/lib/bioinform/support/array_product.rb +5 -5
  38. data/lib/bioinform/support/array_zip.rb +5 -5
  39. data/lib/bioinform/support/collect_hash.rb +6 -6
  40. data/lib/bioinform/support/deep_dup.rb +4 -4
  41. data/lib/bioinform/support/delete_many.rb +13 -13
  42. data/lib/bioinform/support/inverf.rb +12 -12
  43. data/lib/bioinform/support/multiline_squish.rb +5 -5
  44. data/lib/bioinform/support/parameters.rb +27 -27
  45. data/lib/bioinform/support/partial_sums.rb +15 -15
  46. data/lib/bioinform/support/same_by.rb +12 -12
  47. data/lib/bioinform/support/strip_doc.rb +8 -8
  48. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
  49. data/lib/bioinform/support.rb +17 -17
  50. data/lib/bioinform/version.rb +3 -3
  51. data/lib/bioinform.rb +10 -10
  52. data/spec/cli/cli_spec.rb +13 -13
  53. data/spec/cli/convert_motif_spec.rb +106 -106
  54. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
  55. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
  56. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
  57. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
  58. data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
  59. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
  60. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
  61. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
  62. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
  63. data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
  64. data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
  65. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
  66. data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
  67. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
  68. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
  69. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
  70. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
  71. data/spec/cli/data/split_motifs/collection.yaml +188 -188
  72. data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
  73. data/spec/cli/merge_into_collection_spec.rb +99 -99
  74. data/spec/cli/pcm2pwm_spec.rb +79 -79
  75. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
  76. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
  77. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
  78. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
  79. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  80. data/spec/cli/split_motifs_spec.rb +76 -76
  81. data/spec/data_models/collection_spec.rb +97 -97
  82. data/spec/data_models/motif_spec.rb +223 -223
  83. data/spec/data_models/pcm_spec.rb +55 -55
  84. data/spec/data_models/pm_spec.rb +359 -359
  85. data/spec/data_models/ppm_spec.rb +7 -7
  86. data/spec/data_models/pwm_spec.rb +82 -82
  87. data/spec/fabricators/collection_fabricator.rb +7 -7
  88. data/spec/fabricators/motif_fabricator.rb +32 -32
  89. data/spec/fabricators/motif_formats_fabricator.rb +124 -124
  90. data/spec/fabricators/pcm_fabricator.rb +24 -24
  91. data/spec/fabricators/pm_fabricator.rb +51 -51
  92. data/spec/fabricators/ppm_fabricator.rb +13 -13
  93. data/spec/fabricators/pwm_fabricator.rb +16 -16
  94. data/spec/parsers/parser_spec.rb +152 -152
  95. data/spec/parsers/string_fantom_parser_spec.rb +69 -69
  96. data/spec/parsers/string_parser_spec.rb +76 -76
  97. data/spec/parsers/trivial_parser_spec.rb +63 -63
  98. data/spec/parsers/yaml_parser_spec.rb +50 -50
  99. data/spec/spec_helper.rb +10 -10
  100. data/spec/spec_helper_source.rb +59 -59
  101. data/spec/support/advanced_scan_spec.rb +31 -31
  102. data/spec/support/array_product_spec.rb +14 -14
  103. data/spec/support/array_zip_spec.rb +14 -14
  104. data/spec/support/collect_hash_spec.rb +14 -14
  105. data/spec/support/delete_many_spec.rb +43 -43
  106. data/spec/support/inverf_spec.rb +18 -18
  107. data/spec/support/multiline_squish_spec.rb +24 -24
  108. data/spec/support/partial_sums_spec.rb +30 -30
  109. data/spec/support/same_by_spec.rb +35 -35
  110. metadata +3 -3
@@ -1,80 +1,80 @@
1
- require_relative '../../bioinform'
2
- require 'docopt'
3
- require 'shellwords'
4
- require 'yaml'
5
-
6
- module Bioinform
7
- module CLI
8
- module MergeIntoCollection
9
- extend Bioinform::CLI::Helpers
10
- def self.main(argv)
11
- doc = <<-DOCOPT
12
- Tool for merging multiple motifs into a single collection file.
13
- It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.
14
-
15
- Usage:
16
- merge_into_collection [options] [<pm-files>...]
17
-
18
- Options:
19
- -h --help Show this screen.
20
- -n --name NAME Specify name for a collection. Default filename is based on this parameter
21
- -o --output-file FILE Output file for resulting collection
22
- -m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
23
- -p --plain-text Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
24
- DOCOPT
25
-
26
- doc.gsub!(/^#{doc[/\A +/]}/,'')
27
- options = Docopt::docopt(doc, argv: argv)
28
-
29
- plain_text = options['--plain-text']
30
- name = options['--name']
31
- if options['--plain-text']
32
- output_file = options['--output-file'] || set_extension(name || 'collection', 'txt')
33
- else
34
- output_file = options['--output-file'] || set_extension(name || 'collection', 'yaml')
35
- end
36
- data_model = Bioinform.const_get(options['--data-model'].upcase)
37
-
38
- if options['<pm-files>'].empty?
39
- filelist = $stdin.read.shellsplit
40
- else
41
- filelist = options['<pm-files>']
42
- end
43
-
44
- filelist = filelist.map do |data_source|
45
- if File.directory? data_source
46
- Dir.glob(File.join(data_source, '*'))
47
- elsif File.file? data_source
48
- data_source
49
- else
50
- raise "File or directory #{data_source} can't be found"
51
- end
52
- end.flatten
53
-
54
- collection = Collection.new
55
- collection.name = name if name
56
-
57
- filelist.each do |filename|
58
- data_model.split_on_motifs(File.read(filename)).each do |pm|
59
- pm.name ||= File.basename(filename, File.extname(filename))
60
- collection << pm
61
- end
62
- end
63
-
64
- if plain_text
65
- File.open(output_file, 'w') do |f|
66
- collection.each(options['--data-model'].downcase) do |pm|
67
- f.puts(pm.to_s + "\n\n")
68
- end
69
- end
70
- else
71
- File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
72
- end
73
-
74
- rescue Docopt::Exit => e
75
- puts e.message
76
- end
77
-
78
- end
79
- end
1
+ require_relative '../../bioinform'
2
+ require 'docopt'
3
+ require 'shellwords'
4
+ require 'yaml'
5
+
6
+ module Bioinform
7
+ module CLI
8
+ module MergeIntoCollection
9
+ extend Bioinform::CLI::Helpers
10
+ def self.main(argv)
11
+ doc = <<-DOCOPT
12
+ Tool for merging multiple motifs into a single collection file.
13
+ It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.
14
+
15
+ Usage:
16
+ merge_into_collection [options] [<pm-files>...]
17
+
18
+ Options:
19
+ -h --help Show this screen.
20
+ -n --name NAME Specify name for a collection. Default filename is based on this parameter
21
+ -o --output-file FILE Output file for resulting collection
22
+ -m --data-model MODEL Data model: PM, PCM, PPM or PWM [default: PM]
23
+ -p --plain-text Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
24
+ DOCOPT
25
+
26
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
27
+ options = Docopt::docopt(doc, argv: argv)
28
+
29
+ plain_text = options['--plain-text']
30
+ name = options['--name']
31
+ if options['--plain-text']
32
+ output_file = options['--output-file'] || set_extension(name || 'collection', 'txt')
33
+ else
34
+ output_file = options['--output-file'] || set_extension(name || 'collection', 'yaml')
35
+ end
36
+ data_model = Bioinform.const_get(options['--data-model'].upcase)
37
+
38
+ if options['<pm-files>'].empty?
39
+ filelist = $stdin.read.shellsplit
40
+ else
41
+ filelist = options['<pm-files>']
42
+ end
43
+
44
+ filelist = filelist.map do |data_source|
45
+ if File.directory? data_source
46
+ Dir.glob(File.join(data_source, '*'))
47
+ elsif File.file? data_source
48
+ data_source
49
+ else
50
+ raise "File or directory #{data_source} can't be found"
51
+ end
52
+ end.flatten
53
+
54
+ collection = Collection.new
55
+ collection.name = name if name
56
+
57
+ filelist.each do |filename|
58
+ data_model.split_on_motifs(File.read(filename)).each do |pm|
59
+ pm.name ||= File.basename(filename, File.extname(filename))
60
+ collection << pm
61
+ end
62
+ end
63
+
64
+ if plain_text
65
+ File.open(output_file, 'w') do |f|
66
+ collection.each(options['--data-model'].downcase) do |pm|
67
+ f.puts(pm.to_s + "\n\n")
68
+ end
69
+ end
70
+ else
71
+ File.open(output_file, 'w'){|f| YAML.dump(collection, f) }
72
+ end
73
+
74
+ rescue Docopt::Exit => e
75
+ puts e.message
76
+ end
77
+
78
+ end
79
+ end
80
80
  end
@@ -1,47 +1,47 @@
1
- require_relative '../../bioinform'
2
- require 'docopt'
3
- require 'shellwords'
4
-
5
- module Bioinform
6
- module CLI
7
- module PCM2PWM
8
- extend Bioinform::CLI::Helpers
9
- def self.main(argv)
10
- doc = <<-DOCOPT
11
- PCM to PWM converter.
12
- It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
13
- When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
14
-
15
- Usage:
16
- pcm2pwm [options] [<pcm-files>...]
17
-
18
- Options:
19
- -h --help Show this screen.
20
- -e --extension EXT Extension of output files [default: pwm]
21
- -f --folder FOLDER Where to save output files [default: .]
22
- DOCOPT
23
-
24
- doc.gsub!(/^#{doc[/\A +/]}/,'')
25
- options = Docopt::docopt(doc, argv: argv)
26
-
27
- pcm_files = options['<pcm-files>']
28
- folder = options['--folder']
29
- extension = options['--extension']
30
-
31
- Dir.mkdir(folder) unless Dir.exist?(folder)
32
- filelist = (pcm_files.empty?) ? $stdin.read.shellsplit : pcm_files
33
-
34
- filelist.each do |filename|
35
- pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
36
- File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
37
- f.puts pwm
38
- end
39
- end
40
-
41
- rescue Docopt::Exit => e
42
- puts e.message
43
- end
44
-
45
- end
46
- end
1
+ require_relative '../../bioinform'
2
+ require 'docopt'
3
+ require 'shellwords'
4
+
5
+ module Bioinform
6
+ module CLI
7
+ module PCM2PWM
8
+ extend Bioinform::CLI::Helpers
9
+ def self.main(argv)
10
+ doc = <<-DOCOPT
11
+ PCM to PWM converter.
12
+ It transforms files with PCMs into files with PWMs. Folder for resulting files to save files can be specified. Resulting PWM files have the same name as original file but have another extension (.pwm by default).
13
+ When filelist is empty, it's obtained from STDIN. One can use it: `ls -b pcm_folder/*.pcm | pcm2pwm` (ls -b option escape spaces in filenames)
14
+
15
+ Usage:
16
+ pcm2pwm [options] [<pcm-files>...]
17
+
18
+ Options:
19
+ -h --help Show this screen.
20
+ -e --extension EXT Extension of output files [default: pwm]
21
+ -f --folder FOLDER Where to save output files [default: .]
22
+ DOCOPT
23
+
24
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
25
+ options = Docopt::docopt(doc, argv: argv)
26
+
27
+ pcm_files = options['<pcm-files>']
28
+ folder = options['--folder']
29
+ extension = options['--extension']
30
+
31
+ Dir.mkdir(folder) unless Dir.exist?(folder)
32
+ filelist = (pcm_files.empty?) ? $stdin.read.shellsplit : pcm_files
33
+
34
+ filelist.each do |filename|
35
+ pwm = Bioinform::PCM.new( File.read(filename) ).to_pwm
36
+ File.open(change_folder_and_extension(filename, extension, folder), 'w') do |f|
37
+ f.puts pwm
38
+ end
39
+ end
40
+
41
+ rescue Docopt::Exit => e
42
+ puts e.message
43
+ end
44
+
45
+ end
46
+ end
47
47
  end
@@ -1,47 +1,47 @@
1
- require_relative '../../bioinform'
2
- require 'docopt'
3
-
4
- module Bioinform
5
- module CLI
6
- module SplitMotifs
7
- extend Bioinform::CLI::Helpers
8
- def self.main(argv)
9
- doc = <<-DOCOPT
10
- Motif splitter.
11
- It get a file with a set of motifs and splits it into motifs according to their names.
12
-
13
- Usage:
14
- split_motifs [options] <collection-file>
15
-
16
- Options:
17
- -h --help Show this screen.
18
- -e --extension EXT Extension of output files
19
- -f --folder FOLDER Where to save output files [default: .]
20
- DOCOPT
21
-
22
- doc.gsub!(/^#{doc[/\A +/]}/,'')
23
- options = Docopt::docopt(doc, argv: argv)
24
-
25
- folder = options['--folder']
26
- extension = options['--extension']
27
- collection_filename = options['<collection-file>']
28
-
29
- Dir.mkdir(folder) unless Dir.exist?(folder)
30
- raise "File #{collection_filename} not exist" unless File.exist? collection_filename
31
-
32
- input = File.read(collection_filename)
33
- Parser.choose(input).split.each do |motif|
34
- if motif.is_a? PM
35
- File.open(set_folder(folder, set_extension(motif.name, extension || motif.class.name.gsub(/^.*::/,'').downcase)), 'w'){|f| f.puts motif}
36
- else
37
- motif = PM.new(motif)
38
- File.open(set_folder(folder, set_extension(motif.name, extension || 'mat')), 'w'){|f| f.puts motif}
39
- end
40
- end
41
- rescue Docopt::Exit => e
42
- puts e.message
43
- end
44
-
45
- end
46
- end
1
+ require_relative '../../bioinform'
2
+ require 'docopt'
3
+
4
+ module Bioinform
5
+ module CLI
6
+ module SplitMotifs
7
+ extend Bioinform::CLI::Helpers
8
+ def self.main(argv)
9
+ doc = <<-DOCOPT
10
+ Motif splitter.
11
+ It get a file with a set of motifs and splits it into motifs according to their names.
12
+
13
+ Usage:
14
+ split_motifs [options] <collection-file>
15
+
16
+ Options:
17
+ -h --help Show this screen.
18
+ -e --extension EXT Extension of output files
19
+ -f --folder FOLDER Where to save output files [default: .]
20
+ DOCOPT
21
+
22
+ doc.gsub!(/^#{doc[/\A +/]}/,'')
23
+ options = Docopt::docopt(doc, argv: argv)
24
+
25
+ folder = options['--folder']
26
+ extension = options['--extension']
27
+ collection_filename = options['<collection-file>']
28
+
29
+ Dir.mkdir(folder) unless Dir.exist?(folder)
30
+ raise "File #{collection_filename} not exist" unless File.exist? collection_filename
31
+
32
+ input = File.read(collection_filename)
33
+ Parser.choose(input).split.each do |motif|
34
+ if motif.is_a? PM
35
+ File.open(set_folder(folder, set_extension(motif.name, extension || motif.class.name.gsub(/^.*::/,'').downcase)), 'w'){|f| f.puts motif}
36
+ else
37
+ motif = PM.new(motif)
38
+ File.open(set_folder(folder, set_extension(motif.name, extension || 'mat')), 'w'){|f| f.puts motif}
39
+ end
40
+ end
41
+ rescue Docopt::Exit => e
42
+ puts e.message
43
+ end
44
+
45
+ end
46
+ end
47
47
  end
data/lib/bioinform/cli.rb CHANGED
@@ -1,30 +1,30 @@
1
- require_relative 'support'
2
-
3
- module Bioinform
4
- module CLI
5
- module Helpers
6
- def name_wo_extension(filename)
7
- File.join(File.dirname(filename), basename_wo_extension(filename))
8
- end
9
- def basename_wo_extension(filename)
10
- File.basename(filename, File.extname(filename))
11
- end
12
- def set_extension(filename, extension)
13
- "#{filename}.#{extension}"
14
- end
15
- def set_folder(folder, filename)
16
- File.join(folder, filename)
17
- end
18
- def basename_changed_extension(filename, extension)
19
- set_extension(basename_wo_extension(filename), extension)
20
- end
21
- def change_folder_and_extension(input_filename, extension, folder)
22
- set_folder(folder, basename_changed_extension(input_filename, extension))
23
- end
24
- end
25
- end
26
- end
27
-
28
- require_relative 'cli/merge_into_collection'
29
- require_relative 'cli/pcm2pwm'
1
+ require_relative 'support'
2
+
3
+ module Bioinform
4
+ module CLI
5
+ module Helpers
6
+ def name_wo_extension(filename)
7
+ File.join(File.dirname(filename), basename_wo_extension(filename))
8
+ end
9
+ def basename_wo_extension(filename)
10
+ File.basename(filename, File.extname(filename))
11
+ end
12
+ def set_extension(filename, extension)
13
+ "#{filename}.#{extension}"
14
+ end
15
+ def set_folder(folder, filename)
16
+ File.join(folder, filename)
17
+ end
18
+ def basename_changed_extension(filename, extension)
19
+ set_extension(basename_wo_extension(filename), extension)
20
+ end
21
+ def change_folder_and_extension(input_filename, extension, folder)
22
+ set_folder(folder, basename_changed_extension(input_filename, extension))
23
+ end
24
+ end
25
+ end
26
+ end
27
+
28
+ require_relative 'cli/merge_into_collection'
29
+ require_relative 'cli/pcm2pwm'
30
30
  require_relative 'cli/split_motifs'
@@ -1,19 +1,19 @@
1
- module Bioinform
2
- module ConversionAlgorithms
3
- module PCM2PPMConverter
4
-
5
- # parameters hash is ignored
6
- def self.convert(pcm, parameters = {})
7
- matrix = pcm.each_position.map do |pos|
8
- pos.map do |el|
9
- el.to_f / pcm.count
10
- end
11
- end
12
- PPM.new(pcm.get_parameters.merge(matrix: matrix))
13
- end
14
- end
15
- end
16
- end
17
-
18
-
1
+ module Bioinform
2
+ module ConversionAlgorithms
3
+ module PCM2PPMConverter
4
+
5
+ # parameters hash is ignored
6
+ def self.convert(pcm, parameters = {})
7
+ matrix = pcm.each_position.map do |pos|
8
+ pos.map do |el|
9
+ el.to_f / pcm.count
10
+ end
11
+ end
12
+ PPM.new(pcm.get_parameters.merge(matrix: matrix))
13
+ end
14
+ end
15
+ end
16
+ end
17
+
18
+
19
19
 
@@ -1,20 +1,20 @@
1
- module Bioinform
2
- module ConversionAlgorithms
3
- module PCM2PWMConverter
4
- def self.convert(pcm, parameters = {})
5
- default_parameters = {pseudocount: Math.log(pcm.count),
6
- probability: (pcm.probability || [0.25, 0.25, 0.25, 0.25])
7
- }
8
- parameters = default_parameters.merge(parameters)
9
- probability = parameters[:probability]
10
- pseudocount = parameters[:pseudocount]
11
- matrix = pcm.each_position.map do |pos|
12
- pos.each_index.map do |index|
13
- Math.log((pos[index] + probability[index] * pseudocount) / (probability[index]*(pcm.count + pseudocount)) )
14
- end
15
- end
16
- PWM.new(pcm.get_parameters.merge(matrix: matrix))
17
- end
18
- end
19
- end
1
+ module Bioinform
2
+ module ConversionAlgorithms
3
+ module PCM2PWMConverter
4
+ def self.convert(pcm, parameters = {})
5
+ default_parameters = {pseudocount: Math.log(pcm.count),
6
+ probability: (pcm.probability || [0.25, 0.25, 0.25, 0.25])
7
+ }
8
+ parameters = default_parameters.merge(parameters)
9
+ probability = parameters[:probability]
10
+ pseudocount = parameters[:pseudocount]
11
+ matrix = pcm.each_position.map do |pos|
12
+ pos.each_index.map do |index|
13
+ Math.log((pos[index] + probability[index] * pseudocount) / (probability[index]*(pcm.count + pseudocount)) )
14
+ end
15
+ end
16
+ PWM.new(pcm.get_parameters.merge(matrix: matrix))
17
+ end
18
+ end
19
+ end
20
20
  end
@@ -1,75 +1,75 @@
1
- require 'ostruct'
2
- require_relative 'motif'
3
-
4
- module Bioinform
5
- class Collection
6
- attr_accessor :container
7
-
8
- include Parameters
9
- make_parameters :name
10
-
11
- # collection name is a tag name for each motif in a collection. But motif can be included in several collections so have several tags
12
- def initialize(parameters = {})
13
- @container = []
14
- @parameters = OpenStruct.new(parameters)
15
- yield @parameters if block_given?
16
- end
17
-
18
- def size
19
- container.size
20
- end
21
-
22
- def to_s(with_name = true)
23
- result = (with_name) ? "Collection: #{name.to_s}\n" : ''
24
- each do |pm, infos|
25
- result << pm.to_s << "\n\n"
26
- end
27
- result
28
- end
29
-
30
- def +(other)
31
- result = self.class.new
32
- container.each do |motif|
33
- result.container << motif
34
- end
35
- other.container.each do |motif|
36
- result.container << motif
37
- end
38
- result
39
- end
40
-
41
- def add_pm(pm, info)
42
- # pm.mark(self)
43
- container << Motif.new(info.marshal_dump.merge(pm: pm))
44
- #### What if pm already is a Motif
45
- self
46
- end
47
-
48
- def <<(pm)
49
- add_pm(pm, OpenStruct.new)
50
- end
51
-
52
- # collection.each{|motif| ... }
53
- # collection.each(:pwm, :threshold){|pwm,threshold| }
54
- def each(*args)
55
- if block_given?
56
- if args.empty?
57
- container.each{|motif| yield motif}
58
- else
59
- container.each{|motif| yield( *args.map{|arg| motif.parameters.send(arg)} ) }
60
- end
61
- else
62
- self.to_enum(:each, *args)
63
- end
64
- end
65
-
66
- include Enumerable
67
-
68
- def ==(other)
69
- (parameters == other.parameters) && (container == other.container)
70
- rescue
71
- false
72
- end
73
-
74
- end
1
+ require 'ostruct'
2
+ require_relative 'motif'
3
+
4
+ module Bioinform
5
+ class Collection
6
+ attr_accessor :container
7
+
8
+ include Parameters
9
+ make_parameters :name
10
+
11
+ # collection name is a tag name for each motif in a collection. But motif can be included in several collections so have several tags
12
+ def initialize(parameters = {})
13
+ @container = []
14
+ @parameters = OpenStruct.new(parameters)
15
+ yield @parameters if block_given?
16
+ end
17
+
18
+ def size
19
+ container.size
20
+ end
21
+
22
+ def to_s(with_name = true)
23
+ result = (with_name) ? "Collection: #{name.to_s}\n" : ''
24
+ each do |pm, infos|
25
+ result << pm.to_s << "\n\n"
26
+ end
27
+ result
28
+ end
29
+
30
+ def +(other)
31
+ result = self.class.new
32
+ container.each do |motif|
33
+ result.container << motif
34
+ end
35
+ other.container.each do |motif|
36
+ result.container << motif
37
+ end
38
+ result
39
+ end
40
+
41
+ def add_pm(pm, info)
42
+ # pm.mark(self)
43
+ container << Motif.new(info.marshal_dump.merge(pm: pm))
44
+ #### What if pm already is a Motif
45
+ self
46
+ end
47
+
48
+ def <<(pm)
49
+ add_pm(pm, OpenStruct.new)
50
+ end
51
+
52
+ # collection.each{|motif| ... }
53
+ # collection.each(:pwm, :threshold){|pwm,threshold| }
54
+ def each(*args)
55
+ if block_given?
56
+ if args.empty?
57
+ container.each{|motif| yield motif}
58
+ else
59
+ container.each{|motif| yield( *args.map{|arg| motif.parameters.send(arg)} ) }
60
+ end
61
+ else
62
+ self.to_enum(:each, *args)
63
+ end
64
+ end
65
+
66
+ include Enumerable
67
+
68
+ def ==(other)
69
+ (parameters == other.parameters) && (container == other.container)
70
+ rescue
71
+ false
72
+ end
73
+
74
+ end
75
75
  end