macroape 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,100 +1,120 @@
1
- require_relative '../../macroape'
2
- require 'shellwords'
3
-
4
- module Macroape
5
- module CLI
6
- module AlignMotifs
7
-
8
- def self.main(argv)
9
- doc = <<-EOS.strip_doc
10
- Align motifs tool.
11
- It takes motifs and builds alignment of each motif to the first (leader) motif.
12
-
13
- Output has format:
14
- pwm_file_1 shift_1 orientation_1
15
- pwm_file_2 shift_2 orientation_2
16
- pwm_file_3 shift_3 orientation_3
17
-
18
- Usage:
19
- #{run_tool_cmd} [options] <leader pm> <rest pm files>...
20
- or
21
- ls rest_pms/*.pm | #{run_tool_cmd} [options]
22
- or
23
- ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
24
-
25
- Options:
26
- [-p <P-value>]
27
- [-d <discretization level>]
28
- [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
29
- [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
30
- [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
31
- EOS
32
-
33
- if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
34
- $stderr.puts doc
35
- exit
36
- end
37
-
38
- leader_background = [1,1,1,1]
39
- rest_motifs_background = [1,1,1,1]
40
- discretization = 1
41
- pvalue = 0.0005
42
- max_hash_size = 10000000
43
- max_pair_hash_size = 10000
44
- pvalue_boundary = :upper
45
-
46
- data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
47
-
48
- while argv.first && argv.first.start_with?('-')
49
- case argv.shift
50
- when '-p'
51
- pvalue = argv.shift.to_f
52
- when '-d'
53
- discretization = argv.shift.to_f
54
- when '--max-hash-size'
55
- max_hash_size = argv.shift.to_i
56
- when '--max-2d-hash-size'
57
- max_pair_hash_size = argv.shift.to_i
58
- when '-b'
59
- rest_motifs_background = leader_background = argv.shift.split(',').map(&:to_f)
60
- when '-b1'
61
- leader_background = argv.shift.split(',').map(&:to_f)
62
- when '-b2'
63
- rest_motifs_background = argv.shift.split(',').map(&:to_f)
64
- when '--boundary'
65
- pvalue_boundary = argv.shift.to_sym
66
- raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
67
- end
68
- end
69
-
70
- pwm_files = argv
71
- pwm_files += $stdin.read.shellsplit unless $stdin.tty?
72
- leader_pwm_file = pwm_files.first
73
- rest_pwm_files = pwm_files[1..-1]
74
- rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
75
-
76
- raise 'Specify leader file' unless leader_pwm_file
77
-
78
- shifts = []
79
- shifts << [leader_pwm_file, 0, :direct]
80
- pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
81
- pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
82
-
83
- rest_pwm_files.each do |motif_name|
84
- pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
85
- pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
86
- cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
87
- info = cmp.jaccard_by_pvalue(pvalue)
88
- shifts << [motif_name, info[:shift], info[:orientation]]
89
- end
90
-
91
- shifts.each do |motif_name, shift,orientation|
92
- puts "#{motif_name}\t#{shift}\t#{orientation}"
93
- end
94
- rescue => err
95
- $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
96
- end
97
-
98
- end
99
- end
100
- end
1
+ require_relative '../../macroape'
2
+ require 'shellwords'
3
+
4
+ module Macroape
5
+ module CLI
6
+ module AlignMotifs
7
+
8
+ def self.main(argv)
9
+ doc = <<-EOS.strip_doc
10
+ Align motifs tool.
11
+ It takes motifs and builds alignment of each motif to the first (leader) motif.
12
+
13
+ Output has format:
14
+ pwm_file_1 shift_1 orientation_1
15
+ pwm_file_2 shift_2 orientation_2
16
+ pwm_file_3 shift_3 orientation_3
17
+
18
+ Usage:
19
+ #{run_tool_cmd} [options] <leader pm> <rest pm files>...
20
+ or
21
+ ls rest_pms/*.pm | #{run_tool_cmd} [options]
22
+ or
23
+ ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
24
+
25
+ Options:
26
+ [-p <P-value>]
27
+ [-d <discretization level>]
28
+ [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
29
+ [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
30
+ [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
31
+ EOS
32
+
33
+ if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
34
+ $stderr.puts doc
35
+ exit
36
+ end
37
+
38
+ leader_background = Bioinform::Background::Wordwise
39
+ rest_motifs_background = Bioinform::Background::Wordwise
40
+ discretization = 1
41
+ pvalue = 0.0005
42
+ max_hash_size = 10000000
43
+ max_pair_hash_size = 10000
44
+ pvalue_boundary = :upper
45
+
46
+ data_model = argv.delete('--pcm') ? :pcm : :pwm
47
+
48
+ while argv.first && argv.first.start_with?('-')
49
+ case argv.shift
50
+ when '-p'
51
+ pvalue = argv.shift.to_f
52
+ when '-d'
53
+ discretization = argv.shift.to_f
54
+ when '--max-hash-size'
55
+ max_hash_size = argv.shift.to_i
56
+ when '--max-2d-hash-size'
57
+ max_pair_hash_size = argv.shift.to_i
58
+ when '-b'
59
+ rest_motifs_background = leader_background = Bioinform::Background.from_string(argv.shift)
60
+ when '-b1'
61
+ leader_background = Bioinform::Background.from_string(argv.shift)
62
+ when '-b2'
63
+ rest_motifs_background = Bioinform::Background.from_string(argv.shift)
64
+ when '--boundary'
65
+ pvalue_boundary = argv.shift.to_sym
66
+ raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
67
+ end
68
+ end
69
+
70
+ pwm_files = argv
71
+ pwm_files += $stdin.read.shellsplit unless $stdin.tty?
72
+ leader_pwm_file = pwm_files.first
73
+ rest_pwm_files = pwm_files[1..-1]
74
+ rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
75
+
76
+ raise 'Specify leader file' unless leader_pwm_file
77
+
78
+ shifts = []
79
+ shifts << [leader_pwm_file, 0, :direct]
80
+
81
+ input_first = File.read(leader_pwm_file)
82
+ input_first = Bioinform::MatrixParser.new.parse!(input_first)
83
+ case data_model
84
+ when :pcm
85
+ pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
86
+ pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: leader_background).convert(pcm_first)
87
+ when :pwm
88
+ pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
89
+ end
90
+
91
+ pwm_first = pwm_first.discreted(discretization)
92
+ counting_first = PWMCounting.new(pwm_first, background: leader_background, max_hash_size: max_hash_size)
93
+
94
+ rest_pwm_files.each do |motif_name|
95
+ input_second = File.read(motif_name)
96
+ input_second = Bioinform::MatrixParser.new.parse!(input_second)
97
+ case data_model
98
+ when :pcm
99
+ pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
100
+ pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: rest_motifs_background).convert(pcm_second)
101
+ when :pwm
102
+ pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
103
+ end
104
+ pwm_second = pwm_second.discreted(discretization)
105
+ counting_second = PWMCounting.new(pwm_second, background: rest_motifs_background, max_hash_size: max_hash_size)
106
+ cmp = Macroape::PWMCompare.new(counting_first, counting_second).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
107
+ info = cmp.jaccard_by_pvalue(pvalue)
108
+ shifts << [motif_name, info[:shift], info[:orientation]]
109
+ end
110
+
111
+ shifts.each do |motif_name, shift,orientation|
112
+ puts "#{motif_name}\t#{shift}\t#{orientation}"
113
+ end
114
+ rescue => err
115
+ $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
116
+ end
117
+
118
+ end
119
+ end
120
+ end
@@ -1,156 +1,157 @@
1
- require_relative '../../macroape'
2
-
3
- module Macroape
4
- module CLI
5
- module EvalAlignment
6
-
7
- def self.main(argv)
8
- doc = <<-EOS.strip_doc
9
- Command-line format:
10
- #{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
11
-
12
- Options:
13
- [-p <P-value>]
14
- [-d <discretization level>]
15
- [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
16
- [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
17
- [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
18
- [--first-threshold <threshold for the first matrix>]
19
- [--second-threshold <threshold for the second matrix>]
20
-
21
- Examples:
22
- #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
23
- #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
24
- EOS
25
-
26
- if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
27
- $stderr.puts doc
28
- exit
29
- end
30
-
31
- pvalue = 0.0005
32
- discretization = 10.0
33
-
34
- first_background = [1,1,1,1]
35
- second_background = [1,1,1,1]
36
- max_hash_size = 10000000
37
- max_pair_hash_size = 10000
38
- pvalue_boundary = :upper
39
-
40
- data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
41
-
42
- first_file = argv.shift
43
- second_file = argv.shift
44
-
45
- shift = argv.shift
46
- orientation = argv.shift
47
-
48
- raise 'You should specify two input sources (each is filename or .stdin)' unless first_file and second_file
49
- raise 'You should specify shift' unless shift
50
- raise 'You should specify orientation' unless orientation
51
-
52
- shift = shift.to_i
53
- orientation = orientation.to_sym
54
-
55
- case orientation
56
- when :direct
57
- reverse = false
58
- when :revcomp
59
- reverse = true
60
- else
61
- raise 'Unknown orientation(direct/revcomp)'
62
- end
63
-
64
-
65
- until argv.empty?
66
- case argv.shift
67
- when '-p'
68
- pvalue = argv.shift.to_f
69
- when '-d'
70
- discretization = argv.shift.to_f
71
- when '--max-hash-size'
72
- max_hash_size = argv.shift.to_i
73
- when '--max-2d-hash-size'
74
- max_pair_hash_size = argv.shift.to_i
75
- when '-b'
76
- second_background = first_background = argv.shift.split(',').map(&:to_f)
77
- when '-b1'
78
- first_background = argv.shift.split(',').map(&:to_f)
79
- when '-b2'
80
- second_background = argv.shift.split(',').map(&:to_f)
81
- when '--boundary'
82
- pvalue_boundary = argv.shift.to_sym
83
- raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
84
- when '--first-threshold'
85
- predefined_threshold_first = argv.shift.to_f
86
- when '--second-threshold'
87
- predefined_threshold_second = argv.shift.to_f
88
- end
89
- end
90
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
91
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
92
-
93
- if first_file == '.stdin' || second_file == '.stdin'
94
- input = $stdin.read
95
- parser = data_model.choose_parser(input).new(input)
96
- end
97
-
98
- if first_file == '.stdin'
99
- input_first = parser.parse
100
- else
101
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
102
- input_first = File.read(first_file)
103
- end
104
- pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
105
-
106
- if second_file == '.stdin'
107
- input_second = parser.parse
108
- else
109
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
110
- input_second = File.read(second_file)
111
- end
112
- pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
113
-
114
- pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
115
- pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
116
-
117
- cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation).set_parameters(max_pair_hash_size: max_pair_hash_size)
118
-
119
- if predefined_threshold_first
120
- threshold_first = predefined_threshold_first * discretization
121
- else
122
- if pvalue_boundary == :lower
123
- threshold_first = pwm_first.threshold(pvalue)
124
- else
125
- threshold_first = pwm_first.weak_threshold(pvalue)
126
- end
127
- end
128
-
129
- if predefined_threshold_second
130
- threshold_second = predefined_threshold_second * discretization
131
- else
132
- if pvalue_boundary == :lower
133
- threshold_second = pwm_second.threshold(pvalue)
134
- else
135
- threshold_second = pwm_second.weak_threshold(pvalue)
136
- end
137
- end
138
- info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )
139
- info.merge!(predefined_threshold_first: predefined_threshold_first,
140
- predefined_threshold_second: predefined_threshold_second,
141
- threshold_first: threshold_first / discretization,
142
- threshold_second: threshold_second / discretization,
143
- discretization: discretization,
144
- first_background: first_background,
145
- second_background: second_background,
146
- requested_pvalue: pvalue,
147
- pvalue_boundary: pvalue_boundary)
148
- puts Helper.similarity_info_string(info)
149
-
150
- rescue => err
151
- $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
152
- end
153
-
154
- end
155
- end
156
- end
1
+ require_relative '../../macroape'
2
+
3
+ module Macroape
4
+ module CLI
5
+ module EvalAlignment
6
+
7
+ def self.main(argv)
8
+ doc = <<-EOS.strip_doc
9
+ Command-line format:
10
+ #{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
11
+
12
+ Options:
13
+ [-p <P-value>]
14
+ [-d <discretization level>]
15
+ [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
16
+ [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
17
+ [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
18
+ [--first-threshold <threshold for the first matrix>]
19
+ [--second-threshold <threshold for the second matrix>]
20
+
21
+ Examples:
22
+ #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
23
+ #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
24
+ EOS
25
+
26
+ if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
27
+ $stderr.puts doc
28
+ exit
29
+ end
30
+
31
+ pvalue = 0.0005
32
+ discretization = 10.0
33
+
34
+ first_background = Bioinform::Background::Wordwise
35
+ second_background = Bioinform::Background::Wordwise
36
+ max_hash_size = 10000000
37
+ max_pair_hash_size = 10000
38
+ pvalue_boundary = :upper
39
+
40
+ data_model = argv.delete('--pcm') ? :pcm : :pwm
41
+
42
+ first_file = argv.shift
43
+ second_file = argv.shift
44
+
45
+ shift = argv.shift
46
+ orientation = argv.shift
47
+
48
+ raise 'You should specify two input files' unless first_file and second_file
49
+ raise 'You should specify shift' unless shift
50
+ raise 'You should specify orientation' unless orientation
51
+
52
+ shift = shift.to_i
53
+ orientation = orientation.to_sym
54
+
55
+ case orientation
56
+ when :direct
57
+ reverse = false
58
+ when :revcomp
59
+ reverse = true
60
+ else
61
+ raise 'Unknown orientation(direct/revcomp)'
62
+ end
63
+
64
+
65
+ until argv.empty?
66
+ case argv.shift
67
+ when '-p'
68
+ pvalue = argv.shift.to_f
69
+ when '-d'
70
+ discretization = argv.shift.to_f
71
+ when '--max-hash-size'
72
+ max_hash_size = argv.shift.to_i
73
+ when '--max-2d-hash-size'
74
+ max_pair_hash_size = argv.shift.to_i
75
+ when '-b'
76
+ second_background = first_background = Bioinform::Background.from_string(argv.shift)
77
+ when '-b1'
78
+ first_background = Bioinform::Background.from_string(argv.shift)
79
+ when '-b2'
80
+ second_background = Bioinform::Background.from_string(argv.shift)
81
+ when '--boundary'
82
+ pvalue_boundary = argv.shift.to_sym
83
+ raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
84
+ when '--first-threshold'
85
+ predefined_threshold_first = argv.shift.to_f
86
+ when '--second-threshold'
87
+ predefined_threshold_second = argv.shift.to_f
88
+ end
89
+ end
90
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background.symmetric?
91
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background.symmetric?
92
+
93
+ raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
94
+ input_first = File.read(first_file)
95
+ input_first = Bioinform::MatrixParser.new.parse!(input_first)
96
+
97
+ raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
98
+ input_second = File.read(second_file)
99
+ input_second = Bioinform::MatrixParser.new.parse!(input_second)
100
+
101
+ case data_model
102
+ when :pcm
103
+ pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
104
+ pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: first_background).convert(pcm_first)
105
+ pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
106
+ pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: second_background).convert(pcm_second)
107
+ when :pwm
108
+ pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
109
+ pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
110
+ end
111
+
112
+ pwm_first = pwm_first.discreted(discretization)
113
+ pwm_second = pwm_second.discreted(discretization)
114
+
115
+ counting_first = PWMCounting.new(pwm_first, background: first_background, max_hash_size: max_hash_size)
116
+ counting_second = PWMCounting.new(pwm_second, background: second_background, max_hash_size: max_hash_size)
117
+
118
+ cmp = Macroape::PWMCompareAligned.new(counting_first, counting_second, shift, orientation).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
119
+
120
+ if predefined_threshold_first
121
+ threshold_first = predefined_threshold_first * discretization
122
+ else
123
+ if pvalue_boundary == :lower
124
+ threshold_first = counting_first.threshold(pvalue)
125
+ else
126
+ threshold_first = counting_first.weak_threshold(pvalue)
127
+ end
128
+ end
129
+
130
+ if predefined_threshold_second
131
+ threshold_second = predefined_threshold_second * discretization
132
+ else
133
+ if pvalue_boundary == :lower
134
+ threshold_second = counting_second.threshold(pvalue)
135
+ else
136
+ threshold_second = counting_second.weak_threshold(pvalue)
137
+ end
138
+ end
139
+ info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )
140
+ info.merge!(predefined_threshold_first: predefined_threshold_first,
141
+ predefined_threshold_second: predefined_threshold_second,
142
+ threshold_first: threshold_first / discretization,
143
+ threshold_second: threshold_second / discretization,
144
+ discretization: discretization,
145
+ first_background: first_background,
146
+ second_background: second_background,
147
+ requested_pvalue: pvalue,
148
+ pvalue_boundary: pvalue_boundary)
149
+ puts Helper.similarity_info_string(info)
150
+
151
+ rescue => err
152
+ $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
153
+ end
154
+
155
+ end
156
+ end
157
+ end