macroape 4.0.2 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,100 +1,120 @@
1
- require_relative '../../macroape'
2
- require 'shellwords'
3
-
4
- module Macroape
5
- module CLI
6
- module AlignMotifs
7
-
8
- def self.main(argv)
9
- doc = <<-EOS.strip_doc
10
- Align motifs tool.
11
- It takes motifs and builds alignment of each motif to the first (leader) motif.
12
-
13
- Output has format:
14
- pwm_file_1 shift_1 orientation_1
15
- pwm_file_2 shift_2 orientation_2
16
- pwm_file_3 shift_3 orientation_3
17
-
18
- Usage:
19
- #{run_tool_cmd} [options] <leader pm> <rest pm files>...
20
- or
21
- ls rest_pms/*.pm | #{run_tool_cmd} [options]
22
- or
23
- ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
24
-
25
- Options:
26
- [-p <P-value>]
27
- [-d <discretization level>]
28
- [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
29
- [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
30
- [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
31
- EOS
32
-
33
- if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
34
- $stderr.puts doc
35
- exit
36
- end
37
-
38
- leader_background = [1,1,1,1]
39
- rest_motifs_background = [1,1,1,1]
40
- discretization = 1
41
- pvalue = 0.0005
42
- max_hash_size = 10000000
43
- max_pair_hash_size = 10000
44
- pvalue_boundary = :upper
45
-
46
- data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
47
-
48
- while argv.first && argv.first.start_with?('-')
49
- case argv.shift
50
- when '-p'
51
- pvalue = argv.shift.to_f
52
- when '-d'
53
- discretization = argv.shift.to_f
54
- when '--max-hash-size'
55
- max_hash_size = argv.shift.to_i
56
- when '--max-2d-hash-size'
57
- max_pair_hash_size = argv.shift.to_i
58
- when '-b'
59
- rest_motifs_background = leader_background = argv.shift.split(',').map(&:to_f)
60
- when '-b1'
61
- leader_background = argv.shift.split(',').map(&:to_f)
62
- when '-b2'
63
- rest_motifs_background = argv.shift.split(',').map(&:to_f)
64
- when '--boundary'
65
- pvalue_boundary = argv.shift.to_sym
66
- raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
67
- end
68
- end
69
-
70
- pwm_files = argv
71
- pwm_files += $stdin.read.shellsplit unless $stdin.tty?
72
- leader_pwm_file = pwm_files.first
73
- rest_pwm_files = pwm_files[1..-1]
74
- rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
75
-
76
- raise 'Specify leader file' unless leader_pwm_file
77
-
78
- shifts = []
79
- shifts << [leader_pwm_file, 0, :direct]
80
- pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
81
- pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
82
-
83
- rest_pwm_files.each do |motif_name|
84
- pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
85
- pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
86
- cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
87
- info = cmp.jaccard_by_pvalue(pvalue)
88
- shifts << [motif_name, info[:shift], info[:orientation]]
89
- end
90
-
91
- shifts.each do |motif_name, shift,orientation|
92
- puts "#{motif_name}\t#{shift}\t#{orientation}"
93
- end
94
- rescue => err
95
- $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
96
- end
97
-
98
- end
99
- end
100
- end
1
+ require_relative '../../macroape'
2
+ require 'shellwords'
3
+
4
+ module Macroape
5
+ module CLI
6
+ module AlignMotifs
7
+
8
+ def self.main(argv)
9
+ doc = <<-EOS.strip_doc
10
+ Align motifs tool.
11
+ It takes motifs and builds alignment of each motif to the first (leader) motif.
12
+
13
+ Output has format:
14
+ pwm_file_1 shift_1 orientation_1
15
+ pwm_file_2 shift_2 orientation_2
16
+ pwm_file_3 shift_3 orientation_3
17
+
18
+ Usage:
19
+ #{run_tool_cmd} [options] <leader pm> <rest pm files>...
20
+ or
21
+ ls rest_pms/*.pm | #{run_tool_cmd} [options]
22
+ or
23
+ ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
24
+
25
+ Options:
26
+ [-p <P-value>]
27
+ [-d <discretization level>]
28
+ [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
29
+ [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
30
+ [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
31
+ EOS
32
+
33
+ if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
34
+ $stderr.puts doc
35
+ exit
36
+ end
37
+
38
+ leader_background = Bioinform::Background::Wordwise
39
+ rest_motifs_background = Bioinform::Background::Wordwise
40
+ discretization = 1
41
+ pvalue = 0.0005
42
+ max_hash_size = 10000000
43
+ max_pair_hash_size = 10000
44
+ pvalue_boundary = :upper
45
+
46
+ data_model = argv.delete('--pcm') ? :pcm : :pwm
47
+
48
+ while argv.first && argv.first.start_with?('-')
49
+ case argv.shift
50
+ when '-p'
51
+ pvalue = argv.shift.to_f
52
+ when '-d'
53
+ discretization = argv.shift.to_f
54
+ when '--max-hash-size'
55
+ max_hash_size = argv.shift.to_i
56
+ when '--max-2d-hash-size'
57
+ max_pair_hash_size = argv.shift.to_i
58
+ when '-b'
59
+ rest_motifs_background = leader_background = Bioinform::Background.from_string(argv.shift)
60
+ when '-b1'
61
+ leader_background = Bioinform::Background.from_string(argv.shift)
62
+ when '-b2'
63
+ rest_motifs_background = Bioinform::Background.from_string(argv.shift)
64
+ when '--boundary'
65
+ pvalue_boundary = argv.shift.to_sym
66
+ raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
67
+ end
68
+ end
69
+
70
+ pwm_files = argv
71
+ pwm_files += $stdin.read.shellsplit unless $stdin.tty?
72
+ leader_pwm_file = pwm_files.first
73
+ rest_pwm_files = pwm_files[1..-1]
74
+ rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
75
+
76
+ raise 'Specify leader file' unless leader_pwm_file
77
+
78
+ shifts = []
79
+ shifts << [leader_pwm_file, 0, :direct]
80
+
81
+ input_first = File.read(leader_pwm_file)
82
+ input_first = Bioinform::MatrixParser.new.parse!(input_first)
83
+ case data_model
84
+ when :pcm
85
+ pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
86
+ pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: leader_background).convert(pcm_first)
87
+ when :pwm
88
+ pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
89
+ end
90
+
91
+ pwm_first = pwm_first.discreted(discretization)
92
+ counting_first = PWMCounting.new(pwm_first, background: leader_background, max_hash_size: max_hash_size)
93
+
94
+ rest_pwm_files.each do |motif_name|
95
+ input_second = File.read(motif_name)
96
+ input_second = Bioinform::MatrixParser.new.parse!(input_second)
97
+ case data_model
98
+ when :pcm
99
+ pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
100
+ pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: rest_motifs_background).convert(pcm_second)
101
+ when :pwm
102
+ pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
103
+ end
104
+ pwm_second = pwm_second.discreted(discretization)
105
+ counting_second = PWMCounting.new(pwm_second, background: rest_motifs_background, max_hash_size: max_hash_size)
106
+ cmp = Macroape::PWMCompare.new(counting_first, counting_second).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
107
+ info = cmp.jaccard_by_pvalue(pvalue)
108
+ shifts << [motif_name, info[:shift], info[:orientation]]
109
+ end
110
+
111
+ shifts.each do |motif_name, shift,orientation|
112
+ puts "#{motif_name}\t#{shift}\t#{orientation}"
113
+ end
114
+ rescue => err
115
+ $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
116
+ end
117
+
118
+ end
119
+ end
120
+ end
@@ -1,156 +1,157 @@
1
- require_relative '../../macroape'
2
-
3
- module Macroape
4
- module CLI
5
- module EvalAlignment
6
-
7
- def self.main(argv)
8
- doc = <<-EOS.strip_doc
9
- Command-line format:
10
- #{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
11
-
12
- Options:
13
- [-p <P-value>]
14
- [-d <discretization level>]
15
- [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
16
- [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
17
- [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
18
- [--first-threshold <threshold for the first matrix>]
19
- [--second-threshold <threshold for the second matrix>]
20
-
21
- Examples:
22
- #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
23
- #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
24
- EOS
25
-
26
- if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
27
- $stderr.puts doc
28
- exit
29
- end
30
-
31
- pvalue = 0.0005
32
- discretization = 10.0
33
-
34
- first_background = [1,1,1,1]
35
- second_background = [1,1,1,1]
36
- max_hash_size = 10000000
37
- max_pair_hash_size = 10000
38
- pvalue_boundary = :upper
39
-
40
- data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
41
-
42
- first_file = argv.shift
43
- second_file = argv.shift
44
-
45
- shift = argv.shift
46
- orientation = argv.shift
47
-
48
- raise 'You should specify two input sources (each is filename or .stdin)' unless first_file and second_file
49
- raise 'You should specify shift' unless shift
50
- raise 'You should specify orientation' unless orientation
51
-
52
- shift = shift.to_i
53
- orientation = orientation.to_sym
54
-
55
- case orientation
56
- when :direct
57
- reverse = false
58
- when :revcomp
59
- reverse = true
60
- else
61
- raise 'Unknown orientation(direct/revcomp)'
62
- end
63
-
64
-
65
- until argv.empty?
66
- case argv.shift
67
- when '-p'
68
- pvalue = argv.shift.to_f
69
- when '-d'
70
- discretization = argv.shift.to_f
71
- when '--max-hash-size'
72
- max_hash_size = argv.shift.to_i
73
- when '--max-2d-hash-size'
74
- max_pair_hash_size = argv.shift.to_i
75
- when '-b'
76
- second_background = first_background = argv.shift.split(',').map(&:to_f)
77
- when '-b1'
78
- first_background = argv.shift.split(',').map(&:to_f)
79
- when '-b2'
80
- second_background = argv.shift.split(',').map(&:to_f)
81
- when '--boundary'
82
- pvalue_boundary = argv.shift.to_sym
83
- raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
84
- when '--first-threshold'
85
- predefined_threshold_first = argv.shift.to_f
86
- when '--second-threshold'
87
- predefined_threshold_second = argv.shift.to_f
88
- end
89
- end
90
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
91
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
92
-
93
- if first_file == '.stdin' || second_file == '.stdin'
94
- input = $stdin.read
95
- parser = data_model.choose_parser(input).new(input)
96
- end
97
-
98
- if first_file == '.stdin'
99
- input_first = parser.parse
100
- else
101
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
102
- input_first = File.read(first_file)
103
- end
104
- pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
105
-
106
- if second_file == '.stdin'
107
- input_second = parser.parse
108
- else
109
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
110
- input_second = File.read(second_file)
111
- end
112
- pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
113
-
114
- pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
115
- pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
116
-
117
- cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation).set_parameters(max_pair_hash_size: max_pair_hash_size)
118
-
119
- if predefined_threshold_first
120
- threshold_first = predefined_threshold_first * discretization
121
- else
122
- if pvalue_boundary == :lower
123
- threshold_first = pwm_first.threshold(pvalue)
124
- else
125
- threshold_first = pwm_first.weak_threshold(pvalue)
126
- end
127
- end
128
-
129
- if predefined_threshold_second
130
- threshold_second = predefined_threshold_second * discretization
131
- else
132
- if pvalue_boundary == :lower
133
- threshold_second = pwm_second.threshold(pvalue)
134
- else
135
- threshold_second = pwm_second.weak_threshold(pvalue)
136
- end
137
- end
138
- info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )
139
- info.merge!(predefined_threshold_first: predefined_threshold_first,
140
- predefined_threshold_second: predefined_threshold_second,
141
- threshold_first: threshold_first / discretization,
142
- threshold_second: threshold_second / discretization,
143
- discretization: discretization,
144
- first_background: first_background,
145
- second_background: second_background,
146
- requested_pvalue: pvalue,
147
- pvalue_boundary: pvalue_boundary)
148
- puts Helper.similarity_info_string(info)
149
-
150
- rescue => err
151
- $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
152
- end
153
-
154
- end
155
- end
156
- end
1
+ require_relative '../../macroape'
2
+
3
+ module Macroape
4
+ module CLI
5
+ module EvalAlignment
6
+
7
+ def self.main(argv)
8
+ doc = <<-EOS.strip_doc
9
+ Command-line format:
10
+ #{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
11
+
12
+ Options:
13
+ [-p <P-value>]
14
+ [-d <discretization level>]
15
+ [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
16
+ [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
17
+ [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
18
+ [--first-threshold <threshold for the first matrix>]
19
+ [--second-threshold <threshold for the second matrix>]
20
+
21
+ Examples:
22
+ #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
23
+ #{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
24
+ EOS
25
+
26
+ if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
27
+ $stderr.puts doc
28
+ exit
29
+ end
30
+
31
+ pvalue = 0.0005
32
+ discretization = 10.0
33
+
34
+ first_background = Bioinform::Background::Wordwise
35
+ second_background = Bioinform::Background::Wordwise
36
+ max_hash_size = 10000000
37
+ max_pair_hash_size = 10000
38
+ pvalue_boundary = :upper
39
+
40
+ data_model = argv.delete('--pcm') ? :pcm : :pwm
41
+
42
+ first_file = argv.shift
43
+ second_file = argv.shift
44
+
45
+ shift = argv.shift
46
+ orientation = argv.shift
47
+
48
+ raise 'You should specify two input files' unless first_file and second_file
49
+ raise 'You should specify shift' unless shift
50
+ raise 'You should specify orientation' unless orientation
51
+
52
+ shift = shift.to_i
53
+ orientation = orientation.to_sym
54
+
55
+ case orientation
56
+ when :direct
57
+ reverse = false
58
+ when :revcomp
59
+ reverse = true
60
+ else
61
+ raise 'Unknown orientation(direct/revcomp)'
62
+ end
63
+
64
+
65
+ until argv.empty?
66
+ case argv.shift
67
+ when '-p'
68
+ pvalue = argv.shift.to_f
69
+ when '-d'
70
+ discretization = argv.shift.to_f
71
+ when '--max-hash-size'
72
+ max_hash_size = argv.shift.to_i
73
+ when '--max-2d-hash-size'
74
+ max_pair_hash_size = argv.shift.to_i
75
+ when '-b'
76
+ second_background = first_background = Bioinform::Background.from_string(argv.shift)
77
+ when '-b1'
78
+ first_background = Bioinform::Background.from_string(argv.shift)
79
+ when '-b2'
80
+ second_background = Bioinform::Background.from_string(argv.shift)
81
+ when '--boundary'
82
+ pvalue_boundary = argv.shift.to_sym
83
+ raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
84
+ when '--first-threshold'
85
+ predefined_threshold_first = argv.shift.to_f
86
+ when '--second-threshold'
87
+ predefined_threshold_second = argv.shift.to_f
88
+ end
89
+ end
90
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background.symmetric?
91
+ raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background.symmetric?
92
+
93
+ raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
94
+ input_first = File.read(first_file)
95
+ input_first = Bioinform::MatrixParser.new.parse!(input_first)
96
+
97
+ raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
98
+ input_second = File.read(second_file)
99
+ input_second = Bioinform::MatrixParser.new.parse!(input_second)
100
+
101
+ case data_model
102
+ when :pcm
103
+ pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
104
+ pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: first_background).convert(pcm_first)
105
+ pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
106
+ pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: second_background).convert(pcm_second)
107
+ when :pwm
108
+ pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
109
+ pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
110
+ end
111
+
112
+ pwm_first = pwm_first.discreted(discretization)
113
+ pwm_second = pwm_second.discreted(discretization)
114
+
115
+ counting_first = PWMCounting.new(pwm_first, background: first_background, max_hash_size: max_hash_size)
116
+ counting_second = PWMCounting.new(pwm_second, background: second_background, max_hash_size: max_hash_size)
117
+
118
+ cmp = Macroape::PWMCompareAligned.new(counting_first, counting_second, shift, orientation).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
119
+
120
+ if predefined_threshold_first
121
+ threshold_first = predefined_threshold_first * discretization
122
+ else
123
+ if pvalue_boundary == :lower
124
+ threshold_first = counting_first.threshold(pvalue)
125
+ else
126
+ threshold_first = counting_first.weak_threshold(pvalue)
127
+ end
128
+ end
129
+
130
+ if predefined_threshold_second
131
+ threshold_second = predefined_threshold_second * discretization
132
+ else
133
+ if pvalue_boundary == :lower
134
+ threshold_second = counting_second.threshold(pvalue)
135
+ else
136
+ threshold_second = counting_second.weak_threshold(pvalue)
137
+ end
138
+ end
139
+ info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )
140
+ info.merge!(predefined_threshold_first: predefined_threshold_first,
141
+ predefined_threshold_second: predefined_threshold_second,
142
+ threshold_first: threshold_first / discretization,
143
+ threshold_second: threshold_second / discretization,
144
+ discretization: discretization,
145
+ first_background: first_background,
146
+ second_background: second_background,
147
+ requested_pvalue: pvalue,
148
+ pvalue_boundary: pvalue_boundary)
149
+ puts Helper.similarity_info_string(info)
150
+
151
+ rescue => err
152
+ $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
153
+ end
154
+
155
+ end
156
+ end
157
+ end