macroape 4.0.2 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,100 +1,120 @@
|
|
1
|
-
require_relative '../../macroape'
|
2
|
-
require 'shellwords'
|
3
|
-
|
4
|
-
module Macroape
|
5
|
-
module CLI
|
6
|
-
module AlignMotifs
|
7
|
-
|
8
|
-
def self.main(argv)
|
9
|
-
doc = <<-EOS.strip_doc
|
10
|
-
Align motifs tool.
|
11
|
-
It takes motifs and builds alignment of each motif to the first (leader) motif.
|
12
|
-
|
13
|
-
Output has format:
|
14
|
-
pwm_file_1 shift_1 orientation_1
|
15
|
-
pwm_file_2 shift_2 orientation_2
|
16
|
-
pwm_file_3 shift_3 orientation_3
|
17
|
-
|
18
|
-
Usage:
|
19
|
-
#{run_tool_cmd} [options] <leader pm> <rest pm files>...
|
20
|
-
or
|
21
|
-
ls rest_pms/*.pm | #{run_tool_cmd} [options]
|
22
|
-
or
|
23
|
-
ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
|
24
|
-
|
25
|
-
Options:
|
26
|
-
[-p <P-value>]
|
27
|
-
[-d <discretization level>]
|
28
|
-
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
29
|
-
[--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
|
30
|
-
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
31
|
-
EOS
|
32
|
-
|
33
|
-
if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
34
|
-
$stderr.puts doc
|
35
|
-
exit
|
36
|
-
end
|
37
|
-
|
38
|
-
leader_background =
|
39
|
-
rest_motifs_background =
|
40
|
-
discretization = 1
|
41
|
-
pvalue = 0.0005
|
42
|
-
max_hash_size = 10000000
|
43
|
-
max_pair_hash_size = 10000
|
44
|
-
pvalue_boundary = :upper
|
45
|
-
|
46
|
-
data_model = argv.delete('--pcm') ?
|
47
|
-
|
48
|
-
while argv.first && argv.first.start_with?('-')
|
49
|
-
case argv.shift
|
50
|
-
when '-p'
|
51
|
-
pvalue = argv.shift.to_f
|
52
|
-
when '-d'
|
53
|
-
discretization = argv.shift.to_f
|
54
|
-
when '--max-hash-size'
|
55
|
-
max_hash_size = argv.shift.to_i
|
56
|
-
when '--max-2d-hash-size'
|
57
|
-
max_pair_hash_size = argv.shift.to_i
|
58
|
-
when '-b'
|
59
|
-
rest_motifs_background = leader_background = argv.shift
|
60
|
-
when '-b1'
|
61
|
-
leader_background = argv.shift
|
62
|
-
when '-b2'
|
63
|
-
rest_motifs_background = argv.shift
|
64
|
-
when '--boundary'
|
65
|
-
pvalue_boundary = argv.shift.to_sym
|
66
|
-
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
pwm_files = argv
|
71
|
-
pwm_files += $stdin.read.shellsplit unless $stdin.tty?
|
72
|
-
leader_pwm_file = pwm_files.first
|
73
|
-
rest_pwm_files = pwm_files[1..-1]
|
74
|
-
rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
|
75
|
-
|
76
|
-
raise 'Specify leader file' unless leader_pwm_file
|
77
|
-
|
78
|
-
shifts = []
|
79
|
-
shifts << [leader_pwm_file, 0, :direct]
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
end
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
1
|
+
require_relative '../../macroape'
|
2
|
+
require 'shellwords'
|
3
|
+
|
4
|
+
module Macroape
|
5
|
+
module CLI
|
6
|
+
module AlignMotifs
|
7
|
+
|
8
|
+
def self.main(argv)
|
9
|
+
doc = <<-EOS.strip_doc
|
10
|
+
Align motifs tool.
|
11
|
+
It takes motifs and builds alignment of each motif to the first (leader) motif.
|
12
|
+
|
13
|
+
Output has format:
|
14
|
+
pwm_file_1 shift_1 orientation_1
|
15
|
+
pwm_file_2 shift_2 orientation_2
|
16
|
+
pwm_file_3 shift_3 orientation_3
|
17
|
+
|
18
|
+
Usage:
|
19
|
+
#{run_tool_cmd} [options] <leader pm> <rest pm files>...
|
20
|
+
or
|
21
|
+
ls rest_pms/*.pm | #{run_tool_cmd} [options]
|
22
|
+
or
|
23
|
+
ls rest_pms/*.pm | #{run_tool_cmd} [options] <leader pm>
|
24
|
+
|
25
|
+
Options:
|
26
|
+
[-p <P-value>]
|
27
|
+
[-d <discretization level>]
|
28
|
+
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
29
|
+
[--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
|
30
|
+
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
31
|
+
EOS
|
32
|
+
|
33
|
+
if (argv.empty? && $stdin.tty?) || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
34
|
+
$stderr.puts doc
|
35
|
+
exit
|
36
|
+
end
|
37
|
+
|
38
|
+
leader_background = Bioinform::Background::Wordwise
|
39
|
+
rest_motifs_background = Bioinform::Background::Wordwise
|
40
|
+
discretization = 1
|
41
|
+
pvalue = 0.0005
|
42
|
+
max_hash_size = 10000000
|
43
|
+
max_pair_hash_size = 10000
|
44
|
+
pvalue_boundary = :upper
|
45
|
+
|
46
|
+
data_model = argv.delete('--pcm') ? :pcm : :pwm
|
47
|
+
|
48
|
+
while argv.first && argv.first.start_with?('-')
|
49
|
+
case argv.shift
|
50
|
+
when '-p'
|
51
|
+
pvalue = argv.shift.to_f
|
52
|
+
when '-d'
|
53
|
+
discretization = argv.shift.to_f
|
54
|
+
when '--max-hash-size'
|
55
|
+
max_hash_size = argv.shift.to_i
|
56
|
+
when '--max-2d-hash-size'
|
57
|
+
max_pair_hash_size = argv.shift.to_i
|
58
|
+
when '-b'
|
59
|
+
rest_motifs_background = leader_background = Bioinform::Background.from_string(argv.shift)
|
60
|
+
when '-b1'
|
61
|
+
leader_background = Bioinform::Background.from_string(argv.shift)
|
62
|
+
when '-b2'
|
63
|
+
rest_motifs_background = Bioinform::Background.from_string(argv.shift)
|
64
|
+
when '--boundary'
|
65
|
+
pvalue_boundary = argv.shift.to_sym
|
66
|
+
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
pwm_files = argv
|
71
|
+
pwm_files += $stdin.read.shellsplit unless $stdin.tty?
|
72
|
+
leader_pwm_file = pwm_files.first
|
73
|
+
rest_pwm_files = pwm_files[1..-1]
|
74
|
+
rest_pwm_files.reject!{|filename| File.expand_path(filename) == File.expand_path(leader_pwm_file)}
|
75
|
+
|
76
|
+
raise 'Specify leader file' unless leader_pwm_file
|
77
|
+
|
78
|
+
shifts = []
|
79
|
+
shifts << [leader_pwm_file, 0, :direct]
|
80
|
+
|
81
|
+
input_first = File.read(leader_pwm_file)
|
82
|
+
input_first = Bioinform::MatrixParser.new.parse!(input_first)
|
83
|
+
case data_model
|
84
|
+
when :pcm
|
85
|
+
pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
|
86
|
+
pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: leader_background).convert(pcm_first)
|
87
|
+
when :pwm
|
88
|
+
pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
|
89
|
+
end
|
90
|
+
|
91
|
+
pwm_first = pwm_first.discreted(discretization)
|
92
|
+
counting_first = PWMCounting.new(pwm_first, background: leader_background, max_hash_size: max_hash_size)
|
93
|
+
|
94
|
+
rest_pwm_files.each do |motif_name|
|
95
|
+
input_second = File.read(motif_name)
|
96
|
+
input_second = Bioinform::MatrixParser.new.parse!(input_second)
|
97
|
+
case data_model
|
98
|
+
when :pcm
|
99
|
+
pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
|
100
|
+
pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: rest_motifs_background).convert(pcm_second)
|
101
|
+
when :pwm
|
102
|
+
pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
|
103
|
+
end
|
104
|
+
pwm_second = pwm_second.discreted(discretization)
|
105
|
+
counting_second = PWMCounting.new(pwm_second, background: rest_motifs_background, max_hash_size: max_hash_size)
|
106
|
+
cmp = Macroape::PWMCompare.new(counting_first, counting_second).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
|
107
|
+
info = cmp.jaccard_by_pvalue(pvalue)
|
108
|
+
shifts << [motif_name, info[:shift], info[:orientation]]
|
109
|
+
end
|
110
|
+
|
111
|
+
shifts.each do |motif_name, shift,orientation|
|
112
|
+
puts "#{motif_name}\t#{shift}\t#{orientation}"
|
113
|
+
end
|
114
|
+
rescue => err
|
115
|
+
$stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -1,156 +1,157 @@
|
|
1
|
-
require_relative '../../macroape'
|
2
|
-
|
3
|
-
module Macroape
|
4
|
-
module CLI
|
5
|
-
module EvalAlignment
|
6
|
-
|
7
|
-
def self.main(argv)
|
8
|
-
doc = <<-EOS.strip_doc
|
9
|
-
Command-line format:
|
10
|
-
#{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
|
11
|
-
|
12
|
-
Options:
|
13
|
-
[-p <P-value>]
|
14
|
-
[-d <discretization level>]
|
15
|
-
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
16
|
-
[--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
|
17
|
-
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
18
|
-
[--first-threshold <threshold for the first matrix>]
|
19
|
-
[--second-threshold <threshold for the second matrix>]
|
20
|
-
|
21
|
-
Examples:
|
22
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
|
23
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
|
24
|
-
EOS
|
25
|
-
|
26
|
-
if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
27
|
-
$stderr.puts doc
|
28
|
-
exit
|
29
|
-
end
|
30
|
-
|
31
|
-
pvalue = 0.0005
|
32
|
-
discretization = 10.0
|
33
|
-
|
34
|
-
first_background =
|
35
|
-
second_background =
|
36
|
-
max_hash_size = 10000000
|
37
|
-
max_pair_hash_size = 10000
|
38
|
-
pvalue_boundary = :upper
|
39
|
-
|
40
|
-
data_model = argv.delete('--pcm') ?
|
41
|
-
|
42
|
-
first_file = argv.shift
|
43
|
-
second_file = argv.shift
|
44
|
-
|
45
|
-
shift = argv.shift
|
46
|
-
orientation = argv.shift
|
47
|
-
|
48
|
-
raise 'You should specify two input
|
49
|
-
raise 'You should specify shift' unless shift
|
50
|
-
raise 'You should specify orientation' unless orientation
|
51
|
-
|
52
|
-
shift = shift.to_i
|
53
|
-
orientation = orientation.to_sym
|
54
|
-
|
55
|
-
case orientation
|
56
|
-
when :direct
|
57
|
-
reverse = false
|
58
|
-
when :revcomp
|
59
|
-
reverse = true
|
60
|
-
else
|
61
|
-
raise 'Unknown orientation(direct/revcomp)'
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
until argv.empty?
|
66
|
-
case argv.shift
|
67
|
-
when '-p'
|
68
|
-
pvalue = argv.shift.to_f
|
69
|
-
when '-d'
|
70
|
-
discretization = argv.shift.to_f
|
71
|
-
when '--max-hash-size'
|
72
|
-
max_hash_size = argv.shift.to_i
|
73
|
-
when '--max-2d-hash-size'
|
74
|
-
max_pair_hash_size = argv.shift.to_i
|
75
|
-
when '-b'
|
76
|
-
second_background = first_background = argv.shift
|
77
|
-
when '-b1'
|
78
|
-
first_background = argv.shift
|
79
|
-
when '-b2'
|
80
|
-
second_background = argv.shift
|
81
|
-
when '--boundary'
|
82
|
-
pvalue_boundary = argv.shift.to_sym
|
83
|
-
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
84
|
-
when '--first-threshold'
|
85
|
-
predefined_threshold_first = argv.shift.to_f
|
86
|
-
when '--second-threshold'
|
87
|
-
predefined_threshold_second = argv.shift.to_f
|
88
|
-
end
|
89
|
-
end
|
90
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background
|
91
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
info.merge
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
1
|
+
require_relative '../../macroape'
|
2
|
+
|
3
|
+
module Macroape
|
4
|
+
module CLI
|
5
|
+
module EvalAlignment
|
6
|
+
|
7
|
+
def self.main(argv)
|
8
|
+
doc = <<-EOS.strip_doc
|
9
|
+
Command-line format:
|
10
|
+
#{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
|
11
|
+
|
12
|
+
Options:
|
13
|
+
[-p <P-value>]
|
14
|
+
[-d <discretization level>]
|
15
|
+
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
16
|
+
[--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
|
17
|
+
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
18
|
+
[--first-threshold <threshold for the first matrix>]
|
19
|
+
[--second-threshold <threshold for the second matrix>]
|
20
|
+
|
21
|
+
Examples:
|
22
|
+
#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3
|
23
|
+
#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp
|
24
|
+
EOS
|
25
|
+
|
26
|
+
if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
27
|
+
$stderr.puts doc
|
28
|
+
exit
|
29
|
+
end
|
30
|
+
|
31
|
+
pvalue = 0.0005
|
32
|
+
discretization = 10.0
|
33
|
+
|
34
|
+
first_background = Bioinform::Background::Wordwise
|
35
|
+
second_background = Bioinform::Background::Wordwise
|
36
|
+
max_hash_size = 10000000
|
37
|
+
max_pair_hash_size = 10000
|
38
|
+
pvalue_boundary = :upper
|
39
|
+
|
40
|
+
data_model = argv.delete('--pcm') ? :pcm : :pwm
|
41
|
+
|
42
|
+
first_file = argv.shift
|
43
|
+
second_file = argv.shift
|
44
|
+
|
45
|
+
shift = argv.shift
|
46
|
+
orientation = argv.shift
|
47
|
+
|
48
|
+
raise 'You should specify two input files' unless first_file and second_file
|
49
|
+
raise 'You should specify shift' unless shift
|
50
|
+
raise 'You should specify orientation' unless orientation
|
51
|
+
|
52
|
+
shift = shift.to_i
|
53
|
+
orientation = orientation.to_sym
|
54
|
+
|
55
|
+
case orientation
|
56
|
+
when :direct
|
57
|
+
reverse = false
|
58
|
+
when :revcomp
|
59
|
+
reverse = true
|
60
|
+
else
|
61
|
+
raise 'Unknown orientation(direct/revcomp)'
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
until argv.empty?
|
66
|
+
case argv.shift
|
67
|
+
when '-p'
|
68
|
+
pvalue = argv.shift.to_f
|
69
|
+
when '-d'
|
70
|
+
discretization = argv.shift.to_f
|
71
|
+
when '--max-hash-size'
|
72
|
+
max_hash_size = argv.shift.to_i
|
73
|
+
when '--max-2d-hash-size'
|
74
|
+
max_pair_hash_size = argv.shift.to_i
|
75
|
+
when '-b'
|
76
|
+
second_background = first_background = Bioinform::Background.from_string(argv.shift)
|
77
|
+
when '-b1'
|
78
|
+
first_background = Bioinform::Background.from_string(argv.shift)
|
79
|
+
when '-b2'
|
80
|
+
second_background = Bioinform::Background.from_string(argv.shift)
|
81
|
+
when '--boundary'
|
82
|
+
pvalue_boundary = argv.shift.to_sym
|
83
|
+
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
84
|
+
when '--first-threshold'
|
85
|
+
predefined_threshold_first = argv.shift.to_f
|
86
|
+
when '--second-threshold'
|
87
|
+
predefined_threshold_second = argv.shift.to_f
|
88
|
+
end
|
89
|
+
end
|
90
|
+
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background.symmetric?
|
91
|
+
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background.symmetric?
|
92
|
+
|
93
|
+
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
94
|
+
input_first = File.read(first_file)
|
95
|
+
input_first = Bioinform::MatrixParser.new.parse!(input_first)
|
96
|
+
|
97
|
+
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
98
|
+
input_second = File.read(second_file)
|
99
|
+
input_second = Bioinform::MatrixParser.new.parse!(input_second)
|
100
|
+
|
101
|
+
case data_model
|
102
|
+
when :pcm
|
103
|
+
pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])
|
104
|
+
pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: first_background).convert(pcm_first)
|
105
|
+
pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])
|
106
|
+
pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: second_background).convert(pcm_second)
|
107
|
+
when :pwm
|
108
|
+
pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])
|
109
|
+
pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])
|
110
|
+
end
|
111
|
+
|
112
|
+
pwm_first = pwm_first.discreted(discretization)
|
113
|
+
pwm_second = pwm_second.discreted(discretization)
|
114
|
+
|
115
|
+
counting_first = PWMCounting.new(pwm_first, background: first_background, max_hash_size: max_hash_size)
|
116
|
+
counting_second = PWMCounting.new(pwm_second, background: second_background, max_hash_size: max_hash_size)
|
117
|
+
|
118
|
+
cmp = Macroape::PWMCompareAligned.new(counting_first, counting_second, shift, orientation).tap{|x| x.max_pair_hash_size = max_pair_hash_size }
|
119
|
+
|
120
|
+
if predefined_threshold_first
|
121
|
+
threshold_first = predefined_threshold_first * discretization
|
122
|
+
else
|
123
|
+
if pvalue_boundary == :lower
|
124
|
+
threshold_first = counting_first.threshold(pvalue)
|
125
|
+
else
|
126
|
+
threshold_first = counting_first.weak_threshold(pvalue)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
if predefined_threshold_second
|
131
|
+
threshold_second = predefined_threshold_second * discretization
|
132
|
+
else
|
133
|
+
if pvalue_boundary == :lower
|
134
|
+
threshold_second = counting_second.threshold(pvalue)
|
135
|
+
else
|
136
|
+
threshold_second = counting_second.weak_threshold(pvalue)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )
|
140
|
+
info.merge!(predefined_threshold_first: predefined_threshold_first,
|
141
|
+
predefined_threshold_second: predefined_threshold_second,
|
142
|
+
threshold_first: threshold_first / discretization,
|
143
|
+
threshold_second: threshold_second / discretization,
|
144
|
+
discretization: discretization,
|
145
|
+
first_background: first_background,
|
146
|
+
second_background: second_background,
|
147
|
+
requested_pvalue: pvalue,
|
148
|
+
pvalue_boundary: pvalue_boundary)
|
149
|
+
puts Helper.similarity_info_string(info)
|
150
|
+
|
151
|
+
rescue => err
|
152
|
+
$stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|