macroape 4.0.2 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,96 +1,103 @@
|
|
1
|
-
require_relative '../../macroape'
|
2
|
-
|
3
|
-
module Macroape
|
4
|
-
module CLI
|
5
|
-
module FindThreshold
|
6
|
-
|
7
|
-
def self.main(argv)
|
8
|
-
doc = <<-EOS.strip_doc
|
9
|
-
Command-line format:
|
10
|
-
#{run_tool_cmd} <pat-file> [<list of P-values>...] [options]
|
11
|
-
|
12
|
-
Options:
|
13
|
-
[-d <discretization level>]
|
14
|
-
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
15
|
-
[--boundary lower|upper] Lower boundary (default) means that the obtained P-value is less than or equal to the requested P-value
|
16
|
-
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
17
|
-
|
18
|
-
Example:
|
19
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat
|
20
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat 0.001 0.0001 0.0005 -d 1000 -b 0.4,0.3,0.2,0.1
|
21
|
-
EOS
|
22
|
-
|
23
|
-
if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
24
|
-
$stderr.puts doc
|
25
|
-
exit
|
26
|
-
end
|
27
|
-
|
28
|
-
background =
|
29
|
-
default_pvalues = [0.0005]
|
30
|
-
discretization = 10000
|
31
|
-
max_hash_size = 10000000
|
32
|
-
data_model = argv.delete('--pcm') ?
|
33
|
-
|
34
|
-
pvalue_boundary = :lower
|
35
|
-
|
36
|
-
|
37
|
-
filename = argv.shift
|
38
|
-
raise 'No input. You should specify input file' unless filename
|
39
|
-
|
40
|
-
pvalues = []
|
41
|
-
loop do
|
42
|
-
begin
|
43
|
-
Float(argv.first)
|
44
|
-
pvalues << argv.shift.to_f
|
45
|
-
rescue
|
46
|
-
raise StopIteration
|
47
|
-
end
|
48
|
-
end
|
49
|
-
pvalues = default_pvalues if pvalues.empty?
|
50
|
-
|
51
|
-
until argv.empty?
|
52
|
-
case argv.shift
|
53
|
-
when '-b'
|
54
|
-
background = argv.shift
|
55
|
-
when '--max-hash-size'
|
56
|
-
max_hash_size = argv.shift.to_i
|
57
|
-
when '-d'
|
58
|
-
discretization = argv.shift.to_f
|
59
|
-
when '--boundary'
|
60
|
-
pvalue_boundary = argv.shift.to_sym
|
61
|
-
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
1
|
+
require_relative '../../macroape'
|
2
|
+
|
3
|
+
module Macroape
  module CLI
    # Command-line tool that reads a single motif and reports score thresholds
    # for the requested P-values.
    module FindThreshold

      # Runs the tool.
      #
      # Reads the motif file named by the first positional argument, takes the
      # following numeric arguments as P-values (falling back to 0.0005 when
      # none are given), applies the options and prints the threshold report
      # to stdout.
      #
      # Any StandardError raised while processing is caught at the bottom of
      # the method and reported on stderr together with the usage text, so it
      # does not propagate to the caller. A help request (or an empty argv)
      # prints the usage text to stderr and calls `exit`.
      #
      # @param argv [Array<String>] command-line arguments; consumed destructively
      def self.main(argv)
        doc = <<-EOS.strip_doc
          Command-line format:
          #{run_tool_cmd} <pat-file> [<list of P-values>...] [options]

          Options:
            [-d <discretization level>]
            [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
            [--boundary lower|upper] Lower boundary (default) means that the obtained P-value is less than or equal to the requested P-value
            [-b <background probabilities>] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25

          Example:
            #{run_tool_cmd} motifs/KLF4_f2.pat
            #{run_tool_cmd} motifs/KLF4_f2.pat 0.001 0.0001 0.0005 -d 1000 -b 0.4,0.3,0.2,0.1
        EOS

        if argv.empty? || ['-h', '--h', '-help', '--help'].any? { |help_option| argv.include?(help_option) }
          $stderr.puts doc
          exit
        end

        background = Bioinform::Background::Wordwise
        default_pvalues = [0.0005]
        discretization = 10000
        max_hash_size = 10000000
        data_model = argv.delete('--pcm') ? :pcm : :pwm

        pvalue_boundary = :lower

        filename = argv.shift
        raise 'No input. You should specify input file' unless filename

        # Leading numeric arguments form the P-value list; the first argument
        # that is missing or is not a number ends it.
        pvalues = []
        loop do
          begin
            Float(argv.first)
          rescue ArgumentError, TypeError
            break
          end
          pvalues << argv.shift.to_f
        end
        pvalues = default_pvalues if pvalues.empty?

        until argv.empty?
          case argv.shift
          when '-b'
            background = Bioinform::Background.from_string(argv.shift)
          when '--max-hash-size'
            max_hash_size = argv.shift.to_i
          when '-d'
            discretization = argv.shift.to_f
          when '--boundary'
            # Validate the raw string first: this also covers a missing value
            # (nil) and avoids symbolizing arbitrary user input.
            boundary_name = argv.shift
            raise 'boundary should be either lower or upper' unless %w[lower upper].include?(boundary_name)
            pvalue_boundary = boundary_name.to_sym
          end
        end

        raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
        input = File.read(filename)

        parser = Bioinform::MatrixParser.new
        motif_data = parser.parse!(input)
        case data_model
        when :pcm
          pcm = Bioinform::MotifModel::PCM.new(motif_data[:matrix]).named(motif_data[:name])
          pwm = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: background).convert(pcm)
        when :pwm
          pwm = Bioinform::MotifModel::PWM.new(motif_data[:matrix]).named(motif_data[:name])
        end

        pwm = pwm.discreted(discretization)
        counting = PWMCounting.new(pwm, background: background, max_hash_size: max_hash_size)

        infos = []
        collect_infos_proc = ->(pvalue, threshold, real_pvalue) do
          infos << {expected_pvalue: pvalue,
                    # thresholds are computed on the discreted matrix; divide to undo the scaling
                    threshold: threshold / discretization,
                    real_pvalue: real_pvalue,
                    recognized_words: real_pvalue * counting.vocabulary_volume }
        end
        if pvalue_boundary == :lower
          counting.thresholds(*pvalues, &collect_infos_proc)
        else
          counting.weak_thresholds(*pvalues, &collect_infos_proc)
        end
        puts Helper.threshold_infos_string(infos,
                                           {discretization: discretization,
                                            background: background,
                                            pvalue_boundary: pvalue_boundary} )
      rescue => err
        $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
      end

    end
  end
end
|
@@ -1,161 +1,169 @@
|
|
1
|
-
require_relative '../../macroape'
|
2
|
-
require 'yaml'
|
3
|
-
require 'shellwords'
|
4
|
-
|
5
|
-
module Macroape
|
6
|
-
module CLI
|
7
|
-
module PreprocessCollection
|
8
|
-
|
9
|
-
def self.
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
when '
|
64
|
-
|
65
|
-
when '
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
elsif File.file?(data_source)
|
85
|
-
input = File.read(data_source)
|
86
|
-
|
87
|
-
elsif data_source == '.stdin'
|
88
|
-
filelist = $stdin.read.shellsplit
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
1
|
+
require_relative '../../macroape'
|
2
|
+
require 'yaml'
|
3
|
+
require 'shellwords'
|
4
|
+
|
5
|
+
module Macroape
  module CLI
    # Command-line tool that precalculates thresholds for every motif of a
    # collection and stores the result as a YAML file.
    module PreprocessCollection

      # Reads one motif file and returns its matrix and name.
      #
      # @param filename [String] path to the motif file
      # @return [Hash] `{matrix: ..., name: ...}`; when the parsed motif has no
      #   name, the file's basename without extension is used instead
      def self.motif_infos_from_file(filename)
        input = File.read(filename)
        motif_input = Bioinform::MatrixParser.new.parse(input)
        { matrix: motif_input[:matrix],
          name: motif_input[:name] || File.basename(filename, File.extname(filename)) }
      end

      # Runs the tool.
      #
      # The first positional argument is the data source (a folder, a single
      # file holding several motifs, or the literal `.stdin` to read a list of
      # filenames from standard input); the second is the output file. For
      # every motif thresholds are calculated at the rough and at the precise
      # discretization; a motif whose precise calculation yields real P-value 0
      # is left out of the collection.
      #
      # Any StandardError raised while processing is caught at the bottom of
      # the method and reported on stderr together with the usage text, so it
      # does not propagate to the caller. A help request (or an empty argv)
      # prints the usage text to stderr and calls `exit`.
      #
      # @param argv [Array<String>] command-line arguments; consumed destructively
      def self.main(argv)
        doc = <<-EOS.strip_doc
          Command-line format:
          #{run_tool_cmd} <file or folder with PWMs or .stdin with filenames> <output file> [options]

          Options:
            [-p <list of P-values>] - comma separated(no spaces allowed) list of P-values to precalculate thresholds
            [-d <rough discretization>,<precise discretization>] - set discretization rates, comma delimited (no spaces allowed), order doesn't matter
            [--silent] - hide current progress information during scan (printed to stderr by default)
            [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
            [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
            [-b <background probabilities>] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25

          The tool preprocesses and stores Macroape motif collection in the specified YAML-file.

          Example:
            #{run_tool_cmd} ./motifs collection.yaml -p 0.001,0.0005,0.0001 -d 1,10 -b 0.2,0.3,0.3,0.2
        EOS

        if argv.empty? || ['-h', '--h', '-help', '--help'].any? { |help_option| argv.include?(help_option) }
          $stderr.puts doc
          exit
        end

        data_model = argv.delete('--pcm') ? :pcm : :pwm
        default_pvalues = [0.0005]
        background = Bioinform::Background::Wordwise
        rough_discretization = 1
        precise_discretization = 10
        max_hash_size = 10000000

        data_source = argv.shift
        output_file = argv.shift

        raise 'No input. You should specify file or folder with pwms' unless data_source
        raise "Error! File or folder #{data_source} doesn't exist" unless Dir.exist?(data_source) || File.exist?(data_source) || data_source == '.stdin'
        raise 'You should specify output file' unless output_file

        pvalues = []
        silent = false
        pvalue_boundary = :upper

        until argv.empty?
          case argv.shift
          when '-b'
            background = Bioinform::Background.from_string(argv.shift)
            raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background.symmetric?
          when '-p'
            pvalues = argv.shift.split(',').map(&:to_f)
          when '-d'
            # Both rates are required: with fewer values the precise rate would
            # silently become nil and fail much later with an unrelated error.
            rates = argv.shift.to_s.split(',').map(&:to_f)
            raise '-d expects two comma-delimited discretization rates, like 1,10' unless rates.size == 2
            rough_discretization, precise_discretization = rates.sort
          when '--max-hash-size'
            max_hash_size = argv.shift.to_i
          when '--silent'
            silent = true
          when '--boundary'
            # Validate the raw string first: this also covers a missing value
            # (nil) and avoids symbolizing arbitrary user input.
            boundary_name = argv.shift
            raise 'boundary should be either lower or upper' unless %w[lower upper].include?(boundary_name)
            pvalue_boundary = boundary_name.to_sym
          end
        end
        pvalues = default_pvalues if pvalues.empty?

        data_source = data_source.gsub("\\",'/')

        pcm2pwm_converter = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: background)

        if File.directory?(data_source)
          # Only regular files are motifs; a nested folder cannot be read as one.
          motif_files = Dir.glob(File.join(data_source,'*')).select { |path| File.file?(path) }.sort
          motif_inputs = motif_files.map { |filename| motif_infos_from_file(filename) }
        elsif File.file?(data_source)
          input = File.read(data_source)
          motif_inputs = Bioinform::MotifSplitter.new.split(input).map { |motif_input| Bioinform::MatrixParser.new.parse(motif_input) }
        elsif data_source == '.stdin'
          filelist = $stdin.read.shellsplit
          motif_inputs = filelist.map { |filename| motif_infos_from_file(filename) }
        else
          raise "Specified data source `#{data_source}` is neither directory nor file nor even .stdin"
        end

        pwms = motif_inputs.map { |motif_input|
          if data_model == :pwm
            Bioinform::MotifModel::PWM.new(motif_input[:matrix]).named(motif_input[:name])
          elsif data_model == :pcm
            pcm = Bioinform::MotifModel::PCM.new(motif_input[:matrix]).named(motif_input[:name])
            pcm2pwm_converter.convert(pcm)
          end
        }

        collection = Macroape::Collection.new(rough_discretization: rough_discretization,
                                              precise_discretization: precise_discretization,
                                              background: background,
                                              pvalues: pvalues)

        pwms.each_with_index do |pwm,index|
          $stderr.puts "Motif #{pwm.name}, length: #{pwm.length} (#{index+1} of #{pwms.size}, #{index*100/pwms.size}% complete)" unless silent

          # When support of onefile collections is introduced - then here should be check if name exists.
          # Otherwise it should skip motif and tell you about this
          # Also two command line options to fail on skipping or to skip silently should be included

          info = {rough: {}, precise: {}, background: background}
          skip_motif = false

          # A zero real P-value in rough mode only drops that rough threshold.
          fill_rough_infos = ->(pvalue, threshold, real_pvalue) do
            if real_pvalue == 0
              $stderr.puts "#{pwm.name} at pvalue #{pvalue} has threshold that yields real-pvalue 0 in rough mode. Rough calculation will be skipped"
            else
              info[:rough][pvalue] = threshold / rough_discretization
            end
          end

          # A zero real P-value in precise mode excludes the whole motif.
          fill_precise_infos = ->(pvalue, threshold, real_pvalue) do
            if real_pvalue == 0
              $stderr.puts "#{pwm.name} at pvalue #{pvalue} has threshold that yields real-pvalue 0 in precise mode. Motif will be excluded from collection"
              skip_motif = true
            else
              info[:precise][pvalue] = threshold / precise_discretization
            end
          end

          rough_counting = PWMCounting.new(pwm.discreted(rough_discretization), background: background, max_hash_size: max_hash_size)
          precise_counting = PWMCounting.new(pwm.discreted(precise_discretization), background: background, max_hash_size: max_hash_size)

          if pvalue_boundary == :lower
            rough_counting.thresholds(*pvalues, &fill_rough_infos)
          else
            rough_counting.weak_thresholds(*pvalues, &fill_rough_infos)
          end

          if pvalue_boundary == :lower
            precise_counting.thresholds(*pvalues, &fill_precise_infos)
          else
            precise_counting.weak_thresholds(*pvalues,&fill_precise_infos)
          end

          collection << Macroape::MotifWithThresholds.new(pwm, info) unless skip_motif
        end
        $stderr.puts "100% complete. Saving results" unless silent
        File.open(output_file, 'w') do |f|
          f.puts(collection.to_yaml)
        end
        puts OutputInformation.new { |infos|
          infos.add_parameter('P', 'P-value list', pvalues.join(','))
          infos.add_parameter('VR', 'discretization value, rough', rough_discretization)
          infos.add_parameter('VP', 'discretization value, precise', precise_discretization)
          infos.add_parameter('PB', 'P-value boundary', pvalue_boundary)
          infos.background_parameter('B', 'background', background)
        }.result
      rescue => err
        $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
      end

    end
  end
end
|