macroape 4.0.2 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,53 +1,53 @@
|
|
1
|
-
require_relative 'benchmark_helper'
|
2
|
-
|
3
|
-
class TaskToBenchmark
|
4
|
-
def setup
|
5
|
-
@matrix_first = "KLF4_f2.xml
|
6
|
-
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
7
|
-
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
8
|
-
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
9
|
-
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
10
|
-
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
11
|
-
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
12
|
-
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
13
|
-
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
14
|
-
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
15
|
-
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
16
|
-
"
|
17
|
-
|
18
|
-
@matrix_second = "> SP1_f1
|
19
|
-
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
20
|
-
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
21
|
-
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
22
|
-
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
23
|
-
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
24
|
-
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
25
|
-
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
26
|
-
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
27
|
-
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
28
|
-
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
29
|
-
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
|
30
|
-
|
31
|
-
@pvalue = 0.0005
|
32
|
-
@discretization = 1
|
33
|
-
@first_background, @second_background = [1,1,1,1], [1,1,1,1]
|
34
|
-
|
35
|
-
@pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
|
36
|
-
@pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
|
37
|
-
@cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
|
38
|
-
@first_threshold = @pwm_first.threshold(@pvalue)
|
39
|
-
@second_threshold = @pwm_second.threshold(@pvalue)
|
40
|
-
self
|
41
|
-
end
|
42
|
-
|
43
|
-
def run
|
44
|
-
info = @cmp.jaccard(@first_threshold, @second_threshold)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
benchmark_result = 100.times.collect do
|
49
|
-
task_to_benchmark = TaskToBenchmark.new.setup
|
50
|
-
Benchmark.measure{ task_to_benchmark.run }
|
51
|
-
end.inject(&:+)
|
52
|
-
|
1
|
+
require_relative 'benchmark_helper'
|
2
|
+
|
3
|
+
class TaskToBenchmark
|
4
|
+
def setup
|
5
|
+
@matrix_first = "KLF4_f2.xml
|
6
|
+
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
7
|
+
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
8
|
+
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
9
|
+
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
10
|
+
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
11
|
+
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
12
|
+
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
13
|
+
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
14
|
+
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
15
|
+
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
16
|
+
"
|
17
|
+
|
18
|
+
@matrix_second = "> SP1_f1
|
19
|
+
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
20
|
+
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
21
|
+
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
22
|
+
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
23
|
+
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
24
|
+
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
25
|
+
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
26
|
+
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
27
|
+
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
28
|
+
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
29
|
+
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
|
30
|
+
|
31
|
+
@pvalue = 0.0005
|
32
|
+
@discretization = 1
|
33
|
+
@first_background, @second_background = [1,1,1,1], [1,1,1,1]
|
34
|
+
|
35
|
+
@pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
|
36
|
+
@pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
|
37
|
+
@cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
|
38
|
+
@first_threshold = @pwm_first.threshold(@pvalue)
|
39
|
+
@second_threshold = @pwm_second.threshold(@pvalue)
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
def run
|
44
|
+
info = @cmp.jaccard(@first_threshold, @second_threshold)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
benchmark_result = 100.times.collect do
|
49
|
+
task_to_benchmark = TaskToBenchmark.new.setup
|
50
|
+
Benchmark.measure{ task_to_benchmark.run }
|
51
|
+
end.inject(&:+)
|
52
|
+
|
53
53
|
puts benchmark_result
|
data/bin/align_motifs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/align_motifs'
|
4
|
-
Macroape::CLI::AlignMotifs.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/align_motifs'
|
4
|
+
Macroape::CLI::AlignMotifs.main(ARGV)
|
data/bin/eval_alignment
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/eval_alignment'
|
4
|
-
Macroape::CLI::EvalAlignment.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/eval_alignment'
|
4
|
+
Macroape::CLI::EvalAlignment.main(ARGV)
|
data/bin/eval_similarity
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/eval_similarity'
|
4
|
-
Macroape::CLI::EvalSimilarity.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/eval_similarity'
|
4
|
+
Macroape::CLI::EvalSimilarity.main(ARGV)
|
data/bin/find_pvalue
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/find_pvalue'
|
4
|
-
Macroape::CLI::FindPValue.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/find_pvalue'
|
4
|
+
Macroape::CLI::FindPValue.main(ARGV)
|
data/bin/find_threshold
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/find_threshold'
|
4
|
-
Macroape::CLI::FindThreshold.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/find_threshold'
|
4
|
+
Macroape::CLI::FindThreshold.main(ARGV)
|
data/bin/preprocess_collection
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/preprocess_collection'
|
4
|
-
Macroape::CLI::PreprocessCollection.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/preprocess_collection'
|
4
|
+
Macroape::CLI::PreprocessCollection.main(ARGV)
|
data/bin/scan_collection
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/scan_collection'
|
4
|
-
Macroape::CLI::ScanCollection.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/scan_collection'
|
4
|
+
Macroape::CLI::ScanCollection.main(ARGV)
|
data/lib/macroape.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
-
require_relative 'macroape/version'
|
2
|
-
|
3
|
-
|
4
|
-
require_relative 'macroape/
|
5
|
-
require_relative 'macroape/
|
6
|
-
require_relative 'macroape/
|
7
|
-
require_relative 'macroape/
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
1
|
+
require_relative 'macroape/version'
|
2
|
+
require 'bioinform'
|
3
|
+
|
4
|
+
require_relative 'macroape/pwm_counting'
|
5
|
+
require_relative 'macroape/aligned_pair_intersection'
|
6
|
+
require_relative 'macroape/pwm_compare_aligned'
|
7
|
+
require_relative 'macroape/pwm_compare'
|
8
|
+
require_relative 'macroape/collection'
|
9
|
+
require_relative 'macroape/motif_with_thresholds'
|
10
|
+
require_relative 'macroape/cli'
|
11
|
+
|
12
|
+
module Macroape
|
13
|
+
# Your code goes here...
|
14
|
+
end
|
@@ -1,62 +1,61 @@
|
|
1
|
-
module Macroape
|
2
|
-
class PWMCompareAligned
|
3
|
-
# unoptimized version of this and related methods
|
4
|
-
def counts_for_two_matrices(threshold_first, threshold_second)
|
5
|
-
# just not to call method each time
|
6
|
-
first_background = first.background
|
7
|
-
second_background = second.background
|
8
|
-
unless first_background == second_background
|
9
|
-
first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
10
|
-
second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
|
11
|
-
return [first_result, second_result]
|
12
|
-
end
|
13
|
-
if first.background
|
14
|
-
result = get_counts(threshold_first, threshold_second) {|score,letter| score}
|
15
|
-
[result, result]
|
16
|
-
else
|
17
|
-
result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
18
|
-
[result, result]
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
|
23
|
-
# block has form: {|score,letter| contribution to count by `letter` with `score` }
|
24
|
-
def get_counts(threshold_first, threshold_second, &count_contribution_block)
|
25
|
-
# scores_on_first_pwm, scores_on_second_pwm --> count
|
26
|
-
scores = { 0 => {0 => 1} }
|
27
|
-
length.times do |column|
|
28
|
-
new_scores = recalc_score_hash(scores,
|
29
|
-
first.matrix[column], second.matrix[column],
|
30
|
-
threshold_first - first.best_suffix(column + 1),
|
31
|
-
threshold_second - second.best_suffix(column + 1), &count_contribution_block)
|
32
|
-
scores.replace(new_scores)
|
33
|
-
if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
|
34
|
-
raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
|
35
|
-
end
|
36
|
-
end
|
37
|
-
scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
|
38
|
-
end
|
39
|
-
|
40
|
-
# wouldn't work without count_contribution_block
|
41
|
-
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
|
42
|
-
new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
|
43
|
-
scores.each do |score_first, second_scores|
|
44
|
-
second_scores.each do |score_second, count|
|
45
|
-
|
46
|
-
4.times do |letter|
|
47
|
-
new_score_first = score_first + first_column[letter]
|
48
|
-
if new_score_first >= least_sufficient_first
|
49
|
-
new_score_second = score_second + second_column[letter]
|
50
|
-
if new_score_second >= least_sufficient_second
|
51
|
-
new_scores[new_score_first][new_score_second] += yield(count, letter)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
new_scores
|
59
|
-
end
|
60
|
-
|
61
|
-
|
62
|
-
end
|
1
|
+
module Macroape
|
2
|
+
class PWMCompareAligned
|
3
|
+
# unoptimized version of this and related methods
|
4
|
+
def counts_for_two_matrices(threshold_first, threshold_second)
|
5
|
+
# just not to call method each time
|
6
|
+
first_background = first.background.counts
|
7
|
+
second_background = second.background.counts
|
8
|
+
unless first_background == second_background
|
9
|
+
first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
10
|
+
second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
|
11
|
+
return [first_result, second_result]
|
12
|
+
end
|
13
|
+
if first.background.wordwise?
|
14
|
+
result = get_counts(threshold_first, threshold_second) {|score,letter| score}
|
15
|
+
[result, result]
|
16
|
+
else
|
17
|
+
result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
18
|
+
[result, result]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
# block has form: {|score,letter| contribution to count by `letter` with `score` }
|
24
|
+
def get_counts(threshold_first, threshold_second, &count_contribution_block)
|
25
|
+
# scores_on_first_pwm, scores_on_second_pwm --> count
|
26
|
+
scores = { 0 => {0 => 1} }
|
27
|
+
length.times do |column|
|
28
|
+
new_scores = recalc_score_hash(scores,
|
29
|
+
first.matrix[column], second.matrix[column],
|
30
|
+
threshold_first - first.best_suffix(column + 1),
|
31
|
+
threshold_second - second.best_suffix(column + 1), &count_contribution_block)
|
32
|
+
scores.replace(new_scores)
|
33
|
+
if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
|
34
|
+
raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
|
35
|
+
end
|
36
|
+
end
|
37
|
+
scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
|
38
|
+
end
|
39
|
+
|
40
|
+
# wouldn't work without count_contribution_block
|
41
|
+
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
|
42
|
+
new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
|
43
|
+
scores.each do |score_first, second_scores|
|
44
|
+
second_scores.each do |score_second, count|
|
45
|
+
|
46
|
+
4.times do |letter|
|
47
|
+
new_score_first = score_first + first_column[letter]
|
48
|
+
if new_score_first >= least_sufficient_first
|
49
|
+
new_score_second = score_second + second_column[letter]
|
50
|
+
if new_score_second >= least_sufficient_second
|
51
|
+
new_scores[new_score_first][new_score_second] += yield(count, letter)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
end
|
58
|
+
new_scores
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/lib/macroape/cli.rb
CHANGED
@@ -1,188 +1,191 @@
|
|
1
|
-
require 'bioinform/support/strip_doc'
|
2
|
-
|
3
|
-
class String
|
4
|
-
def snake_case
|
5
|
-
gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class Module
|
10
|
-
def run_tool_cmd
|
11
|
-
if Macroape::STANDALONE
|
12
|
-
"ruby #{tool_name}.rb"
|
13
|
-
else
|
14
|
-
tool_name
|
15
|
-
end
|
16
|
-
end
|
17
|
-
def tool_name
|
18
|
-
self.name.split('::').last.snake_case
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
module Macroape
|
23
|
-
module CLI
|
24
|
-
class OutputInformation
|
25
|
-
def initialize(data = nil)
|
26
|
-
@table_parameter_descriptions = []
|
27
|
-
|
28
|
-
@parameter_descriptions = []
|
29
|
-
@parameter_value_infos = []
|
30
|
-
|
31
|
-
@resulting_value_descriptions = []
|
32
|
-
@resulting_value_infos = []
|
33
|
-
|
34
|
-
@table_headers = []
|
35
|
-
@table_rows = []
|
36
|
-
@table_rows_callbacks =
|
37
|
-
@data = data
|
38
|
-
yield self if block_given?
|
39
|
-
end
|
40
|
-
|
41
|
-
def parameters_info
|
42
|
-
[*@parameter_descriptions, *@parameter_value_infos]
|
43
|
-
end
|
44
|
-
def resulting_values_info
|
45
|
-
[*@resulting_value_descriptions, *@resulting_value_infos]
|
46
|
-
end
|
47
|
-
def result
|
48
|
-
[parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
|
49
|
-
#[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
|
50
|
-
end
|
51
|
-
|
52
|
-
def add_parameter(param_name, description, value, &block)
|
53
|
-
@parameter_descriptions << parameter_description_string(param_name, description)
|
54
|
-
@parameter_value_infos << "# #{param_name} = #{value}"
|
55
|
-
end
|
56
|
-
|
57
|
-
def add_resulting_value(param_name, description, value, &block)
|
58
|
-
@resulting_value_descriptions << parameter_description_string(param_name, description)
|
59
|
-
@resulting_value_infos << "#{param_name}\t#{value}"
|
60
|
-
end
|
61
|
-
|
62
|
-
def add_table_parameter(param_name, description, key_in_hash, &block)
|
63
|
-
@table_parameter_descriptions << parameter_description_string(param_name, description)
|
64
|
-
add_table_parameter_without_description(param_name, key_in_hash, &block)
|
65
|
-
end
|
66
|
-
|
67
|
-
def add_table_parameter_without_description(param_name, key_in_hash, &block)
|
68
|
-
@table_headers << param_name
|
69
|
-
@table_rows << key_in_hash
|
70
|
-
@table_rows_callbacks
|
71
|
-
end
|
72
|
-
|
73
|
-
def parameter_description_string(param_name, description)
|
74
|
-
"# #{param_name}: #{description}"
|
75
|
-
end
|
76
|
-
|
77
|
-
def table_content
|
78
|
-
@data.map{|info|
|
79
|
-
@table_rows.
|
80
|
-
}
|
81
|
-
end
|
82
|
-
|
83
|
-
def header_content
|
84
|
-
'# ' + @table_headers.join("\t")
|
85
|
-
end
|
86
|
-
|
87
|
-
def resulting_table
|
88
|
-
@data ? [*@table_parameter_descriptions, header_content, *table_content] : []
|
89
|
-
end
|
90
|
-
|
91
|
-
# printed only if it is not wordwise [1,1,1,1]
|
92
|
-
def background_parameter(param_name, description, value, &block)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
infos.add_parameter('
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
infos.add_resulting_value('
|
118
|
-
infos.add_resulting_value('
|
119
|
-
infos.add_resulting_value('
|
120
|
-
infos.add_resulting_value('
|
121
|
-
infos.add_resulting_value('
|
122
|
-
infos.add_resulting_value('
|
123
|
-
infos.add_resulting_value('
|
124
|
-
infos.add_resulting_value('
|
125
|
-
infos.add_resulting_value('
|
126
|
-
infos.add_resulting_value('
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
infos.
|
139
|
-
infos.
|
140
|
-
infos.
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
infos.add_parameter('
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
infos.
|
164
|
-
infos.
|
165
|
-
|
166
|
-
infos.add_table_parameter_without_description('
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
infos.
|
182
|
-
infos.
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
end
|
1
|
+
require 'bioinform/support/strip_doc'
|
2
|
+
|
3
|
+
class String
|
4
|
+
def snake_case
|
5
|
+
gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class Module
|
10
|
+
def run_tool_cmd
|
11
|
+
if Macroape::STANDALONE
|
12
|
+
"ruby #{tool_name}.rb"
|
13
|
+
else
|
14
|
+
tool_name
|
15
|
+
end
|
16
|
+
end
|
17
|
+
def tool_name
|
18
|
+
self.name.split('::').last.snake_case
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
module Macroape
|
23
|
+
module CLI
|
24
|
+
class OutputInformation
|
25
|
+
def initialize(data = nil)
|
26
|
+
@table_parameter_descriptions = []
|
27
|
+
|
28
|
+
@parameter_descriptions = []
|
29
|
+
@parameter_value_infos = []
|
30
|
+
|
31
|
+
@resulting_value_descriptions = []
|
32
|
+
@resulting_value_infos = []
|
33
|
+
|
34
|
+
@table_headers = []
|
35
|
+
@table_rows = []
|
36
|
+
@table_rows_callbacks = {}
|
37
|
+
@data = data
|
38
|
+
yield self if block_given?
|
39
|
+
end
|
40
|
+
|
41
|
+
def parameters_info
|
42
|
+
[*@parameter_descriptions, *@parameter_value_infos]
|
43
|
+
end
|
44
|
+
def resulting_values_info
|
45
|
+
[*@resulting_value_descriptions, *@resulting_value_infos]
|
46
|
+
end
|
47
|
+
def result
|
48
|
+
[parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
|
49
|
+
#[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
|
50
|
+
end
|
51
|
+
|
52
|
+
def add_parameter(param_name, description, value, &block)
|
53
|
+
@parameter_descriptions << parameter_description_string(param_name, description)
|
54
|
+
@parameter_value_infos << "# #{param_name} = #{value}"
|
55
|
+
end
|
56
|
+
|
57
|
+
def add_resulting_value(param_name, description, value, &block)
|
58
|
+
@resulting_value_descriptions << parameter_description_string(param_name, description)
|
59
|
+
@resulting_value_infos << "#{param_name}\t#{value}"
|
60
|
+
end
|
61
|
+
|
62
|
+
def add_table_parameter(param_name, description, key_in_hash, &block)
|
63
|
+
@table_parameter_descriptions << parameter_description_string(param_name, description)
|
64
|
+
add_table_parameter_without_description(param_name, key_in_hash, &block)
|
65
|
+
end
|
66
|
+
|
67
|
+
def add_table_parameter_without_description(param_name, key_in_hash, &block)
|
68
|
+
@table_headers << param_name
|
69
|
+
@table_rows << key_in_hash
|
70
|
+
@table_rows_callbacks[key_in_hash] = block
|
71
|
+
end
|
72
|
+
|
73
|
+
def parameter_description_string(param_name, description)
|
74
|
+
"# #{param_name}: #{description}"
|
75
|
+
end
|
76
|
+
|
77
|
+
def table_content
|
78
|
+
@data.map{|info|
|
79
|
+
@table_rows.map{|row| @table_rows_callbacks[row] ? @table_rows_callbacks[row].call(info[row]) : info[row] }.join("\t")
|
80
|
+
}
|
81
|
+
end
|
82
|
+
|
83
|
+
def header_content
|
84
|
+
'# ' + @table_headers.join("\t")
|
85
|
+
end
|
86
|
+
|
87
|
+
def resulting_table
|
88
|
+
@data ? [*@table_parameter_descriptions, header_content, *table_content] : []
|
89
|
+
end
|
90
|
+
|
91
|
+
# printed only if it is not wordwise [1,1,1,1]
|
92
|
+
def background_parameter(param_name, description, value, &block)
|
93
|
+
if value.is_a?(Bioinform::Background)
|
94
|
+
add_parameter(param_name, description, value.to_s, &block) unless value.wordwise?
|
95
|
+
else
|
96
|
+
add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
module Helper
|
102
|
+
|
103
|
+
def self.similarity_info_string(info)
|
104
|
+
OutputInformation.new { |infos|
|
105
|
+
infos.add_parameter('V', 'discretization', info[:discretization] )
|
106
|
+
infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
|
107
|
+
infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
|
108
|
+
infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
|
109
|
+
infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
|
110
|
+
if info[:first_background] == info[:second_background]
|
111
|
+
infos.background_parameter('B', 'background', info[:first_background])
|
112
|
+
else
|
113
|
+
infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
|
114
|
+
infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
|
115
|
+
end
|
116
|
+
|
117
|
+
infos.add_resulting_value('S', 'similarity', info[:similarity])
|
118
|
+
infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
|
119
|
+
infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
|
120
|
+
infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
|
121
|
+
infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
|
122
|
+
infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
|
123
|
+
infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
|
124
|
+
infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
|
125
|
+
infos.add_resulting_value('W1', 'number of words and recognized by the first model', info[:recognized_by_first] )
|
126
|
+
infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
|
127
|
+
infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
|
128
|
+
infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
|
129
|
+
infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
|
130
|
+
infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
|
131
|
+
}.result
|
132
|
+
end
|
133
|
+
|
134
|
+
############################################
|
135
|
+
|
136
|
+
def self.threshold_infos_string(data, parameters)
|
137
|
+
OutputInformation.new(data) { |infos|
|
138
|
+
infos.add_parameter('V', 'discretization value', parameters[:discretization])
|
139
|
+
infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
|
140
|
+
infos.background_parameter('B', 'background', parameters[:background])
|
141
|
+
|
142
|
+
infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
|
143
|
+
infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
|
144
|
+
infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background].wordwise?
|
145
|
+
infos.add_table_parameter('T', 'threshold', :threshold)
|
146
|
+
}.result
|
147
|
+
end
|
148
|
+
|
149
|
+
############################################
|
150
|
+
|
151
|
+
def self.scan_collection_infos_string(data, parameters)
|
152
|
+
OutputInformation.new(data) { |infos|
|
153
|
+
infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
|
154
|
+
infos.add_parameter('P', 'P-value', parameters[:pvalue])
|
155
|
+
infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
|
156
|
+
if parameters[:precision_mode] == :precise
|
157
|
+
infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
|
158
|
+
infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
|
159
|
+
infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
|
160
|
+
else
|
161
|
+
infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
|
162
|
+
end
|
163
|
+
infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
|
164
|
+
infos.background_parameter('BC', 'background for collection', parameters[:collection_background])
|
165
|
+
|
166
|
+
infos.add_table_parameter_without_description('motif', :name)
|
167
|
+
infos.add_table_parameter_without_description('similarity', :similarity)
|
168
|
+
infos.add_table_parameter_without_description('shift', :shift)
|
169
|
+
infos.add_table_parameter_without_description('overlap', :overlap)
|
170
|
+
infos.add_table_parameter_without_description('orientation', :orientation)
|
171
|
+
if parameters[:precision_mode] == :precise
|
172
|
+
infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
|
173
|
+
end
|
174
|
+
}.result
|
175
|
+
end
|
176
|
+
|
177
|
+
############################################
|
178
|
+
|
179
|
+
def self.find_pvalue_info_string(data, parameters)
|
180
|
+
OutputInformation.new(data) {|infos|
|
181
|
+
infos.add_parameter('V', 'discretization value', parameters[:discretization])
|
182
|
+
infos.background_parameter('B', 'background', parameters[:background])
|
183
|
+
|
184
|
+
infos.add_table_parameter('T', 'threshold', :threshold)
|
185
|
+
infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background].wordwise?
|
186
|
+
infos.add_table_parameter('P', 'P-value', :pvalue)
|
187
|
+
}.result
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|