macroape 4.0.2 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,53 +1,53 @@
|
|
1
|
-
require_relative 'benchmark_helper'
|
2
|
-
|
3
|
-
class TaskToBenchmark
|
4
|
-
def setup
|
5
|
-
@matrix_first = "KLF4_f2.xml
|
6
|
-
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
7
|
-
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
8
|
-
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
9
|
-
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
10
|
-
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
11
|
-
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
12
|
-
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
13
|
-
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
14
|
-
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
15
|
-
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
16
|
-
"
|
17
|
-
|
18
|
-
@matrix_second = "> SP1_f1
|
19
|
-
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
20
|
-
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
21
|
-
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
22
|
-
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
23
|
-
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
24
|
-
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
25
|
-
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
26
|
-
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
27
|
-
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
28
|
-
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
29
|
-
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
|
30
|
-
|
31
|
-
@pvalue = 0.0005
|
32
|
-
@discretization = 1
|
33
|
-
@first_background, @second_background = [1,1,1,1], [1,1,1,1]
|
34
|
-
|
35
|
-
@pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
|
36
|
-
@pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
|
37
|
-
@cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
|
38
|
-
@first_threshold = @pwm_first.threshold(@pvalue)
|
39
|
-
@second_threshold = @pwm_second.threshold(@pvalue)
|
40
|
-
self
|
41
|
-
end
|
42
|
-
|
43
|
-
def run
|
44
|
-
info = @cmp.jaccard(@first_threshold, @second_threshold)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
benchmark_result = 100.times.collect do
|
49
|
-
task_to_benchmark = TaskToBenchmark.new.setup
|
50
|
-
Benchmark.measure{ task_to_benchmark.run }
|
51
|
-
end.inject(&:+)
|
52
|
-
|
1
|
+
require_relative 'benchmark_helper'
|
2
|
+
|
3
|
+
# Benchmark fixture: compares two fixed PWMs (given inline as text) with
# Macroape::PWMCompare at a fixed P-value.
class TaskToBenchmark
  # Builds both discretized PWMs, the comparator and the two thresholds.
  # Returns self so the call can be chained: TaskToBenchmark.new.setup
  def setup
    # The matrix text is kept flush-left on purpose: any indentation would
    # become part of the string literals.
    @matrix_first = "KLF4_f2.xml
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
"

    @matrix_second = "> SP1_f1
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"

    @pvalue = 0.0005
    @discretization = 1
    @first_background, @second_background = [1,1,1,1], [1,1,1,1]

    @pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
    @pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
    @cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
    @first_threshold = @pwm_first.threshold(@pvalue)
    @second_threshold = @pwm_second.threshold(@pvalue)
    self
  end

  # The operation being timed: one jaccard evaluation on the objects prepared
  # by #setup. Returns whatever @cmp.jaccard returns.
  def run
    @cmp.jaccard(@first_threshold, @second_threshold)
  end
end
|
47
|
+
|
48
|
+
# Time 100 runs, each on a freshly set-up task (setup itself happens outside
# the measured block), and add the individual measurements together.
measurements = Array.new(100) do
  task = TaskToBenchmark.new.setup
  Benchmark.measure { task.run }
end
benchmark_result = measurements.inject(&:+)
|
52
|
+
|
53
53
|
puts benchmark_result
|
data/bin/align_motifs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/align_motifs'
|
4
|
-
Macroape::CLI::AlignMotifs.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/align_motifs'
|
4
|
+
# Entry point: pass the command-line arguments to the align_motifs tool.
Macroape::CLI::AlignMotifs.main(ARGV)
|
data/bin/eval_alignment
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/eval_alignment'
|
4
|
-
Macroape::CLI::EvalAlignment.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/eval_alignment'
|
4
|
+
# Entry point: pass the command-line arguments to the eval_alignment tool.
Macroape::CLI::EvalAlignment.main(ARGV)
|
data/bin/eval_similarity
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/eval_similarity'
|
4
|
-
Macroape::CLI::EvalSimilarity.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/eval_similarity'
|
4
|
+
# Entry point: pass the command-line arguments to the eval_similarity tool.
Macroape::CLI::EvalSimilarity.main(ARGV)
|
data/bin/find_pvalue
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/find_pvalue'
|
4
|
-
Macroape::CLI::FindPValue.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/find_pvalue'
|
4
|
+
# Entry point: pass the command-line arguments to the find_pvalue tool.
Macroape::CLI::FindPValue.main(ARGV)
|
data/bin/find_threshold
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/find_threshold'
|
4
|
-
Macroape::CLI::FindThreshold.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/find_threshold'
|
4
|
+
# Entry point: pass the command-line arguments to the find_threshold tool.
Macroape::CLI::FindThreshold.main(ARGV)
|
data/bin/preprocess_collection
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/preprocess_collection'
|
4
|
-
Macroape::CLI::PreprocessCollection.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/preprocess_collection'
|
4
|
+
# Entry point: pass the command-line arguments to the preprocess_collection tool.
Macroape::CLI::PreprocessCollection.main(ARGV)
|
data/bin/scan_collection
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require_relative '../lib/macroape/cli/scan_collection'
|
4
|
-
Macroape::CLI::ScanCollection.main(ARGV)
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/macroape/cli/scan_collection'
|
4
|
+
# Entry point: pass the command-line arguments to the scan_collection tool.
Macroape::CLI::ScanCollection.main(ARGV)
|
data/lib/macroape.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
-
require_relative 'macroape/version'
|
2
|
-
|
3
|
-
|
4
|
-
require_relative 'macroape/
|
5
|
-
require_relative 'macroape/
|
6
|
-
require_relative 'macroape/
|
7
|
-
require_relative 'macroape/
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
1
|
+
require_relative 'macroape/version'
|
2
|
+
require 'bioinform'
|
3
|
+
|
4
|
+
require_relative 'macroape/pwm_counting'
|
5
|
+
require_relative 'macroape/aligned_pair_intersection'
|
6
|
+
require_relative 'macroape/pwm_compare_aligned'
|
7
|
+
require_relative 'macroape/pwm_compare'
|
8
|
+
require_relative 'macroape/collection'
|
9
|
+
require_relative 'macroape/motif_with_thresholds'
|
10
|
+
require_relative 'macroape/cli'
|
11
|
+
|
12
|
+
module Macroape
|
13
|
+
# Top-level namespace of the library; its contents are defined in the files required above.
|
14
|
+
end
|
@@ -1,62 +1,61 @@
|
|
1
|
-
module Macroape
|
2
|
-
class PWMCompareAligned
|
3
|
-
# unoptimized version of this and related methods
|
4
|
-
def counts_for_two_matrices(threshold_first, threshold_second)
|
5
|
-
# just not to call method each time
|
6
|
-
first_background = first.background
|
7
|
-
second_background = second.background
|
8
|
-
unless first_background == second_background
|
9
|
-
first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
10
|
-
second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
|
11
|
-
return [first_result, second_result]
|
12
|
-
end
|
13
|
-
if first.background
|
14
|
-
result = get_counts(threshold_first, threshold_second) {|score,letter| score}
|
15
|
-
[result, result]
|
16
|
-
else
|
17
|
-
result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
|
18
|
-
[result, result]
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
|
23
|
-
# block has form: {|score,letter| contribution to count by `letter` with `score` }
|
24
|
-
def get_counts(threshold_first, threshold_second, &count_contribution_block)
|
25
|
-
# scores_on_first_pwm, scores_on_second_pwm --> count
|
26
|
-
scores = { 0 => {0 => 1} }
|
27
|
-
length.times do |column|
|
28
|
-
new_scores = recalc_score_hash(scores,
|
29
|
-
first.matrix[column], second.matrix[column],
|
30
|
-
threshold_first - first.best_suffix(column + 1),
|
31
|
-
threshold_second - second.best_suffix(column + 1), &count_contribution_block)
|
32
|
-
scores.replace(new_scores)
|
33
|
-
if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
|
34
|
-
raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
|
35
|
-
end
|
36
|
-
end
|
37
|
-
scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
|
38
|
-
end
|
39
|
-
|
40
|
-
# wouldn't work without count_contribution_block
|
41
|
-
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
|
42
|
-
new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
|
43
|
-
scores.each do |score_first, second_scores|
|
44
|
-
second_scores.each do |score_second, count|
|
45
|
-
|
46
|
-
4.times do |letter|
|
47
|
-
new_score_first = score_first + first_column[letter]
|
48
|
-
if new_score_first >= least_sufficient_first
|
49
|
-
new_score_second = score_second + second_column[letter]
|
50
|
-
if new_score_second >= least_sufficient_second
|
51
|
-
new_scores[new_score_first][new_score_second] += yield(count, letter)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
new_scores
|
59
|
-
end
|
60
|
-
|
61
|
-
|
62
|
-
end
|
1
|
+
module Macroape
  class PWMCompareAligned
    # Unoptimized version of this and related methods.
    #
    # Returns a two-element array [count_first, count_second]: the total
    # (background-weighted) count of words that pass both thresholds,
    # weighted by the first and by the second background respectively.
    # When the two backgrounds are equal the same number is returned twice.
    def counts_for_two_matrices(threshold_first, threshold_second)
      # Fetched once so the blocks below do not call the accessors per letter.
      first_background = first.background.counts
      second_background = second.background.counts
      unless first_background == second_background
        first_result = get_counts(threshold_first, threshold_second) { |count, letter| first_background[letter] * count }
        second_result = get_counts(threshold_first, threshold_second) { |count, letter| second_background[letter] * count }
        return [first_result, second_result]
      end

      result =
        if first.background.wordwise?
          # Wordwise background: every letter contributes the count unchanged.
          get_counts(threshold_first, threshold_second) { |count, _letter| count }
        else
          get_counts(threshold_first, threshold_second) { |count, letter| first_background[letter] * count }
        end
      [result, result]
    end

    # Walks the alignment column by column, keeping a nested hash
    #   score_on_first_pwm => { score_on_second_pwm => count }
    # of the prefixes that can still reach both thresholds, and returns the
    # sum of all counts left after the last column (a Float).
    #
    # The block has the form {|count, letter| contribution of `letter` given
    # the accumulated `count` } and is required (see #recalc_score_hash).
    #
    # Raises RuntimeError ('Hash overflow ...') when max_pair_hash_size is set
    # and the number of stored (first score, second score) pairs exceeds it.
    def get_counts(threshold_first, threshold_second, &count_contribution_block)
      scores = { 0 => { 0 => 1 } }
      length.times do |column|
        new_scores = recalc_score_hash(scores,
                                       first.matrix[column], second.matrix[column],
                                       threshold_first - first.best_suffix(column + 1),
                                       threshold_second - second.best_suffix(column + 1),
                                       &count_contribution_block)
        scores.replace(new_scores)
        # Count the stored score pairs over the inner hashes. Iterating the
        # outer hash directly yields [key, value] arrays whose size is always
        # 2, which made the limit ignore the real number of pairs.
        if max_pair_hash_size
          stored_pairs = scores.each_value.inject(0) { |total, second_scores| total + second_scores.size }
          if stored_pairs > max_pair_hash_size
            raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
          end
        end
      end
      scores.each_value.inject(0.0) do |total, second_scores|
        total + second_scores.each_value.inject(0.0) { |subtotal, count| subtotal + count }
      end
    end

    # Extends every stored prefix by each of the 4 letters of one column and
    # returns the new nested score hash. A prefix is kept only if its score on
    # both matrices stays at or above the corresponding least sufficient
    # score. The contribution of each extension is obtained by yielding
    # (count, letter), so this wouldn't work without a block passed by the
    # caller.
    def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
      new_scores = Hash.new { |hash, key| hash[key] = Hash.new(0) }
      scores.each do |score_first, second_scores|
        second_scores.each do |score_second, count|
          4.times do |letter|
            new_score_first = score_first + first_column[letter]
            next unless new_score_first >= least_sufficient_first

            new_score_second = score_second + second_column[letter]
            next unless new_score_second >= least_sufficient_second

            new_scores[new_score_first][new_score_second] += yield(count, letter)
          end
        end
      end
      new_scores
    end
  end
end
|
data/lib/macroape/cli.rb
CHANGED
@@ -1,188 +1,191 @@
|
|
1
|
-
require 'bioinform/support/strip_doc'
|
2
|
-
|
3
|
-
class String
|
4
|
-
def snake_case
|
5
|
-
gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class Module
|
10
|
-
def run_tool_cmd
|
11
|
-
if Macroape::STANDALONE
|
12
|
-
"ruby #{tool_name}.rb"
|
13
|
-
else
|
14
|
-
tool_name
|
15
|
-
end
|
16
|
-
end
|
17
|
-
def tool_name
|
18
|
-
self.name.split('::').last.snake_case
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
module Macroape
|
23
|
-
module CLI
|
24
|
-
class OutputInformation
|
25
|
-
def initialize(data = nil)
|
26
|
-
@table_parameter_descriptions = []
|
27
|
-
|
28
|
-
@parameter_descriptions = []
|
29
|
-
@parameter_value_infos = []
|
30
|
-
|
31
|
-
@resulting_value_descriptions = []
|
32
|
-
@resulting_value_infos = []
|
33
|
-
|
34
|
-
@table_headers = []
|
35
|
-
@table_rows = []
|
36
|
-
@table_rows_callbacks =
|
37
|
-
@data = data
|
38
|
-
yield self if block_given?
|
39
|
-
end
|
40
|
-
|
41
|
-
def parameters_info
|
42
|
-
[*@parameter_descriptions, *@parameter_value_infos]
|
43
|
-
end
|
44
|
-
def resulting_values_info
|
45
|
-
[*@resulting_value_descriptions, *@resulting_value_infos]
|
46
|
-
end
|
47
|
-
def result
|
48
|
-
[parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
|
49
|
-
#[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
|
50
|
-
end
|
51
|
-
|
52
|
-
def add_parameter(param_name, description, value, &block)
|
53
|
-
@parameter_descriptions << parameter_description_string(param_name, description)
|
54
|
-
@parameter_value_infos << "# #{param_name} = #{value}"
|
55
|
-
end
|
56
|
-
|
57
|
-
def add_resulting_value(param_name, description, value, &block)
|
58
|
-
@resulting_value_descriptions << parameter_description_string(param_name, description)
|
59
|
-
@resulting_value_infos << "#{param_name}\t#{value}"
|
60
|
-
end
|
61
|
-
|
62
|
-
def add_table_parameter(param_name, description, key_in_hash, &block)
|
63
|
-
@table_parameter_descriptions << parameter_description_string(param_name, description)
|
64
|
-
add_table_parameter_without_description(param_name, key_in_hash, &block)
|
65
|
-
end
|
66
|
-
|
67
|
-
def add_table_parameter_without_description(param_name, key_in_hash, &block)
|
68
|
-
@table_headers << param_name
|
69
|
-
@table_rows << key_in_hash
|
70
|
-
@table_rows_callbacks
|
71
|
-
end
|
72
|
-
|
73
|
-
def parameter_description_string(param_name, description)
|
74
|
-
"# #{param_name}: #{description}"
|
75
|
-
end
|
76
|
-
|
77
|
-
def table_content
|
78
|
-
@data.map{|info|
|
79
|
-
@table_rows.
|
80
|
-
}
|
81
|
-
end
|
82
|
-
|
83
|
-
def header_content
|
84
|
-
'# ' + @table_headers.join("\t")
|
85
|
-
end
|
86
|
-
|
87
|
-
def resulting_table
|
88
|
-
@data ? [*@table_parameter_descriptions, header_content, *table_content] : []
|
89
|
-
end
|
90
|
-
|
91
|
-
# printed only if it is not wordwise [1,1,1,1]
|
92
|
-
def background_parameter(param_name, description, value, &block)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
infos.add_parameter('
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
infos.add_resulting_value('
|
118
|
-
infos.add_resulting_value('
|
119
|
-
infos.add_resulting_value('
|
120
|
-
infos.add_resulting_value('
|
121
|
-
infos.add_resulting_value('
|
122
|
-
infos.add_resulting_value('
|
123
|
-
infos.add_resulting_value('
|
124
|
-
infos.add_resulting_value('
|
125
|
-
infos.add_resulting_value('
|
126
|
-
infos.add_resulting_value('
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
infos.
|
139
|
-
infos.
|
140
|
-
infos.
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
infos.add_parameter('
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
infos.
|
164
|
-
infos.
|
165
|
-
|
166
|
-
infos.add_table_parameter_without_description('
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
infos.
|
182
|
-
infos.
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
end
|
1
|
+
require 'bioinform/support/strip_doc'
|
2
|
+
|
3
|
+
class String
  # Converts a CamelCase name to snake_case: every run of capital letters is
  # lower-cased and prefixed with an underscore, then the underscore produced
  # at the very beginning of the string is dropped.
  #   "AlignMotifs".snake_case # => "align_motifs"
  #   "FindPValue".snake_case  # => "find_pvalue"
  def snake_case
    # \A (start of string) rather than ^ (start of any line), so an
    # underscore following an embedded newline is never stripped.
    gsub(/[A-Z]+/) { |capitals| "_#{capitals.downcase}" }.sub(/\A_/, '')
  end
end
|
8
|
+
|
9
|
+
class Module
  # Command line used to launch the tool implemented by this module:
  # "ruby <tool_name>.rb" when Macroape::STANDALONE is truthy, otherwise the
  # bare tool name.
  def run_tool_cmd
    return tool_name unless Macroape::STANDALONE

    "ruby #{tool_name}.rb"
  end

  # Snake-cased last component of the module's name.
  def tool_name
    unqualified_name = name.split('::').last
    unqualified_name.snake_case
  end
end
|
21
|
+
|
22
|
+
module Macroape
  module CLI
    # Accumulates the pieces of a tool's textual report and renders them with
    # #result. A report has up to three sections, separated by a line holding
    # a single '#':
    #   1. parameters       ("# NAME: description" lines, then "# NAME = value" lines)
    #   2. resulting values ("# NAME: description" lines, then "NAME\tvalue" lines)
    #   3. a table          (column descriptions, a "# "-prefixed header, one
    #                        tab-separated row per element of +data+)
    class OutputInformation
      # data:: optional collection of row hashes for the table section; when
      #        nil the table section is omitted.
      # Yields self, if a block is given, so the report can be filled in place.
      def initialize(data = nil)
        @table_parameter_descriptions = []

        @parameter_descriptions = []
        @parameter_value_infos = []

        @resulting_value_descriptions = []
        @resulting_value_infos = []

        @table_headers = []
        @table_rows = []
        @table_rows_callbacks = {}
        @data = data
        yield self if block_given?
      end

      # Lines of the parameters section: all descriptions, then all values.
      def parameters_info
        [*@parameter_descriptions, *@parameter_value_infos]
      end

      # Lines of the resulting-values section: all descriptions, then all values.
      def resulting_values_info
        [*@resulting_value_descriptions, *@resulting_value_infos]
      end

      # The whole report as one string; empty sections are skipped.
      def result
        [parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
      end

      # Registers an input parameter. The block is accepted but not used.
      def add_parameter(param_name, description, value, &block)
        @parameter_descriptions << parameter_description_string(param_name, description)
        @parameter_value_infos << "# #{param_name} = #{value}"
      end

      # Registers a computed value. The block is accepted but not used.
      def add_resulting_value(param_name, description, value, &block)
        @resulting_value_descriptions << parameter_description_string(param_name, description)
        @resulting_value_infos << "#{param_name}\t#{value}"
      end

      # Registers a described table column; see
      # #add_table_parameter_without_description for key_in_hash and the block.
      def add_table_parameter(param_name, description, key_in_hash, &block)
        @table_parameter_descriptions << parameter_description_string(param_name, description)
        add_table_parameter_without_description(param_name, key_in_hash, &block)
      end

      # Registers a table column headed +param_name+ whose cells are read from
      # each row hash at +key_in_hash+. An optional block converts the raw
      # cell value before output.
      def add_table_parameter_without_description(param_name, key_in_hash, &block)
        @table_headers << param_name
        @table_rows << key_in_hash
        @table_rows_callbacks[key_in_hash] = block
      end

      def parameter_description_string(param_name, description)
        "# #{param_name}: #{description}"
      end

      # One tab-separated line per element of the data, columns in
      # registration order.
      def table_content
        @data.map{|info|
          @table_rows.map{|row| @table_rows_callbacks[row] ? @table_rows_callbacks[row].call(info[row]) : info[row] }.join("\t")
        }
      end

      def header_content
        '# ' + @table_headers.join("\t")
      end

      # Lines of the table section, or [] when no data was supplied.
      def resulting_table
        @data ? [*@table_parameter_descriptions, header_content, *table_content] : []
      end

      # Registers a background parameter; printed only if it is not wordwise
      # [1,1,1,1]. Accepts either a Bioinform::Background or a plain array.
      def background_parameter(param_name, description, value, &block)
        if value.is_a?(Bioinform::Background)
          add_parameter(param_name, description, value.to_s, &block) unless value.wordwise?
        else
          add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
        end
      end
    end

    # Builders of the textual reports printed by the command-line tools.
    module Helper

      # Report for a comparison of two matrices. +info+ is a hash; the keys
      # read from it are the ones visible in the calls below.
      def self.similarity_info_string(info)
        OutputInformation.new { |infos|
          infos.add_parameter('V', 'discretization', info[:discretization] )
          infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
          infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
          infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
          infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
          if info[:first_background] == info[:second_background]
            infos.background_parameter('B', 'background', info[:first_background])
          else
            infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
            infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
          end

          infos.add_resulting_value('S', 'similarity', info[:similarity])
          infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
          infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
          infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
          infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
          # info[:text] holds the alignment: first line is the 1st matrix, last line the 2nd.
          infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
          infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
          infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
          infos.add_resulting_value('W1', 'number of words recognized by the 1st model', info[:recognized_by_first] )
          infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
          infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
          infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
          infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
          infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
        }.result
      end

      ############################################

      # Report with one table row per requested P-value.
      # NOTE(review): parameters[:background] must respond to wordwise? here,
      # although background_parameter itself also accepts plain arrays.
      def self.threshold_infos_string(data, parameters)
        OutputInformation.new(data) { |infos|
          infos.add_parameter('V', 'discretization value', parameters[:discretization])
          infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
          infos.background_parameter('B', 'background', parameters[:background])

          infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
          infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
          infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background].wordwise?
          infos.add_table_parameter('T', 'threshold', :threshold)
        }.result
      end

      ############################################

      # Report with one table row per element of +data+ for a collection scan.
      def self.scan_collection_infos_string(data, parameters)
        OutputInformation.new(data) { |infos|
          infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
          infos.add_parameter('P', 'P-value', parameters[:pvalue])
          infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
          if parameters[:precision_mode] == :precise
            infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
            infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
            infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
          else
            infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
          end
          infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
          infos.background_parameter('BC', 'background for collection', parameters[:collection_background])

          infos.add_table_parameter_without_description('motif', :name)
          infos.add_table_parameter_without_description('similarity', :similarity)
          infos.add_table_parameter_without_description('shift', :shift)
          infos.add_table_parameter_without_description('overlap', :overlap)
          infos.add_table_parameter_without_description('orientation', :orientation)
          if parameters[:precision_mode] == :precise
            # Rows whose :precision_mode is :precise are marked with '*', others with '.'.
            infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
          end
        }.result
      end

      ############################################

      # Report with one table row per threshold.
      # NOTE(review): parameters[:background] must respond to wordwise? here.
      def self.find_pvalue_info_string(data, parameters)
        OutputInformation.new(data) {|infos|
          infos.add_parameter('V', 'discretization value', parameters[:discretization])
          infos.background_parameter('B', 'background', parameters[:background])

          infos.add_table_parameter('T', 'threshold', :threshold)
          infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background].wordwise?
          infos.add_table_parameter('P', 'P-value', :pvalue)
        }.result
      end
    end
  end
end
|