macroape 4.0.2 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,53 +1,53 @@
1
- require_relative 'benchmark_helper'
2
-
3
- class TaskToBenchmark
4
- def setup
5
- @matrix_first = "KLF4_f2.xml
6
- 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
7
- -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
8
- -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
9
- -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
10
- -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
11
- -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
12
- -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
13
- -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
14
- -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
15
- -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
16
- "
17
-
18
- @matrix_second = "> SP1_f1
19
- -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
20
- -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
21
- -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
22
- -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
23
- -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
24
- -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
25
- -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
26
- -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
27
- -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
28
- -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
29
- -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
30
-
31
- @pvalue = 0.0005
32
- @discretization = 1
33
- @first_background, @second_background = [1,1,1,1], [1,1,1,1]
34
-
35
- @pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
36
- @pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
37
- @cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
38
- @first_threshold = @pwm_first.threshold(@pvalue)
39
- @second_threshold = @pwm_second.threshold(@pvalue)
40
- self
41
- end
42
-
43
- def run
44
- info = @cmp.jaccard(@first_threshold, @second_threshold)
45
- end
46
- end
47
-
48
- benchmark_result = 100.times.collect do
49
- task_to_benchmark = TaskToBenchmark.new.setup
50
- Benchmark.measure{ task_to_benchmark.run }
51
- end.inject(&:+)
52
-
1
+ require_relative 'benchmark_helper'
2
+
3
+ class TaskToBenchmark
4
+ def setup
5
+ @matrix_first = "KLF4_f2.xml
6
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
7
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
8
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
9
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
10
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
11
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
12
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
13
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
14
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
15
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
16
+ "
17
+
18
+ @matrix_second = "> SP1_f1
19
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
20
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
21
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
22
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
23
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
24
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
25
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
26
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
27
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
28
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
29
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
30
+
31
+ @pvalue = 0.0005
32
+ @discretization = 1
33
+ @first_background, @second_background = [1,1,1,1], [1,1,1,1]
34
+
35
+ @pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
36
+ @pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
37
+ @cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
38
+ @first_threshold = @pwm_first.threshold(@pvalue)
39
+ @second_threshold = @pwm_second.threshold(@pvalue)
40
+ self
41
+ end
42
+
43
+ def run
44
+ info = @cmp.jaccard(@first_threshold, @second_threshold)
45
+ end
46
+ end
47
+
48
+ benchmark_result = 100.times.collect do
49
+ task_to_benchmark = TaskToBenchmark.new.setup
50
+ Benchmark.measure{ task_to_benchmark.run }
51
+ end.inject(&:+)
52
+
53
53
  puts benchmark_result
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/align_motifs'
4
- Macroape::CLI::AlignMotifs.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/align_motifs'
4
+ Macroape::CLI::AlignMotifs.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/eval_alignment'
4
- Macroape::CLI::EvalAlignment.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/eval_alignment'
4
+ Macroape::CLI::EvalAlignment.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/eval_similarity'
4
- Macroape::CLI::EvalSimilarity.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/eval_similarity'
4
+ Macroape::CLI::EvalSimilarity.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/find_pvalue'
4
- Macroape::CLI::FindPValue.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/find_pvalue'
4
+ Macroape::CLI::FindPValue.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/find_threshold'
4
- Macroape::CLI::FindThreshold.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/find_threshold'
4
+ Macroape::CLI::FindThreshold.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/preprocess_collection'
4
- Macroape::CLI::PreprocessCollection.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/preprocess_collection'
4
+ Macroape::CLI::PreprocessCollection.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/scan_collection'
4
- Macroape::CLI::ScanCollection.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/scan_collection'
4
+ Macroape::CLI::ScanCollection.main(ARGV)
@@ -1,11 +1,14 @@
1
- require_relative 'macroape/version'
2
-
3
- require_relative 'macroape/counting'
4
- require_relative 'macroape/aligned_pair_intersection'
5
- require_relative 'macroape/pwm_compare_aligned'
6
- require_relative 'macroape/pwm_compare'
7
- require_relative 'macroape/cli'
8
-
9
- module Macroape
10
- # Your code goes here...
11
- end
1
+ require_relative 'macroape/version'
2
+ require 'bioinform'
3
+
4
+ require_relative 'macroape/pwm_counting'
5
+ require_relative 'macroape/aligned_pair_intersection'
6
+ require_relative 'macroape/pwm_compare_aligned'
7
+ require_relative 'macroape/pwm_compare'
8
+ require_relative 'macroape/collection'
9
+ require_relative 'macroape/motif_with_thresholds'
10
+ require_relative 'macroape/cli'
11
+
12
+ module Macroape
13
+ # Your code goes here...
14
+ end
@@ -1,62 +1,61 @@
1
- module Macroape
2
- class PWMCompareAligned
3
- # unoptimized version of this and related methods
4
- def counts_for_two_matrices(threshold_first, threshold_second)
5
- # just not to call method each time
6
- first_background = first.background
7
- second_background = second.background
8
- unless first_background == second_background
9
- first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
10
- second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
11
- return [first_result, second_result]
12
- end
13
- if first.background == [1,1,1,1]
14
- result = get_counts(threshold_first, threshold_second) {|score,letter| score}
15
- [result, result]
16
- else
17
- result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
18
- [result, result]
19
- end
20
- end
21
-
22
-
23
- # block has form: {|score,letter| contribution to count by `letter` with `score` }
24
- def get_counts(threshold_first, threshold_second, &count_contribution_block)
25
- # scores_on_first_pwm, scores_on_second_pwm --> count
26
- scores = { 0 => {0 => 1} }
27
- length.times do |column|
28
- new_scores = recalc_score_hash(scores,
29
- first.matrix[column], second.matrix[column],
30
- threshold_first - first.best_suffix(column + 1),
31
- threshold_second - second.best_suffix(column + 1), &count_contribution_block)
32
- scores.replace(new_scores)
33
- if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
34
- raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
35
- end
36
- end
37
- scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
38
- end
39
-
40
- # wouldn't work without count_contribution_block
41
- def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
42
- new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
43
- scores.each do |score_first, second_scores|
44
- second_scores.each do |score_second, count|
45
-
46
- 4.times do |letter|
47
- new_score_first = score_first + first_column[letter]
48
- if new_score_first >= least_sufficient_first
49
- new_score_second = score_second + second_column[letter]
50
- if new_score_second >= least_sufficient_second
51
- new_scores[new_score_first][new_score_second] += yield(count, letter)
52
- end
53
- end
54
- end
55
-
56
- end
57
- end
58
- new_scores
59
- end
60
-
61
- end
62
- end
1
+ module Macroape
2
+ class PWMCompareAligned
3
+ # unoptimized version of this and related methods
4
+ def counts_for_two_matrices(threshold_first, threshold_second)
5
+ # just not to call method each time
6
+ first_background = first.background.counts
7
+ second_background = second.background.counts
8
+ unless first_background == second_background
9
+ first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
10
+ second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
11
+ return [first_result, second_result]
12
+ end
13
+ if first.background.wordwise?
14
+ result = get_counts(threshold_first, threshold_second) {|score,letter| score}
15
+ [result, result]
16
+ else
17
+ result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
18
+ [result, result]
19
+ end
20
+ end
21
+
22
+
23
+ # block has form: {|score,letter| contribution to count by `letter` with `score` }
24
+ def get_counts(threshold_first, threshold_second, &count_contribution_block)
25
+ # scores_on_first_pwm, scores_on_second_pwm --> count
26
+ scores = { 0 => {0 => 1} }
27
+ length.times do |column|
28
+ new_scores = recalc_score_hash(scores,
29
+ first.matrix[column], second.matrix[column],
30
+ threshold_first - first.best_suffix(column + 1),
31
+ threshold_second - second.best_suffix(column + 1), &count_contribution_block)
32
+ scores.replace(new_scores)
33
+ if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
34
+ raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
35
+ end
36
+ end
37
+ scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
38
+ end
39
+
40
+ # wouldn't work without count_contribution_block
41
+ def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
42
+ new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
43
+ scores.each do |score_first, second_scores|
44
+ second_scores.each do |score_second, count|
45
+
46
+ 4.times do |letter|
47
+ new_score_first = score_first + first_column[letter]
48
+ if new_score_first >= least_sufficient_first
49
+ new_score_second = score_second + second_column[letter]
50
+ if new_score_second >= least_sufficient_second
51
+ new_scores[new_score_first][new_score_second] += yield(count, letter)
52
+ end
53
+ end
54
+ end
55
+
56
+ end
57
+ end
58
+ new_scores
59
+ end
60
+ end
61
+ end
@@ -1,188 +1,191 @@
1
- require 'bioinform/support/strip_doc'
2
-
3
- class String
4
- def snake_case
5
- gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
6
- end
7
- end
8
-
9
- class Module
10
- def run_tool_cmd
11
- if Macroape::STANDALONE
12
- "ruby #{tool_name}.rb"
13
- else
14
- tool_name
15
- end
16
- end
17
- def tool_name
18
- self.name.split('::').last.snake_case
19
- end
20
- end
21
-
22
- module Macroape
23
- module CLI
24
- class OutputInformation
25
- def initialize(data = nil)
26
- @table_parameter_descriptions = []
27
-
28
- @parameter_descriptions = []
29
- @parameter_value_infos = []
30
-
31
- @resulting_value_descriptions = []
32
- @resulting_value_infos = []
33
-
34
- @table_headers = []
35
- @table_rows = []
36
- @table_rows_callbacks = []
37
- @data = data
38
- yield self if block_given?
39
- end
40
-
41
- def parameters_info
42
- [*@parameter_descriptions, *@parameter_value_infos]
43
- end
44
- def resulting_values_info
45
- [*@resulting_value_descriptions, *@resulting_value_infos]
46
- end
47
- def result
48
- [parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
49
- #[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
50
- end
51
-
52
- def add_parameter(param_name, description, value, &block)
53
- @parameter_descriptions << parameter_description_string(param_name, description)
54
- @parameter_value_infos << "# #{param_name} = #{value}"
55
- end
56
-
57
- def add_resulting_value(param_name, description, value, &block)
58
- @resulting_value_descriptions << parameter_description_string(param_name, description)
59
- @resulting_value_infos << "#{param_name}\t#{value}"
60
- end
61
-
62
- def add_table_parameter(param_name, description, key_in_hash, &block)
63
- @table_parameter_descriptions << parameter_description_string(param_name, description)
64
- add_table_parameter_without_description(param_name, key_in_hash, &block)
65
- end
66
-
67
- def add_table_parameter_without_description(param_name, key_in_hash, &block)
68
- @table_headers << param_name
69
- @table_rows << key_in_hash
70
- @table_rows_callbacks << block
71
- end
72
-
73
- def parameter_description_string(param_name, description)
74
- "# #{param_name}: #{description}"
75
- end
76
-
77
- def table_content
78
- @data.map{|info|
79
- @table_rows.zip(@table_rows_callbacks).map{|row,callback| callback ? callback.call(info[row]) : info[row] }.join("\t")
80
- }
81
- end
82
-
83
- def header_content
84
- '# ' + @table_headers.join("\t")
85
- end
86
-
87
- def resulting_table
88
- @data ? [*@table_parameter_descriptions, header_content, *table_content] : []
89
- end
90
-
91
- # printed only if it is not wordwise [1,1,1,1]
92
- def background_parameter(param_name, description, value, &block)
93
- add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
94
- end
95
- end
96
-
97
- module Helper
98
-
99
- def self.similarity_info_string(info)
100
- OutputInformation.new { |infos|
101
- infos.add_parameter('V', 'discretization', info[:discretization] )
102
- infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
103
- infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
104
- infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
105
- infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
106
- if info[:first_background] == info[:second_background]
107
- infos.background_parameter('B', 'background', info[:first_background])
108
- else
109
- infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
110
- infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
111
- end
112
-
113
- infos.add_resulting_value('S', 'similarity', info[:similarity])
114
- infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
115
- infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
116
- infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
117
- infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
118
- infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
119
- infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
120
- infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
121
- infos.add_resulting_value('W1', 'number of words and recognized by the first model', info[:recognized_by_first] )
122
- infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
123
- infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
124
- infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
125
- infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
126
- infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
127
- }.result
128
- end
129
-
130
- ############################################
131
-
132
- def self.threshold_infos_string(data, parameters)
133
- OutputInformation.new(data) { |infos|
134
- infos.add_parameter('V', 'discretization value', parameters[:discretization])
135
- infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
136
- infos.background_parameter('B', 'background', parameters[:background])
137
-
138
- infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
139
- infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
140
- infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background] == [1, 1, 1, 1]
141
- infos.add_table_parameter('T', 'threshold', :threshold)
142
- }.result
143
- end
144
-
145
- ############################################
146
-
147
- def self.scan_collection_infos_string(data, parameters)
148
- OutputInformation.new(data) { |infos|
149
- infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
150
- infos.add_parameter('P', 'P-value', parameters[:pvalue])
151
- infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
152
- if parameters[:precision_mode] == :precise
153
- infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
154
- infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
155
- infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
156
- else
157
- infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
158
- end
159
- infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
160
- infos.background_parameter('BC', 'background for collection', parameters[:collection_background])
161
-
162
- infos.add_table_parameter_without_description('motif', :name)
163
- infos.add_table_parameter_without_description('similarity', :similarity)
164
- infos.add_table_parameter_without_description('shift', :shift)
165
- infos.add_table_parameter_without_description('overlap', :overlap)
166
- infos.add_table_parameter_without_description('orientation', :orientation)
167
- if parameters[:precision_mode] == :precise
168
- infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
169
- end
170
- }.result
171
- end
172
-
173
- ############################################
174
-
175
- def self.find_pvalue_info_string(data, parameters)
176
- OutputInformation.new(data) {|infos|
177
- infos.add_parameter('V', 'discretization value', parameters[:discretization])
178
- infos.background_parameter('B', 'background', parameters[:background])
179
-
180
- infos.add_table_parameter('T', 'threshold', :threshold)
181
- infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background] == [1,1,1,1]
182
- infos.add_table_parameter('P', 'P-value', :pvalue)
183
- }.result
184
- end
185
-
186
- end
187
- end
188
- end
1
+ require 'bioinform/support/strip_doc'
2
+
3
+ class String
4
+ def snake_case
5
+ gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
6
+ end
7
+ end
8
+
9
+ class Module
10
+ def run_tool_cmd
11
+ if Macroape::STANDALONE
12
+ "ruby #{tool_name}.rb"
13
+ else
14
+ tool_name
15
+ end
16
+ end
17
+ def tool_name
18
+ self.name.split('::').last.snake_case
19
+ end
20
+ end
21
+
22
+ module Macroape
23
+ module CLI
24
+ class OutputInformation
25
+ def initialize(data = nil)
26
+ @table_parameter_descriptions = []
27
+
28
+ @parameter_descriptions = []
29
+ @parameter_value_infos = []
30
+
31
+ @resulting_value_descriptions = []
32
+ @resulting_value_infos = []
33
+
34
+ @table_headers = []
35
+ @table_rows = []
36
+ @table_rows_callbacks = {}
37
+ @data = data
38
+ yield self if block_given?
39
+ end
40
+
41
+ def parameters_info
42
+ [*@parameter_descriptions, *@parameter_value_infos]
43
+ end
44
+ def resulting_values_info
45
+ [*@resulting_value_descriptions, *@resulting_value_infos]
46
+ end
47
+ def result
48
+ [parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
49
+ #[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
50
+ end
51
+
52
+ def add_parameter(param_name, description, value, &block)
53
+ @parameter_descriptions << parameter_description_string(param_name, description)
54
+ @parameter_value_infos << "# #{param_name} = #{value}"
55
+ end
56
+
57
+ def add_resulting_value(param_name, description, value, &block)
58
+ @resulting_value_descriptions << parameter_description_string(param_name, description)
59
+ @resulting_value_infos << "#{param_name}\t#{value}"
60
+ end
61
+
62
+ def add_table_parameter(param_name, description, key_in_hash, &block)
63
+ @table_parameter_descriptions << parameter_description_string(param_name, description)
64
+ add_table_parameter_without_description(param_name, key_in_hash, &block)
65
+ end
66
+
67
+ def add_table_parameter_without_description(param_name, key_in_hash, &block)
68
+ @table_headers << param_name
69
+ @table_rows << key_in_hash
70
+ @table_rows_callbacks[key_in_hash] = block
71
+ end
72
+
73
+ def parameter_description_string(param_name, description)
74
+ "# #{param_name}: #{description}"
75
+ end
76
+
77
+ def table_content
78
+ @data.map{|info|
79
+ @table_rows.map{|row| @table_rows_callbacks[row] ? @table_rows_callbacks[row].call(info[row]) : info[row] }.join("\t")
80
+ }
81
+ end
82
+
83
+ def header_content
84
+ '# ' + @table_headers.join("\t")
85
+ end
86
+
87
+ def resulting_table
88
+ @data ? [*@table_parameter_descriptions, header_content, *table_content] : []
89
+ end
90
+
91
+ # printed only if it is not wordwise [1,1,1,1]
92
+ def background_parameter(param_name, description, value, &block)
93
+ if value.is_a?(Bioinform::Background)
94
+ add_parameter(param_name, description, value.to_s, &block) unless value.wordwise?
95
+ else
96
+ add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
97
+ end
98
+ end
99
+ end
100
+
101
+ module Helper
102
+
103
+ def self.similarity_info_string(info)
104
+ OutputInformation.new { |infos|
105
+ infos.add_parameter('V', 'discretization', info[:discretization] )
106
+ infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
107
+ infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
108
+ infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
109
+ infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
110
+ if info[:first_background] == info[:second_background]
111
+ infos.background_parameter('B', 'background', info[:first_background])
112
+ else
113
+ infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
114
+ infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
115
+ end
116
+
117
+ infos.add_resulting_value('S', 'similarity', info[:similarity])
118
+ infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
119
+ infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
120
+ infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
121
+ infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
122
+ infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
123
+ infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
124
+ infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
125
+ infos.add_resulting_value('W1', 'number of words and recognized by the first model', info[:recognized_by_first] )
126
+ infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
127
+ infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
128
+ infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
129
+ infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
130
+ infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
131
+ }.result
132
+ end
133
+
134
+ ############################################
135
+
136
+ def self.threshold_infos_string(data, parameters)
137
+ OutputInformation.new(data) { |infos|
138
+ infos.add_parameter('V', 'discretization value', parameters[:discretization])
139
+ infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
140
+ infos.background_parameter('B', 'background', parameters[:background])
141
+
142
+ infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
143
+ infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
144
+ infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background].wordwise?
145
+ infos.add_table_parameter('T', 'threshold', :threshold)
146
+ }.result
147
+ end
148
+
149
+ ############################################
150
+
151
+ def self.scan_collection_infos_string(data, parameters)
152
+ OutputInformation.new(data) { |infos|
153
+ infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
154
+ infos.add_parameter('P', 'P-value', parameters[:pvalue])
155
+ infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
156
+ if parameters[:precision_mode] == :precise
157
+ infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
158
+ infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
159
+ infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
160
+ else
161
+ infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
162
+ end
163
+ infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
164
+ infos.background_parameter('BC', 'background for collection', parameters[:collection_background])
165
+
166
+ infos.add_table_parameter_without_description('motif', :name)
167
+ infos.add_table_parameter_without_description('similarity', :similarity)
168
+ infos.add_table_parameter_without_description('shift', :shift)
169
+ infos.add_table_parameter_without_description('overlap', :overlap)
170
+ infos.add_table_parameter_without_description('orientation', :orientation)
171
+ if parameters[:precision_mode] == :precise
172
+ infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
173
+ end
174
+ }.result
175
+ end
176
+
177
+ ############################################
178
+
179
+ def self.find_pvalue_info_string(data, parameters)
180
+ OutputInformation.new(data) {|infos|
181
+ infos.add_parameter('V', 'discretization value', parameters[:discretization])
182
+ infos.background_parameter('B', 'background', parameters[:background])
183
+
184
+ infos.add_table_parameter('T', 'threshold', :threshold)
185
+ infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background].wordwise?
186
+ infos.add_table_parameter('P', 'P-value', :pvalue)
187
+ }.result
188
+ end
189
+ end
190
+ end
191
+ end