macroape 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,53 +1,53 @@
1
- require_relative 'benchmark_helper'
2
-
3
- class TaskToBenchmark
4
- def setup
5
- @matrix_first = "KLF4_f2.xml
6
- 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
7
- -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
8
- -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
9
- -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
10
- -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
11
- -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
12
- -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
13
- -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
14
- -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
15
- -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
16
- "
17
-
18
- @matrix_second = "> SP1_f1
19
- -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
20
- -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
21
- -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
22
- -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
23
- -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
24
- -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
25
- -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
26
- -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
27
- -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
28
- -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
29
- -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
30
-
31
- @pvalue = 0.0005
32
- @discretization = 1
33
- @first_background, @second_background = [1,1,1,1], [1,1,1,1]
34
-
35
- @pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
36
- @pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
37
- @cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
38
- @first_threshold = @pwm_first.threshold(@pvalue)
39
- @second_threshold = @pwm_second.threshold(@pvalue)
40
- self
41
- end
42
-
43
- def run
44
- info = @cmp.jaccard(@first_threshold, @second_threshold)
45
- end
46
- end
47
-
48
- benchmark_result = 100.times.collect do
49
- task_to_benchmark = TaskToBenchmark.new.setup
50
- Benchmark.measure{ task_to_benchmark.run }
51
- end.inject(&:+)
52
-
1
+ require_relative 'benchmark_helper'
2
+
3
+ class TaskToBenchmark
4
+ def setup
5
+ @matrix_first = "KLF4_f2.xml
6
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
7
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
8
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
9
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
10
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
11
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
12
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
13
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
14
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
15
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
16
+ "
17
+
18
+ @matrix_second = "> SP1_f1
19
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
20
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
21
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
22
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
23
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
24
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
25
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
26
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
27
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
28
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
29
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606"
30
+
31
+ @pvalue = 0.0005
32
+ @discretization = 1
33
+ @first_background, @second_background = [1,1,1,1], [1,1,1,1]
34
+
35
+ @pwm_first = Bioinform::PWM.new(@matrix_first).set_parameters(background: @first_background).discrete(@discretization)
36
+ @pwm_second = Bioinform::PWM.new(@matrix_second).set_parameters(background: @second_background).discrete(@discretization)
37
+ @cmp = Macroape::PWMCompare.new(@pwm_first, @pwm_second)
38
+ @first_threshold = @pwm_first.threshold(@pvalue)
39
+ @second_threshold = @pwm_second.threshold(@pvalue)
40
+ self
41
+ end
42
+
43
+ def run
44
+ info = @cmp.jaccard(@first_threshold, @second_threshold)
45
+ end
46
+ end
47
+
48
+ benchmark_result = 100.times.collect do
49
+ task_to_benchmark = TaskToBenchmark.new.setup
50
+ Benchmark.measure{ task_to_benchmark.run }
51
+ end.inject(&:+)
52
+
53
53
  puts benchmark_result
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/align_motifs'
4
- Macroape::CLI::AlignMotifs.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/align_motifs'
4
+ Macroape::CLI::AlignMotifs.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/eval_alignment'
4
- Macroape::CLI::EvalAlignment.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/eval_alignment'
4
+ Macroape::CLI::EvalAlignment.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/eval_similarity'
4
- Macroape::CLI::EvalSimilarity.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/eval_similarity'
4
+ Macroape::CLI::EvalSimilarity.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/find_pvalue'
4
- Macroape::CLI::FindPValue.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/find_pvalue'
4
+ Macroape::CLI::FindPValue.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/find_threshold'
4
- Macroape::CLI::FindThreshold.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/find_threshold'
4
+ Macroape::CLI::FindThreshold.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/preprocess_collection'
4
- Macroape::CLI::PreprocessCollection.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/preprocess_collection'
4
+ Macroape::CLI::PreprocessCollection.main(ARGV)
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/macroape/cli/scan_collection'
4
- Macroape::CLI::ScanCollection.main(ARGV)
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/macroape/cli/scan_collection'
4
+ Macroape::CLI::ScanCollection.main(ARGV)
@@ -1,11 +1,14 @@
1
- require_relative 'macroape/version'
2
-
3
- require_relative 'macroape/counting'
4
- require_relative 'macroape/aligned_pair_intersection'
5
- require_relative 'macroape/pwm_compare_aligned'
6
- require_relative 'macroape/pwm_compare'
7
- require_relative 'macroape/cli'
8
-
9
- module Macroape
10
- # Your code goes here...
11
- end
1
+ require_relative 'macroape/version'
2
+ require 'bioinform'
3
+
4
+ require_relative 'macroape/pwm_counting'
5
+ require_relative 'macroape/aligned_pair_intersection'
6
+ require_relative 'macroape/pwm_compare_aligned'
7
+ require_relative 'macroape/pwm_compare'
8
+ require_relative 'macroape/collection'
9
+ require_relative 'macroape/motif_with_thresholds'
10
+ require_relative 'macroape/cli'
11
+
12
+ module Macroape
13
+ # Your code goes here...
14
+ end
@@ -1,62 +1,61 @@
1
- module Macroape
2
- class PWMCompareAligned
3
- # unoptimized version of this and related methods
4
- def counts_for_two_matrices(threshold_first, threshold_second)
5
- # just not to call method each time
6
- first_background = first.background
7
- second_background = second.background
8
- unless first_background == second_background
9
- first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
10
- second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
11
- return [first_result, second_result]
12
- end
13
- if first.background == [1,1,1,1]
14
- result = get_counts(threshold_first, threshold_second) {|score,letter| score}
15
- [result, result]
16
- else
17
- result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
18
- [result, result]
19
- end
20
- end
21
-
22
-
23
- # block has form: {|score,letter| contribution to count by `letter` with `score` }
24
- def get_counts(threshold_first, threshold_second, &count_contribution_block)
25
- # scores_on_first_pwm, scores_on_second_pwm --> count
26
- scores = { 0 => {0 => 1} }
27
- length.times do |column|
28
- new_scores = recalc_score_hash(scores,
29
- first.matrix[column], second.matrix[column],
30
- threshold_first - first.best_suffix(column + 1),
31
- threshold_second - second.best_suffix(column + 1), &count_contribution_block)
32
- scores.replace(new_scores)
33
- if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
34
- raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
35
- end
36
- end
37
- scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
38
- end
39
-
40
- # wouldn't work without count_contribution_block
41
- def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
42
- new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
43
- scores.each do |score_first, second_scores|
44
- second_scores.each do |score_second, count|
45
-
46
- 4.times do |letter|
47
- new_score_first = score_first + first_column[letter]
48
- if new_score_first >= least_sufficient_first
49
- new_score_second = score_second + second_column[letter]
50
- if new_score_second >= least_sufficient_second
51
- new_scores[new_score_first][new_score_second] += yield(count, letter)
52
- end
53
- end
54
- end
55
-
56
- end
57
- end
58
- new_scores
59
- end
60
-
61
- end
62
- end
1
+ module Macroape
2
+ class PWMCompareAligned
3
+ # unoptimized version of this and related methods
4
+ def counts_for_two_matrices(threshold_first, threshold_second)
5
+ # just not to call method each time
6
+ first_background = first.background.counts
7
+ second_background = second.background.counts
8
+ unless first_background == second_background
9
+ first_result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
10
+ second_result = get_counts(threshold_first, threshold_second) {|score,letter| second_background[letter] * score }
11
+ return [first_result, second_result]
12
+ end
13
+ if first.background.wordwise?
14
+ result = get_counts(threshold_first, threshold_second) {|score,letter| score}
15
+ [result, result]
16
+ else
17
+ result = get_counts(threshold_first, threshold_second) {|score,letter| first_background[letter] * score }
18
+ [result, result]
19
+ end
20
+ end
21
+
22
+
23
+ # block has form: {|score,letter| contribution to count by `letter` with `score` }
24
+ def get_counts(threshold_first, threshold_second, &count_contribution_block)
25
+ # scores_on_first_pwm, scores_on_second_pwm --> count
26
+ scores = { 0 => {0 => 1} }
27
+ length.times do |column|
28
+ new_scores = recalc_score_hash(scores,
29
+ first.matrix[column], second.matrix[column],
30
+ threshold_first - first.best_suffix(column + 1),
31
+ threshold_second - second.best_suffix(column + 1), &count_contribution_block)
32
+ scores.replace(new_scores)
33
+ if max_pair_hash_size && scores.inject(0){|sum,hsh|sum + hsh.size} > max_pair_hash_size
34
+ raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
35
+ end
36
+ end
37
+ scores.inject(0.0){|sum,(score_first, hsh)| sum + hsh.inject(0.0){|sum,(score_second, count)| sum + count }}
38
+ end
39
+
40
+ # wouldn't work without count_contribution_block
41
+ def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
42
+ new_scores = Hash.new{|h,k| h[k] = Hash.new(0)}
43
+ scores.each do |score_first, second_scores|
44
+ second_scores.each do |score_second, count|
45
+
46
+ 4.times do |letter|
47
+ new_score_first = score_first + first_column[letter]
48
+ if new_score_first >= least_sufficient_first
49
+ new_score_second = score_second + second_column[letter]
50
+ if new_score_second >= least_sufficient_second
51
+ new_scores[new_score_first][new_score_second] += yield(count, letter)
52
+ end
53
+ end
54
+ end
55
+
56
+ end
57
+ end
58
+ new_scores
59
+ end
60
+ end
61
+ end
@@ -1,188 +1,191 @@
1
- require 'bioinform/support/strip_doc'
2
-
3
- class String
4
- def snake_case
5
- gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
6
- end
7
- end
8
-
9
- class Module
10
- def run_tool_cmd
11
- if Macroape::STANDALONE
12
- "ruby #{tool_name}.rb"
13
- else
14
- tool_name
15
- end
16
- end
17
- def tool_name
18
- self.name.split('::').last.snake_case
19
- end
20
- end
21
-
22
- module Macroape
23
- module CLI
24
- class OutputInformation
25
- def initialize(data = nil)
26
- @table_parameter_descriptions = []
27
-
28
- @parameter_descriptions = []
29
- @parameter_value_infos = []
30
-
31
- @resulting_value_descriptions = []
32
- @resulting_value_infos = []
33
-
34
- @table_headers = []
35
- @table_rows = []
36
- @table_rows_callbacks = []
37
- @data = data
38
- yield self if block_given?
39
- end
40
-
41
- def parameters_info
42
- [*@parameter_descriptions, *@parameter_value_infos]
43
- end
44
- def resulting_values_info
45
- [*@resulting_value_descriptions, *@resulting_value_infos]
46
- end
47
- def result
48
- [parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
49
- #[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
50
- end
51
-
52
- def add_parameter(param_name, description, value, &block)
53
- @parameter_descriptions << parameter_description_string(param_name, description)
54
- @parameter_value_infos << "# #{param_name} = #{value}"
55
- end
56
-
57
- def add_resulting_value(param_name, description, value, &block)
58
- @resulting_value_descriptions << parameter_description_string(param_name, description)
59
- @resulting_value_infos << "#{param_name}\t#{value}"
60
- end
61
-
62
- def add_table_parameter(param_name, description, key_in_hash, &block)
63
- @table_parameter_descriptions << parameter_description_string(param_name, description)
64
- add_table_parameter_without_description(param_name, key_in_hash, &block)
65
- end
66
-
67
- def add_table_parameter_without_description(param_name, key_in_hash, &block)
68
- @table_headers << param_name
69
- @table_rows << key_in_hash
70
- @table_rows_callbacks << block
71
- end
72
-
73
- def parameter_description_string(param_name, description)
74
- "# #{param_name}: #{description}"
75
- end
76
-
77
- def table_content
78
- @data.map{|info|
79
- @table_rows.zip(@table_rows_callbacks).map{|row,callback| callback ? callback.call(info[row]) : info[row] }.join("\t")
80
- }
81
- end
82
-
83
- def header_content
84
- '# ' + @table_headers.join("\t")
85
- end
86
-
87
- def resulting_table
88
- @data ? [*@table_parameter_descriptions, header_content, *table_content] : []
89
- end
90
-
91
- # printed only if it is not wordwise [1,1,1,1]
92
- def background_parameter(param_name, description, value, &block)
93
- add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
94
- end
95
- end
96
-
97
- module Helper
98
-
99
- def self.similarity_info_string(info)
100
- OutputInformation.new { |infos|
101
- infos.add_parameter('V', 'discretization', info[:discretization] )
102
- infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
103
- infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
104
- infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
105
- infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
106
- if info[:first_background] == info[:second_background]
107
- infos.background_parameter('B', 'background', info[:first_background])
108
- else
109
- infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
110
- infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
111
- end
112
-
113
- infos.add_resulting_value('S', 'similarity', info[:similarity])
114
- infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
115
- infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
116
- infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
117
- infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
118
- infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
119
- infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
120
- infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
121
- infos.add_resulting_value('W1', 'number of words and recognized by the first model', info[:recognized_by_first] )
122
- infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
123
- infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
124
- infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
125
- infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
126
- infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
127
- }.result
128
- end
129
-
130
- ############################################
131
-
132
- def self.threshold_infos_string(data, parameters)
133
- OutputInformation.new(data) { |infos|
134
- infos.add_parameter('V', 'discretization value', parameters[:discretization])
135
- infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
136
- infos.background_parameter('B', 'background', parameters[:background])
137
-
138
- infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
139
- infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
140
- infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background] == [1, 1, 1, 1]
141
- infos.add_table_parameter('T', 'threshold', :threshold)
142
- }.result
143
- end
144
-
145
- ############################################
146
-
147
- def self.scan_collection_infos_string(data, parameters)
148
- OutputInformation.new(data) { |infos|
149
- infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
150
- infos.add_parameter('P', 'P-value', parameters[:pvalue])
151
- infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
152
- if parameters[:precision_mode] == :precise
153
- infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
154
- infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
155
- infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
156
- else
157
- infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
158
- end
159
- infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
160
- infos.background_parameter('BC', 'background for collection', parameters[:collection_background])
161
-
162
- infos.add_table_parameter_without_description('motif', :name)
163
- infos.add_table_parameter_without_description('similarity', :similarity)
164
- infos.add_table_parameter_without_description('shift', :shift)
165
- infos.add_table_parameter_without_description('overlap', :overlap)
166
- infos.add_table_parameter_without_description('orientation', :orientation)
167
- if parameters[:precision_mode] == :precise
168
- infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
169
- end
170
- }.result
171
- end
172
-
173
- ############################################
174
-
175
- def self.find_pvalue_info_string(data, parameters)
176
- OutputInformation.new(data) {|infos|
177
- infos.add_parameter('V', 'discretization value', parameters[:discretization])
178
- infos.background_parameter('B', 'background', parameters[:background])
179
-
180
- infos.add_table_parameter('T', 'threshold', :threshold)
181
- infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background] == [1,1,1,1]
182
- infos.add_table_parameter('P', 'P-value', :pvalue)
183
- }.result
184
- end
185
-
186
- end
187
- end
188
- end
1
+ require 'bioinform/support/strip_doc'
2
+
3
+ class String
4
+ def snake_case
5
+ gsub(/[A-Z]+/){|big| "_#{big.downcase}" }.sub(/^_/,'')
6
+ end
7
+ end
8
+
9
+ class Module
10
+ def run_tool_cmd
11
+ if Macroape::STANDALONE
12
+ "ruby #{tool_name}.rb"
13
+ else
14
+ tool_name
15
+ end
16
+ end
17
+ def tool_name
18
+ self.name.split('::').last.snake_case
19
+ end
20
+ end
21
+
22
+ module Macroape
23
+ module CLI
24
+ class OutputInformation
25
+ def initialize(data = nil)
26
+ @table_parameter_descriptions = []
27
+
28
+ @parameter_descriptions = []
29
+ @parameter_value_infos = []
30
+
31
+ @resulting_value_descriptions = []
32
+ @resulting_value_infos = []
33
+
34
+ @table_headers = []
35
+ @table_rows = []
36
+ @table_rows_callbacks = {}
37
+ @data = data
38
+ yield self if block_given?
39
+ end
40
+
41
+ def parameters_info
42
+ [*@parameter_descriptions, *@parameter_value_infos]
43
+ end
44
+ def resulting_values_info
45
+ [*@resulting_value_descriptions, *@resulting_value_infos]
46
+ end
47
+ def result
48
+ [parameters_info, resulting_values_info, resulting_table].reject(&:empty?).map{|b|b.join("\n")}.join("\n#\n")
49
+ #[*parameters_info, '#', *resulting_values_info, '#', *resulting_table].join("\n")
50
+ end
51
+
52
+ def add_parameter(param_name, description, value, &block)
53
+ @parameter_descriptions << parameter_description_string(param_name, description)
54
+ @parameter_value_infos << "# #{param_name} = #{value}"
55
+ end
56
+
57
+ def add_resulting_value(param_name, description, value, &block)
58
+ @resulting_value_descriptions << parameter_description_string(param_name, description)
59
+ @resulting_value_infos << "#{param_name}\t#{value}"
60
+ end
61
+
62
+ def add_table_parameter(param_name, description, key_in_hash, &block)
63
+ @table_parameter_descriptions << parameter_description_string(param_name, description)
64
+ add_table_parameter_without_description(param_name, key_in_hash, &block)
65
+ end
66
+
67
+ def add_table_parameter_without_description(param_name, key_in_hash, &block)
68
+ @table_headers << param_name
69
+ @table_rows << key_in_hash
70
+ @table_rows_callbacks[key_in_hash] = block
71
+ end
72
+
73
+ def parameter_description_string(param_name, description)
74
+ "# #{param_name}: #{description}"
75
+ end
76
+
77
+ def table_content
78
+ @data.map{|info|
79
+ @table_rows.map{|row| @table_rows_callbacks[row] ? @table_rows_callbacks[row].call(info[row]) : info[row] }.join("\t")
80
+ }
81
+ end
82
+
83
+ def header_content
84
+ '# ' + @table_headers.join("\t")
85
+ end
86
+
87
+ def resulting_table
88
+ @data ? [*@table_parameter_descriptions, header_content, *table_content] : []
89
+ end
90
+
91
+ # printed only if it is not wordwise [1,1,1,1]
92
+ def background_parameter(param_name, description, value, &block)
93
+ if value.is_a?(Bioinform::Background)
94
+ add_parameter(param_name, description, value.to_s, &block) unless value.wordwise?
95
+ else
96
+ add_parameter(param_name, description, value.join(','), &block) unless value == [1,1,1,1]
97
+ end
98
+ end
99
+ end
100
+
101
+ module Helper
102
+
103
+ def self.similarity_info_string(info)
104
+ OutputInformation.new { |infos|
105
+ infos.add_parameter('V', 'discretization', info[:discretization] )
106
+ infos.add_parameter('P', 'requested P-value', info[:requested_pvalue]) unless info[:predefined_threshold_first] && info[:predefined_threshold_second]
107
+ infos.add_parameter('T1', 'threshold for the 1st matrix', info[:predefined_threshold_first] ) if info[:predefined_threshold_first]
108
+ infos.add_parameter('T2', 'threshold for the 2nd matrix', info[:predefined_threshold_second] ) if info[:predefined_threshold_second]
109
+ infos.add_parameter('PB', 'P-value boundary', info[:pvalue_boundary])
110
+ if info[:first_background] == info[:second_background]
111
+ infos.background_parameter('B', 'background', info[:first_background])
112
+ else
113
+ infos.background_parameter('B1', 'background for the 1st model', info[:first_background])
114
+ infos.background_parameter('B2', 'background for the 2nd model', info[:second_background])
115
+ end
116
+
117
+ infos.add_resulting_value('S', 'similarity', info[:similarity])
118
+ infos.add_resulting_value('D', 'distance (1-similarity)', info[:tanimoto])
119
+ infos.add_resulting_value('L', 'length of the alignment', info[:alignment_length])
120
+ infos.add_resulting_value('SH', 'shift of the 2nd PWM relative to the 1st', info[:shift])
121
+ infos.add_resulting_value('OR', 'orientation of the 2nd PWM relative to the 1st', info[:orientation])
122
+ infos.add_resulting_value('A1', 'aligned 1st matrix', info[:text].lines.to_a.first.strip )
123
+ infos.add_resulting_value('A2', 'aligned 2nd matrix', info[:text].lines.to_a.last.strip )
124
+ infos.add_resulting_value('W', 'number of words recognized by both models (model = PWM + threshold)', info[:recognized_by_both] )
125
+ infos.add_resulting_value('W1', 'number of words and recognized by the first model', info[:recognized_by_first] )
126
+ infos.add_resulting_value('P1', 'P-value for the 1st matrix', info[:real_pvalue_first] )
127
+ infos.add_resulting_value('T1', 'threshold for the 1st matrix', info[:threshold_first] ) unless info[:predefined_threshold_first]
128
+ infos.add_resulting_value('W2', 'number of words recognized by the 2nd model', info[:recognized_by_second] )
129
+ infos.add_resulting_value('P2', 'P-value for the 2nd matrix', info[:real_pvalue_second] )
130
+ infos.add_resulting_value('T2', 'threshold for the 2nd matrix', info[:threshold_second] ) unless info[:predefined_threshold_second]
131
+ }.result
132
+ end
133
+
134
+ ############################################
135
+
136
+ def self.threshold_infos_string(data, parameters)
137
+ OutputInformation.new(data) { |infos|
138
+ infos.add_parameter('V', 'discretization value', parameters[:discretization])
139
+ infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
140
+ infos.background_parameter('B', 'background', parameters[:background])
141
+
142
+ infos.add_table_parameter('P', 'requested P-value', :expected_pvalue)
143
+ infos.add_table_parameter('AP', 'actual P-value', :real_pvalue)
144
+ infos.add_table_parameter('W', 'number of recognized words', :recognized_words) if parameters[:background].wordwise?
145
+ infos.add_table_parameter('T', 'threshold', :threshold)
146
+ }.result
147
+ end
148
+
149
+ ############################################
150
+
151
+ def self.scan_collection_infos_string(data, parameters)
152
+ OutputInformation.new(data) { |infos|
153
+ infos.add_parameter('MS', 'minimal similarity to output', parameters[:cutoff])
154
+ infos.add_parameter('P', 'P-value', parameters[:pvalue])
155
+ infos.add_parameter('PB', 'P-value boundary', parameters[:pvalue_boundary])
156
+ if parameters[:precision_mode] == :precise
157
+ infos.add_parameter('VR', 'discretization value, rough', parameters[:rough_discretization])
158
+ infos.add_parameter('VP', 'discretization value, precise', parameters[:precise_discretization])
159
+ infos.add_parameter('MP', 'minimal similarity for the 2nd pass in \'precise\' mode', parameters[:minimal_similarity])
160
+ else
161
+ infos.add_parameter('V', 'discretization value', parameters[:rough_discretization])
162
+ end
163
+ infos.background_parameter('BQ', 'background for query matrix', parameters[:query_background])
164
+ infos.background_parameter('BC', 'background for collection', parameters[:collection_background])
165
+
166
+ infos.add_table_parameter_without_description('motif', :name)
167
+ infos.add_table_parameter_without_description('similarity', :similarity)
168
+ infos.add_table_parameter_without_description('shift', :shift)
169
+ infos.add_table_parameter_without_description('overlap', :overlap)
170
+ infos.add_table_parameter_without_description('orientation', :orientation)
171
+ if parameters[:precision_mode] == :precise
172
+ infos.add_table_parameter_without_description('precise mode', :precision_mode){|precision| precision == :precise ? '*' : '.' }
173
+ end
174
+ }.result
175
+ end
176
+
177
+ ############################################
178
+
179
+ def self.find_pvalue_info_string(data, parameters)
180
+ OutputInformation.new(data) {|infos|
181
+ infos.add_parameter('V', 'discretization value', parameters[:discretization])
182
+ infos.background_parameter('B', 'background', parameters[:background])
183
+
184
+ infos.add_table_parameter('T', 'threshold', :threshold)
185
+ infos.add_table_parameter('W', 'number of recognized words', :number_of_recognized_words) if parameters[:background].wordwise?
186
+ infos.add_table_parameter('P', 'P-value', :pvalue)
187
+ }.result
188
+ end
189
+ end
190
+ end
191
+ end