macroape 3.3.7 → 3.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/README.md +2 -2
  2. data/Rakefile.rb +6 -6
  3. data/TODO.txt +23 -3
  4. data/benchmark/similarity_benchmark.rb +18 -18
  5. data/lib/macroape/aligned_pair_intersection.rb +4 -4
  6. data/lib/macroape/cli/align_motifs.rb +34 -28
  7. data/lib/macroape/cli/eval_alignment.rb +73 -47
  8. data/lib/macroape/cli/eval_similarity.rb +65 -40
  9. data/lib/macroape/cli/find_pvalue.rb +30 -34
  10. data/lib/macroape/cli/find_threshold.rb +52 -41
  11. data/lib/macroape/cli/preprocess_collection.rb +68 -58
  12. data/lib/macroape/cli/scan_collection.rb +89 -73
  13. data/lib/macroape/cli.rb +184 -1
  14. data/lib/macroape/counting.rb +31 -5
  15. data/lib/macroape/pwm_compare.rb +8 -2
  16. data/lib/macroape/pwm_compare_aligned.rb +15 -10
  17. data/lib/macroape/version.rb +2 -1
  18. data/macroape.gemspec +2 -1
  19. data/spec/count_distribution_spec.rb +11 -11
  20. data/test/align_motifs_test.rb +16 -4
  21. data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
  22. data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
  23. data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  24. data/test/data/KLF4_f2_scan_results_all.txt +1 -2
  25. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
  26. data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
  27. data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
  28. data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  29. data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
  30. data/test/data/collection_pcm_without_thresholds.yaml +186 -183
  31. data/test/data/collection_without_thresholds.yaml +186 -183
  32. data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
  33. data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
  34. data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
  35. data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  36. data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  37. data/test/data/test_collection.yaml +179 -176
  38. data/test/data/test_collection_weak.yaml +214 -0
  39. data/test/eval_alignment_test.rb +97 -21
  40. data/test/eval_similarity_test.rb +104 -26
  41. data/test/find_pvalue_test.rb +22 -9
  42. data/test/find_threshold_test.rb +76 -25
  43. data/test/preprocess_collection_test.rb +16 -21
  44. data/test/scan_collection_test.rb +26 -14
  45. data/test/test_helper.rb +96 -12
  46. metadata +44 -24
data/test/test_helper.rb CHANGED
@@ -12,7 +12,7 @@ require_relative '../lib/macroape/cli/eval_alignment'
12
12
  require_relative '../lib/macroape/cli/preprocess_collection'
13
13
  require_relative '../lib/macroape/cli/scan_collection'
14
14
  require_relative '../lib/macroape/cli/align_motifs'
15
-
15
+
16
16
  module Helpers
17
17
  # from minitest
18
18
  def self.capture_io(&block)
@@ -25,25 +25,40 @@ module Helpers
25
25
  $stdout = orig_stdout
26
26
  $stderr = orig_stderr
27
27
  end
28
-
28
+
29
+ def self.suppress_output(&block)
30
+ orig_stdout, orig_stderr = $stdout, $stderr
31
+ captured_stdout, captured_stderr = StringIO.new, StringIO.new
32
+ $stdout, $stderr = captured_stdout, captured_stderr
33
+ yield
34
+ ensure
35
+ $stdout = orig_stdout
36
+ $stderr = orig_stderr
37
+ end
38
+
29
39
  # Method stubs $stdin not STDIN !
30
40
  def self.provide_stdin(input, &block)
31
41
  orig_stdin = $stdin
32
42
  $stdin = StringIO.new(input)
33
43
  yield
34
- ensure
44
+ ensure
35
45
  $stdin = orig_stdin
36
46
  end
37
-
47
+
38
48
  def self.capture_output(&block)
39
49
  capture_io(&block)[:stdout]
40
50
  end
41
51
  def self.capture_stderr(&block)
42
52
  capture_io(&block)[:stderr]
43
53
  end
44
-
54
+
55
+ # aaa\tbbb\nccc\tddd ==> [['aaa','bbb'],['ccc','ddd']]
56
+ def self.split_on_lines(str)
57
+ str.lines.map{|line| line.strip.split("\t")}
58
+ end
59
+
45
60
  def self.obtain_pvalue_by_threshold(args)
46
- find_pvalue_output(args).strip.split.last
61
+ find_pvalue_output(args).last.last
47
62
  end
48
63
  def self.exec_cmd(executable, param_list)
49
64
  "ruby -I #{$lib_folder} #{$lib_folder}/../bin/#{executable} #{param_list}"
@@ -52,25 +67,94 @@ module Helpers
52
67
  capture_output{ Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
53
68
  end
54
69
  def self.align_motifs_output(param_list)
55
- capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit) }
70
+ split_on_lines( capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit)} )
56
71
  end
57
72
  def self.find_pvalue_output(param_list)
58
- capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit) }
73
+ capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit)} .lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).map{|line|line.split("\t")}
59
74
  end
60
75
  def self.eval_similarity_output(param_list)
61
- capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit) }
76
+ capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit)}
62
77
  end
63
78
  def self.eval_alignment_output(param_list)
64
- capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit) }
79
+ capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit)}
65
80
  end
66
81
  def self.scan_collection_output(param_list)
67
- capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
82
+ capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }.lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).join("\n")
68
83
  end
69
84
  def self.scan_collection_stderr(param_list)
70
85
  capture_stderr{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
71
86
  end
72
87
  def self.run_preprocess_collection(param_list)
73
- Macroape::CLI::PreprocessCollection.main(param_list.shellsplit)
88
+ suppress_output{ Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
89
+ end
90
+
91
+ def parse_similarity_infos_string(info_string)
92
+ infos = {}
93
+ info_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
94
+ key, value = line.split
95
+ case key
96
+ when 'S' then infos[:similarity] = value
97
+ when 'D' then infos[:distance] = value
98
+ when 'L' then infos[:length] = value
99
+ when 'SH' then infos[:shift] = value
100
+ when 'OR' then infos[:orientation] = value
101
+ when 'W' then infos[:words_recognized_by_both] = value
102
+
103
+ when 'W1' then infos[:words_recognized_by_first] = value
104
+ when 'P1' then infos[:pvalue_recognized_by_first] = value
105
+ when 'T1' then infos[:threshold_first] = value
106
+
107
+ when 'W2' then infos[:words_recognized_by_second] = value
108
+ when 'P2' then infos[:pvalue_recognized_by_second] = value
109
+ when 'T2' then infos[:threshold_second] = value
110
+
111
+ when 'A1' then infos[:matrix_first_alignment] = value
112
+ when 'A2' then infos[:matrix_second_alignment] = value
113
+
114
+ when 'V' then infos[:discretization] = value
115
+ end
116
+ end
117
+ infos
118
+ end
119
+
120
+ def assert_similarity_info_output(expected_info, info_string)
121
+ infos = parse_similarity_infos_string(info_string)
122
+ expected_info.each do |key, value|
123
+ assert_equal value.to_s, infos[key]
124
+ end
125
+ end
126
+
127
+ def parse_threshold_infos_string(infos_string)
128
+ infos = []
129
+ infos_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
130
+ info_data = line.split
131
+ if info_data.size == 4
132
+ requested_pvalue, real_pvalue, number_of_recognized_words, threshold = info_data
133
+ info = {requested_pvalue: requested_pvalue,
134
+ real_pvalue: real_pvalue,
135
+ number_of_recognized_words: number_of_recognized_words,
136
+ threshold: threshold }
137
+ elsif info_data.size == 3
138
+ requested_pvalue, real_pvalue, threshold = info_data
139
+ info = {requested_pvalue: requested_pvalue,
140
+ real_pvalue: real_pvalue,
141
+ threshold: threshold }
142
+ else
143
+ raise 'can\'t parse threshold infos table'
144
+ end
145
+ infos << info
146
+ end
147
+ infos
148
+ end
149
+
150
+ def assert_threshold_info_output(*expected_infos, info_string)
151
+ infos = parse_threshold_infos_string(info_string)
152
+ expected_infos.zip(infos).each do |expected_info, info|
153
+ assert_not_nil info
154
+ expected_info.each do |key, value|
155
+ assert_equal value.to_s, info[key]
156
+ end
157
+ end
74
158
  end
75
159
 
76
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.7
4
+ version: 3.3.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-15 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - '='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.8
21
+ version: 0.1.9
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,23 @@ dependencies:
26
26
  requirements:
27
27
  - - '='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.8
29
+ version: 0.1.9
30
+ - !ruby/object:Gem::Dependency
31
+ name: docopt
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.5.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.5.0
30
46
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
47
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
48
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -78,31 +94,33 @@ files:
78
94
  - spec/count_distribution_spec.rb
79
95
  - spec/spec_helper.rb
80
96
  - test/align_motifs_test.rb
81
- - test/data/AHR_si.pat
82
- - test/data/KLF3_f1.pat
97
+ - test/data/AHR_si.pwm
83
98
  - test/data/KLF3_f1.pcm
84
- - test/data/KLF4_f2.pat
99
+ - test/data/KLF3_f1.pwm
85
100
  - test/data/KLF4_f2.pcm
101
+ - test/data/KLF4_f2.pwm
86
102
  - test/data/KLF4_f2_scan_results_all.txt
87
103
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
88
104
  - test/data/KLF4_f2_scan_results_precise_mode.txt
89
- - test/data/SP1_f1.pat
105
+ - test/data/KLF4_f2_scan_results_weak_threshold.txt
90
106
  - test/data/SP1_f1.pcm
91
- - test/data/SP1_f1_revcomp.pat
107
+ - test/data/SP1_f1.pwm
92
108
  - test/data/SP1_f1_revcomp.pcm
109
+ - test/data/SP1_f1_revcomp.pwm
93
110
  - test/data/collection_pcm_without_thresholds.yaml
94
111
  - test/data/collection_without_thresholds.yaml
95
- - test/data/medium_motif.pat
96
- - test/data/short_motif.pat
112
+ - test/data/medium_motif.pwm
113
+ - test/data/short_motif.pwm
97
114
  - test/data/test_collection.yaml
98
- - test/data/test_collection/GABPA_f1.pat
99
- - test/data/test_collection/KLF4_f2.pat
100
- - test/data/test_collection/SP1_f1.pat
115
+ - test/data/test_collection/GABPA_f1.pwm
116
+ - test/data/test_collection/KLF4_f2.pwm
117
+ - test/data/test_collection/SP1_f1.pwm
101
118
  - test/data/test_collection_pcm/GABPA_f1.pcm
102
119
  - test/data/test_collection_pcm/KLF4_f2.pcm
103
120
  - test/data/test_collection_pcm/SP1_f1.pcm
104
121
  - test/data/test_collection_single_file.txt
105
122
  - test/data/test_collection_single_file_pcm.txt
123
+ - test/data/test_collection_weak.yaml
106
124
  - test/eval_alignment_test.rb
107
125
  - test/eval_similarity_test.rb
108
126
  - test/find_pvalue_test.rb
@@ -138,31 +156,33 @@ test_files:
138
156
  - spec/count_distribution_spec.rb
139
157
  - spec/spec_helper.rb
140
158
  - test/align_motifs_test.rb
141
- - test/data/AHR_si.pat
142
- - test/data/KLF3_f1.pat
159
+ - test/data/AHR_si.pwm
143
160
  - test/data/KLF3_f1.pcm
144
- - test/data/KLF4_f2.pat
161
+ - test/data/KLF3_f1.pwm
145
162
  - test/data/KLF4_f2.pcm
163
+ - test/data/KLF4_f2.pwm
146
164
  - test/data/KLF4_f2_scan_results_all.txt
147
165
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
148
166
  - test/data/KLF4_f2_scan_results_precise_mode.txt
149
- - test/data/SP1_f1.pat
167
+ - test/data/KLF4_f2_scan_results_weak_threshold.txt
150
168
  - test/data/SP1_f1.pcm
151
- - test/data/SP1_f1_revcomp.pat
169
+ - test/data/SP1_f1.pwm
152
170
  - test/data/SP1_f1_revcomp.pcm
171
+ - test/data/SP1_f1_revcomp.pwm
153
172
  - test/data/collection_pcm_without_thresholds.yaml
154
173
  - test/data/collection_without_thresholds.yaml
155
- - test/data/medium_motif.pat
156
- - test/data/short_motif.pat
174
+ - test/data/medium_motif.pwm
175
+ - test/data/short_motif.pwm
157
176
  - test/data/test_collection.yaml
158
- - test/data/test_collection/GABPA_f1.pat
159
- - test/data/test_collection/KLF4_f2.pat
160
- - test/data/test_collection/SP1_f1.pat
177
+ - test/data/test_collection/GABPA_f1.pwm
178
+ - test/data/test_collection/KLF4_f2.pwm
179
+ - test/data/test_collection/SP1_f1.pwm
161
180
  - test/data/test_collection_pcm/GABPA_f1.pcm
162
181
  - test/data/test_collection_pcm/KLF4_f2.pcm
163
182
  - test/data/test_collection_pcm/SP1_f1.pcm
164
183
  - test/data/test_collection_single_file.txt
165
184
  - test/data/test_collection_single_file_pcm.txt
185
+ - test/data/test_collection_weak.yaml
166
186
  - test/eval_alignment_test.rb
167
187
  - test/eval_similarity_test.rb
168
188
  - test/find_pvalue_test.rb