macroape 3.3.7 → 3.3.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/README.md +2 -2
  2. data/Rakefile.rb +6 -6
  3. data/TODO.txt +23 -3
  4. data/benchmark/similarity_benchmark.rb +18 -18
  5. data/lib/macroape/aligned_pair_intersection.rb +4 -4
  6. data/lib/macroape/cli/align_motifs.rb +34 -28
  7. data/lib/macroape/cli/eval_alignment.rb +73 -47
  8. data/lib/macroape/cli/eval_similarity.rb +65 -40
  9. data/lib/macroape/cli/find_pvalue.rb +30 -34
  10. data/lib/macroape/cli/find_threshold.rb +52 -41
  11. data/lib/macroape/cli/preprocess_collection.rb +68 -58
  12. data/lib/macroape/cli/scan_collection.rb +89 -73
  13. data/lib/macroape/cli.rb +184 -1
  14. data/lib/macroape/counting.rb +31 -5
  15. data/lib/macroape/pwm_compare.rb +8 -2
  16. data/lib/macroape/pwm_compare_aligned.rb +15 -10
  17. data/lib/macroape/version.rb +2 -1
  18. data/macroape.gemspec +2 -1
  19. data/spec/count_distribution_spec.rb +11 -11
  20. data/test/align_motifs_test.rb +16 -4
  21. data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
  22. data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
  23. data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  24. data/test/data/KLF4_f2_scan_results_all.txt +1 -2
  25. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
  26. data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
  27. data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
  28. data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  29. data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
  30. data/test/data/collection_pcm_without_thresholds.yaml +186 -183
  31. data/test/data/collection_without_thresholds.yaml +186 -183
  32. data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
  33. data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
  34. data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
  35. data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  36. data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  37. data/test/data/test_collection.yaml +179 -176
  38. data/test/data/test_collection_weak.yaml +214 -0
  39. data/test/eval_alignment_test.rb +97 -21
  40. data/test/eval_similarity_test.rb +104 -26
  41. data/test/find_pvalue_test.rb +22 -9
  42. data/test/find_threshold_test.rb +76 -25
  43. data/test/preprocess_collection_test.rb +16 -21
  44. data/test/scan_collection_test.rb +26 -14
  45. data/test/test_helper.rb +96 -12
  46. metadata +44 -24
data/test/test_helper.rb CHANGED
@@ -12,7 +12,7 @@ require_relative '../lib/macroape/cli/eval_alignment'
12
12
  require_relative '../lib/macroape/cli/preprocess_collection'
13
13
  require_relative '../lib/macroape/cli/scan_collection'
14
14
  require_relative '../lib/macroape/cli/align_motifs'
15
-
15
+
16
16
  module Helpers
17
17
  # from minitest
18
18
  def self.capture_io(&block)
@@ -25,25 +25,40 @@ module Helpers
25
25
  $stdout = orig_stdout
26
26
  $stderr = orig_stderr
27
27
  end
28
-
28
+
29
+ def self.suppress_output(&block)
30
+ orig_stdout, orig_stderr = $stdout, $stderr
31
+ captured_stdout, captured_stderr = StringIO.new, StringIO.new
32
+ $stdout, $stderr = captured_stdout, captured_stderr
33
+ yield
34
+ ensure
35
+ $stdout = orig_stdout
36
+ $stderr = orig_stderr
37
+ end
38
+
29
39
  # Method stubs $stdin not STDIN !
30
40
  def self.provide_stdin(input, &block)
31
41
  orig_stdin = $stdin
32
42
  $stdin = StringIO.new(input)
33
43
  yield
34
- ensure
44
+ ensure
35
45
  $stdin = orig_stdin
36
46
  end
37
-
47
+
38
48
  def self.capture_output(&block)
39
49
  capture_io(&block)[:stdout]
40
50
  end
41
51
  def self.capture_stderr(&block)
42
52
  capture_io(&block)[:stderr]
43
53
  end
44
-
54
+
55
+ # aaa\tbbb\nccc\tddd ==> [['aaa','bbb'],['ccc','ddd']]
56
+ def self.split_on_lines(str)
57
+ str.lines.map{|line| line.strip.split("\t")}
58
+ end
59
+
45
60
  def self.obtain_pvalue_by_threshold(args)
46
- find_pvalue_output(args).strip.split.last
61
+ find_pvalue_output(args).last.last
47
62
  end
48
63
  def self.exec_cmd(executable, param_list)
49
64
  "ruby -I #{$lib_folder} #{$lib_folder}/../bin/#{executable} #{param_list}"
@@ -52,25 +67,94 @@ module Helpers
52
67
  capture_output{ Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
53
68
  end
54
69
  def self.align_motifs_output(param_list)
55
- capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit) }
70
+ split_on_lines( capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit)} )
56
71
  end
57
72
  def self.find_pvalue_output(param_list)
58
- capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit) }
73
+ capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit)} .lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).map{|line|line.split("\t")}
59
74
  end
60
75
  def self.eval_similarity_output(param_list)
61
- capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit) }
76
+ capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit)}
62
77
  end
63
78
  def self.eval_alignment_output(param_list)
64
- capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit) }
79
+ capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit)}
65
80
  end
66
81
  def self.scan_collection_output(param_list)
67
- capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
82
+ capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }.lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).join("\n")
68
83
  end
69
84
  def self.scan_collection_stderr(param_list)
70
85
  capture_stderr{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
71
86
  end
72
87
  def self.run_preprocess_collection(param_list)
73
- Macroape::CLI::PreprocessCollection.main(param_list.shellsplit)
88
+ suppress_output{ Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
89
+ end
90
+
91
+ def parse_similarity_infos_string(info_string)
92
+ infos = {}
93
+ info_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
94
+ key, value = line.split
95
+ case key
96
+ when 'S' then infos[:similarity] = value
97
+ when 'D' then infos[:distance] = value
98
+ when 'L' then infos[:length] = value
99
+ when 'SH' then infos[:shift] = value
100
+ when 'OR' then infos[:orientation] = value
101
+ when 'W' then infos[:words_recognized_by_both] = value
102
+
103
+ when 'W1' then infos[:words_recognized_by_first] = value
104
+ when 'P1' then infos[:pvalue_recognized_by_first] = value
105
+ when 'T1' then infos[:threshold_first] = value
106
+
107
+ when 'W2' then infos[:words_recognized_by_second] = value
108
+ when 'P2' then infos[:pvalue_recognized_by_second] = value
109
+ when 'T2' then infos[:threshold_second] = value
110
+
111
+ when 'A1' then infos[:matrix_first_alignment] = value
112
+ when 'A2' then infos[:matrix_second_alignment] = value
113
+
114
+ when 'V' then infos[:discretization] = value
115
+ end
116
+ end
117
+ infos
118
+ end
119
+
120
+ def assert_similarity_info_output(expected_info, info_string)
121
+ infos = parse_similarity_infos_string(info_string)
122
+ expected_info.each do |key, value|
123
+ assert_equal value.to_s, infos[key]
124
+ end
125
+ end
126
+
127
+ def parse_threshold_infos_string(infos_string)
128
+ infos = []
129
+ infos_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
130
+ info_data = line.split
131
+ if info_data.size == 4
132
+ requested_pvalue, real_pvalue, number_of_recognized_words, threshold = info_data
133
+ info = {requested_pvalue: requested_pvalue,
134
+ real_pvalue: real_pvalue,
135
+ number_of_recognized_words: number_of_recognized_words,
136
+ threshold: threshold }
137
+ elsif info_data.size == 3
138
+ requested_pvalue, real_pvalue, threshold = info_data
139
+ info = {requested_pvalue: requested_pvalue,
140
+ real_pvalue: real_pvalue,
141
+ threshold: threshold }
142
+ else
143
+ raise 'can\'t parse threshold infos table'
144
+ end
145
+ infos << info
146
+ end
147
+ infos
148
+ end
149
+
150
+ def assert_threshold_info_output(*expected_infos, info_string)
151
+ infos = parse_threshold_infos_string(info_string)
152
+ expected_infos.zip(infos).each do |expected_info, info|
153
+ assert_not_nil info
154
+ expected_info.each do |key, value|
155
+ assert_equal value.to_s, info[key]
156
+ end
157
+ end
74
158
  end
75
159
 
76
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.7
4
+ version: 3.3.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-15 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - '='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.8
21
+ version: 0.1.9
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,23 @@ dependencies:
26
26
  requirements:
27
27
  - - '='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.8
29
+ version: 0.1.9
30
+ - !ruby/object:Gem::Dependency
31
+ name: docopt
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.5.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.5.0
30
46
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
47
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
48
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -78,31 +94,33 @@ files:
78
94
  - spec/count_distribution_spec.rb
79
95
  - spec/spec_helper.rb
80
96
  - test/align_motifs_test.rb
81
- - test/data/AHR_si.pat
82
- - test/data/KLF3_f1.pat
97
+ - test/data/AHR_si.pwm
83
98
  - test/data/KLF3_f1.pcm
84
- - test/data/KLF4_f2.pat
99
+ - test/data/KLF3_f1.pwm
85
100
  - test/data/KLF4_f2.pcm
101
+ - test/data/KLF4_f2.pwm
86
102
  - test/data/KLF4_f2_scan_results_all.txt
87
103
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
88
104
  - test/data/KLF4_f2_scan_results_precise_mode.txt
89
- - test/data/SP1_f1.pat
105
+ - test/data/KLF4_f2_scan_results_weak_threshold.txt
90
106
  - test/data/SP1_f1.pcm
91
- - test/data/SP1_f1_revcomp.pat
107
+ - test/data/SP1_f1.pwm
92
108
  - test/data/SP1_f1_revcomp.pcm
109
+ - test/data/SP1_f1_revcomp.pwm
93
110
  - test/data/collection_pcm_without_thresholds.yaml
94
111
  - test/data/collection_without_thresholds.yaml
95
- - test/data/medium_motif.pat
96
- - test/data/short_motif.pat
112
+ - test/data/medium_motif.pwm
113
+ - test/data/short_motif.pwm
97
114
  - test/data/test_collection.yaml
98
- - test/data/test_collection/GABPA_f1.pat
99
- - test/data/test_collection/KLF4_f2.pat
100
- - test/data/test_collection/SP1_f1.pat
115
+ - test/data/test_collection/GABPA_f1.pwm
116
+ - test/data/test_collection/KLF4_f2.pwm
117
+ - test/data/test_collection/SP1_f1.pwm
101
118
  - test/data/test_collection_pcm/GABPA_f1.pcm
102
119
  - test/data/test_collection_pcm/KLF4_f2.pcm
103
120
  - test/data/test_collection_pcm/SP1_f1.pcm
104
121
  - test/data/test_collection_single_file.txt
105
122
  - test/data/test_collection_single_file_pcm.txt
123
+ - test/data/test_collection_weak.yaml
106
124
  - test/eval_alignment_test.rb
107
125
  - test/eval_similarity_test.rb
108
126
  - test/find_pvalue_test.rb
@@ -138,31 +156,33 @@ test_files:
138
156
  - spec/count_distribution_spec.rb
139
157
  - spec/spec_helper.rb
140
158
  - test/align_motifs_test.rb
141
- - test/data/AHR_si.pat
142
- - test/data/KLF3_f1.pat
159
+ - test/data/AHR_si.pwm
143
160
  - test/data/KLF3_f1.pcm
144
- - test/data/KLF4_f2.pat
161
+ - test/data/KLF3_f1.pwm
145
162
  - test/data/KLF4_f2.pcm
163
+ - test/data/KLF4_f2.pwm
146
164
  - test/data/KLF4_f2_scan_results_all.txt
147
165
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
148
166
  - test/data/KLF4_f2_scan_results_precise_mode.txt
149
- - test/data/SP1_f1.pat
167
+ - test/data/KLF4_f2_scan_results_weak_threshold.txt
150
168
  - test/data/SP1_f1.pcm
151
- - test/data/SP1_f1_revcomp.pat
169
+ - test/data/SP1_f1.pwm
152
170
  - test/data/SP1_f1_revcomp.pcm
171
+ - test/data/SP1_f1_revcomp.pwm
153
172
  - test/data/collection_pcm_without_thresholds.yaml
154
173
  - test/data/collection_without_thresholds.yaml
155
- - test/data/medium_motif.pat
156
- - test/data/short_motif.pat
174
+ - test/data/medium_motif.pwm
175
+ - test/data/short_motif.pwm
157
176
  - test/data/test_collection.yaml
158
- - test/data/test_collection/GABPA_f1.pat
159
- - test/data/test_collection/KLF4_f2.pat
160
- - test/data/test_collection/SP1_f1.pat
177
+ - test/data/test_collection/GABPA_f1.pwm
178
+ - test/data/test_collection/KLF4_f2.pwm
179
+ - test/data/test_collection/SP1_f1.pwm
161
180
  - test/data/test_collection_pcm/GABPA_f1.pcm
162
181
  - test/data/test_collection_pcm/KLF4_f2.pcm
163
182
  - test/data/test_collection_pcm/SP1_f1.pcm
164
183
  - test/data/test_collection_single_file.txt
165
184
  - test/data/test_collection_single_file_pcm.txt
185
+ - test/data/test_collection_weak.yaml
166
186
  - test/eval_alignment_test.rb
167
187
  - test/eval_similarity_test.rb
168
188
  - test/find_pvalue_test.rb