BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,327 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for ReadTable.
34
+ #
35
+ # rubocop: disable ClassLength
36
class TestReadTable < Test::Unit::TestCase
  # Records the tests expect when the fixture table is read with default
  # options.
  DEFAULT_RECORDS = <<-EXP.gsub(/^\s+\|/, '').freeze
    |{:ID=>"TCMID104", :COUNT=>12}
    |{:ID=>"TCMID105", :COUNT=>123}
    |{:ID=>"TCMID106", :COUNT=>1231}
  EXP

  # Creates a temporary directory with two identical table files, primes an
  # input stream with two sequence records and instantiates a pipeline.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')

    @data = <<-EOF.gsub(/^\s+\|/, '')
      |#ID COUNT
      |# 2014-10-14
      |TCMID104 12
      |TCMID105 123
      |TCMID106 1231
      |
    EOF

    setup_file1
    setup_file2
    setup_data

    @p = BioDSL::Pipeline.new
  end

  # Writes the fixture table to test.tab and remembers the path in @file.
  def setup_file1
    @file = write_table('test.tab')
  end

  # Writes the fixture table to test2.tab and remembers the path in @file2.
  def setup_file2
    @file2 = write_table('test2.tab')
  end

  # Opens the two stream pipes used by the tests and writes two sequence
  # records to the first one before closing its writing end.
  def setup_data
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test1', SEQ: 'atgcagcac', SEQ_LEN: 9)
    @output.write(SEQ_NAME: 'test2', SEQ: 'acagcactgA', SEQ_LEN: 10)
    @output.close
  end

  # Removes the temporary directory created in setup.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::ReadTable with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.read_table(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::ReadTable without required options raises' do
    assert_raise(BioDSL::OptionError) { @p.read_table }
  end

  test 'BioDSL::Pipeline::ReadTable with bad first raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, first: -1)
    end
  end

  test 'BioDSL::Pipeline::ReadTable with bad last raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, last: -1)
    end
  end

  test 'BioDSL::Pipeline::ReadTable with exclusive unique options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, first: 1, last: 1)
    end
  end

  test 'BioDSL::Pipeline::ReadTable with non-existing input file raises' do
    assert_raise(BioDSL::OptionError) { @p.read_table(input: '___adsf') }
  end

  test 'BioDSL::Pipeline::ReadTable with duplicate keys raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, keys: [:a, :a])
    end
  end

  test 'BioDSL::Pipeline::ReadTable with duplicate select raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, select: [1, 1])
    end
  end

  test 'BioDSL::Pipeline::ReadTable with duplicate reject raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_table(input: @file, reject: [1, 1])
    end
  end

  test 'BioDSL::Pipeline::ReadTable returns correctly' do
    @p.read_table(input: @file).run(output: @output2)

    assert_equal(DEFAULT_RECORDS, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable status returns correctly' do
    @p.read_table(input: @file).run(output: @output2)

    assert_equal(0, @p.status.first[:records_in])
    assert_equal(3, @p.status.first[:records_out])
  end

  test 'BioDSL::Pipeline::ReadTable with :skip returns correctly' do
    @p.read_table(input: @file, skip: 2).run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"TCMID104", :V1=>12}
      |{:V0=>"TCMID105", :V1=>123}
      |{:V0=>"TCMID106", :V1=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :delimiter returns correctly' do
    @p.read_table(input: @file, skip: 2, delimiter: 'ID').run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"TCM", :V1=>"104 12"}
      |{:V0=>"TCM", :V1=>"105 123"}
      |{:V0=>"TCM", :V1=>"106 1231"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :select returns correctly' do
    @p.read_table(input: @file, select: [:COUNT]).run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:COUNT=>12}
      |{:COUNT=>123}
      |{:COUNT=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :reject returns correctly' do
    @p.read_table(input: @file, reject: [:COUNT]).run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"TCMID104"}
      |{:ID=>"TCMID105"}
      |{:ID=>"TCMID106"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :keys returns correctly' do
    @p.read_table(input: @file, keys: ['FOO', :BAR]).run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:FOO=>"TCMID104", :BAR=>12}
      |{:FOO=>"TCMID105", :BAR=>123}
      |{:FOO=>"TCMID106", :BAR=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :skip and :keys returns OK' do
    @p.read_table(input: @file, skip: 2, keys: ['FOO', :BAR]).
      run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:FOO=>"TCMID104", :BAR=>12}
      |{:FOO=>"TCMID105", :BAR=>123}
      |{:FOO=>"TCMID106", :BAR=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with gzipped data returns correctly' do
    compress('gzip')

    @p.read_table(input: "#{@file}.gz").run(output: @output2)

    assert_equal(DEFAULT_RECORDS, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with bzip2\'ed data returns correctly' do
    compress('bzip2')

    @p.read_table(input: "#{@file}.bz2").run(output: @output2)

    assert_equal(DEFAULT_RECORDS, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with multiple files returns correctly' do
    @p.read_table(input: [@file, @file2]).run(output: @output2)

    # Both files hold the same table, so the records appear twice.
    assert_equal(DEFAULT_RECORDS * 2, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with input glob returns correctly' do
    @p.read_table(input: File.join(@tmpdir, 'test*.tab')).run(output: @output2)

    # The glob matches test.tab and test2.tab, which hold the same table.
    assert_equal(DEFAULT_RECORDS * 2, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with :first returns correctly' do
    @p.read_table(input: [@file, @file2], first: 3).run(output: @output2)

    assert_equal(DEFAULT_RECORDS, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable#to_s with :first returns correctly' do
    @p.read_table(input: @file, first: 3)

    expected = %{BP.new.read_table(input: "#{@file}", first: 3)}

    assert_equal(expected, @p.to_s)
  end

  test 'BioDSL::Pipeline::ReadTable with :last returns correctly' do
    @p.read_table(input: [@file, @file2], last: 2).run(output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"TCMID105", :COUNT=>123}
      |{:ID=>"TCMID106", :COUNT=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::ReadTable with flux returns correctly' do
    @p.read_table(input: @file2).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test1", :SEQ=>"atgcagcac", :SEQ_LEN=>9}
      |{:SEQ_NAME=>"test2", :SEQ=>"acagcactgA", :SEQ_LEN=>10}
      |{:ID=>"TCMID104", :COUNT=>12}
      |{:ID=>"TCMID105", :COUNT=>123}
      |{:ID=>"TCMID106", :COUNT=>1231}
    EXP

    assert_equal(expected, collect_result)
  end

  private

  # Writes the fixture table held in @data to +name+ inside the temporary
  # directory and returns the full path of the file.
  def write_table(name)
    path = File.join(@tmpdir, name)
    File.open(path, 'w') { |ios| ios.puts @data }
    path
  end

  # Runs the external +program+ on @file. The path is passed as a separate
  # argument, so no shell is involved and unusual characters in the temporary
  # path cannot break the command. Fails the test if the program cannot be
  # started or exits with a non-zero status.
  def compress(program)
    assert(system(program, @file), "#{program} could not compress #{@file}")
  end
end
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for ReverseSeq.
34
class TestReverseSeq < Test::Unit::TestCase
  # Feeds a single sequence record with scores into the input stream and
  # instantiates the pipeline under test.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test', SEQ: 'gatcgatcgt', SEQ_LEN: 10,
                  SCORES: 'ABCDEFGHII')
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ReverseSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.reverse_seq(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::ReverseSeq returns correctly' do
    @p.reverse_seq.run(input: @input, output: @output2)

    # Newlines are stripped from the collected result before comparing, so
    # the expectation is spelled out as one line.
    expected = '{:SEQ_NAME=>"test", :SEQ=>"tgctagctag", :SEQ_LEN=>10, ' \
               ':SCORES=>"IIHGFEDCBA"}'

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::ReverseSeq status returns correctly' do
    @p.reverse_seq.run(input: @input, output: @output2)

    counts = {
      records_in: 1,
      records_out: 1,
      sequences_in: 1,
      sequences_out: 1,
      residues_in: 10,
      residues_out: 10
    }

    counts.each do |key, count|
      assert_equal(count, @p.status.first[key])
    end
  end
end
@@ -0,0 +1,218 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for SliceAlign
34
+ # rubocop: disable ClassLength
35
class TestSliceAlign < Test::Unit::TestCase
  # Result the primer tests without a template file expect.
  PRIMER_SLICE = <<-EXP.gsub(/^\s+\|/, '')
    |{:SEQ_NAME=>"ID0", :SEQ=>"CGCATACG-------CCCTGAGGGG", :SEQ_LEN=>25}
    |{:SEQ_NAME=>"ID1", :SEQ=>"CGCATGAT-------ACCTGAGGGT", :SEQ_LEN=>25}
    |{:SEQ_NAME=>"ID2", :SEQ=>"CGCATATACTCTTGACGCTAAAGCG", :SEQ_LEN=>25}
    |{:SEQ_NAME=>"ID3", :SEQ=>"CGTATGTG-------CCCTTCGGGG", :SEQ_LEN=>25}
    |{:SEQ_NAME=>"ID4", :SEQ=>"CGGATAAG-------CCCTTACGGG", :SEQ_LEN=>25}
    |{:SEQ_NAME=>"ID5", :SEQ=>"CGGATAAG-------CCCTTACGGG", :SEQ_LEN=>25}
    |{:FOO=>"BAR"}
  EXP

  # Writes six aligned sequences (ID0..ID5) plus one sequence-less record to
  # the input stream, creates the template file and instantiates a pipeline.
  def setup
    require 'tempfile'

    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    alignment = %w(
      CCGCATACG-------CCCTGAGGGG----
      CCGCATGAT-------ACCTGAGGGT----
      CCGCATATACTCTTGACGCTAAAGCGTAGT
      CCGTATGTG-------CCCTTCGGGG----
      CCGGATAAG-------CCCTTACGGG----
      CCGGATAAG-------CCCTTACGGG----
    )

    alignment.each_with_index do |seq, index|
      @output.write(SEQ_NAME: "ID#{index}", SEQ: seq)
    end

    @output.write(FOO: 'BAR')
    @output.close

    setup_template_file

    @p = BioDSL::Pipeline.new
  end

  # Creates a temporary FASTA file holding a single template sequence.
  def setup_template_file
    @template_file = Tempfile.new('slice_align')

    File.open(@template_file, 'w') do |fasta|
      fasta.puts '>template'
      fasta.puts 'CTGAATACG-------CCATTCGATGG---'
    end
  end

  # Closes and deletes the template file.
  def teardown
    @template_file.close!
  end

  test 'BioDSL::Pipeline::SliceAlign with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.slice_align(slice: 1, foo: 'bar') }
  end

  test 'BioDSL::Pipeline::SliceAlign with valid options don\'t raise' do
    assert_nothing_raised do
      @p.slice_align(slice: 1)
    end
  end

  test 'BioDSL::Pipeline::SliceAlign with slice and primers raises' do
    assert_raise(BioDSL::OptionError) do
      @p.slice_align(slice: 1, forward: 'foo', reverse: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SliceAlign with index returns correctly' do
    @p.slice_align(slice: 14..27).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"ID0", :SEQ=>"--CCCTGAGGGG--", :SEQ_LEN=>14}
      |{:SEQ_NAME=>"ID1", :SEQ=>"--ACCTGAGGGT--", :SEQ_LEN=>14}
      |{:SEQ_NAME=>"ID2", :SEQ=>"GACGCTAAAGCGTA", :SEQ_LEN=>14}
      |{:SEQ_NAME=>"ID3", :SEQ=>"--CCCTTCGGGG--", :SEQ_LEN=>14}
      |{:SEQ_NAME=>"ID4", :SEQ=>"--CCCTTACGGG--", :SEQ_LEN=>14}
      |{:SEQ_NAME=>"ID5", :SEQ=>"--CCCTTACGGG--", :SEQ_LEN=>14}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceAlign status returns correctly' do
    @p.slice_align(slice: 14..27).run(input: @input, output: @output2)

    counts = {
      records_in: 7,
      records_out: 7,
      sequences_in: 6,
      sequences_out: 6,
      residues_in: 180,
      residues_out: 84
    }

    counts.each do |key, count|
      assert_equal(count, @p.status.first[key])
    end
  end

  test 'BioDSL::Pipeline::SliceAlign with forward primer miss raises' do
    assert_raise(BioDSL::SeqError) do
      @p.slice_align(forward: 'AAAAAAA', reverse: 'GAGGGG').
        run(input: @input, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SliceAlign with reverse primer miss raises' do
    assert_raise(BioDSL::SeqError) do
      @p.slice_align(forward: 'CGCATACG', reverse: 'AAAAAAA').
        run(input: @input, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SliceAlign with primers returns correctly' do
    @p.slice_align(forward: 'CGCATACG', reverse: 'GAGGGG', max_mismatches: 0,
                   max_insertions: 0, max_deletions: 0).
      run(input: @input, output: @output2)

    assert_equal(PRIMER_SLICE, collect_result)
  end

  test 'BioDSL::Pipeline::SliceAlign with forward_rc primer returns OK' do
    @p.slice_align(forward_rc: 'cgtatgcg', reverse: 'GAGGGG', max_mismatches: 0,
                   max_insertions: 0, max_deletions: 0).
      run(input: @input, output: @output2)

    assert_equal(PRIMER_SLICE, collect_result)
  end

  test 'BioDSL::Pipeline::SliceAlign with reverse_rc primer returns OK' do
    @p.slice_align(forward: 'CGCATACG', reverse_rc: 'cccctc', max_mismatches: 0,
                   max_insertions: 0, max_deletions: 0).
      run(input: @input, output: @output2)

    assert_equal(PRIMER_SLICE, collect_result)
  end

  test 'BioDSL::Pipeline::SliceAlign with primers and template_file ' \
       'returns correctly' do
    @p.slice_align(forward: 'GAATACG', reverse: 'ATTCGAT',
                   template_file: @template_file, max_mismatches: 0,
                   max_insertions: 0, max_deletions: 0).
      run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"ID0", :SEQ=>"GCATACG-------CCCTGAGGG", :SEQ_LEN=>23}
      |{:SEQ_NAME=>"ID1", :SEQ=>"GCATGAT-------ACCTGAGGG", :SEQ_LEN=>23}
      |{:SEQ_NAME=>"ID2", :SEQ=>"GCATATACTCTTGACGCTAAAGC", :SEQ_LEN=>23}
      |{:SEQ_NAME=>"ID3", :SEQ=>"GTATGTG-------CCCTTCGGG", :SEQ_LEN=>23}
      |{:SEQ_NAME=>"ID4", :SEQ=>"GGATAAG-------CCCTTACGG", :SEQ_LEN=>23}
      |{:SEQ_NAME=>"ID5", :SEQ=>"GGATAAG-------CCCTTACGG", :SEQ_LEN=>23}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceAlign with template_file and slice ' \
       'returns correctly' do
    @p.slice_align(template_file: @template_file, slice: 4..14).
      run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"ID0", :SEQ=>"ATACG-------CCCTGA", :SEQ_LEN=>18}
      |{:SEQ_NAME=>"ID1", :SEQ=>"ATGAT-------ACCTGA", :SEQ_LEN=>18}
      |{:SEQ_NAME=>"ID2", :SEQ=>"ATATACTCTTGACGCTAA", :SEQ_LEN=>18}
      |{:SEQ_NAME=>"ID3", :SEQ=>"ATGTG-------CCCTTC", :SEQ_LEN=>18}
      |{:SEQ_NAME=>"ID4", :SEQ=>"ATAAG-------CCCTTA", :SEQ_LEN=>18}
      |{:SEQ_NAME=>"ID5", :SEQ=>"ATAAG-------CCCTTA", :SEQ_LEN=>18}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(expected, collect_result)
  end
end