BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,329 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for TrimPrimer.
#
# Every test pushes a single record through the trim_primer command and
# compares the text returned by collect_result with a literal expectation.
# collect_result comes from test/helper (not shown in this file); it is
# presumably the reader for @input2 -- confirm against the helper.
#
# rubocop: disable ClassLength
class TestTrimPrimer < Test::Unit::TestCase
  # Build the stream pair feeding the pipeline, the stream pair receiving its
  # output, and a fresh pipeline for each test.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::TrimPrimer with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.trim_primer(foo: 'bar') }
  end

  test "BioDSL::Pipeline::TrimPrimer with valid options don't raise" do
    assert_nothing_raised do
      @p.trim_primer(primer: 'atcg', direction: :forward)
    end
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and pattern longer than ' \
       'sequence returns correctly' do
    result = run_trim_primer('TATG', primer: 'TCGTATG', direction: :forward,
                                     overlap_min: 1)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and pattern longer than ' \
       'sequence returns correctly' do
    result = run_trim_primer('TCGT', primer: 'TCGTATG', direction: :reverse,
                                     overlap_min: 1)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  # A primer found away from the sequence end must leave the record untouched.
  test 'BioDSL::Pipeline::TrimPrimer with forward and internal match ' \
       'returns correctly' do
    result = run_trim_primer('aTCGTATGactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward)

    expected = '{:SEQ=>"aTCGTATGactgactgatcgca"}'

    assert_equal(expected, result.chomp)
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and internal match ' \
       'returns correctly' do
    result = run_trim_primer('ctgactgatcgcaaTCGTATGa',
                             primer: 'TCGTATG', direction: :reverse)

    expected = '{:SEQ=>"ctgactgatcgcaaTCGTATGa"}'

    assert_equal(expected, result.chomp)
  end

  test 'BioDSL::Pipeline::TrimPrimer w. forward and full match returns OK' do
    result = run_trim_primer('TCGTATGactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>7,
      | :TRIM_PRIMER_PAT=>"TCGTATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer w. reverse and full match returns OK' do
    result = run_trim_primer('ctgactgatcgcaaTCGTATG',
                             primer: 'TCGTATG', direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>7,
      | :TRIM_PRIMER_PAT=>"TCGTATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer w. forward and partial match returns ' \
       'correctly' do
    result = run_trim_primer('TATGactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and partial match and ' \
       'reverse_complement: true returns correctly' do
    result = run_trim_primer('TATGactgactgatcgca',
                             primer: 'CATACGA', direction: :forward,
                             reverse_complement: true)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer w. reverse and partial match returns ' \
       'correctly' do
    result = run_trim_primer('ctgactgatcgcaaTCGT',
                             primer: 'TCGTATG', direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial match and ' \
       'reverse_complement: true returns correctly' do
    result = run_trim_primer('ctgactgatcgcaaTCGT',
                             primer: 'CATACGA', direction: :reverse,
                             reverse_complement: true)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and minimum match ' \
       'returns correctly' do
    result = run_trim_primer('Gactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>1,
      | :TRIM_PRIMER_PAT=>"G"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and minimum match ' \
       'returns correctly' do
    result = run_trim_primer('ctgactgatcgcaaT',
                             primer: 'TCGTATG', direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>1,
      | :TRIM_PRIMER_PAT=>"T"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and partial match and ' \
       'overlap_min returns correctly' do
    result = run_trim_primer('TATGactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward,
                             overlap_min: 4)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial match and ' \
       'overlap_min returns correctly' do
    result = run_trim_primer('ctgactgatcgcaaTCGT',
                             primer: 'TCGTATG', direction: :reverse,
                             overlap_min: 4)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP

    assert_equal(expected.delete("\n"), result.delete("\n"))
  end

  # The 4-residue overlap is shorter than overlap_min: 5, so nothing is trimmed.
  test 'BioDSL::Pipeline::TrimPrimer with forward and partial miss due ' \
       'to overlap_min returns correctly' do
    result = run_trim_primer('TATGactgactgatcgca',
                             primer: 'TCGTATG', direction: :forward,
                             overlap_min: 5)

    expected = '{:SEQ=>"TATGactgactgatcgca"}'

    assert_equal(expected, result.chomp)
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial miss due ' \
       'to overlap_min returns correctly' do
    result = run_trim_primer('ctgactgatcgcaaTCGT',
                             primer: 'TCGTATG', direction: :reverse,
                             overlap_min: 5)

    expected = '{:SEQ=>"ctgactgatcgcaaTCGT"}'

    assert_equal(expected, result.chomp)
  end

  private

  # Write one record holding +seq+ to the input stream, close it, run
  # trim_primer with +options+ from @input to @output2, and return what
  # collect_result yields afterwards.
  #
  # @param seq [String] value stored under the :SEQ key of the record.
  # @param options [Hash] keyword options forwarded unchanged to trim_primer.
  # @return [String] the text returned by collect_result.
  def run_trim_primer(seq, **options)
    @output.write(SEQ: seq)
    @output.close
    @p.trim_primer(**options).run(input: @input, output: @output2)

    collect_result
  end
end
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for TrimSeq.
#
# setup writes one record with sequence and scores to the input stream; each
# test runs trim_seq with different options and compares the text returned by
# collect_result (from test/helper, not shown in this file) with a literal.
class TestTrimSeq < Test::Unit::TestCase
  # Create the stream pairs, queue the single fixture record on the input
  # stream and build a fresh pipeline.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    # SEQ and SCORES are both 62 characters long, so SEQ_LEN is 62. The tests
    # below expect :SEQ_LEN=>62 when nothing is trimmed and residues_in == 62.
    record = {
      SEQ_NAME: 'test',

      SEQ: 'gatcgatcgtacgagcagcatctgacgtatcgatcgttgtctacgacgagcatgctagctag',
      SEQ_LEN: 62,
      SCORES: %q[!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF876543210/.-,+*)('&%$III]
    }

    @output.write record
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::TrimSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.trim_seq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::TrimSeq with valid options don\'t raise' do
    assert_nothing_raised { @p.trim_seq(mode: :left) }
  end

  # In the expectations below, the heredoc lines are joined with single spaces
  # and the trailing one is dropped by [0..-2].
  test 'BioDSL::Pipeline::TrimSeq returns correctly' do
    @p.trim_seq.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq status returns correctly' do
    @p.trim_seq.run(input: @input, output: @output2)

    status = @p.status.first

    assert_equal(1, status[:records_in])
    assert_equal(1, status[:records_out])
    assert_equal(1, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(62, status[:residues_in])
    assert_equal(42, status[:residues_out])
  end

  test 'BioDSL::Pipeline::TrimSeq with :quality_min returns correctly' do
    @p.trim_seq(quality_min: 25).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"cgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>37,
      |:SCORES=>":;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq with mode: :both returns correctly' do
    @p.trim_seq(mode: :both).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq with mode: :left returns correctly' do
    @p.trim_seq(mode: :left).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  # rubocop:disable LineLength
  test 'BioDSL::Pipeline::TrimSeq with mode: :right returns correctly' do
    @p.trim_seq(mode: :right).run(input: @input, output: @output2)

    # The escapes in SCORES reproduce how the leading !"#$ characters appear
    # in the inspected string.
    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"gatcgatcgtacgagcagcatctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>62,
      |:SCORES=>"!\\"\\#\$%&'()*+,-./0123456789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end
  # rubocop:enable LineLength

  test 'BioDSL::Pipeline::TrimSeq with :length_min returns correctly' do
    @p.trim_seq(length_min: 4).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtct",
      |:SEQ_LEN=>22,
      |:SCORES=>"56789:;<=>?@ABCDEF8765"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end
end
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for UchimeRef.
#
# The tests are omitted when no usearch executable is found. Otherwise a
# temporary FASTA file with one reference sequence serves as the database.
class TestUchimeRef < Test::Unit::TestCase
  require 'tempfile'

  # Omit the test unless usearch is available, then write the reference
  # database to a temporary file kept in @db.
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')

    data = <<-DAT.gsub(/^\s+\|/, '')
      |>test1
      |gtgtgtagctacgatcagctagcgatcgagctatatgttt
    DAT

    @db = Tempfile.new('database')

    File.write(@db.path, data)
  end

  # Close and remove the temporary database.
  #
  # @db is still nil when setup omitted the test before creating the file, so
  # guard against calling close on nil in that case.
  def teardown
    return unless @db

    @db.close
    @db.unlink
  end

  test 'BioDSL::Pipeline#uchime_ref with disallowed option raises' do
    p = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { p.uchime_ref(foo: 'bar') }
  end

  test "BioDSL::Pipeline#uchime_ref with allowed option don't raise" do
    p = BioDSL::Pipeline.new
    assert_nothing_raised { p.uchime_ref(database: @db.path) }
  end

  test 'BioDSL::Pipeline#uchime_ref outputs correctly' do
    run_uchime_ref

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:one=>1,
      | :two=>2,
      | :three=>3}
      |{:SEQ_NAME=>"1",
      | :SEQ=>"atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct",
      | :SEQ_LEN=>40}
      |{:SEQ_NAME=>"2",
      | :SEQ=>"atcgatcgatcgatcgatcgatcgatcgtacgacgtagct",
      | :SEQ_LEN=>40}
    EXP

    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#uchime_ref status outputs correctly' do
    status = run_uchime_ref.status.first

    assert_equal(3, status[:records_in])
    assert_equal(3, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(2, status[:sequences_out])
    assert_equal(80, status[:residues_in])
    assert_equal(80, status[:residues_out])
  end

  private

  # Write one record without sequence data and two sequence records to a fresh
  # input stream, then run uchime_ref against the temporary database.
  #
  # @input2 is assigned here because collect_result (from test/helper, not
  # shown in this file) is called without arguments by the tests; presumably
  # it reads from @input2 -- confirm against the helper.
  #
  # @return [BioDSL::Pipeline] the pipeline that was run.
  def run_uchime_ref
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new
    pipeline.uchime_ref(database: @db.path).run(input: input, output: output2)
    pipeline
  end
end