BioDSL 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for ClassifySeqMothur.
#
# Covers option validation only. Every test is omitted when the mothur
# executable cannot be located.
class TestClassifySeqMothur < Test::Unit::TestCase
  def setup
    mothur = BioDSL::Filesys.which('mothur')
    omit('mothur not found') unless mothur

    # This test file itself stands in for both the database and the taxonomy.
    @database = __FILE__
    @taxonomy = __FILE__
    @p        = BP.new
  end

  test 'BioDSL::Pipeline#classify_seq_mothur with disallowed option fail' do
    options = { database: @database, taxonomy: @taxonomy, foo: 'bar' }

    assert_raise(BioDSL::OptionError) { @p.classify_seq_mothur(**options) }
  end

  test 'BioDSL::Pipeline#classify_seq_mothur w. allowed option dont fail' do
    options = { database: @database, taxonomy: @taxonomy, cpus: 2 }

    assert_nothing_raised { @p.classify_seq_mothur(**options) }
  end

  # test "BioDSL::Pipeline#classify_seq_mothur outputs correctly" do
  #   # TODO: mock this sucker.
  # end
end
@@ -0,0 +1,377 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for ClipPrimer.
#
# Apart from the two option checks, every test pushes a single SEQ record
# through clip_primer and inspects either the text returned by collect_result
# or the status of the pipeline. The shared plumbing lives in the private
# helpers at the bottom of the class.
# rubocop:disable ClassLength
class TestClipPrimer < Test::Unit::TestCase
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ClipPrimer with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.clip_primer(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::ClipPrimer with valid options dont raise' do
    assert_nothing_raised do
      @p.clip_primer(primer: 'atcg', direction: :forward)
    end
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward full length match ' \
       'returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :forward)

    assert_equal(clipped('', :forward, 0), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer status returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :forward)

    assert_equal(1, @p.status.first[:records_in])
    assert_equal(1, @p.status.first[:records_out])
    assert_equal(1, @p.status.first[:sequences_in])
    assert_equal(1, @p.status.first[:sequences_out])
    assert_equal(20, @p.status.first[:residues_in])
    assert_equal(20, @p.status.first[:residues_out])
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse full length match ' \
       'returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :reverse)

    assert_equal(clipped('', :reverse, 0), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer w. forward begin match returns OK' do
    clip('TCGTATGCCGTCTTCTGCTTactacgt', :forward)

    assert_equal(clipped('actacgt', :forward, 0), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse begin match returns OK' do
    clip('TCGTATGCCGTCTTCTGCTTactacgt', :reverse)

    assert_equal(clipped('', :reverse, 0), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match returns OK' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward)

    assert_equal(clipped('actacgt', :forward, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match returns OK' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse)

    assert_equal(clipped('actgactga', :reverse, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward end match returns OK' do
    clip('gactgaTCGTATGCCGTCTTCTGCTT', :forward)

    assert_equal(clipped('', :forward, 6), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse end match returns OK' do
    clip('gactgaTCGTATGCCGTCTTCTGCTT', :reverse)

    assert_equal(clipped('gactga', :reverse, 6), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'reverse_complement returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward,
         primer: 'AAGCAGAAGACGGCATACGA', reverse_complement: true)

    assert_equal(clipped('actacgt', :forward, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'reverse_complement returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse,
         primer: 'AAGCAGAAGACGGCATACGA', reverse_complement: true)

    assert_equal(clipped('actgactga', :reverse, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle miss and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 28)

    expected = '{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}'

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 29)

    assert_equal(clipped('actacgt', :forward, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle miss and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 26)

    expected = '{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}'

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 27)

    assert_equal(clipped('actgactga', :reverse, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward match and ' \
       'search_distance longer than sequence returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 70)

    assert_equal(clipped('actacgt', :forward, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse match and ' \
       'search_distance longer than sequence returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 70)

    assert_equal(clipped('actgactga', :reverse, 9), collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length shorter than ' \
       'pattern returns correctly' do
    clip('actgactgaTC', :forward)

    expected = '{:SEQ=>"actgactgaTC"}'

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length 0 returns OK' do
    clip('', :forward)

    expected = '{:SEQ=>""}'

    assert_equal(expected, collect_result.chomp)
  end

  # Helpers are kept below the tests so that the bare +private+ cannot affect
  # the visibility of the test methods declared above.
  private

  # Writes one record holding +seq+ to the pipeline input, closes the writer
  # and runs clip_primer from @input to @output2.
  #
  # seq       - String written as the SEQ value of the record.
  # direction - Symbol passed on as the :direction option.
  # primer    - String passed on as the :primer option; the default is built
  #             anew on every call, so no String is shared between tests.
  # extra     - Further clip_primer options, e.g. :search_distance or
  #             :reverse_complement.
  def clip(seq, direction, primer: 'TCGTATGCCGTCTTCTGCTT', **extra)
    @output.write(SEQ: seq)
    @output.close
    @p.clip_primer(primer: primer, direction: direction, **extra).
      run(input: @input, output: @output2)
  end

  # Builds the text the tests expect for a record whose primer was found.
  #
  # seq       - String remaining in SEQ after clipping; SEQ_LEN is its length.
  # direction - Symbol whose upcased name goes into CLIP_PRIMER_DIR.
  # pos       - Integer expected as CLIP_PRIMER_POS.
  #
  # CLIP_PRIMER_LEN and CLIP_PRIMER_PAT are the same in every such test.
  def clipped(seq, direction, pos)
    [
      %({:SEQ=>"#{seq}"),
      ":SEQ_LEN=>#{seq.length}",
      %(:CLIP_PRIMER_DIR=>"#{direction.to_s.upcase}"),
      ":CLIP_PRIMER_POS=>#{pos}",
      ':CLIP_PRIMER_LEN=>20',
      ':CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}'
    ].join(', ')
  end
end
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for ClusterOtus.
#
# Every test is omitted when the usearch executable cannot be located.
class TestClusterOtus < Test::Unit::TestCase
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')
  end

  test 'BioDSL::Pipeline#cluster_otus with disallowed option raises' do
    pipeline = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { pipeline.cluster_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#cluster_otus with allowed option dont raise' do
    pipeline = BioDSL::Pipeline.new
    assert_nothing_raised { pipeline.cluster_otus(identity: 1) }
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and no SEQ_COUNT raises' do
    input, output   = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'atcg')
    output.write(SEQ: 'atcg')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::SeqError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and unsorted SEQ_COUNT ' \
       'raises' do
    input, output   = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    # SEQ_COUNT rises from 3 to 4 between the two sequence records.
    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 3, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::UsearchError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus outputs correctly' do
    input, output    = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    # The returned pipeline is not inspected here, so it is not assigned.
    BioDSL::Pipeline.new.cluster_otus.run(input: input, output: output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:one=>1,
      | :two=>2,
      | :three=>3}
      |{:SEQ_NAME=>"1",
      | :SEQ=>"ATCGAACGATCGATCGATCGATCGATCGTACGACGTAGCT",
      | :SEQ_LEN=>40,
      | :SEQ_COUNT=>5}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#cluster_otus status outputs correctly' do
    input, output    = BioDSL::Stream.pipe
    _input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline =
      BioDSL::Pipeline.new.cluster_otus.run(input: input, output: output2)

    assert_equal(3, pipeline.status.first[:records_in])
    assert_equal(2, pipeline.status.first[:records_out])
    assert_equal(2, pipeline.status.first[:sequences_in])
    assert_equal(1, pipeline.status.first[:sequences_out])
    assert_equal(80, pipeline.status.first[:residues_in])
    assert_equal(40, pipeline.status.first[:residues_out])
  end
end
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Test class for CollapseOtus.
#
# setup writes four OTU records to the pipeline input. OTU_0 and OTU_2 carry
# the same TAXONOMY string, and the expected output holds three records with
# OTU_0 at a SAMPLE1_COUNT of 3580 (3352 + 228).
class TestCollapseOtus < Test::Unit::TestCase
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(OTU: 'OTU_0', SAMPLE1_COUNT: 3352,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_1', SAMPLE1_COUNT: 881,
                  TAXONOMY: 'Leuconostocaceae(100);Leuconostoc(100)')
    @output.write(OTU: 'OTU_2', SAMPLE1_COUNT: 228,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_3', SAMPLE1_COUNT: 5,
                  TAXONOMY: 'Pseudomonadaceae(100);Pseudomonas(100)')

    @output.close

    @p = BP.new
  end

  # The test descriptions below used to read "BioDSL::Pipeline::Count ...",
  # although every test exercises collapse_otus.
  test 'BioDSL::Pipeline::CollapseOtus with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.collapse_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::CollapseOtus outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:OTU=>"OTU_0",
      | :SAMPLE1_COUNT=>3580,
      | :TAXONOMY=>"Streptococcaceae(100);Lactococcus(100)"}
      |{:OTU=>"OTU_1",
      | :SAMPLE1_COUNT=>881,
      | :TAXONOMY=>"Leuconostocaceae(100);Leuconostoc(100)"}
      |{:OTU=>"OTU_3",
      | :SAMPLE1_COUNT=>5,
      | :TAXONOMY=>"Pseudomonadaceae(100);Pseudomonas(100)"}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::CollapseOtus status outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    assert_equal(4, @p.status.first[:records_in])
    assert_equal(3, @p.status.first[:records_out])
    assert_equal(4, @p.status.first[:otus_in])
    assert_equal(3, @p.status.first[:otus_out])
  end
end