BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Option-validation tests for the classify_seq_mothur pipeline command.
#
# Every test is omitted when BioDSL::Filesys.which cannot locate a +mothur+
# executable. The database and taxonomy options both point at this test file
# itself (presumably only because an existing path is needed - TODO confirm);
# the pipeline is never run here, only the command is added.
class TestClassifySeqMothur < Test::Unit::TestCase
  # Skips the test without mothur, otherwise prepares a fresh pipeline and the
  # two path options shared by all tests.
  def setup
    mothur = BioDSL::Filesys.which('mothur')
    omit('mothur not found') unless mothur

    @p = BP.new
    @database = __FILE__
    @taxonomy = __FILE__
  end

  test 'BioDSL::Pipeline#classify_seq_mothur with disallowed option fail' do
    # :foo is the deliberately bogus option.
    options = { database: @database, taxonomy: @taxonomy, foo: 'bar' }

    assert_raise(BioDSL::OptionError) { @p.classify_seq_mothur(**options) }
  end

  test 'BioDSL::Pipeline#classify_seq_mothur w. allowed option dont fail' do
    options = { database: @database, taxonomy: @taxonomy, cpus: 2 }

    assert_nothing_raised { @p.classify_seq_mothur(**options) }
  end

  # test "BioDSL::Pipeline#classify_seq_mothur outputs correctly" do
  #   # TODO: mock this sucker.
  # end
end
@@ -0,0 +1,377 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the clip_primer pipeline command.
#
# setup creates two stream pipes per test: a record is written to @output, the
# pipeline reads it from @input and emits to @output2. The emitted records are
# fetched with collect_result, which is defined outside this file (presumably
# it drains @input2 - TODO confirm).
#
# The repeated plumbing and the repeated expected-record strings live in the
# private helpers at the bottom of the class.
# rubocop:disable ClassLength
class TestClipPrimer < Test::Unit::TestCase
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ClipPrimer with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.clip_primer(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::ClipPrimer with valid options dont raise' do
    options = { primer: 'atcg', direction: :forward }

    assert_nothing_raised { @p.clip_primer(**options) }
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward full length match ' \
       'returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :forward)

    assert_result(clipped('', 'FORWARD', 0))
  end

  test 'BioDSL::Pipeline::ClipPrimer status returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :forward)

    counts = { records_in: 1, records_out: 1, sequences_in: 1,
               sequences_out: 1, residues_in: 20, residues_out: 20 }

    counts.each { |key, count| assert_equal(count, @p.status.first[key]) }
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse full length match ' \
       'returns correctly' do
    clip('TCGTATGCCGTCTTCTGCTT', :reverse)

    assert_result(clipped('', 'REVERSE', 0))
  end

  test 'BioDSL::Pipeline::ClipPrimer w. forward begin match returns OK' do
    clip('TCGTATGCCGTCTTCTGCTTactacgt', :forward)

    assert_result(clipped('actacgt', 'FORWARD', 0))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse begin match returns OK' do
    clip('TCGTATGCCGTCTTCTGCTTactacgt', :reverse)

    assert_result(clipped('', 'REVERSE', 0))
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match returns OK' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward)

    assert_result(clipped('actacgt', 'FORWARD', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match returns OK' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse)

    assert_result(clipped('actgactga', 'REVERSE', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward end match returns OK' do
    clip('gactgaTCGTATGCCGTCTTCTGCTT', :forward)

    assert_result(clipped('', 'FORWARD', 6))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse end match returns OK' do
    clip('gactgaTCGTATGCCGTCTTCTGCTT', :reverse)

    assert_result(clipped('gactga', 'REVERSE', 6))
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'reverse_complement returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward,
         primer: 'AAGCAGAAGACGGCATACGA', reverse_complement: true)

    assert_result(clipped('actacgt', 'FORWARD', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'reverse_complement returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse,
         primer: 'AAGCAGAAGACGGCATACGA', reverse_complement: true)

    assert_result(clipped('actgactga', 'REVERSE', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle miss and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 28)

    assert_result('{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}')
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 29)

    assert_result(clipped('actacgt', 'FORWARD', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle miss and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 26)

    assert_result('{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}')
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'search_distance returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 27)

    assert_result(clipped('actgactga', 'REVERSE', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward match and ' \
       'search_distance longer than sequence returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :forward, search_distance: 70)

    assert_result(clipped('actacgt', 'FORWARD', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse match and ' \
       'search_distance longer than sequence returns correctly' do
    clip('actgactgaTCGTATGCCGTCTTCTGCTTactacgt', :reverse, search_distance: 70)

    assert_result(clipped('actgactga', 'REVERSE', 9))
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length shorter than ' \
       'pattern returns correctly' do
    clip('actgactgaTC', :forward)

    assert_result('{:SEQ=>"actgactgaTC"}')
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length 0 returns OK' do
    clip('', :forward)

    assert_result('{:SEQ=>""}')
  end

  private

  # Writes one record holding +seq+ to the input pipe, closes the writer, adds
  # clip_primer with the given options to @p and runs the pipeline from @input
  # to @output2.
  #
  # The primer defaults to the 20 residue pattern used by most tests; any
  # further options (search_distance, reverse_complement) are passed on after
  # :primer and :direction.
  def clip(seq, direction, primer: 'TCGTATGCCGTCTTCTGCTT', **extra)
    @output.write(SEQ: seq)
    @output.close
    @p.clip_primer(primer: primer, direction: direction, **extra).
      run(input: @input, output: @output2)
  end

  # Builds the string expected for a record that was clipped: the remaining
  # sequence +seq+ with its length, the direction label +dir+ and the match
  # position +pos+. Match length and pattern are the same in every test.
  def clipped(seq, dir, pos)
    fields = [
      %(:SEQ=>"#{seq}"),
      ":SEQ_LEN=>#{seq.length}",
      %(:CLIP_PRIMER_DIR=>"#{dir}"),
      ":CLIP_PRIMER_POS=>#{pos}",
      ':CLIP_PRIMER_LEN=>20',
      ':CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"'
    ]

    "{#{fields.join(', ')}}"
  end

  # Compares +expected+ with the collected pipeline output minus its trailing
  # newline.
  def assert_result(expected)
    assert_equal(expected, collect_result.chomp)
  end
end
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the cluster_otus pipeline command.
#
# All tests are omitted unless BioDSL::Filesys.which locates a +usearch+
# executable. Each test that runs the pipeline builds its own pair of stream
# pipes: records go in through +output+/+input+ and come out through
# +output2+/+input2+.
class TestClusterOtus < Test::Unit::TestCase
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')
  end

  test 'BioDSL::Pipeline#cluster_otus with disallowed option raises' do
    pipeline = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { pipeline.cluster_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#cluster_otus with allowed option dont raise' do
    pipeline = BioDSL::Pipeline.new
    assert_nothing_raised { pipeline.cluster_otus(identity: 1) }
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and no SEQ_COUNT raises' do
    input, output = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'atcg')
    output.write(SEQ: 'atcg')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::SeqError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and unsorted SEQ_COUNT ' \
       'raises' do
    input, output = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    # SEQ_COUNT ascends (3 then 4), which is the "unsorted" case under test.
    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 3, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::UsearchError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus outputs correctly' do
    input, output = BioDSL::Stream.pipe
    # Stored in an instance variable, unlike the other tests - presumably so
    # that collect_result (defined outside this file) can read it.
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    # Only the emitted records matter here, so the pipeline is not kept.
    BioDSL::Pipeline.new.cluster_otus.run(input: input, output: output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:one=>1,
      | :two=>2,
      | :three=>3}
      |{:SEQ_NAME=>"1",
      | :SEQ=>"ATCGAACGATCGATCGATCGATCGATCGTACGACGTAGCT",
      | :SEQ_LEN=>40,
      | :SEQ_COUNT=>5}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#cluster_otus status outputs correctly' do
    input, output = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new
    pipeline.cluster_otus.run(input: input, output: output2)

    status = pipeline.status.first

    assert_equal(3, status[:records_in])
    assert_equal(2, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(80, status[:residues_in])
    assert_equal(40, status[:residues_out])

    # The emitted records are not inspected; close the reading end the same way
    # the raise tests do instead of leaving the stream open.
    input2.close
  end
end
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the collapse_otus pipeline command.
#
# setup writes four OTU records to the input stream. OTU_0 and OTU_2 carry the
# same TAXONOMY string; the other two taxonomies are unique.
class TestCollapseOtus < Test::Unit::TestCase
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(OTU: 'OTU_0', SAMPLE1_COUNT: 3352,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_1', SAMPLE1_COUNT: 881,
                  TAXONOMY: 'Leuconostocaceae(100);Leuconostoc(100)')
    @output.write(OTU: 'OTU_2', SAMPLE1_COUNT: 228,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_3', SAMPLE1_COUNT: 5,
                  TAXONOMY: 'Pseudomonadaceae(100);Pseudomonas(100)')

    @output.close

    @p = BP.new
  end

  test 'BioDSL::Pipeline::CollapseOtus with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.collapse_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::CollapseOtus outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    # OTU_2 is expected to be folded into OTU_0: 3352 + 228 = 3580.
    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:OTU=>"OTU_0",
      | :SAMPLE1_COUNT=>3580,
      | :TAXONOMY=>"Streptococcaceae(100);Lactococcus(100)"}
      |{:OTU=>"OTU_1",
      | :SAMPLE1_COUNT=>881,
      | :TAXONOMY=>"Leuconostocaceae(100);Leuconostoc(100)"}
      |{:OTU=>"OTU_3",
      | :SAMPLE1_COUNT=>5,
      | :TAXONOMY=>"Pseudomonadaceae(100);Pseudomonas(100)"}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::CollapseOtus status outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    assert_equal(4, @p.status.first[:records_in])
    assert_equal(3, @p.status.first[:records_out])
    assert_equal(4, @p.status.first[:otus_in])
    assert_equal(3, @p.status.first[:otus_out])
  end
end