BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # rubocop: disable ClassLength
34
+
35
+ # Test class for WriteFastq.
36
# Unit tests for the write_fastq pipeline command. Covers option validation,
# FASTQ output to stdout and to plain, gzipped and bzip2ed files, the status
# counters, and records being passed on to the output stream.
class TestWriteFastq < Test::Unit::TestCase
  # FASTQ text expected for the two records written in setup_data when no
  # :encoding option is given.
  EXPECTED_FASTQ = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n".freeze

  # Locate a gzip reader, create a scratch directory and a fresh pipeline.
  def setup
    @zcat = BioDSL::Filesys.which('gzcat') ||
            BioDSL::Filesys.which('zcat')

    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.fq')
    # NOTE(review): same path as @file and not referenced in this class;
    # kept in case shared helpers rely on it - confirm before removing.
    @file2  = File.join(@tmpdir, 'test.fq')

    setup_data

    @p = BioDSL::Pipeline.new
  end

  # Fill the input stream with two sequence records and open a second pipe
  # that tests pass as the command's output stream.
  def setup_data
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4, SCORES: '!!II')
    @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4, SCORES: '!!II')
    @output.close
  end

  # Remove the scratch directory and everything written into it.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::WriteFastq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::WriteFastq with invalid encoding raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(encoding: 'foo') }
  end

  test 'BioDSL::Pipeline::WriteFastq with valid encoding dont raise' do
    assert_nothing_raised { @p.write_fastq(encoding: :base_33) }
    assert_nothing_raised { @p.write_fastq(encoding: :base_64) }
  end

  test 'BioDSL::Pipeline::WriteFastq to stdout outputs correctly' do
    result = capture_stdout { @p.write_fastq.run(input: @input) }
    assert_equal(EXPECTED_FASTQ, result)
  end

  test 'BioDSL::Pipeline::WriteFastq status outputs correctly' do
    capture_stdout { @p.write_fastq.run(input: @input) }
    status = @p.status.first

    assert_equal(2, status[:records_in])
    assert_equal(2, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(2, status[:sequences_out])
    assert_equal(8, status[:residues_in])
    assert_equal(8, status[:residues_out])
  end

  test 'BioDSL::Pipeline::WriteFastq to stdout with base 64 encoding ' \
       'outputs correctly' do
    result = capture_stdout do
      @p.write_fastq(encoding: :base_64).run(input: @input)
    end
    expected = "@test1\natcg\n+\n@@hh\n@test2\ngtac\n+\n@@hh\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs correctly' do
    @p.write_fastq(output: @file).run(input: @input, output: @output2)
    # File.read closes the handle; the original File.open(...).read left it
    # open. The duplicated assertion was dropped.
    assert_equal(EXPECTED_FASTQ, File.read(@file))
  end

  test 'BioDSL::Pipeline::WriteFastq to existing file raises' do
    FileUtils.touch(@file)
    assert_raise(BioDSL::OptionError) { @p.write_fastq(output: @file) }
  end

  test 'BioDSL::Pipeline::WriteFastq to existing file with :force ' \
       'outputs OK' do
    FileUtils.touch(@file)
    @p.write_fastq(output: @file, force: true).run(input: @input)
    assert_equal(EXPECTED_FASTQ, File.read(@file))
  end

  test 'BioDSL::Pipeline::WriteFastq with gzipped data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(gzip: true) }
  end

  test 'BioDSL::Pipeline::WriteFastq w. bzip2ed data and no ' \
       'output file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs gzipped data OK' do
    @p.write_fastq(output: @file, gzip: true).run(input: @input)
    assert_equal(EXPECTED_FASTQ, decompress(@zcat, @file))
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs bzip2ed data OK' do
    @p.write_fastq(output: @file, bzip2: true).run(input: @input)
    assert_equal(EXPECTED_FASTQ, decompress('bzcat', @file))
  end

  test 'BioDSL::Pipeline::WriteFastq w. both gzip and bzip2 output raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_fastq(output: @file, gzip: true, bzip2: true)
    end
  end

  test 'BioDSL::Pipeline::WriteFastq with flux outputs correctly' do
    @p.write_fastq(output: @file).run(input: @input, output: @output2)
    assert_equal(EXPECTED_FASTQ, File.read(@file))

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4, :SCORES=>"!!II"}
      |{:SEQ_NAME=>"test2", :SEQ=>"gtac", :SEQ_LEN=>4, :SCORES=>"!!II"}
    EXP

    # collect_result is not defined in this class; presumably provided by
    # test/helper - confirm there.
    assert_equal(expected, collect_result)
  end

  private

  # Run an external decompressor on +path+ and return what it prints.
  # The argv form of IO.popen bypasses the shell, so the path is passed
  # verbatim even if it contains spaces or shell metacharacters.
  def decompress(tool, path)
    IO.popen([tool, path], &:read)
  end
end
@@ -0,0 +1,411 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # rubocop: disable ClassLength
34
+
35
+ # Test class for WriteTable.
36
# Unit tests for the write_table pipeline command. Covers option validation,
# tabular output to stdout and to plain, gzipped and bzip2ed files, the
# :keys, :skip, :header, :delimiter, :pretty, :commify, :first and :last
# options, the status counters, and records being passed on to the output
# stream.
#
# NOTE(review): in the :pretty expectations the cell rows are narrower than
# their border rows (e.g. "| Dog |" under "+-------+"), which suggests the
# column padding was collapsed at some point - confirm against real output.
class TestWriteTable < Test::Unit::TestCase
  # Tab separated table expected for the four records written in setup_data
  # when no formatting options are given.
  EXPECTED_TABLE = [
    "Human\t23524\tATACGTCAG\n",
    "Dog\t2442\tAGCATGAC\n",
    "Mouse\t234\tGACTG\n",
    "Cat\t2342\tAAATGCA\n"
  ].join.freeze

  # Locate a gzip reader, create a scratch directory and a fresh pipeline.
  def setup
    @zcat = BioDSL::Filesys.which('gzcat') ||
            BioDSL::Filesys.which('zcat')

    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.fna')
    # NOTE(review): same path as @file and not referenced in this class;
    # kept in case shared helpers rely on it - confirm before removing.
    @file2  = File.join(@tmpdir, 'test.fna')

    setup_data

    @p = BioDSL::Pipeline.new
  end

  # Fill the input stream with four records and open a second pipe that
  # tests pass as the command's output stream.
  def setup_data
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(ORGANISM: 'Human', COUNT: 23_524, SEQ: 'ATACGTCAG')
    @output.write(ORGANISM: 'Dog', COUNT: 2442, SEQ: 'AGCATGAC')
    @output.write(ORGANISM: 'Mouse', COUNT: 234, SEQ: 'GACTG')
    @output.write(ORGANISM: 'Cat', COUNT: 2_342, SEQ: 'AAATGCA')

    @output.close
  end

  # Remove the scratch directory and everything written into it.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::WriteTable with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::WriteTable with valid options dont raise' do
    assert_nothing_raised { @p.write_table(keys: [:SEQ]) }
  end

  test 'BioDSL::Pipeline::WriteTable to stdout outputs correctly' do
    result = capture_stdout { @p.write_table.run(input: @input) }
    assert_equal(EXPECTED_TABLE, result)
  end

  test 'BioDSL::Pipeline::WriteTable status outputs correctly' do
    capture_stdout { @p.write_table.run(input: @input) }
    status = @p.status.first

    assert_equal(4, status[:records_in])
    assert_equal(4, status[:records_out])
  end

  test 'BioDSL::Pipeline::WriteTable with :keys outputs correctly' do
    result = capture_stdout do
      @p.write_table(keys: [:SEQ, 'COUNT']).run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |ATACGTCAG\t23524
      |AGCATGAC\t2442
      |GACTG\t234
      |AAATGCA\t2342
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :skip outputs correctly' do
    result = capture_stdout do
      @p.write_table(skip: [:SEQ, 'COUNT']).run(input: @input)
    end

    expected = "Human\nDog\nMouse\nCat\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :header outputs correctly' do
    result = capture_stdout { @p.write_table(header: true).run(input: @input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |#ORGANISM\tCOUNT\tSEQ
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :delimiter outputs correctly' do
    result = capture_stdout do
      @p.write_table(delimiter: ';').run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human;23524;ATACGTCAG
      |Dog;2442;AGCATGAC
      |Mouse;234;GACTG
      |Cat;2342;AAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :delimiter and :pretty raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_table(delimiter: ';', pretty: true)
    end
  end

  # The option set here is :commify on its own; :pretty together with
  # :commify is accepted in the tests further down.
  test 'BioDSL::Pipeline::WriteTable with :commify and no :pretty ' \
       'raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(commify: true) }
  end

  test 'BioDSL::Pipeline::WriteTable with :pretty outputs correctly' do
    result = capture_stdout { @p.write_table(pretty: true).run(input: @input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+-------+-----------+
      || Human | 23524 | ATACGTCAG |
      || Dog | 2442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2342 | AAATGCA |
      |+-------+-------+-----------+
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :pretty and :header outputs OK' do
    result = capture_stdout do
      @p.write_table(pretty: true, header: true).run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+-------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+-------+-----------+
      || Human | 23524 | ATACGTCAG |
      || Dog | 2442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2342 | AAATGCA |
      |+----------+-------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :pretty and :commify outputs OK' do
    result = capture_stdout do
      @p.write_table(pretty: true, commify: true).run(input: @input)
    end
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 2,442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+-------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :pretty and :commify and floats ' \
       'outputs correctly' do
    input, output = BioDSL::Stream.pipe

    output.write(ORGANISM: 'Human', COUNT: 23_524, SEQ: 'ATACGTCAG')
    output.write(ORGANISM: 'Dog', COUNT: 244.1, SEQ: 'AGCATGAC')
    output.write(ORGANISM: 'Mouse', COUNT: 234, SEQ: 'GACTG')
    output.write(ORGANISM: 'Cat', COUNT: 2_342, SEQ: 'AAATGCA')

    output.close

    # Named +pipeline+ rather than +p+ so Kernel#p is not shadowed.
    pipeline = BioDSL::Pipeline.new

    result = capture_stdout do
      pipeline.write_table(pretty: true, commify: true).run(input: input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 244.1 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+-------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with V<num> keys are output OK' do
    input, output = BioDSL::Stream.pipe

    output.write(V1: 'Human', V2: 23_524, V0: 'ATACGTCAG')
    output.write(V1: 'Dog', V2: 244.1, V0: 'AGCATGAC')
    output.write(V1: 'Mouse', V2: 234, V0: 'GACTG')
    output.write(V1: 'Cat', V2: 2_342, V0: 'AAATGCA')

    output.close

    # Named +pipeline+ rather than +p+ so Kernel#p is not shadowed.
    pipeline = BioDSL::Pipeline.new

    result = capture_stdout { pipeline.write_table.run(input: input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |ATACGTCAG\tHuman\t23524
      |AGCATGAC\tDog\t244.1
      |GACTG\tMouse\t234
      |AAATGCA\tCat\t2342
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs correctly' do
    @p.write_table(output: @file).run(input: @input, output: @output2)
    # File.read closes the handle; the original File.open(...).read left it
    # open.
    assert_equal(EXPECTED_TABLE, File.read(@file))
  end

  test 'BioDSL::Pipeline::WriteTable to file with :first outputs OK' do
    @p.write_table(output: @file, first: 1).run(input: @input, output: @output2)
    result = File.read(@file)
    expected = "Human\t23524\tATACGTCAG\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :last outputs correctly' do
    @p.write_table(output: @file, last: 1).run(input: @input, output: @output2)
    result = File.read(@file)
    expected = "Cat\t2342\tAAATGCA\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty outputs OK' do
    command = @p.write_table(output: @file, pretty: true, header: true,
                             commify: true)
    command.run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+--------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 2,442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+----------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty and :first ' \
       'outputs correctly' do
    @p.write_table(output: @file, pretty: true, header: true,
                   commify: true, first: 1).run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+--------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      |+----------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty and :last ' \
       'outputs correctly' do
    @p.write_table(output: @file, pretty: true, header: true,
                   commify: true, last: 1).run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+-------+---------+
      || ORGANISM | COUNT | SEQ |
      |+----------+-------+---------+
      || Cat | 2,342 | AAATGCA |
      |+----------+-------+---------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to existing file raises' do
    FileUtils.touch(@file)
    assert_raise(BioDSL::OptionError) { @p.write_table(output: @file) }
  end

  test 'BioDSL::Pipeline::WriteTable to existing file w. :force outputs ' \
       'OK' do
    FileUtils.touch(@file)
    @p.write_table(output: @file, force: true).run(input: @input)
    assert_equal(EXPECTED_TABLE, File.read(@file))
  end

  # The second fragment originally began with a space, which put a double
  # space into the concatenated test name.
  test 'BioDSL::Pipeline::WriteTable with gzipped data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(gzip: true) }
  end

  test 'BioDSL::Pipeline::WriteTable with bzip2ed data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs gzipped data OK' do
    @p.write_table(output: @file, gzip: true).run(input: @input)
    assert_equal(EXPECTED_TABLE, decompress(@zcat, @file))
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs bzip2ed data OK' do
    @p.write_table(output: @file, bzip2: true).run(input: @input)
    assert_equal(EXPECTED_TABLE, decompress('bzcat', @file))
  end

  test 'BioDSL::Pipeline::WriteTable with both gzip and bzip2 output ' \
       'raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_table(output: @file, gzip: true, bzip2: true)
    end
  end

  test 'BioDSL::Pipeline::WriteTable with flux outputs correctly' do
    @p.write_table(output: @file).run(input: @input, output: @output2)
    assert_equal(EXPECTED_TABLE, File.read(@file))

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ORGANISM=>"Human", :COUNT=>23524, :SEQ=>"ATACGTCAG"}
      |{:ORGANISM=>"Dog", :COUNT=>2442, :SEQ=>"AGCATGAC"}
      |{:ORGANISM=>"Mouse", :COUNT=>234, :SEQ=>"GACTG"}
      |{:ORGANISM=>"Cat", :COUNT=>2342, :SEQ=>"AAATGCA"}
    EXP

    # collect_result is not defined in this class; presumably provided by
    # test/helper - confirm there.
    assert_equal(expected, collect_result)
  end

  private

  # Run an external decompressor on +path+ and return what it prints.
  # The argv form of IO.popen bypasses the shell, so the path is passed
  # verbatim even if it contains spaces or shell metacharacters.
  def decompress(tool, path)
    IO.popen([tool, path], &:read)
  end
end