BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# rubocop: disable ClassLength
|
|
34
|
+
|
|
35
|
+
# Test class for WriteFastq.
#
# Exercises the +write_fastq+ pipeline command: option validation, output to
# stdout and to file (plain, gzip and bzip2), base 64 score encoding and the
# status counters reported by the pipeline.
class TestWriteFastq < Test::Unit::TestCase
  # Locates a gzip reader, creates a scratch directory and seeds the input
  # stream with two FASTQ records before every test.
  def setup
    # gzcat is tried first and zcat is the fallback.
    @zcat = BioDSL::Filesys.which('gzcat') ||
            BioDSL::Filesys.which('zcat')

    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.fq')
    # NOTE(review): @file2 is the same path as @file and is not referenced in
    # this class; kept in case a shared helper reads it - confirm and remove.
    @file2  = File.join(@tmpdir, 'test.fq')

    setup_data

    @p = BioDSL::Pipeline.new
  end

  # Creates two stream pipes and writes two 4-residue records to the first.
  # The second pipe is handed to +run+ as the downstream output in some tests.
  def setup_data
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4, SCORES: '!!II')
    @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4, SCORES: '!!II')
    @output.close
  end

  # Removes the scratch directory and everything written into it.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::WriteFastq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::WriteFastq with invalid encoding raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(encoding: 'foo') }
  end

  test 'BioDSL::Pipeline::WriteFastq with valid encoding dont raise' do
    assert_nothing_raised { @p.write_fastq(encoding: :base_33) }
    assert_nothing_raised { @p.write_fastq(encoding: :base_64) }
  end

  test 'BioDSL::Pipeline::WriteFastq to stdout outputs correctly' do
    result   = capture_stdout { @p.write_fastq.run(input: @input) }
    expected = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq status outputs correctly' do
    capture_stdout { @p.write_fastq.run(input: @input) }

    # Two records of four residues each were fed in by setup_data.
    status = @p.status.first
    assert_equal(2, status[:records_in])
    assert_equal(2, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(2, status[:sequences_out])
    assert_equal(8, status[:residues_in])
    assert_equal(8, status[:residues_out])
  end

  test 'BioDSL::Pipeline::WriteFastq to stdout with base 64 encoding ' \
       'outputs correctly' do
    result = capture_stdout do
      @p.write_fastq(encoding: :base_64).run(input: @input)
    end

    # The input scores '!!II' are expected to come out as '@@hh'.
    expected = "@test1\natcg\n+\n@@hh\n@test2\ngtac\n+\n@@hh\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs correctly' do
    @p.write_fastq(output: @file).run(input: @input, output: @output2)
    result   = File.read(@file)
    expected = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq to existing file raises' do
    FileUtils.touch(@file)
    assert_raise(BioDSL::OptionError) { @p.write_fastq(output: @file) }
  end

  test 'BioDSL::Pipeline::WriteFastq to existing file with :force ' \
       'outputs OK' do
    FileUtils.touch(@file)
    @p.write_fastq(output: @file, force: true).run(input: @input)
    result   = File.read(@file)
    expected = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq with gzipped data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(gzip: true) }
  end

  test 'BioDSL::Pipeline::WriteFastq w. bzip2ed data and no ' \
       'output file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_fastq(bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs gzipped data OK' do
    @p.write_fastq(output: @file, gzip: true).run(input: @input)
    # Decompress with the external tool found in setup.
    result   = `#{@zcat} #{@file}`
    expected = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq to file outputs bzip2ed data OK' do
    @p.write_fastq(output: @file, bzip2: true).run(input: @input)
    result   = `bzcat #{@file}`
    expected = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFastq w. both gzip and bzip2 output raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_fastq(output: @file, gzip: true, bzip2: true)
    end
  end

  test 'BioDSL::Pipeline::WriteFastq with flux outputs correctly' do
    @p.write_fastq(output: @file).run(input: @input, output: @output2)

    # First check what was written to the file ...
    written          = File.read(@file)
    expected_written = "@test1\natcg\n+\n!!II\n@test2\ngtac\n+\n!!II\n"
    assert_equal(expected_written, written)

    # ... then what collect_result (from test/helper) reports; presumably the
    # records passed on to @output2 - confirm against the helper.
    expected_records = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4, :SCORES=>"!!II"}
      |{:SEQ_NAME=>"test2", :SEQ=>"gtac", :SEQ_LEN=>4, :SCORES=>"!!II"}
    EXP

    assert_equal(expected_records, collect_result)
  end
end
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# rubocop: disable ClassLength
|
|
34
|
+
|
|
35
|
+
# Test class for WriteTable.
#
# Exercises the +write_table+ pipeline command: option validation, column
# selection (:keys / :skip), headers, delimiters, pretty-printed and
# commified tables, :first / :last row limits, and output to stdout and to
# plain, gzip and bzip2 files.
#
# NOTE(review): in the pretty-table expectations below the border rows and the
# cell rows do not have matching widths; column padding may have been lost in
# this copy - confirm against the upstream file before relying on them.
class TestWriteTable < Test::Unit::TestCase
  # Locates a gzip reader, creates a scratch directory and seeds the input
  # stream with four table records before every test.
  def setup
    # gzcat is tried first and zcat is the fallback.
    @zcat = BioDSL::Filesys.which('gzcat') ||
            BioDSL::Filesys.which('zcat')

    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.fna')
    # NOTE(review): @file2 is the same path as @file and is not referenced in
    # this class; kept in case a shared helper reads it - confirm and remove.
    @file2  = File.join(@tmpdir, 'test.fna')

    setup_data

    @p = BioDSL::Pipeline.new
  end

  # Creates two stream pipes and writes four ORGANISM/COUNT/SEQ records to the
  # first. The second pipe is handed to +run+ as the downstream output in some
  # tests.
  def setup_data
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(ORGANISM: 'Human', COUNT: 23_524, SEQ: 'ATACGTCAG')
    @output.write(ORGANISM: 'Dog', COUNT: 2442, SEQ: 'AGCATGAC')
    @output.write(ORGANISM: 'Mouse', COUNT: 234, SEQ: 'GACTG')
    @output.write(ORGANISM: 'Cat', COUNT: 2_342, SEQ: 'AAATGCA')

    @output.close
  end

  # Removes the scratch directory and everything written into it.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::WriteTable with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::WriteTable with valid options dont raise' do
    assert_nothing_raised { @p.write_table(keys: [:SEQ]) }
  end

  test 'BioDSL::Pipeline::WriteTable to stdout outputs correctly' do
    result = capture_stdout { @p.write_table.run(input: @input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable status outputs correctly' do
    capture_stdout { @p.write_table.run(input: @input) }

    # Four records were fed in by setup_data.
    status = @p.status.first
    assert_equal(4, status[:records_in])
    assert_equal(4, status[:records_out])
  end

  test 'BioDSL::Pipeline::WriteTable with :keys outputs correctly' do
    # Keys are given as a mix of Symbol and String on purpose.
    result = capture_stdout do
      @p.write_table(keys: [:SEQ, 'COUNT']).run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |ATACGTCAG\t23524
      |AGCATGAC\t2442
      |GACTG\t234
      |AAATGCA\t2342
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :skip outputs correctly' do
    result = capture_stdout do
      @p.write_table(skip: [:SEQ, 'COUNT']).run(input: @input)
    end

    expected = "Human\nDog\nMouse\nCat\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :header outputs correctly' do
    result = capture_stdout { @p.write_table(header: true).run(input: @input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |#ORGANISM\tCOUNT\tSEQ
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :delimiter outputs correctly' do
    result = capture_stdout do
      @p.write_table(delimiter: ';').run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human;23524;ATACGTCAG
      |Dog;2442;AGCATGAC
      |Mouse;234;GACTG
      |Cat;2342;AAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :delimiter and :pretty raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_table(delimiter: ';', pretty: true)
    end
  end

  # :commify is passed on its own here, i.e. without :pretty.
  test 'BioDSL::Pipeline::WriteTable with :commify and no :pretty raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(commify: true) }
  end

  test 'BioDSL::Pipeline::WriteTable with :pretty outputs correctly' do
    result = capture_stdout { @p.write_table(pretty: true).run(input: @input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+-------+-----------+
      || Human | 23524 | ATACGTCAG |
      || Dog | 2442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2342 | AAATGCA |
      |+-------+-------+-----------+
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with :pretty and :header outputs OK' do
    result = capture_stdout do
      @p.write_table(pretty: true, header: true).run(input: @input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+-------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+-------+-----------+
      || Human | 23524 | ATACGTCAG |
      || Dog | 2442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2342 | AAATGCA |
      |+----------+-------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :pretty and :commify outputs OK' do
    result = capture_stdout do
      @p.write_table(pretty: true, commify: true).run(input: @input)
    end
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 2,442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+-------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable w. :pretty and :commify and floats ' \
       'outputs correctly' do
    # Uses its own stream and pipeline so one COUNT can be a Float.
    input, output = BioDSL::Stream.pipe

    output.write(ORGANISM: 'Human', COUNT: 23_524, SEQ: 'ATACGTCAG')
    output.write(ORGANISM: 'Dog', COUNT: 244.1, SEQ: 'AGCATGAC')
    output.write(ORGANISM: 'Mouse', COUNT: 234, SEQ: 'GACTG')
    output.write(ORGANISM: 'Cat', COUNT: 2_342, SEQ: 'AAATGCA')

    output.close

    pipeline = BioDSL::Pipeline.new

    result = capture_stdout do
      pipeline.write_table(pretty: true, commify: true).run(input: input)
    end

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+-------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 244.1 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+-------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with V<num> keys are output OK' do
    # Records are written with keys in the order V1, V2, V0; the expectation
    # below has the columns in the order V0, V1, V2.
    input, output = BioDSL::Stream.pipe

    output.write(V1: 'Human', V2: 23_524, V0: 'ATACGTCAG')
    output.write(V1: 'Dog', V2: 244.1, V0: 'AGCATGAC')
    output.write(V1: 'Mouse', V2: 234, V0: 'GACTG')
    output.write(V1: 'Cat', V2: 2_342, V0: 'AAATGCA')

    output.close

    pipeline = BioDSL::Pipeline.new

    result = capture_stdout { pipeline.write_table.run(input: input) }
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |ATACGTCAG\tHuman\t23524
      |AGCATGAC\tDog\t244.1
      |GACTG\tMouse\t234
      |AAATGCA\tCat\t2342
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs correctly' do
    @p.write_table(output: @file).run(input: @input, output: @output2)
    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :first outputs OK' do
    @p.write_table(output: @file, first: 1).run(input: @input, output: @output2)
    result   = File.read(@file)
    expected = "Human\t23524\tATACGTCAG\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :last outputs correctly' do
    @p.write_table(output: @file, last: 1).run(input: @input, output: @output2)
    result   = File.read(@file)
    expected = "Cat\t2342\tAAATGCA\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty outputs OK' do
    @p.write_table(output: @file, pretty: true, header: true, commify: true).
      run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+--------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      || Dog | 2,442 | AGCATGAC |
      || Mouse | 234 | GACTG |
      || Cat | 2,342 | AAATGCA |
      |+----------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty and :first ' \
       'outputs correctly' do
    @p.write_table(output: @file, pretty: true, header: true,
                   commify: true, first: 1).run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+--------+-----------+
      || ORGANISM | COUNT | SEQ |
      |+----------+--------+-----------+
      || Human | 23,524 | ATACGTCAG |
      |+----------+--------+-----------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file with :pretty and :last ' \
       'outputs correctly' do
    @p.write_table(output: @file, pretty: true, header: true,
                   commify: true, last: 1).run(input: @input, output: @output2)

    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |+----------+-------+---------+
      || ORGANISM | COUNT | SEQ |
      |+----------+-------+---------+
      || Cat | 2,342 | AAATGCA |
      |+----------+-------+---------+
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to existing file raises' do
    FileUtils.touch(@file)
    assert_raise(BioDSL::OptionError) { @p.write_table(output: @file) }
  end

  test 'BioDSL::Pipeline::WriteTable to existing file w. :force outputs ' \
       'OK' do
    FileUtils.touch(@file)
    @p.write_table(output: @file, force: true).run(input: @input)
    result = File.read(@file)
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with gzipped data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(gzip: true) }
  end

  test 'BioDSL::Pipeline::WriteTable with bzip2ed data and no output ' \
       'file raises' do
    assert_raise(BioDSL::OptionError) { @p.write_table(bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs gzipped data OK' do
    @p.write_table(output: @file, gzip: true).run(input: @input)
    # Decompress with the external tool found in setup.
    result = `#{@zcat} #{@file}`

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable to file outputs bzip2ed data OK' do
    @p.write_table(output: @file, bzip2: true).run(input: @input)
    result = `bzcat #{@file}`

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteTable with both gzip and bzip2 output ' \
       'raises' do
    assert_raise(BioDSL::OptionError) do
      @p.write_table(output: @file, gzip: true, bzip2: true)
    end
  end

  test 'BioDSL::Pipeline::WriteTable with flux outputs correctly' do
    @p.write_table(output: @file).run(input: @input, output: @output2)

    # First check what was written to the file ...
    written = File.read(@file)

    expected_written = <<-EXP.gsub(/^\s+\|/, '')
      |Human\t23524\tATACGTCAG
      |Dog\t2442\tAGCATGAC
      |Mouse\t234\tGACTG
      |Cat\t2342\tAAATGCA
    EXP

    assert_equal(expected_written, written)

    # ... then what collect_result (from test/helper) reports; presumably the
    # records passed on to @output2 - confirm against the helper.
    expected_records = <<-EXP.gsub(/^\s+\|/, '')
      |{:ORGANISM=>"Human", :COUNT=>23524, :SEQ=>"ATACGTCAG"}
      |{:ORGANISM=>"Dog", :COUNT=>2442, :SEQ=>"AGCATGAC"}
      |{:ORGANISM=>"Mouse", :COUNT=>234, :SEQ=>"GACTG"}
      |{:ORGANISM=>"Cat", :COUNT=>2342, :SEQ=>"AAATGCA"}
    EXP

    assert_equal(expected_records, collect_result)
  end
end
|