BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..')
|
3
|
+
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
5
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
6
|
+
# #
|
7
|
+
# This program is free software; you can redistribute it and/or #
|
8
|
+
# modify it under the terms of the GNU General Public License #
|
9
|
+
# as published by the Free Software Foundation; either version 2 #
|
10
|
+
# of the License, or (at your option) any later version. #
|
11
|
+
# #
|
12
|
+
# This program is distributed in the hope that it will be useful, #
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
15
|
+
# GNU General Public License for more details. #
|
16
|
+
# #
|
17
|
+
# You should have received a copy of the GNU General Public License #
|
18
|
+
# along with this program; if not, write to the Free Software #
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
20
|
+
# USA. #
|
21
|
+
# #
|
22
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
23
|
+
# #
|
24
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
25
|
+
# #
|
26
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
27
|
+
# #
|
28
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
29
|
+
|
30
|
+
require 'test/helper'
|
31
|
+
|
32
|
+
# rubocop: disable ClassLength
|
33
|
+
|
34
|
+
# Test class for Pipeline.
|
35
|
+
class PipelineTest < Test::Unit::TestCase
  # NOTE(review): nothing in the tests below references YAML directly;
  # presumably needed by code loaded elsewhere - confirm before removing.
  require 'yaml'

  # Per-test fixture: a scratch directory with a small FASTA file, Mail
  # switched to its :test delivery method, and a fresh pipeline in @p.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')

    setup_fasta_files

    Mail.defaults { delivery_method :test }

    @p = BP.new
  end

  # Sets the input/output FASTA paths inside @tmpdir and writes two records
  # (test1/atcg and test2/tgac) to the input file. The output path is only
  # recorded here, not created.
  def setup_fasta_files
    @fasta_file  = File.join(@tmpdir, 'test.fna')
    @fasta_file2 = File.join(@tmpdir, 'test2.fna')

    # The leading "<whitespace>|" margin is stripped so the heredoc can be
    # indented along with the code.
    fasta = <<-DATA.gsub(/^\s+\|/, '')
      |>test1
      |atcg
      |>test2
      |tgac
    DATA

    File.open(@fasta_file, 'w') { |ios| ios.puts fasta }
  end

  # Removes the scratch directory and empties the test mailer's deliveries.
  def teardown
    FileUtils.rm_r(@tmpdir)

    Mail::TestMailer.deliveries.clear
  end

  test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
    @p.commands << BioDSL::Command.new('dump', nil, {})

    assert_equal('BP.new.dump', @p.to_s)
  end

  test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
    @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')

    assert_equal('BP.new.read_fasta(input: "test.fna")', @p.to_s)
  end

  test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
    @p.commands << BioDSL::Command.new('dump', nil, {})
    @p.complete = true

    assert_equal('BP.new.dump.run', @p.run.to_s)
  end

  test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
    @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
    @p.complete = true

    assert_equal('BP.new.read_fasta(input: "test.fna").run', @p.run.to_s)
  end

  test 'BioDSL::Pipeline#run with no commands raises' do
    assert_raise(BioDSL::PipelineError) do
      @p.run
    end
  end

  test 'BioDSL::Pipeline#size returns correctly' do
    assert_equal(0, @p.size)

    @p.dump

    assert_equal(1, @p.size)
  end

  test 'BioDSL::Pipeline#+ with non-Pipeline object raises' do
    assert_raise(BioDSL::PipelineError) do
      @p + 'foo'
    end
  end

  test 'BioDSL::Pipeline#+ with Pipeline object dont raise' do
    assert_nothing_raised do
      @p + @p
    end
  end

  test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
    other    = BioDSL::Pipeline.new.dump(first: 2)
    combined = @p + other

    assert_equal('BP.new.dump(first: 2)', combined.to_s)
  end

  test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
    head     = BioDSL::Pipeline.new.dump(first: 2)
    tail     = BioDSL::Pipeline.new.dump(last: 3)
    combined = @p + head + tail

    assert_equal('BP.new.dump(first: 2).dump(last: 3)', combined.to_s)
  end

  test 'BioDSL::Pipeline#pop decreases size' do
    @p.dump
    assert_equal(1, @p.size)

    # The second pass pops an already empty pipeline; size must stay at 0.
    2.times do
      @p.pop
      assert_equal(0, @p.size)
    end
  end

  test 'BioDSL::Pipeline#pop returns correctly' do
    @p.dump

    expected_popped = BioDSL::Pipeline.new.dump.to_s
    popped          = @p.pop

    assert_equal(expected_popped, popped.to_s)
    assert_equal(BioDSL::Pipeline.new.to_s, @p.to_s)
  end

  test 'BioDSL::Pipeline#status without .run() returns correctly' do
    first_status = @p.read_fasta(input: __FILE__).status.first

    assert_equal({}, first_status)
  end

  test 'BioDSL::Pipeline#status with .run() returns correctly' do
    expected = "BioDSL::Pipeline.new.read_fasta(input: \"#{@fasta_file}\")"

    # NOTE(review): #status is stubbed on the pipeline under test, so the
    # assertion below compares the stub's return value with itself - confirm
    # whether the real status output was meant to be checked.
    @p.expects(:status).returns(expected)

    actual = @p.read_fasta(input: @fasta_file).run.status

    assert_equal(expected, actual)
  end

  test 'BioDSL::Pipeline#run with disallowed option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_fasta(input: @fasta_file).run(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#run returns correctly' do
    @p.read_fasta(input: @fasta_file).write_fasta(output: @fasta_file2).run

    # Reading and re-writing the FASTA file is expected to round-trip.
    assert_equal(File.read(@fasta_file), File.read(@fasta_file2))
  end

  test 'BioDSL::Pipeline#run with subject but no email raises' do
    assert_raise(BioDSL::OptionError) do
      @p.read_fasta(input: @fasta_file).run(subject: 'foobar')
    end
  end

  test 'BioDSL::Pipeline#run with email sends mail correctly' do
    omit

    @p.read_fasta(input: @fasta_file).run(email: 'test@foobar.com')

    deliveries = Mail::TestMailer.deliveries

    assert_equal(1, deliveries.length)
    assert_equal(@p.to_s, deliveries.first.subject)
  end

  test 'BioDSL::Pipeline#run with email and subject sends correctly' do
    omit

    @p.read_fasta(input: @fasta_file)
      .run(email: 'test@foobar.com', subject: 'foobar')

    deliveries = Mail::TestMailer.deliveries

    assert_equal(1, deliveries.length)
    assert_equal('foobar', deliveries.first.subject)
  end
end
|