BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for TrimPrimer.
|
|
34
|
+
#
|
|
35
|
+
# rubocop: disable ClassLength
|
|
36
|
+
class TestTrimPrimer < Test::Unit::TestCase
  # Creates the stream pair the pipeline reads from (@input/@output), the
  # stream pair it writes to (@input2/@output2) and a fresh pipeline.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::TrimPrimer with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.trim_primer(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::TrimPrimer with valid options dont raise' do
    assert_nothing_raised do
      @p.trim_primer(primer: 'atcg', direction: :forward)
    end
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and pattern longer than ' \
       'sequence returns correctly' do
    run_trim_primer('TATG', primer: 'TCGTATG', direction: :forward,
                            overlap_min: 1)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and pattern longer than ' \
       'sequence returns correctly' do
    run_trim_primer('TCGT', primer: 'TCGTATG', direction: :reverse,
                            overlap_min: 1)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and internal match ' \
       'returns correctly' do
    run_trim_primer('aTCGTATGactgactgatcgca', primer: 'TCGTATG',
                                              direction: :forward)

    assert_unchanged('{:SEQ=>"aTCGTATGactgactgatcgca"}')
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and internal match ' \
       'returns correctly' do
    run_trim_primer('ctgactgatcgcaaTCGTATGa', primer: 'TCGTATG',
                                              direction: :reverse)

    assert_unchanged('{:SEQ=>"ctgactgatcgcaaTCGTATGa"}')
  end

  test 'BioDSL::Pipeline::TrimPrimer w. forward and full match returns OK' do
    run_trim_primer('TCGTATGactgactgatcgca', primer: 'TCGTATG',
                                             direction: :forward)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>7,
      | :TRIM_PRIMER_PAT=>"TCGTATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer w. reverse and full match returns OK' do
    run_trim_primer('ctgactgatcgcaaTCGTATG', primer: 'TCGTATG',
                                             direction: :reverse)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>7,
      | :TRIM_PRIMER_PAT=>"TCGTATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer w. forward and partial match returns ' \
       'correctly' do
    run_trim_primer('TATGactgactgatcgca', primer: 'TCGTATG',
                                          direction: :forward)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and partial match and ' \
       'reverse_complement: true returns correctly' do
    run_trim_primer('TATGactgactgatcgca', primer: 'CATACGA',
                                          direction: :forward,
                                          reverse_complement: true)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer w. reverse and partial match returns ' \
       'correctly' do
    run_trim_primer('ctgactgatcgcaaTCGT', primer: 'TCGTATG',
                                          direction: :reverse)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial match and ' \
       'reverse_complement: true returns correctly' do
    run_trim_primer('ctgactgatcgcaaTCGT', primer: 'CATACGA',
                                          direction: :reverse,
                                          reverse_complement: true)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and minimum match ' \
       'returns correctly' do
    run_trim_primer('Gactgactgatcgca', primer: 'TCGTATG',
                                       direction: :forward)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>1,
      | :TRIM_PRIMER_PAT=>"G"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and minimum match ' \
       'returns correctly' do
    run_trim_primer('ctgactgatcgcaaT', primer: 'TCGTATG',
                                       direction: :reverse)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>1,
      | :TRIM_PRIMER_PAT=>"T"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and partial match and ' \
       'overlap_min returns correctly' do
    run_trim_primer('TATGactgactgatcgca', primer: 'TCGTATG',
                                          direction: :forward,
                                          overlap_min: 4)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"actgactgatcgca",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"FORWARD",
      | :TRIM_PRIMER_POS=>0,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TATG"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial match and ' \
       'overlap_min returns correctly' do
    run_trim_primer('ctgactgatcgcaaTCGT', primer: 'TCGTATG',
                                          direction: :reverse,
                                          overlap_min: 4)

    assert_trimmed(<<-EXP.gsub(/^\s+\|/, ''))
      |{:SEQ=>"ctgactgatcgcaa",
      | :SEQ_LEN=>14,
      | :TRIM_PRIMER_DIR=>"REVERSE",
      | :TRIM_PRIMER_POS=>14,
      | :TRIM_PRIMER_LEN=>4,
      | :TRIM_PRIMER_PAT=>"TCGT"}
    EXP
  end

  test 'BioDSL::Pipeline::TrimPrimer with forward and partial miss due ' \
       'to overlap_min returns correctly' do
    run_trim_primer('TATGactgactgatcgca', primer: 'TCGTATG',
                                          direction: :forward,
                                          overlap_min: 5)

    assert_unchanged('{:SEQ=>"TATGactgactgatcgca"}')
  end

  test 'BioDSL::Pipeline::TrimPrimer with reverse and partial miss due ' \
       'to overlap_min returns correctly' do
    run_trim_primer('ctgactgatcgcaaTCGT', primer: 'TCGTATG',
                                          direction: :reverse,
                                          overlap_min: 5)

    assert_unchanged('{:SEQ=>"ctgactgatcgcaaTCGT"}')
  end

  private

  # Writes a single record holding +seq+ to the pipeline's input stream,
  # closes that stream and runs trim_primer with +options+, sending the
  # pipeline output to @output2.
  def run_trim_primer(seq, **options)
    @output.write(SEQ: seq)
    @output.close
    @p.trim_primer(**options).run(input: @input, output: @output2)
  end

  # Asserts that the collected result equals +expected+ once newlines are
  # removed from both sides. collect_result comes from test/helper, which is
  # not visible here; presumably it renders the records read from @input2.
  def assert_trimmed(expected)
    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  # Asserts that the collected result, minus its trailing newline, is exactly
  # +expected+. Used where the record is expected to pass through untrimmed.
  def assert_unchanged(expected)
    assert_equal(expected, collect_result.chomp)
  end
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for TrimSeq.
|
|
34
|
+
class TestTrimSeq < Test::Unit::TestCase
  # Writes one record with a 62-residue sequence and matching quality scores
  # to the pipeline's input stream, closes it, and creates a fresh pipeline.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    hash = {
      SEQ_NAME: 'test',

      SEQ: 'gatcgatcgtacgagcagcatctgacgtatcgatcgttgtctacgacgagcatgctagctag',
      # Matches the 62 residues in SEQ; the mode: :right and status tests
      # below both expect 62 for the untrimmed sequence.
      SEQ_LEN: 62,
      SCORES: %q[!"#$%&'()*+,-./0123456789:;<=>?@ABCDEF876543210/.-,+*)('&%$III]
    }

    @output.write hash
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::TrimSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.trim_seq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::TrimSeq with valid options don\'t raise' do
    assert_nothing_raised { @p.trim_seq(mode: :left) }
  end

  test 'BioDSL::Pipeline::TrimSeq returns correctly' do
    @p.trim_seq.run(input: @input, output: @output2)

    # The heredoc margin is stripped, lines are joined with single spaces and
    # the trailing space is dropped, giving a one-line record.
    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq status returns correctly' do
    @p.trim_seq.run(input: @input, output: @output2)

    assert_equal(1, @p.status.first[:records_in])
    assert_equal(1, @p.status.first[:records_out])
    assert_equal(1, @p.status.first[:sequences_in])
    assert_equal(1, @p.status.first[:sequences_out])
    assert_equal(62, @p.status.first[:residues_in])
    assert_equal(42, @p.status.first[:residues_out])
  end

  test 'BioDSL::Pipeline::TrimSeq with :quality_min returns correctly' do
    @p.trim_seq(quality_min: 25).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"cgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>37,
      |:SCORES=>":;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq with mode: :both returns correctly' do
    @p.trim_seq(mode: :both).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::TrimSeq with mode: :left returns correctly' do
    @p.trim_seq(mode: :left).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>42,
      |:SCORES=>"56789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  # rubocop:disable LineLength
  test 'BioDSL::Pipeline::TrimSeq with mode: :right returns correctly' do
    @p.trim_seq(mode: :right).run(input: @input, output: @output2)

    # The backslash escapes below reproduce the inspect form of the scores
    # string (\" and \#) inside a heredoc with double-quote semantics.
    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"gatcgatcgtacgagcagcatctgacgtatcgatcgttgtctacgacgagcatgctagctag",
      |:SEQ_LEN=>62,
      |:SCORES=>"!\\"\\#\$%&'()*+,-./0123456789:;<=>?@ABCDEF876543210/.-,+*)('&%$III"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end
  # rubocop:enable LineLength

  test 'BioDSL::Pipeline::TrimSeq with :length_min returns correctly' do
    @p.trim_seq(length_min: 4).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').tr("\n", ' ')[0..-2]
      |{:SEQ_NAME=>"test",
      |:SEQ=>"tctgacgtatcgatcgttgtct",
      |:SEQ_LEN=>22,
      |:SCORES=>"56789:;<=>?@ABCDEF8765"}
    EXP

    assert_equal(expected, collect_result.chomp)
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for UchimeRef.
|
|
34
|
+
class TestUchimeRef < Test::Unit::TestCase
  require 'tempfile'

  # Omits the test when the usearch executable is not found; otherwise writes
  # a one-entry FASTA reference database to a temporary file.
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')

    data = <<-DAT.gsub(/^\s+\|/, '')
      |>test1
      |gtgtgtagctacgatcagctagcgatcgagctatatgttt
    DAT

    @db = Tempfile.new('database')
    @db.write(data)
    # Flush so the data is on disk before the database path is handed to the
    # pipeline.
    @db.flush
  end

  # Closes and removes the temporary database. @db is still nil when setup
  # omitted the test before creating it, so there is nothing to clean up.
  def teardown
    return unless @db

    @db.close
    @db.unlink
  end

  test 'BioDSL::Pipeline#uchime_ref with disallowed option raises' do
    pipeline = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { pipeline.uchime_ref(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#uchime_ref with allowed option dont raise' do
    pipeline = BioDSL::Pipeline.new
    assert_nothing_raised { pipeline.uchime_ref(database: @db.path) }
  end

  test 'BioDSL::Pipeline#uchime_ref outputs correctly' do
    run_uchime_ref

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:one=>1,
      | :two=>2,
      | :three=>3}
      |{:SEQ_NAME=>"1",
      | :SEQ=>"atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct",
      | :SEQ_LEN=>40}
      |{:SEQ_NAME=>"2",
      | :SEQ=>"atcgatcgatcgatcgatcgatcgatcgtacgacgtagct",
      | :SEQ_LEN=>40}
    EXP

    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#uchime_ref status outputs correctly' do
    status = run_uchime_ref.status.first

    assert_equal(3, status[:records_in])
    assert_equal(3, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(2, status[:sequences_out])
    assert_equal(80, status[:residues_in])
    assert_equal(80, status[:residues_out])
  end

  private

  # Feeds one non-sequence record and two sequence records through
  # uchime_ref against the temporary database and returns the pipeline that
  # was run. The read end of the output stream is kept in @input2;
  # collect_result (from test/helper, not visible here) presumably reads it.
  def run_uchime_ref
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new
    pipeline.uchime_ref(database: @db.path).run(input: input, output: output2)
    pipeline
  end
end
|