BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for ClassifySeqMothur.
#
# Only option validation of Pipeline#classify_seq_mothur is exercised here.
# Every test is omitted when the external +mothur+ executable is not found.
class TestClassifySeqMothur < Test::Unit::TestCase
  # Omits the test unless mothur is installed, then prepares a fresh pipeline
  # and the option values shared by the tests.
  def setup
    omit('mothur not found') unless BioDSL::Filesys.which('mothur')

    # Use the fully qualified class name instead of a top-level shorthand so
    # the test does not depend on an alias defined outside this file.
    @p = BioDSL::Pipeline.new

    # This test file doubles as the database and taxonomy path.
    # NOTE(review): presumably any existing file satisfies option checking -
    # confirm against the command's option validation.
    @database = __FILE__
    @taxonomy = __FILE__
  end

  test 'BioDSL::Pipeline#classify_seq_mothur with disallowed option fail' do
    assert_raise(BioDSL::OptionError) do
      @p.classify_seq_mothur(database: @database, taxonomy: @taxonomy,
                             foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#classify_seq_mothur w. allowed option dont fail' do
    assert_nothing_raised do
      @p.classify_seq_mothur(database: @database, taxonomy: @taxonomy, cpus: 2)
    end
  end

  # test "BioDSL::Pipeline#classify_seq_mothur outputs correctly" do
  #   # TODO: mock this sucker.
  # end
end
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for ClipPrimer.
#
# Each test feeds a single SEQ record through Pipeline#clip_primer and
# compares the dumped output record (or the pipeline status) with the
# expected value.
# rubocop:disable ClassLength
class TestClipPrimer < Test::Unit::TestCase
  # Creates the stream pair the record is written to and read from, the
  # stream pair the pipeline writes its result to, and a fresh pipeline.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ClipPrimer with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.clip_primer(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::ClipPrimer with valid options dont raise' do
    assert_nothing_raised do
      @p.clip_primer(primer: 'atcg', direction: :forward)
    end
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward full length match ' \
       'returns correctly' do
    result = clip_output('TCGTATGCCGTCTTCTGCTT',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>0,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer status returns correctly' do
    run_clip('TCGTATGCCGTCTTCTGCTT',
             primer: 'TCGTATGCCGTCTTCTGCTT',
             direction: :forward)

    status = @p.status.first

    assert_equal(1, status[:records_in])
    assert_equal(1, status[:records_out])
    assert_equal(1, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(20, status[:residues_in])
    assert_equal(20, status[:residues_out])
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse full length match ' \
       'returns correctly' do
    result = clip_output('TCGTATGCCGTCTTCTGCTT',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>0,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer w. forward begin match returns OK' do
    result = clip_output('TCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actacgt",
      | :SEQ_LEN=>7,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>0,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse begin match returns OK' do
    result = clip_output('TCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>0,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match returns OK' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actacgt",
      | :SEQ_LEN=>7,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match returns OK' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actgactga",
      | :SEQ_LEN=>9,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward end match returns OK' do
    result = clip_output('gactgaTCGTATGCCGTCTTCTGCTT',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"",
      | :SEQ_LEN=>0,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>6,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse end match returns OK' do
    result = clip_output('gactgaTCGTATGCCGTCTTCTGCTT',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"gactga",
      | :SEQ_LEN=>6,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>6,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'reverse_complement returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'AAGCAGAAGACGGCATACGA',
                         direction: :forward,
                         reverse_complement: true)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actacgt",
      | :SEQ_LEN=>7,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'reverse_complement returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'AAGCAGAAGACGGCATACGA',
                         direction: :reverse,
                         reverse_complement: true)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actgactga",
      | :SEQ_LEN=>9,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle miss and ' \
       'search_distance returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward,
                         search_distance: 28)

    expected = '{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}'

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward middle match and ' \
       'search_distance returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward,
                         search_distance: 29)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actacgt",
      | :SEQ_LEN=>7,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle miss and ' \
       'search_distance returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse,
                         search_distance: 26)

    expected = '{:SEQ=>"actgactgaTCGTATGCCGTCTTCTGCTTactacgt"}'

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse middle match and ' \
       'search_distance returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse,
                         search_distance: 27)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actgactga",
      | :SEQ_LEN=>9,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with forward match and ' \
       'search_distance longer than sequence returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward,
                         search_distance: 70)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actacgt",
      | :SEQ_LEN=>7,
      | :CLIP_PRIMER_DIR=>"FORWARD",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with reverse match and ' \
       'search_distance longer than sequence returns correctly' do
    result = clip_output('actgactgaTCGTATGCCGTCTTCTGCTTactacgt',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :reverse,
                         search_distance: 70)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:SEQ=>"actgactga",
      | :SEQ_LEN=>9,
      | :CLIP_PRIMER_DIR=>"REVERSE",
      | :CLIP_PRIMER_POS=>9,
      | :CLIP_PRIMER_LEN=>20,
      | :CLIP_PRIMER_PAT=>"TCGTATGCCGTCTTCTGCTT"}
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length shorter than ' \
       'pattern returns correctly' do
    result = clip_output('actgactgaTC',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = '{:SEQ=>"actgactgaTC"}'

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::ClipPrimer with sequence length 0 returns OK' do
    result = clip_output('',
                         primer: 'TCGTATGCCGTCTTCTGCTT',
                         direction: :forward)

    expected = '{:SEQ=>""}'

    assert_equal(expected, result)
  end

  # Writes one record holding +seq+ to the pipeline's input stream, closes
  # the writing end, adds clip_primer with +options+ to the shared pipeline
  # and runs it from @input to @output2.
  def run_clip(seq, **options)
    @output.write(SEQ: seq)
    @output.close
    @p.clip_primer(**options)
    @p.run(input: @input, output: @output2)
  end

  # Runs clip_primer on +seq+ via #run_clip and returns the value of
  # collect_result (defined outside this file) without its trailing newline.
  def clip_output(seq, **options)
    run_clip(seq, **options)
    collect_result.chomp
  end
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for ClusterOtus.
#
# Every test is omitted when the external +usearch+ executable is not found.
class TestClusterOtus < Test::Unit::TestCase
  # Omits the test unless usearch is installed.
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')
  end

  test 'BioDSL::Pipeline#cluster_otus with disallowed option raises' do
    pipeline = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { pipeline.cluster_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#cluster_otus with allowed option dont raise' do
    pipeline = BioDSL::Pipeline.new
    assert_nothing_raised { pipeline.cluster_otus(identity: 1) }
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and no SEQ_COUNT raises' do
    input, output   = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'atcg')
    output.write(SEQ: 'atcg')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::SeqError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus with SEQ and unsorted SEQ_COUNT ' \
       'raises' do
    input, output   = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 3, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::UsearchError) do
      pipeline.cluster_otus.run(input: input, output: output2)
    end

    input2.close
  end

  test 'BioDSL::Pipeline#cluster_otus outputs correctly' do
    input, output = BioDSL::Stream.pipe
    # The reading end is kept in an instance variable rather than a local;
    # presumably collect_result (defined outside this file) reads from it.
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    # The pipeline object itself is not needed afterwards, only its output.
    BioDSL::Pipeline.new.cluster_otus.run(input: input, output: output2)

    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:one=>1,
      | :two=>2,
      | :three=>3}
      |{:SEQ_NAME=>"1",
      | :SEQ=>"ATCGAACGATCGATCGATCGATCGATCGTACGACGTAGCT",
      | :SEQ_LEN=>40,
      | :SEQ_COUNT=>5}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#cluster_otus status outputs correctly' do
    input, output   = BioDSL::Stream.pipe
    input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ_COUNT: 5, SEQ: 'atcgaAcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.write(SEQ_COUNT: 4, SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    pipeline =
      BioDSL::Pipeline.new.cluster_otus.run(input: input, output: output2)
    status = pipeline.status.first

    assert_equal(3, status[:records_in])
    assert_equal(2, status[:records_out])
    assert_equal(2, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(80, status[:residues_in])
    assert_equal(40, status[:residues_out])

    # The output is never read in this test; close the reading end as the
    # raise tests above do instead of leaving it open.
    input2.close
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for CollapseOtus.
#
# Every test starts from the same four-record OTU table built in #setup.
# OTU_0 and OTU_2 carry the same TAXONOMY string, and the expected output
# holds a single OTU_0 record whose SAMPLE1_COUNT is their sum
# (3352 + 228 = 3580).
class TestCollapseOtus < Test::Unit::TestCase
  # Prepare the input stream (already closed for writing), an output stream
  # pair, and a fresh pipeline for each test.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(OTU: 'OTU_0', SAMPLE1_COUNT: 3352,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_1', SAMPLE1_COUNT: 881,
                  TAXONOMY: 'Leuconostocaceae(100);Leuconostoc(100)')
    @output.write(OTU: 'OTU_2', SAMPLE1_COUNT: 228,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_3', SAMPLE1_COUNT: 5,
                  TAXONOMY: 'Pseudomonadaceae(100);Pseudomonas(100)')

    @output.close

    @p = BP.new
  end

  test 'BioDSL::Pipeline#collapse_otus with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.collapse_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#collapse_otus outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    # The leading "<whitespace>|" margin and all newlines are stripped, so
    # the comparison is made on a single line of text.
    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:OTU=>"OTU_0",
      | :SAMPLE1_COUNT=>3580,
      | :TAXONOMY=>"Streptococcaceae(100);Lactococcus(100)"}
      |{:OTU=>"OTU_1",
      | :SAMPLE1_COUNT=>881,
      | :TAXONOMY=>"Leuconostocaceae(100);Leuconostoc(100)"}
      |{:OTU=>"OTU_3",
      | :SAMPLE1_COUNT=>5,
      | :TAXONOMY=>"Pseudomonadaceae(100);Pseudomonas(100)"}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#collapse_otus status outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    status = @p.status.first

    # Four OTU records go in; three come out.
    assert_equal(4, status[:records_in])
    assert_equal(3, status[:records_out])
    assert_equal(4, status[:otus_in])
    assert_equal(3, status[:otus_out])
  end
end
|