BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Unit tests for the slice_seq pipeline command.
#
# Each test feeds the same two records through a fresh pipeline and checks
# either the text returned by collect_result or the status counters reported
# for the first command in the pipeline.
class TestSliceSeq < Test::Unit::TestCase
  # Creates both stream pairs, queues the two fixture records on the first
  # one, closes its write end and instantiates the pipeline under test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [
      {FOO: 'BAR', SEQ: 'atcg'},
      {SEQ: 'atcg', SCORES: '0123'}
    ].each { |record| @output.write(record) }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::SliceSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.slice_seq(slice: 1, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SliceSeq with valid options don\'t raise' do
    assert_nothing_raised do
      @p.slice_seq(slice: 1)
    end
  end

  test 'BioDSL::Pipeline::SliceSeq with index returns correctly' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"t", :SEQ_LEN=>1}
      |{:SEQ=>"t", :SCORES=>"1", :SEQ_LEN=>1}
    EXP

    assert_equal(want, slice_result(1))
  end

  test 'BioDSL::Pipeline::SliceSeq with out of range index returns OK' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"", :SEQ_LEN=>0}
      |{:SEQ=>"", :SCORES=>"", :SEQ_LEN=>0}
    EXP

    assert_equal(want, slice_result(10))
  end

  test 'BioDSL::Pipeline::SliceSeq with negative index returns correctly' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"g", :SEQ_LEN=>1}
      |{:SEQ=>"g", :SCORES=>"3", :SEQ_LEN=>1}
    EXP

    assert_equal(want, slice_result(-1))
  end

  test 'BioDSL::Pipeline::SliceSeq with negative out of range index ' \
       'returns correctly' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"", :SEQ_LEN=>0}
      |{:SEQ=>"", :SCORES=>"", :SEQ_LEN=>0}
    EXP

    assert_equal(want, slice_result(-10))
  end

  test 'BioDSL::Pipeline::SliceSeq with range returns correctly' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"tcg", :SEQ_LEN=>3}
      |{:SEQ=>"tcg", :SCORES=>"123", :SEQ_LEN=>3}
    EXP

    assert_equal(want, slice_result(1..-1))
  end

  test 'BioDSL::Pipeline::SliceSeq with out of range end range returns OK' do
    want = unmargin(<<-EXP)
      |{:FOO=>"BAR", :SEQ=>"tcg", :SEQ_LEN=>3}
      |{:SEQ=>"tcg", :SCORES=>"123", :SEQ_LEN=>3}
    EXP

    assert_equal(want, slice_result(1..10))
  end

  test 'BioDSL::Pipeline::SliceSeq status returns OK' do
    @p.slice_seq(slice: 1..10).run(input: @input, output: @output2)

    # Counter name => value expected after slicing both records with 1..10.
    counters = {
      records_in: 2,
      records_out: 2,
      sequences_in: 2,
      sequences_out: 2,
      residues_in: 8,
      residues_out: 6
    }

    counters.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end

  private

  # Runs slice_seq with +slice+ from @input to @output2 and returns what
  # collect_result (provided by test/helper) yields afterwards.
  def slice_result(slice)
    @p.slice_seq(slice: slice).run(input: @input, output: @output2)
    collect_result
  end

  # Removes the leading whitespace and '|' margin from every heredoc line.
  def unmargin(text)
    text.gsub(/^\s+\|/, '')
  end
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Unit tests for the sort pipeline command.
#
# Four NAME/COUNT records are queued before each test; the tests sort them by
# one of the keys and compare the text returned by collect_result, or inspect
# the status counters of the first command.
class TestSort < Test::Unit::TestCase
  # Creates both stream pairs, queues the fixture records on the first one,
  # closes its write end and instantiates the pipeline under test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [
      ['test2', 4],
      ['test1', 21],
      ['test2', 2],
      ['test3', 9]
    ].each do |name, count|
      @output.write(NAME: name, COUNT: count)
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::Sort with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.sort(key: :COUNT, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::Sort with valid options don\'t raise' do
    assert_nothing_raised do
      @p.sort(key: :COUNT)
    end
  end

  test 'BioDSL::Pipeline::Sort alphabetical returns correctly' do
    want = unmargin(<<-EXP)
      |{:NAME=>"test1", :COUNT=>21}
      |{:NAME=>"test2", :COUNT=>4}
      |{:NAME=>"test2", :COUNT=>2}
      |{:NAME=>"test3", :COUNT=>9}
    EXP

    assert_equal(want, sort_result(key: 'NAME'))
  end

  test 'BioDSL::Pipeline::Sort numerical returns correctly' do
    assert_equal(count_ascending, sort_result(key: :COUNT))
  end

  test 'BioDSL::Pipeline::Sort reverse returns correctly' do
    assert_equal(count_descending, sort_result(key: :COUNT, reverse: true))
  end

  test 'BioDSL::Pipeline::Sort with block_size returns correctly' do
    assert_equal(count_ascending, sort_result(key: :COUNT, block_size: 60))
  end

  test 'BioDSL::Pipeline::Sort with block_size and reverse returns OK' do
    got = sort_result(key: :COUNT, block_size: 30, reverse: true)

    assert_equal(count_descending, got)
  end

  test 'BioDSL::Pipeline::Sort status returns OK' do
    @p.sort(key: :COUNT).run(input: @input, output: @output2)

    %i(records_in records_out).each do |counter|
      assert_equal(4, @p.status.first[counter])
    end
  end

  private

  # Runs sort with +options+ from @input to @output2 and returns what
  # collect_result (provided by test/helper) yields afterwards.
  def sort_result(**options)
    @p.sort(**options).run(input: @input, output: @output2)
    collect_result
  end

  # Expected text when the fixture records are ordered by rising COUNT.
  def count_ascending
    unmargin(<<-EXP)
      |{:NAME=>"test2", :COUNT=>2}
      |{:NAME=>"test2", :COUNT=>4}
      |{:NAME=>"test3", :COUNT=>9}
      |{:NAME=>"test1", :COUNT=>21}
    EXP
  end

  # Expected text when the fixture records are ordered by falling COUNT.
  def count_descending
    unmargin(<<-EXP)
      |{:NAME=>"test1", :COUNT=>21}
      |{:NAME=>"test3", :COUNT=>9}
      |{:NAME=>"test2", :COUNT=>4}
      |{:NAME=>"test2", :COUNT=>2}
    EXP
  end

  # Removes the leading whitespace and '|' margin from every heredoc line.
  def unmargin(text)
    text.gsub(/^\s+\|/, '')
  end
end
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Unit tests for the split_pair_seq pipeline command.
#
# The shared fixture is three 40-residue records carrying SEQ_LEN_LEFT and
# SEQ_LEN_RIGHT of 20 each; @expected holds the six 20-residue records the
# command is expected to emit for them.
class TestSplitPairSeq < Test::Unit::TestCase
  # Creates both stream pairs, queues the fixture records, closes the write
  # end, prepares @expected and instantiates the pipeline under test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    setup_output

    @output.close

    setup_expected

    @p = BioDSL::Pipeline.new
  end

  # rubocop: disable MethodLength

  # Writes the three merged-pair fixture records to @output.
  def setup_output
    records = [
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14',
       SEQ: 'TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT',
       SEQ_LEN: 40,
       SCORES: '<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF',
       SEQ_LEN_LEFT: 20,
       SEQ_LEN_RIGHT: 20},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14',
       SEQ: 'TAGGGAATCTTGCACAATGGACTCTTCGCTACCCATGCTT',
       SEQ_LEN: 40,
       SCORES: '<???9?BBBDBDDBDDFFFF,5<??BB?DDABDBDDFFFF',
       SEQ_LEN_LEFT: 20,
       SEQ_LEN_RIGHT: 20},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14',
       SEQ: 'TAGGGAATCTTGCACAATGGCCTCTTCGCTACCCATGCTT',
       SEQ_LEN: 40,
       SCORES: '?????BBBBBDDBDDBFFFF??,<??B?BB?BBBBBFF?F',
       SEQ_LEN_LEFT: 20,
       SEQ_LEN_RIGHT: 20}
    ]

    records.each { |record| @output.write record }
  end

  # Stores in @expected the text for the six split records; the heredoc's
  # leading whitespace and '|' margin are stripped from every line.
  def setup_expected
    @expected = <<-EOD.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14",
      | :SEQ=>"TGGGGAATATTGGACAATGG",
      | :SEQ_LEN=>20,
      | :SCORES=>"<??????BDDDDDDDDGGGG"}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14862:1868 2:N:0:14",
      | :SEQ=>"CCTGTTTGCTACCCACGCTT",
      | :SEQ_LEN=>20,
      | :SCORES=>"?????BB<-<BDDDDDFEEF"}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14",
      | :SEQ=>"TAGGGAATCTTGCACAATGG",
      | :SEQ_LEN=>20,
      | :SCORES=>"<???9?BBBDBDDBDDFFFF"}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:13906:2139 2:N:0:14",
      | :SEQ=>"ACTCTTCGCTACCCATGCTT",
      | :SEQ_LEN=>20,
      | :SCORES=>",5<??BB?DDABDBDDFFFF"}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14",
      | :SEQ=>"TAGGGAATCTTGCACAATGG",
      | :SEQ_LEN=>20,
      | :SCORES=>"?????BBBBBDDBDDBFFFF"}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14865:2158 2:N:0:14",
      | :SEQ=>"CCTCTTCGCTACCCATGCTT",
      | :SEQ_LEN=>20,
      | :SCORES=>"??,<??B?BB?BBBBBFF?F"}
    EOD
  end

  # rubocop: enable MethodLength
  test 'BioDSL::Pipeline::SplitPairSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.split_pair_seq(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq with bad sequence lengths raises' do
    # SEQ_LEN_LEFT + SEQ_LEN_RIGHT (10 + 20) no longer adds up to SEQ_LEN (40).
    source = stream_of(pair_record(SEQ_LEN_LEFT: 10))

    assert_raise(BioDSL::SeqError) do
      @p.split_pair_seq.run(input: source, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq with bad sequence name raises' do
    # Same name as the base record, but without the space before "1:N:0:14".
    bad_name = 'M01168:16:000000000-A1R9L:1:1101:14862:18681:N:0:14'
    source   = stream_of(pair_record(SEQ_NAME: bad_name))

    assert_raise(RuntimeError) do
      @p.split_pair_seq.run(input: source, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq returns correctly' do
    @p.split_pair_seq.run(input: @input, output: @output2)

    # Newlines are dropped on both sides so only the record content matters.
    want = @expected.delete("\n")
    got  = collect_result.delete("\n")

    assert_equal(want, got)
  end

  test 'BioDSL::Pipeline::SplitPairSeq status returns correctly' do
    @p.split_pair_seq.run(input: @input, output: @output2)

    # Counter name => value expected after splitting the three fixtures.
    counters = {
      records_in: 3,
      records_out: 6,
      sequences_in: 3,
      sequences_out: 6,
      residues_in: 120,
      residues_out: 120
    }

    counters.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end

  private

  # Returns a copy of the first fixture record with +overrides+ merged in.
  def pair_record(overrides = {})
    {
      SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14',
      SEQ: 'TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT',
      SEQ_LEN: 40,
      SCORES: '<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF',
      SEQ_LEN_LEFT: 20,
      SEQ_LEN_RIGHT: 20
    }.merge(overrides)
  end

  # Returns the read end of a new stream pair that holds only +record+ and
  # whose write end has already been closed.
  def stream_of(record)
    input, output = BioDSL::Stream.pipe

    output.write record
    output.close

    input
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Unit tests for the split_values pipeline command.
#
# Two records whose ID values use different separators ("FOO:count=10" and
# "FOO_10_20") are queued before each test; the tests split the ID key with
# and without the :delimiter and :keys options and compare the text returned
# by collect_result, or inspect the status counters of the first command.
class TestSplitValues < Test::Unit::TestCase
  # Creates both stream pairs, queues the two fixture records on the first
  # one, closes its write end and instantiates the pipeline under test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    ['FOO:count=10', 'FOO_10_20'].each do |id|
      @output.write(ID: id, SEQ: 'gataag')
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::SplitValues with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.split_values(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SplitValues with valid options don\'t raise' do
    assert_nothing_raised do
      @p.split_values(key: :ID)
    end
  end

  test 'BioDSL::Pipeline::SplitValues returns correctly' do
    want = unmargin(<<-EXP)
      |{:ID=>"FOO:count=10", :SEQ=>"gataag"}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10, :ID_2=>20}
    EXP

    assert_equal(want, split_result(key: :ID))
  end

  test 'BioDSL::Pipeline::SplitValues status returns correctly' do
    @p.split_values(key: :ID).run(input: @input, output: @output2)

    %i(records_in records_out).each do |counter|
      assert_equal(2, @p.status.first[counter])
    end
  end

  test 'BioDSL::Pipeline::SplitValues with :delimiter returns correctly' do
    want = unmargin(<<-EXP)
      |{:ID=>"FOO:count=10", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag"}
    EXP

    assert_equal(want, split_result(key: 'ID', delimiter: ':count='))
  end

  test 'BioDSL::Pipeline::SplitValues w. :delimiter and :keys returns OK' do
    want = unmargin(<<-EXP)
      |{:ID=>"FOO", :SEQ=>"gataag", :COUNT=>10}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag"}
    EXP

    got = split_result(key: 'ID', keys: ['ID', :COUNT], delimiter: ':count=')

    assert_equal(want, got)
  end

  private

  # Runs split_values with +options+ from @input to @output2 and returns what
  # collect_result (provided by test/helper) yields afterwards.
  def split_result(**options)
    @p.split_values(**options).run(input: @input, output: @output2)
    collect_result
  end

  # Removes the leading whitespace and '|' margin from every heredoc line.
  def unmargin(text)
    text.gsub(/^\s+\|/, '')
  end
end
|