BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for AlignSeqMothur.
|
|
34
|
+
class TestAlignSeqMothur < Test::Unit::TestCase
  # Builds the fixtures shared by every test: a template FASTA file, two
  # stream pipes (one feeding the pipeline, one receiving its output) and a
  # fresh pipeline object. The whole test is omitted when the mothur
  # executable cannot be located.
  def setup
    require 'tempfile'

    omit('mothur not found') unless BioDSL::Filesys.which('mothur')

    @template = Tempfile.new('template')

    write_template

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test', SEQ: 'gattccgatcgatcgatcga')
    @output.close

    @p = BP.new
  end

  # Writes a single gapped reference sequence named 'ref' to the template
  # file in FASTA format.
  def write_template
    seq_name = 'ref'
    seq      = '--a-ttc--c-a-tcga----Ttcg-at---cCa---'

    BioDSL::Fasta.open(@template, 'w') do |ios|
      ios.puts BioDSL::Seq.new(seq_name: seq_name, seq: seq).to_fasta
    end
  end

  # Closes and removes the template file.
  def teardown
    # teardown still runs when setup bails out through omit, in which case
    # @template was never assigned and there is nothing to clean up.
    return unless @template

    @template.close
    @template.unlink
  end

  test 'BioDSL::Pipeline#align_seq_mothur with disallowed option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.align_seq_mothur(template_file: @template.path, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#align_seq_mothur w. allowed option don\'t raise' do
    assert_nothing_raised do
      @p.align_seq_mothur(template_file: @template.path, cpus: 2)
    end
  end

  test 'BioDSL::Pipeline#align_seq_mothur outputs correctly' do
    @p.align_seq_mothur(template_file: @template.path).
      run(input: @input, output: @output2)

    expected = '{:SEQ_NAME=>"test", ' \
               ':SEQ=>"..A-TTC--CGA-TCGA-----TCG-AT---CGA...", :SEQ_LEN=>37}'

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline#align_seq_mothur status returns correctly' do
    @p.align_seq_mothur(template_file: @template.path).
      run(input: @input, output: @output2)

    status = @p.status.first

    assert_equal(1, status[:records_in])
    assert_equal(1, status[:records_out])
    # NOTE(review): :sequences_in and :residues_in were each asserted twice
    # here; the duplicates were presumably meant to check :sequences_out and
    # :residues_out. Add those once the expected values are confirmed.
    assert_equal(1, status[:sequences_in])
    assert_equal(20, status[:residues_in])
  end
end
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for AnalyzeResidueDistribution.
|
|
34
|
+
class TestAnalyzeResidueDistribution < Test::Unit::TestCase
  # Builds the fixtures shared by every test: a scratch directory, two stream
  # pipes (one feeding the pipeline, one receiving its output) and a fresh
  # pipeline object. The input stream is loaded with four sequence records
  # and one record without a :SEQ key.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.plot')

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [{SEQ: 'AGCT'},
     {SEQ: 'AGCU'},
     {SEQ: 'FLS*'},
     {SEQ: '-.~'},
     {FOO: 'BAR'}].each do |record|
      @output.write(record)
    end

    @output.close

    @p = BP.new
  end

  # Removes the scratch directory created in setup.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline#analyze_residue_distribution with disallowed ' \
       'option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.analyze_residue_distribution(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#analyze_residue_distribution with allowed ' \
       'options don\'t raise' do
    assert_nothing_raised { @p.analyze_residue_distribution(percent: true) }
  end

  # rubocop:disable Metrics/LineLength

  test 'BioDSL::Pipeline#analyze_residue_distribution returns correctly' do
    @p.analyze_residue_distribution.run(input: @input, output: @output2)
    # The gsub strips the leading indentation and the '|' margin marker from
    # every heredoc line.
    expected = <<-EOD.gsub(/^\s*\|/, '')
      |{:SEQ=>"AGCT"}
      |{:SEQ=>"AGCU"}
      |{:SEQ=>"FLS*"}
      |{:SEQ=>"-.~"}
      |{:FOO=>"BAR"}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"A", :V1=>2, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"G", :V1=>0, :V2=>2, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"C", :V1=>0, :V2=>0, :V3=>2, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"T", :V1=>0, :V2=>0, :V3=>0, :V4=>1}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"U", :V1=>0, :V2=>0, :V3=>0, :V4=>1}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"F", :V1=>1, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"L", :V1=>0, :V2=>1, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"S", :V1=>0, :V2=>0, :V3=>1, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"*", :V1=>0, :V2=>0, :V3=>0, :V4=>1}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"-", :V1=>1, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>".", :V1=>0, :V2=>1, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"~", :V1=>0, :V2=>0, :V3=>1, :V4=>0}
    EOD
    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline#analyze_residue_distribution with :percent returns correctly' do
    @p.analyze_residue_distribution(percent: true).run(input: @input, output: @output2)
    expected = <<-EOD.gsub(/^\s*\|/, '')
      |{:SEQ=>"AGCT"}
      |{:SEQ=>"AGCU"}
      |{:SEQ=>"FLS*"}
      |{:SEQ=>"-.~"}
      |{:FOO=>"BAR"}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"A", :V1=>50, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"G", :V1=>0, :V2=>50, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"C", :V1=>0, :V2=>0, :V3=>50, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"T", :V1=>0, :V2=>0, :V3=>0, :V4=>33}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"U", :V1=>0, :V2=>0, :V3=>0, :V4=>33}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"F", :V1=>25, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"L", :V1=>0, :V2=>25, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"S", :V1=>0, :V2=>0, :V3=>25, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"*", :V1=>0, :V2=>0, :V3=>0, :V4=>33}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"-", :V1=>25, :V2=>0, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>".", :V1=>0, :V2=>25, :V3=>0, :V4=>0}
      |{:RECORD_TYPE=>"residue distribution", :V0=>"~", :V1=>0, :V2=>0, :V3=>25, :V4=>0}
    EOD
    # NOTE(review): as written this substitution swaps a single space for a
    # single space, i.e. it changes nothing. Confirm whether a multi-space
    # pattern (to collapse column alignment) was intended.
    assert_equal(expected.gsub(/ /, ' '), collect_result)
  end

  # rubocop:enable Metrics/LineLength

  test 'BioDSL::Pipeline#analyze_residue_distribution status returns OK' do
    @p.analyze_residue_distribution(percent: true).
      run(input: @input, output: @output2)

    status = @p.status.first

    assert_equal(5, status[:records_in])
    assert_equal(5, status[:records_out])
    # NOTE(review): :sequences_in and :residues_in were each asserted twice
    # here; the duplicates were presumably meant to check :sequences_out and
    # :residues_out. Add those once the expected values are confirmed.
    assert_equal(4, status[:sequences_in])
    assert_equal(15, status[:residues_in])
  end
end
|