BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
data/lib/BioDSL/test.rb
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of the BioDSL framework (www.BioDSL.org). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
  # Module-wide test flag. It is kept in a class variable so that it is visible
  # across the BioDSL module scope. Initially false.
  @@test = false

  class << self
    # Read the module-wide test flag.
    #
    # @return [Object] Current value of the flag.
    def test
      @@test
    end

    # Assign the module-wide test flag.
    #
    # @param x [Object] New value of the flag.
    def test=(x)
      @@test = x
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
3
|
+
# #
|
|
4
|
+
# This program is free software; you can redistribute it and/or #
|
|
5
|
+
# modify it under the terms of the GNU General Public License #
|
|
6
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
7
|
+
# of the License, or (at your option) any later version. #
|
|
8
|
+
# #
|
|
9
|
+
# This program is distributed in the hope that it will be useful, #
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
12
|
+
# GNU General Public License for more details. #
|
|
13
|
+
# #
|
|
14
|
+
# You should have received a copy of the GNU General Public License #
|
|
15
|
+
# along with this program; if not, write to the Free Software #
|
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
17
|
+
# USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
module BioDSL
  # Module to provide a temporary directory.
  module TmpDir
    require 'tempfile'
    require 'tmpdir' # Dir.mktmpdir is defined here; do not rely on tempfile.

    # Create a temporary directory in block context. The directory is created
    # below BioDSL::Config::TMP_DIR and, together with its content, is removed
    # as soon as the block returns.
    #
    # If called with a list of filenames, these are provided as block arguments
    # such that the files parent are the temporary directory. However, the last
    # block argument is always the path to the temporary directory. The named
    # files are not created, only their paths are composed.
    #
    # @param files [Array] List of file names.
    #
    # @yield [String] Paths of the requested files followed by the path of the
    #   temporary directory.
    #
    # @return [Object] The value returned by the block.
    #
    # @raise [RuntimeError] If no block is given.
    #
    # @example
    #   BioDSL::TmpDir.create do |dir|
    #     puts dir
    #       # => "<tmp_dir>"
    #   end
    #
    # @example
    #   BioDSL::TmpDir.create("foo", "bar") do |foo, bar, dir|
    #     puts foo
    #       # => "<tmp_dir>/foo"
    #     puts bar
    #       # => "<tmp_dir>/bar"
    #     puts dir
    #       # => "<tmp_dir>"
    #   end
    def self.create(*files, &block)
      fail 'no block given' unless block

      Dir.mktmpdir(nil, BioDSL::Config::TMP_DIR) do |dir|
        paths = files.map { |file| File.join(dir, file) }

        # With file names, a single Array is passed on purpose: a block with
        # several parameters destructures it, a block with one gets the list.
        paths.empty? ? block.call(dir) : block.call(paths << dir)
      end
    end
  end
end
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
|
|
30
|
+
# Error class for all exceptions to do with Usearch.
|
|
31
|
+
class UsearchError < StandardError
end
|
|
32
|
+
|
|
33
|
+
# rubocop: disable ClassLength
|
|
34
|
+
|
|
35
|
+
# Class with methods to execute Usearch and parse the results.
|
|
36
|
+
class Usearch
|
|
37
|
+
include Enumerable
|
|
38
|
+
|
|
39
|
+
# Execute cluster_smallmem.
|
|
40
|
+
#
|
|
41
|
+
# @param options [Hash] Options Hash
|
|
42
|
+
# @option options [String] :input
|
|
43
|
+
# @option options [String] :output
|
|
44
|
+
# @option options [String] :database
|
|
45
|
+
# @option options [Float] :identity
|
|
46
|
+
# @option options [Fixnum] :cpus
|
|
47
|
+
# @option options [String] :strand
|
|
48
|
+
def self.cluster_smallmem(options)
  # Build an instance from the options and run the command on it right away.
  new(options).cluster_smallmem
end
|
|
52
|
+
|
|
53
|
+
# Execute cluster_otus.
|
|
54
|
+
#
|
|
55
|
+
# @param options [Hash] Options Hash
|
|
56
|
+
# @option options [String] :input
|
|
57
|
+
# @option options [String] :output
|
|
58
|
+
# @option options [String] :database
|
|
59
|
+
# @option options [Float] :identity
|
|
60
|
+
# @option options [Fixnum] :cpus
|
|
61
|
+
# @option options [String] :strand
|
|
62
|
+
def self.cluster_otus(options)
  # Build an instance from the options and run the command on it right away.
  new(options).cluster_otus
end
|
|
66
|
+
|
|
67
|
+
# Execute uchime_ref.
|
|
68
|
+
#
|
|
69
|
+
# @param options [Hash] Options Hash
|
|
70
|
+
# @option options [String] :input
|
|
71
|
+
# @option options [String] :output
|
|
72
|
+
# @option options [String] :database
|
|
73
|
+
# @option options [Float] :identity
|
|
74
|
+
# @option options [Fixnum] :cpus
|
|
75
|
+
# @option options [String] :strand
|
|
76
|
+
def self.uchime_ref(options)
  # Build an instance from the options and run the command on it right away.
  new(options).uchime_ref
end
|
|
80
|
+
|
|
81
|
+
# Execute usearch_global.
|
|
82
|
+
#
|
|
83
|
+
# @param options [Hash] Options Hash
|
|
84
|
+
# @option options [String] :input
|
|
85
|
+
# @option options [String] :output
|
|
86
|
+
# @option options [String] :database
|
|
87
|
+
# @option options [Float] :identity
|
|
88
|
+
# @option options [Fixnum] :cpus
|
|
89
|
+
# @option options [String] :strand
|
|
90
|
+
def self.usearch_global(options)
  # Build an instance from the options and run the command on it right away.
  new(options).usearch_global
end
|
|
94
|
+
|
|
95
|
+
# Execute usearch_local.
|
|
96
|
+
#
|
|
97
|
+
# @param options [Hash] Options Hash
|
|
98
|
+
# @option options [String] :input
|
|
99
|
+
# @option options [String] :output
|
|
100
|
+
# @option options [String] :database
|
|
101
|
+
# @option options [Float] :identity
|
|
102
|
+
# @option options [Fixnum] :cpus
|
|
103
|
+
# @option options [String] :strand
|
|
104
|
+
def self.usearch_local(options)
  # Build an instance from the options and run the command on it right away.
  new(options).usearch_local
end
|
|
108
|
+
|
|
109
|
+
# Open a Usearch file.
|
|
110
|
+
#
|
|
111
|
+
# @param args [Array] List of open arguments.
|
|
112
|
+
#
|
|
113
|
+
# @yield [IO] stream.
|
|
114
|
+
# @return [IO] stream.
|
|
115
|
+
def self.open(*args)
  ios = IO.open(*args)

  # Without a block the stream is handed to the caller; with a block the
  # stream is yielded and the value of the block is returned.
  return ios unless block_given?

  yield ios
end
|
|
124
|
+
|
|
125
|
+
# Constructor for Usearch class.
|
|
126
|
+
#
|
|
127
|
+
# @param options [Hash] Options Hash
|
|
128
|
+
# @option options [String] :input
|
|
129
|
+
# @option options [String] :output
|
|
130
|
+
# @option options [String] :database
|
|
131
|
+
# @option options [Float] :identity
|
|
132
|
+
# @option options [Fixnum] :cpus
|
|
133
|
+
# @option options [String] :strand
|
|
134
|
+
#
|
|
135
|
+
# @return [Usearch] Class instance.
|
|
136
|
+
def initialize(options)
  @options = options
  @stderr  = nil

  # An empty input file is rejected up front.
  if File.size(@options[:input]).zero?
    fail UsearchError, %(Empty input file -> "#{@options[:input]}")
  end

  self
end
|
|
144
|
+
|
|
145
|
+
# Compose a command list and execute cluster_smallmem with this.
|
|
146
|
+
#
|
|
147
|
+
# @return [self]
|
|
148
|
+
def cluster_smallmem
  # The command is composed from @options: :input, :identity and :output are
  # always used; :cpus and :strand only add their flag when set. Previously
  # "-strand" was emitted even without a value, which left a dangling flag
  # that swallowed the following argument.
  command = []
  command << 'usearch'
  command << "-cluster_smallmem #{@options[:input]}"
  command << "-id #{@options[:identity]}"
  command << "-threads #{@options[:cpus]}" if @options[:cpus]
  command << "-strand #{@options[:strand]}" if @options[:strand]

  # :align selects multiple alignment output instead of UC formatted output.
  output_flag = @options[:align] ? '-msaout' : '-uc'
  command << "#{output_flag} #{@options[:output]}"

  execute(command)

  self
end
|
|
166
|
+
|
|
167
|
+
# Compose a command list and execute cluster_otus with this.
|
|
168
|
+
#
|
|
169
|
+
# @return [self]
|
|
170
|
+
def cluster_otus
  threads = @options[:cpus]

  # Fixed part of the command line first, optional thread count last.
  command = [
    'usearch',
    "-cluster_otus #{@options[:input]}",
    "-otus #{@options[:output]}",
    "-id #{@options[:identity]}"
  ]
  command.push("-threads #{threads}") if threads

  execute(command)

  self
end
|
|
182
|
+
|
|
183
|
+
# Compose a command list and execute uchime_ref with this.
|
|
184
|
+
#
|
|
185
|
+
# @return [self]
|
|
186
|
+
def uchime_ref
  # The command is composed from @options: :input, :database and :output are
  # always used; :strand and :cpus only add their flag when set. Previously
  # "-strand" was emitted even without a value, which left a dangling flag
  # that swallowed the following argument.
  command = []
  command << 'usearch'
  command << "-uchime_ref #{@options[:input]}"
  command << "-db #{@options[:database]}"
  command << "-strand #{@options[:strand]}" if @options[:strand]
  command << "-threads #{@options[:cpus]}" if @options[:cpus]
  command << "-nonchimeras #{@options[:output]}"

  execute(command)

  self
end
|
|
199
|
+
|
|
200
|
+
# Compose a command list and execute usearch_global with this.
|
|
201
|
+
#
|
|
202
|
+
# @return [self]
|
|
203
|
+
def usearch_global
  strand  = @options[:strand]
  threads = @options[:cpus]

  command = ['usearch', '-notrunclabels']
  command.push("-usearch_global #{@options[:input]}",
               "-db #{@options[:database]}")

  # Strand and thread count are optional and only added when set.
  command.push("-strand #{strand}") if strand
  command.push("-threads #{threads}") if threads

  command.push("-id #{@options[:identity]}", "-uc #{@options[:output]}")

  execute(command)

  self
end
|
|
218
|
+
|
|
219
|
+
# Compose a command list and execute usearch_local with this.
|
|
220
|
+
#
|
|
221
|
+
# @return [self]
|
|
222
|
+
def usearch_local
  strand  = @options[:strand]
  threads = @options[:cpus]

  command = ['usearch', '-notrunclabels']
  command.push("-usearch_local #{@options[:input]}",
               "-db #{@options[:database]}")

  # Strand and thread count are optional and only added when set.
  command.push("-strand #{strand}") if strand
  command.push("-threads #{threads}") if threads

  command.push("-id #{@options[:identity]}", "-uc #{@options[:output]}")

  execute(command)

  self
end
|
|
237
|
+
|
|
238
|
+
private
|
|
239
|
+
|
|
240
|
+
# Execute Usearch on a given command.
|
|
241
|
+
#
|
|
242
|
+
# @param command [Array] Usearch command list.
|
|
243
|
+
def execute(command)
  # Runs the command list as one shell command line. '--quiet' is appended to
  # the given list unless @options[:verbose] is set, in which case the command
  # line is echoed to $stderr instead. The lines the command wrote to stderr
  # are kept in @stderr. Raises UsearchError on a non-zero exit status.
  command << '--quiet' unless @options[:verbose]
  command_str = command.join(' ')

  $stderr.puts "Running command: #{command_str}" if @options[:verbose]

  # capture3 drains stdout and stderr concurrently. Reading only stderr from
  # popen3 can block forever once the child has filled the stdout pipe.
  _stdout, stderr, exit_status = Open3.capture3(command_str)
  @stderr = stderr.split $INPUT_RECORD_SEPARATOR

  return if exit_status.success?

  # TODO: write error message to log.
  # The command line comes first, then the captured stderr, one item per line.
  message = ["Command failed: #{command_str}", *@stderr]
  fail UsearchError, message.join($INPUT_RECORD_SEPARATOR)
end
|
|
260
|
+
|
|
261
|
+
# Class for Usearch IO.
|
|
262
|
+
class IO < Filesys
|
|
263
|
+
# Parse a given type of Uclust format and yield the result.
|
|
264
|
+
#
|
|
265
|
+
# @param format [Symbol] Format type to parse.
|
|
266
|
+
def each(format = :uc)
  # :uc is the only supported format; anything else is rejected.
  unless format == :uc
    fail UsearchError, "Unknown iterator format: #{format}"
  end

  each_uc { |record| yield record }
end
|
|
273
|
+
|
|
274
|
+
# rubocop: disable Metrics/AbcSize
|
|
275
|
+
|
|
276
|
+
# Parse each UC type record and yield the result.
|
|
277
|
+
#
|
|
278
|
+
# @yield [Hash] BioDSL record with UC result.
|
|
279
|
+
def each_uc
  @io.each do |line|
    # Tab separated columns 0-4 and 7-9 are used; columns 5 and 6 are ignored.
    type, cluster, size, ident, strand, _, _, cigar, q_id, s_id =
      line.chomp.split("\t")
    hit = type == 'H'

    record = {TYPE: type, CLUSTER: cluster.to_i}

    # Column 2 is stored as the cluster size for 'C' records and as the
    # sequence length for all other record types.
    record[type == 'C' ? :CLUSTER_SIZE : :SEQ_LEN] = size.to_i

    # Identity and subject ID are only stored for 'H' records.
    record[:IDENT]  = ident.to_f if hit
    record[:STRAND] = strand
    record[:CIGAR]  = cigar
    record[:Q_ID]   = q_id
    record[:S_ID]   = s_id if hit

    yield record
  end
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of the BioDSL framework (www.BioDSL.org). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
  # Module-wide verbose flag. It is kept in a class variable so that it is
  # visible across the BioDSL module scope. Initially false.
  @@verbose = false

  class << self
    # Read the module-wide verbose flag.
    #
    # @return [Object] Current value of the flag.
    def verbose
      @@verbose
    end

    # Assign the module-wide verbose flag.
    #
    # @param x [Object] New value of the flag.
    def verbose=(x)
      @@verbose = x
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of the BioDSL framework (www.BioDSL.org). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
  # Version of the BioDSL gem. Frozen so the constant cannot be mutated in
  # place.
  VERSION = '1.0.0'.freeze
end
|
data/lib/BioDSL.rb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Compare the version numerically, component by component. A plain String
# comparison is lexicographic and would order '10.0' before '2.0'.
fail 'Ruby 2.0 or later required' if
  (RUBY_VERSION.split('.').map(&:to_i) <=> [2, 0]) < 0
|
|
29
|
+
|
|
30
|
+
# Commify numbers.
|
|
31
|
+
class Numeric
  # Format the number as a String with a comma inserted after every group the
  # pattern matches, i.e. as thousands separators in the leading digit run.
  #
  # @return [String] The commified number.
  def commify
    grouping = /(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/

    to_s.gsub(grouping) { |digits| "#{digits}," }
  end
end
|
|
36
|
+
|
|
37
|
+
# Convert string to float or integer if applicable.
|
|
38
|
+
class String
  # Convert the string to a number if it is one, else return it unchanged.
  #
  # The value returned is the one produced by the validating conversion, so
  # validation and conversion cannot disagree. Integers are always read in
  # base 10: zero padded strings like "08" yield the Integer 8 instead of
  # failing the octal check and ending up as the Float 8.0.
  #
  # @return [Integer, Float, String] Integer if the string is a base 10
  #   integer, Float if Kernel#Float accepts it, else the string itself.
  def to_num
    Integer(self, 10)
  rescue ArgumentError
    begin
      Float(self)
    rescue ArgumentError
      self
    end
  end
end
|
|
51
|
+
|
|
52
|
+
# Namespace for BioDSL.
|
|
53
|
+
module BioDSL
|
|
54
|
+
require 'pp'
|
|
55
|
+
require 'BioDSL/cary'
|
|
56
|
+
require 'BioDSL/commands'
|
|
57
|
+
require 'BioDSL/debug'
|
|
58
|
+
require 'BioDSL/helpers'
|
|
59
|
+
require 'BioDSL/seq'
|
|
60
|
+
require 'BioDSL/config'
|
|
61
|
+
require 'BioDSL/hamming'
|
|
62
|
+
require 'BioDSL/version'
|
|
63
|
+
require 'BioDSL/filesys'
|
|
64
|
+
require 'BioDSL/csv'
|
|
65
|
+
require 'BioDSL/fork'
|
|
66
|
+
require 'BioDSL/html_report'
|
|
67
|
+
require 'BioDSL/pipeline'
|
|
68
|
+
require 'BioDSL/fasta'
|
|
69
|
+
require 'BioDSL/fastq'
|
|
70
|
+
require 'BioDSL/math'
|
|
71
|
+
require 'BioDSL/mummer'
|
|
72
|
+
require 'BioDSL/taxonomy'
|
|
73
|
+
require 'BioDSL/tmp_dir'
|
|
74
|
+
require 'BioDSL/serializer'
|
|
75
|
+
require 'BioDSL/stream'
|
|
76
|
+
require 'BioDSL/test'
|
|
77
|
+
require 'BioDSL/usearch'
|
|
78
|
+
require 'BioDSL/verbose'
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
BP = BioDSL::Pipeline # Module alias for irb short hand
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
30
|
+
|
|
31
|
+
require 'test/helper'
|
|
32
|
+
|
|
33
|
+
# Test class for AddKey.
class TestAddKey < Test::Unit::TestCase
  # Write three records to the first pipe, close its write end and create a
  # fresh pipeline for every test.
  def setup
    @input,  @output  = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(one: 1, two: 2, three: 3)
    @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4)
    @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4)
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#add_key with disallowed option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.add_key(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#add_key with value and prefix options raise' do
    assert_raise(BioDSL::OptionError) do
      @p.add_key(key: 'SEQ_NAME', value: 'foobar', prefix: 'foo')
    end
  end

  test 'BioDSL::Pipeline#add_key with allowed options don\'t raise' do
    assert_nothing_raised do
      @p.add_key(key: 'SEQ_NAME', value: 'fobar')
    end
  end

  test 'BioDSL::Pipeline#add_key status returns correctly' do
    run_add_key(key: 'SEQ_NAME', value: 'fobar')

    # All three records written in setup go in and come out again.
    assert_equal(3, @p.status.last[:records_in])
    assert_equal(3, @p.status.last[:records_out])
  end

  test 'BioDSL::Pipeline#add_key with value returns correctly' do
    run_add_key(key: 'SEQ_NAME', value: 'fobar')

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:one=>1, :two=>2, :three=>3, :SEQ_NAME=>"fobar"}
      |{:SEQ_NAME=>"fobar", :SEQ=>"atcg", :SEQ_LEN=>4}
      |{:SEQ_NAME=>"fobar", :SEQ=>"gtac", :SEQ_LEN=>4}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline#add_key with empty prefix returns correctly' do
    run_add_key(key: 'SEQ_NAME', prefix: '')

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:one=>1, :two=>2, :three=>3, :SEQ_NAME=>"0"}
      |{:SEQ_NAME=>"1", :SEQ=>"atcg", :SEQ_LEN=>4}
      |{:SEQ_NAME=>"2", :SEQ=>"gtac", :SEQ_LEN=>4}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline#add_key with prefix returns correctly' do
    run_add_key(key: 'SEQ_NAME', prefix: 'ID_')

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:one=>1, :two=>2, :three=>3, :SEQ_NAME=>"ID_0"}
      |{:SEQ_NAME=>"ID_1", :SEQ=>"atcg", :SEQ_LEN=>4}
      |{:SEQ_NAME=>"ID_2", :SEQ=>"gtac", :SEQ_LEN=>4}
    EXP

    assert_equal(expected, collect_result)
  end

  private

  # Append an add_key step built from +options+ to the pipeline and run it,
  # reading from @input and writing to @output2.
  def run_add_key(**options)
    @p.add_key(**options).run(input: @input, output: @output2)
  end
end
|