BioDSL 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
data/Rakefile
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
require 'bundler'
|
|
2
|
+
require 'English'
|
|
2
3
|
require 'rake/testtask'
|
|
3
4
|
require 'pp'
|
|
4
5
|
|
|
5
6
|
Bundler::GemHelper.install_tasks
|
|
6
7
|
|
|
7
|
-
task :
|
|
8
|
-
|
|
8
|
+
task default: 'test'
|
|
9
|
+
|
|
9
10
|
Rake::TestTask.new do |t|
|
|
10
|
-
t.description =
|
|
11
|
+
t.description = 'Run test suite'
|
|
11
12
|
t.test_files = Dir['test/**/*'].select { |f| f.match(/\.rb$/) }
|
|
12
13
|
t.warning = true
|
|
13
14
|
end
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
desc 'Run test suite with simplecov'
|
|
16
17
|
task :simplecov do
|
|
17
18
|
ENV['SIMPLECOV'] = 'true'
|
|
@@ -23,7 +24,7 @@ task :doc do
|
|
|
23
24
|
run_docgen
|
|
24
25
|
end
|
|
25
26
|
|
|
26
|
-
task :
|
|
27
|
+
task build: :boilerplate
|
|
27
28
|
|
|
28
29
|
desc 'Add or update license boilerplate in source files'
|
|
29
30
|
task :boilerplate do
|
|
@@ -31,9 +32,9 @@ task :boilerplate do
|
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def run_docgen
|
|
34
|
-
$stderr.puts
|
|
35
|
+
$stderr.puts 'Building docs'
|
|
35
36
|
`yardoc lib/`
|
|
36
|
-
$stderr.puts
|
|
37
|
+
$stderr.puts 'Docs done'
|
|
37
38
|
end
|
|
38
39
|
|
|
39
40
|
def run_boilerplate
|
|
@@ -61,7 +62,7 @@ def run_boilerplate
|
|
|
61
62
|
# #
|
|
62
63
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
63
64
|
# #
|
|
64
|
-
# This software is part of BioDSL (
|
|
65
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
65
66
|
# #
|
|
66
67
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
67
68
|
END
|
|
@@ -69,26 +70,27 @@ END
|
|
|
69
70
|
files = Rake::FileList.new('bin/**/*', 'lib/**/*.rb', 'test/**/*.rb')
|
|
70
71
|
|
|
71
72
|
files.each do |file|
|
|
72
|
-
body =
|
|
73
|
+
body = ''
|
|
73
74
|
|
|
74
75
|
File.open(file) do |ios|
|
|
75
76
|
body = ios.read
|
|
76
77
|
end
|
|
77
78
|
|
|
78
|
-
if body.match(/Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/)
|
|
79
|
+
if body.match(/Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/) &&
|
|
80
|
+
Regexp.last_match[1].to_i != Time.now.year
|
|
79
81
|
STDERR.puts "Updating boilerplate: #{file}"
|
|
80
82
|
|
|
81
|
-
body.sub!(/Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/,
|
|
83
|
+
body.sub!(/Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/,
|
|
84
|
+
"Copyright (C) 2007-#{Time.now.year} Martin Asser Hansen")
|
|
82
85
|
|
|
83
86
|
File.open(file, 'w') do |ios|
|
|
84
87
|
ios.puts body
|
|
85
88
|
end
|
|
86
89
|
end
|
|
87
90
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
end
|
|
91
|
+
next if body.match('Copyright')
|
|
92
|
+
STDERR.puts "Warning: missing boilerplate in #{file}"
|
|
93
|
+
STDERR.puts body.split($RS).first(10).join($RS)
|
|
94
|
+
exit
|
|
93
95
|
end
|
|
94
96
|
end
|
data/lib/BioDSL.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of BioDSL (
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
data/lib/BioDSL/cary.rb
CHANGED
|
@@ -1,34 +1,37 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
4
|
-
#
|
|
5
|
-
# This program is free software; you can redistribute it and/or
|
|
6
|
-
# modify it under the terms of the GNU General Public License
|
|
7
|
-
# as published by the Free Software Foundation; either version 2
|
|
8
|
-
# of the License, or (at your option) any later version.
|
|
9
|
-
#
|
|
10
|
-
# This program is distributed in the hope that it will be useful,
|
|
11
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
-
# GNU General Public License for more details.
|
|
14
|
-
#
|
|
15
|
-
# You should have received a copy of the GNU General Public License
|
|
16
|
-
# along with this program; if not, write to the Free Software
|
|
17
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
27
29
|
module BioDSL
|
|
28
30
|
# Error class for all exceptions to do with CAry.
|
|
29
31
|
class CAryError < StandardError; end
|
|
30
32
|
|
|
31
|
-
# Class to manipulate a Ruby byte array which is fit for inline C
|
|
33
|
+
# Class to manipulate a Ruby byte array which is fit for inline C
|
|
34
|
+
# manipulation.
|
|
32
35
|
class CAry
|
|
33
36
|
require 'inline'
|
|
34
37
|
|
|
@@ -37,8 +40,8 @@ module BioDSL
|
|
|
37
40
|
# Class method to store to a given file a given ary.
|
|
38
41
|
def self.store(file, ary)
|
|
39
42
|
File.open(file, 'w') do |ios|
|
|
40
|
-
ios.write([ary.count].pack(
|
|
41
|
-
ios.write([ary.size].pack(
|
|
43
|
+
ios.write([ary.count].pack('I'))
|
|
44
|
+
ios.write([ary.size].pack('I'))
|
|
42
45
|
ios.write(ary.ary)
|
|
43
46
|
end
|
|
44
47
|
|
|
@@ -52,20 +55,20 @@ module BioDSL
|
|
|
52
55
|
ary = nil
|
|
53
56
|
|
|
54
57
|
File.open(file) do |ios|
|
|
55
|
-
count = ios.read(4).unpack(
|
|
56
|
-
size = ios.read(4).unpack(
|
|
58
|
+
count = ios.read(4).unpack('I').first
|
|
59
|
+
size = ios.read(4).unpack('I').first
|
|
57
60
|
ary = ios.read
|
|
58
61
|
end
|
|
59
62
|
|
|
60
63
|
CAry.new(count, size, ary)
|
|
61
64
|
end
|
|
62
65
|
|
|
63
|
-
# Method to initialize a new CAry object which is either empty
|
|
64
|
-
#
|
|
65
|
-
#
|
|
66
|
+
# Method to initialize a new CAry object which is either empty or created
|
|
67
|
+
# from a given byte string. Count is the number of elements in the ary, and
|
|
68
|
+
# size is the byte size of a element.
|
|
66
69
|
def initialize(count, size, ary = nil)
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
fail CAryError, "count must be positive - not #{count}" if count <= 0
|
|
71
|
+
fail CAryError, "size must be positive - not #{size}" if size <= 0
|
|
69
72
|
|
|
70
73
|
@count = count
|
|
71
74
|
@size = size
|
|
@@ -95,34 +98,58 @@ module BioDSL
|
|
|
95
98
|
end
|
|
96
99
|
|
|
97
100
|
# Method to do bitwise AND operation between two CArys.
|
|
98
|
-
def &(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
def &(other)
|
|
102
|
+
unless other.is_a? CAry
|
|
103
|
+
fail BioDSL::CAryError, "Bad object type: #{other.class}"
|
|
104
|
+
end
|
|
102
105
|
|
|
103
|
-
|
|
106
|
+
if @count != other.count
|
|
107
|
+
fail BioDSL::CAryError, "Counts mismatch: #{@count} != #{other.count}"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
if @size != other.size
|
|
111
|
+
fail BioDSL::CAryError, "Sizes mismatch: #{@size} != #{other.size}"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
bitwise_and_C(@ary, other.ary, @count * @size)
|
|
104
115
|
|
|
105
116
|
self
|
|
106
117
|
end
|
|
107
118
|
|
|
108
119
|
# Method to do bitwise OR operation between two CArys.
|
|
109
|
-
def |(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
120
|
+
def |(other)
|
|
121
|
+
unless other.is_a? CAry
|
|
122
|
+
fail BioDSL::CAryError, "Bad object type: #{other.class}"
|
|
123
|
+
end
|
|
113
124
|
|
|
114
|
-
|
|
125
|
+
if @count != other.count
|
|
126
|
+
fail BioDSL::CAryError, "Counts mismatch: #{@count} != #{other.count}"
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
if @size != other.size
|
|
130
|
+
fail BioDSL::CAryError, "Sizes mismatch: #{@size} != #{other.size}"
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
bitwise_or_C(@ary, other.ary, @count * @size)
|
|
115
134
|
|
|
116
135
|
self
|
|
117
136
|
end
|
|
118
137
|
|
|
119
138
|
# Method to do bitwise XOR operation between two CArys.
|
|
120
|
-
def ^(
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
139
|
+
def ^(other)
|
|
140
|
+
unless other.is_a? CAry
|
|
141
|
+
fail BioDSL::CAryError, "Bad object type: #{other.class}"
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
if @count != other.count
|
|
145
|
+
fail BioDSL::CAryError, "Counts mismatch: #{@count} != #{other.count}"
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
if @size != other.size
|
|
149
|
+
fail BioDSL::CAryError, "Sizes mismatch: #{@size} != #{other.size}"
|
|
150
|
+
end
|
|
124
151
|
|
|
125
|
-
bitwise_xor_C(@ary,
|
|
152
|
+
bitwise_xor_C(@ary, other.ary, @count * @size)
|
|
126
153
|
|
|
127
154
|
self
|
|
128
155
|
end
|
|
@@ -138,8 +165,6 @@ module BioDSL
|
|
|
138
165
|
@ary.unpack('B*').first
|
|
139
166
|
end
|
|
140
167
|
|
|
141
|
-
private
|
|
142
|
-
|
|
143
168
|
inline do |builder|
|
|
144
169
|
# Method that given a byte array and its size in bytes
|
|
145
170
|
# sets all bytes to 0.
|
data/lib/BioDSL/command.rb
CHANGED
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
# #
|
|
21
21
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
22
|
# #
|
|
23
|
-
# This software is part of BioDSL (
|
|
23
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
24
24
|
# #
|
|
25
25
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
26
|
module BioDSL
|
|
@@ -80,7 +80,7 @@ module BioDSL
|
|
|
80
80
|
# @return [BioDSL::Status] returns self.
|
|
81
81
|
def calc_time_elapsed
|
|
82
82
|
delta = @status[:time_stop] - @status[:time_start]
|
|
83
|
-
@status[:time_elapsed] = (Time.mktime(0) + delta).strftime(
|
|
83
|
+
@status[:time_elapsed] = (Time.mktime(0) + delta).strftime('%H:%M:%S')
|
|
84
84
|
|
|
85
85
|
self
|
|
86
86
|
end
|
data/lib/BioDSL/commands.rb
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
# #
|
|
17
17
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
18
18
|
# #
|
|
19
|
-
# This software is part of BioDSL (
|
|
19
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
20
20
|
# #
|
|
21
21
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
22
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -145,7 +145,7 @@ module BioDSL
|
|
|
145
145
|
entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
|
|
146
146
|
|
|
147
147
|
@status[:sequences_in] += 1
|
|
148
|
-
@status[:residues_in]
|
|
148
|
+
@status[:residues_in] += entry.length
|
|
149
149
|
|
|
150
150
|
ios.puts entry.to_fasta
|
|
151
151
|
end
|
|
@@ -158,9 +158,9 @@ module BioDSL
|
|
|
158
158
|
BioDSL::Fasta.open(tmp_out) do |ios|
|
|
159
159
|
ios.each do |entry|
|
|
160
160
|
output << entry.to_bp
|
|
161
|
-
@status[:records_out]
|
|
161
|
+
@status[:records_out] += 1
|
|
162
162
|
@status[:sequences_out] += 1
|
|
163
|
-
@status[:residues_out]
|
|
163
|
+
@status[:residues_out] += entry.length
|
|
164
164
|
end
|
|
165
165
|
end
|
|
166
166
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -166,15 +166,15 @@ module BioDSL
|
|
|
166
166
|
#
|
|
167
167
|
# @param seq [String] - Sequence to analyze.
|
|
168
168
|
def analyze_residues(seq)
|
|
169
|
-
@status[:sequences_in]
|
|
169
|
+
@status[:sequences_in] += 1
|
|
170
170
|
@status[:sequences_out] += 1
|
|
171
|
-
@status[:residues_in]
|
|
172
|
-
@status[:residues_out]
|
|
171
|
+
@status[:residues_in] += seq.length
|
|
172
|
+
@status[:residues_out] += seq.length
|
|
173
173
|
|
|
174
174
|
seq.upcase.chars.each_with_index do |char, i|
|
|
175
175
|
c = char.to_sym
|
|
176
176
|
@counts[i][c] += 1
|
|
177
|
-
@total[i]
|
|
177
|
+
@total[i] += 1
|
|
178
178
|
@residues.add(c)
|
|
179
179
|
end
|
|
180
180
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -160,7 +160,7 @@ module BioDSL
|
|
|
160
160
|
# Set default options.
|
|
161
161
|
def defaults
|
|
162
162
|
@options[:mismatch_percent] ||= 20
|
|
163
|
-
@options[:overlap_min]
|
|
163
|
+
@options[:overlap_min] ||= 1
|
|
164
164
|
end
|
|
165
165
|
|
|
166
166
|
# Output a record to the stream if a stram is provided.
|
|
@@ -182,7 +182,7 @@ module BioDSL
|
|
|
182
182
|
entry1, entry2 = records2entries(record1, record2)
|
|
183
183
|
|
|
184
184
|
if overlap_possible?(entry1, entry2, @options[:overlap_min]) &&
|
|
185
|
-
assembled = assemble_entries(entry1, entry2)
|
|
185
|
+
(assembled = assemble_entries(entry1, entry2))
|
|
186
186
|
output_assembled(assembled, output)
|
|
187
187
|
elsif @options[:merge_unassembled]
|
|
188
188
|
output_merged(entry1, entry2, output)
|
|
@@ -211,7 +211,7 @@ module BioDSL
|
|
|
211
211
|
end
|
|
212
212
|
|
|
213
213
|
@status[:sequences_in] += 2
|
|
214
|
-
@status[:residues_in]
|
|
214
|
+
@status[:residues_in] += entry1.length + entry2.length
|
|
215
215
|
|
|
216
216
|
[entry1, entry2]
|
|
217
217
|
end
|
|
@@ -252,10 +252,10 @@ module BioDSL
|
|
|
252
252
|
def output_assembled(assembled, output)
|
|
253
253
|
output << assembled2record(assembled)
|
|
254
254
|
|
|
255
|
-
@status[:assembled]
|
|
256
|
-
@status[:records_out]
|
|
255
|
+
@status[:assembled] += 1
|
|
256
|
+
@status[:records_out] += 1
|
|
257
257
|
@status[:sequences_out] += 1
|
|
258
|
-
@status[:residues_out]
|
|
258
|
+
@status[:residues_out] += assembled.length
|
|
259
259
|
end
|
|
260
260
|
|
|
261
261
|
# Convert a sequence entry to a BioPiece record with hamming distance and
|
|
@@ -289,10 +289,10 @@ module BioDSL
|
|
|
289
289
|
|
|
290
290
|
output << entry2record(entry1)
|
|
291
291
|
|
|
292
|
-
@status[:unassembled]
|
|
292
|
+
@status[:unassembled] += 1
|
|
293
293
|
@status[:sequences_out] += 1
|
|
294
|
-
@status[:residues_out]
|
|
295
|
-
@status[:records_out]
|
|
294
|
+
@status[:residues_out] += entry1.length
|
|
295
|
+
@status[:records_out] += 1
|
|
296
296
|
end
|
|
297
297
|
|
|
298
298
|
# Output unassembled entries to the stream.
|
|
@@ -304,10 +304,10 @@ module BioDSL
|
|
|
304
304
|
output << entry2record(entry1)
|
|
305
305
|
output << entry2record(entry2)
|
|
306
306
|
|
|
307
|
-
@status[:unassembled]
|
|
307
|
+
@status[:unassembled] += 2
|
|
308
308
|
@status[:sequences_out] += 2
|
|
309
|
-
@status[:residues_out]
|
|
310
|
-
@status[:records_out]
|
|
309
|
+
@status[:residues_out] += entry1.length + entry2.length
|
|
310
|
+
@status[:records_out] += 2
|
|
311
311
|
end
|
|
312
312
|
|
|
313
313
|
# Converts a sequence entry to a BioPeice record.
|