BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require 'bundler'
|
|
2
|
+
require 'rake/testtask'
|
|
3
|
+
require 'pp'
|
|
4
|
+
|
|
5
|
+
# Register the gem packaging tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# Plain `rake` runs the test task.
task default: 'test'

# Test task: every Ruby file below test/ is loaded, with warnings enabled.
Rake::TestTask.new do |test_task|
  test_task.description = "Run test suite"
  test_task.test_files  = Dir['test/**/*'].grep(/\.rb$/)
  test_task.warning     = true
end

desc 'Run test suite with simplecov'
task :simplecov do
  # The SIMPLECOV environment variable is set before the test task is invoked;
  # presumably test/helper.rb reads it to switch coverage on - TODO confirm.
  ENV['SIMPLECOV'] = 'true'
  Rake::Task['test'].invoke
end

desc 'Add or update yardoc'
task(:doc) { run_docgen }

# The boilerplate check/update runs as a prerequisite of every build.
task build: :boilerplate

desc 'Add or update license boilerplate in source files'
task(:boilerplate) { run_boilerplate }
|
|
32
|
+
|
|
33
|
+
# Generate the YARD documentation for everything below lib/.
#
# yardoc is run through Kernel#system so its output reaches the terminal and
# its exit status can be checked. If yardoc is missing or fails, the run is
# aborted with a non-zero exit status instead of reporting "Docs done".
def run_docgen
  $stderr.puts "Building docs"

  # system returns nil when the command cannot be executed and false when it
  # exits non-zero; both mean the docs were not built.
  system('yardoc', 'lib/') || abort('yardoc failed - docs not built')

  $stderr.puts "Docs done"
end
|
|
38
|
+
|
|
39
|
+
# Check and refresh the license boilerplate of all source files.
#
# Every regular file matched by bin/**/*, lib/**/*.rb and test/**/*.rb is
# read. If it contains a line of the form
# "Copyright (C) 2007-YYYY Martin Asser Hansen" whose year differs from the
# current year, the year is replaced and the file is rewritten in place.
#
# A file containing no "Copyright" text at all stops the run: a warning and
# the first 10 lines of the file are printed to STDERR and the process exits
# with status 1, so that tasks depending on this one (e.g. :build) fail
# visibly instead of ending silently with a success status.
def run_boilerplate
  copyright = /Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/
  year      = Time.now.year
  files     = Rake::FileList.new('bin/**/*', 'lib/**/*.rb', 'test/**/*.rb')

  files.each do |file|
    # bin/**/* also matches directories, which cannot be read as files.
    next unless File.file?(file)

    body  = File.read(file)
    found = body.match(copyright)

    if found && found[1].to_i != year
      STDERR.puts "Updating boilerplate: #{file}"

      body.sub!(copyright, "Copyright (C) 2007-#{year} Martin Asser Hansen")

      File.open(file, 'w') do |ios|
        ios.puts body
      end
    end

    next if body.include?('Copyright')

    STDERR.puts "Warning: missing boilerplate in #{file}"
    STDERR.puts body.split($/).first(10).join($/)
    exit 1
  end
end
|
data/lib/BioDSL/cary.rb
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.BioDSL.org). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
|
|
27
|
+
module BioDSL
  # Error class for all exceptions to do with CAry.
  class CAryError < StandardError; end

  # Fixed-size array of +count+ elements of +size+ bytes each, kept in a Ruby
  # String so the bytes can be manipulated directly by inline C.
  #
  # NOTE: the bitwise operators (&, |, ^ and ~) modify the receiver in place
  # and return it; they do not allocate a new CAry.
  class CAry
    require 'inline'

    attr_reader :count, :size, :ary

    # Class method to store a given ary to a given file.
    #
    # The file holds count and size as two native unsigned ints (pack "I")
    # followed by the raw bytes. The file is opened in binary mode so the bytes
    # are written without newline or encoding translation.
    #
    # @param file [String] Path of the file to write.
    # @param ary  [CAry]   Object to store.
    #
    # @return [nil]
    def self.store(file, ary)
      File.open(file, 'wb') do |ios|
        ios.write([ary.count].pack("I"))
        ios.write([ary.size].pack("I"))
        ios.write(ary.ary)
      end

      nil
    end

    # Class method to retrieve and return an ary from a given file written by
    # CAry.store.
    #
    # @param file [String] Path of the file to read.
    #
    # @raise [CAryError] If the file is too short to hold the 8 byte header or
    #   the stored values are rejected by CAry.new.
    #
    # @return [CAry]
    def self.retrieve(file)
      count = nil
      size  = nil
      ary   = nil

      File.open(file, 'rb') do |ios|
        header = ios.read(8)

        if header.nil? || header.bytesize < 8
          raise CAryError, "Truncated CAry header in #{file}"
        end

        count, size = header.unpack("II")
        ary         = ios.read
      end

      CAry.new(count, size, ary)
    end

    # Method to initialize a new CAry object which is either empty (all bytes
    # zero) or created from a given byte string. Count is the number of
    # elements in the ary, and size is the byte size of an element.
    #
    # @param count [Integer]     Number of elements.
    # @param size  [Integer]     Byte size of one element.
    # @param ary   [String, nil] Optional byte string to use as storage.
    #
    # @raise [CAryError] If count or size is not positive, or if the given
    #   byte string is shorter than count * size bytes.
    def initialize(count, size, ary = nil)
      raise CAryError, "count must be positive - not #{count}" if count <= 0
      raise CAryError, "size must be positive - not #{size}"   if size <= 0

      bytes = count * size

      # The C routines touch count * size bytes unconditionally, so a shorter
      # string would make them access memory outside the string buffer.
      if ary && ary.bytesize < bytes
        raise CAryError, "ary too short: #{ary.bytesize} < #{bytes} bytes"
      end

      @count = count
      @size  = size
      @ary   = ary || "\0" * bytes
    end

    # Method to set all bits in this ary to 1 (in place).
    #
    # @return [CAry] self.
    def fill!
      zero!
      ~self
    end

    # Method to return a new ary with the same count and size as this one and
    # all bits set to 1. The receiver is not modified.
    #
    # @return [CAry] New object.
    def fill
      CAry.new(@count, @size).fill!
    end

    # Method to set all bytes in this ary to zero (in place).
    #
    # @return [CAry] self.
    def zero!
      zero_ary_C(@ary, @count * @size)
      self
    end

    # Method to return a new ary with the same count and size as this one and
    # all bytes set to zero. The receiver is not modified.
    #
    # @return [CAry] New object.
    def zero
      CAry.new(@count, @size).zero!
    end

    # Method to do bitwise AND operation between two CArys. The result is
    # saved in the receiver.
    #
    # @param cary [CAry] Other operand with identical count and size.
    #
    # @raise [CAryError] On wrong operand type or count/size mismatch.
    #
    # @return [CAry] self.
    def &(cary)
      check_operand(cary)

      bitwise_and_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to do bitwise OR operation between two CArys. The result is
    # saved in the receiver.
    #
    # @param cary [CAry] Other operand with identical count and size.
    #
    # @raise [CAryError] On wrong operand type or count/size mismatch.
    #
    # @return [CAry] self.
    def |(cary)
      check_operand(cary)

      bitwise_or_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to do bitwise XOR operation between two CArys. The result is
    # saved in the receiver.
    #
    # @param cary [CAry] Other operand with identical count and size.
    #
    # @raise [CAryError] On wrong operand type or count/size mismatch.
    #
    # @return [CAry] self.
    def ^(cary)
      check_operand(cary)

      bitwise_xor_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to complement all bits in an ary (in place).
    #
    # @return [CAry] self.
    def ~
      complement_ary_C(@ary, @count * @size)
      self
    end

    # Method that returns the bits of the ary as a string of 0s and 1s, most
    # significant bit of each byte first (unpack "B*").
    #
    # @return [String]
    def to_s
      @ary.unpack('B*').first
    end

    private

    # Verify that the given object can be combined with self in a bitwise
    # operation.
    #
    # @param cary [Object] Operand to check.
    #
    # @raise [CAryError] Unless cary is a CAry with the same count and size.
    def check_operand(cary)
      raise BioDSL::CAryError, "Bad object type: #{cary.class}" unless cary.is_a? CAry
      raise BioDSL::CAryError, "Counts mismatch: #{self.count} != #{cary.count}" if self.count != cary.count
      raise BioDSL::CAryError, "Sizes mismatch: #{self.size} != #{cary.size}" if self.size != cary.size
    end

    inline do |builder|
      # Method that given a byte array and its size in bytes
      # sets all bytes to 0.
      builder.c %{
        void zero_ary_C(
          VALUE _ary,       // Byte array to zero.
          VALUE _ary_size   // Size of array.
        )
        {
          char         *ary      = (char *) StringValuePtr(_ary);
          unsigned int  ary_size = FIX2UINT(_ary_size);

          bzero(ary, ary_size);
        }
      }

      # Method that given two byte arrays perform bitwise AND operation
      # between these and save the result in the first.
      builder.c %{
        void bitwise_and_C(
          VALUE _ary1,      // Byte array to receive.
          VALUE _ary2,      // Byte array to &.
          VALUE _ary_size   // Size of arrays.
        )
        {
          char         *ary1     = (char *) StringValuePtr(_ary1);
          char         *ary2     = (char *) StringValuePtr(_ary2);
          unsigned int  ary_size = FIX2UINT(_ary_size);
          int           i        = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] & ary2[i];
          }
        }
      }

      # Method that given two byte arrays perform bitwise OR operation
      # between these and save the result in the first.
      builder.c %{
        void bitwise_or_C(
          VALUE _ary1,      // Byte array to receive.
          VALUE _ary2,      // Byte array to |.
          VALUE _ary_size   // Size of arrays.
        )
        {
          char         *ary1     = (char *) StringValuePtr(_ary1);
          char         *ary2     = (char *) StringValuePtr(_ary2);
          unsigned int  ary_size = FIX2UINT(_ary_size);
          int           i        = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] | ary2[i];
          }
        }
      }

      # Method that given two byte arrays perform bitwise XOR operation
      # between these and save the result in the first.
      builder.c %{
        void bitwise_xor_C(
          VALUE _ary1,      // Byte array to receive.
          VALUE _ary2,      // Byte array to ^.
          VALUE _ary_size   // Size of arrays.
        )
        {
          char         *ary1     = (char *) StringValuePtr(_ary1);
          char         *ary2     = (char *) StringValuePtr(_ary2);
          unsigned int  ary_size = FIX2UINT(_ary_size);
          int           i        = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] ^ ary2[i];
          }
        }
      }

      # Method that given a byte array and its size in bytes
      # complements all bits using bitwise ~.
      builder.c %{
        void complement_ary_C(
          VALUE _ary,       // Byte array to complement.
          VALUE _ary_size   // Size of array.
        )
        {
          char         *ary      = (char *) StringValuePtr(_ary);
          unsigned int  ary_size = FIX2UINT(_ary_size);
          int           i        = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary[i] = ~ary[i];
          }
        }
      }
    end
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
3
|
+
# #
|
|
4
|
+
# This program is free software; you can redistribute it and/or #
|
|
5
|
+
# modify it under the terms of the GNU General Public License #
|
|
6
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
7
|
+
# of the License, or (at your option) any later version. #
|
|
8
|
+
# #
|
|
9
|
+
# This program is distributed in the hope that it will be useful, #
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
12
|
+
# GNU General Public License for more details. #
|
|
13
|
+
# #
|
|
14
|
+
# You should have received a copy of the GNU General Public License #
|
|
15
|
+
# along with this program; if not, write to the Free Software #
|
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
17
|
+
# USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
module BioDSL
  # Command class for initiating and calling commands.
  #
  # A Command wraps a lambda together with its name, its options and a status
  # hash. The status hash is handed to the lambda as the last argument on
  # #call and is afterwards extended with timing and delta information.
  class Command
    attr_reader :name, :status, :options
    attr_accessor :run_status

    # Constructor for Command objects.
    #
    # @param name    [Symbol] Name of command.
    # @param lmb     [Proc]   Lambda for command callback execution.
    # @param options [Hash]   Options hash.
    def initialize(name, lmb, options)
      @name       = name
      @lmb        = lmb
      @run_status = 'running'
      @options    = options
      @status     = {}
    end

    # Callback method for executing a Command lambda.
    #
    # The lambda is called with the given arguments followed by the status
    # hash. When it returns, run_status is set to 'done', :time_stop is
    # recorded and the elapsed time and deltas are calculated. The lambda is
    # expected to have stored :time_start in the status hash.
    #
    # @param args [Array] List of arguments used in the callback.
    #
    # @return [BioDSL::Command] returns self.
    def call(*args)
      @lmb.call(*args, @status)

      @run_status         = 'done'
      @status[:time_stop] = Time.now
      calc_time_elapsed
      calc_delta
    end

    # Return string representation of a Command object.
    #
    # String values are double quoted (a :delimiter value is additionally
    # escaped with Regexp.quote), Symbol values are prefixed with a colon and
    # all other values are interpolated as they are.
    #
    # @return [String] With formatted command.
    def to_s
      # Always return a String - a Symbol here breaks string interpolation,
      # which falls back to the default object representation.
      return @name.to_s if @options.empty?

      options_list = @options.map do |key, value|
        case value
        when String
          value = Regexp.quote(value) if key == :delimiter
          %(#{key}: "#{value}")
        when Symbol
          "#{key}: :#{value}"
        else
          "#{key}: #{value}"
        end
      end

      "#{@name}(#{options_list.join(', ')})"
    end

    # Add a key with time_elapsed to the status.
    #
    # The value is formatted as HH:MM:SS from the whole seconds between
    # :time_start and :time_stop. The hour field is not wrapped at 24, so runs
    # lasting a day or more are reported correctly.
    #
    # @return [BioDSL::Command] returns self.
    def calc_time_elapsed
      # Clamp at zero in case the wall clock was set back during the run.
      seconds = [(@status[:time_stop] - @status[:time_start]).to_i, 0].max

      minutes, secs = seconds.divmod(60)
      hours, mins   = minutes.divmod(60)

      @status[:time_elapsed] = format('%02d:%02d:%02d', hours, mins, secs)

      self
    end

    # Locate all status key pairs <foo>_in and <foo>_out and add new status
    # keys <foo>_delta with the numerical difference and <foo>_delta_percent
    # with the difference in percent.
    #
    # @return [BioDSL::Command] returns self.
    def calc_delta
      @status.keys.each do |in_key|
        key_name = in_key.to_s

        next unless key_name.end_with?('_in')

        base    = key_name[0...-3]
        out_key = "#{base}_out".to_sym

        next unless @status.key? out_key

        @status["#{base}_delta".to_sym]         = delta(in_key, out_key)
        @status["#{base}_delta_percent".to_sym] = delta_percent(in_key, out_key)
      end

      self
    end

    private

    # Calculate the difference between status values given two status keys.
    #
    # @param in_key  [Symbol] Status hash key.
    # @param out_key [Symbol] Status hash key.
    #
    # @return [Numeric] Difference (out - in).
    def delta(in_key, out_key)
      @status[out_key] - @status[in_key]
    end

    # Calculate the percent difference between status values given two status
    # keys. The difference (out - in) is taken relative to the larger of the
    # two values.
    #
    # @param in_key  [Symbol] Status hash key.
    # @param out_key [Symbol] Status hash key.
    #
    # @return [Float] Percentage rounded to 2 decimals.
    def delta_percent(in_key, out_key)
      d = @status[out_key] - @status[in_key]

      return 0.0 if d == 0

      (100 * d.to_f / [@status[out_key], @status[in_key]].max).round(2)
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of the BioDSL framework (www.BioDSL.org). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
module BioDSL
  # == Add a key/value pair to all records in stream.
  #
  # +add_key+ sets a specified key in every record in the stream, either to a
  # fixed value or to a sequential number (zero-based) preceded by a specified
  # prefix. An existing key is overwritten.
  #
  # == Usage
  #
  #    add_key(key: <string>[, value: <string> | prefix: <string>])
  #
  # === Options
  #
  # * key:    <string> - Key to add or overwrite.
  # * value:  <string> - Value to use with +key+.
  # * prefix: <string> - Prefix to use with +key+.
  #
  # == Examples
  #
  # To add a value to all records in the stream do:
  #
  #    add_key(key: "FOO", value: "BAR")
  #
  # To add a sequential number to all records in the stream do:
  #
  #    add_key(key: :ID, prefix: "")
  #
  # Finally, to add a sequential number with a prefix do:
  #
  #    add_key(key: :ID, prefix: "ID_")
  class AddKey
    STATS = %i(records_in records_out)

    # Constructor for AddKey. The options are stored and checked right away.
    #
    # @param [Hash] options Options hash.
    # @option options [Symbol] :key    Key to add or replace.
    # @option options [String] :value  Value to use with :key.
    # @option options [String] :prefix Prefix to use with :key.
    #
    # @return [AddKey] Returns class instance.
    def initialize(options)
      @options = options

      check_options
    end

    # Build the command lambda. For each record read from input the lambda
    # sets the configured key (converted to a Symbol) to the :value option or,
    # when no :value is given, to the :prefix option followed by the
    # zero-based index of the record. The record is then emitted to output.
    #
    # The lambda takes the arguments input, output and status.
    #
    # @return [Proc] Returns the command lambda.
    def lmb
      lambda do |input, output, status|
        # status_init is defined outside this file; presumably it binds the
        # given status hash to @status and zeroes the STATS counters - TODO
        # confirm.
        status_init(status, STATS)

        input.each_with_index do |record, index|
          @status[:records_in] += 1

          replacement = @options[:value] || "#{@options[:prefix]}#{index}"
          record[@options[:key].to_sym] = replacement

          output << record

          @status[:records_out] += 1
        end
      end
    end

    private

    # Check all options using the option helpers, which are defined outside
    # this file: allowed keys are :key, :value and :prefix, with :key listed
    # as required and :value/:prefix passed to options_required_unique.
    def check_options
      options_allowed(@options, :key, :value, :prefix)
      options_required(@options, :key)
      options_required_unique(@options, :value, :prefix)
    end
  end
end
|