BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
data/Rakefile ADDED
@@ -0,0 +1,94 @@
1
+ require 'bundler'
2
+ require 'rake/testtask'
3
+ require 'pp'
4
+
5
# Install the standard gem tasks (build, install, release) from Bundler.
Bundler::GemHelper.install_tasks

# Running plain `rake` runs the test suite.
task default: 'test'

# Define the `test` task: every Ruby file below test/ is a test file and
# Ruby warnings are enabled while running them.
Rake::TestTask.new do |test_task|
  test_task.description = "Run test suite"
  test_task.test_files  = Dir['test/**/*'].grep(/\.rb$/)
  test_task.warning     = true
end

desc 'Run test suite with simplecov'
task :simplecov do
  # The SIMPLECOV environment variable is set before the tests are invoked.
  ENV['SIMPLECOV'] = 'true'
  Rake::Task['test'].invoke
end

desc 'Add or update yardoc'
task(:doc) { run_docgen }

# Building the gem first runs the boilerplate task.
task build: :boilerplate

desc 'Add or update license boilerplate in source files'
task(:boilerplate) { run_boilerplate }
32
+
33
# Build the API documentation by running `yardoc` on the lib/ directory.
#
# Progress messages are written to stderr. The output of yardoc is captured
# and only shown if yardoc exits with a non-zero status, in which case the
# rake run is aborted instead of reporting "Docs done".
#
# @raise [SystemExit] If yardoc exits unsuccessfully.
def run_docgen
  $stderr.puts "Building docs"

  output = `yardoc lib/`

  # Backticks never raise on a failing command - the exit status must be
  # checked explicitly.
  abort "yardoc failed:\n#{output}" unless $?.success?

  $stderr.puts "Docs done"
end
38
+
39
# Bring the copyright year in the license boilerplate of all source files up
# to date.
#
# Every regular file matched by bin/**/*, lib/**/*.rb and test/**/*.rb is
# read. If it contains a "Copyright (C) 2007-<year> Martin Asser Hansen" line
# with a year different from the current one, the year is replaced and the
# file is rewritten. If a file contains no "Copyright" text at all, a warning
# and the first 10 lines of the file are printed to stderr and the process
# exits with status 1, so that dependent tasks (e.g. build) fail visibly.
#
# @raise [SystemExit] If a file without boilerplate is found.
def run_boilerplate
  year    = Time.now.year
  pattern = /Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/
  files   = Rake::FileList.new('bin/**/*', 'lib/**/*.rb', 'test/**/*.rb')

  files.each do |file|
    # The bin/**/* glob also matches directories, which cannot be read.
    next unless File.file?(file)

    body  = File.read(file)
    match = body.match(pattern)

    if match && match[1].to_i != year
      STDERR.puts "Updating boilerplate: #{file}"

      body.sub!(pattern, "Copyright (C) 2007-#{year} Martin Asser Hansen")

      File.open(file, 'w') { |ios| ios.puts body }
    end

    next if body.include?('Copyright')

    STDERR.puts "Warning: missing boilerplate in #{file}"
    STDERR.puts body.split($/).first(10).join($/)

    # Non-zero status: a missing boilerplate must not look like success.
    exit 1
  end
end
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

require 'BioDSL'

# Example pipeline: read the sequences in FASTQ format from the file
# `test.fq` and write them in FASTA format to the file `test.fna`.
# The pipeline is only executed when `run` is called.

BP.new
  .read_fastq(input: "test.fq")
  .write_fasta(output: "test.fna")
  .run
@@ -0,0 +1,242 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
18
+ # #
19
+ # http://www.gnu.org/copyleft/gpl.html #
20
+ # #
21
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
22
+ # #
23
+ # This software is part of BioDSL (www.BioDSL.org). #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+
27
module BioDSL
  # Error class for all exceptions to do with CAry.
  class CAryError < StandardError; end

  # Class to manipulate a Ruby byte array which is fit for inline C
  # manipulation. The array is held in a String used as a raw byte buffer of
  # count * size bytes.
  #
  # Note that the bitwise operators &, | and ^ as well as ~ are destructive:
  # the result is stored in the receiver, which is also the return value.
  class CAry
    require 'inline'

    attr_reader :count, :size, :ary

    # Class method to store to a given file a given ary.
    #
    # The file consists of the count and the size, each packed as a native
    # unsigned int, followed by the raw bytes. The file is opened in binary
    # mode so the bytes are written untranslated on all platforms.
    #
    # @param file [String] Path to output file.
    # @param ary  [CAry]   Object to store.
    #
    # @return [nil]
    def self.store(file, ary)
      File.open(file, 'wb') do |ios|
        ios.write([ary.count].pack("I"))
        ios.write([ary.size].pack("I"))
        ios.write(ary.ary)
      end

      nil
    end

    # Class method to retrieve and return an ary from a given file written
    # with CAry.store.
    #
    # @param file [String] Path to input file.
    #
    # @raise [CAryError] If the file is too short to hold the header or the
    #   number of bytes promised by the header.
    #
    # @return [CAry]
    def self.retrieve(file)
      count = nil
      size  = nil
      ary   = nil

      File.open(file, 'rb') do |ios|
        header = ios.read(8)

        if header.nil? || header.bytesize < 8
          raise CAryError, "Truncated header in file: #{file}"
        end

        count, size = header.unpack("II")
        ary         = ios.read
      end

      CAry.new(count, size, ary)
    end

    # Method to initialize a new CAry object which is either empty
    # or created from a given byte string. Count is the number of
    # elements in the ary, and size is the byte size of a element.
    #
    # @param count [Integer] Number of elements.
    # @param size  [Integer] Size of one element in bytes.
    # @param ary   [String, nil] Optional byte string to use as buffer.
    #
    # @raise [CAryError] If count or size is not positive, or if a given
    #   byte string is shorter than count * size bytes.
    def initialize(count, size, ary = nil)
      raise CAryError, "count must be positive - not #{count}" if count <= 0
      raise CAryError, "size must be positive - not #{size}"   if size <= 0

      # The C routines touch count * size bytes without further checks, so a
      # shorter buffer would be read and written out of bounds.
      if ary && ary.bytesize < count * size
        raise CAryError, "ary too short: #{ary.bytesize} < #{count * size}"
      end

      @count = count
      @size  = size
      @ary   = ary || "\0" * count * size
    end

    # Method to set all bits in an ary to 1 (in place).
    #
    # @return [CAry] self.
    def fill!
      self.zero!
      self.~
    end

    # Method that returns a new CAry with the same count and size and all
    # bits set to 1. The receiver is left untouched.
    #
    # @return [CAry] New object.
    def fill
      CAry.new(@count, @size).fill!
    end

    # Method to set all members in an ary to zero (in place).
    #
    # @return [CAry] self.
    def zero!
      zero_ary_C(@ary, @count * @size)
      self
    end

    # Method that returns a new CAry with the same count and size and all
    # members set to zero. The receiver is left untouched.
    #
    # @return [CAry] New object.
    def zero
      CAry.new(@count, @size).zero!
    end

    # Method to do bitwise AND operation between two CArys. The result is
    # stored in the receiver.
    #
    # @param cary [CAry] Right hand operand with matching count and size.
    #
    # @raise [CAryError] On wrong operand type or mismatching dimensions.
    #
    # @return [CAry] self.
    def &(cary)
      raise BioDSL::CAryError, "Bad object type: #{cary.class}" unless cary.is_a? CAry
      raise BioDSL::CAryError, "Counts mismatch: #{self.count} != #{cary.count}" if self.count != cary.count
      raise BioDSL::CAryError, "Sizes mismatch: #{self.size} != #{cary.size}" if self.size != cary.size

      bitwise_and_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to do bitwise OR operation between two CArys. The result is
    # stored in the receiver.
    #
    # @param cary [CAry] Right hand operand with matching count and size.
    #
    # @raise [CAryError] On wrong operand type or mismatching dimensions.
    #
    # @return [CAry] self.
    def |(cary)
      raise BioDSL::CAryError, "Bad object type: #{cary.class}" unless cary.is_a? CAry
      raise BioDSL::CAryError, "Counts mismatch: #{self.count} != #{cary.count}" if self.count != cary.count
      raise BioDSL::CAryError, "Sizes mismatch: #{self.size} != #{cary.size}" if self.size != cary.size

      bitwise_or_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to do bitwise XOR operation between two CArys. The result is
    # stored in the receiver.
    #
    # @param cary [CAry] Right hand operand with matching count and size.
    #
    # @raise [CAryError] On wrong operand type or mismatching dimensions.
    #
    # @return [CAry] self.
    def ^(cary)
      raise BioDSL::CAryError, "Bad object type: #{cary.class}" unless cary.is_a? CAry
      raise BioDSL::CAryError, "Counts mismatch: #{self.count} != #{cary.count}" if self.count != cary.count
      raise BioDSL::CAryError, "Sizes mismatch: #{self.size} != #{cary.size}" if self.size != cary.size

      bitwise_xor_C(@ary, cary.ary, @count * @size)

      self
    end

    # Method to complement all bits in an ary (in place).
    #
    # @return [CAry] self.
    def ~
      complement_ary_C(@ary, @count * @size)
      self
    end

    # Method that returns a string of 0's and 1's with the bits of the ary,
    # most significant bit of each byte first.
    #
    # @return [String]
    def to_s
      @ary.unpack('B*').first
    end

    private

    inline do |builder|
      # Method that given a byte array and its size in bytes
      # sets all bytes to 0.
      builder.c %{
        void zero_ary_C(
          VALUE _ary,       // Byte array to zero.
          VALUE _ary_size   // Size of array.
        )
        {
          char *ary = (char *) StringValuePtr(_ary);
          unsigned int ary_size = FIX2UINT(_ary_size);

          bzero(ary, ary_size);
        }
      }

      # Method that given two byte arrays performs a bitwise AND operation
      # between these and saves the result in the first.
      builder.c %{
        void bitwise_and_C(
          VALUE _ary1,       // Byte array to recieve.
          VALUE _ary2,       // Byte array to &.
          VALUE _ary_size    // Size of arrays.
        )
        {
          char *ary1 = (char *) StringValuePtr(_ary1);
          char *ary2 = (char *) StringValuePtr(_ary2);
          unsigned int ary_size = FIX2UINT(_ary_size);
          int i = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] & ary2[i];
          }
        }
      }

      # Method that given two byte arrays performs a bitwise OR operation
      # between these and saves the result in the first.
      builder.c %{
        void bitwise_or_C(
          VALUE _ary1,       // Byte array to recieve.
          VALUE _ary2,       // Byte array to &.
          VALUE _ary_size    // Size of arrays.
        )
        {
          char *ary1 = (char *) StringValuePtr(_ary1);
          char *ary2 = (char *) StringValuePtr(_ary2);
          unsigned int ary_size = FIX2UINT(_ary_size);
          int i = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] | ary2[i];
          }
        }
      }

      # Method that given two byte arrays performs a bitwise XOR operation
      # between these and saves the result in the first.
      builder.c %{
        void bitwise_xor_C(
          VALUE _ary1,       // Byte array to recieve.
          VALUE _ary2,       // Byte array to &.
          VALUE _ary_size    // Size of arrays.
        )
        {
          char *ary1 = (char *) StringValuePtr(_ary1);
          char *ary2 = (char *) StringValuePtr(_ary2);
          unsigned int ary_size = FIX2UINT(_ary_size);
          int i = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary1[i] = ary1[i] ^ ary2[i];
          }
        }
      }

      # Method that given a byte array and its size in bytes
      # complements all bits using bitwise ~.
      builder.c %{
        void complement_ary_C(
          VALUE _ary,       // Byte array complement.
          VALUE _ary_size   // Size of array.
        )
        {
          char *ary = (char *) StringValuePtr(_ary);
          unsigned int ary_size = FIX2UINT(_ary_size);
          int i = 0;

          for (i = ary_size - 1; i >= 0; i--)
          {
            ary[i] = ~ary[i];
          }
        }
      }
    end
  end
end
@@ -0,0 +1,133 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
3
+ # #
4
+ # This program is free software; you can redistribute it and/or #
5
+ # modify it under the terms of the GNU General Public License #
6
+ # as published by the Free Software Foundation; either version 2 #
7
+ # of the License, or (at your option) any later version. #
8
+ # #
9
+ # This program is distributed in the hope that it will be useful, #
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
12
+ # GNU General Public License for more details. #
13
+ # #
14
+ # You should have received a copy of the GNU General Public License #
15
+ # along with this program; if not, write to the Free Software #
16
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
17
+ # USA. #
18
+ # #
19
+ # http://www.gnu.org/copyleft/gpl.html #
20
+ # #
21
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
22
+ # #
23
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
module BioDSL
  # Command class for initiating and calling commands.
  class Command
    attr_reader :name, :status, :options
    attr_accessor :run_status

    # Constructor for Command objects.
    #
    # @param name    [Symbol] Name of command.
    # @param lmb     [Proc]   Lambda for command callback execution.
    # @param options [Hash]   Options hash.
    def initialize(name, lmb, options)
      @name       = name
      @lmb        = lmb
      @run_status = 'running'
      @options    = options
      @status     = {}
    end

    # Callback method for executing a Command lambda.
    #
    # The lambda is called with the given arguments followed by the status
    # hash. Afterwards the run status is set to 'done', the stop time is
    # recorded and the elapsed time and deltas are calculated. The lambda is
    # expected to have stored :time_start in the status hash.
    #
    # @param args [Array] List of arguments used in the callback.
    #
    # @return [BioDSL::Command] returns self.
    def call(*args)
      @lmb.call(*args, @status)

      @run_status         = 'done'
      @status[:time_stop] = Time.now
      calc_time_elapsed
      calc_delta
    end

    # Return string representation of a Command object.
    #
    # A String is always returned - also for commands without options -
    # since Ruby ignores a non-String result of to_s during interpolation.
    #
    # @return [String] With formated command.
    def to_s
      return @name.to_s if @options.empty?

      options_list = @options.map { |key, value| format_option(key, value) }

      "#{@name}(#{options_list.join(', ')})"
    end

    # Add a key with time_elapsed to the status.
    #
    # The elapsed time is formatted as HH:MM:SS where the hours are not
    # wrapped at 24, so runs lasting more than a day are reported correctly.
    # A negative time difference is reported as 00:00:00.
    #
    # @return [BioDSL::Command] returns self.
    def calc_time_elapsed
      delta   = @status[:time_stop] - @status[:time_start]
      seconds = [delta.to_i, 0].max

      hours, rest   = seconds.divmod(3600)
      minutes, secs = rest.divmod(60)

      @status[:time_elapsed] = format('%02d:%02d:%02d', hours, minutes, secs)

      self
    end

    # Locate all status key pairs <foo>_in and <foo>_out and add a new status
    # key <foo>_delta with the numerical difference and a key
    # <foo>_delta_percent with the difference in percent.
    #
    # @return [BioDSL::Command] returns self.
    def calc_delta
      in_keys = @status.keys.select { |key| key.to_s.end_with?('_in') }

      in_keys.each do |in_key|
        base    = in_key.to_s[0...-3]
        out_key = "#{base}_out".to_sym

        next unless @status.key? out_key

        @status["#{base}_delta".to_sym]         = delta(in_key, out_key)
        @status["#{base}_delta_percent".to_sym] = delta_percent(in_key, out_key)
      end

      self
    end

    private

    # Format a single option as it would be written in a command call.
    #
    # @param key   [Symbol] Option name.
    # @param value [Object] Option value.
    #
    # @return [String] Formatted key/value pair.
    def format_option(key, value)
      case value
      when String
        # Escape delimiters such as tabs so they are shown as typed.
        value = Regexp.quote(value) if key == :delimiter
        %(#{key}: "#{value}")
      when Symbol
        "#{key}: :#{value}"
      else
        "#{key}: #{value}"
      end
    end

    # Calculate the difference between status values given two status keys.
    #
    # @param in_key  [Symbol] Status hash key.
    # @param out_key [Symbol] Status hash key.
    #
    # @return [Integer] Difference.
    def delta(in_key, out_key)
      @status[out_key] - @status[in_key]
    end

    # Calculate the percent difference between status values given two status
    # keys. The difference is taken relative to the larger of the two values.
    #
    # @param in_key  [Symbol] Status hash key.
    # @param out_key [Symbol] Status hash key.
    #
    # @return [Float] Percentage rounded to 2 decimals.
    def delta_percent(in_key, out_key)
      d = delta(in_key, out_key)

      return 0.0 if d == 0

      (100 * d.to_f / [@status[out_key], @status[in_key]].max).round(2)
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
2
+ # #
3
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
4
+ # #
5
+ # This program is free software; you can redistribute it and/or #
6
+ # modify it under the terms of the GNU General Public License #
7
+ # as published by the Free Software Foundation; either version 2 #
8
+ # of the License, or (at your option) any later version. #
9
+ # #
10
+ # This program is distributed in the hope that it will be useful, #
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
13
+ # GNU General Public License for more details. #
14
+ # #
15
+ # You should have received a copy of the GNU General Public License #
16
+ # along with this program; if not, write to the Free Software #
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
18
+ # USA. #
19
+ # #
20
+ # http://www.gnu.org/copyleft/gpl.html #
21
+ # #
22
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
+ # #
24
+ # This software is part of the BioDSL framework (www.BioDSL.org). #
25
+ # #
26
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
+
28
module BioDSL
  # == Add a key/value pair to all records in stream.
  #
  # +add_key+ either adds the same fixed value under a specified key to every
  # record in the stream, or adds a zero-based running number, prepended with
  # a specified prefix, under that key.
  #
  # == Usage
  #
  #    add_key(key: <string>[, value: <string> | prefix: <string>])
  #
  # === Options
  #
  # * key:    <string> - Key to add or overwrite.
  # * value:  <string> - Value to use with +key+.
  # * prefix: <string> - Prefix to use with +key+.
  #
  # == Examples
  #
  # To add a value to all records in the stream do:
  #
  #    add_key(key: "FOO", value: "BAR")
  #
  # To add a running number to all records in the stream do:
  #
  #    add_key(key: :ID, prefix: "")
  #
  # Finally, to add a running number with a prefix do:
  #
  #    add_key(key: :ID, prefix: "ID_")
  class AddKey
    STATS = %i(records_in records_out)

    # Constructor for AddKey. The options are checked on construction.
    #
    # @param [Hash] options Options hash.
    # @option options [Symbol] :key    Key to add or replace.
    # @option options [String] :value  Value to use with :key.
    # @option options [String] :prefix Prefix to use with :key.
    #
    # @return [AddKey] Returns class instance.
    def initialize(options)
      @options = options

      check_options
    end

    # Return the command lambda which, for each record from the input, sets
    # the key to either the fixed value or the prefix followed by the
    # zero-based index of the record, and emits the record to the output.
    # The number of records read and written is counted in the status.
    #
    # @return [Proc] Returns the command lambda.
    def lmb
      ->(input, output, status) do
        status_init(status, STATS)

        input.each_with_index do |record, index|
          @status[:records_in] += 1

          fixed = @options[:value]

          record[@options[:key].to_sym] =
            if fixed
              fixed
            else
              "#{@options[:prefix]}#{index}"
            end

          output << record

          @status[:records_out] += 1
        end
      end
    end

    private

    # Check all options: only known options, :key is mandatory and :value
    # and :prefix are checked with options_required_unique.
    def check_options
      options_allowed(@options, :key, :value, :prefix)
      options_required(@options, :key)
      options_required_unique(@options, :value, :prefix)
    end
  end
end