BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
data/test/BioDSL/test_pipeline.rb (+187 -0)
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
6
+ # #
7
+ # This program is free software; you can redistribute it and/or #
8
+ # modify it under the terms of the GNU General Public License #
9
+ # as published by the Free Software Foundation; either version 2 #
10
+ # of the License, or (at your option) any later version. #
11
+ # #
12
+ # This program is distributed in the hope that it will be useful, #
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15
+ # GNU General Public License for more details. #
16
+ # #
17
+ # You should have received a copy of the GNU General Public License #
18
+ # along with this program; if not, write to the Free Software #
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
20
+ # USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
+ # rubocop: disable ClassLength
33
+
34
+ # Test class for Pipeline.
35
+ class PipelineTest < Test::Unit::TestCase
36
+ require 'yaml'
37
+
38
+ def setup
39
+ @tmpdir = Dir.mktmpdir('BioDSL')
40
+
41
+ setup_fasta_files
42
+
43
+ Mail.defaults do
44
+ delivery_method :test
45
+ end
46
+
47
+ @p = BP.new
48
+ end
49
+
50
+ def setup_fasta_files
51
+ @fasta_file = File.join(@tmpdir, 'test.fna')
52
+ @fasta_file2 = File.join(@tmpdir, 'test2.fna')
53
+
54
+ File.open(@fasta_file, 'w') do |ios|
55
+ ios.puts <<-DATA.gsub(/^\s+\|/, '')
56
+ |>test1
57
+ |atcg
58
+ |>test2
59
+ |tgac
60
+ DATA
61
+ end
62
+ end
63
+
64
+ def teardown
65
+ FileUtils.rm_r @tmpdir
66
+
67
+ Mail::TestMailer.deliveries.clear
68
+ end
69
+
70
+ test 'BioDSL::Pipeline#to_s w/o options and w/o .run() returns OK' do
71
+ @p.commands << BioDSL::Command.new('dump', nil, {})
72
+ expected = %(BP.new.dump)
73
+ assert_equal(expected, @p.to_s)
74
+ end
75
+
76
+ test 'BioDSL::Pipeline#to_s with options and w/o .run() returns OK' do
77
+ @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
78
+ expected = %(BP.new.read_fasta(input: "test.fna"))
79
+ assert_equal(expected, @p.to_s)
80
+ end
81
+
82
+ test 'BioDSL::Pipeline#to_s w/o options and .run() returns OK' do
83
+ @p.commands << BioDSL::Command.new('dump', nil, {})
84
+ @p.complete = true
85
+ expected = %(BP.new.dump.run)
86
+ assert_equal(expected, @p.run.to_s)
87
+ end
88
+
89
+ test 'BioDSL::Pipeline#to_s with options and .run() returns OK' do
90
+ @p.commands << BioDSL::Command.new('read_fasta', nil, input: 'test.fna')
91
+ @p.complete = true
92
+ expected = %{BP.new.read_fasta(input: "test.fna").run}
93
+ assert_equal(expected, @p.run.to_s)
94
+ end
95
+
96
+ test 'BioDSL::Pipeline#run with no commands raises' do
97
+ assert_raise(BioDSL::PipelineError) { @p.run }
98
+ end
99
+
100
+ test 'BioDSL::Pipeline#size returns correctly' do
101
+ assert_equal(0, @p.size)
102
+ @p.dump
103
+ assert_equal(1, @p.size)
104
+ end
105
+
106
+ test 'BioDSL::Pipeline#+ with non-Pipeline object raises' do
107
+ assert_raise(BioDSL::PipelineError) { @p + 'foo' }
108
+ end
109
+
110
+ test 'BioDSL::Pipeline#+ with Pipeline object dont raise' do
111
+ assert_nothing_raised { @p + @p }
112
+ end
113
+
114
+ test 'BioDSL::Pipeline#+ of two Pipelines return correctly' do
115
+ p = BioDSL::Pipeline.new.dump(first: 2)
116
+ assert_equal('BP.new.dump(first: 2)', (@p + p).to_s)
117
+ end
118
+
119
+ test 'BioDSL::Pipeline#+ of three Pipelines return correctly' do
120
+ p1 = BioDSL::Pipeline.new.dump(first: 2)
121
+ p2 = BioDSL::Pipeline.new.dump(last: 3)
122
+ assert_equal('BP.new.dump(first: 2).dump(last: 3)', (@p + p1 + p2).to_s)
123
+ end
124
+
125
+ test 'BioDSL::Pipeline#pop decreases size' do
126
+ @p.dump
127
+ assert_equal(1, @p.size)
128
+ @p.pop
129
+ assert_equal(0, @p.size)
130
+ @p.pop
131
+ assert_equal(0, @p.size)
132
+ end
133
+
134
+ test 'BioDSL::Pipeline#pop returns correctly' do
135
+ @p.dump
136
+ assert_equal(BioDSL::Pipeline.new.dump.to_s, @p.pop.to_s)
137
+ assert_equal(BioDSL::Pipeline.new.to_s, @p.to_s)
138
+ end
139
+
140
+ test 'BioDSL::Pipeline#status without .run() returns correctly' do
141
+ status = @p.read_fasta(input: __FILE__).status
142
+ assert_equal({}, status.first)
143
+ end
144
+
145
+ test 'BioDSL::Pipeline#status with .run() returns correctly' do
146
+ expected = %{BioDSL::Pipeline.new.read_fasta(input: "#{@fasta_file}")}
147
+ @p.expects(:status).returns(expected)
148
+ assert_equal(expected, @p.read_fasta(input: @fasta_file).run.status)
149
+ end
150
+
151
+ test 'BioDSL::Pipeline#run with disallowed option raises' do
152
+ assert_raise(BioDSL::OptionError) do
153
+ @p.read_fasta(input: @fasta_file).run(foo: 'bar')
154
+ end
155
+ end
156
+
157
+ test 'BioDSL::Pipeline#run returns correctly' do
158
+ @p.read_fasta(input: @fasta_file).write_fasta(output: @fasta_file2).run
159
+
160
+ expected = File.read(@fasta_file)
161
+ result = File.read(@fasta_file2)
162
+
163
+ assert_equal(expected, result)
164
+ end
165
+
166
+ test 'BioDSL::Pipeline#run with subject but no email raises' do
167
+ assert_raise(BioDSL::OptionError) do
168
+ @p.read_fasta(input: @fasta_file).run(subject: 'foobar')
169
+ end
170
+ end
171
+
172
+ test 'BioDSL::Pipeline#run with email sends mail correctly' do
173
+ omit
174
+ @p.read_fasta(input: @fasta_file).run(email: 'test@foobar.com')
175
+ assert_equal(1, Mail::TestMailer.deliveries.length)
176
+ assert_equal(@p.to_s, Mail::TestMailer.deliveries.first.subject)
177
+ end
178
+
179
+ test 'BioDSL::Pipeline#run with email and subject sends correctly' do
180
+ omit
181
+ @p.read_fasta(input: @fasta_file).
182
+ run(email: 'test@foobar.com', subject: 'foobar')
183
+
184
+ assert_equal(1, Mail::TestMailer.deliveries.length)
185
+ assert_equal('foobar', Mail::TestMailer.deliveries.first.subject)
186
+ end
187
+ end