BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for FilterRrna.
34
class TestFilterRrna < Test::Unit::TestCase
  # Builds the fixture shared by every test: a scratch directory, two stream
  # pipes, two sequence records written to the first pipe, a reference FASTA
  # file holding the first record, and the output of indexdb_rna run on that
  # FASTA file. All tests are omitted when either external executable cannot
  # be located with BioDSL::Filesys.which.
  def setup
    @tmp_dir = Dir.mktmpdir('filter_rrna')

    omit('sortmerna not found')   unless BioDSL::Filesys.which('sortmerna')
    omit('indexdb_rna not found') unless BioDSL::Filesys.which('indexdb_rna')

    setup_test_streams
    setup_test_data
    setup_fasta_file
    setup_indexdb

    @p = BioDSL::Pipeline.new
  end

  # Creates the pipe feeding the pipeline (@input/@output) and the pipe the
  # pipeline writes its result to (@input2/@output2).
  def setup_test_streams
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe
  end

  # Writes two records with SEQ_LEN 60 to the first pipe and closes its
  # write end.
  def setup_test_data
    @hash1 = {
      SEQ_NAME: 'test1',
      SEQ: 'gatcagatcgtacgagcagcatctgacgtatcgatcgttgattagttgctagctatgcag',
      SEQ_LEN: 60
    }

    @hash2 = {
      SEQ_NAME: 'test2',
      SEQ: 'ggttagtcagcgactgactgactacgatatatatcgatacgcggaggtatatatagagag',
      SEQ_LEN: 60
    }

    @output.write @hash1
    @output.write @hash2
    @output.close
  end

  # Writes @hash1 as the single entry of a FASTA file inside the scratch
  # directory and sets @ref_index to that path with an ".idx" suffix.
  def setup_fasta_file
    @ref_fasta = File.join(@tmp_dir, 'test.fna')
    @ref_index = "#{@ref_fasta}.idx"

    BioDSL::Fasta.open(@ref_fasta, 'w') do |ios|
      ios.puts BioDSL::Seq.new_bp(@hash1).to_fasta
    end
  end

  # Runs indexdb_rna with "--ref <fasta>,<index>" and raises when it fails.
  #
  # The command is passed to Kernel#system as an argument list, so no shell
  # is involved and a scratch path containing spaces or shell metacharacters
  # reaches the program unchanged. The return value of system is checked
  # directly (true only on a zero exit status), which avoids relying on
  # $CHILD_STATUS being made available by a `require 'English'` elsewhere.
  #
  # Raises RuntimeError if the command cannot be run or exits non-zero.
  def setup_indexdb
    argv = ['indexdb_rna', '--ref', "#{@ref_fasta},#{@ref_index}"]

    return if system(*argv)

    fail "Running command failed: #{argv.join(' ')}"
  end

  # Removes the scratch directory and everything created in it.
  def teardown
    FileUtils.rm_rf(@tmp_dir)
  end

  test 'BioDSL::Pipeline::FilterRrna with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.filter_rrna(ref_fasta: __FILE__, ref_index: __FILE__, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::FilterRrna with valid options don\'t raise' do
    assert_nothing_raised do
      @p.filter_rrna(ref_fasta: __FILE__, ref_index: __FILE__)
    end
  end

  test 'BioDSL::Pipeline::FilterRrna returns correctly' do
    @p.filter_rrna(ref_fasta: @ref_fasta, ref_index: "#{@ref_index}*").
      run(input: @input, output: @output2)

    # The reference FASTA holds only test1, so test2 is the one record
    # expected to come out. The heredoc is flattened to a single line by
    # stripping leading whitespace, the "|" margin markers and newlines.
    expected = <<-EXP.gsub(/^\s+|\|/, '').delete("\n")
      |{:SEQ_NAME=>"test2",
      | :SEQ=>"ggttagtcagcgactgactgactacgatatatatcgatacgcggaggtatatatagagag",
      | :SEQ_LEN=>60}
    EXP

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::FilterRrna status returns correctly' do
    @p.filter_rrna(ref_fasta: @ref_fasta, ref_index: "#{@ref_index}*").
      run(input: @input, output: @output2)

    assert_equal(2, @p.status.first[:records_in])
    assert_equal(1, @p.status.first[:records_out])
    assert_equal(2, @p.status.first[:sequences_in])
    assert_equal(1, @p.status.first[:sequences_out])
    assert_equal(120, @p.status.first[:residues_in])
    assert_equal(60, @p.status.first[:residues_out])
  end
end
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for Genecall.
34
class TestGenecall < Test::Unit::TestCase
  # Omits every test unless the prodigal executable named in the omit
  # message can be located with BioDSL::Filesys.which, then creates a fresh
  # pipeline. The guard previously probed for 'ray', so the tests were
  # skipped or run based on the wrong tool.
  def setup
    omit('prodigal not found') unless BioDSL::Filesys.which('prodigal')

    @p = BioDSL::Pipeline.new
  end

  # These tests previously called assemble_seq_ray, so the genecall command
  # was never exercised by its own test file.
  test 'BioDSL::Pipeline::Genecall with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.genecall(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::Genecall with valid options don\'t raise' do
    # NOTE(review): called without options because genecall's accepted
    # option keys are not visible from this file (cpus: 1 belonged to the
    # Ray test this was copied from) - add a known-valid option once
    # confirmed against the command.
    assert_nothing_raised { @p.genecall }
  end

  # FIXME: tests missing!
end
@@ -0,0 +1,398 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for the grab command.
34
+ # rubocop:disable ClassLength
35
class TestGrab < Test::Unit::TestCase
  # Rendered form of the three records written by write_stream, one line
  # each, as they are compared against collect_result.
  TEST1_LINE = %({:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4}\n)
  TEST2_LINE = %({:SEQ_NAME=>"test2", :SEQ=>"DSEQM", :SEQ_LEN=>5}\n)
  FOO_LINE   = %({:FOO=>"SEQ"}\n)

  # Creates a scratch directory, the input and output pipes, the three input
  # records and the two pattern files, then a fresh pipeline. @e is the
  # error class expected from invalid option combinations.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    write_stream
    write_test_file1
    write_test_file2

    @p = BioDSL::Pipeline.new
    @e = BioDSL::OptionError
  end

  # Writes the three input records to the first pipe and closes its write
  # end.
  def write_stream
    [
      { SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4 },
      { SEQ_NAME: 'test2', SEQ: 'DSEQM', SEQ_LEN: 5 },
      { FOO: 'SEQ' }
    ].each { |record| @output.write(record) }

    @output.close
  end

  # Writes the pattern file used by the select_file test.
  def write_test_file1
    @pattern_file = File.join(@tmpdir, 'patterns.txt')

    File.open(@pattern_file, 'w') { |ios| ios.puts('test', 'seq') }
  end

  # Writes the pattern file used by the reject_file test; the first pattern
  # is written from an Integer.
  def write_test_file2
    @pattern_file2 = File.join(@tmpdir, 'patterns2.txt')

    File.open(@pattern_file2, 'w') { |ios| ios.puts(4, 'SEQ') }
  end

  # Removes the scratch directory.
  def teardown
    FileUtils.rm_r(@tmpdir)
  end

  # Adds a grab step with the given options to the pipeline and runs the
  # pipeline from @input into @output2.
  def run_grab(**options)
    @p.grab(**options).run(input: @input, output: @output2)
  end

  test 'BioDSL::Pipeline::Grab with invalid options raises' do
    assert_raise(@e) do
      @p.grab(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::Grab with select and reject raises' do
    assert_raise(@e) do
      @p.grab(select: 'foo', reject: 'bar')
    end
  end

  test 'BioDSL::Pipeline::Grab with keys_only and values_only raises' do
    assert_raise(@e) { @p.grab(select: 'foo', keys_only: true, values_only: true) }
  end

  test 'BioDSL::Pipeline::Grab with evaluate and conflicting keys raises' do
    conflicting = [
      { select: 'foo' },
      { reject: 'foo' },
      { keys: 'foo' },
      { keys_only: true },
      { values_only: true },
      { ignore_case: true },
      { exact: true }
    ]

    conflicting.each do |extra|
      assert_raise(@e) { @p.grab(evaluate: 0, **extra) }
    end
  end

  test 'BioDSL::Pipeline::Grab with missing select_file raises' do
    assert_raise(@e) do
      @p.grab(select_file: '___select')
    end
  end

  test 'BioDSL::Pipeline::Grab with missing reject_file raises' do
    assert_raise(@e) do
      @p.grab(reject_file: '___reject')
    end
  end

  test 'BioDSL::Pipeline::Grab#to_s with select and symbol key return OK' do
    @p.grab(select: :SEQ_NAME)

    assert_equal('BP.new.grab(select: :SEQ_NAME)', @p.to_s)
  end

  test 'BioDSL::Pipeline::Grab with no hits return OK' do
    run_grab(select: 'fish')

    assert_equal('', collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and key hit return OK' do
    run_grab(select: 'SEQ_NAME')

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab status returns correctly' do
    run_grab(select: 'SEQ_NAME')

    { records_in: 3, records_out: 2 }.each do |key, count|
      assert_equal(count, @p.status.first[key])
    end
  end

  test 'BioDSL::Pipeline::Grab with multiple select patterns return OK' do
    run_grab(select: %w[est1 QM])

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with multiple reject patterns return OK' do
    run_grab(reject: %w[est QM])

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and key hit return OK' do
    run_grab(reject: 'SEQ_NAME')

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject with symbol return OK' do
    run_grab(reject: :SEQ_NAME)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and value hit return OK' do
    run_grab(select: 'test1')

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and value hit return OK' do
    run_grab(reject: 'test1')

    assert_equal(TEST2_LINE + FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and keys_only return OK' do
    run_grab(select: 'SEQ', keys_only: true)

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and keys_only return OK' do
    run_grab(reject: 'SEQ', keys_only: true)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and values_only return OK' do
    run_grab(select: 'SEQ', values_only: true)

    assert_equal(TEST2_LINE + FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and values_only return OK' do
    run_grab(reject: 'SEQ', values_only: true)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select and values_only and ^ return OK' do
    run_grab(select: '^SEQ', values_only: true)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. reject and values_only and ^ return OK' do
    run_grab(reject: '^SEQ', values_only: true)

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and ignore_case return OK' do
    run_grab(select: 'ATCG', ignore_case: true)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and ignore_case return OK' do
    run_grab(reject: 'ATCG', ignore_case: true)

    assert_equal(TEST2_LINE + FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and specified keys return OK' do
    run_grab(select: 'SEQ', keys: :FOO)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select and keys in Array return OK' do
    run_grab(select: 'SEQ', values_only: true, keys: [:FOO, :SEQ])

    assert_equal(TEST2_LINE + FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select and keys in String return OK' do
    run_grab(select: 'SEQ', values_only: true, keys: ':FOO, :SEQ')

    assert_equal(TEST2_LINE + FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject and specified keys return OK' do
    run_grab(reject: 'SEQ', keys: :FOO)

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with evaluate return OK' do
    run_grab(evaluate: ':SEQ_LEN > 4')

    assert_equal(TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with select_file return OK' do
    run_grab(select_file: @pattern_file)

    assert_equal(TEST1_LINE + TEST2_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select and exact w/o match return OK' do
    run_grab(select: 'tcg', exact: true)

    assert_equal('', collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select and exact match return OK' do
    run_grab(select: 'atcg', exact: true)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select and exact number match return OK' do
    run_grab(select: 4, exact: true)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact number and keys_only OK' do
    run_grab(select: 4, exact: true, keys_only: true)

    assert_equal('', collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact number and values_only OK' do
    run_grab(select: 4, exact: true, values_only: true)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact, keys and no match OK' do
    run_grab(select: 'atcg', exact: true, keys: :SEQ_LEN)

    assert_equal('', collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact, keys and match return OK' do
    run_grab(select: 'atcg', exact: true, keys: :SEQ)

    assert_equal(TEST1_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact, keys_only and no match ' \
       'return OK' do
    run_grab(select: 'atcg', exact: true, keys_only: true)

    assert_equal('', collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact, keys_only and String ' \
       'match return OK' do
    run_grab(select: 'FOO', exact: true, keys_only: true)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab w. select, exact, keys_only and Symbol ' \
       'match return OK' do
    run_grab(select: :FOO, exact: true, keys_only: true)

    assert_equal(FOO_LINE, collect_result)
  end

  test 'BioDSL::Pipeline::Grab with reject_file return OK' do
    run_grab(reject_file: @pattern_file2, values_only: true, keys: :SEQ)

    assert_equal(TEST1_LINE + FOO_LINE, collect_result)
  end
end
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for IndexTaxonomy.
34
class TestIndexTaxonomy < Test::Unit::TestCase
  # Creates a scratch directory, two stream pipes and a fresh pipeline for
  # each test.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  # Removes the scratch directory.
  def teardown
    FileUtils.rm_r(@tmpdir)
  end

  test 'BioDSL::Pipeline::IndexTaxonomy with invalid options raises' do
    rejected = { output_dir: @tmpdir, foo: 'bar' }

    assert_raise(BioDSL::OptionError) { @p.index_taxonomy(**rejected) }
  end

  test 'BioDSL::Pipeline::IndexTaxonomy with valid options don\'t raise' do
    accepted = {
      output_dir: @tmpdir,
      kmer_size: 8,
      step_size: 1,
      prefix: 'foo'
    }

    assert_nothing_raised { @p.index_taxonomy(**accepted) }
  end

  # TODO: write some tests!
end