BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for CollectOtus: option validation, emitted records and status counters of Pipeline#collect_otus.
34
+ class TestCollectOtus < Test::Unit::TestCase
35
+ def setup # Preloads @input with seven records and builds a fresh pipeline for each test.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ @output.write(one: 1, two: 2, three: 3) # the only record without TYPE/S_ID/SAMPLE keys
40
+ @output.write(TYPE: 'H', S_ID: 'OTU_0', SAMPLE: 'Sample0')
41
+ @output.write(TYPE: 'H', S_ID: 'OTU_0', SAMPLE: 'Sample0')
42
+ @output.write(TYPE: 'H', S_ID: 'OTU_0', SAMPLE: 'Sample1')
43
+ @output.write(TYPE: 'H', S_ID: 'OTU_1', SAMPLE: 'Sample0')
44
+ @output.write(TYPE: 'H', S_ID: 'OTU_1', SAMPLE: 'Sample1')
45
+ @output.write(TYPE: 'H', S_ID: 'OTU_1', SAMPLE: 'Sample1')
46
+ @output.close # writing end is closed once all fixture records are written
47
+
48
+ @p = BioDSL::Pipeline.new
49
+ end
50
+
51
+ test 'BioDSL::Pipeline#collect_otus with disallowed option raises' do
52
+ assert_raise(BioDSL::OptionError) { @p.collect_otus(foo: 'bar') } # :foo is used as an option that must be rejected
53
+ end
54
+
55
+ test 'BioDSL::Pipeline#collect_otus outputs correctly' do # expects the 7 input records followed by one OTU record per S_ID
56
+ @p.collect_otus.run(input: @input, output: @output2)
57
+ expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
58
+ |{:one=>1, :two=>2, :three=>3}
59
+ |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample0"}
60
+ |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample0"}
61
+ |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample1"}
62
+ |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample0"}
63
+ |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample1"}
64
+ |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample1"}
65
+ |{:RECORD_TYPE=>"OTU", :OTU=>"OTU_0", :SAMPLE0_COUNT=>2,
66
+ | :SAMPLE1_COUNT=>1}
67
+ |{:RECORD_TYPE=>"OTU", :OTU=>"OTU_1", :SAMPLE0_COUNT=>1,
68
+ | :SAMPLE1_COUNT=>2}
69
+ EXP
70
+
71
+ assert_equal(expected, collect_result.delete("\n")) # collect_result is not defined in this file (presumably from test/helper - verify); newlines are stripped on both sides
72
+ end
73
+
74
+ test 'BioDSL::Pipeline#collect_otus status outputs correctly' do
75
+ @p.collect_otus.run(input: @input, output: @output2)
76
+
77
+ assert_equal(7, @p.status.first[:records_in]) # setup wrote seven records
78
+ assert_equal(9, @p.status.first[:records_out]) # same total as the nine records expected in the output test
79
+ assert_equal(6, @p.status.first[:hits_in]) # six fixture records have TYPE: 'H' - presumably what hits_in counts
80
+ assert_equal(2, @p.status.first[:hits_out]) # presumably the two RECORD_TYPE "OTU" records - verify against the command
81
+ end
82
+ end
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for ComplementSeq: option validation, DNA and RNA complements and status counters of Pipeline#complement_seq.
34
+ class TestComplementSeq < Test::Unit::TestCase
35
+ def setup # Creates the two stream pipes and a fresh pipeline; each test writes its own input record.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ @p = BioDSL::Pipeline.new
40
+ end
41
+
42
+ test 'BioDSL::Pipeline::ComplementSeq with invalid options raises' do
43
+ assert_raise(BioDSL::OptionError) { @p.complement_seq(foo: 'bar') } # :foo is used as an option that must be rejected
44
+ end
45
+
46
+ test 'BioDSL::Pipeline::ComplementSeq of DNA returns correctly' do
47
+ @output.write(SEQ: 'gatcGATCGT') # mixed-case input; the expected complement keeps the case of each position
48
+ @output.close
49
+ @p.complement_seq.run(input: @input, output: @output2)
50
+
51
+ expected = '{:SEQ=>"ctagCTAGCA", :SEQ_LEN=>10}' # expected record carries SEQ_LEN although the input record had none
52
+
53
+ assert_equal(expected, collect_result.chomp) # collect_result is not defined in this file (presumably from test/helper - verify)
54
+ end
55
+
56
+ test 'BioDSL::Pipeline::ComplementSeq of RNA returns correctly' do
57
+ @output.write(SEQ: 'gaucGAUCGU') # input uses u/U instead of t/T
58
+ @output.close
59
+ @p.complement_seq.run(input: @input, output: @output2)
60
+
61
+ expected = '{:SEQ=>"cuagCUAGCA", :SEQ_LEN=>10}' # a/A is expected to be complemented to u/U, not t/T
62
+
63
+ assert_equal(expected, collect_result.chomp)
64
+ end
65
+
66
+ test 'BioDSL::Pipeline::ComplementSeq status returns correctly' do
67
+ @output.write(SEQ: 'gaucGAUCGU') # a single record holding one 10-residue sequence
68
+ @output.close
69
+ @p.complement_seq.run(input: @input, output: @output2)
70
+
71
+ assert_equal(1, @p.status.first[:records_in])
72
+ assert_equal(1, @p.status.first[:records_out])
73
+ assert_equal(1, @p.status.first[:sequences_in])
74
+ assert_equal(1, @p.status.first[:sequences_out])
75
+ assert_equal(10, @p.status.first[:residues_in]) # length of the input sequence
76
+ assert_equal(10, @p.status.first[:residues_out]) # same length expected on output
77
+ end
78
+ end
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for Count.
34
+ class TestCount < Test::Unit::TestCase
35
+ def setup
36
+ @tmpdir = Dir.mktmpdir('BioDSL')
37
+ @file = File.join(@tmpdir, 'test.txt')
38
+ @file2 = File.join(@tmpdir, 'test.txt')
39
+
40
+ @input, @output = BioDSL::Stream.pipe
41
+ @input2, @output2 = BioDSL::Stream.pipe
42
+
43
+ @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4)
44
+ @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4)
45
+ @output.close
46
+
47
+ @p = BioDSL::Pipeline.new
48
+ end
49
+
50
+ def teardown
51
+ FileUtils.rm_r @tmpdir
52
+ end
53
+
54
+ test 'BioDSL::Pipeline::Count with invalid options raises' do
55
+ assert_raise(BioDSL::OptionError) { @p.count(foo: 'bar') }
56
+ end
57
+
58
+ test 'BioDSL::Pipeline::Count with valid options don\'t raise' do
59
+ assert_nothing_raised { @p.count(output: @file) }
60
+ end
61
+
62
+ test 'BioDSL::Pipeline::Count to file outputs correctly' do
63
+ @p.count(output: @file).run(input: @input, output: @output2)
64
+ result = File.open(@file).read
65
+ expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
66
+ assert_equal(expected, result)
67
+ end
68
+
69
+ test 'BioDSL::Pipeline::Count to existing file raises' do
70
+ `touch #{@file}`
71
+ assert_raise(BioDSL::OptionError) { @p.count(output: @file) }
72
+ end
73
+
74
+ test 'BioDSL::Pipeline::Count to existing file with :force outputs OK' do
75
+ `touch #{@file}`
76
+ @p.count(output: @file, force: true).run(input: @input)
77
+ result = File.open(@file).read
78
+ expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
79
+ assert_equal(expected, result)
80
+ end
81
+
82
+ test 'BioDSL::Pipeline::Count with flux outputs correctly' do
83
+ @p.count(output: @file).run(input: @input, output: @output2)
84
+ result = File.open(@file).read
85
+ expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
86
+ assert_equal(expected, result)
87
+
88
+ stream_expected = <<-EXP.gsub(/^\s+\|/, '')
89
+ |{:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4}
90
+ |{:SEQ_NAME=>"test2", :SEQ=>"gtac", :SEQ_LEN=>4}
91
+ |{:RECORD_TYPE=>"count", :COUNT=>2}
92
+ EXP
93
+
94
+ assert_equal(stream_expected, collect_result)
95
+ end
96
+
97
+ test 'BioDSL::Pipeline::Count status outputs correctly' do
98
+ @p.count.run(input: @input, output: @output2)
99
+
100
+ assert_equal(2, @p.status.first[:records_in])
101
+ assert_equal(3, @p.status.first[:records_out])
102
+ end
103
+ end
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for CountValues: option validation, per-key value counts and status counters of Pipeline#count_values.
34
+ class TestCountValues < Test::Unit::TestCase
35
+ def setup # Preloads @input with six records (V0: 3x HUMAN, 2x DOG, 1x MOUSE; every V1 unique) and builds a fresh pipeline.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ [{V0: 'HUMAN', V1: 'H1'},
40
+ {V0: 'HUMAN', V1: 'H2'},
41
+ {V0: 'HUMAN', V1: 'H3'},
42
+ {V0: 'DOG', V1: 'D1'},
43
+ {V0: 'DOG', V1: 'D2'},
44
+ {V0: 'MOUSE', V1: 'M1'}
45
+ ].each do |record|
46
+ @output.write record
47
+ end
48
+
49
+ @output.close # writing end is closed once all fixture records are written
50
+
51
+ @p = BioDSL::Pipeline.new
52
+ end
53
+
54
+ test 'BioDSL::Pipeline#count_values with disallowed option raises' do
55
+ assert_raise(BioDSL::OptionError) { @p.count_values(foo: 'bar') } # :foo is used as an option that must be rejected
56
+ end
57
+
58
+ test 'BioDSL::Pipeline#count_values with allowed options don\'t raise' do
59
+ assert_nothing_raised { @p.count_values(keys: [:V0]) }
60
+ end
61
+
62
+ test 'BioDSL::Pipeline#count_values returns correctly' do # keys mix a String, a Symbol and :FOO, which no fixture record contains
63
+ @p.count_values(keys: ['V0', :V1, :FOO]).
64
+ run(input: @input, output: @output2)
65
+
66
+ expected = <<-EXP.gsub(/^\s+\|/, '')
67
+ |{:V0=>"HUMAN", :V1=>"H1", :V0_COUNT=>3, :V1_COUNT=>1}
68
+ |{:V0=>"HUMAN", :V1=>"H2", :V0_COUNT=>3, :V1_COUNT=>1}
69
+ |{:V0=>"HUMAN", :V1=>"H3", :V0_COUNT=>3, :V1_COUNT=>1}
70
+ |{:V0=>"DOG", :V1=>"D1", :V0_COUNT=>2, :V1_COUNT=>1}
71
+ |{:V0=>"DOG", :V1=>"D2", :V0_COUNT=>2, :V1_COUNT=>1}
72
+ |{:V0=>"MOUSE", :V1=>"M1", :V0_COUNT=>1, :V1_COUNT=>1}
73
+ EXP
74
+
75
+ assert_equal(expected, collect_result) # no FOO_COUNT is expected; collect_result is not defined in this file (presumably from test/helper - verify)
76
+ end
77
+
78
+ test 'BioDSL::Pipeline#count_values status returns correctly' do
79
+ @p.count_values(keys: ['V0', :V1, :FOO]).
80
+ run(input: @input, output: @output2)
81
+
82
+ assert_equal(6, @p.status.first[:records_in]) # setup wrote six records
83
+ assert_equal(6, @p.status.first[:records_out]) # same number of records expected out as in
84
+ end
85
+ end
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for DegapSeq: option validation, gap removal, status counters and the :columns_only mode of Pipeline#degap_seq.
34
+ class TestDegapSeq < Test::Unit::TestCase
35
+ def setup # Creates the two stream pipes and a fresh pipeline; each test writes its own input records.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ @p = BioDSL::Pipeline.new
40
+ end
41
+
42
+ test 'BioDSL::Pipeline::DegapSeq with invalid options raises' do
43
+ assert_raise(BioDSL::OptionError) { @p.degap_seq(foo: 'bar') } # :foo is used as an option that must be rejected
44
+ end
45
+
46
+ test 'BioDSL::Pipeline::DegapSeq with valid options don\'t raise' do
47
+ assert_nothing_raised { @p.degap_seq(columns_only: true) }
48
+ end
49
+
50
+ test 'BioDSL::Pipeline::DegapSeq returns correctly' do
51
+ @output.write(SEQ: 'AT--C.G~') # contains the characters '-', '.' and '~', none of which appear in the expected SEQ
52
+ @output.close
53
+ @p.degap_seq.run(input: @input, output: @output2)
54
+
55
+ expected = '{:SEQ=>"ATCG", :SEQ_LEN=>4}' # expected record carries SEQ_LEN although the input record had none
56
+
57
+ assert_equal(expected, collect_result.chomp) # collect_result is not defined in this file (presumably from test/helper - verify)
58
+ end
59
+
60
+ test 'BioDSL::Pipeline::DegapSeq status returns correctly' do
61
+ @output.write(SEQ: 'AT--C.G~')
62
+ @output.close
63
+ @p.degap_seq.run(input: @input, output: @output2)
64
+
65
+ assert_equal(1, @p.status.first[:records_in])
66
+ assert_equal(1, @p.status.first[:records_out])
67
+ assert_equal(1, @p.status.first[:sequences_in])
68
+ assert_equal(1, @p.status.first[:sequences_out])
69
+ assert_equal(8, @p.status.first[:residues_in]) # length of the input string, gap characters included
70
+ assert_equal(4, @p.status.first[:residues_out]) # length of the expected degapped sequence "ATCG"
71
+ end
72
+
73
+ test 'BioDSL::Pipeline::DegapSeq with :columns_only and uneven seq ' \
74
+ 'lengths raises' do # the two sequences below are 8 and 7 characters long
75
+ @output.write(SEQ: 'AT--C.G~')
76
+ @output.write(SEQ: 'AT--C.G')
77
+ @output.close
78
+ assert_raise(BioDSL::SeqError) do
79
+ @p.degap_seq(columns_only: true).run(input: @input, output: @output2)
80
+ end
81
+ end
82
+
83
+ test 'BioDSL::Pipeline::DegapSeq with :columns_only returns correctly' do
84
+ @output.write(SEQ: 'ATA-C.G~')
85
+ @output.write(SEQ: 'AT--C.G.') # third column is 'A' above and '-' here, so the expected output keeps that '-'
86
+ @output.close
87
+ @p.degap_seq(columns_only: true).run(input: @input, output: @output2)
88
+
89
+ expected = <<-EXP.gsub(/^\s+\|/, '')
90
+ |{:SEQ=>"ATACG", :SEQ_LEN=>5}
91
+ |{:SEQ=>"AT-CG", :SEQ_LEN=>5}
92
+ EXP
93
+
94
+ assert_equal(expected, collect_result) # only the columns that hold a gap character in both sequences are absent from the expected output
95
+ end
96
+ end
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for DereplicateSeq: option validation, dereplicated output, status counters and :ignore_case of Pipeline#dereplicate_seq.
34
+ class TestDereplicateSeq < Test::Unit::TestCase
35
+ def setup # Preloads @input with four sequence records and one non-sequence record, then builds a fresh pipeline.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ @output.write(SEQ_NAME: 'test1', SEQ: 'ATCG')
40
+ @output.write(SEQ_NAME: 'test2', SEQ: 'ATCG') # exact duplicate of test1's SEQ
41
+ @output.write(SEQ_NAME: 'test3', SEQ: 'atcg') # same residues as test1 but lower case
42
+ @output.write(SEQ_NAME: 'test4', SEQ: 'GCTA')
43
+ @output.write(FISH: 'eel') # record without a SEQ key
44
+ @output.close # writing end is closed once all fixture records are written
45
+
46
+ @p = BioDSL::Pipeline.new
47
+ end
48
+
49
+ test 'BioDSL::Pipeline::DereplicateSeq with invalid options raises' do
50
+ assert_raise(BioDSL::OptionError) { @p.dereplicate_seq(foo: 'bar') } # :foo is used as an option that must be rejected
51
+ end
52
+
53
+ test 'BioDSL::Pipeline::DereplicateSeq with valid options don\'t raise' do
54
+ assert_nothing_raised { @p.dereplicate_seq(ignore_case: true) }
55
+ end
56
+
57
+ test 'BioDSL::Pipeline::DereplicateSeq returns correctly' do # without options 'ATCG' and 'atcg' are expected as separate entries
58
+ @p.dereplicate_seq.run(input: @input, output: @output2)
59
+
60
+ expected = <<-EXP.gsub(/^\s+\|/, '')
61
+ |{:FISH=>"eel"}
62
+ |{:SEQ_NAME=>"test1", :SEQ=>"ATCG", :SEQ_COUNT=>2}
63
+ |{:SEQ_NAME=>"test3", :SEQ=>"atcg", :SEQ_COUNT=>1}
64
+ |{:SEQ_NAME=>"test4", :SEQ=>"GCTA", :SEQ_COUNT=>1}
65
+ EXP
66
+
67
+ assert_equal(expected, collect_result) # the non-sequence record is expected first; collect_result is not defined in this file (presumably from test/helper - verify)
68
+ end
69
+
70
+ test 'BioDSL::Pipeline::DereplicateSeq status returns correctly' do
71
+ @p.dereplicate_seq.run(input: @input, output: @output2)
72
+
73
+ assert_equal(5, @p.status.first[:records_in]) # setup wrote five records
74
+ assert_equal(4, @p.status.first[:records_out]) # same total as the four records expected in the output test
75
+ assert_equal(4, @p.status.first[:sequences_in]) # four fixture records have a SEQ key
76
+ assert_equal(3, @p.status.first[:sequences_out]) # three distinct sequences expected without :ignore_case
77
+ assert_equal(16, @p.status.first[:residues_in]) # four sequences of length 4
78
+ assert_equal(12, @p.status.first[:residues_out]) # three sequences of length 4
79
+ end
80
+
81
+ test 'BioDSL::Pipeline::DereplicateSeq with ignore_case returns OK' do # 'atcg' is expected to be counted together with 'ATCG'
82
+ @p.dereplicate_seq(ignore_case: true).run(input: @input, output: @output2)
83
+
84
+ expected = <<-EXP.gsub(/^\s+\|/, '')
85
+ |{:FISH=>"eel"}
86
+ |{:SEQ_NAME=>"test1", :SEQ=>"ATCG", :SEQ_COUNT=>3}
87
+ |{:SEQ_NAME=>"test4", :SEQ=>"GCTA", :SEQ_COUNT=>1}
88
+ EXP
89
+
90
+ assert_equal(expected, collect_result)
91
+ end
92
+ end
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
+ # Test class for the dump command: option validation, stdout and stream output, status counters and the :first/:last options.
34
+ class TestDump < Test::Unit::TestCase
35
+ def setup # Preloads @input with three records and builds a fresh pipeline for each test.
36
+ @input, @output = BioDSL::Stream.pipe
37
+ @input2, @output2 = BioDSL::Stream.pipe # @output2 is passed to run() as the pipeline output below
38
+
39
+ @output.write(one: 1, two: 2, three: 3)
40
+ @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4)
41
+ @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4)
42
+ @output.close # writing end is closed once all fixture records are written
43
+
44
+ @p = BioDSL::Pipeline.new
45
+ end
46
+
47
+ test 'BioDSL::Pipeline#dump with disallowed option raises' do
48
+ assert_raise(BioDSL::OptionError) { @p.dump(foo: 'bar') } # :foo is used as an option that must be rejected
49
+ end
50
+
51
+ test 'BioDSL::Pipeline#dump with bad first raises' do
52
+ assert_raise(BioDSL::OptionError) { @p.dump(first: 0) } # first: 0 must be rejected
53
+ end
54
+
55
+ test 'BioDSL::Pipeline#dump with bad last raises' do
56
+ assert_raise(BioDSL::OptionError) { @p.dump(last: 0) } # last: 0 must be rejected
57
+ end
58
+
59
+ test 'BioDSL::Pipeline#dump with first and last raises' do
60
+ assert_raise(BioDSL::OptionError) { @p.dump(first: 1, last: 1) } # giving both options together must be rejected
61
+ end
62
+
63
+ test 'BioDSL::Pipeline#dump returns correctly' do # the same text is expected on stdout and on the output stream
64
+ result1 = capture_stdout { @p.dump.run(input: @input, output: @output2) } # capture_stdout is not defined in this file (presumably from test/helper - verify)
65
+ result2 = collect_result # collect_result is not defined in this file either (presumably from test/helper - verify)
66
+
67
+ expected = <<-EXP.gsub(/^\s+\|/, '')
68
+ |{:one=>1, :two=>2, :three=>3}
69
+ |{:SEQ_NAME=>\"test1\", :SEQ=>\"atcg\", :SEQ_LEN=>4}
70
+ |{:SEQ_NAME=>\"test2\", :SEQ=>\"gtac\", :SEQ_LEN=>4}
71
+ EXP
72
+
73
+ assert_equal(expected, result1)
74
+ assert_equal(expected, result2)
75
+ end
76
+
77
+ test 'BioDSL::Pipeline#dump status returns correctly' do
78
+ capture_stdout { @p.dump.run(input: @input, output: @output2) } # captured output is discarded; only the status is checked
79
+
80
+ assert_equal(3, @p.status.first[:records_in]) # setup wrote three records
81
+ assert_equal(3, @p.status.first[:records_out]) # same number of records expected out as in
82
+ end
83
+
84
+ test 'BioDSL::Pipeline#dump with options[first: 1] returns correctly' do
85
+ result1 = capture_stdout do
86
+ @p.dump(first: 1).run(input: @input, output: @output2)
87
+ end
88
+
89
+ result2 = collect_result
90
+
91
+ expected = "{:one=>1, :two=>2, :three=>3}\n" # the first record written in setup
92
+
93
+ assert_equal(expected, result1)
94
+ assert_equal(expected, result2) # the output stream is expected to hold only that record as well
95
+ end
96
+
97
+ test 'BioDSL::Pipeline#dump with options[last: 1] returns correctly' do
98
+ result1 = capture_stdout do
99
+ @p.dump(last: 1).run(input: @input, output: @output2)
100
+ end
101
+
102
+ result2 = collect_result
103
+
104
+ expected = "{:SEQ_NAME=>\"test2\", :SEQ=>\"gtac\", :SEQ_LEN=>4}\n" # the last record written in setup
105
+
106
+ assert_equal(expected, result1)
107
+ assert_equal(expected, result2) # the output stream is expected to hold only that record as well
108
+ end
109
+ end