BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the slice_seq pipeline command: option validation, slicing by a
# single index and by a range (including out of range values), and the status
# counters reported after a run.
class TestSliceSeq < Test::Unit::TestCase
  # Writes two records to the first pipe, closes its writing end and creates
  # a fresh pipeline. The second pipe is handed to the pipeline as output.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [
      {FOO: 'BAR', SEQ: 'atcg'},
      {SEQ: 'atcg', SCORES: '0123'}
    ].each { |record| @output.write(record) }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::SliceSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.slice_seq(slice: 1, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SliceSeq with valid options don\'t raise' do
    assert_nothing_raised do
      @p.slice_seq(slice: 1)
    end
  end

  test 'BioDSL::Pipeline::SliceSeq with index returns correctly' do
    @p.slice_seq(slice: 1).run(input: @input, output: @output2)

    # One line per expected record, each terminated by a newline.
    expected = [
      '{:FOO=>"BAR", :SEQ=>"t", :SEQ_LEN=>1}',
      '{:SEQ=>"t", :SCORES=>"1", :SEQ_LEN=>1}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq with out of range index returns OK' do
    @p.slice_seq(slice: 10).run(input: @input, output: @output2)

    expected = [
      '{:FOO=>"BAR", :SEQ=>"", :SEQ_LEN=>0}',
      '{:SEQ=>"", :SCORES=>"", :SEQ_LEN=>0}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq with negative index returns correctly' do
    @p.slice_seq(slice: -1).run(input: @input, output: @output2)

    expected = [
      '{:FOO=>"BAR", :SEQ=>"g", :SEQ_LEN=>1}',
      '{:SEQ=>"g", :SCORES=>"3", :SEQ_LEN=>1}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq with negative out of range ' \
       'index returns correctly' do
    @p.slice_seq(slice: -10).run(input: @input, output: @output2)

    expected = [
      '{:FOO=>"BAR", :SEQ=>"", :SEQ_LEN=>0}',
      '{:SEQ=>"", :SCORES=>"", :SEQ_LEN=>0}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq with range returns correctly' do
    @p.slice_seq(slice: 1..-1).run(input: @input, output: @output2)

    expected = [
      '{:FOO=>"BAR", :SEQ=>"tcg", :SEQ_LEN=>3}',
      '{:SEQ=>"tcg", :SCORES=>"123", :SEQ_LEN=>3}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq with out of range end range returns OK' do
    @p.slice_seq(slice: 1..10).run(input: @input, output: @output2)

    expected = [
      '{:FOO=>"BAR", :SEQ=>"tcg", :SEQ_LEN=>3}',
      '{:SEQ=>"tcg", :SCORES=>"123", :SEQ_LEN=>3}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SliceSeq status returns OK' do
    @p.slice_seq(slice: 1..10).run(input: @input, output: @output2)

    # Expected value of each counter in the first status entry, checked in
    # the order listed.
    counters = {
      records_in: 2,
      records_out: 2,
      sequences_in: 2,
      sequences_out: 2,
      residues_in: 8,
      residues_out: 6
    }

    counters.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the sort pipeline command: option validation, sorting on a string
# key and on a numeric key, the reverse and block_size options, and the status
# counters reported after a run.
class TestSort < Test::Unit::TestCase
  # Writes four NAME/COUNT records to the first pipe, closes its writing end
  # and creates a fresh pipeline. The second pipe is used as pipeline output.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [
      ['test2', 4],
      ['test1', 21],
      ['test2', 2],
      ['test3', 9]
    ].each do |name, count|
      @output.write(NAME: name, COUNT: count)
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::Sort with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.sort(key: :COUNT, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::Sort with valid options don\'t raise' do
    assert_nothing_raised do
      @p.sort(key: :COUNT)
    end
  end

  test 'BioDSL::Pipeline::Sort alphabetical returns correctly' do
    @p.sort(key: 'NAME').run(input: @input, output: @output2)

    # One line per expected record, each terminated by a newline.
    expected = [
      '{:NAME=>"test1", :COUNT=>21}',
      '{:NAME=>"test2", :COUNT=>4}',
      '{:NAME=>"test2", :COUNT=>2}',
      '{:NAME=>"test3", :COUNT=>9}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::Sort numerical returns correctly' do
    @p.sort(key: :COUNT).run(input: @input, output: @output2)

    expected = [
      '{:NAME=>"test2", :COUNT=>2}',
      '{:NAME=>"test2", :COUNT=>4}',
      '{:NAME=>"test3", :COUNT=>9}',
      '{:NAME=>"test1", :COUNT=>21}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::Sort reverse returns correctly' do
    @p.sort(key: :COUNT, reverse: true).run(input: @input, output: @output2)

    expected = [
      '{:NAME=>"test1", :COUNT=>21}',
      '{:NAME=>"test3", :COUNT=>9}',
      '{:NAME=>"test2", :COUNT=>4}',
      '{:NAME=>"test2", :COUNT=>2}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::Sort with block_size returns correctly' do
    @p.sort(key: :COUNT, block_size: 60).run(input: @input, output: @output2)

    expected = [
      '{:NAME=>"test2", :COUNT=>2}',
      '{:NAME=>"test2", :COUNT=>4}',
      '{:NAME=>"test3", :COUNT=>9}',
      '{:NAME=>"test1", :COUNT=>21}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::Sort with block_size and reverse returns OK' do
    sorter = @p.sort(key: :COUNT, block_size: 30, reverse: true)
    sorter.run(input: @input, output: @output2)

    expected = [
      '{:NAME=>"test1", :COUNT=>21}',
      '{:NAME=>"test3", :COUNT=>9}',
      '{:NAME=>"test2", :COUNT=>4}',
      '{:NAME=>"test2", :COUNT=>2}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::Sort status returns OK' do
    @p.sort(key: :COUNT).run(input: @input, output: @output2)

    # Both record counters of the first status entry are expected to be 4.
    [:records_in, :records_out].each do |counter|
      assert_equal(4, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the split_pair_seq pipeline command: option validation, errors
# raised for inconsistent sequence lengths and for an unparsable sequence
# name, the records produced from three merged pairs, and the status counters.
class TestSplitPairSeq < Test::Unit::TestCase
  # Fills the first pipe with the merged pair records, closes its writing
  # end, prepares the expected output text and creates a fresh pipeline.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    setup_output

    @output.close

    setup_expected

    @p = BioDSL::Pipeline.new
  end

  # rubocop: disable MethodLength

  # Writes three merged pair records to @output. All of them share a total
  # length of 40 and left/right lengths of 20; only name, sequence and scores
  # differ.
  def setup_output
    reads = [
      ['M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14',
       'TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT',
       '<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF'],
      ['M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14',
       'TAGGGAATCTTGCACAATGGACTCTTCGCTACCCATGCTT',
       '<???9?BBBDBDDBDDFFFF,5<??BB?DDABDBDDFFFF'],
      ['M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14',
       'TAGGGAATCTTGCACAATGGCCTCTTCGCTACCCATGCTT',
       '?????BBBBBDDBDDBFFFF??,<??B?BB?BBBBBFF?F']
    ]

    records = reads.map do |name, seq, scores|
      {SEQ_NAME: name,
       SEQ: seq,
       SEQ_LEN: 40,
       SCORES: scores,
       SEQ_LEN_LEFT: 20,
       SEQ_LEN_RIGHT: 20}
    end

    records.each { |record| @output.write record }
  end

  # Stores in @expected the text the six split records are compared against.
  # Every line is newline terminated; continuation lines keep one leading
  # space. The comparing test strips the newlines from both sides.
  def setup_expected
    lines = [
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14",',
      ' :SEQ=>"TGGGGAATATTGGACAATGG",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>"<??????BDDDDDDDDGGGG"}',
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14862:1868 2:N:0:14",',
      ' :SEQ=>"CCTGTTTGCTACCCACGCTT",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>"?????BB<-<BDDDDDFEEF"}',
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14",',
      ' :SEQ=>"TAGGGAATCTTGCACAATGG",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>"<???9?BBBDBDDBDDFFFF"}',
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:13906:2139 2:N:0:14",',
      ' :SEQ=>"ACTCTTCGCTACCCATGCTT",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>",5<??BB?DDABDBDDFFFF"}',
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14",',
      ' :SEQ=>"TAGGGAATCTTGCACAATGG",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>"?????BBBBBDDBDDBFFFF"}',
      '{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14865:2158 2:N:0:14",',
      ' :SEQ=>"CCTCTTCGCTACCCATGCTT",',
      ' :SEQ_LEN=>20,',
      ' :SCORES=>"??,<??B?BB?BBBBBFF?F"}'
    ]

    @expected = lines.map { |line| line + "\n" }.join
  end

  # rubocop: enable MethodLength
  test 'BioDSL::Pipeline::SplitPairSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.split_pair_seq(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq with bad sequence lengths raises' do
    reader, writer = BioDSL::Stream.pipe

    # SEQ_LEN_LEFT + SEQ_LEN_RIGHT is 30 here while SEQ_LEN is 40.
    writer.write(
      SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14',
      SEQ: 'TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT',
      SEQ_LEN: 40,
      SCORES: '<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF',
      SEQ_LEN_LEFT: 10,
      SEQ_LEN_RIGHT: 20
    )
    writer.close

    assert_raise(BioDSL::SeqError) do
      @p.split_pair_seq.run(input: reader, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq with bad sequence name raises' do
    reader, writer = BioDSL::Stream.pipe

    # Unlike the other fixtures, this name has no space before "1:N:0:14".
    writer.write(
      SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:18681:N:0:14',
      SEQ: 'TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT',
      SEQ_LEN: 40,
      SCORES: '<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF',
      SEQ_LEN_LEFT: 20,
      SEQ_LEN_RIGHT: 20
    )
    writer.close

    assert_raise(RuntimeError) do
      @p.split_pair_seq.run(input: reader, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::SplitPairSeq returns correctly' do
    @p.split_pair_seq.run(input: @input, output: @output2)

    # Line breaks are removed on both sides before comparing.
    wanted = @expected.delete("\n")
    actual = collect_result.delete("\n")

    assert_equal(wanted, actual)
  end

  test 'BioDSL::Pipeline::SplitPairSeq status returns correctly' do
    @p.split_pair_seq.run(input: @input, output: @output2)

    # Expected value of each counter in the first status entry, checked in
    # the order listed.
    counters = {
      records_in: 3,
      records_out: 6,
      sequences_in: 3,
      sequences_out: 6,
      residues_in: 120,
      residues_out: 120
    }

    counters.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for the split_values pipeline command: option validation, splitting
# the value of a key with and without an explicit delimiter, naming the
# resulting keys via the keys option, and the status counters.
class TestSplitValues < Test::Unit::TestCase
  # Writes two records with differently formatted ID values to the first
  # pipe, closes its writing end and creates a fresh pipeline.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    %w(FOO:count=10 FOO_10_20).each do |id|
      @output.write(ID: id, SEQ: 'gataag')
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::SplitValues with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.split_values(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::SplitValues with valid options don\'t raise' do
    assert_nothing_raised do
      @p.split_values(key: :ID)
    end
  end

  test 'BioDSL::Pipeline::SplitValues returns correctly' do
    @p.split_values(key: :ID).run(input: @input, output: @output2)

    # One line per expected record, each terminated by a newline.
    expected = [
      '{:ID=>"FOO:count=10", :SEQ=>"gataag"}',
      '{:ID=>"FOO_10_20", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10, :ID_2=>20}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SplitValues status returns correctly' do
    @p.split_values(key: :ID).run(input: @input, output: @output2)

    # Both record counters of the first status entry are expected to be 2.
    [:records_in, :records_out].each do |counter|
      assert_equal(2, @p.status.first[counter])
    end
  end

  test 'BioDSL::Pipeline::SplitValues with :delimiter returns correctly' do
    splitter = @p.split_values(key: 'ID', delimiter: ':count=')
    splitter.run(input: @input, output: @output2)

    expected = [
      '{:ID=>"FOO:count=10", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10}',
      '{:ID=>"FOO_10_20", :SEQ=>"gataag"}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::SplitValues w. :delimiter and :keys returns OK' do
    splitter = @p.split_values(key: 'ID', keys: ['ID', :COUNT],
                               delimiter: ':count=')
    splitter.run(input: @input, output: @output2)

    expected = [
      '{:ID=>"FOO", :SEQ=>"gataag", :COUNT=>10}',
      '{:ID=>"FOO_10_20", :SEQ=>"gataag"}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end
end