BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Unit tests for the mask_seq pipeline command.
#
# Every test starts from one scored sequence record queued on the input
# stream and checks option validation, the emitted record, or the status
# counters reported by the pipeline.
#
# rubocop:disable Metrics/LineLength
class TestMaskSeq < Test::Unit::TestCase
  # Queues the fixture record on the input stream, closes the writer and
  # creates a fresh pipeline for the test to configure.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    # NOTE(review): SEQ_LEN (82) does not match the 64-residue SEQ; every
    # expectation in this class uses 64 - confirm the mismatch is intended.
    record = {
      SEQ_NAME: 'test',
      SEQ: 'gatcgatcgtacgagcagcatctgacgtatcgatcatgcagtctacgacgagcatgctagctag',
      SEQ_LEN: 82,
      SCORES: '!"#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III'
    }

    @output.write(record)
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MaskSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.mask_seq(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::MaskSeq with valid options don\'t raise' do
    assert_nothing_raised do
      @p.mask_seq(mask: :hard)
    end
  end

  # No option is passed here; the expected record has the masked stretches
  # lower-cased (presumably :soft is the command's default - TODO confirm).
  test 'BioDSL::Pipeline::MaskSeq with mask: :soft returns correctly' do
    @p.mask_seq.run(input: @input, output: @output2)

    # The gsub strips the indentation and the leading pipe of each line.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test",
      | :SEQ=>"gatcgatcgtacgagcAGCATCTGACGTATCGATCATGCAGTCTAcgacgagcatgctagcTAG",
      | :SEQ_LEN=>64,
      | :SCORES=>"!\\\"\\\#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III"}
    EXP

    # Newlines are removed on both sides so line wrapping is not compared.
    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  # With hard masking the expected record has the masked stretches replaced
  # by N.
  test 'BioDSL::Pipeline::MaskSeq with mask: :hard returns correctly' do
    @p.mask_seq(mask: 'hard').run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test",
      | :SEQ=>"NNNNNNNNNNNNNNNNAGCATCTGACGTATCGATCATGCAGTCTANNNNNNNNNNNNNNNNTAG",
      | :SEQ_LEN=>64,
      | :SCORES=>"!\\\"\\\#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III"}
    EXP

    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::MaskSeq status returns correctly' do
    @p.mask_seq(mask: 'hard').run(input: @input, output: @output2)

    # Counters are checked in this order against the first status entry.
    expected_counts = {
      records_in: 1,
      records_out: 1,
      sequences_in: 1,
      sequences_out: 1,
      residues_in: 64,
      residues_out: 64
    }

    expected_counts.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Unit tests for the mean_scores pipeline command.
#
# The fixture is three records that carry only a SCORES string; the tests
# cover option validation, the global mean, the local mean (with and without
# an explicit window size) and the status counters.
class TestMeanScores < Test::Unit::TestCase
  # Queues the three SCORES-only records, closes the writer and creates a
  # fresh pipeline for the test to configure.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [
      'IIIIIIIIIIIIIIIIIIII',
      '!!!!!IIIIIIIIIIIIIII',
      'IIIIIIIIIIIIIII!!!!!'
    ].each do |scores|
      @output.write(SCORES: scores)
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MeanScores with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.mean_scores(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::MeanScores with valid options don\'t raise' do
    assert_nothing_raised do
      @p.mean_scores(local: true)
    end
  end

  # window_size is given without local: true, which is expected to be
  # rejected as an option error.
  test 'BioDSL::Pipeline::MeanScores with window_size and local: false ' \
    'raises' do
    assert_raise(BioDSL::OptionError) do
      @p.mean_scores(window_size: 10)
    end
  end

  test 'BioDSL::Pipeline::MeanScores returns correctly' do
    @p.mean_scores.run(input: @input, output: @output2)

    # The gsub strips the indentation and the leading pipe of each line.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SCORES=>"IIIIIIIIIIIIIIIIIIII", :SCORES_MEAN=>40.0}
      |{:SCORES=>"!!!!!IIIIIIIIIIIIIII", :SCORES_MEAN=>30.0}
      |{:SCORES=>"IIIIIIIIIIIIIII!!!!!", :SCORES_MEAN=>30.0}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::MeanScores status returns correctly' do
    @p.mean_scores.run(input: @input, output: @output2)

    # NOTE(review): min_mean is expected to be 0 although the per-record
    # means asserted in the test above are 40.0/30.0/30.0 - confirm intended.
    expected_status = {
      records_in: 3,
      records_out: 3,
      sequences_in: 0,
      sequences_out: 0,
      residues_in: 0,
      residues_out: 0,
      min_mean: 0,
      max_mean: 40,
      mean_mean: 33.33
    }

    expected_status.each do |counter, value|
      assert_equal(value, @p.status.first[counter])
    end
  end

  test 'BioDSL::Pipeline::MeanScores with local: true returns correctly' do
    @p.mean_scores(local: true).run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SCORES=>"IIIIIIIIIIIIIIIIIIII", :SCORES_MEAN_LOCAL=>40.0}
      |{:SCORES=>"!!!!!IIIIIIIIIIIIIII", :SCORES_MEAN_LOCAL=>0.0}
      |{:SCORES=>"IIIIIIIIIIIIIII!!!!!", :SCORES_MEAN_LOCAL=>0.0}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::MeanScores with local: true and :window_size ' \
    'returns correctly' do
    command = @p.mean_scores(local: true, window_size: 10)
    command.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SCORES=>"IIIIIIIIIIIIIIIIIIII", :SCORES_MEAN_LOCAL=>40.0}
      |{:SCORES=>"!!!!!IIIIIIIIIIIIIII", :SCORES_MEAN_LOCAL=>20.0}
      |{:SCORES=>"IIIIIIIIIIIIIII!!!!!", :SCORES_MEAN_LOCAL=>20.0}
    EXP

    assert_equal(expected, collect_result)
  end
end
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Unit tests for the merge_pair_seq pipeline command.
#
# The fixture is six 20-residue reads forming three pairs (names ending in
# " 1:N:0:14" and " 2:N:0:14"); the tests check option validation, the three
# merged records and the status counters.
class TestMergePairSeq < Test::Unit::TestCase
  # Queues the six paired reads in order, closes the writer and creates a
  # fresh pipeline for the test to configure.
  #
  # rubocop:disable MethodLength
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    reads = [
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14',
       SEQ: 'TGGGGAATATTGGACAATGG',
       SEQ_LEN: 20,
       SCORES: '<??????BDDDDDDDDGGGG'},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14862:1868 2:N:0:14',
       SEQ: 'CCTGTTTGCTACCCACGCTT',
       SEQ_LEN: 20,
       SCORES: '?????BB<-<BDDDDDFEEF'},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14',
       SEQ: 'TAGGGAATCTTGCACAATGG',
       SEQ_LEN: 20,
       SCORES: '<???9?BBBDBDDBDDFFFF'},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:13906:2139 2:N:0:14',
       SEQ: 'ACTCTTCGCTACCCATGCTT',
       SEQ_LEN: 20,
       SCORES: ',5<??BB?DDABDBDDFFFF'},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14',
       SEQ: 'TAGGGAATCTTGCACAATGG',
       SEQ_LEN: 20,
       SCORES: '?????BBBBBDDBDDBFFFF'},
      {SEQ_NAME: 'M01168:16:000000000-A1R9L:1:1101:14865:2158 2:N:0:14',
       SEQ: 'CCTCTTCGCTACCCATGCTT',
       SEQ_LEN: 20,
       SCORES: '??,<??B?BB?BBBBBFF?F'}
    ]

    reads.each { |read| @output.write read }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MergePairSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.merge_pair_seq(foo: 'bar')
    end
  end

  # Each expected record keeps the first read's name, concatenates SEQ and
  # SCORES of the pair and reports the two original lengths.
  test 'BioDSL::Pipeline::MergePairSeq returns correctly' do
    @p.merge_pair_seq.run(input: @input, output: @output2)

    # The gsub strips the indentation and the leading pipe of each line.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14",
      | :SEQ=>"TGGGGAATATTGGACAATGGCCTGTTTGCTACCCACGCTT",
      | :SEQ_LEN=>40,
      | :SCORES=>"<??????BDDDDDDDDGGGG?????BB<-<BDDDDDFEEF",
      | :SEQ_LEN_LEFT=>20,
      | :SEQ_LEN_RIGHT=>20}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14",
      | :SEQ=>"TAGGGAATCTTGCACAATGGACTCTTCGCTACCCATGCTT",
      | :SEQ_LEN=>40,
      | :SCORES=>"<???9?BBBDBDDBDDFFFF,5<??BB?DDABDBDDFFFF",
      | :SEQ_LEN_LEFT=>20,
      | :SEQ_LEN_RIGHT=>20}
      |{:SEQ_NAME=>"M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14",
      | :SEQ=>"TAGGGAATCTTGCACAATGGCCTCTTCGCTACCCATGCTT",
      | :SEQ_LEN=>40,
      | :SCORES=>"?????BBBBBDDBDDBFFFF??,<??B?BB?BBBBBFF?F",
      | :SEQ_LEN_LEFT=>20,
      | :SEQ_LEN_RIGHT=>20}
    EXP

    # Newlines are removed on both sides so line wrapping is not compared.
    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::MergePairSeq status returns correctly' do
    @p.merge_pair_seq.run(input: @input, output: @output2)

    # Six reads in, three merged records out; residue totals are unchanged.
    expected_counts = {
      records_in: 6,
      records_out: 3,
      sequences_in: 6,
      sequences_out: 3,
      residues_in: 120,
      residues_out: 120
    }

    expected_counts.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Unit tests for the merge_table pipeline command.
#
# The fixture is a four-row ID/ORGANISM table written to a temporary file
# plus five ID/COUNT records on the input stream; the tests check option
# validation, the merged records and the status counters.
class TestMergeTable < Test::Unit::TestCase
  # Creates the temporary table file, queues the five stream records, closes
  # the writer and creates a fresh pipeline for the test to configure.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.tab')

    setup_data_file

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    [[1, 5423], [2, 34], [3, 2423], [4, 234], [5, 2334]].each do |id, count|
      @output.write(ID: id, COUNT: count)
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  # Writes the table fixture to @file: a '#'-prefixed header line followed
  # by rows for IDs 1 through 4.
  def setup_data_file
    # The gsub strips the indentation and the leading pipe of each line.
    table = <<-EOF.gsub(/^\s+\|/, '')
      |#ID ORGANISM
      |1 parrot
      |2 eel
      |3 platypus
      |4 beetle
    EOF

    File.write(@file, table)
  end

  # Removes the temporary directory together with the table file.
  def teardown
    FileUtils.rm_r(@tmpdir)
  end

  test 'BioDSL::Pipeline::MergeTable with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.merge_table(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::MergeTable without required options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.merge_table
    end
  end

  test 'BioDSL::Pipeline::MergeTable with non-existing input file raises' do
    assert_raise(BioDSL::OptionError) { @p.merge_table(input: '___adsf', key: :ID) }
  end

  test 'BioDSL::Pipeline::MergeTable with bad skip value file raises' do
    assert_raise(BioDSL::OptionError) { @p.merge_table(input: @file, key: :ID, skip: -1) }
  end

  test 'BioDSL::Pipeline::MergeTable with duplicate keys raises' do
    assert_raise(BioDSL::OptionError) { @p.merge_table(input: @file, key: :ID, keys: [:a, :a]) }
  end

  test 'BioDSL::Pipeline::MergeTable with duplicate columns raises' do
    assert_raise(BioDSL::OptionError) { @p.merge_table(input: @file, key: :ID, columns: [1, 1]) }
  end

  # ID 5 has no row in the table file, so its record is expected to come out
  # without an ORGANISM key.
  test 'BioDSL::Pipeline::MergeTable returns correctly' do
    command = @p.merge_table(input: @file, key: :ID)
    command.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>1, :COUNT=>5423, :ORGANISM=>"parrot"}
      |{:ID=>2, :COUNT=>34, :ORGANISM=>"eel"}
      |{:ID=>3, :COUNT=>2423, :ORGANISM=>"platypus"}
      |{:ID=>4, :COUNT=>234, :ORGANISM=>"beetle"}
      |{:ID=>5, :COUNT=>2334}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::MergeTable status returns correctly' do
    command = @p.merge_table(input: @file, key: :ID)
    command.run(input: @input, output: @output2)

    # Counters are checked in this order against the first status entry.
    expected_counts = {
      records_in: 5,
      records_out: 5,
      merged: 4,
      non_merged: 1,
      rows_matched: 4,
      rows_unmatched: 0,
      rows_total: 4
    }

    expected_counts.each do |counter, count|
      assert_equal(count, @p.status.first[counter])
    end
  end
end
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Unit tests for the merge_values pipeline command.
#
# The fixture is two records sharing ID and SEQ, only the first of which has
# a COUNT; the tests check option validation, merging with the default and
# with a custom delimiter, and the status counters.
class TestMergeValues < Test::Unit::TestCase
  # Queues the two fixture records, closes the writer and creates a fresh
  # pipeline for the test to configure.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    with_count    = {ID: 'FOO', COUNT: 10, SEQ: 'gataag'}
    without_count = {ID: 'FOO', SEQ: 'gataag'}

    @output.write(with_count)
    @output.write(without_count)
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MergeValues with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.merge_values(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline::MergeValues with valid options don\'t raise' do
    assert_nothing_raised do
      @p.merge_values(keys: [:ID])
    end
  end

  # The merged value is expected under the first listed key (:COUNT), joined
  # with "_"; the record lacking COUNT is expected to pass through unchanged.
  test 'BioDSL::Pipeline::MergeValues returns correctly' do
    command = @p.merge_values(keys: [:COUNT, :ID])
    command.run(input: @input, output: @output2)

    # The gsub strips the indentation and the leading pipe of each line.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"FOO", :COUNT=>"10_FOO", :SEQ=>"gataag"}
      |{:ID=>"FOO", :SEQ=>"gataag"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::MergeValues status returns correctly' do
    command = @p.merge_values(keys: [:COUNT, :ID])
    command.run(input: @input, output: @output2)

    [:records_in, :records_out].each do |counter|
      assert_equal(2, @p.status.first[counter])
    end
  end

  # With keys [:ID, :COUNT] the merged value is expected under :ID and uses
  # the supplied delimiter.
  test 'BioDSL::Pipeline::MergeValues with :delimiter returns correctly' do
    command = @p.merge_values(keys: [:ID, :COUNT], delimiter: ':count=')
    command.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"FOO:count=10", :COUNT=>10, :SEQ=>"gataag"}
      |{:ID=>"FOO", :SEQ=>"gataag"}
    EXP

    assert_equal(expected, collect_result)
  end
end