BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for BioDSL::Pipeline#uclust: option validation, clustered record
# output, status counters and aligned (msa) output.
#
# Every test is omitted when no +usearch+ executable can be located.
class TestUclust < Test::Unit::TestCase
  def setup
    return if BioDSL::Filesys.which('usearch')

    omit('usearch not found')
  end

  test 'BioDSL::Pipeline#uclust with disallowed option raises' do
    pipeline = BioDSL::Pipeline.new

    assert_raise(BioDSL::OptionError) do
      pipeline.uclust(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#uclust with allowed option dont raise' do
    pipeline = BioDSL::Pipeline.new

    assert_nothing_raised do
      pipeline.uclust(identity: 1, strand: :both)
    end
  end

  test 'BioDSL::Pipeline#uclust outputs correctly' do
    run_uclust(identity: 0.97, strand: 'plus')

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"atcgatcgatcgatcgatcgatcgatcgtacgacgtagct",
      | :SEQ_NAME=>"2",
      | :TYPE=>"C",
      | :CLUSTER=>1,
      | :CLUSTER_SIZE=>1,
      | :STRAND=>"*",
      | :CIGAR=>"*",
      | :Q_ID=>"2",
      | :RECORD_TYPE=>"uclust"}
      |{:SEQ=>"gtgtgtagctacgatcagctagcgatcgagctatatgttt",
      | :SEQ_NAME=>"1",
      | :TYPE=>"C",
      | :CLUSTER=>0,
      | :CLUSTER_SIZE=>1,
      | :STRAND=>"*",
      | :CIGAR=>"*",
      | :Q_ID=>"1",
      | :RECORD_TYPE=>"uclust"}
      |{:one=>1,
      | :two=>2,
      | :three=>3}
    EXP

    # Newlines are stripped on both sides, so only record content is compared.
    assert_equal(flatten(expected), flatten(collect_sorted_result))
  end

  test 'BioDSL::Pipeline#uclust status outputs correctly' do
    pipeline = run_uclust(identity: 0.97, strand: 'plus')

    {
      records_in:    3,
      records_out:   3,
      sequences_in:  2,
      sequences_out: 2,
      residues_in:   80,
      residues_out:  80
    }.each do |counter, count|
      assert_equal(count, pipeline.status.first[counter])
    end
  end

  test 'BioDSL::Pipeline#uclust outputs msa correctly' do
    run_uclust(identity: 0.97, strand: 'plus', align: true)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:RECORD_TYPE=>"uclust",
      | :CLUSTER=>0,
      | :SEQ_NAME=>"*1",
      | :SEQ=>"GTgtgtAGCTACGATCAGCTAGCGATCGAGCTATATGTTT",
      | :SEQ_LEN=>40}
      |{:RECORD_TYPE=>"uclust",
      | :CLUSTER=>1,
      | :SEQ_NAME=>"*2",
      | :SEQ=>"ATCGATCGATCGATCGATCGATCGATCGTACGACGTAGCT",
      | :SEQ_LEN=>40}
      |{:one=>1,
      | :two=>2,
      | :three=>3}
    EXP

    assert_equal(flatten(expected), flatten(collect_sorted_result))
  end

  private

  # Records fed to every pipeline run: one record without a sequence followed
  # by two 40-residue sequence records.
  def sample_records
    [
      {one: 1, two: 2, three: 3},
      {SEQ: 'gtgtgtagctacgatcagctagcgatcgagctatatgttt'},
      {SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct'}
    ]
  end

  # Writes the sample records to a fresh stream, runs uclust with +options+
  # over it and returns the pipeline so callers can inspect its status.
  #
  # The reading end of the result stream is kept in @input2.
  # NOTE(review): collect_sorted_result is defined outside this file and
  # presumably reads @input2 - confirm in test/helper.
  def run_uclust(**options)
    source, feeder = BioDSL::Stream.pipe
    @input2, sink  = BioDSL::Stream.pipe

    sample_records.each { |record| feeder.write(record) }
    feeder.close

    pipeline = BioDSL::Pipeline.new
    pipeline.uclust(**options).run(input: source, output: sink)
    pipeline
  end

  # Returns +text+ with all newlines removed.
  def flatten(text)
    text.delete("\n")
  end
end
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for BioDSL::Pipeline#unique_values: option validation, selection of
# records by the value of a key, the :invert option and status counters.
class TestUniqueValues < Test::Unit::TestCase
  # Fills the input stream with six V0/V1 records plus one record that lacks
  # the V0 key, and creates the pipeline under test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    species_records = [
      %w(HUMAN H1),
      %w(HUMAN H2),
      %w(HUMAN H3),
      %w(DOG D1),
      %w(DOG D2),
      %w(MOUSE M1)
    ].map { |species, id| {V0: species, V1: id} }

    (species_records + [{FOO: 'BAR'}]).each { |entry| @output.write(entry) }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#unique_values with disallowed option raises' do
    assert_raise(BioDSL::OptionError) { @p.unique_values(key: :V0, foo: 'bar') }
  end

  test 'BioDSL::Pipeline#unique_values with allowed options dont raise' do
    assert_nothing_raised do
      @p.unique_values(key: :V0)
    end
  end

  test 'BioDSL::Pipeline#unique_values returns correctly' do
    command = @p.unique_values(key: 'V0')
    command.run(input: @input, output: @output2)

    # The first record seen for each V0 value is emitted; the record without
    # V0 is emitted as well.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"HUMAN", :V1=>"H1"}
      |{:V0=>"DOG", :V1=>"D1"}
      |{:V0=>"MOUSE", :V1=>"M1"}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline#unique_values status returns correctly' do
    command = @p.unique_values(key: 'V0')
    command.run(input: @input, output: @output2)

    status = @p.status

    assert_equal(7, status.first[:records_in])
    assert_equal(4, @p.status.first[:records_out])
  end

  test 'BioDSL::Pipeline#unique_values with :invert returns correctly' do
    command = @p.unique_values(key: 'V0', invert: true)
    command.run(input: @input, output: @output2)

    # With :invert the repeated V0 records are emitted instead of the first
    # ones; the record without V0 is still emitted.
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"HUMAN", :V1=>"H2"}
      |{:V0=>"HUMAN", :V1=>"H3"}
      |{:V0=>"DOG", :V1=>"D2"}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(expected, collect_result)
  end
end
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for BioDSL::Pipeline#usearch_global: option validation, hit records
# emitted for a search against a one-sequence FASTA database, and status
# counters.
#
# Every test is omitted when no +usearch+ executable can be located.
class TestUsearchGlobal < Test::Unit::TestCase
  require 'tempfile'

  # Omits the test when usearch is missing; otherwise writes a one-entry FASTA
  # database to a temporary file kept in @db.
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')

    data = <<-DAT.gsub(/^\s+\|/, '')
      |>test1
      |gtgtgtagctacgatcagctagcgatcgagctatatgttt
    DAT

    @db = Tempfile.new('database')

    File.open(@db.path, 'w') { |ios| ios << data }
  end

  # Removes the temporary database.
  #
  # teardown also runs when setup bailed out through +omit+, in which case
  # @db was never assigned; without the guard every omitted test would also
  # report a NoMethodError on nil.
  def teardown
    return unless @db

    @db.close
    @db.unlink
  end

  test 'BioDSL::Pipeline#usearch_global with disallowed option raises' do
    p = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { p.usearch_global(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#usearch_global with allowed option dont raise' do
    p = BioDSL::Pipeline.new
    assert_nothing_raised { p.usearch_global(database: @db.path, identity: 1) }
  end

  test 'BioDSL::Pipeline#usearch_global outputs correctly' do
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'gtgtgtagctacgatcagctagcgatcgagctatatgttt')
    output.write(SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    p = BioDSL::Pipeline.new
    p.usearch_global(database: @db.path, identity: 0.97, strand: 'plus').
      run(input: input, output: output2)

    # Input records pass through; one usearch record is expected per query
    # sequence (TYPE "H" for the first, TYPE "N" for the second).
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"atcgatcgatcgatcgatcgatcgatcgtacgacgtagct"}
      |{:SEQ=>"gtgtgtagctacgatcagctagcgatcgagctatatgttt"}
      |{:TYPE=>"H",
      | :CLUSTER=>0,
      | :SEQ_LEN=>40,
      | :IDENT=>100.0,
      | :STRAND=>"+",
      | :CIGAR=>"40M",
      | :Q_ID=>"1",
      | :S_ID=>"test1",
      | :RECORD_TYPE=>"usearch"}
      |{:TYPE=>"N",
      | :CLUSTER=>0,
      | :SEQ_LEN=>0,
      | :STRAND=>".",
      | :CIGAR=>"*",
      | :Q_ID=>"2",
      | :RECORD_TYPE=>"usearch"}
      |{:one=>1, :two=>2, :three=>3}
    EXP

    # Newlines are stripped on both sides, so only record content is compared.
    assert_equal(expected.delete("\n"), collect_sorted_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#usearch_global status outputs correctly' do
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'gtgtgtagctacgatcagctagcgatcgagctatatgttt')
    output.write(SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    p = BioDSL::Pipeline.new
    p.usearch_global(database: @db.path, identity: 0.97, strand: 'plus').
      run(input: input, output: output2)

    assert_equal(3, p.status.first[:records_in])
    assert_equal(5, p.status.first[:records_out])
    assert_equal(2, p.status.first[:sequences_in])
    assert_equal(2, p.status.first[:hits_out])
  end
end
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for BioDSL::Pipeline#usearch_local: option validation, hit records
# emitted for a search against a one-sequence FASTA database, and status
# counters.
#
# Every test is omitted when no +usearch+ executable can be located.
class TestUsearchLocal < Test::Unit::TestCase
  require 'tempfile'

  # Omits the test when usearch is missing; otherwise writes a one-entry FASTA
  # database to a temporary file kept in @db.
  def setup
    omit('usearch not found') unless BioDSL::Filesys.which('usearch')

    data = <<-DAT.gsub(/^\s+\|/, '')
      |>test1
      |gtgtgtagctacgatcagctagcgatcgagctatatgttt
    DAT

    @db = Tempfile.new('database')

    File.open(@db.path, 'w') { |ios| ios << data }
  end

  # Removes the temporary database.
  #
  # teardown also runs when setup bailed out through +omit+, in which case
  # @db was never assigned; without the guard every omitted test would also
  # report a NoMethodError on nil.
  def teardown
    return unless @db

    @db.close
    @db.unlink
  end

  test 'BioDSL::Pipeline#usearch_local with disallowed option raises' do
    p = BioDSL::Pipeline.new
    assert_raise(BioDSL::OptionError) { p.usearch_local(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#usearch_local with allowed option dont raise' do
    p = BioDSL::Pipeline.new
    assert_nothing_raised { p.usearch_local(database: @db.path, identity: 1) }
  end

  test 'BioDSL::Pipeline#usearch_local outputs correctly' do
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'gtgtgtagctacgatcagctagcgatcgagctatatgttt')
    output.write(SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    p = BioDSL::Pipeline.new
    p.usearch_local(database: @db.path, identity: 0.97, strand: 'plus').
      run(input: input, output: output2)

    # Input records pass through; one usearch record is expected per query
    # sequence (TYPE "H" for the first, TYPE "N" for the second).
    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"atcgatcgatcgatcgatcgatcgatcgtacgacgtagct"}
      |{:SEQ=>"gtgtgtagctacgatcagctagcgatcgagctatatgttt"}
      |{:TYPE=>"H",
      | :CLUSTER=>0,
      | :SEQ_LEN=>40,
      | :IDENT=>100.0,
      | :STRAND=>"+",
      | :CIGAR=>"40M",
      | :Q_ID=>"1",
      | :S_ID=>"test1",
      | :RECORD_TYPE=>"usearch"}
      |{:TYPE=>"N",
      | :CLUSTER=>0,
      | :SEQ_LEN=>0,
      | :STRAND=>".",
      | :CIGAR=>"*",
      | :Q_ID=>"2",
      | :RECORD_TYPE=>"usearch"}
      |{:one=>1,
      | :two=>2,
      | :three=>3}
    EXP

    # Newlines are stripped on both sides, so only record content is compared.
    assert_equal(expected.delete("\n"), collect_sorted_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#usearch_local status outputs correctly' do
    input, output = BioDSL::Stream.pipe
    @input2, output2 = BioDSL::Stream.pipe

    output.write(one: 1, two: 2, three: 3)
    output.write(SEQ: 'gtgtgtagctacgatcagctagcgatcgagctatatgttt')
    output.write(SEQ: 'atcgatcgatcgatcgatcgatcgatcgtacgacgtagct')
    output.close

    p = BioDSL::Pipeline.new
    p.usearch_local(database: @db.path, identity: 0.97, strand: 'plus').
      run(input: input, output: output2)

    assert_equal(3, p.status.first[:records_in])
    assert_equal(5, p.status.first[:records_out])
    assert_equal(2, p.status.first[:sequences_in])
    assert_equal(2, p.status.first[:hits_out])
  end
end
@@ -0,0 +1,159 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
21
+ # USA. #
22
+ # #
23
+ # http://www.gnu.org/copyleft/gpl.html #
24
+ # #
25
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
26
+ # #
27
+ # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
28
+ # #
29
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
30
+
31
+ require 'test/helper'
32
+
33
# Tests for BioDSL::Pipeline#write_fasta: option validation, output to stdout
# and to file (plain, wrapped, gzipped, bzip2'ed), overwrite protection and
# pass-through of records to the output stream.
class TestWriteFasta < Test::Unit::TestCase
  # Locates a gzip decompressor, prepares a temporary output path and an input
  # stream holding two sequence records, and stores the FASTA text expected
  # from them in @expected.
  def setup
    @zcat = BioDSL::Filesys.which('gzcat') ||
            BioDSL::Filesys.which('zcat')

    init_test_files
    init_data_streams

    @expected = <<-EXP.gsub(/^\s+\|/, '')
      |>test1
      |atcg
      |>test2
      |gtac
    EXP

    @p = BioDSL::Pipeline.new
    @e = BioDSL::OptionError
  end

  # Creates the temporary directory and the output file paths inside it.
  def init_test_files
    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.fna')
    # NOTE(review): @file2 is the same path as @file and no test in this
    # class reads it; retained unchanged in case shared helpers rely on it.
    @file2  = File.join(@tmpdir, 'test.fna')
  end

  # Creates the input/output streams and writes two 4-residue sequence
  # records to the input stream.
  def init_data_streams
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4)
    @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4)
    @output.close
  end

  # Removes the temporary directory; guarded so that a failure in setup before
  # the directory exists does not raise a second, unrelated error here.
  def teardown
    FileUtils.rm_r(@tmpdir) if @tmpdir
  end

  test 'BioDSL::Pipeline::WriteFasta with invalid options raises' do
    assert_raise(@e) { @p.write_fasta(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::WriteFasta to stdout outputs correctly' do
    result = capture_stdout { @p.write_fasta.run(input: @input) }
    assert_equal(@expected, result)
  end

  test 'BioDSL::Pipeline::WriteFasta status outputs correctly' do
    capture_stdout { @p.write_fasta.run(input: @input) }
    assert_equal(2, @p.status.first[:records_in])
    assert_equal(2, @p.status.first[:records_out])
    assert_equal(2, @p.status.first[:sequences_in])
    assert_equal(2, @p.status.first[:sequences_out])
    assert_equal(8, @p.status.first[:residues_in])
    assert_equal(8, @p.status.first[:residues_out])
  end

  test 'BioDSL::Pipeline::WriteFasta with :wrap outputs correctly' do
    result = capture_stdout { @p.write_fasta(wrap: 2).run(input: @input) }

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |>test1
      |at
      |cg
      |>test2
      |gt
      |ac
    EXP

    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::WriteFasta to file outputs correctly' do
    @p.write_fasta(output: @file).run(input: @input, output: @output2)

    assert_equal(@expected, File.read(@file))
  end

  test 'BioDSL::Pipeline::WriteFasta to existing file raises' do
    # FileUtils.touch avoids spawning a shell just to create an empty file.
    FileUtils.touch(@file)
    assert_raise(@e) { @p.write_fasta(output: @file) }
  end

  test 'BioDSL::Pipeline::WriteFasta to file with :force outputs OK' do
    FileUtils.touch(@file)
    @p.write_fasta(output: @file, force: true).run(input: @input)

    # File.read closes the handle, unlike File.open(...).read.
    assert_equal(@expected, File.read(@file))
  end

  test 'BioDSL::Pipeline::WriteFasta with gzipdata and w/o file raises' do
    assert_raise(@e) { @p.write_fasta(gzip: true) }
  end

  test 'BioDSL::Pipeline::WriteFasta with bzip2 data w/o file raises' do
    assert_raise(@e) { @p.write_fasta(bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteFasta to file outputs gzipped data OK' do
    # Without a decompressor the command below would have no executable to
    # run, so the test is omitted rather than failing for an unrelated reason.
    omit('gzcat/zcat not found') unless @zcat

    @p.write_fasta(output: @file, gzip: true).run(input: @input)

    # Array form bypasses the shell, so the temp path needs no quoting.
    assert_equal(@expected, IO.popen([@zcat, @file], &:read))
  end

  test 'BioDSL::Pipeline::WriteFasta to file outputs bzip2\'ed data OK' do
    bzcat = BioDSL::Filesys.which('bzcat')
    omit('bzcat not found') unless bzcat

    @p.write_fasta(output: @file, bzip2: true).run(input: @input)

    assert_equal(@expected, IO.popen([bzcat, @file], &:read))
  end

  test 'BioDSL::Pipeline::WriteFasta with gzip and bzip2 output raises' do
    assert_raise(@e) { @p.write_fasta(output: @file, gzip: true, bzip2: true) }
  end

  test 'BioDSL::Pipeline::WriteFasta with flux outputs correctly' do
    @p.write_fasta(output: @file).run(input: @input, output: @output2)

    # The records written to file are also expected on the output stream.
    expected2 = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4}
      |{:SEQ_NAME=>"test2", :SEQ=>"gtac", :SEQ_LEN=>4}
    EXP

    assert_equal(@expected, File.read(@file))
    assert_equal(expected2, collect_result)
  end
end