BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,514 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Test-only extension of StringIO so that an in-memory buffer can be handed to
# BioDSL::CSV.new (as done in TestCSV#setup).
# NOTE(review): presumably BioDSL::CSV reads its input through #get_entry -
# confirm against lib/BioDSL/csv.rb.
class StringIO
  # Returns the next line of the buffer, or nil once the buffer is exhausted.
  # Thin wrapper around StringIO#gets.
  def get_entry
    gets
  end
end
37
+
38
+ class TestCSV < Test::Unit::TestCase
39
+ require 'stringio'
40
+ require 'tempfile'
41
+
42
# Prepares the fixtures shared by every test in this class:
#   @table  - three-column table whose first non-blank line is a '#'-prefixed header
#   @table2 - the same rows and header, with ';' between the fields
#   @table3 - rows only (no header line), third column holds decimal numbers
#   @csv    - BioDSL::CSV reader wrapping a StringIO over @table
#   @file   - empty Tempfile the tests write a table into before reading it by path
# NOTE(review): the exact whitespace inside the heredocs (tabs vs. spaces) is not
# recoverable from this diff rendering - check the original file before editing.
+ def setup
43
+ table = <<END
44
+
45
+ #Organism Sequence Count
46
+ Human ATACGTCAG 23524
47
+ Dog AGCATGAC 2442
48
+ Mouse GACTG 234
49
+ Cat AAATGCA 2342
50
+
51
+ END
52
+
53
+ table2 = <<END
54
+
55
+ #Organism;Sequence;Count
56
+ Human;ATACGTCAG;23524
57
+ Dog;AGCATGAC;2442
58
+ Mouse;GACTG;234
59
+ Cat;AAATGCA;2342
60
+
61
+ END
62
+
63
+ table3 = <<END
64
+
65
+ Human ATACGTCAG 5.24
66
+ Dog AGCATGAC 4.2
67
+ Mouse GACTG 3.4
68
+ Cat AAATGCA 3.42
69
+
70
+ END
71
+
72
# In-memory reader over the first table; used by the CSV#skip test.
+ io = StringIO.new(table)
73
+ @csv = BioDSL::CSV.new(io)
74
+
75
+ @table = table
76
+ @table2 = table2
77
+ @table3 = table3
78
+
79
+ @file = Tempfile.new('foo')
80
+ end
81
+
82
# Closes the per-test Tempfile created in setup and removes it from disk.
def teardown
  # Tempfile#close! closes the handle and unlinks the file in one call.
  @file.close!
end
86
+
87
# After skipping three lines of the in-memory table, only the Mouse and Cat
# rows are left for each_array to yield.
test "CSV#skip returns correctly" do
  expected = [
    ['Mouse', 'GACTG', 234],
    ['Cat', 'AAATGCA', 2342]
  ]

  @csv.skip(3)

  rows = []
  @csv.each_array do |row|
    rows << row
  end

  assert_equal(expected, rows)
end
98
+
99
# Default read_array call: the header line is not part of the result and the
# Count column comes back as integers.
test "CSV.read_array returns correctly" do
  expected = [
    ['Human', 'ATACGTCAG', 23_524],
    ['Dog', 'AGCATGAC', 2442],
    ['Mouse', 'GACTG', 234],
    ['Cat', 'AAATGCA', 2342]
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path)

  assert_equal(expected, rows)
end
110
+
111
# Headerless table with decimal values: the third column is expected as Floats.
test "CSV.read_array with floats returns correctly" do
  expected = [
    ['Human', 'ATACGTCAG', 5.24],
    ['Dog', 'AGCATGAC', 4.2],
    ['Mouse', 'GACTG', 3.4],
    ['Cat', 'AAATGCA', 3.42]
  ]

  @file.write(@table3)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path)

  assert_equal(expected, rows)
end
122
+
123
# With include_header: true the header (without its leading '#') is returned
# as the first row, followed by the data rows.
test "CSV.read_array with include_header: true returns correctly" do
  expected = [
    %w[Organism Sequence Count],
    ['Human', 'ATACGTCAG', 23_524],
    ['Dog', 'AGCATGAC', 2442],
    ['Mouse', 'GACTG', 234],
    ['Cat', 'AAATGCA', 2342]
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, include_header: true)

  assert_equal(expected, rows)
end
135
+
136
# The ';'-separated table parses to the same rows when delimiter: ";" is given.
test "CSV.read_array with :delimiter returns correctly" do
  expected = [
    ['Human', 'ATACGTCAG', 23_524],
    ['Dog', 'AGCATGAC', 2442],
    ['Mouse', 'GACTG', 234],
    ['Cat', 'AAATGCA', 2342]
  ]

  @file.write(@table2)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, delimiter: ';')

  assert_equal(expected, rows)
end
147
+
148
# Combines a custom ';' delimiter with include_header: the header row leads
# the result.
test "CSV.read_array with :delimiter and :include_header returns correctly" do
  expected = [
    %w[Organism Sequence Count],
    ['Human', 'ATACGTCAG', 23_524],
    ['Dog', 'AGCATGAC', 2442],
    ['Mouse', 'GACTG', 234],
    ['Cat', 'AAATGCA', 2342]
  ]

  @file.write(@table2)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, delimiter: ';', include_header: true)

  assert_equal(expected, rows)
end
160
+
161
# The fixture has columns 0..2, so selecting index 3 must raise CSVError.
test "CSV.read_array with :select and out-of-bounds numerical value raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, select: [3])
  end
end
166
+
167
# select: [2, 0] keeps only those columns, in the order they were requested.
test "CSV.read_array with :select of numerical values return correctly" do
  expected = [
    [23_524, 'Human'],
    [2442, 'Dog'],
    [234, 'Mouse'],
    [2342, 'Cat']
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: [2, 0])

  assert_equal(expected, rows)
end
178
+
179
# Column selection by index also applies to the header row when it is included.
test "CSV.read_array with :select of numerical values and :include_header returns correctly" do
  expected = [
    %w[Count Organism],
    [23_524, 'Human'],
    [2442, 'Dog'],
    [234, 'Mouse'],
    [2342, 'Cat']
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: [2, 0], include_header: true)

  assert_equal(expected, rows)
end
191
+
192
# The range 1..3 reaches past the last column (index 2) and must raise CSVError.
test "CSV.read_array with :select and out-of-bounds range raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, select: 1..3)
  end
end
197
+
198
# select: 0..1 keeps the Organism and Sequence columns only.
test "CSV.read_array with :select of range return correctly" do
  expected = [
    %w[Human ATACGTCAG],
    %w[Dog AGCATGAC],
    %w[Mouse GACTG],
    %w[Cat AAATGCA]
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: 0..1)

  assert_equal(expected, rows)
end
209
+
210
# Range selection combined with include_header: the header is trimmed to the
# same two columns as the data rows.
test "CSV.read_array with :select of range and :include_header returns correctly" do
  expected = [
    %w[Organism Sequence],
    %w[Human ATACGTCAG],
    %w[Dog AGCATGAC],
    %w[Mouse GACTG],
    %w[Cat AAATGCA]
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: 0..1, include_header: true)

  assert_equal(expected, rows)
end
222
+
223
# Selecting by column name on the headerless table must raise CSVError.
test "CSV.read_array with :select of non-numerical values and no header raises" do
  @file.write(@table3)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, select: ['Organism'])
  end
end
228
+
229
# "ount" is not a header field (only a substring of "Count"), so the call
# must raise CSVError.
test "CSV.read_array with :select of non-numerical values not matching header raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, select: ['ount'])
  end
end
234
+
235
# Columns may be selected by header name, given as either a String or a Symbol.
test "CSV.read_array with :select of non-numerical values returns correctly" do
  expected = [
    [23_524, 'Human'],
    [2442, 'Dog'],
    [234, 'Mouse'],
    [2342, 'Cat']
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: ['Count', :Organism])

  assert_equal(expected, rows)
end
246
+
247
# Name-based selection with include_header: the header row lists the selected
# names in the requested order.
test "CSV.read_array with :select of non-numerical values and :include_header returns correctly" do
  expected = [
    %w[Count Organism],
    [23_524, 'Human'],
    [2442, 'Dog'],
    [234, 'Mouse'],
    [2342, 'Cat']
  ]

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, select: ['Count', :Organism], include_header: true)

  assert_equal(expected, rows)
end
259
+
260
# Rejecting column index 3, which the three-column fixture lacks, must raise
# CSVError.
test "CSV.read_array with :reject and out-of-bounds numerical value raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, reject: [3])
  end
end
265
+
266
# reject: [2, 0] drops Count and Organism, leaving one-element Sequence rows.
test "CSV.read_array with :reject of numerical values return correctly" do
  expected = %w[ATACGTCAG AGCATGAC GACTG AAATGCA].map { |sequence| [sequence] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: [2, 0])

  assert_equal(expected, rows)
end
277
+
278
# Index-based rejection with include_header: only the "Sequence" header and
# column survive.
test "CSV.read_array with :reject of numerical values and :include_header returns correctly" do
  expected = %w[Sequence ATACGTCAG AGCATGAC GACTG AAATGCA].map { |cell| [cell] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: [2, 0], include_header: true)

  assert_equal(expected, rows)
end
290
+
291
# A reject range of 1..3 extends past the last column (index 2) and must
# raise CSVError.
test "CSV.read_array with :reject and out-of-bounds range raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, reject: 1..3)
  end
end
296
+
297
# reject: 0..1 removes Organism and Sequence, leaving one-element Count rows.
test "CSV.read_array with :reject of range return correctly" do
  expected = [23_524, 2442, 234, 2342].map { |count| [count] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: 0..1)

  assert_equal(expected, rows)
end
308
+
309
# Range-based rejection with include_header: the "Count" header precedes the
# remaining integer column.
test "CSV.read_array with :reject of range and :include_header returns correctly" do
  expected = ['Count', 23_524, 2442, 234, 2342].map { |cell| [cell] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: 0..1, include_header: true)

  assert_equal(expected, rows)
end
321
+
322
# Rejecting by column name on the headerless table must raise CSVError.
test "CSV.read_array with :reject of non-numerical values and no header raises" do
  @file.write(@table3)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, reject: ['Organism'])
  end
end
327
+
328
# "ount" does not name any header field, so rejecting it must raise CSVError.
test "CSV.read_array with :reject of non-numerical values not matching header raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_array(@file.path, reject: ['ount'])
  end
end
333
+
334
# Columns may be rejected by header name (String or Symbol); only Sequence
# values remain.
test "CSV.read_array with :reject of non-numerical values returns correctly" do
  expected = %w[ATACGTCAG AGCATGAC GACTG AAATGCA].map { |sequence| [sequence] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: ['Count', :Organism])

  assert_equal(expected, rows)
end
345
+
346
# Name-based rejection with include_header: the result starts with the
# remaining "Sequence" header.
test "CSV.read_array with :reject of non-numerical values and :include_header returns correctly" do
  expected = %w[Sequence ATACGTCAG AGCATGAC GACTG AAATGCA].map { |cell| [cell] }

  @file.write(@table)
  @file.rewind
  rows = BioDSL::CSV.read_array(@file.path, reject: ['Count', :Organism], include_header: true)

  assert_equal(expected, rows)
end
358
+
359
# read_hash returns one Hash per data row, keyed by the header names as Symbols.
test "CSV.read_hash returns correctly" do
  expected = [
    { Organism: 'Human', Sequence: 'ATACGTCAG', Count: 23_524 },
    { Organism: 'Dog', Sequence: 'AGCATGAC', Count: 2442 },
    { Organism: 'Mouse', Sequence: 'GACTG', Count: 234 },
    { Organism: 'Cat', Sequence: 'AAATGCA', Count: 2342 }
  ]

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path)

  assert_equal(expected, records)
end
370
+
371
# Without a header line the keys are expected to be :V0, :V1 and :V2.
test "CSV.read_hash with no header returns correctly" do
  expected = [
    { V0: 'Human', V1: 'ATACGTCAG', V2: 5.24 },
    { V0: 'Dog', V1: 'AGCATGAC', V2: 4.2 },
    { V0: 'Mouse', V1: 'GACTG', V2: 3.4 },
    { V0: 'Cat', V1: 'AAATGCA', V2: 3.42 }
  ]

  @file.write(@table3)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path)

  assert_equal(expected, records)
end
382
+
383
# The ';'-separated table yields the same records when delimiter: ";" is given.
test "CSV.read_hash with :delimiter returns correctly" do
  expected = [
    { Count: 23_524, Organism: 'Human', Sequence: 'ATACGTCAG' },
    { Count: 2442, Organism: 'Dog', Sequence: 'AGCATGAC' },
    { Count: 234, Organism: 'Mouse', Sequence: 'GACTG' },
    { Count: 2342, Organism: 'Cat', Sequence: 'AAATGCA' }
  ]

  @file.write(@table2)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, delimiter: ';')

  assert_equal(expected, records)
end
394
+
395
# The fixture has columns 0..2, so selecting index 3 must raise CSVError.
test "CSV.read_hash with :select and out-of-bounds numerical value raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, select: [3])
  end
end
400
+
401
# select: [2, 0] limits every record to the Count and Organism keys.
test "CSV.read_hash with :select of numerical values return correctly" do
  expected = [
    { Count: 23_524, Organism: 'Human' },
    { Count: 2442, Organism: 'Dog' },
    { Count: 234, Organism: 'Mouse' },
    { Count: 2342, Organism: 'Cat' }
  ]

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, select: [2, 0])

  assert_equal(expected, records)
end
412
+
413
# The range 1..3 reaches past the last column (index 2) and must raise CSVError.
test "CSV.read_hash with :select and out-of-bounds range raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, select: 1..3)
  end
end
418
+
419
# select: 0..1 limits every record to the Organism and Sequence keys.
test "CSV.read_hash with :select of range return correctly" do
  expected = [
    { Organism: 'Human', Sequence: 'ATACGTCAG' },
    { Organism: 'Dog', Sequence: 'AGCATGAC' },
    { Organism: 'Mouse', Sequence: 'GACTG' },
    { Organism: 'Cat', Sequence: 'AAATGCA' }
  ]

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, select: 0..1)

  assert_equal(expected, records)
end
430
+
431
# Selecting by column name on the headerless table must raise CSVError.
test "CSV.read_hash with :select of non-numerical values and no header raises" do
  @file.write(@table3)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, select: ['Organism'])
  end
end
436
+
437
# "ount" does not name any header field, so selecting it must raise CSVError.
test "CSV.read_hash with :select of non-numerical values not matching header raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, select: ['ount'])
  end
end
442
+
443
# Keys may be selected by header name, given as either a String or a Symbol.
test "CSV.read_hash with :select of non-numerical values returns correctly" do
  expected = [
    { Count: 23_524, Organism: 'Human' },
    { Count: 2442, Organism: 'Dog' },
    { Count: 234, Organism: 'Mouse' },
    { Count: 2342, Organism: 'Cat' }
  ]

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, select: ['Count', :Organism])

  assert_equal(expected, records)
end
454
+
455
# Rejecting column index 3, which the three-column fixture lacks, must raise
# CSVError.
test "CSV.read_hash with :reject and out-of-bounds numerical value raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, reject: [3])
  end
end
460
+
461
# reject: [2, 0] removes Count and Organism, so each record holds only :Sequence.
test "CSV.read_hash with :reject of numerical values return correctly" do
  expected = %w[ATACGTCAG AGCATGAC GACTG AAATGCA].map { |sequence| { Sequence: sequence } }

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, reject: [2, 0])

  assert_equal(expected, records)
end
472
+
473
# A reject range of 1..3 extends past the last column (index 2) and must
# raise CSVError.
test "CSV.read_hash with :reject and out-of-bounds range raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, reject: 1..3)
  end
end
478
+
479
# reject: 0..1 removes Organism and Sequence, so each record holds only :Count.
test "CSV.read_hash with :reject of range return correctly" do
  expected = [23_524, 2442, 234, 2342].map { |count| { Count: count } }

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, reject: 0..1)

  assert_equal(expected, records)
end
490
+
491
# Rejecting by column name on the headerless table must raise CSVError.
test "CSV.read_hash with :reject of non-numerical values and no header raises" do
  @file.write(@table3)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, reject: ['Organism'])
  end
end
496
+
497
# "ount" does not name any header field, so rejecting it must raise CSVError.
test "CSV.read_hash with :reject of non-numerical values not matching header raises" do
  @file.write(@table)
  @file.rewind

  assert_raise(BioDSL::CSVError) do
    BioDSL::CSV.read_hash(@file.path, reject: ['ount'])
  end
end
502
+
503
# Keys may be rejected by header name (String or Symbol); only :Sequence remains.
test "CSV.read_hash with :reject of non-numerical values returns correctly" do
  expected = %w[ATACGTCAG AGCATGAC GACTG AAATGCA].map { |sequence| { Sequence: sequence } }

  @file.write(@table)
  @file.rewind
  records = BioDSL::CSV.read_hash(@file.path, reject: ['Count', :Organism])

  assert_equal(expected, records)
end
514
+ end
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Verifies that the BioDSL debug flag can be switched on and read back.
class DebugTest < Test::Unit::TestCase
  # Switches the flag back off after each test.
  def teardown
    BioDSL.debug = false
  end

  test "BioDSL::debug returns correctly" do
    BioDSL.debug = true

    assert_equal(true, BioDSL.debug)
  end
end
42
+