BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for Seq#each_digest, called both without a block (result converted
# with to_a) and with a block.
class TestDigest < Test::Unit::TestCase
  # Builds a fresh 43-residue sequence before every test. The upper-case
  # stretches (GGATCC and GAATTC) are the sites the patterns below target.
  def setup
    @entry = BioDSL::Seq.new(seq: "cgatcgatcGGATCCgagagggtgtgtagtgGAATTCcgctgc")
  end

  test "#each_digest with bad residue in pattern raises" do
    assert_raise(BioDSL::DigestError) { @entry.each_digest("X", 0).to_a }
  end

  # Pattern GGATCC with cut offset 1: two fragments are expected, named after
  # their [begin-end] coordinates in the source sequence.
  test "#each_digest returns correctly" do
    digests = @entry.each_digest("GGATCC", 1).to_a
    assert_equal(2, digests.size)
    assert_equal("[0-9]", digests.first.seq_name)
    assert_equal("cgatcgatcG", digests.first.seq)
    assert_equal("[10-42]", digests.last.seq_name)
    assert_equal("GATCCgagagggtgtgtagtgGAATTCcgctgc", digests.last.seq)
  end

  # The pattern sits at the very start of the sequence, so an offset of -1
  # points before position 0; the whole sequence is expected back uncut.
  test "#each_digest with negative offset returns correctly" do
    digests = @entry.each_digest("CGATCG", -1).to_a
    assert_equal(1, digests.size)
    assert_equal("[0-42]", digests.first.seq_name)
    assert_equal(@entry.seq, digests.first.seq)
  end

  # The pattern reaches the end of the sequence, so an offset of 15 points
  # past the last residue; the whole sequence is expected back uncut.
  test "#each_digest with offset out of bounds returns correctly" do
    digests = @entry.each_digest("AATTCcgctgc", 15).to_a
    assert_equal(1, digests.size)
    assert_equal("[0-42]", digests.first.seq_name)
    assert_equal(@entry.seq, digests.first.seq)
  end

  # Only the first yielded fragment is checked; break leaves the block after it.
  test "#each_digest in block context returns correctly" do
    @entry.each_digest("GGATCC", 1) do |digest|
      assert_equal("[0-9]", digest.seq_name)
      assert_equal("cgatcgatcG", digest.seq)
      break
    end
  end
end
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for the patmatch and patscan methods that BioDSL::Dynamic adds to a
# sequence object when it is extended with the module.
class TestDynamic < Test::Unit::TestCase
  # A four-residue sequence extended with BioDSL::Dynamic, rebuilt per test.
  def setup
    @p = BioDSL::Seq.new(seq_name: "test", seq: "atcg")
    @p.extend(BioDSL::Dynamic)
  end

  test "#patmatch with no match returns nil" do
    assert_nil(@p.patmatch("gggg"))
  end

  test "#patmatch with perfect match returns correctly" do
    assert_whole_match(@p.patmatch("atcg"))
  end

  test "#patmatch with perfect match with ambiguity codes returns correctly" do
    assert_whole_match(@p.patmatch("nnnn"))
  end

  test "#patmatch with one mismatch and edit dist zero returns nil" do
    assert_nil(@p.patmatch("aCcg"))
  end

  test "#patmatch with one mismatch and edit dist one returns correctly" do
    assert_whole_match(@p.patmatch("aCcg", 0, 1), mis: 1)
  end

  test "#patmatch with two mismatch and edit dist one returns nil" do
    assert_nil(@p.patmatch("aGcA", 0, 1))
  end

  test "#patmatch with one insertion and edit dist zero returns nil" do
    assert_nil(@p.patmatch("atGcg"))
  end

  test "#patmatch with one insertion and edit dist one returns correctly" do
    assert_whole_match(@p.patmatch("atGcg", 0, 1), ins: 1)
  end

  test "#patmatch with two insertions and edit dist one returns nil" do
    assert_nil(@p.patmatch("atGcTg", 0, 1))
  end

  test "#patmatch with two insertions and edit dist two returns correctly" do
    assert_whole_match(@p.patmatch("atGcTg", 0, 2), ins: 2)
  end

  test "#patmatch with one deletion and edit distance zero returns nil" do
    assert_nil(@p.patmatch("acg"))
  end

  test "#patmatch with one deletion and edit distance one returns correctly" do
    assert_whole_match(@p.patmatch("acg", 0, 1), del: 1)
  end

  test "#patscan locates three patterns ok" do
    subject = BioDSL::Seq.new(seq_name: "test", seq: "ataacgagctagctagctagctgactac")
    subject.extend(BioDSL::Dynamic)
    hits = subject.patscan("tag")
    assert_equal(3, hits.count)
  end

  test "#patscan with pos locates two patterns ok" do
    subject = BioDSL::Seq.new(seq_name: "test", seq: "ataacgagctagctagctagctgactac")
    subject.extend(BioDSL::Dynamic)
    hits = subject.patscan("tag", 10)
    assert_equal(2, hits.count)
  end

  private

  # Asserts that +hit+ covers the whole "atcg" fixture (begin 0, length 4)
  # with the given numbers of mismatches, insertions and deletions. The
  # assertions run in the order: beg, match, mis, ins, del, length.
  def assert_whole_match(hit, mis: 0, ins: 0, del: 0)
    assert_equal(0, hit.beg)
    assert_equal("atcg", hit.match)
    assert_equal(mis, hit.mis)
    assert_equal(ins, hit.ins)
    assert_equal(del, hit.del)
    assert_equal(4, hit.length)
  end
end
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for Seq#each_homopolymer, with and without a block.
class TestHomopolymer < Test::Unit::TestCase
  # Sequence with a lower-case t directly followed by six upper-case T's.
  def setup
    @entry = BioDSL::Seq.new(seq: "atcgatTTTTTTcggttga")
  end

  test "#each_homopolymer with bad min raises" do
    [0, -1].each do |bad_min|
      assert_raise(BioDSL::HomopolymerError) { @entry.each_homopolymer(bad_min) }
    end
  end

  test "#each_homopolymer returns correctly" do
    found = @entry.each_homopolymer(3)
    assert_equal(1, found.size)

    stretch = found.first
    assert_equal(7, stretch.length)
    assert_equal("TTTTTTT", stretch.pattern)
    assert_equal(5, stretch.pos)
  end

  # Only the first yielded homopolymer is inspected before leaving the block.
  test "#each_homopolymer in block context returns correctly" do
    @entry.each_homopolymer(3) do |stretch|
      assert_equal(7, stretch.length)
      assert_equal("TTTTTTT", stretch.pattern)
      assert_equal(5, stretch.pos)
      break
    end
  end
end
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for Seq#to_kmers (option validation and returned kmer integers) and
# for BioDSL::Kmer.to_oligos.
class TestKmer < Test::Unit::TestCase
  # 20-residue mixed-case sequence with an ambiguous N at index 1. Tests that
  # need quality data assign a 20-character quality string whose character at
  # index 9 ('!') differs from all the others ('I').
  def setup
    @entry = BioDSL::Seq.new(seq: "aNacCGactGAtacACGTAC")
  end

  test "#to_kmers without argument raises" do
    assert_raise(ArgumentError) { @entry.to_kmers }
  end

  test "#to_kmers without :kmer_size raises" do
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(step_size: 1) }
  end

  test "#to_kmers with bad :kmer_size raises" do
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 0) }
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 13) }
  end

  test "#to_kmers with OK :kmer_size don't raise" do
    assert_nothing_raised { @entry.to_kmers(kmer_size: 1) }
    assert_nothing_raised { @entry.to_kmers(kmer_size: 12) }
  end

  test "#to_kmers with bad :step_size raises" do
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 8, step_size: 0) }
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 8, step_size: 13) }
  end

  test "#to_kmers with OK :step_size don't raise" do
    assert_nothing_raised { @entry.to_kmers(kmer_size: 8, step_size: 1) }
    assert_nothing_raised { @entry.to_kmers(kmer_size: 8, step_size: 12) }
  end

  test "#to_kmers with bad :score_min raises" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 8, score_min: -1) }
    assert_raise(BioDSL::KmerError) { @entry.to_kmers(kmer_size: 8, score_min: 41) }
  end

  test "#to_kmers with OK :score_min don't raise" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    assert_nothing_raised { @entry.to_kmers(kmer_size: 8, score_min: 0) }
    assert_nothing_raised { @entry.to_kmers(kmer_size: 8, score_min: 40) }
  end

  test "#to_kmers with kmer_size: 1 returns correctly" do
    result = @entry.to_kmers(kmer_size: 1)
    expected = [0, 1, 2, 3]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 1 and step_size: 2 returns correctly" do
    result = @entry.to_kmers(kmer_size: 1, step_size: 2)
    expected = [0, 1, 2, 3]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 5 returns correctly" do
    result = @entry.to_kmers(kmer_size: 5)
    expected = [72, 139, 156, 172, 180, 290, 452, 557, 625, 690, 713, 722, 786, 807]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 5 and step_size: 2 returns correctly" do
    result = @entry.to_kmers(kmer_size: 5, step_size: 2)
    expected = [72, 139, 156, 172, 180, 452, 713]
    assert_equal(expected, result)
  end

  # The option key is :score_min, the same key the validation tests above
  # pass, so the threshold named in each description is the one exercised.
  test "#to_kmers with kmer_size: 1 and score_min: 20 returns correctly" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    result = @entry.to_kmers(kmer_size: 1, score_min: 20)
    expected = [0, 1, 2, 3]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 1 and score_min: 20 and step_size: 2 returns correctly" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    result = @entry.to_kmers(kmer_size: 1, score_min: 20, step_size: 2)
    expected = [0, 1, 2, 3]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 5 and score_min: 20 returns correctly" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    result = @entry.to_kmers(kmer_size: 5, score_min: 20)
    expected = [72, 139, 172, 180, 290, 557, 690, 713, 722]
    assert_equal(expected, result)
  end

  test "#to_kmers with kmer_size: 5 and score_min: 20 and step_size: 2 returns correctly" do
    @entry.qual = "IIIIIIIII!IIIIIIIIII"
    result = @entry.to_kmers(kmer_size: 5, score_min: 20, step_size: 2)
    expected = [72, 139, 172, 180, 713]
    assert_equal(expected, result)
  end

  # Converts the kmer integers back to oligo strings of length 5.
  test "Kmer#to_oligos return correctly" do
    kmers = @entry.to_kmers(kmer_size: 5)
    expected = %w{ataca acacg actga accga acgta tacac tgata cacgt ctgat ccgac cgact cgtac gatac gactg}
    assert_equal(expected, BioDSL::Kmer.to_oligos(kmers, 5))
  end
end
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for Seq#translate and Seq#translate!: the error cases and the
# translated result.
class TestTranslate < Test::Unit::TestCase
  # 21-residue DNA sequence (seven codons), rebuilt before every test.
  def setup
    @entry = BioDSL::Seq.new(seq: "atcgatcgatcgtacggttga", type: :dna)
  end

  test "#translate with bad type raises" do
    @entry.type = nil
    assert_raise(BioDSL::SeqError) { @entry.translate }
  end

  # 20 residues, i.e. not a multiple of three.
  test "#translate with bad length raises" do
    @entry.seq = "atcgatcgatcgtacggtga"
    assert_raise(BioDSL::SeqError) { @entry.translate }
  end

  test "#translate with bad translation table raises" do
    @entry.seq = "atcgatcgatcgtacggttga"
    assert_raise(BioDSL::SeqError) { @entry.translate(0) }
  end

  test "#translate with bad start codon raises" do
    @entry.seq = "ttagatcgatcgtacggttga"
    assert_raise(BioDSL::SeqError) { @entry.translate }
  end

  # The codon "xxx" contains residues outside the nucleotide alphabet.
  test "#translate with bad codon raises" do
    @entry.seq = "atggatcgaxxxtcgtacggttga"
    assert_raise(BioDSL::SeqError) { @entry.translate }
  end

  # The non-bang variant must hand back a protein entry and leave the
  # receiver's sequence and type untouched.
  test "#translate returns correctly" do
    entry = @entry.translate
    assert_equal("MDRSYG", entry.seq)
    assert_equal(:protein, entry.type)
    assert_equal("atcgatcgatcgtacggttga", @entry.seq)
    assert_equal(:dna, @entry.type)
  end

  # The bang variant rewrites the receiver in place.
  test "#translate! returns correctly" do
    @entry.translate!
    assert_equal("MDRSYG", @entry.seq)
    assert_equal(:protein, @entry.type)
  end
end
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
# Tests for Seq#quality_trim, #quality_trim_left and #quality_trim_right,
# together with their in-place (bang) variants.
class TestTrim < Test::Unit::TestCase
  # Builds a 46-residue entry whose quality string rises to 'I' in the middle
  # and falls off again towards both ends.
  #
  # The two ruler lines give the tens and ones digit of the score for the
  # character underneath. NOTE(review): the scores presumably assume a
  # base-33 quality encoding; the ruler ignores the two '3' characters
  # inserted at indices 14 and 31, which look like deliberate low-quality
  # dips inside the otherwise monotonic flanks - confirm against the library.
  def setup
    @entry = BioDSL::Seq.new
    #                2         3         44         3         2
    #              8901234567890123456789009876543210987654321098
    @entry.qual = "3456789:;<=>?@3BCDEFGHIIHGFEDCB3@?>=<;:9876543"
    @entry.seq  = "abcdefghijklmnopqrstuvxxvutsrqponmlkjihgfedcba"
  end

  test "#quality_trim with nil seq raises" do
    @entry.seq = nil
    assert_raise(BioDSL::TrimError) { @entry.quality_trim(20, 1) }
  end

  test "#quality_trim with nil qual raises" do
    @entry.qual = nil
    assert_raise(BioDSL::TrimError) { @entry.quality_trim(20, 1) }
  end

  test "#quality_trim with bad min_qual raises" do
    assert_raise(BioDSL::TrimError) { @entry.quality_trim(-1, 1) }
    assert_raise(BioDSL::TrimError) { @entry.quality_trim(41, 1) }
  end

  test "#quality_trim with bad min_len raises" do
    assert_raise(BioDSL::TrimError) { @entry.quality_trim(20, 0) }
  end

  # The non-bang variants must return a trimmed entry and leave the receiver's
  # seq and qual unchanged.
  test "#quality_trim returns correctly" do
    trimmed = @entry.quality_trim(30, 3)
    assert_equal("pqrstuvxxvutsrqp", trimmed.seq)
    assert_equal("BCDEFGHIIHGFEDCB", trimmed.qual)
    assert_equal("abcdefghijklmnopqrstuvxxvutsrqponmlkjihgfedcba", @entry.seq)
    assert_equal("3456789:;<=>?@3BCDEFGHIIHGFEDCB3@?>=<;:9876543", @entry.qual)
  end

  test "#quality_trim! returns correctly" do
    @entry.quality_trim!(30, 3)
    assert_equal("pqrstuvxxvutsrqp", @entry.seq)
    assert_equal("BCDEFGHIIHGFEDCB", @entry.qual)
  end

  test "#quality_trim_left returns correctly" do
    trimmed = @entry.quality_trim_left(30, 3)
    assert_equal("pqrstuvxxvutsrqponmlkjihgfedcba", trimmed.seq)
    assert_equal("BCDEFGHIIHGFEDCB3@?>=<;:9876543", trimmed.qual)
    assert_equal("abcdefghijklmnopqrstuvxxvutsrqponmlkjihgfedcba", @entry.seq)
    assert_equal("3456789:;<=>?@3BCDEFGHIIHGFEDCB3@?>=<;:9876543", @entry.qual)
  end

  test "#quality_trim_left! returns correctly" do
    @entry.quality_trim_left!(30, 3)
    assert_equal("pqrstuvxxvutsrqponmlkjihgfedcba", @entry.seq)
    assert_equal("BCDEFGHIIHGFEDCB3@?>=<;:9876543", @entry.qual)
  end

  test "#quality_trim_right returns correctly" do
    trimmed = @entry.quality_trim_right(30, 3)
    assert_equal("abcdefghijklmnopqrstuvxxvutsrqpo", trimmed.seq)
    assert_equal("3456789:;<=>?@3BCDEFGHIIHGFEDCB3", trimmed.qual)
    assert_equal("abcdefghijklmnopqrstuvxxvutsrqponmlkjihgfedcba", @entry.seq)
    assert_equal("3456789:;<=>?@3BCDEFGHIIHGFEDCB3@?>=<;:9876543", @entry.qual)
  end

  test "#quality_trim_right! returns correctly" do
    @entry.quality_trim_right!(30, 3)
    assert_equal("abcdefghijklmnopqrstuvxxvutsrqpo", @entry.seq)
    assert_equal("3456789:;<=>?@3BCDEFGHIIHGFEDCB3", @entry.qual)
  end
end