BioDSL 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,790 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
+ class TestSeq < Test::Unit::TestCase
33
# Test::Unit hook: assigns a BioDSL::Seq built without arguments to @entry.
def setup
  @entry = BioDSL::Seq.new()
end
36
+
37
# A 4-residue sequence combined with a 3-character quality string must make
# the constructor raise BioDSL::SeqError.
test "BioDSL::Seq.new with different length SEQ and SCORES raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "hhh")
  end
end
40
+
41
# Seq.new_bp takes a record hash keyed by :SEQ_NAME, :SEQ, :SEQ_TYPE and
# :SCORES; each value must come back through the matching accessor.
test "BioDSL::Seq.new_bp returns correctly" do
  record = { SEQ_NAME: "test", SEQ: "ATCG", SEQ_TYPE: :dna, SCORES: "hhhh" }
  built  = BioDSL::Seq.new_bp(record)

  { seq_name: "test", seq: "ATCG", type: :dna, qual: "hhhh" }.each do |reader, expected|
    assert_equal(expected, built.public_send(reader))
  end
end
49
+
50
# An unknown sequence type is rejected.
test "BioDSL::Seq.generate_oligos with bad type raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.generate_oligos(2, :foo)
  end
end

# A length of zero is rejected.
test "BioDSL::Seq.generate_oligos with bad length raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.generate_oligos(0, :dna)
  end
end

# For length 2 the expected list is every ordered pair of residues, with the
# first residue varying slowest (aa at ac ag ta ...), in the residue order
# listed per type below.
test "BioDSL::Seq.generate_oligos returns correctly" do
  residues_by_type = {
    dna:     %w[a t c g],
    rna:     %w[a u c g],
    protein: %w[f l s y c w p h q r i m t n k v a d e g]
  }

  residues_by_type.each do |type, residues|
    expected = residues.product(residues).map(&:join)
    assert_equal(expected, BioDSL::Seq.generate_oligos(2, type))
  end
end
87
+
88
# Names that follow neither Illumina layout are rejected in either order.
test "BioDSL::Seq.check_name_pair with badly formatted names raises" do
  foo = BioDSL::Seq.new(seq_name: "foo")
  bar = BioDSL::Seq.new(seq_name: "bar")

  [[foo, bar], [bar, foo]].each do |first, second|
    assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(first, second) }
  end
end

# Same name layout but different coordinates: not a pair.
test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and no match raises" do
  read1, read2 = [
    'HWUSI-EAS100R:6:73:941:1973#0/1',
    'HWUSI-EAS100R:6:73:491:1793#0/2'
  ].map { |name| BioDSL::Seq.new(seq_name: name) }

  assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(read1, read2) }
end

test "BioDSL::Seq.check_name_pair with Illumina1.8 names and no match raises" do
  read1, read2 = [
    'EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
    'EAS139:136:FC706VJ:2:2104:15433:179393 2:Y:18:ATCACG'
  ].map { |name| BioDSL::Seq.new(seq_name: name) }

  assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(read1, read2) }
end

# Names that differ only in the read number are accepted.
test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and match don't raise" do
  read1, read2 = [
    'HWUSI-EAS100R:6:73:941:1973#0/1',
    'HWUSI-EAS100R:6:73:941:1973#0/2'
  ].map { |name| BioDSL::Seq.new(seq_name: name) }

  assert_nothing_raised { BioDSL::Seq.check_name_pair(read1, read2) }
end

test "BioDSL::Seq.check_name_pair with Illumina1.8 names and match don't raise" do
  read1, read2 = [
    'EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG',
    'EAS139:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG'
  ].map { |name| BioDSL::Seq.new(seq_name: name) }

  assert_nothing_raised { BioDSL::Seq.check_name_pair(read1, read2) }
end
118
+
119
# Type predicates: each must return exactly false while no type is set and
# exactly true once the matching type is assigned. assert_equal is used
# throughout so a failure reports the value actually returned.
test "#is_dna? with no sequence type returns false" do
  assert_equal(false, @entry.is_dna?)
end

test "#is_dna? with dna sequence type returns true" do
  @entry.type = :dna
  assert_equal(true, @entry.is_dna?)
end

test "#is_rna? with no sequence type returns false" do
  assert_equal(false, @entry.is_rna?)
end

test "#is_rna? with rna sequence type returns true" do
  @entry.type = :rna
  assert_equal(true, @entry.is_rna?)
end

test "#is_protein? with no sequence type returns false" do
  assert_equal(false, @entry.is_protein?)
end

test "#is_protein? with protein sequence type returns true" do
  @entry.type = :protein
  assert_equal(true, @entry.is_protein?)
end
145
+
146
# #type_guess needs a sequence to inspect.
test "#type_guess without sequence raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.type_guess
  end
end

test "#type_guess with protein returns protein" do
  @entry.seq = 'atcatcrFgatcg'
  guessed = @entry.type_guess
  assert_equal(:protein, guessed)
end

test "#type_guess with rna returns rna" do
  @entry.seq = 'atcatcrUgatcg'
  guessed = @entry.type_guess
  assert_equal(:rna, guessed)
end

test "#type_guess with dna returns dna" do
  @entry.seq = 'atcatcgatcg'
  guessed = @entry.type_guess
  assert_equal(:dna, guessed)
end
164
+
165
# #type_guess! needs a sequence to inspect.
test "#type_guess! without sequence raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.type_guess!
  end
end

# The bang variant is checked through the entry's type attribute afterwards.
test "#type_guess! with protein returns protein" do
  @entry.seq = 'atcatcrFgatcg'
  @entry.type_guess!
  stored = @entry.type
  assert_equal(:protein, stored)
end

test "#type_guess! with rna returns rna" do
  @entry.seq = 'atcatcrUgatcg'
  @entry.type_guess!
  stored = @entry.type
  assert_equal(:rna, stored)
end

test "#type_guess! with dna returns dna" do
  @entry.seq = 'atcatcgatcg'
  @entry.type_guess!
  stored = @entry.type
  assert_equal(:dna, stored)
end
186
+
187
# #length reports the number of residues in the sequence.
test "#length returns correctly" do
  @entry.seq = 'ATCG'
  assert_equal(4, @entry.length)
end
191
+
192
# Of the eight characters in 'ATCG.-~_', the expected indel count is 4.
test "#indels returns correctly" do
  @entry.seq = 'ATCG.-~_'
  indel_count = @entry.indels
  assert_equal(4, indel_count)
end
196
+
197
test "#to_rna with no sequence raises" do
  @entry.type = :dna
  assert_raise(BioDSL::SeqError) do
    @entry.to_rna
  end
end

# An entry already typed as RNA cannot be converted to RNA.
test "#to_rna with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :rna
  assert_raise(BioDSL::SeqError) do
    @entry.to_rna
  end
end

# T/t become U/u; letter case is kept.
test "#to_rna transcribes correctly" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna
  transcribed = @entry.to_rna
  assert_equal("AUCGaucg", transcribed)
end

test "#to_rna changes entry type to rna" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna
  @entry.to_rna
  type_after = @entry.type
  assert_equal(:rna, type_after)
end
220
+
221
test "#to_dna with no sequence raises" do
  @entry.type = :rna
  assert_raise(BioDSL::SeqError) do
    @entry.to_dna
  end
end

# An entry already typed as DNA cannot be converted to DNA.
test "#to_dna with bad type raises" do
  @entry.seq  = 'AUCG'
  @entry.type = :dna
  assert_raise(BioDSL::SeqError) do
    @entry.to_dna
  end
end

# U/u become T/t; letter case is kept.
test "#to_dna transcribes correctly" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna
  transcribed = @entry.to_dna
  assert_equal("ATCGatcg", transcribed)
end

test "#to_dna changes entry type to dna" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna
  @entry.to_dna
  type_after = @entry.type
  assert_equal(:dna, type_after)
end
244
+
245
# With only name and sequence set, the record holds :SEQ_NAME, :SEQ and
# :SEQ_LEN.
test "#to_bp returns correct record" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'
  expected = { SEQ_NAME: "test", SEQ: "ATCG", SEQ_LEN: 4 }
  assert_equal(expected, @entry.to_bp)
end
250
+
251
# #to_fasta requires both a non-empty name and a non-empty sequence.
test "#to_fasta with missing seq_name raises" do
  @entry.seq = 'ATCG'
  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end

test "#to_fasta with empty seq_name raises" do
  @entry.seq_name = ''
  @entry.seq      = 'ATCG'
  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end

test "#to_fasta with missing seq raises" do
  @entry.seq_name = 'test'
  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end

test "#to_fasta with empty seq raises" do
  @entry.seq_name = 'test'
  @entry.seq      = ''
  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end

test "#to_fasta returns correct entry" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'
  formatted = @entry.to_fasta
  assert_equal(">test\nATCG\n", formatted)
end

# The argument 2 wraps the sequence into lines of two residues.
test "#to_fasta wraps correctly" do
  wrapped = BioDSL::Seq.new(seq_name: "test", seq: "ATCG").to_fasta(2)
  assert_equal(">test\nAT\nCG\n", wrapped)
end
283
+
284
# Expected FASTQ layout: @name, sequence, '+', quality string.
test "#to_fastq returns correct entry" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'
  @entry.qual     = 'hhhh'
  formatted = @entry.to_fastq
  assert_equal("@test\nATCG\n+\nhhhh\n", formatted)
end
290
+
291
# "AUCG" (with a U) must make #to_key raise.
test "#to_key with bad residue raises" do
  with_u = BioDSL::Seq.new(seq_name: "test", seq: "AUCG")
  assert_raise(BioDSL::SeqError) do
    with_u.to_key
  end
end

# "ATCG" is expected to map to the integer key 54.
test "#to_key returns correctly" do
  key = BioDSL::Seq.new(seq_name: "test", seq: "ATCG").to_key
  assert_equal(54, key)
end
300
+
301
# #reverse returns a reversed entry and leaves the receiver's sequence as is.
test "#reverse returns correctly" do
  @entry.seq = "ATCG"
  reversed = @entry.reverse
  assert_equal("GCTA", reversed.seq)
  assert_equal("ATCG", @entry.seq)
end

# #reverse! reverses the receiver's own sequence.
test "#reverse! returns correctly" do
  @entry.seq = "ATCG"
  @entry.reverse!
  seq_after = @entry.seq
  assert_equal("GCTA", seq_after)
end
313
+
314
test "#complement with no sequence raises" do
  @entry.type = :dna
  assert_raise(BioDSL::SeqError) do
    @entry.complement
  end
end

# Protein entries cannot be complemented.
test "#complement with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :protein
  assert_raise(BioDSL::SeqError) do
    @entry.complement
  end
end

# The result carries the complemented sequence; the receiver is unchanged.
test "#complement for DNA is correct" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna
  complemented = @entry.complement
  assert_equal("TAGCtagc", complemented.seq)
  assert_equal("ATCGatcg", @entry.seq)
end

test "#complement for RNA is correct" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna
  complemented = @entry.complement
  assert_equal("UAGCuagc", complemented.seq)
  assert_equal("AUCGaucg", @entry.seq)
end
340
+
341
test "#complement! with no sequence raises" do
  @entry.type = :dna
  assert_raise(BioDSL::SeqError) do
    @entry.complement!
  end
end

# Protein entries cannot be complemented.
test "#complement! with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :protein
  assert_raise(BioDSL::SeqError) do
    @entry.complement!
  end
end

# The value returned by #complement! exposes the complemented sequence.
test "#complement! for DNA is correct" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna
  result = @entry.complement!
  assert_equal("TAGCtagc", result.seq)
end

test "#complement! for RNA is correct" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna
  result = @entry.complement!
  assert_equal("UAGCuagc", result.seq)
end
363
+
364
# "ATCG" vs "atgg": the expected distance of 1 means case differences are
# not counted as mismatches.
test "#hamming_distance returns correctly" do
  upper = BioDSL::Seq.new(seq: "ATCG")
  lower = BioDSL::Seq.new(seq: "atgg")
  distance = upper.hamming_distance(lower)
  assert_equal(1, distance)
end

# "n" counts as a mismatch by default and as a match with ambiguity: true.
test "#hamming_distance with ambiguity codes return correctly" do
  plain     = BioDSL::Seq.new(seq: "ATCG")
  ambiguous = BioDSL::Seq.new(seq: "atng")

  assert_equal(1, plain.hamming_distance(ambiguous))
  assert_equal(0, plain.hamming_distance(ambiguous, ambiguity: true))
end
377
+
378
# "ATCG" against "tgncg" is expected to give an edit distance of 2.
test "#edit_distance returns correctly" do
  source = BioDSL::Seq.new(seq: "ATCG")
  target = BioDSL::Seq.new(seq: "tgncg")
  distance = source.edit_distance(target)
  assert_equal(2, distance)
end
383
+
384
# Negative and zero lengths are both rejected.
test "#generate with length < 1 raises" do
  [-10, 0].each do |length|
    assert_raise(BioDSL::SeqError) { @entry.generate(length, :dna) }
  end
end

test "#generate with bad type raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.generate(10, "foo")
  end
end

test "#generate with ok type dont raise" do
  %i[dna rna protein].each do |type|
    assert_nothing_raised { @entry.generate(10, type) }
  end
end
398
+
399
# #shuffle must leave the receiver's sequence untouched and return an entry
# whose sequence differs from it.
test "#shuffle returns correctly" do
  orig = "actgactgactgatcgatcgatcgatcgtactg"
  @entry.seq = orig.dup
  shuffled = @entry.shuffle
  assert_equal(orig, @entry.seq)
  assert_not_equal(@entry.seq, shuffled.seq)
end

test "#shuffle! returns correctly" do
  @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
  assert_not_equal(@entry.seq, @entry.shuffle!.seq)
end
411
+
412
# Concatenation joins the sequences; the result has no name, and type and
# qual are nil when the operands have none.
test "#+ without qual returns correctly" do
  left   = BioDSL::Seq.new(seq_name: "test1", seq: "at")
  right  = BioDSL::Seq.new(seq_name: "test2", seq: "cg")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_nil(joined.type)
  assert_nil(joined.qual)
end

# With typed operands carrying qual, the type is kept and qual is joined.
test "#+ with qual returns correctly" do
  left   = BioDSL::Seq.new(seq_name: "test1", seq: "at", type: :dna, qual: "II")
  right  = BioDSL::Seq.new(seq_name: "test2", seq: "cg", type: :dna, qual: "JJ")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_equal(:dna, joined.type)
  assert_equal("IIJJ", joined.qual)
end
427
+
428
# Appending a typed entry to an untyped one is rejected.
test "#<< with different types raises" do
  @entry.seq = "atcg"
  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna)
  end
end

# Qual must be present on both sides or on neither.
test "#<< with missing qual in one entry raises" do
  @entry.seq  = "atcg"
  @entry.type = :dna
  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII")
  end

  @entry.qual = "IIII"
  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna)
  end
end

test "#<< with nil qual in both entries dont raise" do
  @entry.seq = "atcg"
  assert_nothing_raised do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg")
  end
end

test "#<< with qual in both entries dont raise" do
  @entry.seq  = "atcg"
  @entry.type = :dna
  @entry.qual = "IIII"
  assert_nothing_raised do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII")
  end
end

# The appended residues end up on the receiver itself.
test "#<< without qual returns correctly" do
  @entry.seq = "atcg"
  tail = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")
  @entry << tail
  assert_equal("atcgATCG", @entry.seq)
end

test "#<< with qual returns correctly" do
  @entry.seq  = "atcg"
  @entry.type = :dna
  @entry.qual = "HHHH"
  tail = BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "IIII")
  @entry << tail
  assert_equal("atcgATCG", @entry.seq)
  assert_equal("HHHHIIII", @entry.qual)
end
467
+
468
# Indexing returns a one-residue entry that keeps name and type and carries
# the matching quality character; the source entry is unchanged.
test "#[] with qual returns correctly" do
  source = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")

  residue = source[2]

  assert_equal("test", residue.seq_name)
  assert_equal("c", residue.seq)
  assert_equal(:dna, residue.type)
  assert_equal("H", residue.qual)
  assert_equal("atcg", source.seq)
  assert_equal("FGHI", source.qual)
end

test "#[] without qual returns correctly" do
  source = BioDSL::Seq.new(seq_name: "test", seq: "atcg")

  residue = source[2]

  assert_equal("test", residue.seq_name)
  assert_equal("c", residue.seq)
  assert_nil(residue.qual)
  assert_equal("atcg", source.seq)
end
491
+
492
# Index assignment replaces residue (and qual) at the position and keeps the
# receiver's own name and type.
test "[]= with qual returns correctly" do
  target      = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
  replacement = BioDSL::Seq.new(seq_name: "foo", seq: "T", type: :dna, qual: "I")

  target[0] = replacement

  assert_equal("test", target.seq_name)
  assert_equal("Ttcg", target.seq)
  assert_equal(:dna, target.type)
  assert_equal("IGHI", target.qual)
end

test "[]= without qual returns correctly" do
  target      = BioDSL::Seq.new(seq_name: "test", seq: "atcg")
  replacement = BioDSL::Seq.new(seq_name: "foo", seq: "T")

  target[0] = replacement

  assert_equal("test", target.seq_name)
  assert_equal("Ttcg", target.seq)
end
511
+
512
# '-', '.' and '~' are stripped from the sequence.
test "#indels_remove without qual returns correctly" do
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = nil
  cleaned = @entry.indels_remove
  assert_equal("ATCGCG", cleaned.seq)
end

# Quality characters at the removed positions are dropped as well.
test "#indels_remove with qual returns correctly" do
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = "a@b@cd@fg"
  first_pass = @entry.indels_remove
  assert_equal("ATCGCG", first_pass.seq)
  second_pass = @entry.indels_remove
  assert_equal("abcdfg", second_pass.qual)
end
524
+
525
# Per-residue counts for "AAAATTTCCG"; an absent residue ("X") counts 0.
test "#composition returns correctly" do
  @entry.seq = "AAAATTTCCG"

  { "A" => 4, "T" => 3, "C" => 2, "G" => 1, "X" => 0 }.each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
533
+
534
# For "--AAAANn" the expected hard-mask value is 33.33.
test "#hard_mask returns correctly" do
  @entry.seq = "--AAAANn"
  percent = @entry.hard_mask
  assert_equal(33.33, percent)
end

# For "--AAAa" the expected soft-mask value is 25.00.
test "#soft_mask returns correctly" do
  @entry.seq = "--AAAa"
  percent = @entry.soft_mask
  assert_equal(25.00, percent)
end
543
+
544
test "#mask_seq_hard! with nil seq raises" do
  @entry.seq  = nil
  @entry.qual = ""

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_hard!(20)
  end
end

test "#mask_seq_hard! with nil qual raises" do
  @entry.seq  = ""
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_hard!(20)
  end
end

# Cutoffs of -1 and 41 are both rejected.
test "#mask_seq_hard! with bad cutoff raises" do
  [-1, 41].each do |cutoff|
    assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(cutoff) }
  end
end

# Cutoffs of 0 and 40 are both accepted.
test "#mask_seq_hard! with OK cutoff dont raise" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  [0, 40].each do |cutoff|
    assert_nothing_raised { @entry.mask_seq_hard!(cutoff) }
  end
end

# With cutoff 20 and qual "33456", A and T become N; the leading '-' stays.
test "#mask_seq_hard! returns correctly" do
  @entry.seq  = "-ATCG"
  @entry.qual = "33456"

  masked = @entry.mask_seq_hard!(20)
  assert_equal("-NNCG", masked.seq)
end
577
+
578
test "#mask_seq_soft! with nil seq raises" do
  @entry.seq  = nil
  @entry.qual = ""

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_soft!(20)
  end
end

test "#mask_seq_soft! with nil qual raises" do
  @entry.seq  = ""
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_soft!(20)
  end
end

# Cutoffs of -1 and 41 are both rejected.
test "#mask_seq_soft! with bad cutoff raises" do
  [-1, 41].each do |cutoff|
    assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(cutoff) }
  end
end

# Cutoffs of 0 and 40 are both accepted.
test "#mask_seq_soft! with OK cutoff dont raise" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  [0, 40].each do |cutoff|
    assert_nothing_raised { @entry.mask_seq_soft!(cutoff) }
  end
end

# With cutoff 20 and qual "33456", A and T are lowercased; the leading '-'
# stays.
test "#mask_seq_soft! returns correctly" do
  @entry.seq  = "-ATCG"
  @entry.qual = "33456"

  masked = @entry.mask_seq_soft!(20)
  assert_equal("-atCG", masked.seq)
end
611
+
612
+ # qual score detection
613
+
614
# Table-driven check of #qual_base33?.
# The method under test is noted as: self.qual.match(/[!-:]/)
# The first row covers every character from '!' (ASCII 33) to ':' (ASCII 58);
# the other rows are the single characters just outside that range
# (32.chr and 59.chr).
test "#qual_base33? returns correctly" do
  cases = [
    ['!"#$%&\'()*+,-./0123456789:', true],
    [32.chr, false],
    [59.chr, false]
  ]

  cases.each do |qual, expected|
    @entry.qual = qual
    assert_equal(expected, @entry.qual_base33?)
  end
end
623
+
624
# Table-driven check of #qual_base64?.
# The method under test is noted as: self.qual.match(/[K-h]/)
# The first row covers every character from 'K' (ASCII 75) to 'h' (ASCII 104);
# the other rows are the single characters just outside that range
# (74.chr and 105.chr).
test "#qual_base64? returns correctly" do
  cases = [
    ['KLMNOPQRSTUVWXYZ[\]^_`abcdefgh', true],
    [74.chr, false],
    [105.chr, false]
  ]

  cases.each do |qual, expected|
    @entry.qual = qual
    assert_equal(expected, @entry.qual_base64?)
  end
end
633
+
634
# #qual_valid? must raise BioDSL::SeqError for either encoding when no quality
# string is present.
# NOTE(review): qual is not assigned in this test; presumably the fixture set
# up elsewhere leaves it nil - confirm.
test "#qual_valid? with nil qual raises" do
  %i[base_33 base_64].each do |encoding|
    assert_raise(BioDSL::SeqError) { @entry.qual_valid?(encoding) }
  end
end
638
+
639
# An encoding argument other than the supported ones ("foobar" here) must make
# #qual_valid? raise BioDSL::SeqError.
test "#qual_valid? with bad encoding raises" do
  @entry.qual = "abc"

  assert_raise(BioDSL::SeqError) do
    @entry.qual_valid?("foobar")
  end
end
643
+
644
# For each encoding, builds a quality string holding every character from
# SCORE_MIN + offset to SCORE_MAX + offset (offset 33 or 64) and expects
# #qual_valid? to return true.
test "#qual_valid? with OK range returns correctly" do
  { 33 => :base_33, 64 => :base_64 }.each do |offset, encoding|
    lowest  = (BioDSL::Seq::SCORE_MIN + offset).chr
    highest = (BioDSL::Seq::SCORE_MAX + offset).chr

    @entry.qual = (lowest..highest).to_a.join
    assert_equal(true, @entry.qual_valid?(encoding))
  end
end
650
+
651
# For each encoding (offset 33 or 64), extends the character range by one
# position first below SCORE_MIN + offset and then above SCORE_MAX + offset,
# and expects #qual_valid? to return false in every case.
test "#qual_valid? with bad range returns correctly" do
  [[33, :base_33], [64, :base_64]].each do |offset, encoding|
    # [shift applied to the lower bound, shift applied to the upper bound]
    [[-1, 0], [0, 1]].each do |below, above|
      lowest  = (BioDSL::Seq::SCORE_MIN + offset + below).chr
      highest = (BioDSL::Seq::SCORE_MAX + offset + above).chr

      @entry.qual = (lowest..highest).to_a.join
      assert_equal(false, @entry.qual_valid?(encoding))
    end
  end
end
662
+
663
+ # convert sanger to ...
664
+
665
# Converting from :base_33 to :base_33 must leave the quality string unchanged.
test "#qual_convert! from base33 to base33 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_33, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
669
+
670
# Converting from :base_33 to :base_64 is expected to turn 'BCDEFGHI' into
# 'abcdefgh' (each character moved up by 31 code points).
test "#qual_convert! from base33 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_33, :base_64)

  assert_equal('abcdefgh', converted.qual)
end
674
+
675
# Converting from :base_64 to :base_64 must leave the quality string unchanged.
test "#qual_convert! from base64 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_64, :base_64)

  assert_equal('BCDEFGHI', converted.qual)
end
679
+
680
# Converting from :base_64 to :base_33 is expected to turn 'abcdefgh' into
# 'BCDEFGHI' (each character moved down by 31 code points).
test "#qual_convert! from base64 to base33 returns OK" do
  @entry.qual = 'abcdefgh'

  converted = @entry.qual_convert!(:base_64, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
684
+
685
# An unknown encoding (:foo) must make #qual_coerce! raise BioDSL::SeqError.
test "#qual_coerce! with bad base raises" do
  @entry.qual = ('!'..'~').to_a.join

  assert_raise(BioDSL::SeqError) do
    @entry.qual_coerce!(:foo)
  end
end
689
+
690
# Feeds every character from '!' to '~' through #qual_coerce! for each
# encoding. The expected strings show out-of-range characters collapsed onto
# the nearest accepted one: everything above 'I' becomes 'I' for :base_33;
# everything below '@' becomes '@' and everything above 'h' becomes 'h' for
# :base_64.
test "#qual_coerce! returns correctly" do
  expectations = [
    [:base_33, %q{!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII}],
    [:base_64, %q{@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghhhhhhhhhhhhhhhhhhhhhhh}]
  ]

  expectations.each do |encoding, expected|
    # Build a fresh input string each time: the bang method is called on it.
    @entry.qual = ('!'..'~').to_a.join
    assert_equal(expected, @entry.qual_coerce!(encoding).qual)
  end
end
696
+
697
# With no quality string, #scores_mean must raise BioDSL::SeqError.
test "#scores_mean without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean
  end
end
701
+
702
# The mean score of '!!II' is expected to be 20.0.
# NOTE(review): presumably '!' decodes to 0 and 'I' to 40 (base 33), which
# matches the expected value - confirm against the implementation.
test "#scores_mean returns correctly" do
  @entry.qual = '!!II'

  mean = @entry.scores_mean

  assert_equal(20.0, mean)
end
706
+
707
# With no quality string, #scores_min must raise BioDSL::SeqError.
test "#scores_min without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_min
  end
end
711
+
712
# The minimum score of '!!II' is expected to be 0.
test "#scores_min returns correctly" do
  @entry.qual = '!!II'

  lowest = @entry.scores_min

  assert_equal(0, lowest)
end
716
+
717
# With no quality string, #scores_max must raise BioDSL::SeqError.
test "#scores_max without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_max
  end
end
721
+
722
# The maximum score of '!!II' is expected to compare equal to 40.0.
test "#scores_max returns correctly" do
  @entry.qual = '!!II'

  highest = @entry.scores_max

  assert_equal(40.0, highest)
end
726
+
727
# With no quality string, #scores_mean_local must raise BioDSL::SeqError.
test "#scores_mean_local without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean_local(2)
  end
end
731
+
732
# For '!!II' and an argument of 2, #scores_mean_local is expected to return 0.0.
# NOTE(review): presumably the argument is a window size and the result is the
# lowest windowed mean (the leading '!!' pair) - confirm against the
# implementation.
test "#scores_mean_local returns correctly" do
  @entry.qual = '!!II'

  local_mean = @entry.scores_mean_local(2)

  assert_equal(0.0, local_mean)
end
736
+
737
# Without a block, #each_orf is expected to return a collection of two ORFs
# for this sequence: one starting at the first ATG (index 2) and one starting
# at the second ATG (index 11), both stopping at index 37.
test "#each_orf returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf

  assert_equal(2, orfs.size)

  outer = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", outer.entry.seq)
  assert_equal(2, outer.start)
  assert_equal(37, outer.stop)

  inner = orfs.last
  assert_equal("ATGcatcgatcagcatcgatcgatTAA", inner.entry.seq)
  assert_equal(11, inner.start)
  assert_equal(37, inner.stop)
end
749
+
750
# Verifies the block form of #each_orf by inspecting the first ORF yielded.
# The block breaks after the first ORF, so only that one is checked. A flag
# records that the block actually ran: without it the test would pass with
# zero assertions if #each_orf never yielded.
test "#each_orf in block context returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  yielded = false

  @entry.each_orf do |orf|
    yielded = true

    assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
    assert_equal(2, orf.start)
    assert_equal(37, orf.stop)

    # Only the first ORF is inspected.
    break
  end

  assert(yielded, "each_orf did not yield any ORF to the block")
end
760
+
761
# With size_min: 30 only one ORF is expected: the one starting at index 2
# and stopping at index 37.
test "#each_orf with size_min returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_min: 30)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
770
+
771
# With size_max: 30 only one ORF is expected: the one starting at index 11
# and stopping at index 37.
test "#each_orf with size_max returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_max: 30)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(11, orf.start)
  assert_equal(37, orf.stop)
end
780
+
781
# With pick_longest: true only one ORF is expected: the one starting at
# index 2 and stopping at index 37.
test "#each_orf with pick_longest returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(pick_longest: true)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
790
+ end