BioDSL 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (197) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/BioDSL.gemspec +64 -0
  4. data/LICENSE +339 -0
  5. data/README.md +205 -0
  6. data/Rakefile +94 -0
  7. data/examples/fastq_to_fasta.rb +8 -0
  8. data/lib/BioDSL/cary.rb +242 -0
  9. data/lib/BioDSL/command.rb +133 -0
  10. data/lib/BioDSL/commands/add_key.rb +110 -0
  11. data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
  12. data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
  13. data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
  14. data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
  15. data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
  16. data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
  17. data/lib/BioDSL/commands/classify_seq.rb +217 -0
  18. data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
  19. data/lib/BioDSL/commands/clip_primer.rb +318 -0
  20. data/lib/BioDSL/commands/cluster_otus.rb +181 -0
  21. data/lib/BioDSL/commands/collapse_otus.rb +170 -0
  22. data/lib/BioDSL/commands/collect_otus.rb +150 -0
  23. data/lib/BioDSL/commands/complement_seq.rb +117 -0
  24. data/lib/BioDSL/commands/count.rb +135 -0
  25. data/lib/BioDSL/commands/count_values.rb +149 -0
  26. data/lib/BioDSL/commands/degap_seq.rb +253 -0
  27. data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
  28. data/lib/BioDSL/commands/dump.rb +157 -0
  29. data/lib/BioDSL/commands/filter_rrna.rb +239 -0
  30. data/lib/BioDSL/commands/genecall.rb +237 -0
  31. data/lib/BioDSL/commands/grab.rb +535 -0
  32. data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
  33. data/lib/BioDSL/commands/mask_seq.rb +175 -0
  34. data/lib/BioDSL/commands/mean_scores.rb +168 -0
  35. data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
  36. data/lib/BioDSL/commands/merge_table.rb +225 -0
  37. data/lib/BioDSL/commands/merge_values.rb +113 -0
  38. data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
  39. data/lib/BioDSL/commands/plot_histogram.rb +306 -0
  40. data/lib/BioDSL/commands/plot_matches.rb +282 -0
  41. data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
  42. data/lib/BioDSL/commands/plot_scores.rb +285 -0
  43. data/lib/BioDSL/commands/random.rb +153 -0
  44. data/lib/BioDSL/commands/read_fasta.rb +222 -0
  45. data/lib/BioDSL/commands/read_fastq.rb +414 -0
  46. data/lib/BioDSL/commands/read_table.rb +329 -0
  47. data/lib/BioDSL/commands/reverse_seq.rb +113 -0
  48. data/lib/BioDSL/commands/slice_align.rb +400 -0
  49. data/lib/BioDSL/commands/slice_seq.rb +151 -0
  50. data/lib/BioDSL/commands/sort.rb +223 -0
  51. data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
  52. data/lib/BioDSL/commands/split_values.rb +165 -0
  53. data/lib/BioDSL/commands/trim_primer.rb +314 -0
  54. data/lib/BioDSL/commands/trim_seq.rb +192 -0
  55. data/lib/BioDSL/commands/uchime_ref.rb +170 -0
  56. data/lib/BioDSL/commands/uclust.rb +286 -0
  57. data/lib/BioDSL/commands/unique_values.rb +145 -0
  58. data/lib/BioDSL/commands/usearch_global.rb +171 -0
  59. data/lib/BioDSL/commands/usearch_local.rb +171 -0
  60. data/lib/BioDSL/commands/write_fasta.rb +207 -0
  61. data/lib/BioDSL/commands/write_fastq.rb +191 -0
  62. data/lib/BioDSL/commands/write_table.rb +419 -0
  63. data/lib/BioDSL/commands/write_tree.rb +167 -0
  64. data/lib/BioDSL/commands.rb +31 -0
  65. data/lib/BioDSL/config.rb +55 -0
  66. data/lib/BioDSL/csv.rb +307 -0
  67. data/lib/BioDSL/debug.rb +42 -0
  68. data/lib/BioDSL/fasta.rb +133 -0
  69. data/lib/BioDSL/fastq.rb +77 -0
  70. data/lib/BioDSL/filesys.rb +137 -0
  71. data/lib/BioDSL/fork.rb +145 -0
  72. data/lib/BioDSL/hamming.rb +128 -0
  73. data/lib/BioDSL/helpers/aux_helper.rb +44 -0
  74. data/lib/BioDSL/helpers/email_helper.rb +66 -0
  75. data/lib/BioDSL/helpers/history_helper.rb +40 -0
  76. data/lib/BioDSL/helpers/log_helper.rb +55 -0
  77. data/lib/BioDSL/helpers/options_helper.rb +405 -0
  78. data/lib/BioDSL/helpers/status_helper.rb +132 -0
  79. data/lib/BioDSL/helpers.rb +35 -0
  80. data/lib/BioDSL/html_report.rb +200 -0
  81. data/lib/BioDSL/math.rb +55 -0
  82. data/lib/BioDSL/mummer.rb +216 -0
  83. data/lib/BioDSL/pipeline.rb +354 -0
  84. data/lib/BioDSL/seq/ambiguity.rb +66 -0
  85. data/lib/BioDSL/seq/assemble.rb +240 -0
  86. data/lib/BioDSL/seq/backtrack.rb +252 -0
  87. data/lib/BioDSL/seq/digest.rb +99 -0
  88. data/lib/BioDSL/seq/dynamic.rb +263 -0
  89. data/lib/BioDSL/seq/homopolymer.rb +59 -0
  90. data/lib/BioDSL/seq/kmer.rb +293 -0
  91. data/lib/BioDSL/seq/levenshtein.rb +113 -0
  92. data/lib/BioDSL/seq/translate.rb +109 -0
  93. data/lib/BioDSL/seq/trim.rb +188 -0
  94. data/lib/BioDSL/seq.rb +742 -0
  95. data/lib/BioDSL/serializer.rb +98 -0
  96. data/lib/BioDSL/stream.rb +113 -0
  97. data/lib/BioDSL/taxonomy.rb +691 -0
  98. data/lib/BioDSL/test.rb +42 -0
  99. data/lib/BioDSL/tmp_dir.rb +68 -0
  100. data/lib/BioDSL/usearch.rb +301 -0
  101. data/lib/BioDSL/verbose.rb +42 -0
  102. data/lib/BioDSL/version.rb +31 -0
  103. data/lib/BioDSL.rb +81 -0
  104. data/test/BioDSL/commands/test_add_key.rb +105 -0
  105. data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
  106. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
  107. data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
  108. data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
  109. data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
  110. data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
  111. data/test/BioDSL/commands/test_classify_seq.rb +50 -0
  112. data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
  113. data/test/BioDSL/commands/test_clip_primer.rb +377 -0
  114. data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
  115. data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
  116. data/test/BioDSL/commands/test_collect_otus.rb +82 -0
  117. data/test/BioDSL/commands/test_complement_seq.rb +78 -0
  118. data/test/BioDSL/commands/test_count.rb +103 -0
  119. data/test/BioDSL/commands/test_count_values.rb +85 -0
  120. data/test/BioDSL/commands/test_degap_seq.rb +96 -0
  121. data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
  122. data/test/BioDSL/commands/test_dump.rb +109 -0
  123. data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
  124. data/test/BioDSL/commands/test_genecall.rb +50 -0
  125. data/test/BioDSL/commands/test_grab.rb +398 -0
  126. data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
  127. data/test/BioDSL/commands/test_mask_seq.rb +98 -0
  128. data/test/BioDSL/commands/test_mean_scores.rb +111 -0
  129. data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
  130. data/test/BioDSL/commands/test_merge_table.rb +131 -0
  131. data/test/BioDSL/commands/test_merge_values.rb +83 -0
  132. data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
  133. data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
  134. data/test/BioDSL/commands/test_plot_matches.rb +157 -0
  135. data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
  136. data/test/BioDSL/commands/test_plot_scores.rb +308 -0
  137. data/test/BioDSL/commands/test_random.rb +88 -0
  138. data/test/BioDSL/commands/test_read_fasta.rb +229 -0
  139. data/test/BioDSL/commands/test_read_fastq.rb +552 -0
  140. data/test/BioDSL/commands/test_read_table.rb +327 -0
  141. data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
  142. data/test/BioDSL/commands/test_slice_align.rb +218 -0
  143. data/test/BioDSL/commands/test_slice_seq.rb +131 -0
  144. data/test/BioDSL/commands/test_sort.rb +128 -0
  145. data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
  146. data/test/BioDSL/commands/test_split_values.rb +95 -0
  147. data/test/BioDSL/commands/test_trim_primer.rb +329 -0
  148. data/test/BioDSL/commands/test_trim_seq.rb +150 -0
  149. data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
  150. data/test/BioDSL/commands/test_uclust.rb +139 -0
  151. data/test/BioDSL/commands/test_unique_values.rb +98 -0
  152. data/test/BioDSL/commands/test_usearch_global.rb +123 -0
  153. data/test/BioDSL/commands/test_usearch_local.rb +125 -0
  154. data/test/BioDSL/commands/test_write_fasta.rb +159 -0
  155. data/test/BioDSL/commands/test_write_fastq.rb +166 -0
  156. data/test/BioDSL/commands/test_write_table.rb +411 -0
  157. data/test/BioDSL/commands/test_write_tree.rb +122 -0
  158. data/test/BioDSL/helpers/test_options_helper.rb +272 -0
  159. data/test/BioDSL/seq/test_assemble.rb +98 -0
  160. data/test/BioDSL/seq/test_backtrack.rb +176 -0
  161. data/test/BioDSL/seq/test_digest.rb +71 -0
  162. data/test/BioDSL/seq/test_dynamic.rb +133 -0
  163. data/test/BioDSL/seq/test_homopolymer.rb +58 -0
  164. data/test/BioDSL/seq/test_kmer.rb +134 -0
  165. data/test/BioDSL/seq/test_translate.rb +75 -0
  166. data/test/BioDSL/seq/test_trim.rb +101 -0
  167. data/test/BioDSL/test_cary.rb +176 -0
  168. data/test/BioDSL/test_command.rb +45 -0
  169. data/test/BioDSL/test_csv.rb +514 -0
  170. data/test/BioDSL/test_debug.rb +42 -0
  171. data/test/BioDSL/test_fasta.rb +154 -0
  172. data/test/BioDSL/test_fastq.rb +46 -0
  173. data/test/BioDSL/test_filesys.rb +145 -0
  174. data/test/BioDSL/test_fork.rb +85 -0
  175. data/test/BioDSL/test_math.rb +41 -0
  176. data/test/BioDSL/test_mummer.rb +79 -0
  177. data/test/BioDSL/test_pipeline.rb +187 -0
  178. data/test/BioDSL/test_seq.rb +790 -0
  179. data/test/BioDSL/test_serializer.rb +72 -0
  180. data/test/BioDSL/test_stream.rb +55 -0
  181. data/test/BioDSL/test_taxonomy.rb +336 -0
  182. data/test/BioDSL/test_test.rb +42 -0
  183. data/test/BioDSL/test_tmp_dir.rb +58 -0
  184. data/test/BioDSL/test_usearch.rb +33 -0
  185. data/test/BioDSL/test_verbose.rb +42 -0
  186. data/test/helper.rb +82 -0
  187. data/www/command.html.haml +14 -0
  188. data/www/css.html.haml +55 -0
  189. data/www/input_files.html.haml +3 -0
  190. data/www/layout.html.haml +12 -0
  191. data/www/output_files.html.haml +3 -0
  192. data/www/overview.html.haml +15 -0
  193. data/www/pipeline.html.haml +4 -0
  194. data/www/png.html.haml +2 -0
  195. data/www/status.html.haml +9 -0
  196. data/www/time.html.haml +11 -0
  197. metadata +503 -0
@@ -0,0 +1,790 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', '..')
3
+
4
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
5
+ # #
6
+ # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
7
+ # #
8
+ # This program is free software; you can redistribute it and/or #
9
+ # modify it under the terms of the GNU General Public License #
10
+ # as published by the Free Software Foundation; either version 2 #
11
+ # of the License, or (at your option) any later version. #
12
+ # #
13
+ # This program is distributed in the hope that it will be useful, #
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
16
+ # GNU General Public License for more details. #
17
+ # #
18
+ # You should have received a copy of the GNU General Public License #
19
+ # along with this program; if not, write to the Free Software #
20
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
21
+ # #
22
+ # http://www.gnu.org/copyleft/gpl.html #
23
+ # #
24
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
25
+ # #
26
+ # This software is part of BioDSL (www.BioDSL.org). #
27
+ # #
28
+ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
29
+
30
+ require 'test/helper'
31
+
32
+ class TestSeq < Test::Unit::TestCase
33
+ def setup
34
+ @entry = BioDSL::Seq.new
35
+ end
36
+
37
# Constructing a Seq whose qual string ("hhh", 3 chars) does not match the
# length of its seq ("ATCG", 4 chars) is expected to raise BioDSL::SeqError.
test "BioDSL::Seq.new with different length SEQ and SCORES raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "hhh")
  end
end
40
+
41
+ test "BioDSL::Seq.new_bp returns correctly" do
42
+ record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => :dna, :SCORES => "hhhh"}
43
+ seq = BioDSL::Seq.new_bp(record)
44
+ assert_equal("test", seq.seq_name)
45
+ assert_equal("ATCG", seq.seq)
46
+ assert_equal(:dna, seq.type)
47
+ assert_equal("hhhh", seq.qual)
48
+ end
49
+
50
+ test "BioDSL::Seq.generate_oligos with bad type raises" do
51
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.generate_oligos(2, :foo) }
52
+ end
53
+
54
+ test "BioDSL::Seq.generate_oligos with bad length raises" do
55
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.generate_oligos(0, :dna) }
56
+ end
57
+
58
+ test "BioDSL::Seq.generate_oligos returns correctly" do
59
+ expected = %w{aa at ac ag ta tt tc tg ca ct cc cg ga gt gc gg}
60
+ assert_equal(expected, BioDSL::Seq.generate_oligos(2, :dna))
61
+ expected = %w{aa au ac ag ua uu uc ug ca cu cc cg ga gu gc gg}
62
+ assert_equal(expected, BioDSL::Seq.generate_oligos(2, :rna))
63
+ expected = %w{
64
+ ff fl fs fy fc fw fp fh fq fr fi fm ft fn fk fv fa fd fe fg
65
+ lf ll ls ly lc lw lp lh lq lr li lm lt ln lk lv la ld le lg
66
+ sf sl ss sy sc sw sp sh sq sr si sm st sn sk sv sa sd se sg
67
+ yf yl ys yy yc yw yp yh yq yr yi ym yt yn yk yv ya yd ye yg
68
+ cf cl cs cy cc cw cp ch cq cr ci cm ct cn ck cv ca cd ce cg
69
+ wf wl ws wy wc ww wp wh wq wr wi wm wt wn wk wv wa wd we wg
70
+ pf pl ps py pc pw pp ph pq pr pi pm pt pn pk pv pa pd pe pg
71
+ hf hl hs hy hc hw hp hh hq hr hi hm ht hn hk hv ha hd he hg
72
+ qf ql qs qy qc qw qp qh qq qr qi qm qt qn qk qv qa qd qe qg
73
+ rf rl rs ry rc rw rp rh rq rr ri rm rt rn rk rv ra rd re rg
74
+ if il is iy ic iw ip ih iq ir ii im it in ik iv ia id ie ig
75
+ mf ml ms my mc mw mp mh mq mr mi mm mt mn mk mv ma md me mg
76
+ tf tl ts ty tc tw tp th tq tr ti tm tt tn tk tv ta td te tg
77
+ nf nl ns ny nc nw np nh nq nr ni nm nt nn nk nv na nd ne ng
78
+ kf kl ks ky kc kw kp kh kq kr ki km kt kn kk kv ka kd ke kg
79
+ vf vl vs vy vc vw vp vh vq vr vi vm vt vn vk vv va vd ve vg
80
+ af al as ay ac aw ap ah aq ar ai am at an ak av aa ad ae ag
81
+ df dl ds dy dc dw dp dh dq dr di dm dt dn dk dv da dd de dg
82
+ ef el es ey ec ew ep eh eq er ei em et en ek ev ea ed ee eg
83
+ gf gl gs gy gc gw gp gh gq gr gi gm gt gn gk gv ga gd ge gg
84
+ }
85
+ assert_equal(expected, BioDSL::Seq.generate_oligos(2, :protein))
86
+ end
87
+
88
+ test "BioDSL::Seq.check_name_pair with badly formatted names raises" do
89
+ entry1 = BioDSL::Seq.new(seq_name: "foo")
90
+ entry2 = BioDSL::Seq.new(seq_name: "bar")
91
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(entry1, entry2) }
92
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(entry2, entry1) }
93
+ end
94
+
95
+ test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and no match raises" do
96
+ entry1 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/1")
97
+ entry2 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:491:1793#0/2")
98
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(entry1, entry2) }
99
+ end
100
+
101
+ test "BioDSL::Seq.check_name_pair with Illumina1.8 names and no match raises" do
102
+ entry1 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG")
103
+ entry2 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15433:179393 2:Y:18:ATCACG")
104
+ assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(entry1, entry2) }
105
+ end
106
+
107
+ test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and match don't raise" do
108
+ entry1 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/1")
109
+ entry2 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/2")
110
+ assert_nothing_raised { BioDSL::Seq.check_name_pair(entry1, entry2) }
111
+ end
112
+
113
+ test "BioDSL::Seq.check_name_pair with Illumina1.8 names and match don't raise" do
114
+ entry1 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG")
115
+ entry2 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG")
116
+ assert_nothing_raised { BioDSL::Seq.check_name_pair(entry1, entry2) }
117
+ end
118
+
119
# The entry built in setup has no type assigned, so #is_dna? must be false.
# assert_equal is used (rather than assert(x == false)) so that a failure
# reports the value that was actually returned.
test "#is_dna? with no sequence type returns false" do
  assert_equal(false, @entry.is_dna?)
end
122
+
123
# After the type is set to :dna, #is_dna? must be true.
# assert_equal is used (rather than assert(x == true)) so that a failure
# reports the value that was actually returned.
test "#is_dna? with dna sequence type returns true" do
  @entry.type = :dna
  assert_equal(true, @entry.is_dna?)
end
127
+
128
# The entry built in setup has no type assigned, so #is_rna? must be false.
# assert_equal is used (rather than assert(x == false)) so that a failure
# reports the value that was actually returned.
test "#is_rna? with no sequence type returns false" do
  assert_equal(false, @entry.is_rna?)
end
131
+
132
# After the type is set to :rna, #is_rna? must be true.
# assert_equal is used (rather than assert(x == true)) so that a failure
# reports the value that was actually returned.
test "#is_rna? with rna sequence type returns true" do
  @entry.type = :rna
  assert_equal(true, @entry.is_rna?)
end
136
+
137
# The entry built in setup has no type assigned, so #is_protein? must be
# false. assert_equal is used (rather than assert(x == false)) so that a
# failure reports the value that was actually returned.
test "#is_protein? with no sequence type returns false" do
  assert_equal(false, @entry.is_protein?)
end
140
+
141
+ test "#is_protein? with protein sequence type returns true" do
142
+ @entry.type = :protein
143
+ assert_equal(true, @entry.is_protein?)
144
+ end
145
+
146
+ test "#type_guess without sequence raises" do
147
+ assert_raise(BioDSL::SeqError) { @entry.type_guess }
148
+ end
149
+
150
+ test "#type_guess with protein returns protein" do
151
+ @entry.seq = 'atcatcrFgatcg'
152
+ assert_equal(:protein, @entry.type_guess)
153
+ end
154
+
155
+ test "#type_guess with rna returns rna" do
156
+ @entry.seq = 'atcatcrUgatcg'
157
+ assert_equal(:rna, @entry.type_guess)
158
+ end
159
+
160
+ test "#type_guess with dna returns dna" do
161
+ @entry.seq = 'atcatcgatcg'
162
+ assert_equal(:dna, @entry.type_guess)
163
+ end
164
+
165
+ test "#type_guess! without sequence raises" do
166
+ assert_raise(BioDSL::SeqError) { @entry.type_guess! }
167
+ end
168
+
169
+ test "#type_guess! with protein returns protein" do
170
+ @entry.seq = 'atcatcrFgatcg'
171
+ @entry.type_guess!
172
+ assert_equal(:protein, @entry.type)
173
+ end
174
+
175
+ test "#type_guess! with rna returns rna" do
176
+ @entry.seq = 'atcatcrUgatcg'
177
+ @entry.type_guess!
178
+ assert_equal(:rna, @entry.type)
179
+ end
180
+
181
+ test "#type_guess! with dna returns dna" do
182
+ @entry.seq = 'atcatcgatcg'
183
+ @entry.type_guess!
184
+ assert_equal(:dna, @entry.type)
185
+ end
186
+
187
# #length must report the number of residues in seq (4 for 'ATCG').
test "#length returns correctly" do
  @entry.seq = 'ATCG'
  assert_equal(4, @entry.length)
end
191
+
192
+ test "#indels returns correctly" do
193
+ @entry.seq = 'ATCG.-~_'
194
+ assert_equal(4, @entry.indels)
195
+ end
196
+
197
+ test "#to_rna with no sequence raises" do
198
+ @entry.type = :dna
199
+ assert_raise(BioDSL::SeqError) { @entry.to_rna }
200
+ end
201
+
202
+ test "#to_rna with bad type raises" do
203
+ @entry.seq = 'ATCG'
204
+ @entry.type = :rna
205
+ assert_raise(BioDSL::SeqError) { @entry.to_rna }
206
+ end
207
+
208
+ test "#to_rna transcribes correctly" do
209
+ @entry.seq = 'ATCGatcg'
210
+ @entry.type = :dna
211
+ assert_equal("AUCGaucg", @entry.to_rna)
212
+ end
213
+
214
+ test "#to_rna changes entry type to rna" do
215
+ @entry.seq = 'ATCGatcg'
216
+ @entry.type = :dna
217
+ @entry.to_rna
218
+ assert_equal(:rna, @entry.type)
219
+ end
220
+
221
+ test "#to_dna with no sequence raises" do
222
+ @entry.type = :rna
223
+ assert_raise(BioDSL::SeqError) { @entry.to_dna }
224
+ end
225
+
226
+ test "#to_dna with bad type raises" do
227
+ @entry.seq = 'AUCG'
228
+ @entry.type = :dna
229
+ assert_raise(BioDSL::SeqError) { @entry.to_dna }
230
+ end
231
+
232
+ test "#to_dna transcribes correctly" do
233
+ @entry.seq = 'AUCGaucg'
234
+ @entry.type = :rna
235
+ assert_equal("ATCGatcg", @entry.to_dna)
236
+ end
237
+
238
+ test "#to_dna changes entry type to dna" do
239
+ @entry.seq = 'AUCGaucg'
240
+ @entry.type = :rna
241
+ @entry.to_dna
242
+ assert_equal(:dna, @entry.type)
243
+ end
244
+
245
+ test "#to_bp returns correct record" do
246
+ @entry.seq_name = 'test'
247
+ @entry.seq = 'ATCG'
248
+ assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
249
+ end
250
+
251
+ test "#to_fasta with missing seq_name raises" do
252
+ @entry.seq = 'ATCG'
253
+ assert_raise(BioDSL::SeqError) { @entry.to_fasta }
254
+ end
255
+
256
+ test "#to_fasta with empty seq_name raises" do
257
+ @entry.seq_name = ''
258
+ @entry.seq = 'ATCG'
259
+ assert_raise(BioDSL::SeqError) { @entry.to_fasta }
260
+ end
261
+
262
+ test "#to_fasta with missing seq raises" do
263
+ @entry.seq_name = 'test'
264
+ assert_raise(BioDSL::SeqError) { @entry.to_fasta }
265
+ end
266
+
267
+ test "#to_fasta with empty seq raises" do
268
+ @entry.seq_name = 'test'
269
+ @entry.seq = ''
270
+ assert_raise(BioDSL::SeqError) { @entry.to_fasta }
271
+ end
272
+
273
+ test "#to_fasta returns correct entry" do
274
+ @entry.seq_name = 'test'
275
+ @entry.seq = 'ATCG'
276
+ assert_equal(">test\nATCG\n", @entry.to_fasta)
277
+ end
278
+
279
+ test "#to_fasta wraps correctly" do
280
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")
281
+ assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
282
+ end
283
+
284
+ test "#to_fastq returns correct entry" do
285
+ @entry.seq_name = 'test'
286
+ @entry.seq = 'ATCG'
287
+ @entry.qual = 'hhhh'
288
+ assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
289
+ end
290
+
291
+ test "#to_key with bad residue raises" do
292
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "AUCG")
293
+ assert_raise(BioDSL::SeqError) { entry.to_key }
294
+ end
295
+
296
+ test "#to_key returns correctly" do
297
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")
298
+ assert_equal(54, entry.to_key)
299
+ end
300
+
301
+ test "#reverse returns correctly" do
302
+ @entry.seq = "ATCG"
303
+ new_entry = @entry.reverse
304
+ assert_equal("GCTA", new_entry.seq)
305
+ assert_equal("ATCG", @entry.seq)
306
+ end
307
+
308
+ test "#reverse! returns correctly" do
309
+ @entry.seq = "ATCG"
310
+ @entry.reverse!
311
+ assert_equal("GCTA", @entry.seq)
312
+ end
313
+
314
+ test "#complement with no sequence raises" do
315
+ @entry.type = :dna
316
+ assert_raise(BioDSL::SeqError) { @entry.complement }
317
+ end
318
+
319
+ test "#complement with bad type raises" do
320
+ @entry.seq = 'ATCG'
321
+ @entry.type = :protein
322
+ assert_raise(BioDSL::SeqError) { @entry.complement }
323
+ end
324
+
325
+ test "#complement for DNA is correct" do
326
+ @entry.seq = 'ATCGatcg'
327
+ @entry.type = :dna
328
+ comp = @entry.complement
329
+ assert_equal("TAGCtagc", comp.seq)
330
+ assert_equal("ATCGatcg", @entry.seq)
331
+ end
332
+
333
+ test "#complement for RNA is correct" do
334
+ @entry.seq = 'AUCGaucg'
335
+ @entry.type = :rna
336
+ comp = @entry.complement
337
+ assert_equal("UAGCuagc", comp.seq)
338
+ assert_equal("AUCGaucg", @entry.seq)
339
+ end
340
+
341
+ test "#complement! with no sequence raises" do
342
+ @entry.type = :dna
343
+ assert_raise(BioDSL::SeqError) { @entry.complement! }
344
+ end
345
+
346
+ test "#complement! with bad type raises" do
347
+ @entry.seq = 'ATCG'
348
+ @entry.type = :protein
349
+ assert_raise(BioDSL::SeqError) { @entry.complement! }
350
+ end
351
+
352
+ test "#complement! for DNA is correct" do
353
+ @entry.seq = 'ATCGatcg'
354
+ @entry.type = :dna
355
+ assert_equal("TAGCtagc", @entry.complement!.seq)
356
+ end
357
+
358
+ test "#complement! for RNA is correct" do
359
+ @entry.seq = 'AUCGaucg'
360
+ @entry.type = :rna
361
+ assert_equal("UAGCuagc", @entry.complement!.seq)
362
+ end
363
+
364
+ test "#hamming_distance returns correctly" do
365
+ seq1 = BioDSL::Seq.new(seq: "ATCG")
366
+ seq2 = BioDSL::Seq.new(seq: "atgg")
367
+ assert_equal(1, seq1.hamming_distance(seq2))
368
+ end
369
+
370
+ test "#hamming_distance with ambiguity codes return correctly" do
371
+ seq1 = BioDSL::Seq.new(seq: "ATCG")
372
+ seq2 = BioDSL::Seq.new(seq: "atng")
373
+
374
+ assert_equal(1, seq1.hamming_distance(seq2))
375
+ assert_equal(0, seq1.hamming_distance(seq2, ambiguity: true))
376
+ end
377
+
378
+ test "#edit_distance returns correctly" do
379
+ seq1 = BioDSL::Seq.new(seq: "ATCG")
380
+ seq2 = BioDSL::Seq.new(seq: "tgncg")
381
+ assert_equal(2, seq1.edit_distance(seq2))
382
+ end
383
+
384
+ test "#generate with length < 1 raises" do
385
+ assert_raise(BioDSL::SeqError) { @entry.generate(-10, :dna) }
386
+ assert_raise(BioDSL::SeqError) { @entry.generate(0, :dna) }
387
+ end
388
+
389
+ test "#generate with bad type raises" do
390
+ assert_raise(BioDSL::SeqError) { @entry.generate(10, "foo") }
391
+ end
392
+
393
# #generate must accept each of the sequence types :dna, :rna and :protein
# without raising. The types are listed as symbols directly, since that is
# the form passed to #generate.
test "#generate with ok type dont raise" do
  [:dna, :rna, :protein].each do |type|
    assert_nothing_raised { @entry.generate(10, type) }
  end
end
398
+
399
+ test "#shuffle returns correctly" do
400
+ orig = "actgactgactgatcgatcgatcgatcgtactg"
401
+ @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
402
+ entry_shuf = @entry.shuffle
403
+ assert_equal(orig, @entry.seq)
404
+ assert_not_equal(@entry.seq, entry_shuf.seq)
405
+ end
406
+
407
+ test "#shuffle! returns correctly" do
408
+ @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
409
+ assert_not_equal(@entry.seq, @entry.shuffle!.seq)
410
+ end
411
+
412
+ test "#+ without qual returns correctly" do
413
+ entry = BioDSL::Seq.new(seq_name: "test1", seq: "at") + BioDSL::Seq.new(seq_name: "test2", seq: "cg")
414
+ assert_nil(entry.seq_name)
415
+ assert_equal("atcg", entry.seq)
416
+ assert_nil(entry.type)
417
+ assert_nil(entry.qual)
418
+ end
419
+
420
+ test "#+ with qual returns correctly" do
421
+ entry = BioDSL::Seq.new(seq_name: "test1", seq: "at", type: :dna, qual: "II") + BioDSL::Seq.new(seq_name: "test2", seq: "cg", type: :dna, qual: "JJ")
422
+ assert_nil(entry.seq_name)
423
+ assert_equal("atcg", entry.seq)
424
+ assert_equal(:dna, entry.type)
425
+ assert_equal("IIJJ", entry.qual)
426
+ end
427
+
428
+ test "#<< with different types raises" do
429
+ @entry.seq = "atcg"
430
+ assert_raise(BioDSL::SeqError) { @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
431
+ end
432
+
433
+ test "#<< with missing qual in one entry raises" do
434
+ @entry.seq = "atcg"
435
+ @entry.type = :dna
436
+ assert_raise(BioDSL::SeqError) { @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
437
+ @entry.qual = "IIII"
438
+ assert_raise(BioDSL::SeqError) { @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
439
+ end
440
+
441
+ test "#<< with nil qual in both entries dont raise" do
442
+ @entry.seq = "atcg"
443
+ assert_nothing_raised { @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg") }
444
+ end
445
+
446
+ test "#<< with qual in both entries dont raise" do
447
+ @entry.seq = "atcg"
448
+ @entry.type = :dna
449
+ @entry.qual = "IIII"
450
+ assert_nothing_raised { @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
451
+ end
452
+
453
+ test "#<< without qual returns correctly" do
454
+ @entry.seq = "atcg"
455
+ @entry << BioDSL::Seq.new(seq_name: "test", seq: "ATCG")
456
+ assert_equal("atcgATCG", @entry.seq)
457
+ end
458
+
459
+ test "#<< with qual returns correctly" do
460
+ @entry.seq = "atcg"
461
+ @entry.type = :dna
462
+ @entry.qual = "HHHH"
463
+ @entry << BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "IIII")
464
+ assert_equal("atcgATCG", @entry.seq)
465
+ assert_equal("HHHHIIII", @entry.qual)
466
+ end
467
+
468
+ test "#[] with qual returns correctly" do
469
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
470
+
471
+ e = entry[2]
472
+
473
+ assert_equal("test", e.seq_name)
474
+ assert_equal("c", e.seq)
475
+ assert_equal(:dna, e.type)
476
+ assert_equal("H", e.qual)
477
+ assert_equal("atcg", entry.seq)
478
+ assert_equal("FGHI", entry.qual)
479
+ end
480
+
481
+ test "#[] without qual returns correctly" do
482
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg")
483
+
484
+ e = entry[2]
485
+
486
+ assert_equal("test", e.seq_name)
487
+ assert_equal("c", e.seq)
488
+ assert_nil(e.qual)
489
+ assert_equal("atcg", entry.seq)
490
+ end
491
+
492
+ test "[]= with qual returns correctly" do
493
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
494
+
495
+ entry[0] = BioDSL::Seq.new(seq_name: "foo", seq: "T", type: :dna, qual: "I")
496
+
497
+ assert_equal("test", entry.seq_name)
498
+ assert_equal("Ttcg", entry.seq)
499
+ assert_equal(:dna, entry.type)
500
+ assert_equal("IGHI", entry.qual)
501
+ end
502
+
503
+ test "[]= without qual returns correctly" do
504
+ entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg")
505
+
506
+ entry[0] = BioDSL::Seq.new(seq_name: "foo", seq: "T")
507
+
508
+ assert_equal("test", entry.seq_name)
509
+ assert_equal("Ttcg", entry.seq)
510
+ end
511
+
512
+ test "#indels_remove without qual returns correctly" do
513
+ @entry.seq = "A-T.CG~CG"
514
+ @entry.qual = nil
515
+ assert_equal("ATCGCG", @entry.indels_remove.seq)
516
+ end
517
+
518
+ test "#indels_remove with qual returns correctly" do
519
+ @entry.seq = "A-T.CG~CG"
520
+ @entry.qual = "a@b@cd@fg"
521
+ assert_equal("ATCGCG", @entry.indels_remove.seq)
522
+ assert_equal("abcdfg", @entry.indels_remove.qual)
523
+ end
524
+
525
# #composition must report the per-residue counts of seq, and 0 for a
# residue ("X") that does not occur in it.
test "#composition returns correctly" do
  @entry.seq = "AAAATTTCCG"

  expected_counts = { "A" => 4, "T" => 3, "C" => 2, "G" => 1, "X" => 0 }

  expected_counts.each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
533
+
534
+ test "#hard_mask returns correctly" do
535
+ @entry.seq = "--AAAANn"
536
+ assert_equal(33.33, @entry.hard_mask)
537
+ end
538
+
539
+ test "#soft_mask returns correctly" do
540
+ @entry.seq = "--AAAa"
541
+ assert_equal(25.00, @entry.soft_mask)
542
+ end
543
+
544
+ test "#mask_seq_hard! with nil seq raises" do
545
+ @entry.seq = nil
546
+ @entry.qual = ""
547
+
548
+ assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(20) }
549
+ end
550
+
551
+ test "#mask_seq_hard! with nil qual raises" do
552
+ @entry.seq = ""
553
+ @entry.qual = nil
554
+
555
+ assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(20) }
556
+ end
557
+
558
# Cutoffs just outside the range accepted by the "OK cutoff" test (0 and 40)
# are expected to raise BioDSL::SeqError.
#
# seq and qual are populated first. The neighbouring tests expect SeqError
# when seq or qual is nil even with a valid cutoff, so on a bare entry this
# test would pass without the cutoff ever being checked.
test "#mask_seq_hard! with bad cutoff raises" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(-1) }
  assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(41) }
end
562
+
563
+ test "#mask_seq_hard! with OK cutoff dont raise" do
564
+ @entry.seq = "ATCG"
565
+ @entry.qual = "RSTU"
566
+
567
+ assert_nothing_raised { @entry.mask_seq_hard!(0) }
568
+ assert_nothing_raised { @entry.mask_seq_hard!(40) }
569
+ end
570
+
571
+ test "#mask_seq_hard! returns correctly" do
572
+ @entry.seq = "-ATCG"
573
+ @entry.qual = "33456"
574
+
575
+ assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
576
+ end
577
+
578
+ test "#mask_seq_soft! with nil seq raises" do
579
+ @entry.seq = nil
580
+ @entry.qual = ""
581
+
582
+ assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(20) }
583
+ end
584
+
585
+ test "#mask_seq_soft! with nil qual raises" do
586
+ @entry.seq = ""
587
+ @entry.qual = nil
588
+
589
+ assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(20) }
590
+ end
591
+
592
# Cutoffs just outside the range accepted by the "OK cutoff" test (0 and 40)
# are expected to raise BioDSL::SeqError.
#
# seq and qual are populated first. The neighbouring tests expect SeqError
# when seq or qual is nil even with a valid cutoff, so on a bare entry this
# test would pass without the cutoff ever being checked.
test "#mask_seq_soft! with bad cutoff raises" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(-1) }
  assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(41) }
end
596
+
597
+ test "#mask_seq_soft! with OK cutoff dont raise" do
598
+ @entry.seq = "ATCG"
599
+ @entry.qual = "RSTU"
600
+
601
+ assert_nothing_raised { @entry.mask_seq_soft!(0) }
602
+ assert_nothing_raised { @entry.mask_seq_soft!(40) }
603
+ end
604
+
605
+ test "#mask_seq_soft! returns correctly" do
606
+ @entry.seq = "-ATCG"
607
+ @entry.qual = "33456"
608
+
609
+ assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
610
+ end
611
+
612
+ # qual score detection
613
+
614
+ test "#qual_base33? returns correctly" do
615
+ # self.qual.match(/[!-:]/)
616
+ @entry.qual = '!"#$%&\'()*+,-./0123456789:'
617
+ assert_equal(true, @entry.qual_base33? )
618
+ @entry.qual = 32.chr
619
+ assert_equal(false, @entry.qual_base33? )
620
+ @entry.qual = 59.chr
621
+ assert_equal(false, @entry.qual_base33? )
622
+ end
623
+
624
# Checks #qual_base64? against every character from "K" through "h" and
# against the two neighbouring characters that must not be detected.
# NOTE(review): presumably mirrors a /[K-h]/ match in the implementation —
# confirm against Seq#qual_base64?.
test "#qual_base64? returns correctly" do
  @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
  assert_equal(true, @entry.qual_base64?)

  # 74.chr is "J" and 105.chr is "i" — one below "K" and one above "h".
  [74, 105].each do |code|
    @entry.qual = code.chr
    assert_equal(false, @entry.qual_base64?)
  end
end
633
+
634
# #qual_valid? must raise BioDSL::SeqError for both encodings when no qual
# is present. No qual is assigned here; the test relies on @entry starting
# without one (presumably from setup — confirm).
test "#qual_valid? with nil qual raises" do
  %i[base_33 base_64].each do |encoding|
    assert_raise(BioDSL::SeqError) { @entry.qual_valid?(encoding) }
  end
end
638
+
639
# An unrecognised encoding argument ("foobar") must raise BioDSL::SeqError.
test "#qual_valid? with bad encoding raises" do
  @entry.qual = "abc"

  assert_raise(BioDSL::SeqError) do
    @entry.qual_valid?("foobar")
  end
end
643
+
644
# For each encoding, a qual string spanning exactly SCORE_MIN..SCORE_MAX
# (shifted by the encoding's offset) must be reported as valid.
test "#qual_valid? with OK range returns correctly" do
  { base_33: 33, base_64: 64 }.each do |encoding, offset|
    lowest  = (BioDSL::Seq::SCORE_MIN + offset).chr
    highest = (BioDSL::Seq::SCORE_MAX + offset).chr

    @entry.qual = (lowest..highest).to_a.join
    assert_equal(true, @entry.qual_valid?(encoding))
  end
end
650
+
651
# For each encoding, a qual string that extends one character below
# SCORE_MIN or one character above SCORE_MAX (shifted by the encoding's
# offset) must be reported as invalid.
test "#qual_valid? with bad range returns correctly" do
  { base_33: 33, base_64: 64 }.each do |encoding, offset|
    lowest  = BioDSL::Seq::SCORE_MIN + offset
    highest = BioDSL::Seq::SCORE_MAX + offset

    # One character too low.
    @entry.qual = ((lowest - 1).chr..highest.chr).to_a.join
    assert_equal(false, @entry.qual_valid?(encoding))

    # One character too high.
    @entry.qual = (lowest.chr..(highest + 1).chr).to_a.join
    assert_equal(false, @entry.qual_valid?(encoding))
  end
end
662
+
663
+ # qual score conversion between base 33 and base 64, and coercion
664
+
665
# Converting base 33 to base 33 must leave the qual string unchanged.
test "#qual_convert! from base33 to base33 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_33, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
669
+
670
# Converting base 33 to base 64 is expected to turn 'BCDEFGHI' into
# 'abcdefgh'.
test "#qual_convert! from base33 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_33, :base_64)

  assert_equal('abcdefgh', converted.qual)
end
674
+
675
# Converting base 64 to base 64 must leave the qual string unchanged.
test "#qual_convert! from base64 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'

  converted = @entry.qual_convert!(:base_64, :base_64)

  assert_equal('BCDEFGHI', converted.qual)
end
679
+
680
# Converting base 64 to base 33 is expected to turn 'abcdefgh' into
# 'BCDEFGHI'.
test "#qual_convert! from base64 to base33 returns OK" do
  @entry.qual = 'abcdefgh'

  converted = @entry.qual_convert!(:base_64, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
684
+
685
# An unknown base (:foo) passed to #qual_coerce! must raise BioDSL::SeqError.
test "#qual_coerce! with bad base raises" do
  @entry.qual = ('!'..'~').to_a.join

  assert_raise(BioDSL::SeqError) do
    @entry.qual_coerce!(:foo)
  end
end
689
+
690
# Feeds every character from "!" through "~" to #qual_coerce! and checks
# the expected result per base: for :base_33 everything above "I" becomes
# "I"; for :base_64 everything below "@" becomes "@" and everything above
# "h" becomes "h".
test "#qual_coerce! returns correctly" do
  # A fresh input string is built for each call on purpose: the bang method
  # may modify the assigned string in place.
  @entry.qual = ('!'..'~').to_a.join
  coerced = @entry.qual_coerce!(:base_33)
  assert_equal(%q{!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII}, coerced.qual)

  @entry.qual = ('!'..'~').to_a.join
  coerced = @entry.qual_coerce!(:base_64)
  assert_equal(%q{@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghhhhhhhhhhhhhhhhhhhhhhh}, coerced.qual)
end
696
+
697
# #scores_mean must raise BioDSL::SeqError when qual is nil.
test "#scores_mean without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean
  end
end
701
+
702
# The mean score of qual '!!II' is expected to be 20.0.
test "#scores_mean returns correctly" do
  @entry.qual = '!!II'

  mean = @entry.scores_mean

  assert_equal(20.0, mean)
end
706
+
707
# #scores_min must raise BioDSL::SeqError when qual is nil.
test "#scores_min without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_min
  end
end
711
+
712
# The minimum score of qual '!!II' is expected to be 0.
test "#scores_min returns correctly" do
  @entry.qual = '!!II'

  lowest = @entry.scores_min

  assert_equal(0, lowest)
end
716
+
717
# #scores_max must raise BioDSL::SeqError when qual is nil.
test "#scores_max without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_max
  end
end
721
+
722
# The maximum score of qual '!!II' is expected to equal 40.0.
test "#scores_max returns correctly" do
  @entry.qual = '!!II'

  highest = @entry.scores_max

  assert_equal(40.0, highest)
end
726
+
727
# #scores_mean_local must raise BioDSL::SeqError when qual is nil.
test "#scores_mean_local without qual raises" do
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean_local(2)
  end
end
731
+
732
# For qual '!!II' and an argument of 2, #scores_mean_local is expected to
# return 0.0.
test "#scores_mean_local returns correctly" do
  @entry.qual = '!!II'

  local_mean = @entry.scores_mean_local(2)

  assert_equal(0.0, local_mean)
end
736
+
737
# Without a block, #each_orf is expected to return two ORFs for this
# sequence: the one starting at the first ATG (positions 2..37) and the
# nested one starting at the second ATG (positions 11..37).
test "#each_orf returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf

  assert_equal(2, orfs.size)

  outer_orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", outer_orf.entry.seq)
  assert_equal(2, outer_orf.start)
  assert_equal(37, outer_orf.stop)

  inner_orf = orfs.last
  assert_equal("ATGcatcgatcagcatcgatcgatTAA", inner_orf.entry.seq)
  assert_equal(11, inner_orf.start)
  assert_equal(37, inner_orf.stop)
end
749
+
750
# With a block, #each_orf is expected to yield ORFs one at a time; only the
# first yielded ORF is inspected (positions 2..37), then iteration stops.
test "#each_orf in block context returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"

  # Tracks whether the block ran at all. Without this flag the test would
  # pass vacuously if #each_orf never yielded, since every assertion lives
  # inside the block.
  yielded = false

  @entry.each_orf do |orf|
    yielded = true

    assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
    assert_equal(2, orf.start)
    assert_equal(37, orf.stop)

    break
  end

  assert(yielded, "#each_orf did not yield any ORF to the block")
end
760
+
761
# With size_min: 30, only the longer ORF (positions 2..37) is expected.
test "#each_orf with size_min returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_min: 30)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
770
+
771
# With size_max: 30, only the shorter ORF (positions 11..37) is expected.
test "#each_orf with size_max returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_max: 30)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(11, orf.start)
  assert_equal(37, orf.stop)
end
780
+
781
# With pick_longest: true, only the longer ORF (positions 2..37) is expected.
test "#each_orf with pick_longest returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(pick_longest: true)

  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
790
+ end