BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
@@ -0,0 +1,790 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', '..')
|
3
|
+
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
7
|
+
# #
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
11
|
+
# of the License, or (at your option) any later version. #
|
12
|
+
# #
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
16
|
+
# GNU General Public License for more details. #
|
17
|
+
# #
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
19
|
+
# along with this program; if not, write to the Free Software #
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
|
21
|
+
# #
|
22
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
23
|
+
# #
|
24
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
25
|
+
# #
|
26
|
+
# This software is part of BioDSL (www.BioDSL.org). #
|
27
|
+
# #
|
28
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
29
|
+
|
30
|
+
require 'test/helper'
|
31
|
+
|
32
|
+
class TestSeq < Test::Unit::TestCase
|
33
|
+
# Runs before each test: build a BioDSL::Seq with no arguments and keep it in
# @entry so individual tests can fill in only the attributes they need.
def setup
  @entry = BioDSL::Seq.new
end
|
36
|
+
|
37
|
+
# A 4-residue sequence combined with a 3-character quality string must be
# rejected by the constructor with BioDSL::SeqError.
# (Test name typo "differnet" corrected to "different".)
test "BioDSL::Seq.new with different length SEQ and SCORES raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "hhh")
  end
end
|
40
|
+
|
41
|
+
# Seq.new_bp takes a record hash; its :SEQ_NAME, :SEQ, :SEQ_TYPE and :SCORES
# values are expected back from seq_name, seq, type and qual respectively.
test "BioDSL::Seq.new_bp returns correctly" do
  record = { SEQ_NAME: "test", SEQ: "ATCG", SEQ_TYPE: :dna, SCORES: "hhhh" }
  entry  = BioDSL::Seq.new_bp(record)

  assert_equal("test", entry.seq_name)
  assert_equal("ATCG", entry.seq)
  assert_equal(:dna, entry.type)
  assert_equal("hhhh", entry.qual)
end
|
49
|
+
|
50
|
+
# :foo is not an accepted sequence type for oligo generation.
test "BioDSL::Seq.generate_oligos with bad type raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.generate_oligos(2, :foo)
  end
end
|
53
|
+
|
54
|
+
# An oligo length of 0 is rejected.
test "BioDSL::Seq.generate_oligos with bad length raises" do
  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.generate_oligos(0, :dna)
  end
end
|
57
|
+
|
58
|
+
# Checks the complete, ordered list of length-2 oligos for each sequence
# type. The expected lists are spelled out literally (not computed) so the
# test stays independent of the implementation.
test "BioDSL::Seq.generate_oligos returns correctly" do
  expected_by_type = {
    dna: %w{aa at ac ag ta tt tc tg ca ct cc cg ga gt gc gg},
    rna: %w{aa au ac ag ua uu uc ug ca cu cc cg ga gu gc gg},
    protein: %w{
      ff fl fs fy fc fw fp fh fq fr fi fm ft fn fk fv fa fd fe fg
      lf ll ls ly lc lw lp lh lq lr li lm lt ln lk lv la ld le lg
      sf sl ss sy sc sw sp sh sq sr si sm st sn sk sv sa sd se sg
      yf yl ys yy yc yw yp yh yq yr yi ym yt yn yk yv ya yd ye yg
      cf cl cs cy cc cw cp ch cq cr ci cm ct cn ck cv ca cd ce cg
      wf wl ws wy wc ww wp wh wq wr wi wm wt wn wk wv wa wd we wg
      pf pl ps py pc pw pp ph pq pr pi pm pt pn pk pv pa pd pe pg
      hf hl hs hy hc hw hp hh hq hr hi hm ht hn hk hv ha hd he hg
      qf ql qs qy qc qw qp qh qq qr qi qm qt qn qk qv qa qd qe qg
      rf rl rs ry rc rw rp rh rq rr ri rm rt rn rk rv ra rd re rg
      if il is iy ic iw ip ih iq ir ii im it in ik iv ia id ie ig
      mf ml ms my mc mw mp mh mq mr mi mm mt mn mk mv ma md me mg
      tf tl ts ty tc tw tp th tq tr ti tm tt tn tk tv ta td te tg
      nf nl ns ny nc nw np nh nq nr ni nm nt nn nk nv na nd ne ng
      kf kl ks ky kc kw kp kh kq kr ki km kt kn kk kv ka kd ke kg
      vf vl vs vy vc vw vp vh vq vr vi vm vt vn vk vv va vd ve vg
      af al as ay ac aw ap ah aq ar ai am at an ak av aa ad ae ag
      df dl ds dy dc dw dp dh dq dr di dm dt dn dk dv da dd de dg
      ef el es ey ec ew ep eh eq er ei em et en ek ev ea ed ee eg
      gf gl gs gy gc gw gp gh gq gr gi gm gt gn gk gv ga gd ge gg
    }
  }

  # Hash iteration follows insertion order: dna, then rna, then protein.
  expected_by_type.each do |type, oligos|
    assert_equal(oligos, BioDSL::Seq.generate_oligos(2, type))
  end
end
|
87
|
+
|
88
|
+
# Plain names such as "foo" and "bar" must be rejected, whichever of the two
# entries is passed first.
test "BioDSL::Seq.check_name_pair with badly formatted names raises" do
  foo = BioDSL::Seq.new(seq_name: "foo")
  bar = BioDSL::Seq.new(seq_name: "bar")

  [[foo, bar], [bar, foo]].each do |first, second|
    assert_raise(BioDSL::SeqError) { BioDSL::Seq.check_name_pair(first, second) }
  end
end
|
94
|
+
|
95
|
+
# The two "#0/1" / "#0/2" names differ in their coordinate fields
# (941:1973 vs 491:1793), so they must not be accepted as a pair.
test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and no match raises" do
  read1 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/1")
  read2 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:491:1793#0/2")

  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.check_name_pair(read1, read2)
  end
end
|
100
|
+
|
101
|
+
# The parts before the space differ (15343:197393 vs 15433:179393), so the
# two names must not be accepted as a pair.
test "BioDSL::Seq.check_name_pair with Illumina1.8 names and no match raises" do
  read1 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG")
  read2 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15433:179393 2:Y:18:ATCACG")

  assert_raise(BioDSL::SeqError) do
    BioDSL::Seq.check_name_pair(read1, read2)
  end
end
|
106
|
+
|
107
|
+
# Names that are identical apart from the trailing /1 and /2 form a valid pair.
test "BioDSL::Seq.check_name_pair with Illumina1.3/1.5 names and match don't raise" do
  read1 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/1")
  read2 = BioDSL::Seq.new(seq_name: "HWUSI-EAS100R:6:73:941:1973#0/2")

  assert_nothing_raised do
    BioDSL::Seq.check_name_pair(read1, read2)
  end
end
|
112
|
+
|
113
|
+
# Names that are identical apart from the 1 / 2 right after the space form a
# valid pair.
test "BioDSL::Seq.check_name_pair with Illumina1.8 names and match don't raise" do
  read1 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG")
  read2 = BioDSL::Seq.new(seq_name: "EAS139:136:FC706VJ:2:2104:15343:197393 2:Y:18:ATCACG")

  assert_nothing_raised do
    BioDSL::Seq.check_name_pair(read1, read2)
  end
end
|
118
|
+
|
119
|
+
# An entry built without a type must report false from #is_dna?.
test "#is_dna? with no sequence type returns false" do
  # assert_equal shows the actual return value when the check fails, whereas
  # assert(x == false) can only report that the expression was not true.
  assert_equal(false, @entry.is_dna?)
end
|
122
|
+
|
123
|
+
# Once the type is set to :dna, #is_dna? must return true.
test "#is_dna? with dna sequence type returns true" do
  @entry.type = :dna

  # assert_equal shows the actual return value when the check fails, whereas
  # assert(x == true) can only report that the expression was not true.
  assert_equal(true, @entry.is_dna?)
end
|
127
|
+
|
128
|
+
# An entry built without a type must report false from #is_rna?.
test "#is_rna? with no sequence type returns false" do
  # assert_equal shows the actual return value when the check fails, whereas
  # assert(x == false) can only report that the expression was not true.
  assert_equal(false, @entry.is_rna?)
end
|
131
|
+
|
132
|
+
# Once the type is set to :rna, #is_rna? must return true.
test "#is_rna? with rna sequence type returns true" do
  @entry.type = :rna

  # assert_equal shows the actual return value when the check fails, whereas
  # assert(x == true) can only report that the expression was not true.
  assert_equal(true, @entry.is_rna?)
end
|
136
|
+
|
137
|
+
# An entry built without a type must report false from #is_protein?.
test "#is_protein? with no sequence type returns false" do
  # assert_equal shows the actual return value when the check fails, whereas
  # assert(x == false) can only report that the expression was not true.
  assert_equal(false, @entry.is_protein?)
end
|
140
|
+
|
141
|
+
# Once the type is set to :protein, #is_protein? must return true.
test "#is_protein? with protein sequence type returns true" do
  @entry.type = :protein

  answer = @entry.is_protein?
  assert_equal(true, answer)
end
|
145
|
+
|
146
|
+
# With no sequence set there is nothing to guess from, so #type_guess raises.
test "#type_guess without sequence raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.type_guess
  end
end
|
149
|
+
|
150
|
+
# The only difference from the DNA fixture is the embedded "rF"; this
# sequence must be guessed as protein.
test "#type_guess with protein returns protein" do
  @entry.seq = 'atcatcrFgatcg'

  guess = @entry.type_guess
  assert_equal(:protein, guess)
end
|
154
|
+
|
155
|
+
# The only difference from the DNA fixture is the embedded "rU"; this
# sequence must be guessed as RNA.
test "#type_guess with rna returns rna" do
  @entry.seq = 'atcatcrUgatcg'

  guess = @entry.type_guess
  assert_equal(:rna, guess)
end
|
159
|
+
|
160
|
+
# A sequence made only of a, t, c and g must be guessed as DNA.
test "#type_guess with dna returns dna" do
  @entry.seq = 'atcatcgatcg'

  guess = @entry.type_guess
  assert_equal(:dna, guess)
end
|
164
|
+
|
165
|
+
# The bang variant also raises when no sequence has been set.
test "#type_guess! without sequence raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.type_guess!
  end
end
|
168
|
+
|
169
|
+
# #type_guess! is checked through its effect on the entry: afterwards
# @entry.type must be :protein.
test "#type_guess! with protein returns protein" do
  @entry.seq = 'atcatcrFgatcg'

  @entry.type_guess!

  assert_equal(:protein, @entry.type)
end
|
174
|
+
|
175
|
+
# #type_guess! is checked through its effect on the entry: afterwards
# @entry.type must be :rna.
test "#type_guess! with rna returns rna" do
  @entry.seq = 'atcatcrUgatcg'

  @entry.type_guess!

  assert_equal(:rna, @entry.type)
end
|
180
|
+
|
181
|
+
# #type_guess! is checked through its effect on the entry: afterwards
# @entry.type must be :dna.
test "#type_guess! with dna returns dna" do
  @entry.seq = 'atcatcgatcg'

  @entry.type_guess!

  assert_equal(:dna, @entry.type)
end
|
186
|
+
|
187
|
+
# #length of a 4-residue sequence is 4.
# (Test name typo "corretly" corrected to "correctly".)
test "#length returns correctly" do
  @entry.seq = 'ATCG'

  assert_equal(4, @entry.length)
end
|
191
|
+
|
192
|
+
# Of the eight characters below, the four trailing ones ('.', '-', '~', '_')
# are expected to be counted as indels.
test "#indels returns correctly" do
  @entry.seq = 'ATCG.-~_'

  indel_count = @entry.indels
  assert_equal(4, indel_count)
end
|
196
|
+
|
197
|
+
# A :dna entry without any sequence cannot be converted.
test "#to_rna with no sequence raises" do
  @entry.type = :dna

  assert_raise(BioDSL::SeqError) do
    @entry.to_rna
  end
end
|
201
|
+
|
202
|
+
# An entry whose type is already :rna must be refused by #to_rna.
test "#to_rna with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :rna

  assert_raise(BioDSL::SeqError) do
    @entry.to_rna
  end
end
|
207
|
+
|
208
|
+
# T becomes U and t becomes u; the case of every residue is preserved.
test "#to_rna transcribes correctly" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna

  transcribed = @entry.to_rna
  assert_equal("AUCGaucg", transcribed)
end
|
213
|
+
|
214
|
+
# After #to_rna the entry itself must be typed :rna.
test "#to_rna changes entry type to rna" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna

  @entry.to_rna

  assert_equal(:rna, @entry.type)
end
|
220
|
+
|
221
|
+
# An :rna entry without any sequence cannot be converted.
test "#to_dna with no sequence raises" do
  @entry.type = :rna

  assert_raise(BioDSL::SeqError) do
    @entry.to_dna
  end
end
|
225
|
+
|
226
|
+
# An entry whose type is already :dna must be refused by #to_dna.
test "#to_dna with bad type raises" do
  @entry.seq  = 'AUCG'
  @entry.type = :dna

  assert_raise(BioDSL::SeqError) do
    @entry.to_dna
  end
end
|
231
|
+
|
232
|
+
# U becomes T and u becomes t; the case of every residue is preserved.
test "#to_dna transcribes correctly" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna

  converted = @entry.to_dna
  assert_equal("ATCGatcg", converted)
end
|
237
|
+
|
238
|
+
# After #to_dna the entry itself must be typed :dna.
test "#to_dna changes entry type to dna" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna

  @entry.to_dna

  assert_equal(:dna, @entry.type)
end
|
244
|
+
|
245
|
+
# With only a name and a sequence set, #to_bp must return exactly these three
# keys, with :SEQ_LEN equal to the sequence length.
test "#to_bp returns correct record" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'

  expected = { SEQ_NAME: "test", SEQ: "ATCG", SEQ_LEN: 4 }
  assert_equal(expected, @entry.to_bp)
end
|
250
|
+
|
251
|
+
# seq_name was never assigned, so FASTA output is refused.
test "#to_fasta with missing seq_name raises" do
  @entry.seq = 'ATCG'

  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end
|
255
|
+
|
256
|
+
# An empty-string seq_name is refused as well.
test "#to_fasta with empty seq_name raises" do
  @entry.seq_name = ''
  @entry.seq      = 'ATCG'

  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end
|
261
|
+
|
262
|
+
# seq was never assigned, so FASTA output is refused.
test "#to_fasta with missing seq raises" do
  @entry.seq_name = 'test'

  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end
|
266
|
+
|
267
|
+
# An empty-string seq is refused as well.
test "#to_fasta with empty seq raises" do
  @entry.seq_name = 'test'
  @entry.seq      = ''

  assert_raise(BioDSL::SeqError) do
    @entry.to_fasta
  end
end
|
272
|
+
|
273
|
+
# Expected layout: ">" + name, newline, sequence, trailing newline.
test "#to_fasta returns correct entry" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'

  fasta = @entry.to_fasta
  assert_equal(">test\nATCG\n", fasta)
end
|
278
|
+
|
279
|
+
# Passing 2 to #to_fasta splits the 4-residue sequence into two 2-residue lines.
test "#to_fasta wraps correctly" do
  entry = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")

  wrapped = entry.to_fasta(2)
  assert_equal(">test\nAT\nCG\n", wrapped)
end
|
283
|
+
|
284
|
+
# Expected layout: "@" + name, sequence, a bare "+" line, quality string, each
# terminated by a newline.
test "#to_fastq returns correct entry" do
  @entry.seq_name = 'test'
  @entry.seq      = 'ATCG'
  @entry.qual     = 'hhhh'

  fastq = @entry.to_fastq
  assert_equal("@test\nATCG\n+\nhhhh\n", fastq)
end
|
290
|
+
|
291
|
+
# "AUCG" must be rejected by #to_key (presumably because of the U, the only
# residue not present in the accepted "ATCG" case).
test "#to_key with bad residue raises" do
  entry = BioDSL::Seq.new(seq_name: "test", seq: "AUCG")

  assert_raise(BioDSL::SeqError) do
    entry.to_key
  end
end
|
295
|
+
|
296
|
+
# "ATCG" is expected to map to the integer key 54.
test "#to_key returns correctly" do
  entry = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")

  key = entry.to_key
  assert_equal(54, key)
end
|
300
|
+
|
301
|
+
# #reverse hands back an entry with the reversed sequence and leaves the
# receiver's sequence as it was.
test "#reverse returns correctly" do
  @entry.seq = "ATCG"

  reversed = @entry.reverse

  assert_equal("GCTA", reversed.seq)
  assert_equal("ATCG", @entry.seq)
end
|
307
|
+
|
308
|
+
# #reverse! reverses the receiver's own sequence.
test "#reverse! returns correctly" do
  @entry.seq = "ATCG"

  @entry.reverse!

  assert_equal("GCTA", @entry.seq)
end
|
313
|
+
|
314
|
+
# A typed entry without a sequence cannot be complemented.
test "#complement with no sequence raises" do
  @entry.type = :dna

  assert_raise(BioDSL::SeqError) do
    @entry.complement
  end
end
|
318
|
+
|
319
|
+
# Entries typed :protein cannot be complemented.
test "#complement with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :protein

  assert_raise(BioDSL::SeqError) do
    @entry.complement
  end
end
|
324
|
+
|
325
|
+
# A<->T and C<->G with case preserved; the receiver's sequence is untouched.
test "#complement for DNA is correct" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna

  complemented = @entry.complement

  assert_equal("TAGCtagc", complemented.seq)
  assert_equal("ATCGatcg", @entry.seq)
end
|
332
|
+
|
333
|
+
# A<->U and C<->G with case preserved; the receiver's sequence is untouched.
test "#complement for RNA is correct" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna

  complemented = @entry.complement

  assert_equal("UAGCuagc", complemented.seq)
  assert_equal("AUCGaucg", @entry.seq)
end
|
340
|
+
|
341
|
+
# The bang variant also raises for a typed entry without a sequence.
test "#complement! with no sequence raises" do
  @entry.type = :dna

  assert_raise(BioDSL::SeqError) do
    @entry.complement!
  end
end
|
345
|
+
|
346
|
+
# The bang variant also refuses entries typed :protein.
test "#complement! with bad type raises" do
  @entry.seq  = 'ATCG'
  @entry.type = :protein

  assert_raise(BioDSL::SeqError) do
    @entry.complement!
  end
end
|
351
|
+
|
352
|
+
# The value returned by #complement! must carry the complemented DNA sequence.
test "#complement! for DNA is correct" do
  @entry.seq  = 'ATCGatcg'
  @entry.type = :dna

  result = @entry.complement!
  assert_equal("TAGCtagc", result.seq)
end
|
357
|
+
|
358
|
+
# The value returned by #complement! must carry the complemented RNA sequence.
test "#complement! for RNA is correct" do
  @entry.seq  = 'AUCGaucg'
  @entry.type = :rna

  result = @entry.complement!
  assert_equal("UAGCuagc", result.seq)
end
|
363
|
+
|
364
|
+
# "ATCG" vs "atgg": the expected distance of 1 implies that the upper/lower
# case difference is not counted, only the C/g position.
test "#hamming_distance returns correctly" do
  upper = BioDSL::Seq.new(seq: "ATCG")
  lower = BioDSL::Seq.new(seq: "atgg")

  distance = upper.hamming_distance(lower)
  assert_equal(1, distance)
end
|
369
|
+
|
370
|
+
# The "n" in the second sequence counts as a mismatch by default and as a
# match when ambiguity: true is passed.
test "#hamming_distance with ambiguity codes return correctly" do
  plain  = BioDSL::Seq.new(seq: "ATCG")
  with_n = BioDSL::Seq.new(seq: "atng")

  assert_equal(1, plain.hamming_distance(with_n))
  assert_equal(0, plain.hamming_distance(with_n, ambiguity: true))
end
|
377
|
+
|
378
|
+
# "ATCG" vs "tgncg" is expected to have an edit distance of 2.
test "#edit_distance returns correctly" do
  shorter = BioDSL::Seq.new(seq: "ATCG")
  longer  = BioDSL::Seq.new(seq: "tgncg")

  distance = shorter.edit_distance(longer)
  assert_equal(2, distance)
end
|
383
|
+
|
384
|
+
# Both a negative length and zero must be rejected.
test "#generate with length < 1 raises" do
  [-10, 0].each do |length|
    assert_raise(BioDSL::SeqError) { @entry.generate(length, :dna) }
  end
end
|
388
|
+
|
389
|
+
# The String "foo" is not an accepted type argument.
test "#generate with bad type raises" do
  assert_raise(BioDSL::SeqError) do
    @entry.generate(10, "foo")
  end
end
|
392
|
+
|
393
|
+
# Each of the three supported type symbols must be accepted.
test "#generate with ok type dont raise" do
  [:dna, :rna, :protein].each do |type|
    assert_nothing_raised { @entry.generate(10, type) }
  end
end
|
398
|
+
|
399
|
+
# #shuffle must leave the receiver's sequence unchanged and return an entry
# whose sequence differs from it.
test "#shuffle returns correctly" do
  # Held as its own String object, so a change to @entry.seq cannot alter it.
  reference  = "actgactgactgatcgatcgatcgatcgtactg"
  @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"

  shuffled = @entry.shuffle

  assert_equal(reference, @entry.seq)
  assert_not_equal(@entry.seq, shuffled.seq)
end
|
406
|
+
|
407
|
+
# Same statement as the original, split over lines: the first argument
# (@entry.seq) is evaluated before #shuffle! is called.
test "#shuffle! returns correctly" do
  @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"

  assert_not_equal(
    @entry.seq,
    @entry.shuffle!.seq
  )
end
|
411
|
+
|
412
|
+
# Adding two untyped, unscored entries joins the sequences; the result has no
# name, no type and no quality string.
test "#+ without qual returns correctly" do
  left   = BioDSL::Seq.new(seq_name: "test1", seq: "at")
  right  = BioDSL::Seq.new(seq_name: "test2", seq: "cg")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_nil(joined.type)
  assert_nil(joined.qual)
end
|
419
|
+
|
420
|
+
# Adding two :dna entries with scores joins both sequences and quality
# strings and keeps the type; the result still has no name.
test "#+ with qual returns correctly" do
  left   = BioDSL::Seq.new(seq_name: "test1", seq: "at", type: :dna, qual: "II")
  right  = BioDSL::Seq.new(seq_name: "test2", seq: "cg", type: :dna, qual: "JJ")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_equal(:dna, joined.type)
  assert_equal("IIJJ", joined.qual)
end
|
427
|
+
|
428
|
+
# The receiver has no type while the appended entry is :dna, so the append is
# refused.
test "#<< with different types raises" do
  @entry.seq = "atcg"

  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna)
  end
end
|
432
|
+
|
433
|
+
# Appending is refused when exactly one side carries a quality string; both
# directions are exercised in turn.
test "#<< with missing qual in one entry raises" do
  @entry.seq  = "atcg"
  @entry.type = :dna

  # Receiver without qual, argument with qual.
  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII")
  end

  # Receiver with qual, argument without qual.
  @entry.qual = "IIII"
  assert_raise(BioDSL::SeqError) do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna)
  end
end
|
440
|
+
|
441
|
+
# Neither side has a type or a quality string, so the append is accepted.
test "#<< with nil qual in both entries dont raise" do
  @entry.seq = "atcg"

  assert_nothing_raised do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg")
  end
end
|
445
|
+
|
446
|
+
# Both sides are :dna and both carry a quality string, so the append is
# accepted.
test "#<< with qual in both entries dont raise" do
  @entry.seq  = "atcg"
  @entry.type = :dna
  @entry.qual = "IIII"

  assert_nothing_raised do
    @entry << BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII")
  end
end
|
452
|
+
|
453
|
+
# The appended sequence is added to the end of the receiver's own sequence.
test "#<< without qual returns correctly" do
  @entry.seq = "atcg"
  addition   = BioDSL::Seq.new(seq_name: "test", seq: "ATCG")

  @entry << addition

  assert_equal("atcgATCG", @entry.seq)
end
|
458
|
+
|
459
|
+
# Sequence and quality string are both extended, in the same order.
test "#<< with qual returns correctly" do
  @entry.seq  = "atcg"
  @entry.type = :dna
  @entry.qual = "HHHH"
  addition    = BioDSL::Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "IIII")

  @entry << addition

  assert_equal("atcgATCG", @entry.seq)
  assert_equal("HHHHIIII", @entry.qual)
end
|
467
|
+
|
468
|
+
# Indexing with 2 returns an entry holding the third residue and its score,
# with the original name and type; the source entry is left unchanged.
test "#[] with qual returns correctly" do
  entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")

  picked = entry[2]

  assert_equal("test", picked.seq_name)
  assert_equal("c", picked.seq)
  assert_equal(:dna, picked.type)
  assert_equal("H", picked.qual)
  assert_equal("atcg", entry.seq)
  assert_equal("FGHI", entry.qual)
end
|
480
|
+
|
481
|
+
# Without scores, the indexed entry has a nil qual; the source sequence is
# left unchanged.
test "#[] without qual returns correctly" do
  entry = BioDSL::Seq.new(seq_name: "test", seq: "atcg")

  picked = entry[2]

  assert_equal("test", picked.seq_name)
  assert_equal("c", picked.seq)
  assert_nil(picked.qual)
  assert_equal("atcg", entry.seq)
end
|
491
|
+
|
492
|
+
# Assigning at index 0 replaces the first residue and its score; the
# receiver keeps its own name and type.
test "[]= with qual returns correctly" do
  entry       = BioDSL::Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
  replacement = BioDSL::Seq.new(seq_name: "foo", seq: "T", type: :dna, qual: "I")

  entry[0] = replacement

  assert_equal("test", entry.seq_name)
  assert_equal("Ttcg", entry.seq)
  assert_equal(:dna, entry.type)
  assert_equal("IGHI", entry.qual)
end
|
502
|
+
|
503
|
+
# Assigning at index 0 replaces the first residue; the receiver keeps its name.
test "[]= without qual returns correctly" do
  entry       = BioDSL::Seq.new(seq_name: "test", seq: "atcg")
  replacement = BioDSL::Seq.new(seq_name: "foo", seq: "T")

  entry[0] = replacement

  assert_equal("test", entry.seq_name)
  assert_equal("Ttcg", entry.seq)
end
|
511
|
+
|
512
|
+
# The '-', '.' and '~' characters are stripped from the sequence.
test "#indels_remove without qual returns correctly" do
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = nil

  cleaned = @entry.indels_remove
  assert_equal("ATCGCG", cleaned.seq)
end
|
517
|
+
|
518
|
+
# The quality characters sitting at the indel positions (the '@'s) must be
# dropped together with the indels themselves.
test "#indels_remove with qual returns correctly" do
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = "a@b@cd@fg"

  # #indels_remove is deliberately called once per assertion, exactly as the
  # original test did.
  first_pass = @entry.indels_remove
  assert_equal("ATCGCG", first_pass.seq)

  second_pass = @entry.indels_remove
  assert_equal("abcdfg", second_pass.qual)
end
|
524
|
+
|
525
|
+
# Per-residue counts for "AAAATTTCCG"; a residue that never occurs ("X") is
# expected to count as 0.
test "#composition returns correctly" do
  @entry.seq = "AAAATTTCCG"

  expected_counts = { "A" => 4, "T" => 3, "C" => 2, "G" => 1, "X" => 0 }

  # #composition is called afresh for every lookup, as in the original test.
  expected_counts.each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
|
533
|
+
|
534
|
+
# Expected 33.33 - presumably the 2 N/n residues out of the 6 non-gap
# residues, as a rounded percentage (TODO confirm against Seq#hard_mask).
test "#hard_mask returns correctly" do
  @entry.seq = "--AAAANn"

  percent = @entry.hard_mask
  assert_equal(33.33, percent)
end
|
538
|
+
|
539
|
+
# Expected 25.00 - presumably the single lower-case residue out of the 4
# non-gap residues, as a percentage (TODO confirm against Seq#soft_mask).
test "#soft_mask returns correctly" do
  @entry.seq = "--AAAa"

  percent = @entry.soft_mask
  assert_equal(25.00, percent)
end
|
543
|
+
|
544
|
+
# qual is present (an empty string) but seq is nil: masking must raise.
test "#mask_seq_hard! with nil seq raises" do
  @entry.seq  = nil
  @entry.qual = ""

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_hard!(20)
  end
end
|
550
|
+
|
551
|
+
# seq is present (an empty string) but qual is nil: masking must raise.
test "#mask_seq_hard! with nil qual raises" do
  @entry.seq  = ""
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_hard!(20)
  end
end
|
557
|
+
|
558
|
+
# Cutoffs of -1 and 41 must be rejected with BioDSL::SeqError.
test "#mask_seq_hard! with bad cutoff raises" do
  # Give the entry a valid sequence and quality string first. On a bare entry
  # (nil seq, nil qual) SeqError is raised anyway, so without this the
  # assertions would pass even if the cutoff were never validated.
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  [-1, 41].each do |cutoff|
    assert_raise(BioDSL::SeqError) { @entry.mask_seq_hard!(cutoff) }
  end
end
|
562
|
+
|
563
|
+
# Cutoffs of 0 and 40 must both be accepted.
test "#mask_seq_hard! with OK cutoff dont raise" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  [0, 40].each do |cutoff|
    assert_nothing_raised { @entry.mask_seq_hard!(cutoff) }
  end
end
|
570
|
+
|
571
|
+
# With cutoff 20, the A and T (scores '3' and '4') become N while the C and G
# (scores '5' and '6') are kept; the leading '-' stays as it is.
# NOTE(review): presumably base-33 scores, i.e. 18, 19, 20, 21 - confirm.
test "#mask_seq_hard! returns correctly" do
  @entry.seq  = "-ATCG"
  @entry.qual = "33456"

  masked = @entry.mask_seq_hard!(20)
  assert_equal("-NNCG", masked.seq)
end
|
577
|
+
|
578
|
+
test "#mask_seq_soft! with nil seq raises" do
  # Only seq is missing; qual is an (empty) string.
  @entry.seq  = nil
  @entry.qual = ""

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_soft!(20)
  end
end
|
584
|
+
|
585
|
+
test "#mask_seq_soft! with nil qual raises" do
  # Only qual is missing; seq is an (empty) string.
  @entry.seq  = ""
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.mask_seq_soft!(20)
  end
end
|
591
|
+
|
592
|
+
test "#mask_seq_soft! with bad cutoff raises" do
  # Give the entry a valid seq and qual so that the out-of-range cutoff is the
  # only thing that can trigger the SeqError. The sibling "nil seq" / "nil qual"
  # tests show that a missing seq or qual raises SeqError even for a valid
  # cutoff, so without this fixture the test could pass without the cutoff
  # check ever being exercised.
  # NOTE(review): assumes setup leaves @entry without seq/qual - confirm.
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  # -1 and 41 are the cutoffs rejected here; 0 and 40 are accepted by the
  # "OK cutoff" test.
  assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(-1) }
  assert_raise(BioDSL::SeqError) { @entry.mask_seq_soft!(41) }
end
|
596
|
+
|
597
|
+
test "#mask_seq_soft! with OK cutoff dont raise" do
  @entry.seq  = "ATCG"
  @entry.qual = "RSTU"

  # 0 and 40 are the lowest and highest cutoffs tried; both must be accepted.
  [0, 40].each do |cutoff|
    assert_nothing_raised { @entry.mask_seq_soft!(cutoff) }
  end
end
|
604
|
+
|
605
|
+
test "#mask_seq_soft! returns correctly" do
  # With cutoff 20 the residues under quality characters '3' and '4' are
  # expected to be lowercased, while the leading gap is left untouched.
  @entry.seq  = "-ATCG"
  @entry.qual = "33456"

  masked = @entry.mask_seq_soft!(20)

  assert_equal("-atCG", masked.seq)
end
|
611
|
+
|
612
|
+
# qual score detection
|
613
|
+
|
614
|
+
test "#qual_base33? returns correctly" do
  # self.qual.match(/[!-:]/)
  # Every character from '!' through ':' must be detected as base 33.
  @entry.qual = '!"#$%&\'()*+,-./0123456789:'
  assert_equal(true, @entry.qual_base33?)

  # ASCII 32 (space) and 59 (';') sit immediately outside the '!'..':' range.
  [32, 59].each do |code|
    @entry.qual = code.chr
    assert_equal(false, @entry.qual_base33?)
  end
end
|
623
|
+
|
624
|
+
test "#qual_base64? returns correctly" do
  # self.qual.match(/[K-h]/)
  # Every character from 'K' through 'h' must be detected as base 64.
  @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
  assert_equal(true, @entry.qual_base64?)

  # ASCII 74 ('J') and 105 ('i') sit immediately outside the 'K'..'h' range.
  [74, 105].each do |code|
    @entry.qual = code.chr
    assert_equal(false, @entry.qual_base64?)
  end
end
|
633
|
+
|
634
|
+
test "#qual_valid? with nil qual raises" do
  # qual is never assigned in this test, so it relies on @entry starting
  # out without quality scores.
  [:base_33, :base_64].each do |encoding|
    assert_raise(BioDSL::SeqError) { @entry.qual_valid?(encoding) }
  end
end
|
638
|
+
|
639
|
+
test "#qual_valid? with bad encoding raises" do
  # "foobar" is not one of the encodings used elsewhere in these tests
  # (:base_33 / :base_64) and must be rejected.
  @entry.qual = "abc"

  assert_raise(BioDSL::SeqError) do
    @entry.qual_valid?("foobar")
  end
end
|
643
|
+
|
644
|
+
test "#qual_valid? with OK range returns correctly" do
  score_min = BioDSL::Seq::SCORE_MIN
  score_max = BioDSL::Seq::SCORE_MAX

  # For each encoding, build a quality string spanning exactly
  # SCORE_MIN..SCORE_MAX shifted by that encoding's offset.
  { base_33: 33, base_64: 64 }.each do |encoding, offset|
    @entry.qual = ((score_min + offset).chr..(score_max + offset).chr).to_a.join
    assert_equal(true, @entry.qual_valid?(encoding))
  end
end
|
650
|
+
|
651
|
+
test "#qual_valid? with bad range returns correctly" do
  score_min = BioDSL::Seq::SCORE_MIN
  score_max = BioDSL::Seq::SCORE_MAX

  { base_33: 33, base_64: 64 }.each do |encoding, offset|
    lowest  = score_min + offset
    highest = score_max + offset

    # First overshoot the valid range by one character at the low end,
    # then by one character at the high end.
    [[lowest - 1, highest], [lowest, highest + 1]].each do |from, to|
      @entry.qual = (from.chr..to.chr).to_a.join
      assert_equal(false, @entry.qual_valid?(encoding))
    end
  end
end
|
662
|
+
|
663
|
+
# quality score conversion and coercion between the base_33 (Sanger) and base_64 encodings
|
664
|
+
|
665
|
+
test "#qual_convert! from base33 to base33 returns OK" do
  # Converting to the same encoding must leave the scores unchanged.
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_33, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
|
669
|
+
|
670
|
+
test "#qual_convert! from base33 to base64 returns OK" do
  # 'B'..'I' is expected to map onto 'a'..'h'.
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_33, :base_64)

  assert_equal('abcdefgh', converted.qual)
end
|
674
|
+
|
675
|
+
test "#qual_convert! from base64 to base64 returns OK" do
  # Converting to the same encoding must leave the scores unchanged.
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_64, :base_64)

  assert_equal('BCDEFGHI', converted.qual)
end
|
679
|
+
|
680
|
+
test "#qual_convert! from base64 to base33 returns OK" do
  # 'a'..'h' is expected to map back onto 'B'..'I'.
  @entry.qual = 'abcdefgh'
  converted = @entry.qual_convert!(:base_64, :base_33)

  assert_equal('BCDEFGHI', converted.qual)
end
|
684
|
+
|
685
|
+
test "#qual_coerce! with bad base raises" do
  # :foo is not an encoding used anywhere else in these tests and must be
  # rejected.
  @entry.qual = ('!'..'~').to_a.join

  assert_raise(BioDSL::SeqError) do
    @entry.qual_coerce!(:foo)
  end
end
|
689
|
+
|
690
|
+
test "#qual_coerce! returns correctly" do
  # Builds a fresh string of every character from '!' to '~' on each call,
  # so the second coercion does not reuse the string given to the first.
  printable = -> { ('!'..'~').to_a.join }

  # Base 33: characters above 'I' are expected to collapse to 'I'.
  @entry.qual = printable.call
  coerced_33 = @entry.qual_coerce!(:base_33).qual
  assert_equal(%q{!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII}, coerced_33)

  # Base 64: characters below '@' are expected to collapse to '@' and
  # characters above 'h' to 'h'.
  @entry.qual = printable.call
  coerced_64 = @entry.qual_coerce!(:base_64).qual
  assert_equal(%q{@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghhhhhhhhhhhhhhhhhhhhhhh}, coerced_64)
end
|
696
|
+
|
697
|
+
test "#scores_mean without qual raises" do
  # No quality scores available, so a mean cannot be computed.
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean
  end
end
|
701
|
+
|
702
|
+
test "#scores_mean returns correctly" do
  # Two '!' and two 'I' quality characters are expected to average to 20.0.
  @entry.qual = '!!II'
  mean = @entry.scores_mean

  assert_equal(20.0, mean)
end
|
706
|
+
|
707
|
+
test "#scores_min without qual raises" do
  # No quality scores available, so a minimum cannot be computed.
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_min
  end
end
|
711
|
+
|
712
|
+
test "#scores_min returns correctly" do
  # The lowest quality character in the string is '!', expected to score 0.
  @entry.qual = '!!II'
  lowest = @entry.scores_min

  assert_equal(0, lowest)
end
|
716
|
+
|
717
|
+
test "#scores_max without qual raises" do
  # No quality scores available, so a maximum cannot be computed.
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_max
  end
end
|
721
|
+
|
722
|
+
test "#scores_max returns correctly" do
  # The highest quality character in the string is 'I', expected to score 40.
  @entry.qual = '!!II'
  highest = @entry.scores_max

  assert_equal(40.0, highest)
end
|
726
|
+
|
727
|
+
test "#scores_mean_local without qual raises" do
  # No quality scores available, so no local mean can be computed.
  @entry.qual = nil

  assert_raise(BioDSL::SeqError) do
    @entry.scores_mean_local(2)
  end
end
|
731
|
+
|
732
|
+
test "#scores_mean_local returns correctly" do
  # NOTE(review): 0.0 presumably is the lowest mean over any window of two
  # adjacent scores (the leading '!!' pair) - confirm against the
  # implementation of Seq#scores_mean_local.
  @entry.qual = '!!II'
  local_mean = @entry.scores_mean_local(2)

  assert_equal(0.0, local_mean)
end
|
736
|
+
|
737
|
+
test "#each_orf returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf

  # Two ORFs are expected: both end at the same stop codon but start at
  # different ATG codons.
  assert_equal(2, orfs.size)

  # Each row: [orf, expected sequence, expected start, expected stop].
  [
    [orfs.first, "ATGcgatcgATGcatcgatcagcatcgatcgatTAA", 2, 37],
    [orfs.last, "ATGcatcgatcagcatcgatcgatTAA", 11, 37]
  ].each do |orf, expected_seq, expected_start, expected_stop|
    assert_equal(expected_seq, orf.entry.seq)
    assert_equal(expected_start, orf.start)
    assert_equal(expected_stop, orf.stop)
  end
end
|
749
|
+
|
750
|
+
test "#each_orf in block context returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"

  # Tracks whether the block ran at all. Every other assertion lives inside
  # the block, so without this flag the test would pass vacuously if
  # each_orf never yielded.
  yielded = false

  @entry.each_orf do |orf|
    yielded = true

    assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
    assert_equal(2, orf.start)
    assert_equal(37, orf.stop)

    # Only the first yielded ORF is checked.
    break
  end

  assert(yielded, "each_orf did not yield any ORF to the block")
end
|
760
|
+
|
761
|
+
test "#each_orf with size_min returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_min: 30)

  # Only the longer of the two ORFs in this sequence is expected to remain.
  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
|
770
|
+
|
771
|
+
test "#each_orf with size_max returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(size_max: 30)

  # Only the shorter of the two ORFs in this sequence is expected to remain.
  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(11, orf.start)
  assert_equal(37, orf.stop)
end
|
780
|
+
|
781
|
+
test "#each_orf with pick_longest returns correctly" do
  @entry.seq = "atATGcgatcgATGcatcgatcagcatcgatcgatTAAcg"
  orfs = @entry.each_orf(pick_longest: true)

  # Of the two ORFs ending at the same stop codon, only the longer one is
  # expected to be reported.
  assert_equal(1, orfs.size)

  orf = orfs.first
  assert_equal("ATGcgatcgATGcatcgatcagcatcgatcgatTAA", orf.entry.seq)
  assert_equal(2, orf.start)
  assert_equal(37, orf.stop)
end
|
790
|
+
end
|