BioDSL 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
@@ -1,173 +1,175 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
7
|
-
#
|
8
|
-
# This program is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU General Public License
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
11
|
-
# of the License, or (at your option) any later version.
|
12
|
-
#
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
-
# GNU General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU General Public License
|
19
|
-
# along with this program; if not, write to the Free Software
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
3
|
+
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
7
|
+
# #
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
11
|
+
# of the License, or (at your option) any later version. #
|
12
|
+
# #
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
16
|
+
# GNU General Public License for more details. #
|
17
|
+
# #
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
19
|
+
# along with this program; if not, write to the Free Software #
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
21
|
+
# USA. #
|
22
|
+
# #
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
24
|
+
# #
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
26
|
+
# #
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
28
|
+
# #
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
29
30
|
|
30
31
|
require 'test/helper'
|
31
32
|
|
33
|
+
# Test class for BackTrack.
|
32
34
|
class BackTrackTest < Test::Unit::TestCase
|
33
35
|
def setup
|
34
36
|
# 0 1
|
35
37
|
# 01234567890123456789
|
36
|
-
@seq = BioDSL::Seq.new(seq:
|
38
|
+
@seq = BioDSL::Seq.new(seq: 'tacgatgctagcatgcacgg')
|
37
39
|
@seq.extend(BioDSL::BackTrack)
|
38
40
|
end
|
39
41
|
|
40
|
-
test
|
41
|
-
[
|
42
|
+
test '#patscan with bad pattern raises' do
|
43
|
+
['', 'X', '1'].each { |pattern|
|
42
44
|
assert_raise(BioDSL::BackTrackError) { @seq.patscan(pattern) }
|
43
45
|
}
|
44
46
|
end
|
45
47
|
|
46
|
-
test
|
47
|
-
[
|
48
|
+
test '#patscan with OK pattern dont raise' do
|
49
|
+
['N', 'atcg'].each { |pattern|
|
48
50
|
assert_nothing_raised { @seq.patscan(pattern) }
|
49
51
|
}
|
50
52
|
end
|
51
53
|
|
52
|
-
test
|
54
|
+
test '#patscan with bad start raises' do
|
53
55
|
[-1, 20].each { |start|
|
54
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
56
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', start: start) }
|
55
57
|
}
|
56
58
|
end
|
57
59
|
|
58
|
-
test
|
60
|
+
test '#patscan with OK start dont raise' do
|
59
61
|
[0, 19].each { |start|
|
60
|
-
assert_nothing_raised { @seq.patscan(
|
62
|
+
assert_nothing_raised { @seq.patscan('N', start: start) }
|
61
63
|
}
|
62
64
|
end
|
63
65
|
|
64
|
-
test
|
66
|
+
test '#patscan with bad stop raises' do
|
65
67
|
[-1, 20].each { |stop|
|
66
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
68
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', stop: stop) }
|
67
69
|
}
|
68
70
|
end
|
69
71
|
|
70
|
-
test
|
72
|
+
test '#patscan with OK stop dont raise' do
|
71
73
|
[0, 19].each { |stop|
|
72
|
-
assert_nothing_raised { @seq.patscan(
|
74
|
+
assert_nothing_raised { @seq.patscan('N', stop: stop) }
|
73
75
|
}
|
74
76
|
end
|
75
77
|
|
76
|
-
test
|
77
|
-
assert_nil(@seq.patmatch(
|
78
|
-
assert_equal(
|
78
|
+
test '#patscan with stop returns correctly' do
|
79
|
+
assert_nil(@seq.patmatch('G', start: 0, stop: 2))
|
80
|
+
assert_equal('3:1:g', @seq.patmatch('G', start: 0, stop: 3).to_s)
|
79
81
|
end
|
80
82
|
|
81
|
-
test
|
83
|
+
test '#patscan with bad mis raises' do
|
82
84
|
[-1, 6].each { |mis|
|
83
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
85
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_mismatches: mis) }
|
84
86
|
}
|
85
87
|
end
|
86
88
|
|
87
|
-
test
|
89
|
+
test '#patscan with OK mis dont raise' do
|
88
90
|
[0, 5].each { |mis|
|
89
|
-
assert_nothing_raised { @seq.patscan(
|
91
|
+
assert_nothing_raised { @seq.patscan('N', max_mismatches: mis) }
|
90
92
|
}
|
91
93
|
end
|
92
94
|
|
93
|
-
test
|
95
|
+
test '#patscan with bad ins raises' do
|
94
96
|
[-1, 6].each { |ins|
|
95
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
97
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_insertions: ins) }
|
96
98
|
}
|
97
99
|
end
|
98
100
|
|
99
|
-
test
|
101
|
+
test '#patscan with OK ins dont raise' do
|
100
102
|
[0, 5].each { |ins|
|
101
|
-
assert_nothing_raised { @seq.patscan(
|
103
|
+
assert_nothing_raised { @seq.patscan('N', max_insertions: ins) }
|
102
104
|
}
|
103
105
|
end
|
104
106
|
|
105
|
-
test
|
107
|
+
test '#patscan with bad del raises' do
|
106
108
|
[-1, 6].each { |del|
|
107
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
109
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_deletions: del) }
|
108
110
|
}
|
109
111
|
end
|
110
112
|
|
111
|
-
test
|
113
|
+
test '#patscan with OK del dont raise' do
|
112
114
|
[0, 5].each { |del|
|
113
|
-
assert_nothing_raised { @seq.patscan(
|
115
|
+
assert_nothing_raised { @seq.patscan('N', max_deletions: del) }
|
114
116
|
}
|
115
117
|
end
|
116
118
|
|
117
|
-
test
|
118
|
-
assert_equal(
|
119
|
+
test '#patscan perfect left is ok' do
|
120
|
+
assert_equal('0:7:tacgatg', @seq.patscan('TACGATG').first.to_s)
|
119
121
|
end
|
120
122
|
|
121
|
-
test
|
122
|
-
assert_equal(
|
123
|
+
test '#patscan perfect right is ok' do
|
124
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('TGCACGG').first.to_s)
|
123
125
|
end
|
124
126
|
|
125
|
-
test
|
126
|
-
assert_equal(
|
127
|
+
test '#patscan ambiguity is ok' do
|
128
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('TGCACNN').first.to_s)
|
127
129
|
end
|
128
130
|
|
129
|
-
test
|
130
|
-
assert_equal(
|
131
|
-
assert_equal(
|
131
|
+
test '#patscan start is ok' do
|
132
|
+
assert_equal('10:1:g', @seq.patscan('N', start: 10).first.to_s)
|
133
|
+
assert_equal('19:1:g', @seq.patscan('N', start: 10).last.to_s)
|
132
134
|
end
|
133
135
|
|
134
|
-
test
|
135
|
-
assert_equal(
|
136
|
+
test '#patscan mis left is ok' do
|
137
|
+
assert_equal('0:7:tacgatg', @seq.patscan('Aacgatg', max_mismatches: 1).first.to_s)
|
136
138
|
end
|
137
139
|
|
138
|
-
test
|
139
|
-
assert_equal(
|
140
|
+
test '#patscan mis right is ok' do
|
141
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('tgcacgA', max_mismatches: 1).first.to_s)
|
140
142
|
end
|
141
143
|
|
142
|
-
test
|
143
|
-
assert_equal(
|
144
|
+
test '#patscan ins left is ok' do
|
145
|
+
assert_equal('0:7:tacgatg', @seq.patscan('Atacgatg', max_insertions: 1).first.to_s)
|
144
146
|
end
|
145
147
|
|
146
|
-
test
|
147
|
-
assert_equal(
|
148
|
+
test '#patscan ins right is ok' do
|
149
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('tgcacggA', max_insertions: 1).first.to_s)
|
148
150
|
end
|
149
151
|
|
150
|
-
test
|
151
|
-
assert_equal(
|
152
|
+
test '#patscan del left is ok' do
|
153
|
+
assert_equal('0:7:tacgatg', @seq.patscan('acgatg', max_deletions: 1).first.to_s)
|
152
154
|
end
|
153
155
|
|
154
|
-
test
|
155
|
-
assert_equal(
|
156
|
+
test '#patscan del right is ok' do
|
157
|
+
assert_equal('12:8:atgcacgg', @seq.patscan('tgcacgg', max_deletions: 1).first.to_s)
|
156
158
|
end
|
157
159
|
|
158
|
-
test
|
159
|
-
assert_equal(
|
160
|
+
test '#patscan ambiguity mis ins del all ok' do
|
161
|
+
assert_equal('0:20:tacgatgctagcatgcacgg', @seq.patscan('tacatgcNagGatgcCacgg',
|
160
162
|
max_mismatches: 1,
|
161
163
|
max_insertions: 1,
|
162
164
|
max_deletions: 1).first.to_s)
|
163
165
|
end
|
164
166
|
|
165
|
-
test
|
166
|
-
@seq.patmatch(
|
167
|
+
test '#patmatch in block context returns correctly' do
|
168
|
+
@seq.patmatch('tacatgcNagGatgcCacgg',
|
167
169
|
max_mismatches: 1,
|
168
170
|
max_insertions: 1,
|
169
171
|
max_deletions: 1) do |hit|
|
170
|
-
assert_equal(
|
172
|
+
assert_equal('tacgatgctagcatgcacgg', hit.match)
|
171
173
|
assert_equal(0, hit.pos)
|
172
174
|
assert_equal(20, hit.length)
|
173
175
|
break
|
@@ -1,70 +1,72 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
3
3
|
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
7
|
-
#
|
8
|
-
# This program is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU General Public License
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
11
|
-
# of the License, or (at your option) any later version.
|
12
|
-
#
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
-
# GNU General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU General Public License
|
19
|
-
# along with this program; if not, write to the Free Software
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
7
|
+
# #
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
11
|
+
# of the License, or (at your option) any later version. #
|
12
|
+
# #
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
16
|
+
# GNU General Public License for more details. #
|
17
|
+
# #
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
19
|
+
# along with this program; if not, write to the Free Software #
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
21
|
+
# USA. #
|
22
|
+
# #
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
24
|
+
# #
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
26
|
+
# #
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
28
|
+
# #
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
29
30
|
|
30
31
|
require 'test/helper'
|
31
32
|
|
33
|
+
# Test class for Digest.
|
32
34
|
class TestDigest < Test::Unit::TestCase
|
33
35
|
def setup
|
34
|
-
@entry = BioDSL::Seq.new(seq:
|
36
|
+
@entry = BioDSL::Seq.new(seq: 'cgatcgatcGGATCCgagagggtgtgtagtgGAATTCcgctgc')
|
35
37
|
end
|
36
38
|
|
37
|
-
test
|
38
|
-
assert_raise(BioDSL::DigestError) { @entry.each_digest(
|
39
|
+
test '#each_digest with bad residue in pattern raises' do
|
40
|
+
assert_raise(BioDSL::DigestError) { @entry.each_digest('X', 0).to_a }
|
39
41
|
end
|
40
42
|
|
41
|
-
test
|
42
|
-
digests = @entry.each_digest(
|
43
|
+
test '#each_digest returns correctly' do
|
44
|
+
digests = @entry.each_digest('GGATCC', 1).to_a
|
43
45
|
assert_equal(2, digests.size)
|
44
|
-
assert_equal(
|
45
|
-
assert_equal(
|
46
|
-
assert_equal(
|
47
|
-
assert_equal(
|
46
|
+
assert_equal('[0-9]', digests.first.seq_name)
|
47
|
+
assert_equal('cgatcgatcG', digests.first.seq)
|
48
|
+
assert_equal('[10-42]', digests.last.seq_name)
|
49
|
+
assert_equal('GATCCgagagggtgtgtagtgGAATTCcgctgc', digests.last.seq)
|
48
50
|
end
|
49
51
|
|
50
|
-
test
|
51
|
-
digests = @entry.each_digest(
|
52
|
+
test '#each_digest with negavive offset returns correctly' do
|
53
|
+
digests = @entry.each_digest('CGATCG', -1).to_a
|
52
54
|
assert_equal(1, digests.size)
|
53
|
-
assert_equal(
|
55
|
+
assert_equal('[0-42]', digests.first.seq_name)
|
54
56
|
assert_equal(@entry.seq, digests.first.seq)
|
55
57
|
end
|
56
58
|
|
57
|
-
test
|
58
|
-
digests = @entry.each_digest(
|
59
|
+
test '#each_digest with offset out of bounds returns correctly' do
|
60
|
+
digests = @entry.each_digest('AATTCcgctgc', 15).to_a
|
59
61
|
assert_equal(1, digests.size)
|
60
|
-
assert_equal(
|
62
|
+
assert_equal('[0-42]', digests.first.seq_name)
|
61
63
|
assert_equal(@entry.seq, digests.first.seq)
|
62
64
|
end
|
63
65
|
|
64
|
-
test
|
65
|
-
@entry.each_digest(
|
66
|
-
assert_equal(
|
67
|
-
assert_equal(
|
66
|
+
test '#each_digest in block context returns correctly' do
|
67
|
+
@entry.each_digest('GGATCC', 1) do |digest|
|
68
|
+
assert_equal('[0-9]', digest.seq_name)
|
69
|
+
assert_equal('cgatcgatcG', digest.seq)
|
68
70
|
break
|
69
71
|
end
|
70
72
|
end
|
@@ -1,133 +1,135 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
7
|
-
#
|
8
|
-
# This program is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU General Public License
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
11
|
-
# of the License, or (at your option) any later version.
|
12
|
-
#
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
-
# GNU General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU General Public License
|
19
|
-
# along with this program; if not, write to the Free Software
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
3
|
+
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
7
|
+
# #
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
11
|
+
# of the License, or (at your option) any later version. #
|
12
|
+
# #
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
16
|
+
# GNU General Public License for more details. #
|
17
|
+
# #
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
19
|
+
# along with this program; if not, write to the Free Software #
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
21
|
+
# USA. #
|
22
|
+
# #
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
24
|
+
# #
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
26
|
+
# #
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
28
|
+
# #
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
29
30
|
|
30
31
|
require 'test/helper'
|
31
32
|
|
33
|
+
# Test class for Dynamic.
|
32
34
|
class TestDynamic < Test::Unit::TestCase
|
33
35
|
def setup
|
34
|
-
@p = BioDSL::Seq.new(seq_name:
|
36
|
+
@p = BioDSL::Seq.new(seq_name: 'test', seq: 'atcg')
|
35
37
|
@p.extend(BioDSL::Dynamic)
|
36
38
|
end
|
37
39
|
|
38
|
-
test
|
39
|
-
assert_nil(@p.patmatch(
|
40
|
+
test '#patmatch with no match returns nil' do
|
41
|
+
assert_nil(@p.patmatch('gggg'))
|
40
42
|
end
|
41
43
|
|
42
|
-
test
|
43
|
-
m = @p.patmatch(
|
44
|
+
test '#patmatch with perfect match returns correctly' do
|
45
|
+
m = @p.patmatch('atcg')
|
44
46
|
assert_equal(0, m.beg)
|
45
|
-
assert_equal(
|
47
|
+
assert_equal('atcg', m.match)
|
46
48
|
assert_equal(0, m.mis)
|
47
49
|
assert_equal(0, m.ins)
|
48
50
|
assert_equal(0, m.del)
|
49
51
|
assert_equal(4, m.length)
|
50
52
|
end
|
51
53
|
|
52
|
-
test
|
53
|
-
m = @p.patmatch(
|
54
|
+
test '#patmatch with perfect match with ambiguity codes returns correctly' do
|
55
|
+
m = @p.patmatch('nnnn')
|
54
56
|
assert_equal(0, m.beg)
|
55
|
-
assert_equal(
|
57
|
+
assert_equal('atcg', m.match)
|
56
58
|
assert_equal(0, m.mis)
|
57
59
|
assert_equal(0, m.ins)
|
58
60
|
assert_equal(0, m.del)
|
59
61
|
assert_equal(4, m.length)
|
60
62
|
end
|
61
63
|
|
62
|
-
test
|
63
|
-
assert_nil(@p.patmatch(
|
64
|
+
test '#patmatch with one mismatch and edit dist zero returns nil' do
|
65
|
+
assert_nil(@p.patmatch('aCcg'))
|
64
66
|
end
|
65
67
|
|
66
|
-
test
|
67
|
-
m = @p.patmatch(
|
68
|
+
test '#patmatch with one mismatch and edit dist one returns correctly' do
|
69
|
+
m = @p.patmatch('aCcg', 0, 1)
|
68
70
|
assert_equal(0, m.beg)
|
69
|
-
assert_equal(
|
71
|
+
assert_equal('atcg', m.match)
|
70
72
|
assert_equal(1, m.mis)
|
71
73
|
assert_equal(0, m.ins)
|
72
74
|
assert_equal(0, m.del)
|
73
75
|
assert_equal(4, m.length)
|
74
76
|
end
|
75
77
|
|
76
|
-
test
|
77
|
-
assert_nil(@p.patmatch(
|
78
|
+
test '#patmatch with two mismatch and edit dist one returns nil' do
|
79
|
+
assert_nil(@p.patmatch('aGcA', 0, 1))
|
78
80
|
end
|
79
81
|
|
80
|
-
test
|
81
|
-
assert_nil(@p.patmatch(
|
82
|
+
test '#patmatch with one insertion and edit dist zero returns nil' do
|
83
|
+
assert_nil(@p.patmatch('atGcg'))
|
82
84
|
end
|
83
85
|
|
84
|
-
test
|
85
|
-
m = @p.patmatch(
|
86
|
+
test '#patmatch with one insertion and edit dist one returns correctly' do
|
87
|
+
m = @p.patmatch('atGcg', 0, 1)
|
86
88
|
assert_equal(0, m.beg)
|
87
|
-
assert_equal(
|
89
|
+
assert_equal('atcg', m.match)
|
88
90
|
assert_equal(0, m.mis)
|
89
91
|
assert_equal(1, m.ins)
|
90
92
|
assert_equal(0, m.del)
|
91
93
|
assert_equal(4, m.length)
|
92
94
|
end
|
93
95
|
|
94
|
-
test
|
95
|
-
assert_nil(@p.patmatch(
|
96
|
+
test '#patmatch with two insertions and edit dist one returns nil' do
|
97
|
+
assert_nil(@p.patmatch('atGcTg', 0, 1))
|
96
98
|
end
|
97
99
|
|
98
|
-
test
|
99
|
-
m = @p.patmatch(
|
100
|
+
test '#patmatch with two insertions and edit dist two returns correctly' do
|
101
|
+
m = @p.patmatch('atGcTg', 0, 2)
|
100
102
|
assert_equal(0, m.beg)
|
101
|
-
assert_equal(
|
103
|
+
assert_equal('atcg', m.match)
|
102
104
|
assert_equal(0, m.mis)
|
103
105
|
assert_equal(2, m.ins)
|
104
106
|
assert_equal(0, m.del)
|
105
107
|
assert_equal(4, m.length)
|
106
108
|
end
|
107
109
|
|
108
|
-
test
|
109
|
-
assert_nil(@p.patmatch(
|
110
|
+
test '#patmatch with one deletion and edit distance zero returns nil' do
|
111
|
+
assert_nil(@p.patmatch('acg'))
|
110
112
|
end
|
111
113
|
|
112
|
-
test
|
113
|
-
m = @p.patmatch(
|
114
|
+
test '#patmatch with one deletion and edit distance one returns correctly' do
|
115
|
+
m = @p.patmatch('acg', 0, 1)
|
114
116
|
assert_equal(0, m.beg)
|
115
|
-
assert_equal(
|
117
|
+
assert_equal('atcg', m.match)
|
116
118
|
assert_equal(0, m.mis)
|
117
119
|
assert_equal(0, m.ins)
|
118
120
|
assert_equal(1, m.del)
|
119
121
|
assert_equal(4, m.length)
|
120
122
|
end
|
121
123
|
|
122
|
-
test
|
123
|
-
p = BioDSL::Seq.new(seq_name:
|
124
|
+
test '#patscan locates three patterns ok' do
|
125
|
+
p = BioDSL::Seq.new(seq_name: 'test', seq: 'ataacgagctagctagctagctgactac')
|
124
126
|
p.extend(BioDSL::Dynamic)
|
125
|
-
assert_equal(3, p.patscan(
|
127
|
+
assert_equal(3, p.patscan('tag').count)
|
126
128
|
end
|
127
129
|
|
128
|
-
test
|
129
|
-
p = BioDSL::Seq.new(seq_name:
|
130
|
+
test '#patscan with pos locates two patterns ok' do
|
131
|
+
p = BioDSL::Seq.new(seq_name: 'test', seq: 'ataacgagctagctagctagctgactac')
|
130
132
|
p.extend(BioDSL::Dynamic)
|
131
|
-
assert_equal(2, p.patscan(
|
133
|
+
assert_equal(2, p.patscan('tag', 10).count)
|
132
134
|
end
|
133
135
|
end
|