BioDSL 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
|
@@ -1,173 +1,175 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
7
|
-
#
|
|
8
|
-
# This program is free software; you can redistribute it and/or
|
|
9
|
-
# modify it under the terms of the GNU General Public License
|
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
|
11
|
-
# of the License, or (at your option) any later version.
|
|
12
|
-
#
|
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
-
# GNU General Public License for more details.
|
|
17
|
-
#
|
|
18
|
-
# You should have received a copy of the GNU General Public License
|
|
19
|
-
# along with this program; if not, write to the Free Software
|
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
29
30
|
|
|
30
31
|
require 'test/helper'
|
|
31
32
|
|
|
33
|
+
# Test class for BackTrack.
|
|
32
34
|
class BackTrackTest < Test::Unit::TestCase
|
|
33
35
|
def setup
|
|
34
36
|
# 0 1
|
|
35
37
|
# 01234567890123456789
|
|
36
|
-
@seq = BioDSL::Seq.new(seq:
|
|
38
|
+
@seq = BioDSL::Seq.new(seq: 'tacgatgctagcatgcacgg')
|
|
37
39
|
@seq.extend(BioDSL::BackTrack)
|
|
38
40
|
end
|
|
39
41
|
|
|
40
|
-
test
|
|
41
|
-
[
|
|
42
|
+
test '#patscan with bad pattern raises' do
|
|
43
|
+
['', 'X', '1'].each { |pattern|
|
|
42
44
|
assert_raise(BioDSL::BackTrackError) { @seq.patscan(pattern) }
|
|
43
45
|
}
|
|
44
46
|
end
|
|
45
47
|
|
|
46
|
-
test
|
|
47
|
-
[
|
|
48
|
+
test '#patscan with OK pattern dont raise' do
|
|
49
|
+
['N', 'atcg'].each { |pattern|
|
|
48
50
|
assert_nothing_raised { @seq.patscan(pattern) }
|
|
49
51
|
}
|
|
50
52
|
end
|
|
51
53
|
|
|
52
|
-
test
|
|
54
|
+
test '#patscan with bad start raises' do
|
|
53
55
|
[-1, 20].each { |start|
|
|
54
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
|
56
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', start: start) }
|
|
55
57
|
}
|
|
56
58
|
end
|
|
57
59
|
|
|
58
|
-
test
|
|
60
|
+
test '#patscan with OK start dont raise' do
|
|
59
61
|
[0, 19].each { |start|
|
|
60
|
-
assert_nothing_raised { @seq.patscan(
|
|
62
|
+
assert_nothing_raised { @seq.patscan('N', start: start) }
|
|
61
63
|
}
|
|
62
64
|
end
|
|
63
65
|
|
|
64
|
-
test
|
|
66
|
+
test '#patscan with bad stop raises' do
|
|
65
67
|
[-1, 20].each { |stop|
|
|
66
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
|
68
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', stop: stop) }
|
|
67
69
|
}
|
|
68
70
|
end
|
|
69
71
|
|
|
70
|
-
test
|
|
72
|
+
test '#patscan with OK stop dont raise' do
|
|
71
73
|
[0, 19].each { |stop|
|
|
72
|
-
assert_nothing_raised { @seq.patscan(
|
|
74
|
+
assert_nothing_raised { @seq.patscan('N', stop: stop) }
|
|
73
75
|
}
|
|
74
76
|
end
|
|
75
77
|
|
|
76
|
-
test
|
|
77
|
-
assert_nil(@seq.patmatch(
|
|
78
|
-
assert_equal(
|
|
78
|
+
test '#patscan with stop returns correctly' do
|
|
79
|
+
assert_nil(@seq.patmatch('G', start: 0, stop: 2))
|
|
80
|
+
assert_equal('3:1:g', @seq.patmatch('G', start: 0, stop: 3).to_s)
|
|
79
81
|
end
|
|
80
82
|
|
|
81
|
-
test
|
|
83
|
+
test '#patscan with bad mis raises' do
|
|
82
84
|
[-1, 6].each { |mis|
|
|
83
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
|
85
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_mismatches: mis) }
|
|
84
86
|
}
|
|
85
87
|
end
|
|
86
88
|
|
|
87
|
-
test
|
|
89
|
+
test '#patscan with OK mis dont raise' do
|
|
88
90
|
[0, 5].each { |mis|
|
|
89
|
-
assert_nothing_raised { @seq.patscan(
|
|
91
|
+
assert_nothing_raised { @seq.patscan('N', max_mismatches: mis) }
|
|
90
92
|
}
|
|
91
93
|
end
|
|
92
94
|
|
|
93
|
-
test
|
|
95
|
+
test '#patscan with bad ins raises' do
|
|
94
96
|
[-1, 6].each { |ins|
|
|
95
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
|
97
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_insertions: ins) }
|
|
96
98
|
}
|
|
97
99
|
end
|
|
98
100
|
|
|
99
|
-
test
|
|
101
|
+
test '#patscan with OK ins dont raise' do
|
|
100
102
|
[0, 5].each { |ins|
|
|
101
|
-
assert_nothing_raised { @seq.patscan(
|
|
103
|
+
assert_nothing_raised { @seq.patscan('N', max_insertions: ins) }
|
|
102
104
|
}
|
|
103
105
|
end
|
|
104
106
|
|
|
105
|
-
test
|
|
107
|
+
test '#patscan with bad del raises' do
|
|
106
108
|
[-1, 6].each { |del|
|
|
107
|
-
assert_raise(BioDSL::BackTrackError) { @seq.patscan(
|
|
109
|
+
assert_raise(BioDSL::BackTrackError) { @seq.patscan('N', max_deletions: del) }
|
|
108
110
|
}
|
|
109
111
|
end
|
|
110
112
|
|
|
111
|
-
test
|
|
113
|
+
test '#patscan with OK del dont raise' do
|
|
112
114
|
[0, 5].each { |del|
|
|
113
|
-
assert_nothing_raised { @seq.patscan(
|
|
115
|
+
assert_nothing_raised { @seq.patscan('N', max_deletions: del) }
|
|
114
116
|
}
|
|
115
117
|
end
|
|
116
118
|
|
|
117
|
-
test
|
|
118
|
-
assert_equal(
|
|
119
|
+
test '#patscan perfect left is ok' do
|
|
120
|
+
assert_equal('0:7:tacgatg', @seq.patscan('TACGATG').first.to_s)
|
|
119
121
|
end
|
|
120
122
|
|
|
121
|
-
test
|
|
122
|
-
assert_equal(
|
|
123
|
+
test '#patscan perfect right is ok' do
|
|
124
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('TGCACGG').first.to_s)
|
|
123
125
|
end
|
|
124
126
|
|
|
125
|
-
test
|
|
126
|
-
assert_equal(
|
|
127
|
+
test '#patscan ambiguity is ok' do
|
|
128
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('TGCACNN').first.to_s)
|
|
127
129
|
end
|
|
128
130
|
|
|
129
|
-
test
|
|
130
|
-
assert_equal(
|
|
131
|
-
assert_equal(
|
|
131
|
+
test '#patscan start is ok' do
|
|
132
|
+
assert_equal('10:1:g', @seq.patscan('N', start: 10).first.to_s)
|
|
133
|
+
assert_equal('19:1:g', @seq.patscan('N', start: 10).last.to_s)
|
|
132
134
|
end
|
|
133
135
|
|
|
134
|
-
test
|
|
135
|
-
assert_equal(
|
|
136
|
+
test '#patscan mis left is ok' do
|
|
137
|
+
assert_equal('0:7:tacgatg', @seq.patscan('Aacgatg', max_mismatches: 1).first.to_s)
|
|
136
138
|
end
|
|
137
139
|
|
|
138
|
-
test
|
|
139
|
-
assert_equal(
|
|
140
|
+
test '#patscan mis right is ok' do
|
|
141
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('tgcacgA', max_mismatches: 1).first.to_s)
|
|
140
142
|
end
|
|
141
143
|
|
|
142
|
-
test
|
|
143
|
-
assert_equal(
|
|
144
|
+
test '#patscan ins left is ok' do
|
|
145
|
+
assert_equal('0:7:tacgatg', @seq.patscan('Atacgatg', max_insertions: 1).first.to_s)
|
|
144
146
|
end
|
|
145
147
|
|
|
146
|
-
test
|
|
147
|
-
assert_equal(
|
|
148
|
+
test '#patscan ins right is ok' do
|
|
149
|
+
assert_equal('13:7:tgcacgg', @seq.patscan('tgcacggA', max_insertions: 1).first.to_s)
|
|
148
150
|
end
|
|
149
151
|
|
|
150
|
-
test
|
|
151
|
-
assert_equal(
|
|
152
|
+
test '#patscan del left is ok' do
|
|
153
|
+
assert_equal('0:7:tacgatg', @seq.patscan('acgatg', max_deletions: 1).first.to_s)
|
|
152
154
|
end
|
|
153
155
|
|
|
154
|
-
test
|
|
155
|
-
assert_equal(
|
|
156
|
+
test '#patscan del right is ok' do
|
|
157
|
+
assert_equal('12:8:atgcacgg', @seq.patscan('tgcacgg', max_deletions: 1).first.to_s)
|
|
156
158
|
end
|
|
157
159
|
|
|
158
|
-
test
|
|
159
|
-
assert_equal(
|
|
160
|
+
test '#patscan ambiguity mis ins del all ok' do
|
|
161
|
+
assert_equal('0:20:tacgatgctagcatgcacgg', @seq.patscan('tacatgcNagGatgcCacgg',
|
|
160
162
|
max_mismatches: 1,
|
|
161
163
|
max_insertions: 1,
|
|
162
164
|
max_deletions: 1).first.to_s)
|
|
163
165
|
end
|
|
164
166
|
|
|
165
|
-
test
|
|
166
|
-
@seq.patmatch(
|
|
167
|
+
test '#patmatch in block context returns correctly' do
|
|
168
|
+
@seq.patmatch('tacatgcNagGatgcCacgg',
|
|
167
169
|
max_mismatches: 1,
|
|
168
170
|
max_insertions: 1,
|
|
169
171
|
max_deletions: 1) do |hit|
|
|
170
|
-
assert_equal(
|
|
172
|
+
assert_equal('tacgatgctagcatgcacgg', hit.match)
|
|
171
173
|
assert_equal(0, hit.pos)
|
|
172
174
|
assert_equal(20, hit.length)
|
|
173
175
|
break
|
|
@@ -1,70 +1,72 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
-
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
3
|
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
7
|
-
#
|
|
8
|
-
# This program is free software; you can redistribute it and/or
|
|
9
|
-
# modify it under the terms of the GNU General Public License
|
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
|
11
|
-
# of the License, or (at your option) any later version.
|
|
12
|
-
#
|
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
-
# GNU General Public License for more details.
|
|
17
|
-
#
|
|
18
|
-
# You should have received a copy of the GNU General Public License
|
|
19
|
-
# along with this program; if not, write to the Free Software
|
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
29
30
|
|
|
30
31
|
require 'test/helper'
|
|
31
32
|
|
|
33
|
+
# Test class for Digest.
|
|
32
34
|
class TestDigest < Test::Unit::TestCase
|
|
33
35
|
def setup
|
|
34
|
-
@entry = BioDSL::Seq.new(seq:
|
|
36
|
+
@entry = BioDSL::Seq.new(seq: 'cgatcgatcGGATCCgagagggtgtgtagtgGAATTCcgctgc')
|
|
35
37
|
end
|
|
36
38
|
|
|
37
|
-
test
|
|
38
|
-
assert_raise(BioDSL::DigestError) { @entry.each_digest(
|
|
39
|
+
test '#each_digest with bad residue in pattern raises' do
|
|
40
|
+
assert_raise(BioDSL::DigestError) { @entry.each_digest('X', 0).to_a }
|
|
39
41
|
end
|
|
40
42
|
|
|
41
|
-
test
|
|
42
|
-
digests = @entry.each_digest(
|
|
43
|
+
test '#each_digest returns correctly' do
|
|
44
|
+
digests = @entry.each_digest('GGATCC', 1).to_a
|
|
43
45
|
assert_equal(2, digests.size)
|
|
44
|
-
assert_equal(
|
|
45
|
-
assert_equal(
|
|
46
|
-
assert_equal(
|
|
47
|
-
assert_equal(
|
|
46
|
+
assert_equal('[0-9]', digests.first.seq_name)
|
|
47
|
+
assert_equal('cgatcgatcG', digests.first.seq)
|
|
48
|
+
assert_equal('[10-42]', digests.last.seq_name)
|
|
49
|
+
assert_equal('GATCCgagagggtgtgtagtgGAATTCcgctgc', digests.last.seq)
|
|
48
50
|
end
|
|
49
51
|
|
|
50
|
-
test
|
|
51
|
-
digests = @entry.each_digest(
|
|
52
|
+
test '#each_digest with negavive offset returns correctly' do
|
|
53
|
+
digests = @entry.each_digest('CGATCG', -1).to_a
|
|
52
54
|
assert_equal(1, digests.size)
|
|
53
|
-
assert_equal(
|
|
55
|
+
assert_equal('[0-42]', digests.first.seq_name)
|
|
54
56
|
assert_equal(@entry.seq, digests.first.seq)
|
|
55
57
|
end
|
|
56
58
|
|
|
57
|
-
test
|
|
58
|
-
digests = @entry.each_digest(
|
|
59
|
+
test '#each_digest with offset out of bounds returns correctly' do
|
|
60
|
+
digests = @entry.each_digest('AATTCcgctgc', 15).to_a
|
|
59
61
|
assert_equal(1, digests.size)
|
|
60
|
-
assert_equal(
|
|
62
|
+
assert_equal('[0-42]', digests.first.seq_name)
|
|
61
63
|
assert_equal(@entry.seq, digests.first.seq)
|
|
62
64
|
end
|
|
63
65
|
|
|
64
|
-
test
|
|
65
|
-
@entry.each_digest(
|
|
66
|
-
assert_equal(
|
|
67
|
-
assert_equal(
|
|
66
|
+
test '#each_digest in block context returns correctly' do
|
|
67
|
+
@entry.each_digest('GGATCC', 1) do |digest|
|
|
68
|
+
assert_equal('[0-9]', digest.seq_name)
|
|
69
|
+
assert_equal('cgatcgatcG', digest.seq)
|
|
68
70
|
break
|
|
69
71
|
end
|
|
70
72
|
end
|
|
@@ -1,133 +1,135 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
7
|
-
#
|
|
8
|
-
# This program is free software; you can redistribute it and/or
|
|
9
|
-
# modify it under the terms of the GNU General Public License
|
|
10
|
-
# as published by the Free Software Foundation; either version 2
|
|
11
|
-
# of the License, or (at your option) any later version.
|
|
12
|
-
#
|
|
13
|
-
# This program is distributed in the hope that it will be useful,
|
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
-
# GNU General Public License for more details.
|
|
17
|
-
#
|
|
18
|
-
# You should have received a copy of the GNU General Public License
|
|
19
|
-
# along with this program; if not, write to the Free Software
|
|
20
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
2
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..')
|
|
3
|
+
|
|
4
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
5
|
+
# #
|
|
6
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
7
|
+
# #
|
|
8
|
+
# This program is free software; you can redistribute it and/or #
|
|
9
|
+
# modify it under the terms of the GNU General Public License #
|
|
10
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
11
|
+
# of the License, or (at your option) any later version. #
|
|
12
|
+
# #
|
|
13
|
+
# This program is distributed in the hope that it will be useful, #
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
16
|
+
# GNU General Public License for more details. #
|
|
17
|
+
# #
|
|
18
|
+
# You should have received a copy of the GNU General Public License #
|
|
19
|
+
# along with this program; if not, write to the Free Software #
|
|
20
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
21
|
+
# USA. #
|
|
22
|
+
# #
|
|
23
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
# #
|
|
27
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
28
|
+
# #
|
|
29
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
29
30
|
|
|
30
31
|
require 'test/helper'
|
|
31
32
|
|
|
33
|
+
# Test class for Dynamic.
|
|
32
34
|
class TestDynamic < Test::Unit::TestCase
|
|
33
35
|
def setup
|
|
34
|
-
@p = BioDSL::Seq.new(seq_name:
|
|
36
|
+
@p = BioDSL::Seq.new(seq_name: 'test', seq: 'atcg')
|
|
35
37
|
@p.extend(BioDSL::Dynamic)
|
|
36
38
|
end
|
|
37
39
|
|
|
38
|
-
test
|
|
39
|
-
assert_nil(@p.patmatch(
|
|
40
|
+
test '#patmatch with no match returns nil' do
|
|
41
|
+
assert_nil(@p.patmatch('gggg'))
|
|
40
42
|
end
|
|
41
43
|
|
|
42
|
-
test
|
|
43
|
-
m = @p.patmatch(
|
|
44
|
+
test '#patmatch with perfect match returns correctly' do
|
|
45
|
+
m = @p.patmatch('atcg')
|
|
44
46
|
assert_equal(0, m.beg)
|
|
45
|
-
assert_equal(
|
|
47
|
+
assert_equal('atcg', m.match)
|
|
46
48
|
assert_equal(0, m.mis)
|
|
47
49
|
assert_equal(0, m.ins)
|
|
48
50
|
assert_equal(0, m.del)
|
|
49
51
|
assert_equal(4, m.length)
|
|
50
52
|
end
|
|
51
53
|
|
|
52
|
-
test
|
|
53
|
-
m = @p.patmatch(
|
|
54
|
+
test '#patmatch with perfect match with ambiguity codes returns correctly' do
|
|
55
|
+
m = @p.patmatch('nnnn')
|
|
54
56
|
assert_equal(0, m.beg)
|
|
55
|
-
assert_equal(
|
|
57
|
+
assert_equal('atcg', m.match)
|
|
56
58
|
assert_equal(0, m.mis)
|
|
57
59
|
assert_equal(0, m.ins)
|
|
58
60
|
assert_equal(0, m.del)
|
|
59
61
|
assert_equal(4, m.length)
|
|
60
62
|
end
|
|
61
63
|
|
|
62
|
-
test
|
|
63
|
-
assert_nil(@p.patmatch(
|
|
64
|
+
test '#patmatch with one mismatch and edit dist zero returns nil' do
|
|
65
|
+
assert_nil(@p.patmatch('aCcg'))
|
|
64
66
|
end
|
|
65
67
|
|
|
66
|
-
test
|
|
67
|
-
m = @p.patmatch(
|
|
68
|
+
test '#patmatch with one mismatch and edit dist one returns correctly' do
|
|
69
|
+
m = @p.patmatch('aCcg', 0, 1)
|
|
68
70
|
assert_equal(0, m.beg)
|
|
69
|
-
assert_equal(
|
|
71
|
+
assert_equal('atcg', m.match)
|
|
70
72
|
assert_equal(1, m.mis)
|
|
71
73
|
assert_equal(0, m.ins)
|
|
72
74
|
assert_equal(0, m.del)
|
|
73
75
|
assert_equal(4, m.length)
|
|
74
76
|
end
|
|
75
77
|
|
|
76
|
-
test
|
|
77
|
-
assert_nil(@p.patmatch(
|
|
78
|
+
test '#patmatch with two mismatch and edit dist one returns nil' do
|
|
79
|
+
assert_nil(@p.patmatch('aGcA', 0, 1))
|
|
78
80
|
end
|
|
79
81
|
|
|
80
|
-
test
|
|
81
|
-
assert_nil(@p.patmatch(
|
|
82
|
+
test '#patmatch with one insertion and edit dist zero returns nil' do
|
|
83
|
+
assert_nil(@p.patmatch('atGcg'))
|
|
82
84
|
end
|
|
83
85
|
|
|
84
|
-
test
|
|
85
|
-
m = @p.patmatch(
|
|
86
|
+
test '#patmatch with one insertion and edit dist one returns correctly' do
|
|
87
|
+
m = @p.patmatch('atGcg', 0, 1)
|
|
86
88
|
assert_equal(0, m.beg)
|
|
87
|
-
assert_equal(
|
|
89
|
+
assert_equal('atcg', m.match)
|
|
88
90
|
assert_equal(0, m.mis)
|
|
89
91
|
assert_equal(1, m.ins)
|
|
90
92
|
assert_equal(0, m.del)
|
|
91
93
|
assert_equal(4, m.length)
|
|
92
94
|
end
|
|
93
95
|
|
|
94
|
-
test
|
|
95
|
-
assert_nil(@p.patmatch(
|
|
96
|
+
test '#patmatch with two insertions and edit dist one returns nil' do
|
|
97
|
+
assert_nil(@p.patmatch('atGcTg', 0, 1))
|
|
96
98
|
end
|
|
97
99
|
|
|
98
|
-
test
|
|
99
|
-
m = @p.patmatch(
|
|
100
|
+
test '#patmatch with two insertions and edit dist two returns correctly' do
|
|
101
|
+
m = @p.patmatch('atGcTg', 0, 2)
|
|
100
102
|
assert_equal(0, m.beg)
|
|
101
|
-
assert_equal(
|
|
103
|
+
assert_equal('atcg', m.match)
|
|
102
104
|
assert_equal(0, m.mis)
|
|
103
105
|
assert_equal(2, m.ins)
|
|
104
106
|
assert_equal(0, m.del)
|
|
105
107
|
assert_equal(4, m.length)
|
|
106
108
|
end
|
|
107
109
|
|
|
108
|
-
test
|
|
109
|
-
assert_nil(@p.patmatch(
|
|
110
|
+
test '#patmatch with one deletion and edit distance zero returns nil' do
|
|
111
|
+
assert_nil(@p.patmatch('acg'))
|
|
110
112
|
end
|
|
111
113
|
|
|
112
|
-
test
|
|
113
|
-
m = @p.patmatch(
|
|
114
|
+
test '#patmatch with one deletion and edit distance one returns correctly' do
|
|
115
|
+
m = @p.patmatch('acg', 0, 1)
|
|
114
116
|
assert_equal(0, m.beg)
|
|
115
|
-
assert_equal(
|
|
117
|
+
assert_equal('atcg', m.match)
|
|
116
118
|
assert_equal(0, m.mis)
|
|
117
119
|
assert_equal(0, m.ins)
|
|
118
120
|
assert_equal(1, m.del)
|
|
119
121
|
assert_equal(4, m.length)
|
|
120
122
|
end
|
|
121
123
|
|
|
122
|
-
test
|
|
123
|
-
p = BioDSL::Seq.new(seq_name:
|
|
124
|
+
test '#patscan locates three patterns ok' do
|
|
125
|
+
p = BioDSL::Seq.new(seq_name: 'test', seq: 'ataacgagctagctagctagctgactac')
|
|
124
126
|
p.extend(BioDSL::Dynamic)
|
|
125
|
-
assert_equal(3, p.patscan(
|
|
127
|
+
assert_equal(3, p.patscan('tag').count)
|
|
126
128
|
end
|
|
127
129
|
|
|
128
|
-
test
|
|
129
|
-
p = BioDSL::Seq.new(seq_name:
|
|
130
|
+
test '#patscan with pos locates two patterns ok' do
|
|
131
|
+
p = BioDSL::Seq.new(seq_name: 'test', seq: 'ataacgagctagctagctagctgactac')
|
|
130
132
|
p.extend(BioDSL::Dynamic)
|
|
131
|
-
assert_equal(2, p.patscan(
|
|
133
|
+
assert_equal(2, p.patscan('tag', 10).count)
|
|
132
134
|
end
|
|
133
135
|
end
|