genevalidator 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/.travis.yml +2 -0
- data/README.md +78 -30
- data/Rakefile +11 -8
- data/aux/app_template_footer.erb +1 -6
- data/aux/app_template_header.erb +12 -32
- data/aux/files/css/style.css +2 -8
- data/aux/files/js/plots.js +564 -576
- data/aux/files/js/script.js +10 -0
- data/aux/json_footer.erb +8 -0
- data/aux/json_header.erb +19 -0
- data/aux/json_query.erb +14 -0
- data/aux/template_footer.erb +9 -58
- data/aux/template_header.erb +18 -58
- data/aux/template_query.erb +8 -36
- data/bin/genevalidator +45 -32
- data/genevalidator.gemspec +11 -7
- data/lib/genevalidator.rb +75 -455
- data/lib/genevalidator/arg_validation.rb +78 -107
- data/lib/genevalidator/blast.rb +57 -60
- data/lib/genevalidator/clusterization.rb +15 -15
- data/lib/genevalidator/exceptions.rb +32 -5
- data/lib/genevalidator/get_raw_sequences.rb +70 -33
- data/lib/genevalidator/hsp.rb +1 -4
- data/lib/genevalidator/json_to_gv_results.rb +109 -0
- data/lib/genevalidator/output.rb +177 -185
- data/lib/genevalidator/pool.rb +2 -1
- data/lib/genevalidator/sequences.rb +3 -3
- data/lib/genevalidator/tabular_parser.rb +24 -18
- data/lib/genevalidator/validation.rb +279 -0
- data/lib/genevalidator/validation_alignment.rb +31 -47
- data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
- data/lib/genevalidator/validation_duplication.rb +23 -19
- data/lib/genevalidator/validation_gene_merge.rb +30 -65
- data/lib/genevalidator/validation_length_cluster.rb +14 -53
- data/lib/genevalidator/validation_length_rank.rb +10 -11
- data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
- data/lib/genevalidator/validation_report.rb +2 -5
- data/lib/genevalidator/validation_test.rb +8 -4
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +51 -66
- data/test/test_blast.rb +68 -51
- data/test/test_clusterization.rb +1 -1
- data/test/test_clusterization_2d.rb +19 -13
- data/test/test_extended_array_methods.rb +1 -1
- data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
- data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
- data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
- data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
- data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
- data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
- data/test/test_sequences.rb +46 -41
- data/test/test_validation_open_reading_frame.rb +318 -202
- data/test/test_validations.rb +48 -32
- metadata +76 -102
- data/doc/AliasDuplicationError.html +0 -134
- data/doc/AlignmentValidation.html +0 -1687
- data/doc/AlignmentValidationOutput.html +0 -659
- data/doc/Blast.html +0 -1905
- data/doc/BlastRFValidationOutput.html +0 -545
- data/doc/BlastReadingFrameValidation.html +0 -370
- data/doc/BlastUtils.html +0 -875
- data/doc/ClasspathError.html +0 -134
- data/doc/Cluster.html +0 -1316
- data/doc/DuplciationValidationOutput.html +0 -564
- data/doc/DuplicationValidation.html +0 -920
- data/doc/DuplicationValidationOutput.html +0 -564
- data/doc/FileNotFoundException.html +0 -134
- data/doc/GeneMergeValidation.html +0 -935
- data/doc/GeneMergeValidationOutput.html +0 -652
- data/doc/HierarchicalClusterization.html +0 -994
- data/doc/Hsp.html +0 -1485
- data/doc/InconsistentTabularFormat.html +0 -135
- data/doc/LengthClusterValidation.html +0 -982
- data/doc/LengthClusterValidationOutput.html +0 -515
- data/doc/LengthRankValidation.html +0 -496
- data/doc/LengthRankValidationOutput.html +0 -517
- data/doc/NoInternetError.html +0 -135
- data/doc/NoMafftInstallationError.html +0 -134
- data/doc/NoPIdentError.html +0 -134
- data/doc/NoValidationError.html +0 -134
- data/doc/NotEnoughHitsError.html +0 -135
- data/doc/ORFValidationOutput.html +0 -593
- data/doc/OpenReadingFrameValidation.html +0 -1107
- data/doc/OtherError.html +0 -123
- data/doc/Output.html +0 -1540
- data/doc/Pair.html +0 -309
- data/doc/PairCluster.html +0 -767
- data/doc/Plot.html +0 -837
- data/doc/QueryError.html +0 -134
- data/doc/ReportClassError.html +0 -135
- data/doc/Sequence.html +0 -1299
- data/doc/SequenceTypeError.html +0 -135
- data/doc/TabularEntry.html +0 -837
- data/doc/TabularParser.html +0 -1104
- data/doc/Validation.html +0 -2147
- data/doc/ValidationClassError.html +0 -134
- data/doc/ValidationOutput.html +0 -460
- data/doc/ValidationReport.html +0 -940
- data/doc/ValidationTest.html +0 -939
- data/doc/_index.html +0 -449
- data/doc/class_list.html +0 -54
- data/doc/css/common.css +0 -1
- data/doc/css/full_list.css +0 -57
- data/doc/css/style.css +0 -338
- data/doc/file.README.html +0 -151
- data/doc/file_list.html +0 -56
- data/doc/frames.html +0 -26
- data/doc/index.html +0 -151
- data/doc/js/app.js +0 -214
- data/doc/js/full_list.js +0 -178
- data/doc/js/jquery.js +0 -4
- data/doc/method_list.html +0 -1505
- data/doc/top-level-namespace.html +0 -112
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
- data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
- data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
data/test/test_sequences.rb
CHANGED
@@ -3,32 +3,38 @@ require 'minitest/autorun'
|
|
3
3
|
require 'genevalidator/sequences'
|
4
4
|
require 'genevalidator/hsp'
|
5
5
|
module GeneValidator
|
6
|
+
# Test the Sequence class
|
6
7
|
class TestSequenceClass < Minitest::Test
|
7
8
|
describe 'Test Sequence Class' do
|
8
|
-
|
9
9
|
it 'should get sequence by accession for mrna' do
|
10
10
|
seq_mrna = Sequence.new
|
11
|
-
seq_mrna.get_sequence_by_accession_no('EF100000', 'nucleotide',
|
11
|
+
seq_mrna.get_sequence_by_accession_no('EF100000', 'nucleotide',
|
12
|
+
'swissprot -remote')
|
12
13
|
assert_equal('AGAGTTTGAT', seq_mrna.raw_sequence[0..9])
|
13
|
-
|
14
|
+
start_idx = seq_mrna.raw_sequence.length - 10
|
15
|
+
end_idx = seq_mrna.raw_sequence.length - 1
|
16
|
+
assert_equal('GCCCGTCAAG', seq_mrna.raw_sequence[start_idx..end_idx])
|
14
17
|
end
|
15
18
|
|
16
19
|
it 'should get sequence by accession for protein' do
|
17
20
|
seq_prot = Sequence.new
|
18
|
-
seq_prot.get_sequence_by_accession_no('F8WCM5', 'protein',
|
21
|
+
seq_prot.get_sequence_by_accession_no('F8WCM5', 'protein',
|
22
|
+
'swissprot -remote')
|
19
23
|
assert_equal('MALWMRLLPL', seq_prot.raw_sequence[0..9])
|
20
|
-
|
24
|
+
start_idx = seq_prot.raw_sequence.length - 10
|
25
|
+
end_idx = seq_prot.raw_sequence.length - 1
|
26
|
+
assert_equal('WPRRPQRSQN', seq_prot.raw_sequence[start_idx..end_idx])
|
21
27
|
end
|
22
28
|
|
23
29
|
it 'should initialize seq tabular attributes' do
|
24
30
|
identifier = 'sp|Q8N302|AGGF1_HUMAN'
|
25
|
-
accession_no =
|
31
|
+
accession_no = 'Q8N302'
|
26
32
|
slen = 714
|
27
33
|
seq = Sequence.new
|
28
|
-
hash = {
|
29
|
-
|
30
|
-
|
31
|
-
|
34
|
+
hash = { 'qseqid' => 'GB10034-PA',
|
35
|
+
'sseqid' => identifier,
|
36
|
+
'sacc' => accession_no,
|
37
|
+
'slen' => slen }
|
32
38
|
|
33
39
|
seq.init_tabular_attribute(hash)
|
34
40
|
|
@@ -39,38 +45,37 @@ module GeneValidator
|
|
39
45
|
end
|
40
46
|
|
41
47
|
it 'should initialize hsp tabular attributes' do
|
42
|
-
qseqid =
|
43
|
-
sseqid =
|
44
|
-
sacc =
|
45
|
-
slen =
|
46
|
-
qstart =
|
47
|
-
qend =
|
48
|
-
sstart =
|
49
|
-
send =
|
50
|
-
length =
|
51
|
-
qframe =
|
52
|
-
pident =
|
53
|
-
nident =
|
54
|
-
evalue =
|
55
|
-
qseq =
|
56
|
-
sseq =
|
57
|
-
|
48
|
+
qseqid = 'sp|Q8GBW6|12S_PROFR'
|
49
|
+
sseqid = 'sp|A5GSN7|ACCD_SYNR3'
|
50
|
+
sacc = 'A5GSN7'
|
51
|
+
slen = '291'
|
52
|
+
qstart = '49'
|
53
|
+
qend = '217'
|
54
|
+
sstart = '65'
|
55
|
+
send = '247'
|
56
|
+
length = '183'
|
57
|
+
qframe = '1'
|
58
|
+
pident = '34.43'
|
59
|
+
nident = '63.0'
|
60
|
+
evalue = '8.0e-12'
|
61
|
+
qseq = 'ERLNNLLDPHSFDEVG---------'
|
62
|
+
sseq = 'ERLRILLDPGSFIPVDGELSPTDPL'
|
58
63
|
|
59
|
-
hash = {
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
64
|
+
hash = { 'qseqid' => qseqid,
|
65
|
+
'sseqid' => sseqid,
|
66
|
+
'sacc' => sacc,
|
67
|
+
'slen' => slen,
|
68
|
+
'qstart' => qstart,
|
69
|
+
'qend' => qend,
|
70
|
+
'sstart' => sstart,
|
71
|
+
'send' => send,
|
72
|
+
'length' => length,
|
73
|
+
'qframe' => qframe,
|
74
|
+
'pident' => pident,
|
75
|
+
'nident' => nident,
|
76
|
+
'evalue' => evalue,
|
77
|
+
'qseq' => qseq,
|
78
|
+
'sseq' => sseq }
|
74
79
|
|
75
80
|
seq = Hsp.new
|
76
81
|
seq.init_tabular_attribute(hash)
|
@@ -4,231 +4,347 @@ require 'minitest/autorun'
|
|
4
4
|
require 'genevalidator/validation_test'
|
5
5
|
require 'genevalidator/validation_open_reading_frame'
|
6
6
|
require 'genevalidator/sequences'
|
7
|
+
require 'genevalidator'
|
8
|
+
|
7
9
|
module GeneValidator
|
10
|
+
# Class to test the ORF validation
|
8
11
|
class TestORFValidation < Minitest::Test
|
9
12
|
describe 'ORF Validation' do
|
10
|
-
|
11
13
|
it 'should find ORFs - test 1 ' do
|
12
|
-
|
14
|
+
GeneValidator.config = {}
|
15
|
+
GeneValidator.config[:type] = :nucleotide
|
13
16
|
prediction = Sequence.new
|
14
|
-
prediction.raw_sequence = '
|
15
|
-
'
|
16
|
-
'
|
17
|
-
'
|
18
|
-
'
|
19
|
-
'
|
20
|
-
'
|
21
|
-
'
|
17
|
+
prediction.raw_sequence = 'ATGGCTCTCTGGATCCGGTCGCTGCCTCTCCTGGCCCTTCTT' \
|
18
|
+
'GCTCTTTCTGGCCCTGGGATCAGCCACGCAGCTGCCAACCAG' \
|
19
|
+
'CACCTCTGTGGCTCCCACTTGGTTGAGGCTCTCTACCTGGTG' \
|
20
|
+
'TGTGGGGAGCGGGGTTTCTTCTACTCCCCCAAAACACGGCGG' \
|
21
|
+
'GACGTTGAGCAGCCTCTAGTGAACGGTCCCCTGCATGGCGAG' \
|
22
|
+
'GTGGGAGAGCTGCCGTTCCAGCATGAGGAATACCAGAAAGTC' \
|
23
|
+
'AAGCGAGGCATCGTTGAGCAATGCTGTGAAAACCCGTGCTCC' \
|
24
|
+
'CTCTACCAACTGGAAAACTACTGCAACTAG'
|
22
25
|
|
23
|
-
validation = OpenReadingFrameValidation.new(
|
24
|
-
result = { 1 => { frame: 1, orf_start: 1, orf_end: 105, coverage: 100,
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
validation = OpenReadingFrameValidation.new(prediction, nil)
|
27
|
+
result = { 1 => { frame: 1, orf_start: 1, orf_end: 105, coverage: 100,
|
28
|
+
translated_length: 106 },
|
29
|
+
2 => { frame: 2, orf_start: 1, orf_end: 59, coverage: 58,
|
30
|
+
translated_length: 105 },
|
31
|
+
3 => { frame: 2, orf_start: 64, orf_end: 105, coverage: 42,
|
32
|
+
translated_length: 105 },
|
33
|
+
4 => { frame: 3, orf_start: 1, orf_end: 33, coverage: 33,
|
34
|
+
translated_length: 105 },
|
35
|
+
5 => { frame: -1, orf_start: 1, orf_end: 44, coverage: 43,
|
36
|
+
translated_length: 106 },
|
37
|
+
6 => { frame: -1, orf_start: 48, orf_end: 106, coverage: 57,
|
38
|
+
translated_length: 106 },
|
39
|
+
7 => { frame: -2, orf_start: 10, orf_end: 56, coverage: 46,
|
40
|
+
translated_length: 105 },
|
41
|
+
8 => { frame: -2, orf_start: 70, orf_end: 105, coverage: 36,
|
42
|
+
translated_length: 105 },
|
43
|
+
9 => { frame: -3, orf_start: 25, orf_end: 84, coverage: 58,
|
44
|
+
translated_length: 105 } }
|
33
45
|
assert_equal(result, validation.get_orfs)
|
34
46
|
|
35
|
-
validation = OpenReadingFrameValidation.new(
|
36
|
-
result = { 1 => { frame: 1, orf_start: 1, orf_end: 105, coverage: 100,
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
47
|
+
validation = OpenReadingFrameValidation.new(prediction, nil)
|
48
|
+
result = { 1 => { frame: 1, orf_start: 1, orf_end: 105, coverage: 100,
|
49
|
+
translated_length: 106 },
|
50
|
+
2 => { frame: 2, orf_start: 1, orf_end: 59, coverage: 58,
|
51
|
+
translated_length: 105 },
|
52
|
+
3 => { frame: 2, orf_start: 64, orf_end: 105, coverage: 42,
|
53
|
+
translated_length: 105 },
|
54
|
+
4 => { frame: 3, orf_start: 1, orf_end: 33, coverage: 33,
|
55
|
+
translated_length: 105 },
|
56
|
+
5 => { frame: -1, orf_start: 1, orf_end: 44, coverage: 43,
|
57
|
+
translated_length: 106 },
|
58
|
+
6 => { frame: -1, orf_start: 48, orf_end: 106, coverage: 57,
|
59
|
+
translated_length: 106 },
|
60
|
+
7 => { frame: -2, orf_start: 10, orf_end: 56, coverage: 46,
|
61
|
+
translated_length: 105 },
|
62
|
+
8 => { frame: -2, orf_start: 70, orf_end: 105, coverage: 36,
|
63
|
+
translated_length: 105 },
|
64
|
+
9 => { frame: -3, orf_start: 25, orf_end: 84, coverage: 58,
|
65
|
+
translated_length: 105 } }
|
45
66
|
assert_equal(result, validation.get_orfs)
|
46
67
|
end
|
47
68
|
|
48
69
|
it 'should find - test 2 ' do
|
70
|
+
GeneValidator.config = {}
|
71
|
+
GeneValidator.config[:type] = :nucleotide
|
49
72
|
prediction = Sequence.new
|
50
|
-
prediction.raw_sequence = '
|
51
|
-
'
|
52
|
-
'
|
53
|
-
'
|
54
|
-
'
|
55
|
-
'
|
56
|
-
'
|
57
|
-
'
|
58
|
-
'
|
59
|
-
'
|
60
|
-
'
|
61
|
-
'
|
73
|
+
prediction.raw_sequence = 'ATGGCTCTCTGGATCCGGTCGCTGCCTCTCCTGGCCCTTCTT' \
|
74
|
+
'GCTCTTTCTGGCCCTGGGATCAGCCACGCAGCTGCCAACCAG' \
|
75
|
+
'CACCTCTGTGGCTCCCACTTGGTTGAGGCTCTCTACCTGGTG' \
|
76
|
+
'TGTGGGGAGCGGGGTTTCTTCTACTCCCCCAAAACACGGCGG' \
|
77
|
+
'GACGTTGAGCAGCCTCTAGTGAACGGTCCCCTGCATGGCGAG' \
|
78
|
+
'GTGGGAGAGCTGCCGTTCCAGCATGAGGAATACCAGACAGCA' \
|
79
|
+
'CCTCTGTGGCTCCCACTTGGTTGAGGCTCTCTACCTGGTGTG' \
|
80
|
+
'TGGGGAGCGGGGTTTCTTCTACTCCCCCAAAACACGGCGGGA' \
|
81
|
+
'CGTTGAGCAGCCTCTAGTGAACGGTCCCCTGCATGGCGAGGT' \
|
82
|
+
'GGGAGAGCTGCCGTTCCAGCATGAGGAATACCAGAAAGTCAA' \
|
83
|
+
'GCGAGGCATCGTTGAGCAATGCTGTGAAAACCCGTGCTCCCT' \
|
84
|
+
'CTACCAACTGGAAAACTACTGCAACTAG'
|
62
85
|
|
63
|
-
validation = OpenReadingFrameValidation.new(
|
64
|
-
result = { 1 => { frame: 1, orf_start: 1, orf_end: 88, coverage: 56,
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
86
|
+
validation = OpenReadingFrameValidation.new(prediction, nil)
|
87
|
+
result = { 1 => { frame: 1, orf_start: 1, orf_end: 88, coverage: 56,
|
88
|
+
translated_length: 160 },
|
89
|
+
2 => { frame: 2, orf_start: 1, orf_end: 58, coverage: 38,
|
90
|
+
translated_length: 160 },
|
91
|
+
3 => { frame: 2, orf_start: 64, orf_end: 159, coverage: 61,
|
92
|
+
translated_length: 160 },
|
93
|
+
4 => { frame: 3, orf_start: 1, orf_end: 32, coverage: 22,
|
94
|
+
translated_length: 159 },
|
95
|
+
5 => { frame: 3, orf_start: 79, orf_end: 113, coverage: 24,
|
96
|
+
translated_length: 159 },
|
97
|
+
6 => { frame: 3, orf_start: 119, orf_end: 159, coverage: 28,
|
98
|
+
translated_length: 159 },
|
99
|
+
7 => { frame: -1, orf_start: 1, orf_end: 43, coverage: 29,
|
100
|
+
translated_length: 160 },
|
101
|
+
8 => { frame: -1, orf_start: 48, orf_end: 139, coverage: 59,
|
102
|
+
translated_length: 160 },
|
103
|
+
9 => { frame: -2, orf_start: 10, orf_end: 55, coverage: 31,
|
104
|
+
translated_length: 160 },
|
105
|
+
10 => { frame: -2, orf_start: 70, orf_end: 98, coverage: 20,
|
106
|
+
translated_length: 160 },
|
107
|
+
11 => { frame: -2, orf_start: 103, orf_end: 160,
|
108
|
+
coverage: 38, translated_length: 160 },
|
109
|
+
12 => { frame: -3, orf_start: 25, orf_end: 110, coverage: 55,
|
110
|
+
translated_length: 159 },
|
111
|
+
13 => { frame: -3, orf_start: 125, orf_end: 159,
|
112
|
+
coverage: 24, translated_length: 159 } }
|
77
113
|
assert_equal(result, validation.get_orfs)
|
78
114
|
|
79
|
-
validation = OpenReadingFrameValidation.new(
|
80
|
-
result = { 1 => { frame: 1, orf_start: 1, orf_end: 88, coverage: 56,
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
115
|
+
validation = OpenReadingFrameValidation.new(prediction, nil)
|
116
|
+
result = { 1 => { frame: 1, orf_start: 1, orf_end: 88, coverage: 56,
|
117
|
+
translated_length: 160 },
|
118
|
+
2 => { frame: 2, orf_start: 1, orf_end: 58, coverage: 38,
|
119
|
+
translated_length: 160 },
|
120
|
+
3 => { frame: 2, orf_start: 64, orf_end: 159, coverage: 61,
|
121
|
+
translated_length: 160 },
|
122
|
+
4 => { frame: 3, orf_start: 1, orf_end: 32, coverage: 22,
|
123
|
+
translated_length: 159 },
|
124
|
+
5 => { frame: 3, orf_start: 79, orf_end: 113, coverage: 24,
|
125
|
+
translated_length: 159 },
|
126
|
+
6 => { frame: 3, orf_start: 119, orf_end: 159, coverage: 28,
|
127
|
+
translated_length: 159 },
|
128
|
+
7 => { frame: -1, orf_start: 1, orf_end: 43, coverage: 29,
|
129
|
+
translated_length: 160 },
|
130
|
+
8 => { frame: -1, orf_start: 48, orf_end: 139, coverage: 59,
|
131
|
+
translated_length: 160 },
|
132
|
+
9 => { frame: -2, orf_start: 10, orf_end: 55, coverage: 31,
|
133
|
+
translated_length: 160 },
|
134
|
+
10 => { frame: -2, orf_start: 70, orf_end: 98, coverage: 20,
|
135
|
+
translated_length: 160 },
|
136
|
+
11 => { frame: -2, orf_start: 103, orf_end: 160,
|
137
|
+
coverage: 38, translated_length: 160 },
|
138
|
+
12 => { frame: -3, orf_start: 25, orf_end: 110, coverage: 55,
|
139
|
+
translated_length: 159 },
|
140
|
+
13 => { frame: -3, orf_start: 125, orf_end: 159,
|
141
|
+
coverage: 24, translated_length: 159 } }
|
93
142
|
assert_equal(result, validation.get_orfs)
|
94
143
|
end
|
95
144
|
|
96
145
|
it 'should find - test 3 ' do
|
146
|
+
GeneValidator.config = {}
|
147
|
+
GeneValidator.config[:type] = :nucleotide
|
97
148
|
prediction = Sequence.new
|
98
|
-
prediction.raw_sequence = '
|
99
|
-
'
|
100
|
-
'
|
101
|
-
'
|
102
|
-
'
|
103
|
-
'
|
104
|
-
'
|
105
|
-
'
|
106
|
-
'
|
107
|
-
'
|
108
|
-
'
|
109
|
-
'
|
110
|
-
'
|
111
|
-
'
|
112
|
-
'
|
113
|
-
'
|
114
|
-
'
|
115
|
-
'
|
116
|
-
'
|
117
|
-
'
|
118
|
-
'
|
119
|
-
'
|
120
|
-
'
|
121
|
-
'
|
122
|
-
'
|
123
|
-
'
|
124
|
-
'
|
125
|
-
'
|
126
|
-
'
|
127
|
-
'
|
128
|
-
'
|
129
|
-
'
|
130
|
-
'
|
131
|
-
'
|
132
|
-
'
|
133
|
-
'
|
134
|
-
'
|
135
|
-
'
|
136
|
-
'
|
137
|
-
'
|
138
|
-
'
|
139
|
-
'
|
140
|
-
'
|
141
|
-
'
|
142
|
-
'
|
143
|
-
'
|
144
|
-
'
|
145
|
-
'
|
146
|
-
'
|
147
|
-
'
|
148
|
-
'
|
149
|
-
'
|
150
|
-
'
|
151
|
-
'
|
152
|
-
'
|
153
|
-
'
|
154
|
-
'
|
155
|
-
'
|
156
|
-
'
|
157
|
-
'
|
158
|
-
'
|
159
|
-
'
|
160
|
-
'
|
161
|
-
'
|
162
|
-
'
|
163
|
-
'
|
164
|
-
'
|
165
|
-
'
|
166
|
-
'
|
167
|
-
'
|
149
|
+
prediction.raw_sequence = 'GGCGGGGCGGGAGGGCGGCGCGGAGTGCGCCGGCGCGTCGTC' \
|
150
|
+
'GGGGACGCCGGGTCCAGGATCTTGCTAGGGAACCAGTGTTGT' \
|
151
|
+
'CGCGTCGTCCCGCCCCCTCGGGGCTTTTGCTCCCGTTAACTG' \
|
152
|
+
'TCGGCGGGGCAGGCTCCGCAGCGCAGGGCGACATGCCGGTGC' \
|
153
|
+
'GCTTCAAGGGGCTGAGTGAATACCAGAGAAACTTCCTGTGGA' \
|
154
|
+
'AAAAGTCCTATTTGTCAGAGTCTTATAATCCCTCAGTGGGAC' \
|
155
|
+
'AAAAGTACTCATGGGCAGGACTTAGATCGGATCAGTTGGGGA' \
|
156
|
+
'TCACGAAAGAACCAGGTTTTATTTCAAAAAGAAGAGTTCCCT' \
|
157
|
+
'ACCATGACCCTCAGATTTCAAAATACCTGGAGTGGAACGGAA' \
|
158
|
+
'CCGTCAGAAAGAAGGATACGCTTGTCCCACCAGAACCCCAGG' \
|
159
|
+
'CCTTTGGAACGCCAAAGCCACAAGAGGCTGAGCAAGGAGAAG' \
|
160
|
+
'ATGCCAATCAAGAAGCAGTTCTCTCACTAGAGGCCTCCAGGG' \
|
161
|
+
'TTCCCAAGAGAACTCGGTCTCATTCTGCGGACTCGAGAGCTG' \
|
162
|
+
'AAGGGGTTTCAGACACTGTGGAAAAGCACCAGGGTGTCACGA' \
|
163
|
+
'GAAGCCATGCGCCAGTTAGCGCGGATGTGGAGCTGAGACCTT' \
|
164
|
+
'CCAGCAAACAACCTCTCTCCCAGAGCATAGATCCCAGGTTGG' \
|
165
|
+
'ATAGGCATCTTCGTAAGAAAGCTGGATTGGCCGTTGTTCCCA' \
|
166
|
+
'CGAATAATGCCTTGAGAAATTCTGAATACCAAAGGCAGTTTG' \
|
167
|
+
'TTTGGAAGACTTCTAAAGAAAGCGCTCCAGTGTTTGCATCCA' \
|
168
|
+
'ATCAGGTTTTCCGTAATAAAAGCCAAATTATTCCACAGTTCC' \
|
169
|
+
'AAGGCAATACATTCACCCACGAGACTGAATACAAGCGAAATT' \
|
170
|
+
'TCAAGGGTTTAACTCCAGTGAAGGAACCAAAGTCAAGAGAGT' \
|
171
|
+
'ATTTGAAAGGAAACAGCAGTCTGGAGATGCTGACTCCAGTAA' \
|
172
|
+
'AGAAGGCAGATGAGCCTTTAGACTTAGAAGTAGACATGGCGT' \
|
173
|
+
'CGGAAGACTCAGACCAGTCTGTAAAGAAGCCTGCTTCATGGA' \
|
174
|
+
'GACACCAAAGGCTTGGAAAAGTGAATTCTGAATATAGAGCAA' \
|
175
|
+
'AGTTCCTGAGCCCAGCCCAGTATTTCTATAAAGCTGGAGCTT' \
|
176
|
+
'GGACCCGGGTGAAGGAGAACCTGTCAAACCAGGTTAAGGAGC' \
|
177
|
+
'TCCGAGAAAAGGCCGAATCTTACAGGAAGCGAGTTCAGGGGA' \
|
178
|
+
'CACATTTTTCTCGGGACCATCTGAACCAGATTATGTCGGACA' \
|
179
|
+
'GCAACTGCTGTTGGGACGTCTCCTCAGTCACAAGCTCGGAAG' \
|
180
|
+
'GCACCGTCAGTAGCAACATCCGAGCACTGGATCTTGCTGGAG' \
|
181
|
+
'ACCTTACAAACCACAGGACCCCCCAGAAACACCCTCCTACCA' \
|
182
|
+
'AACTAGAAGAAAGAAAAGTTGCCTCGGGAGAGCAGCCCCTGA' \
|
183
|
+
'AAAACTCCACCAGGAGACTGGAGATGCCAGAGCCTGCCGCCT' \
|
184
|
+
'CGGTCAGGAGGAAGCTGGCTTGGGATGCTGAGGAGAGCACGA' \
|
185
|
+
'AGGAAGACACCCAGGAGGAGCCCAGGGCGGAGGAGGACGGGA' \
|
186
|
+
'GAGAGGAGAGAGGACAGGACAAGCAGACCTGTGCGGTAGAGC' \
|
187
|
+
'TGGAGAAACCGGACACACAGACACCCAAGGCAGACAGACTGA' \
|
188
|
+
'CAGAAGGGTCGGAGACATCTTCTGTTTCCTCAGGGAAGGGAG' \
|
189
|
+
'GCAGGCTTCCTACACCGAGGCTGAGAGAACTCGGTATCCAGC' \
|
190
|
+
'GGACGCACCATGATCTCACGACGCCAGCTGTTGGTGGCGCAG' \
|
191
|
+
'TCTTAGTGTCTCCATCTAAAGTGAAGCCACCAGGCCTCGAGC' \
|
192
|
+
'AGAGGAGGAGAGCGTCCTCCCAAGATGGCTTAGAAACTCTGA' \
|
193
|
+
'AGAAAGACATTACTAAGAAAGGAAAACCCCGTCCCATGTCTC' \
|
194
|
+
'TGTTGACTTCTCCGGCTGCTGGCATGAAGACAGTTGATCCCC' \
|
195
|
+
'TGCCTCTGCGAGAAGACTGTGAAGCCAATGTGCTCAGATTTG' \
|
196
|
+
'CTGATACTCTTCCTGTTTCGAAAATTTTGGACCGTCAGCCCA' \
|
197
|
+
'GCACCCCTGGGCAGCTGCCTCCATGTGCCCCGCCTTACTGTC' \
|
198
|
+
'ATCCGTCCAGCAGGATCCAGGGCCGTCTGCGAGACCCTGAGT' \
|
199
|
+
'TTCAGCACAACAATGCAGATAGACTGTCTGAGATCTCTGCTC' \
|
200
|
+
'GCTCTGCAGTTTCCAGCCTCCGGGCTTTCCAGACTCTAGCCC' \
|
201
|
+
'GAGCTCAGAAAAGAAAGGAGAATTTCTGGGGCAAGCCATAAA' \
|
202
|
+
'CCTCTCATCTTATCTAGTGACAAGCTGGCTCATCTTTACTCA' \
|
203
|
+
'CTCAGTGTGTTAAGGTTTTCAGAGGGTTTGGAGTTTCTTCTA' \
|
204
|
+
'ACACTTCTGACTCAGATAATTTGAATTTTCAGTGGCTCATCT' \
|
205
|
+
'TAGCCAGAAAATTGCCATGCAGCTGTGTCTAAGTCTGACTCT' \
|
206
|
+
'TTGAGAGCACCTTTGCACTTGTCTGAGTACAAAGGTGCGGGG' \
|
207
|
+
'TTGTGTATTTCTTCACACACTCTTGACTTTTGTGTCAGGTCT' \
|
208
|
+
'CGGGGGTTGCTAGTAGAAGCCTGAAGGTCATCTACAGAATAT' \
|
209
|
+
'TCTAAAGGGAGAAAATGAAGTCAACATTAAGATCTTCCAACT' \
|
210
|
+
'TAATTTCCCCTCAGATTGGTCTTAGGCATTTTAATAGCTGTA' \
|
211
|
+
'GGTGTCATGAAAAGAATCTCACTGTTTTATTAGCGCCTTCTG' \
|
212
|
+
'TATACACAGGTGCAGTGTTAAGATGATTGGACTTTGAAAAGC' \
|
213
|
+
'TGGCTGTACATATTTTTCTTATTTATGTAACAAAATTTGCTG' \
|
214
|
+
'AGAGAATATGTATATTTTTGATCTTTTTATGTATTTTATTTG' \
|
215
|
+
'TATAATAACTGGCATACATTTGAATAATGTCTAGATTTTGAA' \
|
216
|
+
'AAATGATTTGTGAAATGGAGAATTAAAATTTTGTAGACATTT' \
|
217
|
+
'AAAAATGAAAATTAAGTGTGCTTGGCTTCTTCAGGAAGTTAT' \
|
218
|
+
'CATGTGGAATAAATATCTTCTAGAAGCATTCTATTAGAACTG' \
|
219
|
+
'CTTAATCAAAAATTATACTACTATTGCAGCTGCTAAATGCAG' \
|
220
|
+
'TGAAACTGAGTCTACAGTATTTTTTTTTTCACAAATACGAGG' \
|
221
|
+
'TTTTAAAAACAGATTCATTAAAAAATTTAAACACCAAAAAAA' \
|
222
|
+
'AAAAA'
|
168
223
|
|
169
|
-
validation = OpenReadingFrameValidation.new(
|
224
|
+
validation = OpenReadingFrameValidation.new(prediction, nil)
|
170
225
|
result =
|
171
|
-
{ 1 => { frame: 1, orf_start: 1, orf_end: 20, coverage: 4,
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
226
|
+
{ 1 => { frame: 1, orf_start: 1, orf_end: 20, coverage: 4,
|
227
|
+
translated_length: 1003 },
|
228
|
+
2 => { frame: 1, orf_start: 62, orf_end: 143, coverage: 10,
|
229
|
+
translated_length: 1003 },
|
230
|
+
3 => { frame: 1, orf_start: 165, orf_end: 187, coverage: 5,
|
231
|
+
translated_length: 1003 },
|
232
|
+
4 => { frame: 1, orf_start: 244, orf_end: 277, coverage: 6,
|
233
|
+
translated_length: 1003 },
|
234
|
+
5 => { frame: 1, orf_start: 383, orf_end: 393, coverage: 4,
|
235
|
+
translated_length: 1003 },
|
236
|
+
6 => { frame: 1, orf_start: 415, orf_end: 443, coverage: 5,
|
237
|
+
translated_length: 1003 },
|
238
|
+
7 => { frame: 1, orf_start: 477, orf_end: 510, coverage: 6,
|
239
|
+
translated_length: 1003 },
|
240
|
+
8 => { frame: 1, orf_start: 640, orf_end: 706, coverage: 9,
|
241
|
+
translated_length: 1003 },
|
242
|
+
9 => { frame: 1, orf_start: 728, orf_end: 757, coverage: 5,
|
243
|
+
translated_length: 1003 },
|
244
|
+
10 => { frame: 1, orf_start: 786, orf_end: 813, coverage: 5,
|
245
|
+
translated_length: 1003 },
|
246
|
+
11 => { frame: 2, orf_start: 24, orf_end: 41, coverage: 4,
|
247
|
+
translated_length: 1003 },
|
248
|
+
12 => { frame: 2, orf_start: 115, orf_end: 129, coverage: 4,
|
249
|
+
translated_length: 1003 },
|
250
|
+
13 => { frame: 2, orf_start: 151, orf_end: 161, coverage: 4,
|
251
|
+
translated_length: 1003 },
|
252
|
+
14 => { frame: 2, orf_start: 290, orf_end: 305, coverage: 4,
|
253
|
+
translated_length: 1003 },
|
254
|
+
15 => { frame: 2, orf_start: 327, orf_end: 339, coverage: 4,
|
255
|
+
translated_length: 1003 },
|
256
|
+
16 => { frame: 2, orf_start: 391, orf_end: 417, coverage: 5,
|
257
|
+
translated_length: 1003 },
|
258
|
+
17 => { frame: 2, orf_start: 439, orf_end: 479, coverage: 6,
|
259
|
+
translated_length: 1003 },
|
260
|
+
18 => { frame: 2, orf_start: 501, orf_end: 557, coverage: 8,
|
261
|
+
translated_length: 1003 },
|
262
|
+
19 => { frame: 2, orf_start: 660, orf_end: 678, coverage: 4,
|
263
|
+
translated_length: 1003 },
|
264
|
+
20 => { frame: 2, orf_start: 711, orf_end: 739, coverage: 5,
|
265
|
+
translated_length: 1003 },
|
266
|
+
21 => { frame: 2, orf_start: 800, orf_end: 809, coverage: 3,
|
267
|
+
translated_length: 1003 },
|
268
|
+
22 => { frame: 2, orf_start: 832, orf_end: 841, coverage: 3,
|
269
|
+
translated_length: 1003 },
|
270
|
+
23 => { frame: 2, orf_start: 943, orf_end: 957, coverage: 4,
|
271
|
+
translated_length: 1003 },
|
272
|
+
24 => { frame: 2, orf_start: 979, orf_end: 1003, coverage: 5,
|
273
|
+
translated_length: 1003 },
|
274
|
+
25 => { frame: 3, orf_start: 1, orf_end: 720, coverage: 73,
|
275
|
+
translated_length: 1003 },
|
276
|
+
26 => { frame: 3, orf_start: 749, orf_end: 773, coverage: 5,
|
277
|
+
translated_length: 1003 },
|
278
|
+
27 => { frame: 3, orf_start: 842, orf_end: 869, coverage: 5,
|
279
|
+
translated_length: 1003 },
|
280
|
+
28 => { frame: 3, orf_start: 891, orf_end: 904, coverage: 4,
|
281
|
+
translated_length: 1003 },
|
282
|
+
29 => { frame: 3, orf_start: 982, orf_end: 1003, coverage: 5,
|
283
|
+
translated_length: 1003 },
|
284
|
+
30 => { frame: -1, orf_start: 69, orf_end: 81, coverage: 4,
|
285
|
+
translated_length: 1003 },
|
286
|
+
31 => { frame: -1, orf_start: 106, orf_end: 115, coverage: 3,
|
287
|
+
translated_length: 1003 },
|
288
|
+
32 => { frame: -1, orf_start: 178, orf_end: 219, coverage: 7,
|
289
|
+
translated_length: 1003 },
|
290
|
+
33 => { frame: -1, orf_start: 299, orf_end: 391, coverage: 12,
|
291
|
+
translated_length: 1003 },
|
292
|
+
34 => { frame: -1, orf_start: 436, orf_end: 447, coverage: 4,
|
293
|
+
translated_length: 1003 },
|
294
|
+
35 => { frame: -1, orf_start: 469, orf_end: 540, coverage: 9,
|
295
|
+
translated_length: 1003 },
|
296
|
+
36 => { frame: -1, orf_start: 562, orf_end: 575, coverage: 4,
|
297
|
+
translated_length: 1003 },
|
298
|
+
37 => { frame: -1, orf_start: 597, orf_end: 617, coverage: 5,
|
299
|
+
translated_length: 1003 },
|
300
|
+
38 => { frame: -1, orf_start: 639, orf_end: 655, coverage: 4,
|
301
|
+
translated_length: 1003 },
|
302
|
+
39 => { frame: -1, orf_start: 728, orf_end: 818, coverage: 11,
|
303
|
+
translated_length: 1003 },
|
304
|
+
40 => { frame: -1, orf_start: 863, orf_end: 885, coverage: 5,
|
305
|
+
translated_length: 1003 },
|
306
|
+
41 => { frame: -1, orf_start: 950, orf_end: 963, coverage: 4,
|
307
|
+
translated_length: 1003 },
|
308
|
+
42 => { frame: -1, orf_start: 985, orf_end: 1003, coverage: 4,
|
309
|
+
translated_length: 1003 },
|
310
|
+
43 => { frame: -2, orf_start: 79, orf_end: 99, coverage: 5,
|
311
|
+
translated_length: 1003 },
|
312
|
+
44 => { frame: -2, orf_start: 121, orf_end: 133, coverage: 4,
|
313
|
+
translated_length: 1003 },
|
314
|
+
45 => { frame: -2, orf_start: 355, orf_end: 599, coverage: 26,
|
315
|
+
translated_length: 1003 },
|
316
|
+
46 => { frame: -2, orf_start: 652, orf_end: 736, coverage: 11,
|
317
|
+
translated_length: 1003 },
|
318
|
+
47 => { frame: -2, orf_start: 758, orf_end: 828, coverage: 9,
|
319
|
+
translated_length: 1003 },
|
320
|
+
48 => { frame: -2, orf_start: 868, orf_end: 887, coverage: 4,
|
321
|
+
translated_length: 1003 },
|
322
|
+
49 => { frame: -2, orf_start: 952, orf_end: 1003, coverage: 8,
|
323
|
+
translated_length: 1003 },
|
324
|
+
50 => { frame: -3, orf_start: 1, orf_end: 18, coverage: 4,
|
325
|
+
translated_length: 1003 },
|
326
|
+
51 => { frame: -3, orf_start: 90, orf_end: 100, coverage: 4,
|
327
|
+
translated_length: 1003 },
|
328
|
+
52 => { frame: -3, orf_start: 208, orf_end: 220, coverage: 4,
|
329
|
+
translated_length: 1003 },
|
330
|
+
53 => { frame: -3, orf_start: 279, orf_end: 347, coverage: 9,
|
331
|
+
translated_length: 1003 },
|
332
|
+
54 => { frame: -3, orf_start: 369, orf_end: 382, coverage: 4,
|
333
|
+
translated_length: 1003 },
|
334
|
+
55 => { frame: -3, orf_start: 461, orf_end: 511, coverage: 7,
|
335
|
+
translated_length: 1003 },
|
336
|
+
56 => { frame: -3, orf_start: 533, orf_end: 542, coverage: 3,
|
337
|
+
translated_length: 1003 },
|
338
|
+
57 => { frame: -3, orf_start: 635, orf_end: 708, coverage: 10,
|
339
|
+
translated_length: 1003 },
|
340
|
+
58 => { frame: -3, orf_start: 768, orf_end: 801, coverage: 6,
|
341
|
+
translated_length: 1003 },
|
342
|
+
59 => { frame: -3, orf_start: 830, orf_end: 875, coverage: 7,
|
343
|
+
translated_length: 1003 },
|
344
|
+
60 => { frame: -3, orf_start: 933, orf_end: 945, coverage: 4,
|
345
|
+
translated_length: 1003 },
|
346
|
+
61 => { frame: -3, orf_start: 967, orf_end: 980, coverage: 4,
|
347
|
+
translated_length: 1003 } }
|
232
348
|
assert_equal(result, validation.get_orfs)
|
233
349
|
end
|
234
350
|
end
|