fstrozzi-Gmap 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +41 -0
- data/doc/classes/Gmap.html +129 -0
- data/doc/classes/Gmap/Core.html +252 -0
- data/doc/classes/Gmap/Core.src/M000006.html +18 -0
- data/doc/classes/Gmap/Core.src/M000007.html +25 -0
- data/doc/classes/Gmap/Core.src/M000008.html +18 -0
- data/doc/classes/Gmap/Core.src/M000009.html +52 -0
- data/doc/classes/Gmap/Result.html +337 -0
- data/doc/classes/Gmap/Result.src/M000010.html +18 -0
- data/doc/classes/Gmap/Result.src/M000011.html +41 -0
- data/doc/classes/Gmap/Result.src/M000012.html +22 -0
- data/doc/classes/Gmap/Result.src/M000013.html +22 -0
- data/doc/classes/Gmap/Result.src/M000014.html +18 -0
- data/doc/classes/GmapTest.html +197 -0
- data/doc/classes/GmapTest.src/M000001.html +18 -0
- data/doc/classes/GmapTest.src/M000002.html +21 -0
- data/doc/classes/GmapTest.src/M000003.html +25 -0
- data/doc/classes/GmapTest.src/M000004.html +62 -0
- data/doc/classes/GmapTest.src/M000005.html +65 -0
- data/doc/created.rid +1 -0
- data/doc/files/lib/gmap/core_rb.html +118 -0
- data/doc/files/lib/gmap_rb.html +101 -0
- data/doc/files/test/unit/gmap_test_rb.html +109 -0
- data/doc/fr_class_index.html +30 -0
- data/doc/fr_file_index.html +29 -0
- data/doc/fr_method_index.html +40 -0
- data/doc/index.html +24 -0
- data/lib/gmap.rb +1 -0
- data/lib/gmap/core.rb +269 -0
- data/samples/test.gmap +148 -0
- data/test/unit/gmap_test.rb +127 -0
- metadata +93 -0
data/lib/gmap.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/gmap/core.rb'
|
data/lib/gmap/core.rb
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
|
|
2
|
+
# Author:: Francesco Strozzi
|
|
3
|
+
# Email:: francesco.strozzi@gmail.com
|
|
4
|
+
# Copyright:: 2008 Francesco Strozzi
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
module Gmap
|
|
10
|
+
|
|
11
|
+
# This module allows the parsing of the standard output of Gmap (http://www.gene.com/share/gmap/)
|
|
12
|
+
#
|
|
13
|
+
# Example:
|
|
14
|
+
#
|
|
15
|
+
# Gmap::Core.open("output.gmap") do |gmap|
|
|
16
|
+
#
|
|
17
|
+
# gmap.each_sequence do |seq|
|
|
18
|
+
#
|
|
19
|
+
# seq.each do |result|
|
|
20
|
+
#
|
|
21
|
+
# result.query (Query sequence name)
|
|
22
|
+
# result.target (Target sequence name)
|
|
23
|
+
# result.q_start (Start coordinate of the query sequence)
|
|
24
|
+
# result.q_end (End coordinate of the query sequence)
|
|
25
|
+
# result.start (Start coordinate of the target sequence)
|
|
26
|
+
# result.end (End coordinate of the target sequence)
|
|
27
|
+
# result.strand (Strand of the target sequence)
|
|
28
|
+
# result.exons (# exons found)
|
|
29
|
+
# result.coverage (Coverage of the query sequence)
|
|
30
|
+
# result.perc_identity (Percentage of identity from the alignment)
|
|
31
|
+
# result.indels (# insertion or deletions)
|
|
32
|
+
# result.mismatch (# mismatch)
|
|
33
|
+
# result.aa_change (Prediction of AA changes from mismatches and indels found)
|
|
34
|
+
# result.aln (Raw alignment between target and query sequences)
|
|
35
|
+
#
|
|
36
|
+
# ONLY IF GENE MAPS ARE USED WITH GMAP
|
|
37
|
+
#
|
|
38
|
+
# result.gene_start (Start coordinate of the overlapping gene)
|
|
39
|
+
# result.gene_end (End coordinate of the overlapping gene)
|
|
40
|
+
# result.gene_id (ID of the overlapping gene)
|
|
41
|
+
# end
|
|
42
|
+
#
|
|
43
|
+
# end
|
|
44
|
+
#
|
|
45
|
+
# end
|
|
46
|
+
#
|
|
47
|
+
|
|
48
|
+
class Core

  # The IO stream the Gmap output is read from
  attr_reader :io

  # Wraps an already opened IO-like object. This class only calls
  # +each_line+ and +close+ on it.
  def initialize(io)
    @io = io
  end

  # Open the gmap file for reading.
  #
  # Without a block the new Core object is returned and the caller is
  # responsible for calling #close on it.
  #
  # With a block the Core object is yielded and the file is closed when the
  # block finishes, even if the block raises. The value of the block is
  # returned.
  def self.open(file)
    f = File.open(file)
    return new(f) unless block_given?
    begin
      yield new(f)
    ensure
      # The block may already have closed the stream through Core#close
      f.close unless f.closed?
    end
  end

  # Close the IO stream on the Gmap file
  def close
    @io.close
  end

  # Iterates on every sequence processed by Gmap and yields an array of
  # Gmap::Result objects, each of them corresponding to a Path (result)
  # for that sequence.
  #
  # A new array is yielded for every sequence, so the caller can safely
  # keep a reference to it after the block returns.
  #
  # Raises ArgumentError ("Block needed") when called without a block; the
  # check is done before anything is read from the stream.
  def each_sequence
    raise ArgumentError, "Block needed" unless block_given?

    started = false
    res = Gmap::Result.new
    results = []
    query = nil
    @io.each_line do |l|
      if l =~ />(\d+|\w+)\s/
        # Header line of a query sequence
        name = $1
        if started
          # A new header closes the previous sequence: flush its last path
          res.query = query
          results << res.dup unless res.target.nil?
          yield results
          results = []
          res.clear
        end
        started = true
        query = name
      elsif l =~ /Path\s\d+/ && !res.target.nil?
        # A new path starts: store a copy of the finished one, because
        # +res+ is reset and reused for the next path
        res.query = query
        results << res.dup
        res.clear
      end
      res = parse_line(res, l)
    end
    if started
      # Flush the last sequence of the stream
      res.query = query
      results << res.dup unless res.target.nil?
      yield results
    end
  end

  private

  # The method is called internally from the each_sequence method,
  # to parse the lines in the output of Gmap and save the information into
  # a Gmap::Result object.
  #
  # While the result is in "search alignment" state the line is handed to
  # get_aln instead. The line passed in is not modified. Returns +res+.
  def parse_line(res, l)
    l = l.chomp
    return get_aln(res, l) if res.search_aln

    case l
    when /Path \d+:\s+query\s+(\d+)--(\d+)\s+\(\d+ bp\)\s+=>/
      res.q_start = $1.to_i
      res.q_end = $2.to_i
    when /Genomic pos:.*\((.*)\sstrand\)/
      res.strand = ($1 =~ /\+/) ? 1 : -1
    when /Accessions:\s+(.*):(.*)--(.*)\s+\(out of.*/
      res.target = $1
      # Coordinates are printed with thousands separators (e.g. 11,477)
      res.start = $2.delete(',').to_i
      res.end = $3.delete(',').to_i
    when /Number of exons: (\d+)/
      # Only the first value seen for a path is kept
      res.exons = $1.to_i if res.exons.nil?
    when /Trimmed coverage:\s(.*)\s\(trimmed length/
      res.coverage = $1.to_f if res.coverage.nil?
    when /Percent identity:\s(.*)\s\(\d+ matches, (\d+) mismatches, (\d+) indels,/
      if res.perc_identity.nil?
        res.perc_identity = $1.to_f
        res.mismatch = $2.to_i
        res.indels = $3.to_i
      end
    when /Amino acid changes: (.*)/
      aa = $1
      # Left as nil when nothing but blanks follows the label
      res.aa_change = aa if aa =~ /\w+/
    when /Alignment for path \d+:/
      # Following lines are routed to get_aln
      res.set_search
    when /.*gene_maps\s+\S+:(\d+)..(\d+)\s+(\d+)/
      res.gene_start = $1.to_i
      res.gene_end = $2.to_i
      res.gene_id = $3.to_i
    end
    res
  end

  # The method is called from 'parse_line' to save the sequence alignment
  # information from the gmap output.
  #
  # The first line matching the target pattern is appended to +res.aln+ and
  # toggles the save flag; the counter +res.c+ then advances once per line
  # while both flags are set, and lines seen while it is 1 or 2 are appended
  # as well. Lines containing "aa.g" / "aa.c" are appended whenever they are
  # seen. On the line seen when the counter is 3 the trailing newline of the
  # alignment is removed and both flags are toggled. Returns +res+.
  def get_aln(res, l)
    # NOTE: inside the character class '|' is a literal character
    if l =~ /.*:\d+\s[A|T|C|G].+.*/
      res.aln << l + "\n"
      res.set_save
    end

    res.aln << l + "\n" if res.c >= 1 && res.c < 3
    res.aln << l + "\n" if l =~ /aa.g/
    res.aln << l + "\n" if l =~ /aa.c/

    if res.c == 3
      res.aln.chomp!
      res.set_search
      res.set_save
    end

    res.count if res.search_aln && res.save_aln
    res
  end

end
|
|
197
|
+
|
|
198
|
+
# This class stores the information of a single Gmap result
|
|
199
|
+
|
|
200
|
+
class Result

  # Parsed fields of a single Gmap path (see the module documentation)
  attr_accessor :query, :target, :q_start, :q_end, :start, :end, :strand, :exons, :coverage, :perc_identity, :indels, :mismatch, :aa_change, :gene_start, :gene_end, :gene_id, :aln

  # Control state used by Gmap::Core while collecting the alignment lines:
  # +search_aln+ and +save_aln+ are boolean flags, +c+ is a line counter.
  attr_reader :search_aln, :c, :save_aln

  def initialize
    clear
  end

  # Called by #dup and #clone. Gives the copy its own alignment string so
  # that appending to the alignment of one object never changes the other.
  def initialize_copy(source)
    super
    @aln = source.aln.dup unless source.aln.nil?
  end

  # Initializes all the attributes of the result
  def clear
    @query = nil
    @target = nil
    @start = nil
    @end = nil
    @strand = nil
    @exons = nil
    @coverage = nil
    @perc_identity = nil
    @indels = nil
    @mismatch = nil
    @aa_change = nil
    @gene_start = nil
    @gene_end = nil
    @gene_id = nil
    @q_start = nil
    @q_end = nil
    @aln = ""

    # Initialize control attributes
    @maps = false
    @search_aln = false
    @save_aln = false
    @c = 0
  end

  # Toggles the +search_aln+ flag and returns its new value
  def set_search
    @search_aln = !@search_aln
  end

  # Toggles the +save_aln+ flag and returns its new value
  def set_save
    @save_aln = !@save_aln
  end

  # Increments the counter +c+ by one and returns its new value
  def count
    @c += 1
  end

  protected

  attr_writer :search_aln, :c, :path, :maps

end
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
|
data/samples/test.gmap
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
>FIRST
|
|
2
|
+
Paths (8):
|
|
3
|
+
Path 1: query 14--26 (13 bp) => chr ENSBTAT00000028007:11,489--11,477 (-13 bp)
|
|
4
|
+
cDNA direction: indeterminate
|
|
5
|
+
Genomic pos: cow_trans:38,344,745--38,344,733 (- strand)
|
|
6
|
+
Accessions: ENSBTAT00000028007:11,477--11,489 (out of 16907 bp)
|
|
7
|
+
Number of exons: 1
|
|
8
|
+
Coverage: 36.1 (query length: 36 bp)
|
|
9
|
+
Trimmed coverage: 36.1 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
10
|
+
Percent identity: 100.0 (13 matches, 0 mismatches, 0 indels, 0 unknowns)
|
|
11
|
+
Amino acid changes:
|
|
12
|
+
|
|
13
|
+
Alignments:
|
|
14
|
+
Alignment for path 1:
|
|
15
|
+
|
|
16
|
+
-ENSBTAT00000028007:11489-11477 (14-26) 100%
|
|
17
|
+
|
|
18
|
+
0 . :
|
|
19
|
+
|
|
20
|
+
-ENSBTAT00000028007:11489 CTTCGTATTGCTG
|
|
21
|
+
|||||||||||||
|
|
22
|
+
14 CTTCGTATTGCTG
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
Path 2: query 1--36 (36 bp) => chr ENSBTAT00000042528:264--229 (-36 bp)
|
|
26
|
+
cDNA direction: indeterminate
|
|
27
|
+
Genomic pos: cow_trans:264--229 (- strand)
|
|
28
|
+
Accessions: ENSBTAT00000042528:229--264 (out of 957 bp)
|
|
29
|
+
Number of exons: 1
|
|
30
|
+
Coverage: 100.0 (query length: 36 bp)
|
|
31
|
+
Trimmed coverage: 100.0 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
32
|
+
Percent identity: 97.2 (35 matches, 1 mismatches, 0 indels, 0 unknowns)
|
|
33
|
+
Translation: 2..34 (11 aa)
|
|
34
|
+
Amino acid changes: K10T [28]
|
|
35
|
+
|
|
36
|
+
Alignments:
|
|
37
|
+
Alignment for path 2:
|
|
38
|
+
|
|
39
|
+
-ENSBTAT00000042528:264-229 (1-36) 97%
|
|
40
|
+
|
|
41
|
+
0 . : . : . : .
|
|
42
|
+
aa.g 1 G I H M V K A R P K A
|
|
43
|
+
-ENSBTAT00000042528:264 GGGAATTCACATGGTTAAGGCTAGGCCTAAAGCTAT
|
|
44
|
+
||||||||||||||||||||||||||||| ||||||
|
|
45
|
+
1 GGGAATTCACATGGTTAAGGCTAGGCCTACAGCTAT
|
|
46
|
+
aa.c 1 G I H M V K A R P T A
|
|
47
|
+
|
|
48
|
+
Path 3: query 18--31 (14 bp) => chr ENSBTAT00000044819:611--624 (14 bp)
|
|
49
|
+
cDNA direction: indeterminate
|
|
50
|
+
Genomic pos: cow_trans:30,682,928--30,682,941 (+ strand)
|
|
51
|
+
Accessions: ENSBTAT00000044819:611--624 (out of 1972 bp)
|
|
52
|
+
Number of exons: 1
|
|
53
|
+
Coverage: 38.9 (query length: 36 bp)
|
|
54
|
+
Trimmed coverage: 38.9 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
55
|
+
Percent identity: 100.0 (14 matches, 0 mismatches, 0 indels, 0 unknowns)
|
|
56
|
+
Amino acid changes:
|
|
57
|
+
|
|
58
|
+
Alignments:
|
|
59
|
+
Alignment for path 3:
|
|
60
|
+
|
|
61
|
+
+ENSBTAT00000044819:611-624 (18-31) 100%
|
|
62
|
+
|
|
63
|
+
0 . :
|
|
64
|
+
|
|
65
|
+
+ENSBTAT00000044819:611 GAAATCTTGACTGA
|
|
66
|
+
||||||||||||||
|
|
67
|
+
18 GAAATCTTGACTGA
|
|
68
|
+
|
|
69
|
+
>SECOND
|
|
70
|
+
Paths (37):
|
|
71
|
+
Path 1: query 12--26 (15 bp) => chr chr17:21,154,442--21,154,428 (-15 bp)
|
|
72
|
+
cDNA direction: indeterminate
|
|
73
|
+
Genomic pos: bt3.1:609,940,407--609,940,393 (- strand)
|
|
74
|
+
Accessions: chr17:21,154,428--21,154,442 (out of 70149481 bp)
|
|
75
|
+
Number of exons: 1
|
|
76
|
+
Coverage: 41.7 (query length: 36 bp)
|
|
77
|
+
Trimmed coverage: 41.7 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
78
|
+
Percent identity: 100.0 (15 matches, 0 mismatches, 0 indels, 0 unknowns)
|
|
79
|
+
Amino acid changes:
|
|
80
|
+
|
|
81
|
+
Alignments:
|
|
82
|
+
Alignment for path 1:
|
|
83
|
+
|
|
84
|
+
-chr17:21154442-21154428 (12-26) 100%
|
|
85
|
+
|
|
86
|
+
0 . : .
|
|
87
|
+
|
|
88
|
+
-chr17:21154442 TTCGTATACCGTATT
|
|
89
|
+
|||||||||||||||
|
|
90
|
+
12 TTCGTATACCGTATT
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
Maps:
|
|
94
|
+
Map hits for path 1 (0):
|
|
95
|
+
|
|
96
|
+
Path 2: query 9--27 (19 bp) => chr chr11:99,537,167--99,537,185 (19 bp)
|
|
97
|
+
cDNA direction: indeterminate
|
|
98
|
+
Genomic pos: bt3.1:195,355,821--195,355,839 (+ strand)
|
|
99
|
+
Accessions: chr11:99,537,167--99,537,185 (out of 101635058 bp)
|
|
100
|
+
Number of exons: 1
|
|
101
|
+
Coverage: 52.8 (query length: 36 bp)
|
|
102
|
+
Trimmed coverage: 52.8 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
103
|
+
Percent identity: 94.7 (18 matches, 1 mismatches, 0 indels, 0 unknowns)
|
|
104
|
+
Amino acid changes:
|
|
105
|
+
|
|
106
|
+
Alignments:
|
|
107
|
+
Alignment for path 2:
|
|
108
|
+
|
|
109
|
+
+chr11:99537167-99537185 (9-27) 94%
|
|
110
|
+
|
|
111
|
+
0 . : .
|
|
112
|
+
|
|
113
|
+
+chr11:99537167 AGGCATGCATGGCCCGAAC
|
|
114
|
+
| |||||||||||||||||
|
|
115
|
+
9 ACGCATGCATGGCCCGAAC
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
Maps:
|
|
119
|
+
Map hits for path 2 (1):
|
|
120
|
+
gene_maps chr11:3711585..3721335 788340
|
|
121
|
+
|
|
122
|
+
Path 3: query 1--36 (36 bp) => chr chr22:57,923,909--57,926,444 (36 bp)
|
|
123
|
+
cDNA direction: indeterminate
|
|
124
|
+
Genomic pos: bt3.1:1,120,956,262--1,120,953,727 (- strand)
|
|
125
|
+
Accessions: chr22:57,923,909--57,926,444 (out of 59883977 bp)
|
|
126
|
+
Number of exons: 2
|
|
127
|
+
Coverage: 100.0 (query length: 36 bp)
|
|
128
|
+
Trimmed coverage: 100.0 (trimmed length: 36 bp, trimmed region: 1..36)
|
|
129
|
+
Percent identity: 100.0 (36 matches, 0 mismatches, 0 indels, 0 unknowns)
|
|
130
|
+
Translation: 1..36 (11 aa)
|
|
131
|
+
Amino acid changes:
|
|
132
|
+
|
|
133
|
+
Alignments:
|
|
134
|
+
Alignment for path 3:
|
|
135
|
+
|
|
136
|
+
-chr22:57923925-57923909 (20-36) 100%
|
|
137
|
+
|
|
138
|
+
0 . : . : . : . : .
|
|
139
|
+
aa.g 1 R A P R R A G E G R G *
|
|
140
|
+
-chr22:57926444 CGCGCACCTCGGCGTGCAGGTG...CAGGTGAAGGGAGAGGATGA
|
|
141
|
+
||||||||||||||||||||||||||||||||||||
|
|
142
|
+
1 CGCGCACCTCGGCGTGCAG 2500 GTGAAGGGAGAGGATGA
|
|
143
|
+
aa.c 1 R A P R R A G E G R G *
|
|
144
|
+
|
|
145
|
+
Maps:
|
|
146
|
+
Map hits for path 3 (1):
|
|
147
|
+
gene_maps chr10:57912718..57926714 507939
|
|
148
|
+
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
require 'test/unit'
|
|
2
|
+
require 'lib/gmap'
|
|
3
|
+
|
|
4
|
+
# Unit tests for Gmap::Core and Gmap::Result, driven by the sample output
# stored in samples/test.gmap (two query sequences with three paths each).
class GmapTest < Test::Unit::TestCase

  def setup
    # Path is relative to the directory the tests are started from
    @data = 'samples/test.gmap'
  end

  # The file can be opened without a block and closed explicitly
  def test_open
    assert_nothing_raised do
      g = Gmap::Core.open(@data)
      g.close
    end
  end

  # Every sequence of the sample file is yielded with its three paths
  def test_result
    seen = []
    Gmap::Core.open(@data) do |g|
      g.each_sequence do |seq|
        assert_equal 3, seq.size
        assert_instance_of Array, seq
        seen << seq[0].query
        check_first(seq) if seq[0].query == 'FIRST'
        check_second(seq) if seq[0].query == 'SECOND'
      end
    end
    # Guards against the test passing without any sequence being checked
    assert_equal ['FIRST', 'SECOND'], seen
  end

  # Expected values for the three paths of the sequence named FIRST
  def check_first(seq)
    # first result
    assert_equal 'ENSBTAT00000028007', seq[0].target
    assert_equal 36.1, seq[0].coverage
    assert_equal 100.0, seq[0].perc_identity
    assert_equal 0, seq[0].mismatch
    assert_equal 0, seq[0].indels
    assert_equal 14, seq[0].q_start
    assert_equal 26, seq[0].q_end
    assert_equal(-1, seq[0].strand)
    assert_equal 11477, seq[0].start
    assert_equal 11489, seq[0].end
    assert_equal 1, seq[0].exons
    assert_nil seq[0].aa_change
    assert_match(/CTTCGTATTGCTG/, seq[0].aln)
    # second result
    assert_equal 'ENSBTAT00000042528', seq[1].target
    assert_equal 100.0, seq[1].coverage
    assert_equal 97.2, seq[1].perc_identity
    assert_equal 1, seq[1].mismatch
    assert_equal 0, seq[1].indels
    assert_equal 1, seq[1].q_start
    assert_equal 36, seq[1].q_end
    assert_equal(-1, seq[1].strand)
    assert_equal 229, seq[1].start
    assert_equal 264, seq[1].end
    assert_equal 1, seq[1].exons
    assert_equal 'K10T [28]', seq[1].aa_change
    assert_match(/G  I  H  M  V  K  A  R  P  K  A/, seq[1].aln)
    assert_match(/GGGAATTCACATGGTTAAGGCTAGGCCTAAAGCTAT/, seq[1].aln)
    assert_match(/GGGAATTCACATGGTTAAGGCTAGGCCTACAGCTAT/, seq[1].aln)
    assert_match(/G  I  H  M  V  K  A  R  P  T  A/, seq[1].aln)
    # third result
    assert_equal 'ENSBTAT00000044819', seq[2].target
    assert_equal 38.9, seq[2].coverage
    assert_equal 100.0, seq[2].perc_identity
    assert_equal 0, seq[2].mismatch
    assert_equal 0, seq[2].indels
    assert_equal 18, seq[2].q_start
    assert_equal 31, seq[2].q_end
    assert_equal 1, seq[2].strand
    assert_equal 611, seq[2].start
    assert_equal 624, seq[2].end
    assert_equal 1, seq[2].exons
    assert_nil seq[2].aa_change
    assert_match(/GAAATCTTGACTGA/, seq[2].aln)
  end

  # Expected values for the three paths of the sequence named SECOND,
  # including the gene map hits reported for its second and third path
  def check_second(seq)
    # first result
    assert_equal 'chr17', seq[0].target
    assert_equal 41.7, seq[0].coverage
    assert_equal 100.0, seq[0].perc_identity
    assert_equal 0, seq[0].mismatch
    assert_equal 0, seq[0].indels
    assert_equal 12, seq[0].q_start
    assert_equal 26, seq[0].q_end
    assert_equal(-1, seq[0].strand)
    assert_equal 21154428, seq[0].start
    assert_equal 21154442, seq[0].end
    assert_equal 1, seq[0].exons
    assert_nil seq[0].aa_change
    assert_match(/TTCGTATACCGTATT/, seq[0].aln)
    # second result
    assert_equal 'chr11', seq[1].target
    assert_equal 52.8, seq[1].coverage
    assert_equal 94.7, seq[1].perc_identity
    assert_equal 1, seq[1].mismatch
    assert_equal 0, seq[1].indels
    assert_equal 9, seq[1].q_start
    assert_equal 27, seq[1].q_end
    assert_equal 1, seq[1].strand
    assert_equal 99537167, seq[1].start
    assert_equal 99537185, seq[1].end
    assert_equal 1, seq[1].exons
    assert_nil seq[1].aa_change
    assert_match(/AGGCATGCATGGCCCGAAC/, seq[1].aln)
    assert_match(/ACGCATGCATGGCCCGAAC/, seq[1].aln)
    assert_equal 3711585, seq[1].gene_start
    assert_equal 3721335, seq[1].gene_end
    assert_equal 788340, seq[1].gene_id
    # third result
    assert_equal 'chr22', seq[2].target
    assert_equal 100.0, seq[2].coverage
    assert_equal 100.0, seq[2].perc_identity
    assert_equal 0, seq[2].mismatch
    assert_equal 0, seq[2].indels
    assert_equal 1, seq[2].q_start
    assert_equal 36, seq[2].q_end
    assert_equal(-1, seq[2].strand)
    assert_equal 57923909, seq[2].start
    assert_equal 57926444, seq[2].end
    assert_equal 2, seq[2].exons
    assert_nil seq[2].aa_change
    assert_match(/R  A  P  R  R  A         G  E  G  R  G  \*/, seq[2].aln)
    assert_match(/CGCGCACCTCGGCGTGCAGGTG\.\.\.CAGGTGAAGGGAGAGGATGA/, seq[2].aln)
    assert_match(/CGCGCACCTCGGCGTGCAG   2500   GTGAAGGGAGAGGATGA/, seq[2].aln)
    assert_equal 57912718, seq[2].gene_start
    assert_equal 57926714, seq[2].gene_end
    assert_equal 507939, seq[2].gene_id
  end

end
|