scbi_blast 0.0.30 → 0.0.31
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +1 -1
- data/README.rdoc +76 -8
- data/lib/scbi_blast/batch_blast.rb +101 -85
- data/lib/scbi_blast/blast_hit.rb +70 -57
- data/lib/scbi_blast/blast_query.rb +44 -17
- data/lib/scbi_blast/blast_simplexml_result.rb +28 -14
- data/lib/scbi_blast/blast_table_result.rb +164 -149
- data/lib/scbi_blast/blast_xml_result.rb +105 -94
- data/lib/scbi_blast/dust_masker.rb +59 -44
- data/lib/scbi_blast.rb +23 -1
- metadata +7 -5
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
data/README.rdoc
CHANGED
@@ -1,32 +1,100 @@
|
|
1
1
|
= scbi_blast
|
2
2
|
|
3
|
-
* http://
|
3
|
+
* http://www.scbi.uma.es/downloads
|
4
4
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
|
-
|
7
|
+
scbi_blast is a ruby gem to handle blast+ executions without the need of temporary files,
|
8
|
+
it has been developed at SCBI[http://www.scbi.uma.es] by Almudena Bocinos & Dario Guerrero.
|
8
9
|
|
9
|
-
== FEATURES
|
10
|
+
== FEATURES:
|
10
11
|
|
11
|
-
*
|
12
|
+
* Execute blast within ruby without creating temporary files (using pipes)
|
13
|
+
* Parse XML and table results into Query and Hit objects
|
14
|
+
* Execute DustMasker without temporary files (using pipes)
|
12
15
|
|
13
16
|
== SYNOPSIS:
|
14
17
|
|
15
|
-
|
18
|
+
scbi_blast can handle *blastn*, *blastp* and *dustmasker* applications from NCBI blast package.
|
19
|
+
Input sequences can be supplied as an array (see the example below) or as a chunk of text
|
20
|
+
inside a string variable.
|
21
|
+
|
22
|
+
There are two output formats supported (_table_ and _xml_) by the built-in parsers.
|
23
|
+
|
24
|
+
scbi_blast returns an object with all data parsed and split into queries and their respective hits.
|
25
|
+
|
26
|
+
=== Blast
|
27
|
+
|
28
|
+
Here is an example that shows how to use it to do a blastn:
|
29
|
+
|
30
|
+
require 'scbi_blast'
|
31
|
+
|
32
|
+
# create a blast processor object pointing to a formatted blast database
|
33
|
+
# that uses a blastn with 4 parallel threads
|
34
|
+
|
35
|
+
blast=BatchBlast.new('-db formatted_blast_db.fasta','blastn','-num_threads 4')
|
36
|
+
|
37
|
+
# fill in some sample sequences (in your code those sequences will come
|
38
|
+
# from a file, an output of another process, etc...)
|
39
|
+
|
40
|
+
seqs=[]
|
41
|
+
seqs << ">GFIXVR"
|
42
|
+
seqs << "GACTACACGACGACCCGACGACGACGAGAGNGNGGACCCGACGACG"
|
43
|
+
seqs << ">GFIM12"
|
44
|
+
seqs << "GACTACACGACGACTAGACCCGACGACGTGACCCGACGACG"
|
45
|
+
|
46
|
+
|
47
|
+
# execute blast
|
48
|
+
res=blast.do_blast(seqs)
|
49
|
+
|
50
|
+
|
51
|
+
# iterate over results printing hit id, start and end positions.
|
52
|
+
|
53
|
+
res.querys.each do |query|
|
54
|
+
query.hits.each do |hit|
|
55
|
+
puts hit.subject_id, hit.q_beg, hit.q_end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
=== DustMasker
|
61
|
+
|
62
|
+
An example that shows how to use it to find dust regions in some sequences:
|
63
|
+
|
64
|
+
require 'scbi_blast'
|
65
|
+
|
66
|
+
# create DustMasker object
|
67
|
+
dust_masker=DustMasker.new()
|
68
|
+
|
69
|
+
seqs=[]
|
70
|
+
seqs << ">GFIXVR"
|
71
|
+
seqs << "GACTACACGACGACCCGACGACGACGAGAGNGNGGACCCGACGACG"
|
72
|
+
seqs << ">GFIM12"
|
73
|
+
seqs << "GACTACACGACGACTAGACCCGACGACGTGACCCGACGACG"
|
74
|
+
|
75
|
+
dust_regions = dust_masker.do_dust(seqs.join("\n"))
|
76
|
+
|
77
|
+
puts "Found #{dust_regions.count} dust regions"
|
78
|
+
|
79
|
+
dust_regions.each do |dust|
|
80
|
+
# region is defined as an array, where element [0] is the start
|
81
|
+
# of the region, and [1] is the end
|
82
|
+
puts dust.join(',')
|
83
|
+
end
|
16
84
|
|
17
85
|
== REQUIREMENTS:
|
18
86
|
|
19
|
-
*
|
87
|
+
* NCBI blast+ already installed
|
20
88
|
|
21
89
|
== INSTALL:
|
22
90
|
|
23
|
-
|
91
|
+
gem install scbi_blast
|
24
92
|
|
25
93
|
== LICENSE:
|
26
94
|
|
27
95
|
(The MIT License)
|
28
96
|
|
29
|
-
Copyright (c) 2010 Dario Guerrero
|
97
|
+
Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
30
98
|
|
31
99
|
Permission is hereby granted, free of charge, to any person obtaining
|
32
100
|
a copy of this software and associated documentation files (the
|
@@ -1,115 +1,131 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# class to execute Blast without temporary files (it uses pipes)
|
4
24
|
class BatchBlast
|
5
25
|
|
26
|
+
# class initialization
|
6
27
|
def initialize(database, blast_type = 'blastn', extra_params = '')
|
7
28
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
29
|
+
@blast_type = blast_type
|
30
|
+
@database = database
|
31
|
+
@extra_params = extra_params
|
32
|
+
|
33
|
+
|
13
34
|
end
|
14
35
|
|
36
|
+
# returns the blast cmd that will be used to launch blast
|
15
37
|
def get_blast_cmd(fmt = :table)
|
16
38
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
39
|
+
if fmt==:table
|
40
|
+
format = ' -outfmt "7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore score qframe sframe qseq sseq" '
|
41
|
+
elsif fmt ==:xml
|
42
|
+
format = ' -outfmt 5 '
|
43
|
+
end
|
44
|
+
|
45
|
+
dust=''
|
22
46
|
|
23
|
-
|
47
|
+
cmd = @blast_type+' '+dust+@extra_params + format + @database
|
24
48
|
|
25
|
-
|
26
|
-
# if @blast_type.index('blastn')
|
27
|
-
# dust=' -dust no '
|
28
|
-
# end
|
29
|
-
|
30
|
-
cmd = @blast_type+' '+dust+@extra_params + format + @database
|
31
|
-
|
32
|
-
return cmd
|
49
|
+
return cmd
|
33
50
|
|
34
51
|
end
|
35
52
|
|
53
|
+
# do a blast to seqs
|
36
54
|
def do_blast(seqs, fmt = :table,parse_output=true)
|
37
|
-
|
38
|
-
if seqs.is_a?(Array)
|
39
|
-
seq_fasta=seqs.join("\n")
|
40
|
-
else
|
41
|
-
seq_fasta=seqs
|
42
|
-
end
|
43
55
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
if seqs.is_a?(Array)
|
57
|
+
seq_fasta=seqs.join("\n")
|
58
|
+
else
|
59
|
+
seq_fasta=seqs
|
60
|
+
end
|
61
|
+
|
62
|
+
cmd = get_blast_cmd(fmt)
|
63
|
+
|
64
|
+
res=''
|
65
|
+
|
66
|
+
# Beware: we once nearly went crazy trying to find out why this did not return all the hits found when running blast by hand; it was because the manual blast was given the complete sequence, while in the MID we were passing only the first 20 nt.
|
67
|
+
IO.popen(cmd,'w+') {|blast|
|
68
|
+
blast.sync = true
|
69
|
+
blast.write(seq_fasta)
|
70
|
+
blast.close_write
|
71
|
+
res = blast.readlines
|
72
|
+
blast.close_read
|
73
|
+
}
|
74
|
+
|
75
|
+
if !$?.exitstatus.nil? && $?.exitstatus>0
|
76
|
+
raise "Error doing blast #{cmd} to fasta: #{seq_fasta}"
|
77
|
+
end
|
78
|
+
|
79
|
+
# check if all sequences were processed
|
80
|
+
if parse_output
|
81
|
+
if fmt == :table
|
82
|
+
res = BlastTableResult.new(res)
|
83
|
+
elsif fmt == :xml
|
84
|
+
res = BlastSimplexmlResult.new(res)
|
68
85
|
# elsif fmt ==:xml2
|
69
86
|
# res = BlastXmlResult.new(res)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# puts "#{seq_fasta.count('>')}, #{res.querys.count}"
|
90
|
+
|
91
|
+
if seq_fasta.count('>')!=res.querys.count
|
92
|
+
not_processed = seqs.select{|e| e.index('>')}
|
93
|
+
|
94
|
+
res.querys.each do |query|
|
95
|
+
if not_processed.include?('>'+query.query_id)
|
96
|
+
not_processed.delete('>'+query.query_id)
|
81
97
|
end
|
82
|
-
|
83
|
-
raise "These sequences #{seq_fasta.count('>')},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
|
84
98
|
end
|
85
|
-
|
99
|
+
|
100
|
+
raise "These sequences #{seq_fasta.count('>')},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
|
86
101
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
102
|
+
|
103
|
+
end
|
104
|
+
|
105
|
+
return res
|
106
|
+
|
90
107
|
end
|
91
|
-
|
108
|
+
|
109
|
+
# do blast to an array of Sequence objects
|
92
110
|
def do_blast_seqs(seqs, fmt = :table,parse_output=true)
|
93
111
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
112
|
+
|
113
|
+
cmd = get_blast_cmd(fmt)
|
114
|
+
|
115
|
+
fastas=[]
|
116
|
+
|
117
|
+
seqs.each do |seq|
|
118
|
+
fastas.push '>'+seq.seq_name
|
119
|
+
fastas.push seq.seq_fasta
|
120
|
+
end
|
121
|
+
|
122
|
+
return do_blast(fastas,fmt,parse_output)
|
105
123
|
|
106
124
|
end
|
107
|
-
|
125
|
+
|
108
126
|
|
109
127
|
def close
|
110
|
-
|
128
|
+
|
111
129
|
end
|
112
130
|
|
113
131
|
end
|
114
|
-
|
115
|
-
|
data/lib/scbi_blast/blast_hit.rb
CHANGED
@@ -1,115 +1,128 @@
|
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# Class for a Blast Hit (a concordance between a query and a subject)
|
1
24
|
class BlastHit
|
2
|
-
|
25
|
+
|
26
|
+
# initializes a new hit
|
3
27
|
def initialize(q_beg,q_end,s_beg,s_end)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
# 6. GAPS - Number of gaps.
|
27
|
-
# 7. Q_BEG - Query begin.
|
28
|
-
# 8. Q_END - Query end.
|
29
|
-
# 9. S_BEG - Subject begin.
|
30
|
-
# 10. S_END - Subject end.
|
31
|
-
# 11. E_VAL - Expect value.
|
32
|
-
# 12. BIT_SCORE - Bit score.
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
def subject_id=(v)
|
37
|
-
@subject_id = v
|
28
|
+
|
29
|
+
@q_beg = q_beg.to_i-1 #blast indexes are 1 based
|
30
|
+
@q_end = q_end.to_i-1
|
31
|
+
@s_beg = s_beg.to_i-1
|
32
|
+
@s_end = s_end.to_i-1
|
33
|
+
|
34
|
+
@reversed = false
|
35
|
+
|
36
|
+
# TODO - Reversed should be taken from q_frame and s_frame instead of s_end. For proteins it comes from q_frame; for nucleotides, from s_frame.
|
37
|
+
|
38
|
+
# check if reversed
|
39
|
+
if @s_beg > @s_end
|
40
|
+
@s_beg = s_end.to_i-1
|
41
|
+
@s_end = s_beg.to_i-1
|
42
|
+
@reversed = true
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
# some accessors
|
48
|
+
def subject_id=(v)
|
49
|
+
@subject_id = v
|
38
50
|
end
|
39
51
|
|
40
52
|
def ident=(v)
|
41
|
-
|
53
|
+
@ident = v.to_f
|
42
54
|
end
|
43
55
|
|
44
56
|
def align_len=(v)
|
45
|
-
|
57
|
+
@align_len = v.to_i
|
46
58
|
end
|
47
59
|
|
48
60
|
def mismatches=(v)
|
49
|
-
|
61
|
+
@mismatches = v.to_i
|
50
62
|
end
|
51
63
|
|
52
64
|
def gaps=(v)
|
53
|
-
|
65
|
+
@gaps = v.to_i
|
54
66
|
end
|
55
67
|
|
56
|
-
def e_val=(v)
|
57
|
-
|
68
|
+
def e_val=(v)
|
69
|
+
@e_val = v.to_f
|
58
70
|
end
|
59
71
|
|
60
72
|
def bit_score=(v)
|
61
|
-
|
73
|
+
@bit_score = v.to_f
|
62
74
|
end
|
63
75
|
|
64
76
|
def score=(v)
|
65
|
-
|
66
|
-
|
77
|
+
|
78
|
+
@score = v.to_f
|
67
79
|
end
|
68
80
|
|
69
81
|
def acc=(v)
|
70
|
-
|
82
|
+
@acc = v
|
71
83
|
end
|
72
84
|
|
73
85
|
def definition=(v)
|
74
|
-
|
86
|
+
@definition = v
|
75
87
|
end
|
76
88
|
|
77
89
|
def q_frame=(v)
|
78
|
-
|
90
|
+
@q_frame = v.to_i
|
79
91
|
end
|
80
92
|
|
81
93
|
def s_frame=(v)
|
82
|
-
|
94
|
+
@s_frame = v.to_i
|
83
95
|
end
|
84
96
|
|
85
97
|
def s_seq=(v)
|
86
|
-
|
98
|
+
@s_seq = v
|
87
99
|
end
|
88
100
|
|
89
101
|
def q_seq=(v)
|
90
|
-
|
102
|
+
@q_seq = v
|
91
103
|
end
|
92
104
|
|
93
105
|
def full_subject_length=(v)
|
94
|
-
|
106
|
+
@full_subject_length = v
|
95
107
|
end
|
96
108
|
|
109
|
+
# returns all hit info as a string
|
97
110
|
def inspect
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
111
|
+
res = "Hit: #{@subject_id.ljust(10)} #{@ident.to_s.rjust(4)} #{@align_len.to_s.rjust(2)} #{@mismatches.to_s.rjust(2)} #{@gaps.to_s.rjust(2)} #{@q_beg.to_s.rjust(5)} #{@q_end.to_s.rjust(5)} #{@s_beg.to_s.rjust(5)} #{@s_end.to_s.rjust(5)} #{@e_val.to_s.rjust(5)} #{@bit_score.to_s.rjust(5)} #{@reversed.to_s.rjust(5)}"
|
112
|
+
res += " #{@score.to_s.rjust(5)} #{@acc.ljust(10)} #{@definition.ljust(10)} #{@q_frame.to_s.rjust(2)} #{@s_frame.to_s.rjust(2)} #{@full_subject_length.to_s.rjust(5)} #{@q_seq}.#{@s_seq}."
|
113
|
+
|
114
|
+
return res
|
102
115
|
end
|
103
116
|
|
104
117
|
def get_subject
|
105
118
|
return @subject_id
|
106
119
|
end
|
107
120
|
|
121
|
+
# readers and accessor for properties
|
108
122
|
attr_accessor :q_beg, :q_end, :s_beg, :s_end
|
109
|
-
|
110
123
|
attr_reader :subject_id, :align_len, :gaps, :mismatches
|
111
124
|
attr_accessor :reversed
|
112
125
|
attr_reader :score, :acc, :definition, :q_frame, :s_frame, :full_subject_length, :ident, :e_val, :bit_score
|
113
126
|
attr_reader :q_seq, :s_seq
|
114
|
-
|
127
|
+
|
115
128
|
end
|
@@ -1,37 +1,64 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# Object to encapsulate a Blast Query
|
24
|
+
class BlastQuery
|
25
|
+
|
26
|
+
attr_accessor :hits
|
27
|
+
attr_accessor :query_id, :query_def, :full_query_length
|
28
|
+
|
29
|
+
# initializes a new Query object
|
7
30
|
def initialize(query_id)
|
8
31
|
@query_id = query_id
|
9
32
|
@query_def = query_id
|
10
33
|
@full_query_length = 0
|
11
34
|
@hits = []
|
12
|
-
|
35
|
+
# inspect
|
13
36
|
end
|
14
37
|
|
38
|
+
# add a hit to query
|
15
39
|
def add_hit(h)
|
16
40
|
@hits.push h
|
17
41
|
end
|
18
|
-
|
42
|
+
|
43
|
+
# inspect query values with all hits
|
19
44
|
def inspect
|
20
|
-
res = "\n * Query #{@query_id} :"
|
45
|
+
res = "\n * Query #{@query_id} :"
|
21
46
|
res += "subject_id ident align_len mismatches gaps q_beg q_end s_beg s_end e_val bit_score reversed\n\n"
|
22
47
|
@hits.each{ |h| res+= h.inspect+"\n" }
|
23
|
-
|
48
|
+
|
24
49
|
return res
|
25
|
-
end
|
26
|
-
|
50
|
+
end
|
51
|
+
|
52
|
+
# get num of hits
|
27
53
|
def size
|
28
54
|
return @hits.size
|
29
|
-
end
|
30
|
-
|
55
|
+
end
|
56
|
+
|
57
|
+
# sort hits by command
|
31
58
|
def sort(comand)
|
32
59
|
return @hits.sort(comand)
|
33
60
|
end
|
34
|
-
|
35
|
-
end
|
36
61
|
|
37
|
-
|
62
|
+
end
|
63
|
+
|
64
|
+
|
@@ -1,23 +1,35 @@
|
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
1
22
|
require "blast_query.rb"
|
2
23
|
require "blast_hit.rb"
|
3
24
|
|
4
25
|
require 'zlib'
|
5
26
|
require 'xmlsimple'
|
6
|
-
#xml=File.open('orf.1.xml').read
|
7
|
-
#data = XmlSimple.xml_in(xml)
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#data['best_orf'][0]['start'][0]
|
11
|
-
#data['best_orf'][0]['content']
|
12
|
-
|
13
|
-
######################################
|
14
|
-
# Author:: Almudena Bocinos Rioboo
|
15
|
-
# Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit"
|
16
|
-
|
17
|
-
######################################
|
18
27
|
|
28
|
+
# Extracts results from blast output in XML
|
29
|
+
# format and uses it to create instances of "BlastQuery" and "BlastHit"
|
19
30
|
class BlastSimplexmlResult
|
20
31
|
|
32
|
+
# Parser initialization
|
21
33
|
def initialize(input)
|
22
34
|
|
23
35
|
@querys = []
|
@@ -105,7 +117,7 @@ class BlastSimplexmlResult
|
|
105
117
|
|
106
118
|
hit.q_seq = hsp['Hsp_qseq'][0]
|
107
119
|
hit.s_seq = hsp['Hsp_hseq'][0]
|
108
|
-
|
120
|
+
|
109
121
|
|
110
122
|
hit.subject_id= subject_id
|
111
123
|
hit.full_subject_length=full_subject_length
|
@@ -134,6 +146,7 @@ class BlastSimplexmlResult
|
|
134
146
|
return res
|
135
147
|
end
|
136
148
|
|
149
|
+
# finds a query by name
|
137
150
|
def find_query(querys,name_q)
|
138
151
|
# newq = querys.find{|q| ( q.find{|h| (h.subject_id)})}
|
139
152
|
new_q=nil
|
@@ -145,11 +158,12 @@ class BlastSimplexmlResult
|
|
145
158
|
return new_q
|
146
159
|
end
|
147
160
|
|
161
|
+
# check if there are querys
|
148
162
|
def empty?
|
149
|
-
|
150
163
|
return @querys.empty?
|
151
164
|
end
|
152
165
|
|
166
|
+
# get num of querys
|
153
167
|
def size
|
154
168
|
@querys.size
|
155
169
|
end
|