scbi_blast 0.0.30 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +1 -1
- data/README.rdoc +76 -8
- data/lib/scbi_blast/batch_blast.rb +101 -85
- data/lib/scbi_blast/blast_hit.rb +70 -57
- data/lib/scbi_blast/blast_query.rb +44 -17
- data/lib/scbi_blast/blast_simplexml_result.rb +28 -14
- data/lib/scbi_blast/blast_table_result.rb +164 -149
- data/lib/scbi_blast/blast_xml_result.rb +105 -94
- data/lib/scbi_blast/dust_masker.rb +59 -44
- data/lib/scbi_blast.rb +23 -1
- metadata +7 -5
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
data/README.rdoc
CHANGED
@@ -1,32 +1,100 @@
|
|
1
1
|
= scbi_blast
|
2
2
|
|
3
|
-
* http://
|
3
|
+
* http://www.scbi.uma.es/downloads
|
4
4
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
|
-
|
7
|
+
scbi_blast is a ruby gem to handle blast+ executions without the need of temporary files,
|
8
|
+
it has been developed at {SCBI}[http://www.scbi.uma.es] by Almudena Bocinos & Dario Guerrero.
|
8
9
|
|
9
|
-
== FEATURES
|
10
|
+
== FEATURES:
|
10
11
|
|
11
|
-
*
|
12
|
+
* Execute blast within ruby without creating temporary files (using pipes)
|
13
|
+
* Parse XML and table results into Query and Hit objects
|
14
|
+
* Execute DustMasker without temporary files (using pipes)
|
12
15
|
|
13
16
|
== SYNOPSIS:
|
14
17
|
|
15
|
-
|
18
|
+
scbi_blast can handle the *blastn*, *blastp* and *dustmasker* applications from the NCBI BLAST+ package.
|
19
|
+
Input sequences can be supplied as an array (see the example below) or as a chunk of text
|
20
|
+
inside a string variable.
|
21
|
+
|
22
|
+
There are two output formats supported (_table_ and _xml_) by the built-in parsers.
|
23
|
+
|
24
|
+
scbi_blast returns an object with all data parsed and split into queries and their respective hits.
|
25
|
+
|
26
|
+
=== Blast
|
27
|
+
|
28
|
+
Here is an example that shows how to use it to do a blastn:
|
29
|
+
|
30
|
+
require 'scbi_blast'
|
31
|
+
|
32
|
+
# create a blast processor object pointing to a formatted blast database
|
33
|
+
# that uses a blastn with 4 parallel threads
|
34
|
+
|
35
|
+
blast=BatchBlast.new('-db formatted_blast_db.fasta','blastn','-num_threads 4')
|
36
|
+
|
37
|
+
# fill in some sample sequences (in your code those sequences will come
|
38
|
+
# from a file, an output of another process, etc...)
|
39
|
+
|
40
|
+
seqs=[]
|
41
|
+
seqs << ">GFIXVR"
|
42
|
+
seqs << "GACTACACGACGACCCGACGACGACGAGAGNGNGGACCCGACGACG"
|
43
|
+
seqs << ">GFIM12"
|
44
|
+
seqs << "GACTACACGACGACTAGACCCGACGACGTGACCCGACGACG"
|
45
|
+
|
46
|
+
|
47
|
+
# execute blast
|
48
|
+
res=blast.do_blast(seqs)
|
49
|
+
|
50
|
+
|
51
|
+
# iterate over results printing hit id, start and end positions.
|
52
|
+
|
53
|
+
res.querys.each do |query|
|
54
|
+
query.hits.each do |hit|
|
55
|
+
puts hit.subject_id, hit.q_beg, hit.q_end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
=== DustMasker
|
61
|
+
|
62
|
+
An example that shows how to use it to find dust regions in some sequences:
|
63
|
+
|
64
|
+
require 'scbi_blast'
|
65
|
+
|
66
|
+
# create DustMasker object
|
67
|
+
dust_masker=DustMasker.new()
|
68
|
+
|
69
|
+
seqs=[]
|
70
|
+
seqs << ">GFIXVR"
|
71
|
+
seqs << "GACTACACGACGACCCGACGACGACGAGAGNGNGGACCCGACGACG"
|
72
|
+
seqs << ">GFIM12"
|
73
|
+
seqs << "GACTACACGACGACTAGACCCGACGACGTGACCCGACGACG"
|
74
|
+
|
75
|
+
dust_regions = dust_masker.do_dust(seqs.join("\n"))
|
76
|
+
|
77
|
+
puts "Found #{dust_regions.count} dust regions"
|
78
|
+
|
79
|
+
dust_regions.each do |dust|
|
80
|
+
# region is defined as an array, where element [0] is the start
|
81
|
+
# of the region, and [1] is the end
|
82
|
+
puts dust.join(',')
|
83
|
+
end
|
16
84
|
|
17
85
|
== REQUIREMENTS:
|
18
86
|
|
19
|
-
*
|
87
|
+
* NCBI blast+ already installed
|
20
88
|
|
21
89
|
== INSTALL:
|
22
90
|
|
23
|
-
|
91
|
+
gem install scbi_blast
|
24
92
|
|
25
93
|
== LICENSE:
|
26
94
|
|
27
95
|
(The MIT License)
|
28
96
|
|
29
|
-
Copyright (c) 2010 Dario Guerrero
|
97
|
+
Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
30
98
|
|
31
99
|
Permission is hereby granted, free of charge, to any person obtaining
|
32
100
|
a copy of this software and associated documentation files (the
|
@@ -1,115 +1,131 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# class to execute Blast without temporary files (it uses pipes)
|
4
24
|
class BatchBlast
|
5
25
|
|
26
|
+
# class initialization
|
6
27
|
def initialize(database, blast_type = 'blastn', extra_params = '')
|
7
28
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
29
|
+
@blast_type = blast_type
|
30
|
+
@database = database
|
31
|
+
@extra_params = extra_params
|
32
|
+
|
33
|
+
|
13
34
|
end
|
14
35
|
|
36
|
+
# returns the blast cmd that will be used to launch blast
|
15
37
|
def get_blast_cmd(fmt = :table)
|
16
38
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
39
|
+
if fmt==:table
|
40
|
+
format = ' -outfmt "7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore score qframe sframe qseq sseq" '
|
41
|
+
elsif fmt ==:xml
|
42
|
+
format = ' -outfmt 5 '
|
43
|
+
end
|
44
|
+
|
45
|
+
dust=''
|
22
46
|
|
23
|
-
|
47
|
+
cmd = @blast_type+' '+dust+@extra_params + format + @database
|
24
48
|
|
25
|
-
|
26
|
-
# if @blast_type.index('blastn')
|
27
|
-
# dust=' -dust no '
|
28
|
-
# end
|
29
|
-
|
30
|
-
cmd = @blast_type+' '+dust+@extra_params + format + @database
|
31
|
-
|
32
|
-
return cmd
|
49
|
+
return cmd
|
33
50
|
|
34
51
|
end
|
35
52
|
|
53
|
+
# do a blast to seqs
|
36
54
|
def do_blast(seqs, fmt = :table,parse_output=true)
|
37
|
-
|
38
|
-
if seqs.is_a?(Array)
|
39
|
-
seq_fasta=seqs.join("\n")
|
40
|
-
else
|
41
|
-
seq_fasta=seqs
|
42
|
-
end
|
43
55
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
56
|
+
if seqs.is_a?(Array)
|
57
|
+
seq_fasta=seqs.join("\n")
|
58
|
+
else
|
59
|
+
seq_fasta=seqs
|
60
|
+
end
|
61
|
+
|
62
|
+
cmd = get_blast_cmd(fmt)
|
63
|
+
|
64
|
+
res=''
|
65
|
+
|
66
|
+
# Note: we once nearly went crazy trying to find out why this did not return all the hits found when running blast by hand; it was because the manual blast was given the complete sequence, while in the MID case only the first 20 nt were being passed.
|
67
|
+
IO.popen(cmd,'w+') {|blast|
|
68
|
+
blast.sync = true
|
69
|
+
blast.write(seq_fasta)
|
70
|
+
blast.close_write
|
71
|
+
res = blast.readlines
|
72
|
+
blast.close_read
|
73
|
+
}
|
74
|
+
|
75
|
+
if !$?.exitstatus.nil? && $?.exitstatus>0
|
76
|
+
raise "Error doing blast #{cmd} to fasta: #{seq_fasta}"
|
77
|
+
end
|
78
|
+
|
79
|
+
# check if all sequences were processed
|
80
|
+
if parse_output
|
81
|
+
if fmt == :table
|
82
|
+
res = BlastTableResult.new(res)
|
83
|
+
elsif fmt == :xml
|
84
|
+
res = BlastSimplexmlResult.new(res)
|
68
85
|
# elsif fmt ==:xml2
|
69
86
|
# res = BlastXmlResult.new(res)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# puts "#{seq_fasta.count('>')}, #{res.querys.count}"
|
90
|
+
|
91
|
+
if seq_fasta.count('>')!=res.querys.count
|
92
|
+
not_processed = seqs.select{|e| e.index('>')}
|
93
|
+
|
94
|
+
res.querys.each do |query|
|
95
|
+
if not_processed.include?('>'+query.query_id)
|
96
|
+
not_processed.delete('>'+query.query_id)
|
81
97
|
end
|
82
|
-
|
83
|
-
raise "These sequences #{seq_fasta.count('>')},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
|
84
98
|
end
|
85
|
-
|
99
|
+
|
100
|
+
raise "These sequences #{seq_fasta.count('>')},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
|
86
101
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
102
|
+
|
103
|
+
end
|
104
|
+
|
105
|
+
return res
|
106
|
+
|
90
107
|
end
|
91
|
-
|
108
|
+
|
109
|
+
# do blast to an array of Sequence objects
|
92
110
|
def do_blast_seqs(seqs, fmt = :table,parse_output=true)
|
93
111
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
112
|
+
|
113
|
+
cmd = get_blast_cmd(fmt)
|
114
|
+
|
115
|
+
fastas=[]
|
116
|
+
|
117
|
+
seqs.each do |seq|
|
118
|
+
fastas.push '>'+seq.seq_name
|
119
|
+
fastas.push seq.seq_fasta
|
120
|
+
end
|
121
|
+
|
122
|
+
return do_blast(fastas,fmt,parse_output)
|
105
123
|
|
106
124
|
end
|
107
|
-
|
125
|
+
|
108
126
|
|
109
127
|
def close
|
110
|
-
|
128
|
+
|
111
129
|
end
|
112
130
|
|
113
131
|
end
|
114
|
-
|
115
|
-
|
data/lib/scbi_blast/blast_hit.rb
CHANGED
@@ -1,115 +1,128 @@
|
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# Class for a Blast Hit (a concordance between a query and a subject)
|
1
24
|
class BlastHit
|
2
|
-
|
25
|
+
|
26
|
+
# initializes a new hit
|
3
27
|
def initialize(q_beg,q_end,s_beg,s_end)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
# 6. GAPS - Number of gaps.
|
27
|
-
# 7. Q_BEG - Query begin.
|
28
|
-
# 8. Q_END - Query end.
|
29
|
-
# 9. S_BEG - Subject begin.
|
30
|
-
# 10. S_END - Subject end.
|
31
|
-
# 11. E_VAL - Expect value.
|
32
|
-
# 12. BIT_SCORE - Bit score.
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
def subject_id=(v)
|
37
|
-
@subject_id = v
|
28
|
+
|
29
|
+
@q_beg = q_beg.to_i-1 #blast indexes are 1 based
|
30
|
+
@q_end = q_end.to_i-1
|
31
|
+
@s_beg = s_beg.to_i-1
|
32
|
+
@s_end = s_end.to_i-1
|
33
|
+
|
34
|
+
@reversed = false
|
35
|
+
|
36
|
+
# TODO -Reversed should be taken from q_frame and s_frame instead of s_end. In proteins comes from q_frame. In nt from s_frames.
|
37
|
+
|
38
|
+
# check if reversed
|
39
|
+
if @s_beg > @s_end
|
40
|
+
@s_beg = s_end.to_i-1
|
41
|
+
@s_end = s_beg.to_i-1
|
42
|
+
@reversed = true
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
# some accessors
|
48
|
+
def subject_id=(v)
|
49
|
+
@subject_id = v
|
38
50
|
end
|
39
51
|
|
40
52
|
def ident=(v)
|
41
|
-
|
53
|
+
@ident = v.to_f
|
42
54
|
end
|
43
55
|
|
44
56
|
def align_len=(v)
|
45
|
-
|
57
|
+
@align_len = v.to_i
|
46
58
|
end
|
47
59
|
|
48
60
|
def mismatches=(v)
|
49
|
-
|
61
|
+
@mismatches = v.to_i
|
50
62
|
end
|
51
63
|
|
52
64
|
def gaps=(v)
|
53
|
-
|
65
|
+
@gaps = v.to_i
|
54
66
|
end
|
55
67
|
|
56
|
-
def e_val=(v)
|
57
|
-
|
68
|
+
def e_val=(v)
|
69
|
+
@e_val = v.to_f
|
58
70
|
end
|
59
71
|
|
60
72
|
def bit_score=(v)
|
61
|
-
|
73
|
+
@bit_score = v.to_f
|
62
74
|
end
|
63
75
|
|
64
76
|
def score=(v)
|
65
|
-
|
66
|
-
|
77
|
+
|
78
|
+
@score = v.to_f
|
67
79
|
end
|
68
80
|
|
69
81
|
def acc=(v)
|
70
|
-
|
82
|
+
@acc = v
|
71
83
|
end
|
72
84
|
|
73
85
|
def definition=(v)
|
74
|
-
|
86
|
+
@definition = v
|
75
87
|
end
|
76
88
|
|
77
89
|
def q_frame=(v)
|
78
|
-
|
90
|
+
@q_frame = v.to_i
|
79
91
|
end
|
80
92
|
|
81
93
|
def s_frame=(v)
|
82
|
-
|
94
|
+
@s_frame = v.to_i
|
83
95
|
end
|
84
96
|
|
85
97
|
def s_seq=(v)
|
86
|
-
|
98
|
+
@s_seq = v
|
87
99
|
end
|
88
100
|
|
89
101
|
def q_seq=(v)
|
90
|
-
|
102
|
+
@q_seq = v
|
91
103
|
end
|
92
104
|
|
93
105
|
def full_subject_length=(v)
|
94
|
-
|
106
|
+
@full_subject_length = v
|
95
107
|
end
|
96
108
|
|
109
|
+
# puts all hit info on a string
|
97
110
|
def inspect
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
111
|
+
res = "Hit: #{@subject_id.ljust(10)} #{@ident.to_s.rjust(4)} #{@align_len.to_s.rjust(2)} #{@mismatches.to_s.rjust(2)} #{@gaps.to_s.rjust(2)} #{@q_beg.to_s.rjust(5)} #{@q_end.to_s.rjust(5)} #{@s_beg.to_s.rjust(5)} #{@s_end.to_s.rjust(5)} #{@e_val.to_s.rjust(5)} #{@bit_score.to_s.rjust(5)} #{@reversed.to_s.rjust(5)}"
|
112
|
+
res += " #{@score.to_s.rjust(5)} #{@acc.ljust(10)} #{@definition.ljust(10)} #{@q_frame.to_s.rjust(2)} #{@s_frame.to_s.rjust(2)} #{@full_subject_length.to_s.rjust(5)} #{@q_seq}.#{@s_seq}."
|
113
|
+
|
114
|
+
return res
|
102
115
|
end
|
103
116
|
|
104
117
|
def get_subject
|
105
118
|
return @subject_id
|
106
119
|
end
|
107
120
|
|
121
|
+
# readers and accessor for properties
|
108
122
|
attr_accessor :q_beg, :q_end, :s_beg, :s_end
|
109
|
-
|
110
123
|
attr_reader :subject_id, :align_len, :gaps, :mismatches
|
111
124
|
attr_accessor :reversed
|
112
125
|
attr_reader :score, :acc, :definition, :q_frame, :s_frame, :full_subject_length, :ident, :e_val, :bit_score
|
113
126
|
attr_reader :q_seq, :s_seq
|
114
|
-
|
127
|
+
|
115
128
|
end
|
@@ -1,37 +1,64 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
|
23
|
+
# Object to encapsulate a Blast Query
|
24
|
+
class BlastQuery
|
25
|
+
|
26
|
+
attr_accessor :hits
|
27
|
+
attr_accessor :query_id, :query_def, :full_query_length
|
28
|
+
|
29
|
+
# initializes a new Query object
|
7
30
|
def initialize(query_id)
|
8
31
|
@query_id = query_id
|
9
32
|
@query_def = query_id
|
10
33
|
@full_query_length = 0
|
11
34
|
@hits = []
|
12
|
-
|
35
|
+
# inspect
|
13
36
|
end
|
14
37
|
|
38
|
+
# add a hit to query
|
15
39
|
def add_hit(h)
|
16
40
|
@hits.push h
|
17
41
|
end
|
18
|
-
|
42
|
+
|
43
|
+
# inspect query values with all hits
|
19
44
|
def inspect
|
20
|
-
res = "\n * Query #{@query_id} :"
|
45
|
+
res = "\n * Query #{@query_id} :"
|
21
46
|
res += "subject_id ident align_len mismatches gaps q_beg q_end s_beg s_end e_val bit_score reversed\n\n"
|
22
47
|
@hits.each{ |h| res+= h.inspect+"\n" }
|
23
|
-
|
48
|
+
|
24
49
|
return res
|
25
|
-
end
|
26
|
-
|
50
|
+
end
|
51
|
+
|
52
|
+
# get num of hits
|
27
53
|
def size
|
28
54
|
return @hits.size
|
29
|
-
end
|
30
|
-
|
55
|
+
end
|
56
|
+
|
57
|
+
# sort hits by command
|
31
58
|
def sort(comand)
|
32
59
|
return @hits.sort(comand)
|
33
60
|
end
|
34
|
-
|
35
|
-
end
|
36
61
|
|
37
|
-
|
62
|
+
end
|
63
|
+
|
64
|
+
|
@@ -1,23 +1,35 @@
|
|
1
|
+
# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# 'Software'), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
1
22
|
require "blast_query.rb"
|
2
23
|
require "blast_hit.rb"
|
3
24
|
|
4
25
|
require 'zlib'
|
5
26
|
require 'xmlsimple'
|
6
|
-
#xml=File.open('orf.1.xml').read
|
7
|
-
#data = XmlSimple.xml_in(xml)
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#data['best_orf'][0]['start'][0]
|
11
|
-
#data['best_orf'][0]['content']
|
12
|
-
|
13
|
-
######################################
|
14
|
-
# Author:: Almudena Bocinos Rioboo
|
15
|
-
# Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit"
|
16
|
-
|
17
|
-
######################################
|
18
27
|
|
28
|
+
# Extracts results from a blast results in XML
|
29
|
+
# format and uses it to create instances of "BlastQuery" and "BlastHit"
|
19
30
|
class BlastSimplexmlResult
|
20
31
|
|
32
|
+
# Parser initialization
|
21
33
|
def initialize(input)
|
22
34
|
|
23
35
|
@querys = []
|
@@ -105,7 +117,7 @@ class BlastSimplexmlResult
|
|
105
117
|
|
106
118
|
hit.q_seq = hsp['Hsp_qseq'][0]
|
107
119
|
hit.s_seq = hsp['Hsp_hseq'][0]
|
108
|
-
|
120
|
+
|
109
121
|
|
110
122
|
hit.subject_id= subject_id
|
111
123
|
hit.full_subject_length=full_subject_length
|
@@ -134,6 +146,7 @@ class BlastSimplexmlResult
|
|
134
146
|
return res
|
135
147
|
end
|
136
148
|
|
149
|
+
# finds a query by name
|
137
150
|
def find_query(querys,name_q)
|
138
151
|
# newq = querys.find{|q| ( q.find{|h| (h.subject_id)})}
|
139
152
|
new_q=nil
|
@@ -145,11 +158,12 @@ class BlastSimplexmlResult
|
|
145
158
|
return new_q
|
146
159
|
end
|
147
160
|
|
161
|
+
# check if there are querys
|
148
162
|
def empty?
|
149
|
-
|
150
163
|
return @querys.empty?
|
151
164
|
end
|
152
165
|
|
166
|
+
# get num of querys
|
153
167
|
def size
|
154
168
|
@querys.size
|
155
169
|
end
|