scbi_blast 0.0.30

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-10-19
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,21 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/scbi_blast.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_scbi_blast.rb
12
+ test/blast.xml
13
+ test/empty_blast.xml
14
+ test/blast.txt
15
+ lib/scbi_blast/blast_hit.rb
16
+ lib/scbi_blast/blast_query.rb
17
+ lib/scbi_blast/blast_table_result.rb
18
+ lib/scbi_blast/blast_xml_result.rb
19
+ lib/scbi_blast/blast_simplexml_result.rb
20
+ lib/scbi_blast/batch_blast.rb
21
+ lib/scbi_blast/dust_masker.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on scbi_blast, see http://scbi_blast.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = scbi_blast
2
+
3
+ * http://github.com/#{github_username}/#{project_name}
4
+
5
+ == DESCRIPTION:
6
+
7
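+ scbi_blast runs NCBI BLAST+ programs (blastn by default) over batches of FASTA sequences and parses their tabular or XML output into BlastQuery and BlastHit objects.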
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
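+ * NCBI BLAST+ command-line programs (e.g. blastn) available on the PATH
+ * xml-simple gem (loaded with require 'xmlsimple') for parsing XML output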
20
+
21
+ == INSTALL:
22
+
23
+ * sudo gem install scbi_blast
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2010 Dario Guerrero
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Rakefile for the scbi_blast gem.
# All rake tasks are generated by Hoe (with the newgem plugin); run
# 'rake -T' from the gem root directory to list them.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/scbi_blast'

Hoe.plugin :newgem
# Optional plugins, currently disabled:
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Gem specification. The block is evaluated by Hoe, so `self` is the spec.
$hoe = Hoe.spec('scbi_blast') do
  self.developer('Dario Guerrero & Almudena Bocinos', 'dariogf@gmail.com, alkoke@gmail.com')
  # TODO: remove if a post-install message is not required
  self.post_install_message = 'PostInstall.txt'
  # TODO: this is the default value
  self.rubyforge_name = self.name
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'

# Load any project-specific rake tasks.
Dir['tasks/**/*.rake'].each do |rake_file|
  load rake_file
end

# TODO: want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
data/lib/scbi_blast.rb ADDED
@@ -0,0 +1,15 @@
1
+ $: << File.join(File.dirname(__FILE__),File.basename(__FILE__,File.extname(__FILE__)))
2
+
3
+ require 'batch_blast'
4
+ require 'dust_masker'
5
+ require 'blast_query'
6
+ # require 'blast_xml_result'
7
+ require 'blast_simplexml_result'
8
+ require 'blast_hit'
9
+ require 'blast_table_result'
10
+
11
# Top-level namespace of the scbi_blast gem; holds the gem version.
module ScbiBlast
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = '0.0.30'.freeze
end
14
+
15
+
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
# Builds a BLAST command line and runs it over batches of FASTA sequences,
# feeding the sequences through the command's standard input and optionally
# parsing the output into result objects.
class BatchBlast

  # Output-format switches appended to the command line, keyed by the +fmt+
  # symbol accepted by the public methods.
  OUTPUT_FORMATS = {
    :table => ' -outfmt "7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore score qframe sframe qseq sseq" ',
    :xml   => ' -outfmt 5 '
  }.freeze

  # database::     text appended at the end of the command line (the
  #                database arguments).
  # blast_type::   program to execute (defaults to 'blastn').
  # extra_params:: extra command-line parameters inserted after the program.
  def initialize(database, blast_type = 'blastn', extra_params = '')
    @blast_type = blast_type
    @database = database
    @extra_params = extra_params
  end

  # Returns the full command line for the given output format
  # (+:table+ or +:xml+).
  #
  # Raises ArgumentError for any other format; previously this failed with
  # an unrelated TypeError while concatenating nil.
  def get_blast_cmd(fmt = :table)
    format = OUTPUT_FORMATS[fmt]
    raise ArgumentError, "Unknown blast output format: #{fmt.inspect}" if format.nil?

    # The dust flag is intentionally left untouched (no ' -dust no ').
    @blast_type + ' ' + @extra_params + format + @database
  end

  # Runs the command over +seqs+ and returns its result.
  #
  # seqs::         FASTA text, either a String or an Array of lines
  #                (joined with newlines).
  # fmt::          +:table+ or +:xml+.
  # parse_output:: when true the output is wrapped in a BlastTableResult
  #                (+:table+) or BlastSimplexmlResult (+:xml+); otherwise the
  #                raw output lines are returned.
  #
  # Raises RuntimeError if the command does not finish successfully, or if
  # the parsed result does not contain one query per input sequence.
  def do_blast(seqs, fmt = :table, parse_output = true)
    seq_fasta = seqs.is_a?(Array) ? seqs.join("\n") : seqs
    cmd = get_blast_cmd(fmt)

    res = run_blast(cmd, seq_fasta)
    return res unless parse_output

    res = fmt == :table ? BlastTableResult.new(res) : BlastSimplexmlResult.new(res)
    check_all_processed(seq_fasta, res, cmd, seqs)

    res
  end

  # Same as #do_blast, but +seqs+ is a collection of objects responding to
  # +seq_name+ and +seq_fasta+.
  def do_blast_seqs(seqs, fmt = :table, parse_output = true)
    fastas = []

    seqs.each do |seq|
      fastas.push('>' + seq.seq_name)
      fastas.push(seq.seq_fasta)
    end

    do_blast(fastas, fmt, parse_output)
  end

  # Nothing to release; kept so callers can treat this like a closeable
  # resource.
  def close
  end

  private

  # Executes +cmd+, writes +seq_fasta+ to its stdin and returns the output
  # lines.
  #
  # Beware (translated from the original note): we once went almost crazy
  # trying to find out why this did not return every hit found when running
  # blast by hand. The manual run received the whole sequence, while the
  # caller was passing only the first 20 nt.
  def run_blast(cmd, seq_fasta)
    output = []

    IO.popen(cmd, 'w+') do |blast|
      blast.sync = true
      blast.write(seq_fasta)
      blast.close_write
      output = blast.readlines
      blast.close_read
    end

    # success? is nil when the process was killed by a signal, so that case
    # is reported as an error too instead of being silently accepted.
    raise "Error doing blast #{cmd} to fasta: #{seq_fasta}" unless $?.success?

    output
  end

  # Verifies that +res+ holds one query per FASTA header of +seq_fasta+ and
  # raises RuntimeError listing the missing headers otherwise.
  #
  # Headers are lines starting with '>', so a '>' inside a description no
  # longer inflates the count, and String input is handled like Array input.
  def check_all_processed(seq_fasta, res, cmd, seqs)
    headers = seq_fasta.lines.map { |line| line.chomp }.select { |line| line.start_with?('>') }
    return if headers.size == res.querys.count

    processed = []
    res.querys.each do |query|
      processed.push(query.query_id.to_s)
      processed.push(query.query_def.to_s) if query.respond_to?(:query_def)
    end

    not_processed = headers.reject do |header|
      name = header[1..-1]
      processed.include?(name) || processed.include?(name.split.first)
    end

    raise "These sequences #{headers.size},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
  end

end
114
+
115
+
@@ -0,0 +1,115 @@
1
# One alignment (hit/HSP) between a query and a subject sequence.
#
# Coordinates are stored 0-based (the 1-based values received by the
# constructor minus one). Subject coordinates are normalised so that
# s_beg <= s_end, and +reversed+ records whether they had to be swapped.
class BlastHit

  attr_accessor :q_beg, :q_end, :s_beg, :s_end
  attr_accessor :reversed

  attr_reader :subject_id, :align_len, :gaps, :mismatches
  attr_reader :score, :acc, :definition, :q_frame, :s_frame, :full_subject_length, :ident, :e_val, :bit_score
  attr_reader :q_seq, :s_seq

  # Plain setters: the value is stored as given.
  attr_writer :subject_id, :acc, :definition, :q_seq, :s_seq, :full_subject_length

  # q_beg, q_end:: 1-based query begin/end (anything responding to to_i).
  # s_beg, s_end:: 1-based subject begin/end (anything responding to to_i).
  def initialize(q_beg, q_end, s_beg, s_end)
    @q_beg = q_beg.to_i - 1 # stored 0-based, as array positions [0 .. ]
    @q_end = q_end.to_i - 1
    @s_beg = s_beg.to_i - 1
    @s_end = s_end.to_i - 1

    @reversed = false

    # TODO: reversed should be taken from q_frame and s_frame instead of
    # s_end. In proteins it comes from q_frame, in nucleotides from s_frame.
    if @s_beg > @s_end
      @s_beg, @s_end = @s_end, @s_beg
      @reversed = true
    end
  end

  # Identity percentage, coerced to Float.
  def ident=(v)
    @ident = v.to_f
  end

  # Alignment length, coerced to Integer.
  def align_len=(v)
    @align_len = v.to_i
  end

  # Number of mismatches, coerced to Integer.
  def mismatches=(v)
    @mismatches = v.to_i
  end

  # Number of gaps, coerced to Integer.
  def gaps=(v)
    @gaps = v.to_i
  end

  # Expect value, coerced to Float.
  def e_val=(v)
    @e_val = v.to_f
  end

  # Bit score, coerced to Float.
  def bit_score=(v)
    @bit_score = v.to_f
  end

  # Raw score, coerced to Float.
  def score=(v)
    @score = v.to_f
  end

  # Query frame, coerced to Integer.
  def q_frame=(v)
    @q_frame = v.to_i
  end

  # Subject frame, coerced to Integer.
  def s_frame=(v)
    @s_frame = v.to_i
  end

  # One-line, column-aligned description of the hit.
  #
  # Every field goes through to_s first, so a hit whose subject_id, acc or
  # definition has not been assigned yet can still be inspected (calling
  # ljust directly on nil used to raise NoMethodError).
  def inspect
    main_columns = [
      @subject_id.to_s.ljust(10),
      @ident.to_s.rjust(4),
      @align_len.to_s.rjust(2),
      @mismatches.to_s.rjust(2),
      @gaps.to_s.rjust(2),
      @q_beg.to_s.rjust(5),
      @q_end.to_s.rjust(5),
      @s_beg.to_s.rjust(5),
      @s_end.to_s.rjust(5),
      @e_val.to_s.rjust(5),
      @bit_score.to_s.rjust(5),
      @reversed.to_s.rjust(5)
    ]

    extra_columns = [
      @score.to_s.rjust(5),
      @acc.to_s.ljust(10),
      @definition.to_s.ljust(10),
      @q_frame.to_s.rjust(2),
      @s_frame.to_s.rjust(2),
      @full_subject_length.to_s.rjust(5),
      "#{@q_seq}.#{@s_seq}."
    ]

    "Hit: #{main_columns.join(' ')} #{extra_columns.join(' ')}"
  end

  # Returns the subject id (same value as #subject_id).
  def get_subject
    @subject_id
  end

end
@@ -0,0 +1,37 @@
1
# The result for one query sequence: its identifiers, its length and the
# list of hits found for it.
class BlastQuery

  attr_accessor :hits
  attr_accessor :query_id, :query_def, :full_query_length

  # query_id:: identifier of the query; also used as the initial query_def.
  def initialize(query_id)
    @query_id = query_id
    @query_def = query_id
    @full_query_length = 0
    @hits = []
  end

  # Appends hit +h+ to this query.
  def add_hit(h)
    @hits.push h
  end

  # Multi-line description: a header line followed by one line per hit.
  def inspect
    res = "\n * Query #{@query_id} :"
    res += "subject_id ident align_len mismatches gaps q_beg q_end s_beg s_end e_val bit_score reversed\n\n"
    @hits.each { |h| res += h.inspect + "\n" }

    res
  end

  # Number of hits.
  def size
    @hits.size
  end

  # Returns a new, sorted array of hits; the stored hits are not modified.
  #
  # comand:: optional comparator (anything convertible to a block, e.g. a
  #          Proc or lambda taking two hits and returning -1, 0 or 1).
  #          A literal block may be given instead. With neither, hits are
  #          sorted by their own <=>.
  #
  # The comparator used to be passed to Array#sort as a positional argument,
  # which Array#sort does not accept, so every call raised ArgumentError.
  def sort(comand = nil, &block)
    comparator = comand || block
    comparator ? @hits.sort(&comparator) : @hits.sort
  end

end
36
+
37
+
@@ -0,0 +1,158 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+ require 'zlib'
5
+ require 'xmlsimple'
6
+ #xml=File.open('orf.1.xml').read
7
+ #data = XmlSimple.xml_in(xml)
8
+ #
9
+ #
10
+ #data['best_orf'][0]['start'][0]
11
+ #data['best_orf'][0]['content']
12
+
13
######################################
# Author:: Almudena Bocinos Rioboo
# Extracts results from BLAST XML output (parsed with XmlSimple) and uses
# them to create instances of "BlastQuery" and "BlastHit"
######################################

class BlastSimplexmlResult

  attr_accessor :querys

  # input:: either an Array with the lines of the XML output, or a String
  #         with the path of a file containing it. A blank String or an
  #         empty Array produces an empty result.
  #
  # Raises RuntimeError when +input+ is a non-blank path that does not exist.
  def initialize(input)
    @querys = []

    lines = read_lines(input)
    return if lines.empty?

    data = XmlSimple.xml_in(lines.join)
    iterations = data['BlastOutput_iterations']

    iterations[0]['Iteration'].each do |iteration|
      @querys.push(build_query(iteration))
    end
  end

  # Multi-line description of every query and its hits.
  def inspect
    res = "Blast results:\n"
    res += '-' * 20
    res += "\nQuerys: #{@querys.count}\n"
    @querys.each { |q| res += q.inspect + "\n" }
    res
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none.
  def find_query(querys, name_q)
    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was parsed.
  def empty?
    @querys.empty?
  end

  # Number of parsed queries.
  def size
    @querys.size
  end

  private

  # Returns the XML lines for +input+ (see #initialize).
  def read_lines(input)
    return input if input.is_a?(Array)
    return [] if input.strip.empty?

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    raise "File #{input} doesn't exists" unless File.exist?(input)

    # File.readlines closes the file even if reading fails.
    File.readlines(input)
  end

  # Builds the BlastQuery (with all its hits) for one 'Iteration' element.
  def build_query(iteration)
    query_id = iteration['Iteration_query-ID'][0]
    full_query_length = iteration['Iteration_query-len'][0]
    query_def = iteration['Iteration_query-def'][0]

    # Keep only the first whitespace-delimited word of the definition.
    if query_def =~ /^([^\s]+)/
      query_def = $1
    end

    query = BlastQuery.new(query_id)
    query.query_def = query_def
    # NOTE(review): stored exactly as it comes from the XML (not converted
    # to Integer) -- confirm whether callers expect a number here.
    query.full_query_length = full_query_length

    hits = iteration['Iteration_hits'][0]['Hit']
    unless hits.nil?
      hits.each do |h|
        h['Hit_hsps'][0]['Hsp'].each do |hsp|
          query.add_hit(build_hit(h, hsp))
        end
      end
    end

    query
  end

  # Builds one BlastHit from a 'Hit' element +h+ and one of its 'Hsp'
  # elements.
  def build_hit(h, hsp)
    subject_id = h['Hit_id'][0]
    hit_def = h['Hit_def'][0]
    # Fall back to the subject id when the hit carries no definition.
    hit_def = subject_id if hit_def == 'No definition line'

    hit = BlastHit.new(hsp['Hsp_query-from'][0].to_i,
                       hsp['Hsp_query-to'][0].to_i,
                       hsp['Hsp_hit-from'][0].to_i,
                       hsp['Hsp_hit-to'][0].to_i)

    hit.align_len = hsp['Hsp_align-len'][0].to_i
    # Identity as a percentage of the alignment length.
    hit.ident = (hsp['Hsp_identity'][0].to_f / hit.align_len) * 100
    hit.gaps = hsp['Hsp_gaps'][0].to_i
    # Blanks in the midline are the non-matching columns; the gaps are
    # subtracted from that count to get the mismatches.
    hit.mismatches = hsp['Hsp_midline'][0].count(' ').to_i - hit.gaps

    # E-value rounded to 3 decimals and bit score to 2 decimals.
    hit.e_val = hsp['Hsp_evalue'][0].to_f
    hit.e_val = (hit.e_val * 1000).round / 1000.0
    hit.bit_score = hsp['Hsp_bit-score'][0].to_f
    hit.bit_score = (hit.bit_score * 100).round / 100.0

    hit.score = hsp['Hsp_score'][0].to_f
    hit.q_frame = hsp['Hsp_query-frame'][0].to_i
    hit.s_frame = hsp['Hsp_hit-frame'][0].to_i

    hit.q_seq = hsp['Hsp_qseq'][0]
    hit.s_seq = hsp['Hsp_hseq'][0]

    hit.subject_id = subject_id
    hit.full_subject_length = h['Hit_len'][0].to_i
    hit.definition = hit_def
    hit.acc = h['Hit_accession'][0]

    hit
  end

end