scbi_blast 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-10-19
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,21 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/scbi_blast.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_scbi_blast.rb
12
+ test/blast.xml
13
+ test/empty_blast.xml
14
+ test/blast.txt
15
+ lib/scbi_blast/blast_hit.rb
16
+ lib/scbi_blast/blast_query.rb
17
+ lib/scbi_blast/blast_table_result.rb
18
+ lib/scbi_blast/blast_xml_result.rb
19
+ lib/scbi_blast/blast_simplexml_result.rb
20
+ lib/scbi_blast/batch_blast.rb
21
+ lib/scbi_blast/dust_masker.rb
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on scbi_blast, see http://scbi_blast.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = scbi_blast
2
+
3
+ * http://github.com/#{github_username}/#{project_name}
4
+
5
+ == DESCRIPTION:
6
+
7
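+ scbi_blast runs command-line BLAST programs (blastn by default) from Ruby and parses their tabular or XML output into query and hit objects.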
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * FIX (list of requirements)
20
+
21
+ == INSTALL:
22
+
23
+ * FIX (sudo gem install, anything else)
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2010 Dario Guerrero
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Rakefile for the scbi_blast gem: loads Hoe (with the newgem plugin) and
# declares the gem specification, from which the Rake tasks are generated.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/scbi_blast'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'scbi_blast' do
  # One call per developer, so that every author name is paired with its own
  # e-mail address instead of sharing a single combined entry.
  self.developer 'Dario Guerrero', 'dariogf@gmail.com'
  self.developer 'Almudena Bocinos', 'alkoke@gmail.com'
  self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
  self.rubyforge_name = self.name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
# Load any project-specific rake tasks found under tasks/
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
data/lib/scbi_blast.rb ADDED
@@ -0,0 +1,15 @@
1
+ $: << File.join(File.dirname(__FILE__),File.basename(__FILE__,File.extname(__FILE__)))
2
+
3
+ require 'batch_blast'
4
+ require 'dust_masker'
5
+ require 'blast_query'
6
+ # require 'blast_xml_result'
7
+ require 'blast_simplexml_result'
8
+ require 'blast_hit'
9
+ require 'blast_table_result'
10
+
11
# Namespace holding gem-wide metadata for scbi_blast.
module ScbiBlast
  # Version string of the gem; frozen so it cannot be modified in place.
  VERSION = '0.0.30'.freeze
end
14
+
15
+
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
# Runs a command-line BLAST program over a batch of FASTA sequences, sending
# them through the program's standard input and optionally parsing the output.
class BatchBlast

  # Output-format arguments appended to the command line, keyed by the +fmt+
  # symbol accepted by get_blast_cmd, do_blast and do_blast_seqs.
  OUTPUT_FORMATS = {
    :table => ' -outfmt "7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore score qframe sframe qseq sseq" ',
    :xml => ' -outfmt 5 '
  }.freeze

  # database::     text appended verbatim at the end of the command line
  # blast_type::   text placed at the beginning of the command line
  # extra_params:: text inserted between blast_type and the output format
  def initialize(database, blast_type = 'blastn', extra_params = '')
    @blast_type = blast_type
    @database = database
    @extra_params = extra_params
  end

  # Returns the command line (a String) used to run blast with the given
  # output format.
  #
  # fmt:: :table or :xml
  #
  # Raises ArgumentError for any other format (previously this failed with an
  # obscure TypeError while concatenating nil).
  def get_blast_cmd(fmt = :table)
    format = OUTPUT_FORMATS[fmt]
    if format.nil?
      raise ArgumentError, "Unknown blast output format #{fmt.inspect}, expected :table or :xml"
    end

    # No dust-related flag is added: that logic was disabled in the original code.
    @blast_type + ' ' + @extra_params + format + @database
  end

  # Runs blast over the given sequences.
  #
  # seqs::         FASTA text, either as one String or as an Array of lines
  #                (joined with "\n" before being sent to blast)
  # fmt::          :table or :xml (see get_blast_cmd)
  # parse_output:: when true the output is wrapped in BlastTableResult or
  #                BlastSimplexmlResult; when false the raw output lines
  #                (an Array of Strings) are returned
  #
  # Raises RuntimeError when the command exits with a status greater than 0,
  # or when the number of parsed queries differs from the number of '>'
  # characters in the input.
  def do_blast(seqs, fmt = :table, parse_output = true)
    seq_fasta = seqs.is_a?(Array) ? seqs.join("\n") : seqs
    cmd = get_blast_cmd(fmt)
    res = ''

    # Warning: always send the complete sequences. We once spent a long time
    # trying to find out why this did not return all the hits obtained when
    # running blast by hand; the manual run was given the full sequence while
    # the MID step was sending only the first 20 nt.
    #
    # NOTE(review): cmd is built by plain string concatenation and handed to
    # IO.popen unescaped; callers must not pass untrusted text in the
    # constructor arguments.
    IO.popen(cmd, 'w+') do |blast|
      blast.sync = true
      blast.write(seq_fasta)
      blast.close_write
      res = blast.readlines
      blast.close_read
    end

    exit_code = $?.exitstatus
    if exit_code && exit_code > 0
      raise "Error doing blast #{cmd} to fasta: #{seq_fasta}"
    end

    return res unless parse_output

    res = parse_blast_output(res, fmt)
    ensure_all_processed(seqs, seq_fasta, res, cmd)
    res
  end

  # Same as do_blast, but takes objects responding to +seq_name+ and
  # +seq_fasta+ and builds the FASTA lines from them.
  def do_blast_seqs(seqs, fmt = :table, parse_output = true)
    fastas = []

    seqs.each do |seq|
      fastas.push '>' + seq.seq_name
      fastas.push seq.seq_fasta
    end

    do_blast(fastas, fmt, parse_output)
  end

  # Nothing to release; present so callers can invoke it unconditionally.
  def close
  end

  private

  # Wraps the raw output lines in the result class matching +fmt+.
  def parse_blast_output(lines, fmt)
    if fmt == :table
      BlastTableResult.new(lines)
    elsif fmt == :xml
      BlastSimplexmlResult.new(lines)
    else
      lines
    end
  end

  # Raises when the number of parsed queries differs from the number of '>'
  # characters in the submitted FASTA text, listing the header lines that have
  # no matching query. Headers are taken from the joined FASTA text, so this
  # works when +seqs+ is a String as well as when it is an Array.
  def ensure_all_processed(seqs, seq_fasta, res, cmd)
    submitted = seq_fasta.count('>')
    return if submitted == res.querys.count

    headers = seq_fasta.split("\n").select { |line| line.index('>') }
    processed = res.querys.map { |query| '>' + query.query_id }
    not_processed = headers - processed

    raise "These sequences #{submitted},#{res.querys.count} where not processed by #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
  end

end
114
+
115
+
@@ -0,0 +1,115 @@
1
# One alignment between a query and a subject sequence, as reported by blast.
#
# Coordinates are received 1-based and stored 0-based (one is subtracted from
# each), so they can be used directly as array indexes.
class BlastHit

  attr_accessor :q_beg, :q_end, :s_beg, :s_end
  attr_accessor :reversed

  # Values stored exactly as given.
  attr_accessor :subject_id, :acc, :definition, :q_seq, :s_seq, :full_subject_length

  # Values coerced by the writers defined below.
  attr_reader :align_len, :gaps, :mismatches, :score, :q_frame, :s_frame, :ident, :e_val, :bit_score

  # q_beg, q_end:: query begin/end (1-based; anything responding to to_i)
  # s_beg, s_end:: subject begin/end (1-based; anything responding to to_i)
  #
  # When the subject begin is greater than the subject end, both values are
  # swapped and +reversed+ is set to true.
  def initialize(q_beg, q_end, s_beg, s_end)
    @q_beg = q_beg.to_i - 1 # stored 0-based, to index arrays [0 .. ]
    @q_end = q_end.to_i - 1 # stored 0-based, to index arrays [0 .. ]
    @s_beg = s_beg.to_i - 1
    @s_end = s_end.to_i - 1

    # TODO - reversed should be taken from q_frame and s_frame instead of
    # s_end. In proteins it comes from q_frame, in nt from s_frame.
    @reversed = @s_beg > @s_end
    @s_beg, @s_end = @s_end, @s_beg if @reversed

    # Remaining fields are filled in through the writers:
    #  2. S_ID       - Subject ID.
    #  3. IDENT      - Identity (%).
    #  4. ALIGN_LEN  - Alignment length.
    #  5. MISMATCHES - Number of mismatches.
    #  6. GAPS       - Number of gaps.
    #  7. Q_BEG      - Query begin.
    #  8. Q_END      - Query end.
    #  9. S_BEG      - Subject begin.
    # 10. S_END      - Subject end.
    # 11. E_VAL      - Expect value.
    # 12. BIT_SCORE  - Bit score.
  end

  # Writers that convert their argument to Float.
  def ident=(v)
    @ident = v.to_f
  end

  def e_val=(v)
    @e_val = v.to_f
  end

  def bit_score=(v)
    @bit_score = v.to_f
  end

  def score=(v)
    @score = v.to_f
  end

  # Writers that convert their argument to Integer.
  def align_len=(v)
    @align_len = v.to_i
  end

  def mismatches=(v)
    @mismatches = v.to_i
  end

  def gaps=(v)
    @gaps = v.to_i
  end

  def q_frame=(v)
    @q_frame = v.to_i
  end

  def s_frame=(v)
    @s_frame = v.to_i
  end

  # One-line, column-aligned description of the hit.
  #
  # Every field goes through to_s before padding, so a hit whose subject_id,
  # acc or definition has not been set yet can still be inspected instead of
  # raising NoMethodError on nil.
  def inspect
    res = "Hit: #{@subject_id.to_s.ljust(10)} #{@ident.to_s.rjust(4)} #{@align_len.to_s.rjust(2)} #{@mismatches.to_s.rjust(2)} #{@gaps.to_s.rjust(2)} #{@q_beg.to_s.rjust(5)} #{@q_end.to_s.rjust(5)} #{@s_beg.to_s.rjust(5)} #{@s_end.to_s.rjust(5)} #{@e_val.to_s.rjust(5)} #{@bit_score.to_s.rjust(5)} #{@reversed.to_s.rjust(5)}"
    res += " #{@score.to_s.rjust(5)} #{@acc.to_s.ljust(10)} #{@definition.to_s.ljust(10)} #{@q_frame.to_s.rjust(2)} #{@s_frame.to_s.rjust(2)} #{@full_subject_length.to_s.rjust(5)} #{@q_seq}.#{@s_seq}."

    res
  end

  # Returns the subject id (same value as +subject_id+).
  def get_subject
    @subject_id
  end

end
@@ -0,0 +1,37 @@
1
# A query sequence together with the list of hits collected for it.
class BlastQuery

  attr_accessor :hits
  attr_accessor :query_id, :query_def, :full_query_length

  # query_id:: identifier of the query; also used as the initial query_def.
  # The query starts with no hits and a full_query_length of 0.
  def initialize(query_id)
    @query_id = query_id
    @query_def = query_id
    @full_query_length = 0
    @hits = []
  end

  # Appends a hit to this query.
  def add_hit(h)
    @hits.push h
  end

  # Multi-line description: a header with the query id and the column names,
  # followed by one line per hit.
  def inspect
    res = "\n * Query #{@query_id} :"
    res += "subject_id ident align_len mismatches gaps q_beg q_end s_beg s_end e_val bit_score reversed\n\n"
    res + @hits.map { |h| h.inspect + "\n" }.join
  end

  # Number of hits.
  def size
    @hits.size
  end

  # Returns a new, sorted Array with the hits; the stored list is not changed.
  #
  # comand:: optional comparator (anything convertible with to_proc, e.g. a
  #          lambda taking two hits and returning -1, 0 or 1). A block may be
  #          given instead. With neither, the hits are sorted with their own <=>.
  #
  # The original implementation passed +comand+ as a positional argument to
  # Array#sort, which accepts none, so every call raised ArgumentError.
  def sort(comand = nil, &block)
    comparator = comand || block
    comparator ? @hits.sort(&comparator) : @hits.sort
  end

end
36
+
37
+
@@ -0,0 +1,158 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+ require 'zlib'
5
+ require 'xmlsimple'
6
+ #xml=File.open('orf.1.xml').read
7
+ #data = XmlSimple.xml_in(xml)
8
+ #
9
+ #
10
+ #data['best_orf'][0]['start'][0]
11
+ #data['best_orf'][0]['content']
12
+
13
+ ######################################
14
+ # Author:: Almudena Bocinos Rioboo
15
+ # Extracts results from BLAST XML output and uses them to create instances of "BlastQuery" and "BlastHit"
16
+
17
+ ######################################
18
+
19
# Parses BLAST XML output with XmlSimple and exposes it as a list of
# BlastQuery objects (+querys+), each holding its BlastHit objects.
class BlastSimplexmlResult

  attr_accessor :querys

  # input:: either an Array with the lines of the XML text, or a String with
  #         the path of a file containing it. A blank String gives an empty
  #         result.
  #
  # Raises RuntimeError when +input+ is a non-blank String that is not the
  # path of an existing file.
  def initialize(input)
    @querys = []

    lines = read_lines(input)
    return if lines.empty?

    data = XmlSimple.xml_in(lines.join)
    iterations = data['BlastOutput_iterations']

    # Output without any iteration gives an empty result instead of failing
    # with NoMethodError on nil.
    iteration_list = (iterations && iterations[0] && iterations[0]['Iteration']) || []

    iteration_list.each { |iteration| @querys.push(build_query(iteration)) }
  end

  # Multi-line description of the whole result: a header, the number of
  # queries and the description of every query.
  def inspect
    res = "Blast results:\n"
    res += '-' * 20
    res += "\nQuerys: #{@querys.count}\n"
    res + @querys.map { |q| q.inspect + "\n" }.join
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none.
  def find_query(querys, name_q)
    return nil if querys.empty?

    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was parsed.
  def empty?
    @querys.empty?
  end

  # Number of parsed queries.
  def size
    @querys.size
  end

  private

  # Returns the XML lines for +input+ (see initialize).
  def read_lines(input)
    return input if input.is_a?(Array)
    return [] if input.strip.empty?

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    raise "File #{input} doesn't exists" unless File.exist?(input)

    # Block form closes the file even if reading fails.
    File.open(input, 'r') { |fich| fich.readlines }
  end

  # Builds the BlastQuery (with its hits) described by one 'Iteration' node.
  def build_query(iteration)
    query_id = iteration['Iteration_query-ID'][0]
    query_def = iteration['Iteration_query-def'][0]

    # Keep only the first whitespace-delimited word of the definition.
    query_def = $1 if query_def =~ /^([^\s]+)/

    query = BlastQuery.new(query_id)
    query.query_def = query_def
    # Stored exactly as it comes from the XML (not converted to Integer).
    query.full_query_length = iteration['Iteration_query-len'][0]

    hits = iteration['Iteration_hits'][0]['Hit']
    hits.each { |h| add_hits(query, h) } unless hits.nil?

    query
  end

  # Adds to +query+ one BlastHit for every 'Hsp' node found inside the 'Hit'
  # node +h+.
  def add_hits(query, h)
    subject_id = h['Hit_id'][0]
    acc = h['Hit_accession'][0]
    full_subject_length = h['Hit_len'][0].to_i

    # Fall back to the subject id when blast reports no definition.
    hit_def = h['Hit_def'][0]
    hit_def = subject_id if hit_def == 'No definition line'

    h['Hit_hsps'][0]['Hsp'].each do |hsp|
      hit = build_hit(hsp)

      hit.subject_id = subject_id
      hit.full_subject_length = full_subject_length
      hit.definition = hit_def
      hit.acc = acc

      query.add_hit(hit)
    end
  end

  # Builds a BlastHit with the alignment data of one 'Hsp' node.
  def build_hit(hsp)
    hit = BlastHit.new(hsp['Hsp_query-from'][0].to_i,
                       hsp['Hsp_query-to'][0].to_i,
                       hsp['Hsp_hit-from'][0].to_i,
                       hsp['Hsp_hit-to'][0].to_i)

    hit.align_len = hsp['Hsp_align-len'][0].to_i
    # Identity as a percentage of the alignment length.
    hit.ident = (hsp['Hsp_identity'][0].to_f / hit.align_len) * 100
    hit.gaps = hsp['Hsp_gaps'][0].to_i
    # Blank positions of the midline, minus the gaps.
    hit.mismatches = hsp['Hsp_midline'][0].count(' ').to_i - hit.gaps

    # e-value rounded to 3 decimals, bit score to 2.
    hit.e_val = hsp['Hsp_evalue'][0].to_f
    hit.e_val = (hit.e_val * 1000).round / 1000.0
    hit.bit_score = hsp['Hsp_bit-score'][0].to_f
    hit.bit_score = (hit.bit_score * 100).round / 100.0

    hit.score = hsp['Hsp_score'][0].to_f
    hit.q_frame = hsp['Hsp_query-frame'][0].to_i
    hit.s_frame = hsp['Hsp_hit-frame'][0].to_i

    hit.q_seq = hsp['Hsp_qseq'][0]
    hit.s_seq = hsp['Hsp_hseq'][0]

    hit
  end

end