scbi_blast 0.0.30

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,190 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+
5
+
6
+ ######################################
7
+ # Author:: Almudena Bocinos Rioboo
8
+ # Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit"
9
+
10
+ ######################################
11
+
12
# Parses BLAST tabular output (the commented table format, e.g. blast+
# "-outfmt 7" or blastall "-m 9") into BlastQuery objects holding BlastHit
# objects.
#
# Each data line is split on tabs and its columns are read in this order:
#
#   qseqid sacc pident length mismatch gapopen qstart qend sstart send
#   evalue bitscore score qframe sframe qseq sseq
#
# The first twelve columns are what BLAST calls the 'std' format; columns
# missing from a line are stored as nil. All values are stored as the raw
# strings read from the input (no numeric conversion is done here).
#
# Comment lines are used in two ways:
# * "# Query: NAME" remembers NAME as the current query name.
# * "# 0 hits found" registers an empty BlastQuery for the current name.
class BlastTableResult

  # Array of BlastQuery objects in the order they were first seen.
  attr_accessor :querys

  # input:: either an Array of lines, or the path of a file to read.
  #
  # The lines of an Array input are never modified (frozen strings are fine).
  # Blank lines are ignored.
  def initialize(input)
    @querys = []
    by_id = {}        # query_id => first BlastQuery registered with that id
    query_name = ''

    read_lines(input).each do |raw_line|
      # chomp (not chomp!) so the caller's strings are left untouched
      line = raw_line.chomp
      # a blank line carries no hit; parsing it would create a nil-id query
      next if line.strip.empty?

      if line =~ /^\s*#/
        if line =~ /^#\sQuery:\s+(.+)$/
          query_name = $1
        elsif line =~ /^#\s0\shits\sfound$/
          register(BlastQuery.new(query_name), by_id)
        end
      else
        fields = line.split(/\t+/)
        hit = build_hit(fields)

        # reuse the query if it already exists, otherwise create and add it
        query = by_id[fields[0]] || register(BlastQuery.new(fields[0]), by_id)
        query.add_hit(hit)
      end
    end
  end

  # Returns a multi-line, human readable summary: a header, the number of
  # queries and the inspect output of every query (one per line).
  def inspect
    header = "Blast results:\n#{'-' * 20}\nQuerys: #{@querys.count}\n"
    header + @querys.map { |q| "#{q.inspect}\n" }.join
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none.
  def find_query(querys, name_q)
    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was found in the input.
  def empty?
    @querys.empty?
  end

  # Number of queries found in the input.
  def size
    @querys.size
  end

  private

  # Returns the input as an array of lines, reading the file when +input+
  # is not already an Array. The block form closes the file even on error.
  def read_lines(input)
    return input if input.is_a?(Array)

    File.open(input, 'r') { |file| file.readlines }
  end

  # Appends +query+ to @querys and indexes it by its query_id. The first
  # query registered under an id wins, which matches find_query.
  def register(query, by_id)
    @querys.push query
    by_id[query.query_id] ||= query
    query
  end

  # Builds a BlastHit from the columns of one data line.
  def build_hit(fields)
    _qseqid, sacc, pident, length, mismatch, gapopen, q_start, q_end,
      s_start, s_end, evalue, bitscore, score, qframe, sframe, qseq, sseq = fields

    hit = BlastHit.new(q_start, q_end, s_start, s_end)

    hit.align_len = length
    hit.ident = pident
    hit.gaps = gapopen          # the 'gapopen' column (number of gap openings)
    hit.mismatches = mismatch
    hit.e_val = evalue
    hit.bit_score = bitscore

    hit.score = score
    hit.q_frame = qframe
    hit.s_frame = sframe

    # the table only carries the subject accession, so it is also used as
    # subject id and definition; the full subject length is not available
    hit.subject_id = sacc
    hit.full_subject_length = 0
    hit.definition = sacc
    hit.acc = sacc
    hit.q_seq = qseq
    hit.s_seq = sseq

    hit
  end
end
@@ -0,0 +1,152 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+ require 'nokogiri'
5
+ #xml=File.open('orf.1.xml').read
6
+ #data = XmlSimple.xml_in(xml)
7
+ #
8
+ #
9
+ #data['best_orf'][0]['start'][0]
10
+ #data['best_orf'][0]['content']
11
+
12
+ ######################################
13
+ # Author:: Almudena Bocinos Rioboo
14
+ # Extracts results from a BLAST XML output file and uses them to create instances of "BlastQuery" and "BlastHit"
15
+
16
+ ######################################
17
+
18
# Parses BLAST XML output with Nokogiri into BlastQuery objects holding
# BlastHit objects.
#
# One BlastQuery is created per <Iteration> element, and one BlastHit per
# <Hsp> element found under that iteration's <Hit> elements.
class BlastXmlResult

  # Array of BlastQuery objects, one per <Iteration>, in document order.
  attr_accessor :querys

  # input:: either an Array of lines (joined to form the XML document), or
  #         the path of an XML file.
  #
  # A path that does not exist, or an empty input, yields an empty result
  # instead of an error.
  def initialize(input)
    @querys = []

    lines = read_lines(input)
    return if lines.empty?

    data = Nokogiri::XML(lines.join)
    # searched from the document rather than from data.root, so a document
    # without a root element gives no iterations instead of failing on nil
    data.xpath('//Iteration').each do |iteration|
      query = build_query(iteration)
      @querys.push query

      iteration.xpath('Iteration_hits/Hit').each do |hit_node|
        add_hits(query, hit_node)
      end
    end
  end

  # Returns a multi-line, human readable summary: a header, the number of
  # queries and the inspect output of every query (one per line).
  def inspect
    header = "Blast results:\n#{'-' * 20}\nQuerys: #{@querys.count}\n"
    header + @querys.map { |q| "#{q.inspect}\n" }.join
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none.
  def find_query(querys, name_q)
    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was found in the input.
  def empty?
    @querys.empty?
  end

  # Number of queries found in the input.
  def size
    @querys.size
  end

  private

  # Returns the input as an array of lines. Reads the file when +input+ is
  # a path; a missing file gives an empty array. File.exist? is used because
  # File.exists? is no longer available in current Ruby versions.
  def read_lines(input)
    return input if input.is_a?(Array)
    return [] unless File.exist?(input)

    File.open(input, 'r') { |file| file.readlines }
  end

  # Builds the BlastQuery described by one <Iteration> element.
  def build_query(iteration)
    query_def = iteration.xpath('Iteration_query-def').text
    # keep only the first whitespace-delimited token of the definition
    query_def = $1 if query_def =~ /^([^\s]+)/

    query = BlastQuery.new(iteration.xpath('Iteration_query-ID').text)
    query.query_def = query_def
    query.full_query_length = iteration.xpath('Iteration_query-len').text
    query
  end

  # Adds to +query+ one BlastHit per <Hsp> found under the <Hit> element.
  def add_hits(query, hit_node)
    subject_id = hit_node.xpath('Hit_id').text
    acc = hit_node.xpath('Hit_accession').text
    full_subject_length = hit_node.xpath('Hit_len').text.to_i
    hit_def = hit_node.xpath('Hit_def').text
    # fall back to the subject id when BLAST reports no definition
    hit_def = subject_id if hit_def == 'No definition line'

    hit_node.xpath('Hit_hsps/Hsp').each do |hsp|
      hit = build_hit(hsp)

      hit.subject_id = subject_id
      hit.full_subject_length = full_subject_length
      hit.definition = hit_def
      hit.acc = acc

      query.add_hit(hit)
    end
  end

  # Builds a BlastHit with the alignment data of one <Hsp> element.
  def build_hit(hsp)
    q_beg = hsp.xpath('Hsp_query-from').text.to_i
    q_end = hsp.xpath('Hsp_query-to').text.to_i
    s_beg = hsp.xpath('Hsp_hit-from').text.to_i
    s_end = hsp.xpath('Hsp_hit-to').text.to_i

    hit = BlastHit.new(q_beg, q_end, s_beg, s_end)

    hit.align_len = hsp.xpath('Hsp_align-len').text.to_i
    # identity as a percentage of the alignment length
    hit.ident = (hsp.xpath('Hsp_identity').text.to_f / hit.align_len) * 100
    hit.gaps = hsp.xpath('Hsp_gaps').text.to_i
    # every blank in the midline is a non-identical column; gaps are
    # subtracted so that only mismatches remain
    hit.mismatches = hsp.xpath('Hsp_midline').text.count(' ').to_i - hit.gaps
    # NOTE(review): rounding to 3 decimals turns very small e-values into
    # 0.0; kept exactly as in the original implementation
    hit.e_val = hsp.xpath('Hsp_evalue').text.to_f
    hit.e_val = (hit.e_val * 1000).round / 1000.0
    hit.bit_score = hsp.xpath('Hsp_bit-score').text.to_f
    hit.bit_score = (hit.bit_score * 100).round / 100.0

    hit.score = hsp.xpath('Hsp_score').text.to_f
    hit.q_frame = hsp.xpath('Hsp_query-frame').text.to_i
    hit.s_frame = hsp.xpath('Hsp_hit-frame').text.to_i

    hit.q_seq = hsp.xpath('Hsp_qseq').text
    hit.s_seq = hsp.xpath('Hsp_hseq').text

    hit
  end
end
@@ -0,0 +1,103 @@
1
+
2
# Low-complexity ("dust") intervals reported for a single query sequence.
class DustQuery

  # query_id:: identifier of the sequence.
  # dust::     array of intervals, each stored as pushed (do_dust pushes
  #            two-element [from, to] arrays).
  attr_accessor :query_id, :dust

  def initialize(query_id)
    @dust = []
    @query_id = query_id
  end

  # Appends one interval to this query.
  def push(interval)
    @dust.push interval
  end

  # Returns a one-line description of the query and all its intervals,
  # e.g. "Query seq1: from 3 to 346 from 354 to 683".
  #
  # The string is returned explicitly: the value of the each loop would be
  # the @dust array, not the text built from it.
  def inspect
    "Query #{query_id}:" + @dust.map { |d| " from #{d[0]} to #{d[1]}" }.join
  end

end
23
+
24
# Thin wrapper around the external "dustmasker" command: sequences in
# fasta format are piped to it and the intervals it reports are returned
# as DustQuery objects.
class DustMasker

  # extra_params:: additional command line options passed to dustmasker.
  def initialize(extra_params = '')
    @format = 'interval'
    @extra_params = extra_params
  end

  # Returns the shell command used to run dustmasker.
  #
  # extra_params:: options to use for this command; when empty, the options
  #                given to the constructor are used.
  #
  # The parts are joined with single spaces, so the options can no longer
  # run into "-outfmt" (e.g. "-window 64-outfmt"). stderr is discarded.
  def get_cmd(extra_params = '')
    options = extra_params.to_s.strip
    options = @extra_params.to_s.strip if options.empty?

    ['dustmasker', options, '-outfmt', @format, '2>/dev/null'].reject(&:empty?).join(' ')
  end

  # Runs dustmasker over +seq_fasta+ and returns an array of DustQuery, one
  # per sequence header found in the output.
  #
  # seq_fasta:: a String in fasta format, or an Array of lines (joined with
  #             newlines). nil or empty input returns an empty array.
  #
  # Raises RuntimeError when the data contains no '>' header, when the
  # command exits with a non-zero status, or when its output cannot be
  # parsed.
  def do_dust(seq_fasta)
    return [] if seq_fasta.nil? || seq_fasta.empty?

    seq_fasta = seq_fasta.join("\n") if seq_fasta.is_a?(Array)

    cmd = get_cmd(@extra_params)
    raise "Data passed to dust must be in fasta format" unless seq_fasta.index('>')

    parse_intervals(run_dust(cmd, seq_fasta))
  end

  # Nothing to release; present so the object can be closed like others.
  def close

  end

  private

  # Pipes +seq_fasta+ to +cmd+ and returns the lines written by the command.
  #
  # Beware: we once nearly went crazy trying to find out why this did not
  # return all the hits found when running blast by hand. The reason was
  # that by hand the complete sequence was being passed, while in the MID
  # only the first 20 nt were being passed.
  def run_dust(cmd, seq_fasta)
    output = IO.popen(cmd, 'w+') do |pipe|
      pipe.sync = true
      pipe.write(seq_fasta)
      pipe.close_write   # signals end of input so the command can finish
      pipe.readlines
    end

    # $? holds the status of the command once the popen block has finished
    if !$?.exitstatus.nil? && $?.exitstatus > 0
      raise "Error while doing #{cmd} to seq: #{seq_fasta}"
    end

    output
  end

  # Parses dustmasker interval output, which looks like:
  #
  #   >seq
  #   3 - 346
  #   354 - 683
  #
  # Every header starts a new DustQuery and every interval line is added to
  # the most recent one. Other lines are ignored.
  def parse_intervals(lines)
    lines.each_with_object([]) do |line, queries|
      if line =~ /^>(.*)$/
        queries.push DustQuery.new($1)
      elsif line =~ /^(\d+)\s\-\s(\d+)/
        from, to = $1.to_i, $2.to_i
        # an interval with no preceding header cannot be assigned to a query
        if queries.empty?
          raise "Unexpected dustmasker output: interval found before any sequence header"
        end
        queries.last.push [from, to]
      end
    end
  end

end
102
+
103
+
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Starts an irb session with the scbi_blast library already loaded.

# On Windows builds (mswin / mingw platforms) irb is launched through
# irb.bat. The group is non-capturing: "(?:...)", not "(:?...)".
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.expand_path(File.join(File.dirname(__FILE__),'..','lib','scbi_blast.rb'))}"
puts "Loading scbi_blast gem"
# exec replaces this process with irb, so nothing after this line would run
exec "#{irb} #{libs} --simple-prompt"