scbi_blast 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+
5
+
6
+ ######################################
7
+ # Author:: Almudena Bocinos Rioboo
8
+ # Extracts results from a BLAST tabular output file and uses them to create instances of "BlastQuery" and "BlastHit"
9
+
10
+ ######################################
11
+
12
# Parses commented tabular BLAST output into BlastQuery objects, each one
# collecting the BlastHit objects built from its data rows.
#
# Recognised comment lines:
#   "# Query: <name>"  - remembers <name> as the current query name
#   "# 0 hits found"   - registers a hit-less BlastQuery for the current name
# Every other comment line is ignored.
#
# Data rows are tab separated and are read positionally as:
#   qseqid sacc pident length mismatch gapopen qstart qend sstart send
#   evalue bitscore score qframe sframe qseq sseq
# (the BLAST+ 'std' columns followed by score, qframe, sframe, qseq, sseq).
# Columns missing at the end of a row are left as nil on the hit. All values
# are handed to BlastHit as the strings read from the row.
class BlastTableResult
  # Parsed queries, in the order in which they were first encountered.
  attr_accessor :querys

  # Builds the result set.
  #
  # input:: either an Array of lines or the path of a file to read.
  #
  # The lines passed in are never modified, so frozen strings are accepted.
  # Raises whatever File.readlines raises when a path cannot be read.
  def initialize(input)
    @querys = []
    query_name = ''

    read_lines(input).each do |raw_line|
      # Work on a copy: the caller's strings must not be altered.
      line = raw_line.chomp

      # Blank lines carry no columns; parsing them as data rows would create
      # an empty hit attached to a query whose id is nil.
      next if line.strip.empty?

      if line =~ /^\s*#/
        if line =~ /^#\sQuery:\s+(.+)$/
          query_name = $1
        elsif line =~ /^#\s0\shits\sfound$/
          @querys.push BlastQuery.new(query_name)
        end
      else
        add_row(line)
      end
    end
  end

  # Returns a multi-line, human readable summary of every query.
  def inspect
    header = "Blast results:\n#{'-' * 20}\nQuerys: #{@querys.count}\n"
    header + @querys.map { |q| "#{q.inspect}\n" }.join
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none (including when +querys+ is empty).
  def find_query(querys, name_q)
    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was parsed.
  def empty?
    @querys.empty?
  end

  # Number of parsed queries.
  def size
    @querys.size
  end

  private

  # Returns the input as an Array of lines, reading the file when a path is
  # given. File.readlines closes the file even if reading fails.
  def read_lines(input)
    input.is_a?(Array) ? input : File.readlines(input)
  end

  # Turns one data row into a hit and attaches it to the query named in its
  # first column, creating that query when it has not been seen yet.
  def add_row(line)
    # NOTE(review): consecutive tabs are treated as one separator, so an
    # empty column shifts the following ones; kept as in the original.
    qseqid, sacc, pident, length, mismatch, gapopen, qstart, qend, sstart, s_end,
      evalue, bitscore, score, qframe, sframe, qseq, sseq = line.split(/\t+/)

    hit = BlastHit.new(qstart, qend, sstart, s_end)

    hit.align_len = length
    hit.ident = pident
    hit.gaps = gapopen
    hit.mismatches = mismatch
    hit.e_val = evalue
    hit.bit_score = bitscore

    hit.score = score
    hit.q_frame = qframe
    hit.s_frame = sframe

    # The table only provides the subject accession, so it is reused as id
    # and definition; the full subject length is not available here.
    hit.subject_id = sacc
    hit.full_subject_length = 0
    hit.definition = sacc
    hit.acc = sacc
    hit.q_seq = qseq
    hit.s_seq = sseq

    query = find_query(@querys, qseqid)

    if query
      # Query already known: just attach the hit.
      query.add_hit(hit)
    else
      # First row for this query id: create the query and register it.
      query = BlastQuery.new(qseqid)
      query.add_hit(hit)
      @querys.push query
    end
  end
end
@@ -0,0 +1,152 @@
1
+ require "blast_query.rb"
2
+ require "blast_hit.rb"
3
+
4
+ require 'nokogiri'
5
+ #xml=File.open('orf.1.xml').read
6
+ #data = XmlSimple.xml_in(xml)
7
+ #
8
+ #
9
+ #data['best_orf'][0]['start'][0]
10
+ #data['best_orf'][0]['content']
11
+
12
+ ######################################
13
+ # Author:: Almudena Bocinos Rioboo
14
+ # Extracts results from a BLAST XML output file and uses them to create instances of "BlastQuery" and "BlastHit"
15
+
16
+ ######################################
17
+
18
# Parses BLAST XML output into BlastQuery objects (one per <Iteration>), each
# holding one BlastHit per <Hsp> found under its <Hit> elements.
class BlastXmlResult
  # Parsed queries, in document order.
  attr_accessor :querys

  # Builds the result set.
  #
  # input:: either an Array of XML lines or the path of an XML file. A path
  #         that does not exist yields an empty result instead of an error.
  def initialize(input)
    @querys = []

    lines = read_lines(input)
    return if lines.empty?

    # Query the document itself instead of its root: a document without a
    # root element then simply yields no iterations rather than failing on nil.
    Nokogiri::XML(lines.join).xpath('//Iteration').each do |iteration|
      @querys.push build_query(iteration)
    end
  end

  # Returns a multi-line, human readable summary of every query.
  def inspect
    header = "Blast results:\n#{'-' * 20}\nQuerys: #{@querys.count}\n"
    header + @querys.map { |q| "#{q.inspect}\n" }.join
  end

  # Returns the first element of +querys+ whose query_id equals +name_q+,
  # or nil when there is none (including when +querys+ is empty).
  def find_query(querys, name_q)
    querys.find { |q| q.query_id == name_q }
  end

  # True when no query was parsed.
  def empty?
    @querys.empty?
  end

  # Number of parsed queries.
  def size
    @querys.size
  end

  private

  # Returns the input as an Array of lines. File.exist? is used because
  # File.exists? is no longer available in current Ruby versions, and
  # File.readlines closes the file even if reading fails.
  def read_lines(input)
    return input if input.is_a?(Array)

    File.exist?(input) ? File.readlines(input) : []
  end

  # Builds the BlastQuery for one <Iteration> node together with its hits.
  def build_query(iteration)
    query = BlastQuery.new(iteration.xpath('Iteration_query-ID').text)

    # Only the first whitespace-delimited token of the definition is kept.
    query_def = iteration.xpath('Iteration_query-def').text
    query.query_def = query_def[/^([^\s]+)/, 1] || query_def
    # Stored as the raw text of the element (not converted to a number).
    query.full_query_length = iteration.xpath('Iteration_query-len').text

    iteration.xpath('Iteration_hits/Hit').each do |hit_node|
      subject_id = hit_node.xpath('Hit_id').text
      acc = hit_node.xpath('Hit_accession').text
      full_subject_length = hit_node.xpath('Hit_len').text.to_i
      hit_def = hit_node.xpath('Hit_def').text
      # BLAST's placeholder definition is replaced by the subject id.
      hit_def = subject_id if hit_def == 'No definition line'

      hit_node.xpath('Hit_hsps/Hsp').each do |hsp|
        hit = build_hit(hsp)
        hit.subject_id = subject_id
        hit.full_subject_length = full_subject_length
        hit.definition = hit_def
        hit.acc = acc
        query.add_hit(hit)
      end
    end

    query
  end

  # Builds a BlastHit from the alignment data of one <Hsp> node.
  def build_hit(hsp)
    hit = BlastHit.new(hsp.xpath('Hsp_query-from').text.to_i,
                       hsp.xpath('Hsp_query-to').text.to_i,
                       hsp.xpath('Hsp_hit-from').text.to_i,
                       hsp.xpath('Hsp_hit-to').text.to_i)

    align_len = hsp.xpath('Hsp_align-len').text.to_i
    gaps = hsp.xpath('Hsp_gaps').text.to_i

    hit.align_len = align_len
    # Identity as a percentage of the alignment length; a missing or zero
    # length would otherwise produce NaN.
    hit.ident = align_len.zero? ? 0.0 : (hsp.xpath('Hsp_identity').text.to_f / align_len) * 100
    hit.gaps = gaps
    # Blanks in the midline mark non-matching columns; those include gaps.
    hit.mismatches = hsp.xpath('Hsp_midline').text.count(' ') - gaps
    # NOTE(review): rounding to 3 decimals turns every e-value below 0.0005
    # into 0.0 - kept as in the original, confirm this is intended.
    hit.e_val = (hsp.xpath('Hsp_evalue').text.to_f * 1000).round / 1000.0
    hit.bit_score = (hsp.xpath('Hsp_bit-score').text.to_f * 100).round / 100.0

    hit.score = hsp.xpath('Hsp_score').text.to_f
    hit.q_frame = hsp.xpath('Hsp_query-frame').text.to_i
    hit.s_frame = hsp.xpath('Hsp_hit-frame').text.to_i

    hit.q_seq = hsp.xpath('Hsp_qseq').text
    hit.s_seq = hsp.xpath('Hsp_hseq').text

    hit
  end
end
@@ -0,0 +1,103 @@
1
+
2
# Low-complexity (dust) intervals reported for one query sequence.
class DustQuery
  # query_id:: identifier this object was created with.
  # dust::     Array of the intervals pushed so far.
  attr_accessor :query_id, :dust

  # Creates an interval list for +query_id+ with no intervals yet.
  def initialize(query_id)
    @dust = []
    @query_id = query_id
  end

  # Appends +interval+ (indexable as [from, to]) and returns the updated list.
  def push(interval)
    @dust.push interval
  end

  # Returns a one-line description such as "Query seq: from 3 to 346".
  # The string is returned explicitly: the original ended on the +each+
  # loop and therefore returned the interval array instead of the text.
  def inspect
    "Query #{query_id}:" + @dust.map { |d| " from #{d[0]} to #{d[1]}" }.join
  end
end
23
+
24
# Thin wrapper around the external `dustmasker` command: sends FASTA data to
# it and turns its interval output into DustQuery objects.
class DustMasker
  # extra_params:: additional command line options placed before -outfmt.
  def initialize(extra_params = '')
    @format = 'interval'
    @extra_params = extra_params
  end

  # Returns the shell command line used to run dustmasker.
  #
  # extra_params:: options to use for this command; when empty, the options
  #                given to the constructor are used instead.
  #
  # Options are separated from '-outfmt' by a space (the original glued them
  # together, e.g. "-level 20-outfmt"). stderr is discarded.
  def get_cmd(extra_params = '')
    params = extra_params.to_s.strip
    params = @extra_params.to_s.strip if params.empty?

    ['dustmasker', params, '-outfmt', @format, '2>/dev/null'].reject(&:empty?).join(' ')
  end

  # Runs dustmasker on +seq_fasta+ and returns an Array of DustQuery, one per
  # ">" header found in the program output.
  #
  # seq_fasta:: FASTA text, or an Array of lines (joined with newlines).
  #             nil or empty input returns an empty Array without running
  #             the command.
  #
  # Raises RuntimeError when the data contains no '>' or when the command
  # finishes with a non-zero exit status.
  def do_dust(seq_fasta)
    return [] if seq_fasta.nil? || seq_fasta.empty?

    seq_fasta = seq_fasta.join("\n") if seq_fasta.is_a?(Array)

    raise "Data passed to dust must be in fasta format" unless seq_fasta.index('>')

    cmd = get_cmd(@extra_params)

    # Beware: we once spent a long time hunting for hits that only showed up
    # when running the tool by hand; the cause was that the whole sequence
    # was given by hand while the code passed only its first 20 nt.
    output = IO.popen(cmd, 'w+') do |dust|
      dust.sync = true
      dust.write(seq_fasta)
      # Closing the write end signals end of input to the child process.
      dust.close_write
      dust.readlines
    end

    exit_code = $?.exitstatus
    if !exit_code.nil? && exit_code > 0
      raise "Error while doing #{cmd} to seq: #{seq_fasta}"
    end

    parse_intervals(output)
  end

  # Nothing to release; kept so callers can treat this like other wrappers.
  def close
  end

  private

  # Parses output of the form
  #   >seq
  #   3 - 346
  #   354 - 683
  # into DustQuery objects. Interval lines appearing before any header are
  # ignored instead of failing on a missing query.
  def parse_intervals(lines)
    lines.each_with_object([]) do |line, queries|
      if line =~ /^>(.*)$/
        queries.push DustQuery.new($1)
      elsif line =~ /^(\d+)\s\-\s(\d+)/ && queries.last
        queries.last.push [$1.to_i, $2.to_i]
      end
    end
  end
end
102
+
103
+
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
# Starts an irb session with the scbi_blast library already required.

# On Windows builds irb is launched through its batch file.
# (?:...) is the non-capturing group the original "(:?" was meant to be.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

lib_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'scbi_blast.rb'))

# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs = " -r irb/completion -r #{lib_file}"

puts "Loading scbi_blast gem"
# exec replaces this process with irb, so nothing after this line runs.
exec "#{irb} #{libs} --simple-prompt"