bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,1402 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/blast/format0.rb - BLAST default output (-m 0) parser
|
|
3
|
+
#
|
|
4
|
+
# Author:: Naohisa GOTO
|
|
5
|
+
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
#--
|
|
9
|
+
# This library is free software; you can redistribute it and/or
|
|
10
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
11
|
+
# License as published by the Free Software Foundation; either
|
|
12
|
+
# version 2 of the License, or (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# This library is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17
|
+
# Lesser General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
20
|
+
# License along with this library; if not, write to the Free Software
|
|
21
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
22
|
+
#++
|
|
23
|
+
#
|
|
24
|
+
# $Id: format0.rb,v 1.16 2005/11/01 05:32:23 ngoto Exp $
|
|
25
|
+
#
|
|
26
|
+
# NCBI BLAST default (-m 0 option) output parser.
|
|
27
|
+
#
|
|
28
|
+
# == References
|
|
29
|
+
#
|
|
30
|
+
# * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
|
|
31
|
+
# Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
|
|
32
|
+
# "Gapped BLAST and PSI-BLAST: a new generation of protein database search
|
|
33
|
+
# programs", Nucleic Acids Res. 25:3389-3402.
|
|
34
|
+
# * http://www.ncbi.nlm.nih.gov/blast/
|
|
35
|
+
#
|
|
36
|
+
|
|
37
|
+
begin
|
|
38
|
+
require 'strscan'
|
|
39
|
+
rescue LoadError
|
|
40
|
+
end
|
|
41
|
+
require 'singleton'
|
|
42
|
+
|
|
43
|
+
#--
|
|
44
|
+
#require 'bio/db'
|
|
45
|
+
#++
|
|
46
|
+
require 'bio/io/flatfile'
|
|
47
|
+
|
|
48
|
+
module Bio
|
|
49
|
+
class Blast
|
|
50
|
+
module Default #:nodoc:
|
|
51
|
+
|
|
52
|
+
# Bio::Blast::Default::Report parses NCBI BLAST default output
|
|
53
|
+
# and stores information in the data.
|
|
54
|
+
# It may store some Bio::Blast::Default::Report::Iteration objects.
|
|
55
|
+
class Report #< DB
|
|
56
|
+
# Delimiter of each entry. Bio::FlatFile uses it.
|
|
57
|
+
DELIMITER = RS = "\nBLAST"
|
|
58
|
+
|
|
59
|
+
# Opens file by using Bio::FlatFile.open.
|
|
60
|
+
def self.open(filename, *mode)
  # Bio::FlatFile.open receives this class first, then the file name and
  # any extra open-mode arguments exactly as the caller passed them.
  flatfile_args = [ self, filename ] + mode
  Bio::FlatFile.open(*flatfile_args)
end
|
|
63
|
+
|
|
64
|
+
# Creates a new Report object from BLAST result text.
|
|
65
|
+
def initialize(str)
  # Work on a copy with leading whitespace removed, so the caller's string
  # is never modified by the in-place substitution below.
  text = str.sub(/\A\s+/, '')
  # Cut off anything from the next "\nBLAST"/"\nTBLAST" line onwards
  # (remove trailing entries for sure); the removed text is kept as overrun.
  text.sub!(/\n(T?BLAST.*)/m, "\n")
  @entry_overrun = Regexp.last_match(1)
  @entry = text
  # Paragraphs are separated by one or more blank (space/tab only) lines.
  paragraphs = text.split(/(?:^[ \t]*\n)+/)

  # Each step below consumes paragraphs from the front of the array.
  format0_split_headers(paragraphs)
  @iterations = format0_split_search(paragraphs)
  format0_split_stat_params(paragraphs)
end
|
|
76
|
+
# piece of next entry. Bio::FlatFile uses it.
|
|
77
|
+
attr_reader :entry_overrun
|
|
78
|
+
|
|
79
|
+
# (PSI-BLAST)
|
|
80
|
+
# Returns iterations.
|
|
81
|
+
# It returns an array of Bio::Blast::Default::Report::Iteration class.
|
|
82
|
+
# Note that normal blastall result usually contains one iteration.
|
|
83
|
+
attr_reader :iterations
|
|
84
|
+
|
|
85
|
+
# Returns whole entry as a string.
|
|
86
|
+
def to_s; @entry; end
|
|
87
|
+
|
|
88
|
+
#:stopdoc:
|
|
89
|
+
# prevent using StringScanner_R (in old version of strscan)
|
|
90
|
+
if !defined?(StringScanner) then
|
|
91
|
+
def initialize(*arg)
|
|
92
|
+
raise 'couldn\'t load strscan.so'
|
|
93
|
+
end #def
|
|
94
|
+
elsif StringScanner.name == 'StringScanner_R' then
|
|
95
|
+
def initialize(*arg)
|
|
96
|
+
raise 'cannot use StringScanner_R'
|
|
97
|
+
end #def
|
|
98
|
+
end
|
|
99
|
+
#:startdoc:
|
|
100
|
+
|
|
101
|
+
# Defines attributes which delegate to @f0dbstat objects.
|
|
102
|
+
def self.delegate_to_f0dbstat(*names)
  # For every given name, generate a reader method of the same name that
  # forwards the call to the object stored in @f0dbstat.
  names.each do |attr_name|
    reader_source = "def #{attr_name}; @f0dbstat.#{attr_name}; end"
    module_eval(reader_source)
  end
end
|
|
107
|
+
private_class_method :delegate_to_f0dbstat
|
|
108
|
+
|
|
109
|
+
# number of sequences in database
|
|
110
|
+
attr_reader :db_num if false #dummy
|
|
111
|
+
delegate_to_f0dbstat :db_num
|
|
112
|
+
|
|
113
|
+
# number of letters in database
|
|
114
|
+
attr_reader :db_len if false #dummy
|
|
115
|
+
delegate_to_f0dbstat :db_len
|
|
116
|
+
|
|
117
|
+
# posted date of the database
|
|
118
|
+
attr_reader :posted_date if false #dummy
|
|
119
|
+
delegate_to_f0dbstat :posted_date
|
|
120
|
+
|
|
121
|
+
# effective length of the database
|
|
122
|
+
attr_reader :eff_space if false #dummy
|
|
123
|
+
delegate_to_f0dbstat :eff_space
|
|
124
|
+
|
|
125
|
+
# name of the matrix
|
|
126
|
+
attr_reader :matrix if false #dummy
|
|
127
|
+
delegate_to_f0dbstat :matrix
|
|
128
|
+
|
|
129
|
+
# match score of the matrix
|
|
130
|
+
attr_reader :sc_match if false #dummy
|
|
131
|
+
delegate_to_f0dbstat :sc_match
|
|
132
|
+
|
|
133
|
+
# mismatch score of the matrix
|
|
134
|
+
attr_reader :sc_mismatch if false #dummy
|
|
135
|
+
delegate_to_f0dbstat :sc_mismatch
|
|
136
|
+
|
|
137
|
+
# gap open penalty
|
|
138
|
+
attr_reader :gap_open if false #dummy
|
|
139
|
+
delegate_to_f0dbstat :gap_open
|
|
140
|
+
|
|
141
|
+
# gap extend penalty
|
|
142
|
+
attr_reader :gap_extend if false #dummy
|
|
143
|
+
delegate_to_f0dbstat :gap_extend
|
|
144
|
+
|
|
145
|
+
# e-value threshold specified when BLAST was executed
|
|
146
|
+
attr_reader :expect if false #dummy
|
|
147
|
+
delegate_to_f0dbstat :expect
|
|
148
|
+
|
|
149
|
+
# number of hits. Note that this may differ from <tt>hits.size</tt>.
|
|
150
|
+
attr_reader :num_hits if false #dummy
|
|
151
|
+
delegate_to_f0dbstat :num_hits
|
|
152
|
+
|
|
153
|
+
# Same as <tt>iterations.last.kappa</tt>.
|
|
154
|
+
def kappa
  # Delegates to the last iteration.
  @iterations.last.kappa
end
|
|
155
|
+
# Same as <tt>iterations.last.lambda</tt>.
|
|
156
|
+
def lambda
  # Delegates to the last iteration.
  @iterations.last.lambda
end
|
|
157
|
+
# Same as <tt>iterations.last.entropy</tt>.
|
|
158
|
+
def entropy
  # Delegates to the last iteration.
  @iterations.last.entropy
end
|
|
159
|
+
|
|
160
|
+
# Same as <tt>iterations.last.gapped_kappa</tt>.
|
|
161
|
+
def gapped_kappa
  # Delegates to the last iteration.
  @iterations.last.gapped_kappa
end
|
|
162
|
+
# Same as <tt>iterations.last.gapped_lambda</tt>.
|
|
163
|
+
def gapped_lambda
  # Delegates to the last iteration.
  @iterations.last.gapped_lambda
end
|
|
164
|
+
# Same as <tt>iterations.last.gapped_entropy</tt>.
|
|
165
|
+
def gapped_entropy
  # Delegates to the last iteration.
  @iterations.last.gapped_entropy
end
|
|
166
|
+
|
|
167
|
+
# Returns program name.
|
|
168
|
+
def program
  # The header line is parsed on first use.
  format0_parse_header
  @program
end
|
|
169
|
+
# Returns version of the program.
|
|
170
|
+
def version
  # The header line is parsed on first use.
  format0_parse_header
  @version
end
|
|
171
|
+
# Returns version number string of the program.
|
|
172
|
+
def version_number
  # The header line is parsed on first use.
  format0_parse_header
  @version_number
end
|
|
173
|
+
# Returns released date of the program.
|
|
174
|
+
def version_date
  # The header line is parsed on first use.
  format0_parse_header
  @version_date
end
|
|
175
|
+
|
|
176
|
+
# Returns length of the query.
|
|
177
|
+
def query_len
  # The query paragraph is parsed on first use.
  format0_parse_query
  @query_len
end
|
|
178
|
+
|
|
179
|
+
# Returns definition of the query.
|
|
180
|
+
def query_def
  # The query paragraph is parsed on first use.
  format0_parse_query
  @query_def
end
|
|
181
|
+
|
|
182
|
+
# (PHI-BLAST)
|
|
183
|
+
# Same as <tt>iterations.first.pattern</tt>.
|
|
184
|
+
# Note that it returns the FIRST iteration's value.
|
|
185
|
+
def pattern
  # Delegates to the FIRST iteration (not the last one).
  @iterations.first.pattern
end
|
|
186
|
+
|
|
187
|
+
# (PHI-BLAST)
|
|
188
|
+
# Same as <tt>iterations.first.pattern_positions</tt>.
|
|
189
|
+
# Note that it returns the FIRST iteration's value.
|
|
190
|
+
def pattern_positions
  # Delegates to the FIRST iteration (not the last one).
  first_iteration = @iterations.first
  first_iteration.pattern_positions
end
|
|
193
|
+
|
|
194
|
+
# (PSI-BLAST)
|
|
195
|
+
# Iterates over each iteration.
|
|
196
|
+
# Same as <tt>iterations.each</tt>.
|
|
197
|
+
# Yields a Bio::Blast::Default::Report::Iteration object.
|
|
198
|
+
def each_iteration
  # Hands every stored iteration to the caller's block, in order.
  @iterations.each { |iteration| yield iteration }
end
|
|
203
|
+
|
|
204
|
+
# Iterates over each hit of the last iteration.
|
|
205
|
+
# Same as <tt>iterations.last.each_hit</tt>.
|
|
206
|
+
# Yields a Bio::Blast::Default::Report::Hit object.
|
|
207
|
+
# This is very useful in most cases, e.g. for blastall results.
|
|
208
|
+
def each_hit
  # Only the last iteration is walked; its #each yields the hits.
  last_iteration = @iterations.last
  last_iteration.each { |hit| yield hit }
end
|
|
213
|
+
alias each each_hit
|
|
214
|
+
|
|
215
|
+
# Same as <tt>iterations.last.hits</tt>.
|
|
216
|
+
# Returns the last iteration's hits.
|
|
217
|
+
# Returns an array of Bio::Blast::Default::Report::Hit objects.
|
|
218
|
+
# This is very useful in most cases, e.g. for blastall results.
|
|
219
|
+
def hits
  # Delegates to the last iteration.
  last_iteration = @iterations.last
  last_iteration.hits
end
|
|
222
|
+
|
|
223
|
+
# (PSI-BLAST)
|
|
224
|
+
# Same as <tt>iterations.last.message</tt>.
|
|
225
|
+
def message
  # Delegates to the last iteration.
  last_iteration = @iterations.last
  last_iteration.message
end
|
|
228
|
+
|
|
229
|
+
# (PSI-BLAST)
|
|
230
|
+
# Same as <tt>iterations.last.converged?</tt>.
|
|
231
|
+
# Returns true if the last iteration is converged,
|
|
232
|
+
# otherwise, returns false.
|
|
233
|
+
def converged?
  # Delegates to the last iteration.
  last_iteration = @iterations.last
  last_iteration.converged?
end
|
|
236
|
+
|
|
237
|
+
# Returns the bibliography reference of the BLAST software.
|
|
238
|
+
def reference
  # Computed once: the raw reference paragraph with every run of
  # whitespace collapsed to a single space and the ends trimmed.
  return @reference if defined?(@reference)
  flattened = @f0reference.to_s.gsub(/\s+/, ' ')
  @reference = flattened.strip
end
|
|
244
|
+
|
|
245
|
+
# Returns the name (filename or title) of the database.
|
|
246
|
+
def db
  return @db if defined?(@db)
  if /Database *\: *(.*)/m =~ @f0database then
    title_lines = Regexp.last_match(1).split(/^/)
    # When there is more than one line, the last one is dropped and only
    # the preceding lines form the name.
    title_lines.pop if title_lines.size > 1
    trimmed = title_lines.collect { |line| line.sub(/\s+\z/, '') }
    @db = trimmed.join(' ')
  end
  @db
end
|
|
256
|
+
|
|
257
|
+
private
|
|
258
|
+
# Parses the query lines (begins with "Query = ").
|
|
259
|
+
def format0_parse_query
  # Lazily parses the "Query= ..." paragraph stored in @f0query.
  #
  # Sets @query_def to the definition lines joined by single spaces and,
  # when a trailing "(N letters)" line is present, @query_len to N
  # (comma-grouped numbers such as "1,234" are accepted).
  # Nothing is set when the paragraph does not contain "Query=".
  unless defined?(@query_def)
    sc = StringScanner.new(@f0query.to_s)
    sc.skip(/\s*/)
    if sc.skip_until(/Query\= */) then
      q = []
      r = nil
      loop do
        # rest of the current line
        q << sc.scan(/.*/)
        # move to the start of the next line (and one leading space, if any)
        sc.skip(/\s*^ ?/)
        r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
        # Stop on the "(N letters)" line, or when only whitespace is left.
        # The previous test used StringScanner#rest, which returns a String
        # and is therefore always truthy, so a paragraph without a
        # "(N letters)" line never left the loop.
        break if r or sc.check(/\s*\z/)
      end
      @query_len = sc[1].delete(',').to_i if r
      @query_def = q.join(' ')
    end
  end
end
|
|
274
|
+
|
|
275
|
+
# Parses the first line of the BLAST result.
|
|
276
|
+
def format0_parse_header
  # Parsed only once; @program doubles as the "already parsed" marker.
  return if defined?(@program)
  # Expected shape: "<program> <version number> [<date>]", optionally
  # followed by another bracketed field.
  m = /(\w+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/.match(@f0header.to_s)
  if m then
    @program = m[1]
    @version = "#{m[1]} #{m[2]} [#{m[3]}]"
    @version_number = m[2]
    @version_date = m[3]
  end
end
|
|
286
|
+
|
|
287
|
+
# Splits headers into the first line, reference, query line and
|
|
288
|
+
# database line.
|
|
289
|
+
def format0_split_headers(data)
  # The first four paragraphs are removed from the front of +data+, in
  # this order: header line, reference, query, database.
  # Missing paragraphs are stored as nil.
  @f0header, @f0reference, @f0query = data.shift(3)
  @f0database = data.shift
end
|
|
295
|
+
|
|
296
|
+
# Splits the statistical parameters.
|
|
297
|
+
def format0_split_stat_params(data)
  # Consumes the trailing paragraphs of a report from the front of +data+:
  #
  # 1. leading paragraphs containing a "Database:" line are collected
  #    into a new F0dbstat object (@f0dbstat);
  # 2. following paragraphs containing a line that starts with "Lambda"
  #    are appended to the @f0stat array of the last iteration, which is
  #    also given the same F0dbstat object;
  # 3. whatever is left in +data+ becomes the F0dbstat's f0params.
  dbs = []
  while r = data.first and /^ *Database\:/ =~ r
    dbs << data.shift
  end
  @f0dbstat = self.class::F0dbstat.new(dbs)
  i = -1
  while r = data[0] and /^Lambda/ =~ r
    #i -= 1 unless /^Gapped/ =~ r
    # Always take the paragraph off the queue.  It used to be shifted only
    # when an iteration existed, so a report without any iteration kept
    # seeing the same paragraph and this loop never finished.
    x = data.shift
    if itr = @iterations[i] then
      itr.instance_variable_get(:@f0stat) << x
      itr.instance_variable_set(:@f0dbstat, @f0dbstat)
    end
  end
  @f0dbstat.f0params = data
end
|
|
313
|
+
|
|
314
|
+
# Splits the search results.
|
|
315
|
+
def format0_split_search(data)
  # One Iteration is built for every run of paragraphs that begins with a
  # "Searching" line; Iteration.new itself removes what it uses from the
  # front of +data+.
  found = []
  loop do
    head = data[0]
    break unless head and /^Searching/ =~ head
    found << Iteration.new(data)
  end
  found
end
|
|
322
|
+
|
|
323
|
+
# Stores format0 database statistics.
|
|
324
|
+
# Internal use only. Users must not use the class.
|
|
325
|
+
class F0dbstat #:nodoc:
  # Creates a new F0dbstat object.
  # Internal use only.
  # +ary+ is stored untouched; its first element is the text examined
  # later by parse_dbstat.
  def initialize(ary)
    @hash = {}
    @f0dbstat = ary
  end
  attr_reader :f0dbstat
  attr_accessor :f0params

  # Parses colon-separated lines (in +ary+) and stores them into +hash+
  # as "text before the colon" => "rest of the line".
  # A "Number of sequences better than X: N" line additionally sets
  # @expect (X, kept as a string) and @num_hits (N, commas removed).
  # Raises ScanError when a line does not look like "key: value".
  def parse_colon_separated_params(hash, ary)
    ary.each do |chunk|
      scanner = StringScanner.new(chunk)
      scanner.skip(/\s*/)
      until scanner.eos?
        # match? only looks ahead; the line is consumed by skip below.
        if scanner.match?(/Number of sequences better than +([e\-\.\d]+) *\: *(.+)/) then
          @expect = scanner[1]
          @num_hits = scanner[2].tr(',', '').to_i
        end
        unless scanner.skip(/([\-\,\.\'\(\)\#\w ]+)\: *(.*)/) then
          raise ScanError
        end
        hash[scanner[1]] = scanner[2]
        scanner.skip(/\s*/)
      end
    end
  end
  private :parse_colon_separated_params

  # Parses the parameter paragraphs (f0params) once and fills in the
  # matrix, gap penalty and effective length values.
  def parse_params
    return if defined?(@parse_params)
    parse_colon_separated_params(@hash, @f0params)

    matrix_value = @hash['Matrix']
    if matrix_value then
      scores = /blastn *matrix *\: *([e\-\.\d]+) +([e\-\.\d]+)/.match(matrix_value)
      if scores then
        @matrix = 'blastn'
        @sc_match = scores[1].to_i
        @sc_mismatch = scores[2].to_i
      else
        @matrix = matrix_value
      end
    end

    penalties = @hash['Gap Penalties']
    if penalties then
      opening = /Existence\: *([e\-\.\d]+)/.match(penalties)
      @gap_open = opening[1].to_i if opening
      extension = /Extension\: *([e\-\.\d]+)/.match(penalties)
      @gap_extend = extension[1].to_i if extension
    end

    effective = @hash['effective length of database']
    @eff_space = effective.tr(',', '').to_i if effective

    @parse_params = true
  end
  private :parse_params

  # Returns name of the matrix.
  def matrix
    parse_params
    @matrix
  end

  # Returns the match score of the matrix.
  def sc_match
    parse_params
    @sc_match
  end

  # Returns the mismatch score of the matrix.
  def sc_mismatch
    parse_params
    @sc_mismatch
  end

  # Returns gap open penalty value.
  def gap_open
    parse_params
    @gap_open
  end

  # Returns gap extend penalty value.
  def gap_extend
    parse_params
    @gap_extend
  end

  # Returns effective length of the database.
  def eff_space
    parse_params
    @eff_space
  end

  # Returns e-value threshold specified when BLAST was executed.
  def expect
    parse_params
    @expect
  end

  # Returns number of hits.
  def num_hits
    parse_params
    @num_hits
  end

  # Parses database statistics lines.
  # The first stored paragraph is read from its last line upwards until
  # three statistics lines (posted date, letters, sequences) have been
  # taken; the remaining lines, from the "Database:" line on, form the
  # database name.
  def parse_dbstat
    lines = @f0dbstat[0].to_s.split(/^/)
    skipped = []
    remaining = 3
    while remaining > 0 && (line = lines.pop)
      case line
      when /^\s+Posted date\:\s*(.*)$/
        unless defined?(@posted_date)
          @posted_date = Regexp.last_match(1).strip
          remaining -= 1
          skipped.clear
        end
      when /^\s+Number of letters in database\:\s*(.*)$/
        unless defined?(@db_len)
          @db_len = Regexp.last_match(1).tr(',', '').to_i
          remaining -= 1
          skipped.clear
        end
      when /^\s+Number of sequences in database\:\s*(.*)$/
        unless defined?(@db_num)
          @db_num = Regexp.last_match(1).tr(',', '').to_i
          remaining -= 1
          skipped.clear
        end
      else
        # Unrecognized line: remember it in its original order.
        skipped.unshift(line)
      end
    end
    lines.concat(skipped)
    until lines.empty?
      line = lines.shift
      if /^\s+Database\:\s*(.*)$/ =~ line
        # The name is the text after "Database:" plus all following lines.
        lines.unshift(Regexp.last_match(1))
        lines.each { |part| part.strip! }
        @database = lines.join(' ')
        break
      end
    end
  end
  private :parse_dbstat

  # Returns name (title or filename) of the database.
  def database
    parse_dbstat unless defined?(@database)
    @database
  end

  # Returns posted date of the database.
  def posted_date
    parse_dbstat unless defined?(@posted_date)
    @posted_date
  end

  # Returns number of letters in database.
  def db_len
    parse_dbstat unless defined?(@db_len)
    @db_len
  end

  # Returns number of sequences in database.
  def db_num
    parse_dbstat unless defined?(@db_num)
    @db_num
  end
end #class F0dbstat
|
|
468
|
+
|
|
469
|
+
# Provides a singleton object of which any methods always return nil.
|
|
470
|
+
# Internal use only. Users must not use the class.
|
|
471
|
+
class AlwaysNil #:nodoc:
  include Singleton

  # Answers nil to every message the object does not otherwise understand,
  # whatever arguments are given.
  def method_missing(*_ignored)
    nil
  end
end #class AlwaysNil
|
|
477
|
+
|
|
478
|
+
# Bio::Blast::Default::Report::Iteration stores information about
|
|
479
|
+
# an iteration.
|
|
480
|
+
# It may contain some Bio::Blast::Default::Report::Hit objects.
|
|
481
|
+
# Note that a PSI-BLAST (blastpgp command) result usually contains
|
|
482
|
+
# multiple iterations in it, and a normal BLAST (blastall command)
|
|
483
|
+
# result usually contains one iteration in it.
|
|
484
|
+
class Iteration
|
|
485
|
+
# Creates a new Iteration object.
|
|
486
|
+
# It is designed to be called only internally from
|
|
487
|
+
# the Bio::Blast::Default::Report class.
|
|
488
|
+
# Users shall not use the method directly.
|
|
489
|
+
def initialize(data)
  # +data+ is the array of report paragraphs; everything belonging to this
  # iteration is removed from its front, so the caller can continue with
  # whatever is left.
  @f0stat = []
  @f0dbstat = AlwaysNil.instance
  @f0hitlist = []
  @hits = []
  @num = 1
  # first paragraph of the iteration
  r = data.shift
  @f0message = [ r ]
  # A trailing "Results from round N" part is moved out of the first
  # paragraph into a message entry of its own.
  r.gsub!(/^Results from round (\d+).*\z/) { |x|
    @num = $1.to_i
    @f0message << x
    ''
  }
  r = data.shift
  while /^Number of occurrences of pattern in the database is +(\d+)/ =~ r
    # PHI-BLAST
    @pattern_in_database = $1.to_i
    @f0message << r
    r = data.shift
  end
  if /^Results from round (\d+)/ =~ r then
    @num = $1.to_i
    @f0message << r
    r = data.shift
  end
  if r and !(/\*{5} No hits found \*{5}/ =~ r) then
    # Hit list: its heading, then every paragraph up to the first
    # alignment (a paragraph with a line starting with ">").
    @f0hitlist << r
    loop do
      piece = data.shift
      @f0hitlist << piece if piece
      r = data[0]
      # Also stop when the input is exhausted.  Without this, a report
      # with a hit list but no ">" alignment paragraph never left the
      # loop and kept appending nil to @f0hitlist.
      break if r.nil? or /^\>/ =~ r
    end
    if r and /^CONVERGED\!/ =~ r then
      r.sub!(/(.*\n)*^CONVERGED\!.*\n/) { |x| @f0hitlist << x; '' }
    end
    if defined?(@pattern_in_database) and r = data.first then
      #PHI-BLAST
      while /^\>/ =~ r
        @hits << Hit.new(data)
        r = data.first
        break unless r
        if /^Significant alignments for pattern/ =~ r
          data.shift
          r = data.first
        end
      end
    else
      #not PHI-BLAST
      while r = data[0] and /^\>/ =~ r
        @hits << Hit.new(data)
      end
    end
  end
  if /^CONVERGED\!\s*$/ =~ @f0hitlist[-1].to_s then
    @message = 'CONVERGED!'
    @flag_converged = true
  end
end
|
|
545
|
+
|
|
546
|
+
# (PSI-BLAST) Iteration round number.
|
|
547
|
+
attr_reader :num
|
|
548
|
+
# (PSI-BLAST) Messages of the iteration.
|
|
549
|
+
attr_reader :message
|
|
550
|
+
# (PHI-BLAST) Number of occurrences of pattern in the database.
|
|
551
|
+
attr_reader :pattern_in_database
|
|
552
|
+
|
|
553
|
+
# Returns the hits of the iteration.
|
|
554
|
+
# It returns an array of Bio::Blast::Default::Report::Hit objects.
|
|
555
|
+
def hits
  # The hit list is parsed first (parse_hitlist guards against re-parsing);
  # it may add hits to @hits.
  parse_hitlist
  return @hits
end
|
|
559
|
+
|
|
560
|
+
# Iterates over each hit of the iteration.
|
|
561
|
+
# Yields a Bio::Blast::Default::Report::Hit object.
|
|
562
|
+
def each
  # Goes through #hits so that the hit list is parsed before iterating.
  hits.each { |hit| yield hit }
end
|
|
567
|
+
|
|
568
|
+
# (PSI-BLAST) Returns true if the iteration is converged.
|
|
569
|
+
# Otherwise, returns false.
|
|
570
|
+
def converged?
  # The flag is only ever set (to true) by the constructor; when it was
  # never set this returns nil rather than false.
  return @flag_converged
end
|
|
573
|
+
|
|
574
|
+
# (PHI-BLAST) Returns pattern string.
|
|
575
|
+
# Returns nil if it is not a PHI-BLAST result.
|
|
576
|
+
def pattern
  #PHI-BLAST
  # Nothing to do when already parsed, or when the constructor saw no
  # PHI-BLAST "Number of occurrences of pattern" line.
  return @pattern if defined?(@pattern) or !defined?(@pattern_in_database)
  @pattern = nil
  @pattern_positions = []
  @f0message.each do |paragraph|
    scanner = StringScanner.new(paragraph)
    next unless scanner.skip_until(/^ *pattern +(.+)$/)
    # Only the first pattern string found is kept ...
    @pattern ||= scanner[1]
    # ... but a position is recorded for every paragraph with a pattern line.
    scanner.skip_until(/^ at position +(\d+)/)
    @pattern_positions << scanner[1].to_i
  end
  @pattern
end
|
|
592
|
+
|
|
593
|
+
# (PHI-BLAST) Returns pattern positions.
|
|
594
|
+
# Returns nil if it is not a PHI-BLAST result.
|
|
595
|
+
def pattern_positions
  #PHI-BLAST
  # Calling #pattern triggers the parsing that fills @pattern_positions.
  pattern
  return @pattern_positions
end
|
|
600
|
+
|
|
601
|
+
# (PSI-BLAST)
|
|
602
|
+
# Returns hits which have been found again in the iteration.
|
|
603
|
+
# It returns an array of Bio::Blast::Default::Report::Hit objects.
|
|
604
|
+
def hits_found_again
  # Filled in by parse_hitlist.
  parse_hitlist
  return @hits_found_again
end
|
|
608
|
+
|
|
609
|
+
# (PSI-BLAST)
|
|
610
|
+
# Returns hits which have been newly found in the iteration.
|
|
611
|
+
# It returns an array of Bio::Blast::Default::Report::Hit objects.
|
|
612
|
+
def hits_newly_found
  # Filled in by parse_hitlist.
  parse_hitlist
  return @hits_newly_found
end
|
|
616
|
+
|
|
617
|
+
# (PHI-BLAST) Returns hits for pattern. ????
|
|
618
|
+
def hits_for_pattern
  # Filled in by parse_hitlist; stays unset (nil) when the hit list has no
  # "Significant matches for pattern" heading.
  parse_hitlist
  return @hits_for_pattern
end
|
|
622
|
+
|
|
623
|
+
# Parses list of hits.
|
|
624
|
+
def parse_hitlist
  # Runs once.  Walks the hit-list paragraphs, sorts the hits into the
  # "found again" / "newly found" / "unknown state" / "for pattern" groups
  # according to the heading that precedes them, and creates Hit objects
  # for summary lines that have no corresponding entry in @hits yet.
  return if defined?(@parse_hitlist)
  @hits_found_again = []
  @hits_newly_found = []
  @hits_unknown_state = []
  offset = 0                      # index into @hits of the next summary line
  bucket = @hits_newly_found      # group currently being filled
  first_piece = true
  @f0hitlist.each do |piece|
    scanner = StringScanner.new(piece)
    if first_piece then
      # The first paragraph is only a heading; it merely decides which
      # group the following lines start in.
      if scanner.skip_until(/^Sequences used in model and found again\:\s*$/)
        bucket = @hits_found_again
      end
      first_piece = nil
      next
    end
    next if scanner.skip(/^CONVERGED\!$/)
    if scanner.skip(/^Sequences not found previously or not previously below threshold\:\s*$/) then
      bucket = @hits_newly_found
      next
    elsif scanner.skip(/^Sequences.+\:\s*$/) then
      #possibly a bug or unknown format?
      bucket = @hits_unknown_state
      next
    elsif scanner.skip(/^Significant (matches|alignments) for pattern/) then
      # PHI-BLAST
      # do nothing when 'alignments'
      if scanner[1] == 'matches' then
        @hits_for_pattern = [] unless defined?(@hits_for_pattern)
        bucket = []
        @hits_for_pattern << bucket
      end
      next
    end
    rows = piece.split(/^/).reject { |row| row.empty? }
    if offset + rows.size > @hits.size then
      # More summary lines than parsed hits: build Hit objects from the
      # summary lines themselves.
      ((@hits.size - offset)...(rows.size)).each do |j|
        row = rows[j]
        row.strip!
        # Splitting the reversed line takes the last two columns (e-value
        # and bit score) off the right-hand side and leaves the definition,
        # spaces included, in one piece.
        row.reverse!
        fields = row.split(/\s+/, 3)
        fields.each { |field| field.reverse! }
        hit = Hit.new([ fields.pop.to_s.sub(/\.+\z/, '') ])
        bs = fields.pop.to_s
        ev = fields.pop.to_s
        #ev = '1' + ev if ev[0] == ?e
        hit.instance_variable_set(:@bit_score, bs)
        hit.instance_variable_set(:@evalue, ev)
        @hits << hit
      end
    end
    bucket.concat(@hits[offset, rows.size])
    offset += rows.size
  end #each
  @hits_found_again.each do |hit|
    hit.instance_variable_set(:@again, true)
  end
  @parse_hitlist = true
end
|
|
687
|
+
private :parse_hitlist
|
|
688
|
+
|
|
689
|
+
# Parses statistics for the iteration.
|
|
690
|
+
# Parses the statistics blocks of the iteration.
#
# Each element of @f0stat is expected to hold a row of column labels
# followed by a row of numbers. The numbers are stored (as Strings)
# under 'Lambda', 'K' and 'H' into @lambda/@kappa/@entropy, or into the
# @gapped_* counterparts when the block starts with "Gapped".
# Runs only once; later calls return immediately.
def parse_stat
  return if defined?(@parse_stat)

  @f0stat.each do |block|
    scanner = StringScanner.new(block)
    scanner.skip(/\s*/)
    gapped = scanner.skip(/Gapped\s*/) ? true : nil

    # header row: collect the column labels in order
    labels = []
    while (word = scanner.scan(/\w+/))
      labels << word
      scanner.skip(/ */)
    end
    scanner.skip(/\s*/)

    # value row: pair each number with the label at the same position
    values = {}
    while (number = scanner.scan(/[e\.\-\d]+/))
      values[labels.shift] = number
      scanner.skip(/ */)
    end

    if gapped
      @gapped_lambda  = values['Lambda']
      @gapped_kappa   = values['K']
      @gapped_entropy = values['H']
    else
      @lambda  = values['Lambda']
      @kappa   = values['K']
      @entropy = values['H']
    end
  end
  @parse_stat = true
end
private :parse_stat
|
|
726
|
+
|
|
727
|
+
# Defines attributes which call +parse_stat+ before accessing.
|
|
728
|
+
# Class-level macro: for every given name, defines a reader method that
# calls parse_stat and then returns the instance variable of that name.
def self.method_after_parse_stat(*names)
  names.each { |attr| module_eval("def #{attr}; parse_stat; @#{attr}; end") }
end
|
|
733
|
+
private_class_method :method_after_parse_stat
|
|
734
|
+
|
|
735
|
+
# Lambda taken from the ungapped statistics block (a String as scanned
# from the report, or nil when the block is absent).
# The attr_reader line never runs; it is a placeholder ("dummy").
attr_reader :lambda if false #dummy
method_after_parse_stat :lambda
# K (kappa) taken from the ungapped statistics block.
attr_reader :kappa if false #dummy
method_after_parse_stat :kappa
# H (entropy) taken from the ungapped statistics block.
attr_reader :entropy if false #dummy
method_after_parse_stat :entropy

# Lambda taken from the statistics block that starts with "Gapped".
attr_reader :gapped_lambda if false #dummy
method_after_parse_stat :gapped_lambda
# K (kappa) taken from the statistics block that starts with "Gapped".
attr_reader :gapped_kappa if false #dummy
method_after_parse_stat :gapped_kappa
# H (entropy) taken from the statistics block that starts with "Gapped".
attr_reader :gapped_entropy if false #dummy
method_after_parse_stat :gapped_entropy
|
|
754
|
+
|
|
755
|
+
# Defines attributes which delegate to @f0dbstat objects.
|
|
756
|
+
# Class-level macro: for every given name, defines a method that simply
# forwards the call of the same name to the @f0dbstat object.
def self.delegate_to_f0dbstat(*names)
  names.each { |attr| module_eval("def #{attr}; @f0dbstat.#{attr}; end") }
end
|
|
761
|
+
private_class_method :delegate_to_f0dbstat
|
|
762
|
+
|
|
763
|
+
# name (title or filename) of the database
attr_reader :database if false #dummy
delegate_to_f0dbstat :database
# posted date of the database
attr_reader :posted_date if false #dummy
delegate_to_f0dbstat :posted_date

# number of sequences in database
attr_reader :db_num if false #dummy
delegate_to_f0dbstat :db_num
# number of letters in database
attr_reader :db_len if false #dummy
delegate_to_f0dbstat :db_len
# effective length of the database
attr_reader :eff_space if false #dummy
delegate_to_f0dbstat :eff_space

# e-value threshold specified when BLAST was executed
attr_reader :expect if false #dummy
delegate_to_f0dbstat :expect
|
|
783
|
+
|
|
784
|
+
end #class Iteration
|
|
785
|
+
|
|
786
|
+
# Bio::Blast::Default::Report::Hit contains information about a hit.
|
|
787
|
+
# It may contain some Bio::Blast::Default::Report::HSP objects.
|
|
788
|
+
class Hit
  # Creates a new Hit object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Iteration class.
  # Users should not call the method directly.
  #
  # +data+ is an Array of report fragments. The first element (the hit
  # name block) is consumed, followed by every fragment that starts a
  # new HSP (a line beginning with whitespace and "Score"); HSP.new
  # removes the fragments it uses from the same array.
  def initialize(data)
    @f0hitname = data.shift
    @hsps = []
    while r = data[0] and /^\s+Score/ =~ r
      @hsps << HSP.new(data)
    end
    @again = false
  end

  # Hsp(high-scoring segment pair)s of the hit.
  # Returns an array of Bio::Blast::Default::Report::HSP objects.
  attr_reader :hsps

  # Iterates over each hsp(high-scoring segment pair) of the hit.
  # Yields a Bio::Blast::Default::Report::HSP object.
  def each
    @hsps.each { |x| yield x }
  end

  # (PSI-BLAST)
  # Returns true if the hit is found again in the iteration.
  # Otherwise, returns false or nil.
  def found_again?
    @again
  end

  # Returns first hsp's score, or nil if there are no hsps.
  def score
    (h = @hsps.first) ? h.score : nil
  end

  # Returns first hsp's bit score.
  # (shown in hit list of BLAST result)
  #
  # @bit_score may already have been injected from the hit list by the
  # Iteration; only when it was never set is the first hsp consulted.
  def bit_score
    unless defined?(@bit_score)
      if h = @hsps.first then
        @bit_score = h.bit_score
      end
    end
    @bit_score
  end

  # Returns first hsp's e-value.
  # (shown in hit list of BLAST result)
  #
  # @evalue may already have been injected from the hit list by the
  # Iteration; only when it was never set is the first hsp consulted.
  def evalue
    unless defined?(@evalue)
      if h = @hsps.first then
        @evalue = h.evalue
      end
    end
    @evalue
  end

  # Parses name of the hit: joins the definition lines with single
  # spaces into @definition and reads the trailing "Length = N" into
  # @len (nil when no such line is found). Runs only once.
  def parse_hitname
    unless defined?(@parse_hitname)
      sc = StringScanner.new(@f0hitname)
      sc.skip(/\s*/)
      sc.skip(/\>/)
      d = []
      begin
        d << sc.scan(/.*/)
        sc.skip(/\s*/)
      end until !sc.rest? or r = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
      # The pattern accepts thousands separators ("1,234"); drop them
      # before converting, because String#to_i stops at the first comma.
      @len = (r ? sc[1].delete(',').to_i : nil)
      @definition = d.join(" ")
      @parse_hitname = true
    end
  end
  private :parse_hitname

  # Returns length of the hit.
  def len; parse_hitname; @len; end

  # Returns definition of the hit.
  def definition; parse_hitname; @definition; end

  #--
  # Aliases to keep compatibility with Bio::Fasta::Report::Hit.
  #alias target_id accession
  alias target_def definition
  alias target_len len
  #++

  # Sends given method to the first hsp or returns nil if
  # there are no hsps.
  def hsp_first(m)
    (h = hsps.first) ? h.send(m) : nil
  end
  private :hsp_first

  #--
  # Shortcut methods for the best Hsp
  # (Compatibility method with FASTA)
  #++

  # Same as hsps.first.identity.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def identity; hsp_first :identity; end

  # Same as hsps.first.align_len.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def overlap; hsp_first :align_len; end

  # Same as hsps.first.qseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_seq; hsp_first :qseq; end

  # Same as hsps.first.hseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_seq; hsp_first :hseq; end

  # Same as hsps.first.midline.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def midline; hsp_first :midline; end

  # Same as hsps.first.query_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_start; hsp_first :query_from; end

  # Same as hsps.first.query_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_end; hsp_first :query_to; end

  # Same as hsps.first.hit_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_start; hsp_first :hit_from; end

  # Same as hsps.first.hit_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_end; hsp_first :hit_to; end

  # Returns an array which contains
  # [ query_start, query_end, target_start, target_end ].
  # (Compatibility method with FASTA)
  def lap_at
    [ query_start, query_end, target_start, target_end ]
  end
end #class Hit
|
|
941
|
+
|
|
942
|
+
# Bio::Blast::Default::Report::HSP holds information about the hsp
|
|
943
|
+
# (high-scoring segment pair).
|
|
944
|
+
class HSP
  # Creates new HSP object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Hit class.
  # Users should not call the method directly.
  #
  # Consumes from +data+ the score block followed by every fragment
  # that starts with "Query:" or "Sbjct:".
  def initialize(data)
    @f0score = data.shift
    @f0alignment = []
    while r = data[0] and /^(Query|Sbjct)\:/ =~ r
      @f0alignment << data.shift
    end
  end

  # Parses scores, identities, positives, gaps, and so on.
  # Raises ScanError when the score block contains an unknown item or
  # when the alignment lengths of the items disagree.
  # Runs only once.
  def parse_score
    unless defined?(@parse_score)
      sc = StringScanner.new(@f0score)
      while sc.rest?
        sc.skip(/\s*/)
        # "Expect(n)" carries the number of HSPs in parentheses; n may
        # have more than one digit.
        if sc.skip(/Expect(?:\(\d+\))? *\= *([e\-\.\d]+)/) then
          @evalue = sc[1]
          #@evalue = '1' + @evalue if @evalue[0] == ?e
        elsif sc.skip(/Score *\= *([e\-\.\d]+) *bits *\( *([e\-\.\d]+) *\)/) then
          @bit_score = sc[1]
          @score = sc[2]
        elsif sc.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/) then
          # All three items share the same denominator, the aligned length.
          alen = sc[3].to_i
          @align_len = alen unless defined?(@align_len)
          raise ScanError if alen != @align_len
          case sc[1]
          when 'Identities'
            @identity = sc[2].to_i
            @percent_identity = sc[4]
          when 'Positives'
            @positive = sc[2].to_i
            @percent_positive = sc[4]
          when 'Gaps'
            @gaps = sc[2].to_i
            @percent_gaps = sc[4]
          else
            raise ScanError
          end
        elsif sc.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/) then
          @query_strand = sc[1]
          @hit_strand = sc[2]
          if sc[1] == sc[2] then
            @query_frame = 1
            @hit_frame = 1
          elsif sc[1] == 'Plus' then # Plus/Minus
            # complement sequence against xml(-m 7)
            # In xml(-m 8), -1=>Plus, 1=>Minus ???
            #@query_frame = -1
            #@hit_frame = 1
            @query_frame = 1
            @hit_frame = -1
          else # Minus/Plus
            @query_frame = -1
            @hit_frame = 1
          end
        elsif sc.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/) then
          @query_frame = sc[1].to_i
          if sc[2] then
            @hit_frame = sc[3].to_i
          end
        elsif sc.skip(/Score *\= *([e\-\.\d]+) +\(([e\-\.\d]+) *bits *\)/) then
          #WU-BLAST
          @score = sc[1]
          @bit_score = sc[2]
        elsif sc.skip(/P *\= * ([e\-\.\d]+)/) then
          #WU-BLAST
          @p_sum_n = nil
          @pvalue = sc[1]
        elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\-\.\d]+)/) then
          #WU-BLAST
          @p_sum_n = sc[1].to_i
          @pvalue = sc[2]
        else
          raise ScanError
        end
        sc.skip(/\s*\,?\s*/)
      end
      @parse_score = true
    end
  end
  private :parse_score

  # Defines attributes which call parse_score before accessing.
  def self.method_after_parse_score(*names)
    names.each do |x|
      module_eval("def #{x}; parse_score; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_score

  # bit score
  attr_reader :bit_score if false #dummy
  method_after_parse_score :bit_score
  # score
  attr_reader :score if false #dummy
  method_after_parse_score :score

  # e-value
  attr_reader :evalue if false #dummy
  method_after_parse_score :evalue

  # frame of the query
  attr_reader :query_frame if false #dummy
  method_after_parse_score :query_frame
  # frame of the hit
  attr_reader :hit_frame if false #dummy
  method_after_parse_score :hit_frame

  # Identity (number of identical nucleotides or amino acids)
  attr_reader :identity if false #dummy
  method_after_parse_score :identity
  # percent of identical nucleotides or amino acids
  attr_reader :percent_identity if false #dummy
  method_after_parse_score :percent_identity

  # Positives (number of positive hit amino acids or nucleotides)
  attr_reader :positive if false #dummy
  method_after_parse_score :positive
  # percent of positive hit amino acids or nucleotides
  attr_reader :percent_positive if false #dummy
  method_after_parse_score :percent_positive

  # Gaps (number of gaps)
  attr_reader :gaps if false #dummy
  method_after_parse_score :gaps
  # percent of gaps
  attr_reader :percent_gaps if false #dummy
  method_after_parse_score :percent_gaps

  # aligned length
  attr_reader :align_len if false #dummy
  method_after_parse_score :align_len

  # strand of the query ("Plus" or "Minus" or nil)
  attr_reader :query_strand if false #dummy
  method_after_parse_score :query_strand

  # strand of the hit ("Plus" or "Minus" or nil)
  attr_reader :hit_strand if false #dummy
  method_after_parse_score :hit_strand

  # Parses alignments: concatenates the query, middle and subject lines
  # of every alignment fragment and records the first start and the
  # last end position of both sequences.
  # Raises ScanError on an unexpected line or line order.
  # Runs only once.
  def parse_alignment
    unless defined?(@parse_alignment)
      qpos1 = nil
      qpos2 = nil
      spos1 = nil
      spos2 = nil
      qseq = []
      sseq = []
      mseq = []
      pos_st = nil   # column where the sequence text starts
      len_seq = 0    # width of the sequence text on the current line
      nextline = :q  # kind of line expected next (:q, :m or :s)
      @f0alignment.each do |x|
        sc = StringScanner.new(x)
        while sc.rest?
          if r = sc.skip(/(Query|Sbjct)\: *(\d+) */) then
            pos_st = r
            qs = sc[1]
            pos1 = sc[2]
            len_seq = sc.skip(/[^ ]*/)
            seq = sc[0]
            sc.skip(/ *(\d+) *\n/)
            pos2 = sc[1]
            if qs == 'Query' then
              raise ScanError unless nextline == :q
              qpos1 = pos1.to_i unless qpos1
              qpos2 = pos2.to_i
              qseq << seq
              nextline = :m
            elsif qs == 'Sbjct' then
              if nextline == :m then
                # The middle line was missing; pad it with blanks.
                mseq << (' ' * len_seq)
              end
              spos1 = pos1.to_i unless spos1
              spos2 = pos2.to_i
              sseq << seq
              nextline = :q
            else
              raise ScanError
            end
          elsif r = sc.scan(/ {6}.+/) then
            raise ScanError unless nextline == :m
            # Cut the middle line to the columns of the query sequence.
            mseq << r[pos_st, len_seq]
            sc.skip(/\n/)
            nextline = :s
          elsif r = sc.skip(/pattern +\d+.+/) then
            # PHI-BLAST
            # do nothing
            sc.skip(/\n/)
          else
            raise ScanError
          end
        end #while
      end #each
      @qseq = qseq.join('')
      @hseq = sseq.join('')
      @midline = mseq.join('')
      @query_from = qpos1
      @query_to = qpos2
      @hit_from = spos1
      @hit_to = spos2
      @parse_alignment = true
    end #unless
  end #def
  private :parse_alignment

  # Defines attributes which call parse_alignment before accessing.
  def self.method_after_parse_alignment(*names)
    names.each do |x|
      module_eval("def #{x}; parse_alignment; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_alignment

  # query sequence (with gaps) of the alignment of the hsp
  attr_reader :qseq if false #dummy
  method_after_parse_alignment :qseq
  # hit sequence (with gaps) of the alignment of the hsp
  attr_reader :hseq if false #dummy
  method_after_parse_alignment :hseq

  # middle line of the alignment of the hsp
  attr_reader :midline if false #dummy
  method_after_parse_alignment :midline

  # start position of the query (the first position is 1)
  attr_reader :query_from if false #dummy
  method_after_parse_alignment :query_from

  # end position of the query (including its position)
  attr_reader :query_to if false #dummy
  method_after_parse_alignment :query_to

  # start position of the hit (the first position is 1)
  attr_reader :hit_from if false #dummy
  method_after_parse_alignment :hit_from

  # end position of the hit (including its position)
  attr_reader :hit_to if false #dummy
  method_after_parse_alignment :hit_to

end #class HSP
|
|
1195
|
+
|
|
1196
|
+
end #class Report
|
|
1197
|
+
|
|
1198
|
+
# NCBI BLAST default (-m 0 option) output parser for TBLAST.
|
|
1199
|
+
# All methods are equal to Bio::Blast::Default::Report.
|
|
1200
|
+
# Only DELIMITER (and RS) is different.
|
|
1201
|
+
class Report_TBlast < Report
  # Record separator marking the start of each TBLAST entry.
  RS = "\nTBLAST"
  # Delimiter of each entry for TBLAST (the same string object as RS).
  # Bio::FlatFile uses it.
  DELIMITER = RS
end #class Report_TBlast
|
|
1205
|
+
|
|
1206
|
+
end #module Default
|
|
1207
|
+
end #class Blast
|
|
1208
|
+
end #module Bio
|
|
1209
|
+
|
|
1210
|
+
######################################################################
|
|
1211
|
+
|
|
1212
|
+
# Demonstration / manual test: when this file is run directly, parses
# every report read from ARGF and prints each supported attribute of
# the report, its iterations, hits and hsps.
# Commented-out print lines name attributes that are not printed here.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Default::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Default::Report\n"
      puts
      print " rep.program #=> "; p rep.program
      print " rep.version #=> "; p rep.version
      print " rep.reference #=> "; p rep.reference
      print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      print " rep.version_number #=> "; p rep.version_number
      print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      print "# === Message (last iteration's)\n"
      puts
      print " rep.message #=> "; p rep.message
      #puts
      print " rep.converged? #=> "; p rep.converged?
      puts

      print "# === Iterations\n"
      puts
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Default::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size
        print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size
        # hits_for_pattern is nil unless the report has pattern sections
        if itr.hits_for_pattern then
          itr.hits_for_pattern.each_with_index do |hp, hpi|
            print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size
          end
        end
        print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          print "# --- Bio::Blast::Default::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::HSP"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
|
|
1401
|
+
|
|
1402
|
+
######################################################################
|