bio 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
@@ -0,0 +1,350 @@
|
|
1
|
+
#
|
2
|
+
# = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2005 GOTO Naohisa <ng@bioruby.org>
|
5
|
+
# License:: LGPL
|
6
|
+
#
|
7
|
+
#--
|
8
|
+
# This library is free software; you can redistribute it and/or
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
10
|
+
# License as published by the Free Software Foundation; either
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
16
|
+
# Lesser General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
19
|
+
# License along with this library; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
# $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
|
24
|
+
#
|
25
|
+
# Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
|
26
|
+
#
|
27
|
+
# = Acknowledgements
|
28
|
+
#
|
29
|
+
# Thanks to Tomoaki NISHIYAMA <tomoakin __at__ kenroku.kanazawa-u.ac.jp>
|
30
|
+
# for providing bl2seq parser patches based on
|
31
|
+
# lib/bio/appl/blast/format0.rb.
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'bio/appl/blast/format0'
|
35
|
+
|
36
|
+
module Bio
|
37
|
+
class Blast
|
38
|
+
|
39
|
+
class Bl2seq
|
40
|
+
|
41
|
+
# Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
|
42
|
+
# It inherits Bio::Blast::Default::Report.
|
43
|
+
# Most of its methods are the same as Bio::Blast::Default::Report,
|
44
|
+
# but it lacks many methods.
|
45
|
+
class Report < Bio::Blast::Default::Report
|
46
|
+
|
47
|
+
# Delimiter of each entry. Bio::FlatFile uses it.
|
48
|
+
# In Bio::Blast::Bl2seq::Report, it is nil (one entry per file).
|
49
|
+
DELIMITER = RS = nil
|
50
|
+
|
51
|
+
undef format0_parse_header
|
52
|
+
undef program, version, version_number, version_date,
|
53
|
+
message, converged?, reference, db
|
54
|
+
|
55
|
+
# Splits headers.
|
56
|
+
def format0_split_headers(data)
  # Removes the first element of +data+ and keeps it in @f0query.
  # The removed element is also the return value.
  query_part = data.shift
  @f0query = query_part
end
|
59
|
+
private :format0_split_headers
|
60
|
+
|
61
|
+
# Splits the search results.
|
62
|
+
def format0_split_search(data)
  # Returns an Array of Iteration objects built from +data+.
  # +data+ is handed to Iteration.new as-is, which is expected to
  # consume the chunks it parses (NOTE(review): confirm against
  # Iteration#initialize and Hit#initialize).
  iterations = []
  # Keep building iterations while the next chunk contains a line
  # beginning with ">".
  while (chunk = data[0]) && /^\>/ =~ chunk
    iterations << Iteration.new(data)
  end
  # Always return at least one Iteration, even when no ">" chunk exists.
  iterations << Iteration.new(data) if iterations.empty?
  iterations
end
|
72
|
+
private :format0_split_search
|
73
|
+
|
74
|
+
# Stores format0 database statistics.
|
75
|
+
# Internal use only. Users must not use the class.
|
76
|
+
class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc:
  # Number of sequences in the database.
  # Read from the 'Number of Sequences' entry of @hash after
  # parse_params has run; the value is cached in @db_num.
  def db_num
    return @db_num if defined?(@db_num)
    parse_params
    @db_num = @hash['Number of Sequences'].to_i
  end

  # Number of letters in the database.
  # Read from the 'length of database' entry of @hash after
  # parse_params has run; the value is cached in @db_len.
  def db_len
    return @db_len if defined?(@db_len)
    parse_params
    @db_len = @hash['length of database'].to_i
  end
end #class F0dbstat
|
95
|
+
|
96
|
+
# Bio::Blast::Bl2seq::Report::Iteration stores information about
|
97
|
+
# a iteration.
|
98
|
+
# Normally, it may contain some Bio::Blast::Bl2seq::Report::Hit objects.
|
99
|
+
#
|
100
|
+
# Note that its main reason for existence is to keep compatibility
|
101
|
+
# between Bio::Blast::Default::Report::* classes.
|
102
|
+
class Iteration < Bio::Blast::Default::Report::Iteration
  # The hits of the iteration, as an Array of Hit objects.
  attr_reader :hits

  # Builds the iteration from +data+.
  # Intended to be called only from the report class; users should
  # not call it directly.
  def initialize(data)
    @f0stat = []
    @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance
    @hits = []
    @num = 1
    # One Hit per run of chunks whose next element contains a line
    # starting with ">"; +data+ is passed to Hit.new unchanged.
    @hits << Hit.new(data) while (chunk = data[0]) && /^\>/ =~ chunk
  end

  # Methods of the parent class that are removed from this class.
  undef message, pattern_in_database,
        pattern, pattern_positions, hits_found_again,
        hits_newly_found, hits_for_pattern, parse_hitlist,
        converged?
end #class Iteration
|
126
|
+
|
127
|
+
# Bio::Blast::Bl2seq::Report::Hit contains information about a hit.
|
128
|
+
# It may contain some Bio::Blast::Default::Report::HSP objects.
|
129
|
+
# All methods are the same as Bio::Blast::Default::Report::Hit class.
|
130
|
+
# Please refer to Bio::Blast::Default::Report::Hit.
|
131
|
+
class Hit < Bio::Blast::Default::Report::Hit
  # Intentionally empty: nothing is added to or removed from
  # Bio::Blast::Default::Report::Hit.
end #class Hit
|
133
|
+
|
134
|
+
# Bio::Blast::Bl2seq::Report::HSP holds information about the hsp
|
135
|
+
# (high-scoring segment pair).
|
136
|
+
# NOTE that the HSP class below is NOT used because
|
137
|
+
# Ruby's constants namespace are normally statically determined
|
138
|
+
# and HSP object is created in Bio::Blast::Default::Report::Hit class.
|
139
|
+
# Please refer to Bio::Blast::Default::Report::HSP.
|
140
|
+
class HSP < Bio::Blast::Default::Report::HSP
  # Intentionally empty: nothing is added to or removed from
  # Bio::Blast::Default::Report::HSP.
end #class HSP
|
142
|
+
|
143
|
+
end #class Report
|
144
|
+
end #class Bl2seq
|
145
|
+
|
146
|
+
end #class Blast
|
147
|
+
end #module Bio
|
148
|
+
|
149
|
+
######################################################################
|
150
|
+
|
151
|
+
# Demo driver: when this file is run directly, parse the bl2seq report(s)
# read from ARGF and dump the values of the public accessors.
# Lines prefixed with "#@#" call methods that are undef'ed in the
# Bl2seq classes and are therefore disabled.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Bl2seq::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Bl2seq::Report\n"
      puts
      #@#print " rep.program #=> "; p rep.program
      #@#print " rep.version #=> "; p rep.version
      #@#print " rep.reference #=> "; p rep.reference
      #@#print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      #@#print " rep.version_number #=> "; p rep.version_number
      #@#print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      #@#print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      #@#print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      #@#print "# === Message (last iteration's)\n"
      #@#puts
      #@#print " rep.message #=> "; p rep.message
      #puts
      #@#print " rep.converged? #=> "; p rep.converged?
      #@#puts

      print "# === Iterations\n"
      puts
      # Fixed: the label used to read "rep.itrerations".
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Bl2seq::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        #@#print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        #@#print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size;
        #@#print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size;
        #@#if itr.hits_for_pattern then
        #@#itr.hits_for_pattern.each_with_index do |hp, hpi|
        #@#print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size;
        #@#end
        #@#end
        #@#print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          # Fixed: the label used to name a non-existent
          # Bio::Blast::Bl2seq::Default::Report::Hit class.
          print "# --- Bio::Blast::Bl2seq::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          # Fixed: "vaules" -> "values" so the closing marker matches
          # the opening one printed above.
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::HSP (Bio::Blast::Bl2seq::Report::HSP)"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end #itr.hits.each_with_index
      end #rep.iterations.each
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
|
340
|
+
|
341
|
+
######################################################################
|
342
|
+
|
343
|
+
=begin
|
344
|
+
|
345
|
+
= Bio::Blast::Bl2seq::Report
|
346
|
+
|
347
|
+
NCBI bl2seq (BLAST 2 sequences) output parser
|
348
|
+
|
349
|
+
=end
|
350
|
+
|
@@ -0,0 +1,269 @@
|
|
1
|
+
#
|
2
|
+
# bio/appl/blast.rb - BLAST wrapper
|
3
|
+
#
|
4
|
+
# Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
|
5
|
+
# Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
# $Id: blast.rb,v 1.27 2005/12/18 17:28:55 nakao Exp $
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'net/http'
|
25
|
+
require 'cgi' unless defined?(CGI)
|
26
|
+
require 'bio/command'
|
27
|
+
require 'shellwords'
|
28
|
+
|
29
|
+
module Bio
|
30
|
+
|
31
|
+
class Blast
|
32
|
+
|
33
|
+
autoload :Fastacmd, 'bio/io/fastacmd'
|
34
|
+
autoload :Report, 'bio/appl/blast/report'
|
35
|
+
autoload :Default, 'bio/appl/blast/format0'
|
36
|
+
autoload :WU, 'bio/appl/blast/wublast'
|
37
|
+
autoload :Bl2seq, 'bio/appl/bl2seq/report'
|
38
|
+
|
39
|
+
include Bio::Command::Tools
|
40
|
+
|
41
|
+
def initialize(program, db, opt = [], server = 'local')
  # Creates a BLAST factory.
  #
  # program:: value passed to blastall's -p option (e.g. 'blastp')
  # db::      value passed to blastall's -d option
  # opt::     extra command-line options, either an Array-like object
  #           (anything responding to to_ary) or, for backward
  #           compatibility, a single String that is split with
  #           Shellwords.shellwords
  # server::  suffix of the exec_* method used by #query
  @program = program
  @db = db
  @server = server

  @blastall = 'blastall'
  @matrix = nil
  @filter = nil

  @output = ''
  @parser = nil
  # Explicitly nil so that #format is well defined when the caller
  # supplies its own -m option below.
  @format = nil

  # Check the capability up front rather than rescuing NameError.
  a = if opt.respond_to?(:to_ary)
        opt.to_ary
      else
        # backward compatibility
        Shellwords.shellwords(opt)
      end

  # Choose a default output format only when the caller did not pass -m:
  # 7 when XMLParser or REXML is already loaded, otherwise 8.
  unless a.find { |x| /\A\-m/ =~ x.to_s }
    if defined?(XMLParser) || defined?(REXML)
      @format = 7
    else
      @format = 8
    end
  end
  @options = [ *a ]
end
|
68
|
+
attr_accessor :program, :db, :options, :server, :blastall, :matrix, :filter
|
69
|
+
attr_reader :output, :format
|
70
|
+
attr_writer :parser # to change :xmlparser, :rexml, :tab
|
71
|
+
|
72
|
+
def self.local(program, db, option = '')
  # Factory shortcut: same as new with the server fixed to 'local'.
  new(program, db, option, 'local')
end
|
75
|
+
|
76
|
+
def self.remote(program, db, option = '', server = 'genomenet')
  # Factory shortcut: same as new, with the server defaulting to
  # 'genomenet'.
  new(program, db, option, server)
end
|
79
|
+
|
80
|
+
def query(query)
  # Dispatches to the exec_<server> method named by @server, passing
  # the query converted with to_s, and returns whatever that method
  # returns.
  handler = "exec_#{@server}"
  send(handler, query.to_s)
end
|
83
|
+
|
84
|
+
def option
  # Kept for backward compatibility: returns the result of passing
  # @options to make_command_line.
  make_command_line(@options)
end
|
88
|
+
|
89
|
+
def option=(str)
  # Kept for backward compatibility: splits +str+ with
  # Shellwords.shellwords and stores the resulting Array in @options.
  words = Shellwords.shellwords(str)
  @options = words
end
|
93
|
+
|
94
|
+
# The method Bio::Blast.reports was moved here from bio/appl/blast/report.rb.
|
95
|
+
# only for xml format
|
96
|
+
def self.reports(input, parser = nil)
  # Splits +input+ on "</BlastOutput>\n" and builds one Report per
  # XML document found.
  #
  # input::  a String, an IO, or any object responding to each_line
  #          (or, failing that, each) with a separator argument
  # parser:: passed through to Report.new
  #
  # With a block, each Report is yielded and an empty Array is
  # returned; without a block, the Array of Reports is returned.

  # String#each no longer exists in Ruby 1.9 and later, so prefer
  # each_line and fall back to each for other objects.
  iterator = input.respond_to?(:each_line) ? :each_line : :each
  ary = []
  input.__send__(iterator, "</BlastOutput>\n") do |xml|
    xml.sub!(/[^<]*(<?)/, '\1') # skip before <?xml> tag
    next if xml.empty? # skip trailing no hits
    report = Report.new(xml, parser)
    if block_given?
      yield report
    else
      ary << report
    end
  end
  return ary
end
|
109
|
+
|
110
|
+
|
111
|
+
private
|
112
|
+
|
113
|
+
|
114
|
+
def parse_result(data)
  # Wraps the raw output in a Report, using the parser selected
  # through the parser= writer (@parser).
  Report.new(data, @parser)
end
|
117
|
+
|
118
|
+
|
119
|
+
def exec_local(query)
  # Builds the blastall command line from the factory settings, runs it
  # through call_command_local with +query+, stores the raw output in
  # @output and returns the result of parse_result.
  cmd = [ @blastall, '-p', @program, '-d', @db ]
  # Optional flags are appended only when their value is set.
  [ [ '-M', @matrix ], [ '-F', @filter ] ].each do |flag, value|
    cmd.push(flag, value) if value
  end
  cmd.push('-m', @format.to_s) if @format
  cmd.concat(@options) if @options

  @output = call_command_local(cmd, query)
  parse_result(@output)
end
|
133
|
+
|
134
|
+
|
135
|
+
def exec_genomenet(query)
  # Runs the search on the GenomeNet BLAST CGI (blast.genome.jp).
  # The form is POSTed, the "Show all result" link is followed, the
  # text inside <pre> is extracted into @output, and the result of
  # parse_result is returned.
  # Raises RuntimeError ('cannot understand response') when the
  # response does not have the expected shape.
  host = "blast.genome.jp"
  #path = "/sit-bin/nph-blast"
  path = "/sit-bin/blast" #2005.08.12

  opt = []
  opt.concat([ '-m', @format.to_s ]) if @format
  opt.concat(@options) if @options

  # Values are stored unescaped here and escaped once when the
  # request body is built below.
  form = {
    'style' => 'raw',
    'prog' => @program,
    'dbname' => @db,
    'sequence' => query,
    'other_param' => make_command_line_unix(opt),
    'matrix' => @matrix || 'blosum62',
    'filter' => @filter || 'T',
    'V_value' => 500, # default value for GenomeNet
    'B_value' => 250, # default value for GenomeNet
    'alignment_view' => 0,
  }

  # URL-escape every value (previously only 'sequence' and
  # 'other_param' were escaped), so that characters such as '&' or
  # spaces in the other fields cannot corrupt the form body.
  data = []
  form.each do |k, v|
    data.push("#{k}=#{CGI.escape(v.to_s)}") if v
  end

  http = Net::HTTP.new(host)
  http.open_timeout = 300
  http.read_timeout = 600
  result, = http.post(path, data.join('&'))
  @output = result.body

  # workaround 2005.08.12
  # The first response only links to the full result; $2 is the
  # path part (/tmp/...) of that link.
  unless /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s
    raise 'cannot understand response'
  end
  result, = http.get($2)
  @output = result.body

  txt = @output.to_s.split(/\<pre\>/)[1]
  raise 'cannot understand response' unless txt
  txt.sub!(/\<\/pre\>.*\z/m, '')       # drop everything from </pre> on
  txt.sub!(/.*^ \-{20,}\s*/m, '')      # drop everything up to the dashed rule
  @output = txt.gsub(/\&lt\;/, '<')
  parse_result(@output)
end
|
191
|
+
|
192
|
+
|
193
|
+
def exec_ncbi(query)
  # Placeholder for the 'ncbi' server: always raises
  # NotImplementedError; +query+ is not used.
  raise NotImplementedError
end
|
196
|
+
end
|
197
|
+
|
198
|
+
end
|
199
|
+
|
200
|
+
|
201
|
+
# Demo driver: when this file is run directly, send the query read from
# ARGF to the GenomeNet server and print the resulting report.
if __FILE__ == $0
  begin
    require 'pp'
    alias p pp
  rescue LoadError, StandardError
    # pp is optional. LoadError is listed explicitly because it is not
    # a StandardError, so a bare rescue would not catch a failed
    # require; in that case the built-in Kernel#p is used.
  end

  # serv = Bio::Blast.local('blastn', 'hoge.nuc')
  # serv = Bio::Blast.local('blastp', 'hoge.pep')
  serv = Bio::Blast.remote('blastp', 'genes')

  query = ARGF.read
  p serv.query(query)
end
|
215
|
+
|
216
|
+
|
217
|
+
=begin
|
218
|
+
|
219
|
+
= Bio::Blast
|
220
|
+
|
221
|
+
--- Bio::Blast.new(program, db, option = '', server = 'local')
|
222
|
+
--- Bio::Blast.local(program, db, option = '')
|
223
|
+
--- Bio::Blast.remote(program, db, option = '', server = 'genomenet')
|
224
|
+
|
225
|
+
Returns a blast factory object (Bio::Blast).
|
226
|
+
|
227
|
+
For developers: you can add support for a server 'hoge' by adding an
|
228
|
+
exec_hoge(query) method.
|
229
|
+
|
230
|
+
--- Bio::Blast#query(query)
|
231
|
+
|
232
|
+
Executes a BLAST search and returns a Report object (Bio::Blast::Report).
|
233
|
+
|
234
|
+
--- Bio::Blast#output
|
235
|
+
|
236
|
+
Returns a String containing the raw (as-is) output of the BLAST execution.
|
237
|
+
|
238
|
+
--- Bio::Blast#program
|
239
|
+
--- Bio::Blast#db
|
240
|
+
--- Bio::Blast#options
|
241
|
+
--- Bio::Blast#server
|
242
|
+
--- Bio::Blast#blastall
|
243
|
+
--- Bio::Blast#filter
|
244
|
+
|
245
|
+
Accessors for the factory parameters.
|
246
|
+
|
247
|
+
--- Bio::Blast#option
|
248
|
+
--- Bio::Blast#option=(str)
|
249
|
+
|
250
|
+
Get/set options by string.
|
251
|
+
|
252
|
+
== Available databases for Blast.remote(@program, @db, option, 'genomenet')
|
253
|
+
|
254
|
+
# ----------+-------+---------------------------------------------------
|
255
|
+
# @program | query | @db (supported in GenomeNet)
|
256
|
+
# ----------+-------+---------------------------------------------------
|
257
|
+
# blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
|
258
|
+
# ----------+-------+ pir, prf, pdbstr
|
259
|
+
# blastx | NA |
|
260
|
+
# ----------+-------+---------------------------------------------------
|
261
|
+
# blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
|
262
|
+
# ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
|
263
|
+
# tblastn | AA | genes-nt, genome, vgenes.nuc
|
264
|
+
# ----------+-------+---------------------------------------------------
|
265
|
+
|
266
|
+
See http://blast.genome.jp/ideas/ideas.html#blast for more details.
|
267
|
+
|
268
|
+
=end
|
269
|
+
|