bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005 GOTO Naohisa <ng@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
|
|
24
|
+
#
|
|
25
|
+
# Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
|
|
26
|
+
#
|
|
27
|
+
# = Acknowledgements
|
|
28
|
+
#
|
|
29
|
+
# Thanks to Tomoaki NISHIYAMA <tomoakin __at__ kenroku.kanazawa-u.ac.jp>
|
|
30
|
+
# for providing bl2seq parser patches based on
|
|
31
|
+
# lib/bio/appl/blast/format0.rb.
|
|
32
|
+
#
|
|
33
|
+
|
|
34
|
+
require 'bio/appl/blast/format0'
|
|
35
|
+
|
|
36
|
+
module Bio
class Blast

class Bl2seq

  # Parser for the output of NCBI bl2seq (BLAST 2 sequences).
  # This class (Bio::Blast::Bl2seq::Report) is derived from
  # Bio::Blast::Default::Report and shares most of its methods,
  # but a number of the inherited methods are removed (see the
  # +undef+ lists below).
  class Report < Bio::Blast::Default::Report

    # Delimiter of each entry, consulted by Bio::FlatFile.
    # It is nil for this class (1 entry per file).
    DELIMITER = RS = nil

    undef format0_parse_header
    undef program, version, version_number, version_date,
      message, converged?, reference, db

    # Splits headers: the first element of +data+ is shifted off and
    # stored as the query part.
    def format0_split_headers(data)
      @f0query = data.shift
    end
    private :format0_split_headers

    # Splits the search results.
    # An Iteration is created for as long as the first element of +data+
    # matches /^>/. When no Iteration was created that way, one is still
    # created so that the returned Array is never empty.
    def format0_split_search(data)
      result = []
      while (head = data[0]) && /^\>/ =~ head
        result.push(Iteration.new(data))
      end
      result.push(Iteration.new(data)) if result.empty?
      result
    end
    private :format0_split_search

    # Stores format0 database statistics.
    # Internal use only. Users must not use the class.
    class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc:
      # Returns number of sequences in database.
      # The value is computed on first use and cached in @db_num.
      def db_num
        return @db_num if defined?(@db_num)
        parse_params
        @db_num = @hash['Number of Sequences'].to_i
      end

      # Returns number of letters in database.
      # The value is computed on first use and cached in @db_len.
      def db_len
        return @db_len if defined?(@db_len)
        parse_params
        @db_len = @hash['length of database'].to_i
      end
    end #class F0dbstat

    # Bio::Blast::Bl2seq::Report::Iteration stores information about
    # an iteration.
    # Normally, it may contain some Bio::Blast::Bl2seq::Report::Hit objects.
    #
    # Note that the main reason for its existence is to keep compatibility
    # with the Bio::Blast::Default::Report::* classes.
    class Iteration < Bio::Blast::Default::Report::Iteration
      # Creates a new Iteration object.
      # It is designed to be called only internally from
      # the Bio::Blast::Default::Report class.
      # Users shall not use the method directly.
      #
      # A Hit is created for as long as the first element of +data+
      # matches /^>/.
      def initialize(data)
        @f0stat   = []
        @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance
        @hits     = []
        @num      = 1
        while (head = data[0]) && /^\>/ =~ head
          @hits.push(Hit.new(data))
        end
      end

      # Returns the hits of the iteration.
      # It returns an array of Bio::Blast::Bl2seq::Report::Hit objects.
      def hits
        @hits
      end

      undef message, pattern_in_database,
        pattern, pattern_positions, hits_found_again,
        hits_newly_found, hits_for_pattern, parse_hitlist,
        converged?
    end #class Iteration

    # Bio::Blast::Bl2seq::Report::Hit contains information about a hit.
    # It may contain some Bio::Blast::Default::Report::HSP objects.
    # All methods are the same as Bio::Blast::Default::Report::Hit class.
    # Please refer to Bio::Blast::Default::Report::Hit.
    class Hit < Bio::Blast::Default::Report::Hit
    end #class Hit

    # Bio::Blast::Bl2seq::Report::HSP holds information about the hsp
    # (high-scoring segment pair).
    # NOTE that the HSP class below is NOT used, because Ruby's constant
    # namespaces are normally determined statically and the HSP object is
    # created in the Bio::Blast::Default::Report::Hit class.
    # Please refer to Bio::Blast::Default::Report::HSP.
    class HSP < Bio::Blast::Default::Report::HSP
    end #class HSP

  end #class Report
end #class Bl2seq

end #class Blast
end #module Bio
|
|
148
|
+
|
|
149
|
+
######################################################################
|
|
150
|
+
|
|
151
|
+
if __FILE__ == $0

  # Demo: parses bl2seq output given on the command line / standard input
  # (ARGF) and dumps the values returned by the report, iteration, hit and
  # HSP accessors.

  # Prints " <prefix>.<name> #=> " followed by the inspected (Kernel#p)
  # result of calling +name+ on +obj+, once for each entry of +names+.
  show = lambda do |prefix, obj, names|
    names.each do |name|
      print " #{prefix}.#{name} #=> "
      p obj.__send__(name)
    end
  end

  Bio::FlatFile.open(Bio::Blast::Bl2seq::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Bl2seq::Report\n"
      puts
      # Deliberately not shown here (kept disabled in this demo):
      # rep.program, rep.version, rep.reference, rep.db, rep.query_id,
      # rep.version_number, rep.version_date
      show.call('rep', rep, %w( query_def query_len ))
      puts

      print "# === Parameters\n"
      puts
      # Not shown: rep.parameters, rep.inclusion, rep.filter, rep.pattern,
      # rep.entrez_query, rep.pattern_positions
      show.call('rep', rep,
                %w( matrix expect sc_match sc_mismatch gap_open gap_extend ))
      puts

      print "# === Statistics (last iteration's)\n"
      puts
      # Not shown: rep.statistics, rep.hsp_len
      show.call('rep', rep,
                %w( db_num db_len eff_space kappa lambda entropy ))
      puts
      show.call('rep', rep,
                %w( num_hits gapped_kappa gapped_lambda gapped_entropy
                    posted_date ))
      puts

      # Not shown: the "Message (last iteration's)" section
      # (rep.message, rep.converged?)

      print "# === Iterations\n"
      puts
      # Label fixed: it used to read "rep.itrerations", which is not the
      # method actually called below.
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Bl2seq::Report::Iteration\n"
        puts

        # Not shown: itr.statistics, itr.message, itr.hits_newly_found,
        # itr.hits_found_again, itr.hits_for_pattern, itr.converged?
        show.call('itr', itr, %w( num ))
        print " itr.hits.size #=> "; p itr.hits.size
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          # Label fixed: hits are Bio::Blast::Bl2seq::Report::Hit objects
          # (there is no Bio::Blast::Bl2seq::Default namespace).
          print "# --- Bio::Blast::Bl2seq::Report::Hit"
          print " ([#{i}])\n"
          puts

          # Not shown: hit.num, hit.hit_id, hit.accession
          show.call('hit', hit, %w( len definition found_again? ))

          print " --- compatible/shortcut ---\n"
          # Not shown: hit.query_id, hit.query_def, hit.query_len,
          # hit.target_id
          show.call('hit', hit, %w( target_def target_len ))

          print " --- first HSP's values (shortcut) ---\n"
          # Not shown: hit.overlap
          show.call('hit', hit, %w( evalue bit_score identity ))
          show.call('hit', hit, %w( query_seq midline target_seq ))
          show.call('hit', hit,
                    %w( query_start query_end target_start target_end
                        lap_at ))
          # Typo fixed in the closing marker ("vaules" -> "values").
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::HSP (Bio::Blast::Bl2seq::Report::HSP)"
              print " ([#{j}])\n"
              puts
              # Not shown: hsp.num, hsp.density, hsp.pattern_from,
              # hsp.pattern_to, hsp.mismatch_count
              show.call('hsp', hsp,
                        %w( bit_score score evalue identity gaps positive
                            align_len ))
              show.call('hsp', hsp, %w( query_frame query_from query_to ))
              show.call('hsp', hsp, %w( hit_frame hit_from hit_to ))
              show.call('hsp', hsp, %w( qseq midline hseq ))
              puts
              show.call('hsp', hsp, %w( percent_identity ))
              show.call('hsp', hsp,
                        %w( query_strand hit_strand percent_positive
                            percent_gaps ))
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
|
|
340
|
+
|
|
341
|
+
######################################################################
|
|
342
|
+
|
|
343
|
+
=begin
|
|
344
|
+
|
|
345
|
+
= Bio::Blast::Bl2seq::Report
|
|
346
|
+
|
|
347
|
+
NCBI bl2seq (BLAST 2 sequences) output parser
|
|
348
|
+
|
|
349
|
+
=end
|
|
350
|
+
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/appl/blast.rb - BLAST wrapper
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: blast.rb,v 1.27 2005/12/18 17:28:55 nakao Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'net/http'
|
|
25
|
+
require 'cgi' unless defined?(CGI)
|
|
26
|
+
require 'bio/command'
|
|
27
|
+
require 'shellwords'
|
|
28
|
+
|
|
29
|
+
module Bio

# BLAST wrapper (factory). An instance holds the program name, database,
# options and server name; #query runs the search through the
# exec_<server> method matching the configured server.
class Blast

  autoload :Fastacmd, 'bio/io/fastacmd'
  autoload :Report,   'bio/appl/blast/report'
  autoload :Default,  'bio/appl/blast/format0'
  autoload :WU,       'bio/appl/blast/wublast'
  autoload :Bl2seq,   'bio/appl/bl2seq/report'

  include Bio::Command::Tools

  # Creates a new BLAST factory.
  #
  # program:: program name (passed as '-p' locally, 'prog' on GenomeNet)
  # db::      database name (passed as '-d' locally, 'dbname' on GenomeNet)
  # opt::     additional options. An Array (anything responding to to_ary)
  #           is used as is; for backward compatibility anything else is
  #           treated as a command-line String and split with Shellwords.
  # server::  server name; #query dispatches to "exec_#{server}"
  #
  # Unless one of the options starts with '-m', the output format is set
  # to 7 when XMLParser or REXML is defined, and to 8 otherwise.
  def initialize(program, db, opt = [], server = 'local')
    @program  = program
    @db       = db
    @server   = server

    @blastall = 'blastall'
    @matrix   = nil
    @filter   = nil

    @output   = ''
    @parser   = nil
    # Stays nil when the caller supplies its own '-m' option.
    @format   = nil

    a = if opt.respond_to?(:to_ary)
          opt.to_ary
        else
          # backward compatibility
          Shellwords.shellwords(opt)
        end
    unless a.any? { |x| /\A\-m/ =~ x.to_s }
      if defined?(XMLParser) or defined?(REXML)
        @format = 7
      else
        @format = 8
      end
    end
    @options = [ *a ]
  end
  attr_accessor :program, :db, :options, :server, :blastall, :matrix, :filter
  attr_reader :output, :format
  attr_writer :parser  # to change :xmlparser, :rexml, :tab

  # Returns a factory that runs the search with the 'local' server.
  def self.local(program, db, option = '')
    self.new(program, db, option, 'local')
  end

  # Returns a factory that runs the search on the given server
  # ('genomenet' by default).
  def self.remote(program, db, option = '', server = 'genomenet')
    self.new(program, db, option, server)
  end

  # Executes the search for +query+ (converted with to_s) by calling the
  # exec_<server> method for the configured server, and returns its result.
  def query(query)
    send("exec_#{@server}", query.to_s)
  end

  # Returns the options as a command-line string (backward compatibility).
  def option
    make_command_line(@options)
  end

  # Sets the options from a command-line string, which is split with
  # Shellwords (backward compatibility).
  def option=(str)
    @options = Shellwords.shellwords(str)
  end

  # The method Bio::Blast.reports was moved here from
  # bio/appl/blast/report.rb. It is only for the XML format.
  #
  # +input+ is read in chunks terminated by "</BlastOutput>\n" and a
  # Report is created from each non-empty chunk. With a block, every
  # Report is yielded and an empty Array is returned; without a block,
  # the Array of Report objects is returned.
  def self.reports(input, parser = nil)
    ary = []
    input.each("</BlastOutput>\n") do |xml|
      xml.sub!(/[^<]*(<?)/, '\1')         # skip before <?xml> tag
      next if xml.empty?                  # skip trailing no hits
      if block_given?
        yield Report.new(xml, parser)
      else
        ary << Report.new(xml, parser)
      end
    end
    return ary
  end


  private


  # Builds a Report from raw output, using the parser set by #parser=.
  def parse_result(data)
    Report.new(data, @parser)
  end


  # Runs blastall as a local command with +query+ as its input, stores
  # the raw output in @output and returns the parsed report.
  def exec_local(query)
    cmd = [ @blastall, '-p', @program, '-d', @db ]
    cmd.concat([ '-M', @matrix ]) if @matrix
    cmd.concat([ '-F', @filter ]) if @filter
    cmd.concat([ '-m', @format.to_s ]) if @format
    cmd.concat(@options) if @options

    @output = call_command_local(cmd, query)
    parse_result(@output)
  end


  # Posts +query+ to the GenomeNet BLAST CGI, follows the
  # "Show all result" link found in the response, extracts the text of
  # the <pre> section, stores it in @output and returns the parsed report.
  # Raises RuntimeError ('cannot understand response') when the response
  # does not have the expected structure.
  def exec_genomenet(query)
    host = "blast.genome.jp"
    #path = "/sit-bin/nph-blast"
    path = "/sit-bin/blast" #2005.08.12

    matrix = @matrix ? @matrix : 'blosum62'
    filter = @filter ? @filter : 'T'

    opt = []
    opt.concat([ '-m', @format.to_s ]) if @format
    opt.concat(@options) if @options

    form = {
      'style'          => 'raw',
      'prog'           => @program,
      'dbname'         => @db,
      'sequence'       => query,
      'other_param'    => make_command_line_unix(opt),
      'matrix'         => matrix,
      'filter'         => filter,
      'V_value'        => 500, # default value for GenomeNet
      'B_value'        => 250, # default value for GenomeNet
      'alignment_view' => 0,
    }

    # Every value is URL-escaped (not only the sequence and the extra
    # options) so that a value containing '&', '=' or spaces cannot
    # corrupt the form data. Entries whose value is nil/false are skipped.
    data = []
    form.each do |k, v|
      data.push("#{k}=#{CGI.escape(v.to_s)}") if v
    end

    http = Net::HTTP.new(host)
    http.open_timeout = 300
    http.read_timeout = 600
    result, = http.post(path, data.join('&'))
    @output = result.body

    # workaround 2005.08.12
    unless /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
      raise 'cannot understand response'
    end
    # $2 is the path part (/tmp/...) of the matched result URL.
    result, = http.get($2)
    @output = result.body
    txt = @output.to_s.split(/\<pre\>/)[1]
    raise 'cannot understand response' unless txt
    txt.sub!(/\<\/pre\>.*\z/m, '')
    txt.sub!(/.*^ \-{20,}\s*/m, '')
    # Restore '<' characters that are HTML-escaped as '&lt;' in the page.
    @output = txt.gsub(/&lt;/, '<')

    parse_result(@output)
  end


  # Not implemented; always raises NotImplementedError.
  def exec_ncbi(query)
    raise NotImplementedError
  end
end

end
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
if __FILE__ == $0
  # Demo: reads a query from the command line / standard input (ARGF),
  # runs a remote blastp search against 'genes' and prints the report.
  begin
    # Use pretty-printing for the output when pp is available.
    require 'pp'
    alias p pp
  rescue LoadError
    # A failed require raises LoadError, which a bare rescue
    # (StandardError) does not catch; fall back to the plain Kernel#p.
  end

  # serv = Bio::Blast.local('blastn', 'hoge.nuc')
  # serv = Bio::Blast.local('blastp', 'hoge.pep')
  serv = Bio::Blast.remote('blastp', 'genes')

  query = ARGF.read
  p serv.query(query)
end
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
=begin
|
|
218
|
+
|
|
219
|
+
= Bio::Blast
|
|
220
|
+
|
|
221
|
+
--- Bio::Blast.new(program, db, opt = [], server = 'local')
|
|
222
|
+
--- Bio::Blast.local(program, db, option = '')
|
|
223
|
+
--- Bio::Blast.remote(program, db, option = '', server = 'genomenet')
|
|
224
|
+
|
|
225
|
+
Returns a blast factory object (Bio::Blast).
|
|
226
|
+
|
|
227
|
+
For developers: you can add support for a server 'hoge' by adding an
|
|
228
|
+
exec_hoge(query) method.
|
|
229
|
+
|
|
230
|
+
--- Bio::Blast#query(query)
|
|
231
|
+
|
|
232
|
+
Executes a blast search and returns a Report object (Bio::Blast::Report).
|
|
233
|
+
|
|
234
|
+
--- Bio::Blast#output
|
|
235
|
+
|
|
236
|
+
Returns a String containing the blast execution output in as-is format.
|
|
237
|
+
|
|
238
|
+
--- Bio::Blast#program
|
|
239
|
+
--- Bio::Blast#db
|
|
240
|
+
--- Bio::Blast#options
|
|
241
|
+
--- Bio::Blast#server
|
|
242
|
+
--- Bio::Blast#blastall
|
|
243
|
+
--- Bio::Blast#filter
|
|
244
|
+
|
|
245
|
+
Accessors for the factory parameters.
|
|
246
|
+
|
|
247
|
+
--- Bio::Blast#option
|
|
248
|
+
--- Bio::Blast#option=(str)
|
|
249
|
+
|
|
250
|
+
Get/set options by string.
|
|
251
|
+
|
|
252
|
+
== Available databases for Blast.remote(@program, @db, option, 'genomenet')
|
|
253
|
+
|
|
254
|
+
# ----------+-------+---------------------------------------------------
|
|
255
|
+
# @program | query | @db (supported in GenomeNet)
|
|
256
|
+
# ----------+-------+---------------------------------------------------
|
|
257
|
+
# blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
|
|
258
|
+
# ----------+-------+ pir, prf, pdbstr
|
|
259
|
+
# blastx | NA |
|
|
260
|
+
# ----------+-------+---------------------------------------------------
|
|
261
|
+
# blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
|
|
262
|
+
# ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
|
|
263
|
+
# tblastn | AA | genes-nt, genome, vgenes.nuc
|
|
264
|
+
# ----------+-------+---------------------------------------------------
|
|
265
|
+
|
|
266
|
+
See http://blast.genome.jp/ideas/ideas.html#blast for more details.
|
|
267
|
+
|
|
268
|
+
=end
|
|
269
|
+
|