bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,1402 @@
1
+ #
2
+ # = bio/appl/blast/format0.rb - BLAST default output (-m 0) parser
3
+ #
4
+ # Author:: Naohisa GOTO
5
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ #--
9
+ # This library is free software; you can redistribute it and/or
10
+ # modify it under the terms of the GNU Lesser General Public
11
+ # License as published by the Free Software Foundation; either
12
+ # version 2 of the License, or (at your option) any later version.
13
+ #
14
+ # This library is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
+ # Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public
20
+ # License along with this library; if not, write to the Free Software
21
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
+ #++
23
+ #
24
+ # $Id: format0.rb,v 1.16 2005/11/01 05:32:23 ngoto Exp $
25
+ #
26
+ # NCBI BLAST default (-m 0 option) output parser.
27
+ #
28
+ # == References
29
+ #
30
+ # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
31
+ # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
32
+ # "Gapped BLAST and PSI-BLAST: a new generation of protein database search
33
+ # programs", Nucleic Acids Res. 25:3389-3402.
34
+ # * http://www.ncbi.nlm.nih.gov/blast/
35
+ #
36
+
37
+ begin
38
+ require 'strscan'
39
+ rescue LoadError
40
+ end
41
+ require 'singleton'
42
+
43
+ #--
44
+ #require 'bio/db'
45
+ #++
46
+ require 'bio/io/flatfile'
47
+
48
+ module Bio
49
+ class Blast
50
+ module Default #:nodoc:
51
+
52
+ # Bio::Blast::Default::Report parses NCBI BLAST default output
53
+ # and stores information in the data.
54
+ # It may store some Bio::Blast::Default::Report::Iteration objects.
55
+ class Report #< DB
56
+ # Delimiter of each entry. Bio::FlatFile uses it.
57
+ DELIMITER = RS = "\nBLAST"
58
+
59
# Opens +filename+ through Bio::FlatFile.open, passing this class as the
# entry parser. Any further arguments (+mode+) are forwarded unchanged.
# Returns whatever Bio::FlatFile.open returns.
def self.open(filename, *mode)
  Bio::FlatFile.open(self, filename, *mode)
end
63
+
64
# Creates a new Report object from BLAST result text.
#
# Leading whitespace is dropped, anything from a following line that
# starts with "BLAST"/"TBLAST" onwards is cut off and kept in
# @entry_overrun, and the remaining text is split on blank lines into
# chunks that the format0_split_* helpers consume in order.
def initialize(str)
  text = str.sub(/\A\s+/, '')
  overrun = nil
  # remove trailing entries for sure
  text.sub!(/\n(T?BLAST.*)/m) { overrun = $1; "\n" }
  @entry_overrun = overrun
  @entry = text
  chunks = text.split(/(?:^[ \t]*\n)+/)

  format0_split_headers(chunks)
  @iterations = format0_split_search(chunks)
  format0_split_stat_params(chunks)
end
76
+ # piece of next entry. Bio::FlatFile uses it.
77
+ attr_reader :entry_overrun
78
+
79
+ # (PSI-BLAST)
80
+ # Returns iterations.
81
+ # It returns an array of Bio::Blast::Default::Report::Iteration class.
82
+ # Note that normal blastall result usually contains one iteration.
83
+ attr_reader :iterations
84
+
85
# Returns the whole entry (the text kept in @entry) as a string.
def to_s
  @entry
end
87
+
88
#:stopdoc:
# prevent using StringScanner_R (in old version of strscan):
# when no usable StringScanner is present, initialize is redefined
# so that creating an object fails immediately with a clear message.
if defined?(StringScanner) then
  if StringScanner.name == 'StringScanner_R' then
    def initialize(*arg)
      raise 'cannot use StringScanner_R'
    end #def
  end
else
  def initialize(*arg)
    raise 'couldn\'t load strscan.so'
  end #def
end
#:startdoc:
100
+
101
# Defines, for every given name, a reader method that forwards the call
# to the object stored in @f0dbstat.
def self.delegate_to_f0dbstat(*names)
  names.each { |name| module_eval("def #{name}; @f0dbstat.#{name}; end") }
end
107
+ private_class_method :delegate_to_f0dbstat
108
+
109
+ # number of sequences in database
110
+ attr_reader :db_num if false #dummy
111
+ delegate_to_f0dbstat :db_num
112
+
113
+ # number of letters in database
114
+ attr_reader :db_len if false #dummy
115
+ delegate_to_f0dbstat :db_len
116
+
117
+ # posted date of the database
118
+ attr_reader :posted_date if false #dummy
119
+ delegate_to_f0dbstat :posted_date
120
+
121
+ # effective length of the database
122
+ attr_reader :eff_space if false #dummy
123
+ delegate_to_f0dbstat :eff_space
124
+
125
+ # name of the matrix
126
+ attr_reader :matrix if false #dummy
127
+ delegate_to_f0dbstat :matrix
128
+
129
+ # match score of the matrix
130
+ attr_reader :sc_match if false #dummy
131
+ delegate_to_f0dbstat :sc_match
132
+
133
+ # mismatch score of the matrix
134
+ attr_reader :sc_mismatch if false #dummy
135
+ delegate_to_f0dbstat :sc_mismatch
136
+
137
+ # gap open penalty
138
+ attr_reader :gap_open if false #dummy
139
+ delegate_to_f0dbstat :gap_open
140
+
141
+ # gap extend penalty
142
+ attr_reader :gap_extend if false #dummy
143
+ delegate_to_f0dbstat :gap_extend
144
+
145
+ # e-value threshold specified when BLAST was executed
146
+ attr_reader :expect if false #dummy
147
+ delegate_to_f0dbstat :expect
148
+
149
+ # number of hits. Note that this may differ from <tt>hits.size</tt>.
150
+ attr_reader :num_hits if false #dummy
151
+ delegate_to_f0dbstat :num_hits
152
+
153
# Same as <tt>iterations.last.kappa</tt>.
def kappa
  @iterations.last.kappa
end

# Same as <tt>iterations.last.lambda</tt>.
def lambda
  @iterations.last.lambda
end

# Same as <tt>iterations.last.entropy</tt>.
def entropy
  @iterations.last.entropy
end

# Same as <tt>iterations.last.gapped_kappa</tt>.
def gapped_kappa
  @iterations.last.gapped_kappa
end

# Same as <tt>iterations.last.gapped_lambda</tt>.
def gapped_lambda
  @iterations.last.gapped_lambda
end

# Same as <tt>iterations.last.gapped_entropy</tt>.
def gapped_entropy
  @iterations.last.gapped_entropy
end
166
+
167
# Returns program name.
def program
  format0_parse_header
  @program
end

# Returns version of the program.
def version
  format0_parse_header
  @version
end

# Returns version number string of the program.
def version_number
  format0_parse_header
  @version_number
end

# Returns released date of the program.
def version_date
  format0_parse_header
  @version_date
end
175
+
176
# Returns length of the query.
def query_len
  format0_parse_query
  @query_len
end

# Returns definition of the query.
def query_def
  format0_parse_query
  @query_def
end
181
+
182
# (PHI-BLAST)
# Same as <tt>iterations.first.pattern</tt>.
# Note that it returns the FIRST iteration's value.
def pattern
  @iterations.first.pattern
end

# (PHI-BLAST)
# Same as <tt>iterations.first.pattern_positions</tt>.
# Note that it returns the FIRST iteration's value.
def pattern_positions; @iterations.first.pattern_positions; end
193
+
194
# (PSI-BLAST)
# Iterates over each iteration.
# Same as <tt>iterations.each</tt>.
# Yields a Bio::Blast::Default::Report::Iteration object.
def each_iteration
  @iterations.each { |itr| yield itr }
end
203
+
204
# Iterates over each hit of the last iteration.
# Same as <tt>iterations.last.each_hit</tt>.
# Yields a Bio::Blast::Default::Report::Hit object.
# This is very useful in most cases, e.g. for blastall results.
def each_hit
  @iterations.last.each { |hit| yield hit }
end
alias each each_hit
214
+
215
# Same as <tt>iterations.last.hits</tt>.
# Returns the last iteration's hits.
# Returns an array of Bio::Blast::Default::Report::Hit object.
# This is very useful in most cases, e.g. for blastall results.
def hits; @iterations.last.hits; end

# (PSI-BLAST)
# Same as <tt>iterations.last.message</tt>.
def message; @iterations.last.message; end

# (PSI-BLAST)
# Same as <tt>iterations.last.converged?</tt>.
# The result is whatever the last iteration reports: true when it is
# converged, a false value otherwise.
def converged?; @iterations.last.converged?; end
236
+
237
# Returns the bibliography reference of the BLAST software.
# The reference chunk is flattened to a single line (every run of
# whitespace becomes one space) and the result is cached.
def reference
  return @reference if defined?(@reference)
  @reference = @f0reference.to_s.gsub(/\s+/, ' ').strip
end
244
+
245
# Returns the name (filename or title) of the database.
# The name is taken from the "Database:" header chunk; when that chunk
# has more than one line, the last line is dropped and the remaining
# lines are joined with single spaces.
# Returns nil when the chunk holds no "Database:" line.
def db
  return @db if defined?(@db)
  if /Database *\: *(.*)/m =~ @f0database then
    lines = $1.split(/^/)
    lines.pop if lines.size > 1
    @db = lines.map { |line| line.sub(/\s+\z/, '') }.join(' ')
  end
  @db
end
256
+
257
+ private
258
# Parses the query lines (begins with "Query= ") held in @f0query.
#
# Sets @query_def to the query definition, with continuation lines
# joined by single spaces, and @query_len to the number given in the
# closing "(N letters)" line. N may contain thousands separators
# ("1,234"). When the "(N letters)" line is absent, @query_len is left
# unset and parsing stops at the end of the chunk.
# Nothing is set when the chunk holds no "Query=" at all.
# The result is cached: once @query_def is defined nothing is re-parsed.
def format0_parse_query
  return if defined?(@query_def)
  sc = StringScanner.new(@f0query.to_s)
  sc.skip(/\s*/)
  return unless sc.skip_until(/Query\= */)
  q = []
  r = nil
  loop do
    q << sc.scan(/.*/)
    sc.skip(/\s*^ ?/)
    r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
    # Stop on the length line, or when only whitespace is left.
    # (StringScanner#rest is always a String, hence always true, so
    # testing it can never end the loop.)
    break if r or sc.match?(/\s*\z/)
  end
  @query_len = sc[1].delete(',').to_i if r
  @query_def = q.join(' ')
end
274
+
275
# Parses the first line of the BLAST result (kept in @f0header) into
# @program, @version, @version_number and @version_date.
# Does nothing when the line was already parsed or does not have the
# expected "NAME VERSION [DATE]" form.
def format0_parse_header
  return if defined?(@program)
  md = /(\w+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/.match(@f0header.to_s)
  return unless md
  @program = md[1]
  @version = "#{md[1]} #{md[2]} [#{md[3]}]"
  @version_number = md[2]
  @version_date = md[3]
end
286
+
287
# Takes the first four chunks off +data+ and stores them, in order, as
# the first line, the reference, the query lines and the database line.
def format0_split_headers(data)
  @f0header, @f0reference, @f0query = data.shift, data.shift, data.shift
  @f0database = data.shift
end
295
+
296
# Splits the statistical parameters found at the end of the report.
#
# * Leading chunks containing a "Database:" line are moved into a new
#   F0dbstat object (@f0dbstat).
# * Every following chunk containing a line that starts with "Lambda"
#   is appended to the @f0stat of the last iteration, which is also
#   given the F0dbstat object.
# * Whatever is left in +data+ becomes the F0dbstat's f0params.
#
# A "Lambda" chunk is always taken off +data+, even when the report has
# no iteration to attach it to; otherwise the loop could never advance.
def format0_split_stat_params(data)
  dbs = []
  while r = data.first and /^ *Database\:/ =~ r
    dbs << data.shift
  end
  @f0dbstat = self.class::F0dbstat.new(dbs)
  i = -1
  while r = data[0] and /^Lambda/ =~ r
    #i -= 1 unless /^Gapped/ =~ r
    stat = data.shift
    if itr = @iterations[i] then
      dbstat = @f0dbstat
      itr.instance_eval { @f0stat << stat }
      itr.instance_eval { @f0dbstat = dbstat }
    end
  end
  @f0dbstat.f0params = data
end
313
+
314
# Splits the search results.
# While the next chunk of +data+ contains a line starting with
# "Searching", a new Iteration is built from +data+ (the Iteration
# takes the chunks it needs off the front).
# Returns the array of Iteration objects.
def format0_split_search(data)
  found = []
  loop do
    head = data[0]
    break unless head and /^Searching/ =~ head
    found << Iteration.new(data)
  end
  found
end
322
+
323
# Stores format0 database statistics.
# Internal use only. Users must not use the class.
#
# The raw text chunks are kept as given; they are parsed lazily the
# first time one of the reader methods is called.
class F0dbstat #:nodoc:
  # Creates new F0dbstat class.
  # Internal use only.
  # +ary+ is the array of raw "Database:" chunks.
  def initialize(ary)
    @hash = {}
    @f0dbstat = ary
  end
  attr_reader :f0dbstat
  attr_accessor :f0params

  # Parses colon-separated lines (in +ary+) and stores them to +hash+
  # as "name" => "value" pairs.
  # A "Number of sequences better than X: N" line additionally sets
  # @expect (X, as a string) and @num_hits (N, as an integer).
  # Raises ScanError on text that is not of the form "name: value".
  def parse_colon_separated_params(hash, ary)
    ary.each do |chunk|
      scanner = StringScanner.new(chunk)
      scanner.skip(/\s*/)
      until scanner.eos?
        if scanner.match?(/Number of sequences better than +([e\-\.\d]+) *\: *(.+)/) then
          @expect = scanner[1]
          @num_hits = scanner[2].tr(',', '').to_i
        end
        #p scanner.peek(20)
        raise ScanError unless scanner.skip(/([\-\,\.\'\(\)\#\w ]+)\: *(.*)/)
        hash[scanner[1]] = scanner[2]
        scanner.skip(/\s*/)
      end #until
    end #each
  end #def
  private :parse_colon_separated_params

  # Parses parameters (f0params) once and fills in the matrix, gap
  # penalty and effective-length attributes found there.
  def parse_params
    return if defined?(@parse_params)
    parse_colon_separated_params(@hash, @f0params)
    #p @hash
    matrix_line = @hash['Matrix']
    if matrix_line then
      scores = /blastn *matrix *\: *([e\-\.\d]+) +([e\-\.\d]+)/.match(matrix_line)
      if scores then
        @matrix = 'blastn'
        @sc_match = scores[1].to_i
        @sc_mismatch = scores[2].to_i
      else
        @matrix = matrix_line
      end
    end
    penalties = @hash['Gap Penalties']
    if penalties then
      existence = /Existence\: *([e\-\.\d]+)/.match(penalties)
      @gap_open = existence[1].to_i if existence
      extension = /Extension\: *([e\-\.\d]+)/.match(penalties)
      @gap_extend = extension[1].to_i if extension
    end
    #@db_num = @hash['Number of Sequences'] unless defined?(@db_num)
    #@db_len = @hash['length of database'] unless defined?(@db_len)
    eff = @hash['effective length of database']
    @eff_space = eff.tr(',', '').to_i if eff
    @parse_params = true
  end
  private :parse_params

  # Returns name of the matrix.
  def matrix
    parse_params
    @matrix
  end

  # Returns the match score of the matrix.
  def sc_match
    parse_params
    @sc_match
  end

  # Returns the mismatch score of the matrix.
  def sc_mismatch
    parse_params
    @sc_mismatch
  end

  # Returns gap open penalty value.
  def gap_open
    parse_params
    @gap_open
  end

  # Returns gap extend penalty value.
  def gap_extend
    parse_params
    @gap_extend
  end

  # Returns effective length of the database.
  def eff_space
    parse_params
    @eff_space
  end

  # Returns e-value threshold specified when BLAST was executed.
  def expect
    parse_params
    @expect
  end

  # Returns number of hits.
  def num_hits
    parse_params
    @num_hits
  end

  # Parses database statistics lines of the first "Database:" chunk.
  # Working from the last line backwards it picks up the posted date
  # and the numbers of letters and sequences; the text that follows
  # "Database:" in what remains becomes the database name.
  def parse_dbstat
    lines = @f0dbstat[0].to_s.split(/^/)
    skipped = []
    wanted = 3
    while wanted > 0 and line = lines.pop
      if /^\s+Posted date\:\s*(.*)$/ =~ line
        unless defined?(@posted_date)
          @posted_date = $1.strip
          wanted -= 1
          skipped.clear
        end
      elsif /^\s+Number of letters in database\:\s*(.*)$/ =~ line
        unless defined?(@db_len)
          @db_len = $1.tr(',', '').to_i
          wanted -= 1
          skipped.clear
        end
      elsif /^\s+Number of sequences in database\:\s*(.*)$/ =~ line
        unless defined?(@db_num)
          @db_num = $1.tr(',', '').to_i
          wanted -= 1
          skipped.clear
        end
      else
        skipped.unshift(line)
      end
    end #while
    lines.concat(skipped)
    while line = lines.shift
      next unless /^\s+Database\:\s*(.*)$/ =~ line
      lines.unshift($1)
      @database = lines.map { |part| part.strip }.join(' ')
      break #while
    end
  end #def
  private :parse_dbstat

  # Returns name (title or filename) of the database.
  def database
    parse_dbstat unless defined?(@database)
    @database
  end

  # Returns posted date of the database.
  def posted_date
    parse_dbstat unless defined?(@posted_date)
    @posted_date
  end

  # Returns number of letters in database.
  def db_len
    parse_dbstat unless defined?(@db_len)
    @db_len
  end

  # Returns number of sequences in database.
  def db_num
    parse_dbstat unless defined?(@db_num)
    @db_num
  end
end #class F0dbstat
468
+
469
# Provides a singleton object of which any methods always return nil.
# It stands in for statistics that have not been attached yet.
# Internal use only. Users must not use the class.
class AlwaysNil #:nodoc:
  include Singleton

  # Answers nil to every message the object does not otherwise
  # understand, whatever the arguments.
  def method_missing(*_ignored)
    nil
  end
end #class AlwaysNil
477
+
478
+ # Bio::Blast::Default::Report::Iteration stores information about
479
+ # a iteration.
480
+ # It may contain some Bio::Blast::Default::Report::Hit objects.
481
+ # Note that a PSI-BLAST (blastpgp command) result usually contain
482
+ # multiple iterations in it, and a normal BLAST (blastall command)
483
+ # result usually contain one iteration in it.
484
+ class Iteration
485
+ # Creates a new Iteration object.
486
+ # It is designed to be called only internally from
487
+ # the Bio::Blast::Default::Report class.
488
+ # Users shall not use the method directly.
489
+ def initialize(data)
490
+ @f0stat = []
491
+ @f0dbstat = AlwaysNil.instance
492
+ @f0hitlist = []
493
+ @hits = []
494
+ @num = 1
495
+ r = data.shift
496
+ @f0message = [ r ]
497
+ r.gsub!(/^Results from round (\d+).*\z/) { |x|
498
+ @num = $1.to_i
499
+ @f0message << x
500
+ ''
501
+ }
502
+ r = data.shift
503
+ while /^Number of occurrences of pattern in the database is +(\d+)/ =~ r
504
+ # PHI-BLAST
505
+ @pattern_in_database = $1.to_i
506
+ @f0message << r
507
+ r = data.shift
508
+ end
509
+ if /^Results from round (\d+)/ =~ r then
510
+ @num = $1.to_i
511
+ @f0message << r
512
+ r = data.shift
513
+ end
514
+ if r and !(/\*{5} No hits found \*{5}/ =~ r) then
515
+ @f0hitlist << r
516
+ begin
517
+ @f0hitlist << data.shift
518
+ end until r = data[0] and /^\>/ =~ r
519
+ if r and /^CONVERGED\!/ =~ r then
520
+ r.sub!(/(.*\n)*^CONVERGED\!.*\n/) { |x| @f0hitlist << x; '' }
521
+ end
522
+ if defined?(@pattern_in_database) and r = data.first then
523
+ #PHI-BLAST
524
+ while /^\>/ =~ r
525
+ @hits << Hit.new(data)
526
+ r = data.first
527
+ break unless r
528
+ if /^Significant alignments for pattern/ =~ r
529
+ data.shift
530
+ r = data.first
531
+ end
532
+ end
533
+ else
534
+ #not PHI-BLAST
535
+ while r = data[0] and /^\>/ =~ r
536
+ @hits << Hit.new(data)
537
+ end
538
+ end
539
+ end
540
+ if /^CONVERGED\!\s*$/ =~ @f0hitlist[-1].to_s then
541
+ @message = 'CONVERGED!'
542
+ @flag_converged = true
543
+ end
544
+ end
545
+
546
+ # (PSI-BLAST) Iteration round number.
547
+ attr_reader :num
548
+ # (PSI-BLAST) Messages of the iteration.
549
+ attr_reader :message
550
+ # (PHI-BLAST) Number of occurrences of pattern in the database.
551
+ attr_reader :pattern_in_database
552
+
553
+ # Returns the hits of the iteration.
554
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
555
+ def hits
556
+ parse_hitlist
557
+ @hits
558
+ end
559
+
560
+ # Iterates over each hit of the iteration.
561
+ # Yields a Bio::Blast::Default::Report::Hit object.
562
+ def each
563
+ hits.each do |x|
564
+ yield x
565
+ end
566
+ end
567
+
568
+ # (PSI-BLAST) Returns true if the iteration is converged.
569
+ # Otherwise, returns false.
570
+ def converged?
571
+ @flag_converged
572
+ end
573
+
574
+ # (PHI-BLAST) Returns pattern string.
575
+ # Returns nil if it is not a PHI-BLAST result.
576
+ def pattern
577
+ #PHI-BLAST
578
+ if !defined?(@pattern) and defined?(@pattern_in_database) then
579
+ @pattern = nil
580
+ @pattern_positions = []
581
+ @f0message.each do |r|
582
+ sc = StringScanner.new(r)
583
+ if sc.skip_until(/^ *pattern +(.+)$/) then
584
+ @pattern = sc[1] unless @pattern
585
+ sc.skip_until(/^ at position +(\d+)/)
586
+ @pattern_positions << sc[1].to_i
587
+ end
588
+ end
589
+ end
590
+ @pattern
591
+ end
592
+
593
+ # (PHI-BLAST) Returns pattern positions.
594
+ # Returns nil if it is not a PHI-BLAST result.
595
+ def pattern_positions
596
+ #PHI-BLAST
597
+ pattern
598
+ @pattern_positions
599
+ end
600
+
601
+ # (PSI-BLAST)
602
+ # Returns hits which have been found again in the iteration.
603
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
604
+ def hits_found_again
605
+ parse_hitlist
606
+ @hits_found_again
607
+ end
608
+
609
+ # (PSI-BLAST)
610
+ # Returns hits which have been newly found in the iteration.
611
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
612
+ def hits_newly_found
613
+ parse_hitlist
614
+ @hits_newly_found
615
+ end
616
+
617
+ # (PHI-BLAST) Returns hits for pattern. ????
618
+ def hits_for_pattern
619
+ parse_hitlist
620
+ @hits_for_pattern
621
+ end
622
+
623
+ # Parses list of hits.
624
+ def parse_hitlist
625
+ unless defined?(@parse_hitlist)
626
+ @hits_found_again = []
627
+ @hits_newly_found = []
628
+ @hits_unknown_state = []
629
+ i = 0
630
+ a = @hits_newly_found
631
+ flag = true
632
+ @f0hitlist.each do |x|
633
+ sc = StringScanner.new(x)
634
+ if flag then
635
+ if sc.skip_until(/^Sequences used in model and found again\:\s*$/)
636
+ a = @hits_found_again
637
+ end
638
+ flag = nil
639
+ next
640
+ end
641
+ next if sc.skip(/^CONVERGED\!$/)
642
+ if sc.skip(/^Sequences not found previously or not previously below threshold\:\s*$/) then
643
+ a = @hits_newly_found
644
+ next
645
+ elsif sc.skip(/^Sequences.+\:\s*$/) then
646
+ #possibly a bug or unknown format?
647
+ a = @hits_unknown_state
648
+ next
649
+ elsif sc.skip(/^Significant (matches|alignments) for pattern/) then
650
+ # PHI-BLAST
651
+ # do nothing when 'alignments'
652
+ if sc[1] == 'matches' then
653
+ unless defined?(@hits_for_pattern)
654
+ @hits_for_pattern = []
655
+ end
656
+ a = []
657
+ @hits_for_pattern << a
658
+ end
659
+ next
660
+ end
661
+ b = x.split(/^/)
662
+ b.collect! { |y| y.empty? ? nil : y }
663
+ b.compact!
664
+ if i + b.size > @hits.size then
665
+ ((@hits.size - i)...(b.size)).each do |j|
666
+ y = b[j]; y.strip!
667
+ y.reverse!
668
+ z = y.split(/\s+/, 3)
669
+ z.each { |y| y.reverse! }
670
+ h = Hit.new([ z.pop.to_s.sub(/\.+\z/, '') ])
671
+ bs = z.pop.to_s
672
+ ev = z.pop.to_s
673
+ #ev = '1' + ev if ev[0] == ?e
674
+ h.instance_eval { @bit_score = bs; @evalue = ev }
675
+ @hits << h
676
+ end
677
+ end
678
+ a.concat(@hits[i, b.size])
679
+ i += b.size
680
+ end #each
681
+ @hits_found_again.each do |x|
682
+ x.instance_eval { @again = true }
683
+ end
684
+ @parse_hitlist = true
685
+ end #unless
686
+ end
687
+ private :parse_hitlist
688
+
689
+ # Parses statistics for the iteration.
690
+ def parse_stat
691
+ unless defined?(@parse_stat)
692
+ @f0stat.each do |x|
693
+ gapped = nil
694
+ sc = StringScanner.new(x)
695
+ sc.skip(/\s*/)
696
+ if sc.skip(/Gapped\s*/) then
697
+ gapped = true
698
+ end
699
+ s0 = []
700
+ h = {}
701
+ while r = sc.scan(/\w+/)
702
+ #p r
703
+ s0 << r
704
+ sc.skip(/ */)
705
+ end
706
+ sc.skip(/\s*/)
707
+ while r = sc.scan(/[e\.\-\d]+/)
708
+ #p r
709
+ h[s0.shift] = r
710
+ sc.skip(/ */)
711
+ end
712
+ if gapped then
713
+ @gapped_lambda = h['Lambda']
714
+ @gapped_kappa = h['K']
715
+ @gapped_entropy = h['H']
716
+ else
717
+ @lambda = h['Lambda']
718
+ @kappa = h['K']
719
+ @entropy = h['H']
720
+ end
721
+ end #each
722
+ @parse_stat = true
723
+ end #unless
724
+ end #def
725
+ private :parse_stat
726
+
727
+ # Defines attributes which call +parse_stat+ before accessing.
728
+ def self.method_after_parse_stat(*names)
729
+ names.each do |x|
730
+ module_eval("def #{x}; parse_stat; @#{x}; end")
731
+ end
732
+ end
733
+ private_class_method :method_after_parse_stat
734
+
735
+ # lambda of the database
736
+ attr_reader :lambda if false #dummy
737
+ method_after_parse_stat :lambda
738
+ # kappa of the database
739
+ attr_reader :kappa if false #dummy
740
+ method_after_parse_stat :kappa
741
+ # entropy of the database
742
+ attr_reader :entropy if false #dummy
743
+ method_after_parse_stat :entropy
744
+
745
+ # gapped lambda of the database
746
+ attr_reader :gapped_lambda if false #dummy
747
+ method_after_parse_stat :gapped_lambda
748
+ # gapped kappa of the database
749
+ attr_reader :gapped_kappa if false #dummy
750
+ method_after_parse_stat :gapped_kappa
751
+ # gapped entropy of the database
752
+ attr_reader :gapped_entropy if false #dummy
753
+ method_after_parse_stat :gapped_entropy
754
+
755
+ # Defines attributes which delegate to @f0dbstat objects.
756
def self.delegate_to_f0dbstat(*names)
  # For every given name, generate an instance method of that name which
  # forwards the call (without arguments) to the object held in @f0dbstat.
  names.each { |x| module_eval("def #{x}; @f0dbstat.#{x}; end") }
end
private_class_method :delegate_to_f0dbstat
762
+
763
# Readers forwarded to @f0dbstat. Each `attr_reader ... if false` line
# below can never execute (its condition is the literal false); presumably
# it is kept only so documentation tools list the attribute -- confirm.
# The working readers are generated by delegate_to_f0dbstat.

# name (title or filename) of the database
attr_reader :database if false # dummy
# posted date of the database
attr_reader :posted_date if false # dummy
delegate_to_f0dbstat(:database, :posted_date)

# number of letters in database
# NOTE(review): the descriptions of db_num and db_len look swapped
# (num = sequences, len = letters?) -- verify against the @f0dbstat class.
attr_reader :db_num if false # dummy
# number of sequences in database
attr_reader :db_len if false # dummy
# effective length of the database
# NOTE(review): the name suggests "effective search space" -- confirm.
attr_reader :eff_space if false # dummy
delegate_to_f0dbstat(:db_num, :db_len, :eff_space)

# e-value threshold specified when BLAST was executed
attr_reader :expect if false # dummy
delegate_to_f0dbstat(:expect)
783
+
784
+ end #class Iteration
785
+
786
+ # Bio::Blast::Default::Report::Hit contains information about a hit.
787
+ # It may contain some Bio::Blast::Default::Report::HSP objects.
788
class Hit
  # Creates a new Hit object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Iteration class.
  # Users should not call the method directly.
  #
  # +data+ is an array of raw text chunks. The first element (the hit
  # name) is removed from it, and HSP.new is called on the same array for
  # as long as its first element contains a line starting with
  # whitespace followed by "Score".
  def initialize(data)
    @f0hitname = data.shift
    @hsps = []
    while r = data[0] and /^\s+Score/ =~ r
      @hsps << HSP.new(data)
    end
    # Iteration#parse_hitlist switches this to true through instance_eval
    # for hits listed as found again.
    @again = false
  end

  # Hsp(high-scoring segment pair)s of the hit.
  # Returns an array of Bio::Blast::Default::Report::HSP objects.
  attr_reader :hsps

  # Iterates over each hsp(high-scoring segment pair) of the hit.
  # Yields a Bio::Blast::Default::Report::HSP object.
  def each
    @hsps.each { |x| yield x }
  end

  # (PSI-BLAST)
  # Returns true if the hit is found again in the iteration.
  # Otherwise, returns false or nil.
  def found_again?
    @again
  end

  # Returns first hsp's score, or nil if there are no hsps.
  def score
    (h = @hsps.first) ? h.score : nil
  end

  # Returns first hsp's bit score.
  # (shown in hit list of BLAST result)
  #
  # The defined? guard keeps any value already stored in @bit_score
  # (presumably assigned from the hit list by the iteration parser --
  # verify against the caller) instead of overwriting it.
  def bit_score
    unless defined?(@bit_score)
      if h = @hsps.first then
        @bit_score = h.bit_score
      end
    end
    @bit_score
  end

  # Returns first hsp's e-value.
  # (shown in hit list of BLAST result)
  #
  # Like bit_score, an already stored @evalue is returned unchanged.
  def evalue
    unless defined?(@evalue)
      if h = @hsps.first then
        @evalue = h.evalue
      end
    end
    @evalue
  end

  # Parses name of the hit. Runs at most once per object.
  #
  # Sets @definition to the header lines (after an optional leading ">")
  # joined by single spaces, and @len to the integer found in a trailing
  # "Length = N" line, or nil when no such line exists.
  def parse_hitname
    unless defined?(@parse_hitname)
      sc = StringScanner.new(@f0hitname)
      sc.skip(/\s*/)
      sc.skip(/\>/)
      d = []
      begin
        d << sc.scan(/.*/)
        sc.skip(/\s*/)
      end until !sc.rest? or r = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
      # The pattern accepts thousands separators ("1,234"), so they must
      # be removed before to_i; otherwise conversion stops at the comma.
      @len = (r ? sc[1].delete(',').to_i : nil)
      @definition = d.join(" ")
      @parse_hitname = true
    end
  end
  private :parse_hitname

  # Returns length of the hit.
  def len; parse_hitname; @len; end

  # Returns definition of the hit.
  def definition; parse_hitname; @definition; end

  #--
  # Aliases to keep compatibility with Bio::Fasta::Report::Hit.
  #alias target_id accession
  alias target_def definition
  alias target_len len
  #++

  # Sends given method to the first hsp or returns nil if
  # there are no hsps.
  def hsp_first(m)
    (h = hsps.first) ? h.send(m) : nil
  end
  private :hsp_first

  #--
  # Shortcut methods for the best Hsp
  # (Compatibility method with FASTA)
  #++

  # Same as hsps.first.identity.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def identity; hsp_first :identity; end

  # Same as hsps.first.align_len.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def overlap; hsp_first :align_len; end

  # Same as hsps.first.qseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_seq; hsp_first :qseq; end

  # Same as hsps.first.hseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_seq; hsp_first :hseq; end

  # Same as hsps.first.midline.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def midline; hsp_first :midline; end

  # Same as hsps.first.query_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_start; hsp_first :query_from; end

  # Same as hsps.first.query_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_end; hsp_first :query_to; end

  # Same as hsps.first.hit_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_start; hsp_first :hit_from; end

  # Same as hsps.first.hit_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_end; hsp_first :hit_to; end

  # Returns an array which contains
  # [ query_start, query_end, target_start, target_end ].
  # (Compatibility method with FASTA)
  def lap_at
    [ query_start, query_end, target_start, target_end ]
  end
end #class Hit
941
+
942
+ # Bio::Blast::Default::Report::HSP holds information about the hsp
943
+ # (high-scoring segment pair).
944
class HSP
  # Creates new HSP object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Hit class.
  # Users should not call the method directly.
  #
  # +data+ is an array of raw text chunks. The first element (the score
  # header) is removed from it, followed by every leading element that
  # contains a line starting with "Query:" or "Sbjct:".
  def initialize(data)
    @f0score = data.shift
    @f0alignment = []
    while r = data[0] and /^(Query|Sbjct)\:/ =~ r
      @f0alignment << data.shift
    end
  end

  # Parses scores, identities, positives, gaps, and so on.
  # Runs at most once per object. Raises ScanError when the score header
  # contains text that none of the known patterns accept, or when the
  # Identities/Positives/Gaps denominators disagree.
  def parse_score
    unless defined?(@parse_score)
      sc = StringScanner.new(@f0score)
      while sc.rest?
        sc.skip(/\s*/)
        # "Expect(n)" may carry a count of any number of digits, so the
        # optional group uses \d+ rather than a single \d.
        if sc.skip(/Expect(?:\(\d+\))? *\= *([e\-\.\d]+)/) then
          # Stored exactly as printed; a value may start with a bare
          # exponent (e.g. "e-100"), which to_f would read as 0.0.
          @evalue = sc[1]
        elsif sc.skip(/Score *\= *([e\-\.\d]+) *bits *\( *([e\-\.\d]+) *\)/) then
          @bit_score = sc[1]
          @score = sc[2]
        elsif sc.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/) then
          # All three fractions must share the same denominator, which
          # is taken as the alignment length.
          alen = sc[3].to_i
          @align_len = alen unless defined?(@align_len)
          raise ScanError if alen != @align_len
          case sc[1]
          when 'Identities'
            @identity = sc[2].to_i
            @percent_identity = sc[4]
          when 'Positives'
            @positive = sc[2].to_i
            @percent_positive = sc[4]
          when 'Gaps'
            @gaps = sc[2].to_i
            @percent_gaps = sc[4]
          else
            raise ScanError
          end
        elsif sc.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/) then
          @query_strand = sc[1]
          @hit_strand = sc[2]
          if sc[1] == sc[2] then
            @query_frame = 1
            @hit_frame = 1
          elsif sc[1] == 'Plus' then # Plus/Minus
            # complement sequence against xml(-m 7)
            # In xml(-m 8), -1=>Plus, 1=>Minus ???
            #@query_frame = -1
            #@hit_frame = 1
            @query_frame = 1
            @hit_frame = -1
          else # Minus/Plus
            @query_frame = -1
            @hit_frame = 1
          end
        elsif sc.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/) then
          @query_frame = sc[1].to_i
          if sc[2] then
            @hit_frame = sc[3].to_i
          end
        elsif sc.skip(/Score *\= *([e\-\.\d]+) +\(([e\-\.\d]+) *bits *\)/) then
          #WU-BLAST
          @score = sc[1]
          @bit_score = sc[2]
        elsif sc.skip(/P *\= * ([e\-\.\d]+)/) then
          #WU-BLAST
          @p_sum_n = nil
          @pvalue = sc[1]
        elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\-\.\d]+)/) then
          #WU-BLAST
          @p_sum_n = sc[1].to_i
          @pvalue = sc[2]
        else
          raise ScanError
        end
        # Fields are separated by optional whitespace and a comma.
        sc.skip(/\s*\,?\s*/)
      end
      @parse_score = true
    end
  end
  private :parse_score

  # Defines attributes which call parse_score before accessing.
  def self.method_after_parse_score(*names)
    names.each do |x|
      module_eval("def #{x}; parse_score; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_score

  # The `attr_reader ... if false` lines in this class never execute;
  # the working readers are generated by the method_after_* helpers.

  # bit score
  attr_reader :bit_score if false #dummy
  method_after_parse_score :bit_score
  # score
  attr_reader :score if false #dummy
  method_after_parse_score :score

  # e-value
  attr_reader :evalue if false #dummy
  method_after_parse_score :evalue

  # frame of the query
  attr_reader :query_frame if false #dummy
  method_after_parse_score :query_frame
  # frame of the hit
  attr_reader :hit_frame if false #dummy
  method_after_parse_score :hit_frame

  # Identity (number of identical nucleotides or amino acids)
  attr_reader :identity if false #dummy
  method_after_parse_score :identity
  # percent of identical nucleotides or amino acids
  attr_reader :percent_identity if false #dummy
  method_after_parse_score :percent_identity

  # Positives (number of positive hit amino acids or nucleotides)
  attr_reader :positive if false #dummy
  method_after_parse_score :positive
  # percent of positive hit amino acids or nucleotides
  attr_reader :percent_positive if false #dummy
  method_after_parse_score :percent_positive

  # Gaps (number of gaps)
  attr_reader :gaps if false #dummy
  method_after_parse_score :gaps
  # percent of gaps
  attr_reader :percent_gaps if false #dummy
  method_after_parse_score :percent_gaps

  # aligned length
  attr_reader :align_len if false #dummy
  method_after_parse_score :align_len

  # strand of the query ("Plus" or "Minus" or nil)
  attr_reader :query_strand if false #dummy
  method_after_parse_score :query_strand

  # strand of the hit ("Plus" or "Minus" or nil)
  attr_reader :hit_strand if false #dummy
  method_after_parse_score :hit_strand

  # Parses alignments. Runs at most once per object.
  # Raises ScanError when the lines do not follow the expected
  # Query / midline / Sbjct order or contain unrecognized text.
  def parse_alignment
    unless defined?(@parse_alignment)
      qpos1 = nil
      qpos2 = nil
      spos1 = nil
      spos2 = nil
      qseq = []
      sseq = []
      mseq = []
      pos_st = nil
      len_seq = 0
      # Which kind of line is expected next: :q (Query), :m (midline)
      # or :s (Sbjct).
      nextline = :q
      @f0alignment.each do |x|
        sc = StringScanner.new(x)
        while sc.rest?
          if r = sc.skip(/(Query|Sbjct)\: *(\d+) */) then
            # r is the width of the "Query: NNN " prefix, i.e. the
            # column where the sequence starts; the midline is cut at
            # the same column below.
            pos_st = r
            qs = sc[1]
            pos1 = sc[2]
            len_seq = sc.skip(/[^ ]*/)
            seq = sc[0]
            sc.skip(/ *(\d+) *\n/)
            pos2 = sc[1]
            if qs == 'Query' then
              raise ScanError unless nextline == :q
              # Keep the first start position and the last end position.
              qpos1 = pos1.to_i unless qpos1
              qpos2 = pos2.to_i
              qseq << seq
              nextline = :m
            elsif qs == 'Sbjct' then
              if nextline == :m then
                # No midline was seen between Query and Sbjct:
                # substitute a blank one of matching width.
                mseq << (' ' * len_seq)
              end
              spos1 = pos1.to_i unless spos1
              spos2 = pos2.to_i
              sseq << seq
              nextline = :q
            else
              raise ScanError
            end
          elsif r = sc.scan(/ {6}.+/) then
            raise ScanError unless nextline == :m
            mseq << r[pos_st, len_seq]
            sc.skip(/\n/)
            nextline = :s
          elsif r = sc.skip(/pattern +\d+.+/) then
            # PHI-BLAST
            # do nothing
            sc.skip(/\n/)
          else
            raise ScanError
          end
        end #while
      end #each
      @qseq = qseq.join('')
      @hseq = sseq.join('')
      @midline = mseq.join('')
      @query_from = qpos1
      @query_to = qpos2
      @hit_from = spos1
      @hit_to = spos2
      @parse_alignment = true
    end #unless
  end #def
  private :parse_alignment

  # Defines attributes which call parse_alignment before accessing.
  def self.method_after_parse_alignment(*names)
    names.each do |x|
      module_eval("def #{x}; parse_alignment; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_alignment

  # query sequence (with gaps) of the alignment of the hsp
  attr_reader :qseq if false #dummy
  method_after_parse_alignment :qseq
  # hit sequence (with gaps) of the alignment of the hsp
  attr_reader :hseq if false #dummy
  method_after_parse_alignment :hseq

  # middle line of the alignment of the hsp
  attr_reader :midline if false #dummy
  method_after_parse_alignment :midline

  # start position of the query (the first position is 1)
  attr_reader :query_from if false #dummy
  method_after_parse_alignment :query_from

  # end position of the query (including its position)
  attr_reader :query_to if false #dummy
  method_after_parse_alignment :query_to

  # start position of the hit (the first position is 1)
  attr_reader :hit_from if false #dummy
  method_after_parse_alignment :hit_from

  # end position of the hit (including its position)
  attr_reader :hit_to if false #dummy
  method_after_parse_alignment :hit_to

end #class HSP
1195
+
1196
+ end #class Report
1197
+
1198
+ # NCBI BLAST default (-m 0 option) output parser for TBLAST.
1199
+ # All methods are equal to Bio::Blast::Default::Report.
1200
+ # Only DELIMITER (and RS) is different.
1201
class Report_TBlast < Report
  # Delimiter of each entry for TBLAST. Bio::FlatFile uses it.
  # RS and DELIMITER refer to the same string object.
  RS = "\nTBLAST"
  DELIMITER = RS
end #class Report_TBlast
1205
+
1206
+ end #module Default
1207
+ end #class Blast
1208
+ end #module Bio
1209
+
1210
+ ######################################################################
1211
+
1212
# Demo driver, executed only when this file is run as a script.
# Reads BLAST default-format reports from ARGF and prints, for every
# report, the value of each public reader of the report, its iterations,
# their hits and the hits' HSPs.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Default::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Default::Report\n"
      puts
      print " rep.program #=> "; p rep.program
      print " rep.version #=> "; p rep.version
      print " rep.reference #=> "; p rep.reference
      print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      print " rep.version_number #=> "; p rep.version_number
      print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      print "# === Message (last iteration's)\n"
      puts
      print " rep.message #=> "; p rep.message
      #puts
      print " rep.converged? #=> "; p rep.converged?
      puts

      print "# === Iterations\n"
      puts
      # Label corrected: the method called below is "iterations".
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Default::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size
        print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size
        if itr.hits_for_pattern then
          itr.hits_for_pattern.each_with_index do |hp, hpi|
            print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size
          end
        end
        print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          print "# --- Bio::Blast::Default::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          # Closing markers for the two sections opened above
          # (spelling of "values" corrected).
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              # Label corrected: the class is named HSP.
              print "# --- Bio::Blast::Default::Report::HSP"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
1401
+
1402
+ ######################################################################