bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,1402 @@
1
+ #
2
+ # = bio/appl/blast/format0.rb - BLAST default output (-m 0) parser
3
+ #
4
+ # Author:: Naohisa GOTO
5
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ #--
9
+ # This library is free software; you can redistribute it and/or
10
+ # modify it under the terms of the GNU Lesser General Public
11
+ # License as published by the Free Software Foundation; either
12
+ # version 2 of the License, or (at your option) any later version.
13
+ #
14
+ # This library is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
+ # Lesser General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU Lesser General Public
20
+ # License along with this library; if not, write to the Free Software
21
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
+ #++
23
+ #
24
+ # $Id: format0.rb,v 1.16 2005/11/01 05:32:23 ngoto Exp $
25
+ #
26
+ # NCBI BLAST default (-m 0 option) output parser.
27
+ #
28
+ # == References
29
+ #
30
+ # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
31
+ # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
32
+ # "Gapped BLAST and PSI-BLAST: a new generation of protein database search
33
+ # programs", Nucleic Acids Res. 25:3389-3402.
34
+ # * http://www.ncbi.nlm.nih.gov/blast/
35
+ #
36
+
37
+ begin
38
+ require 'strscan'
39
+ rescue LoadError
40
+ end
41
+ require 'singleton'
42
+
43
+ #--
44
+ #require 'bio/db'
45
+ #++
46
+ require 'bio/io/flatfile'
47
+
48
+ module Bio
49
+ class Blast
50
+ module Default #:nodoc:
51
+
52
+ # Bio::Blast::Default::Report parses NCBI BLAST default output
53
+ # and stores information in the data.
54
+ # It may store some Bio::Blast::Default::Report::Iteration objects.
55
+ class Report #< DB
56
+ # Delimiter of each entry. Bio::FlatFile uses it.
57
+ DELIMITER = RS = "\nBLAST"
58
+
59
+ # Opens file by using Bio::FlatFile.open.
60
def self.open(filename, *mode, &block)
  # Delegates to Bio::FlatFile.open, passing this class as the entry
  # parser class followed by +filename+ and any +mode+ arguments.
  # A block given by the caller is forwarded as well; previously it was
  # silently dropped.
  Bio::FlatFile.open(self, filename, *mode, &block)
end
63
+
64
+ # Creates a new Report object from BLAST result text.
65
def initialize(str)
  # Drop leading whitespace, then cut off anything that belongs to a
  # following entry (a line starting with BLAST or TBLAST).  The cut-off
  # text is kept in @entry_overrun.
  text = str.sub(/\A\s+/, '')
  overrun = nil
  text.sub!(/\n(T?BLAST.*)/m) { overrun = $1; "\n" }
  @entry_overrun = overrun
  @entry = text

  # Paragraphs are separated by one or more blank lines.  The helpers
  # below consume them from the front of the array, in this order.
  paragraphs = text.split(/(?:^[ \t]*\n)+/)
  format0_split_headers(paragraphs)
  @iterations = format0_split_search(paragraphs)
  format0_split_stat_params(paragraphs)
end
76
+ # piece of next entry. Bio::FlatFile uses it.
77
+ attr_reader :entry_overrun
78
+
79
+ # (PSI-BLAST)
80
+ # Returns iterations.
81
+ # It returns an array of Bio::Blast::Default::Report::Iteration class.
82
+ # Note that normal blastall result usually contains one iteration.
83
+ attr_reader :iterations
84
+
85
+ # Returns whole entry as a string.
86
def to_s
  # The entry text stored by initialize.
  @entry
end
87
+
88
+ #:stopdoc:
89
+ # prevent using StringScanner_R (in old version of strscan)
90
# Replace initialize with one that fails immediately when no usable
# StringScanner implementation is available.
if defined?(StringScanner).nil?
  def initialize(*arg)
    raise 'couldn\'t load strscan.so'
  end
elsif 'StringScanner_R' == StringScanner.name
  def initialize(*arg)
    raise 'cannot use StringScanner_R'
  end
end
99
+ #:startdoc:
100
+
101
+ # Defines attributes which delegate to @f0dbstat objects.
102
def self.delegate_to_f0dbstat(*names)
  # For every given name, define a zero-argument instance method that
  # returns the result of calling the same-named method on @f0dbstat.
  names.each { |attr| module_eval("def #{attr}; @f0dbstat.#{attr}; end") }
end
107
+ private_class_method :delegate_to_f0dbstat
108
+
109
+ # number of sequences in database
110
+ attr_reader :db_num if false #dummy
111
+ delegate_to_f0dbstat :db_num
112
+
113
+ # number of letters in database
114
+ attr_reader :db_len if false #dummy
115
+ delegate_to_f0dbstat :db_len
116
+
117
+ # posted date of the database
118
+ attr_reader :posted_date if false #dummy
119
+ delegate_to_f0dbstat :posted_date
120
+
121
+ # effective length of the database
122
+ attr_reader :eff_space if false #dummy
123
+ delegate_to_f0dbstat :eff_space
124
+
125
+ # name of the matrix
126
+ attr_reader :matrix if false #dummy
127
+ delegate_to_f0dbstat :matrix
128
+
129
+ # match score of the matrix
130
+ attr_reader :sc_match if false #dummy
131
+ delegate_to_f0dbstat :sc_match
132
+
133
+ # mismatch score of the matrix
134
+ attr_reader :sc_mismatch if false #dummy
135
+ delegate_to_f0dbstat :sc_mismatch
136
+
137
+ # gap open penalty
138
+ attr_reader :gap_open if false #dummy
139
+ delegate_to_f0dbstat :gap_open
140
+
141
+ # gap extend penalty
142
+ attr_reader :gap_extend if false #dummy
143
+ delegate_to_f0dbstat :gap_extend
144
+
145
+ # e-value threshold specified when BLAST was executed
146
+ attr_reader :expect if false #dummy
147
+ delegate_to_f0dbstat :expect
148
+
149
+ # number of hits. Note that this may differ from <tt>hits.size</tt>.
150
+ attr_reader :num_hits if false #dummy
151
+ delegate_to_f0dbstat :num_hits
152
+
153
+ # Same as <tt>iterations.last.kappa</tt>.
154
def kappa
  @iterations.last.kappa
end

# Same as <tt>iterations.last.lambda</tt>.
def lambda
  @iterations.last.lambda
end

# Same as <tt>iterations.last.entropy</tt>.
def entropy
  @iterations.last.entropy
end

# Same as <tt>iterations.last.gapped_kappa</tt>.
def gapped_kappa
  @iterations.last.gapped_kappa
end

# Same as <tt>iterations.last.gapped_lambda</tt>.
def gapped_lambda
  @iterations.last.gapped_lambda
end

# Same as <tt>iterations.last.gapped_entropy</tt>.
def gapped_entropy
  @iterations.last.gapped_entropy
end
166
+
167
+ # Returns program name.
168
def program
  format0_parse_header
  @program
end

# Returns version of the program.
def version
  format0_parse_header
  @version
end

# Returns version number string of the program.
def version_number
  format0_parse_header
  @version_number
end

# Returns released date of the program.
def version_date
  format0_parse_header
  @version_date
end

# Returns length of the query.
def query_len
  format0_parse_query
  @query_len
end

# Returns definition of the query.
def query_def
  format0_parse_query
  @query_def
end
181
+
182
+ # (PHI-BLAST)
183
+ # Same as <tt>iterations.first.pattern</tt>.
184
+ # Note that it returns the FIRST iteration's value.
185
def pattern
  @iterations.first.pattern
end

# (PHI-BLAST)
# Same as <tt>iterations.first.pattern_positions</tt>.
# Note that it returns the FIRST iteration's value.
def pattern_positions
  first_iteration = @iterations.first
  first_iteration.pattern_positions
end
193
+
194
+ # (PSI-BLAST)
195
+ # Iterates over each iteration.
196
+ # Same as <tt>iterations.each</tt>.
197
+ # Yields a Bio::Blast::Default::Report::Iteration object.
198
def each_iteration
  # Hand every stored iteration to the caller's block, in order.
  @iterations.each { |iteration| yield iteration }
end
203
+
204
+ # Iterates over each hit of the last iteration.
205
+ # Same as <tt>iterations.last.each_hit</tt>.
206
+ # Yields a Bio::Blast::Default::Report::Hit object.
207
+ # This is very useful in most cases, e.g. for blastall results.
208
def each_hit
  # Only the last iteration is walked; its #each yields the hits.
  @iterations.last.each { |hit| yield hit }
end
alias each each_hit
214
+
215
+ # Same as <tt>iterations.last.hits</tt>.
216
+ # Returns the last iteration's hits.
217
+ # Returns an array of Bio::Blast::Default::Report::Hit object.
218
+ # This is very useful in most cases, e.g. for blastall results.
219
def hits
  last_iteration = @iterations.last
  last_iteration.hits
end

# (PSI-BLAST)
# Same as <tt>iterations.last.message</tt>.
def message
  last_iteration = @iterations.last
  last_iteration.message
end

# (PSI-BLAST)
# Same as <tt>iterations.last.converged?</tt>.
# Returns whatever the last iteration reports as its convergence state.
def converged?
  last_iteration = @iterations.last
  last_iteration.converged?
end
236
+
237
+ # Returns the bibliography reference of the BLAST software.
238
def reference
  # Computed once: the reference paragraph with every run of whitespace
  # collapsed to a single space and the ends trimmed.
  return @reference if defined?(@reference)
  raw = @f0reference.to_s
  @reference = raw.gsub(/\s+/, ' ').strip
end
244
+
245
+ # Returns the name (filename or title) of the database.
246
def db
  unless defined?(@db)
    if /Database *\: *(.*)/m =~ @f0database
      # Everything after "Database:" split into lines; when there is more
      # than one line the last one is discarded before joining.
      title_lines = $1.split(/^/)
      title_lines.pop if title_lines.size > 1
      trimmed = title_lines.map { |line| line.sub(/\s+\z/, '') }
      @db = trimmed.join(' ')
    end
  end
  @db
end
256
+
257
+ private
258
+ # Parses the query lines (begins with "Query = ").
259
def format0_parse_query
  # Parses the "Query= ..." paragraph held in @f0query and sets
  # @query_def (definition lines joined with single spaces) and, when a
  # "(N letters)" trailer is present, @query_len.
  # Runs at most once per object (guarded by @query_def).
  unless defined?(@query_def)
    sc = StringScanner.new(@f0query.to_s)
    sc.skip(/\s*/)
    if sc.skip_until(/Query\= */) then
      q = []
      r = nil
      loop do
        q << sc.scan(/.*/)
        sc.skip(/\s*^ ?/)
        # Stop at end of input.  The previous code tested +sc.rest+ (a
        # String, always truthy) instead of +sc.rest?+, so a paragraph
        # without the "(N letters)" trailer never terminated.
        break unless sc.rest?
        # The length may be written with thousands separators ("1,234").
        break if r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
      end
      @query_len = sc[1].delete(',').to_i if r
      @query_def = q.join(' ')
    end
  end
end
274
+
275
+ # Parses the first line of the BLAST result.
276
def format0_parse_header
  # Extracts program name, version number and bracketed date from the
  # first paragraph.  Nothing is set when the text does not match, and
  # nothing is redone once @program exists.
  return if defined?(@program)
  header = /(\w+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/.match(@f0header.to_s)
  return unless header
  name, number, date = header[1], header[2], header[3]
  @program = name
  @version = "#{name} #{number} [#{date}]"
  @version_number = number
  @version_date = date
end
286
+
287
+ # Splits headers into the first line, reference, query line and
288
+ # database line.
289
def format0_split_headers(data)
  # Take the first four paragraphs off the front of +data+ (missing ones
  # become nil): header line, reference, query and database.
  @f0header, @f0reference, @f0query, @f0database = data.shift(4)
end
295
+
296
+ # Splits the statistical parameters.
297
def format0_split_stat_params(data)
  # Consumes the trailing statistics paragraphs from +data+:
  # 1. leading "Database:" paragraphs go into a new F0dbstat object;
  # 2. following "Lambda" paragraphs (including "Gapped\nLambda") are
  #    appended to the last iteration's @f0stat, and that iteration is
  #    given the F0dbstat object;
  # 3. whatever remains becomes the F0dbstat's f0params.
  dbs = []
  while r = data.first and /^ *Database\:/ =~ r
    dbs << data.shift
  end
  @f0dbstat = self.class::F0dbstat.new(dbs)
  i = -1
  while r = data[0] and /^Lambda/ =~ r
    # Always consume the paragraph.  Previously it was only shifted when
    # an iteration existed, so a report without iterations looped forever.
    x = data.shift
    if itr = @iterations[i] then
      itr.instance_eval { @f0stat << x }
      dbstat = @f0dbstat
      itr.instance_eval { @f0dbstat = dbstat }
    end
  end
  @f0dbstat.f0params = data
end
313
+
314
+ # Splits the search results.
315
def format0_split_search(data)
  # Each paragraph containing a line that starts with "Searching" opens
  # an iteration; Iteration.new removes the paragraphs it uses from +data+.
  found = []
  found << Iteration.new(data) while (head = data[0]) && /^Searching/ =~ head
  found
end
322
+
323
+ # Stores format0 database statistics.
324
+ # Internal use only. Users must not use the class.
325
class F0dbstat #:nodoc:
  # Creates new F0dbstat class.
  # Internal use only.
  # +ary+ is the array of "Database:" paragraphs; only its first element
  # is examined by the database accessors below.
  def initialize(ary)
    @f0dbstat = ary
    @hash = {}
  end
  attr_reader :f0dbstat
  # Array of the remaining parameter paragraphs ("Key: value" lines).
  attr_accessor :f0params

  # Parses colon-separated lines (in +ary+) and stores them into +hash+
  # as key => value strings.  A "Number of sequences better than X: N"
  # line additionally sets @expect (String) and @num_hits (Integer).
  # A nil +ary+ is treated as empty.
  # Raises StringScanner::Error when a line cannot be parsed.
  def parse_colon_separated_params(hash, ary)
    (ary || []).each do |str|
      sc = StringScanner.new(str)
      sc.skip(/\s*/)
      while sc.rest?
        if sc.match?(/Number of sequences better than +([e\-\.\d]+) *\: *(.+)/) then
          @expect = sc[1]
          @num_hits = sc[2].tr(',', '').to_i
        end
        unless sc.skip(/([\-\,\.\'\(\)\#\w ]+)\: *(.*)/)
          # Report where parsing stopped instead of raising a bare error.
          raise StringScanner::Error,
                "cannot parse BLAST parameter line: #{sc.peek(40).inspect}"
        end
        hash[sc[1]] = sc[2]
        sc.skip(/\s*/)
      end
    end
  end
  private :parse_colon_separated_params

  # Parses parameters (once) and fills @matrix, @sc_match, @sc_mismatch,
  # @gap_open, @gap_extend and @eff_space from the collected hash.
  def parse_params
    return if defined?(@parse_params)
    parse_colon_separated_params(@hash, @f0params)
    if val = @hash['Matrix'] then
      if /blastn *matrix *\: *([e\-\.\d]+) +([e\-\.\d]+)/ =~ val then
        @matrix = 'blastn'
        @sc_match = $1.to_i
        @sc_mismatch = $2.to_i
      else
        @matrix = val
      end
    end
    if val = @hash['Gap Penalties'] then
      @gap_open = $1.to_i if /Existence\: *([e\-\.\d]+)/ =~ val
      @gap_extend = $1.to_i if /Extension\: *([e\-\.\d]+)/ =~ val
    end
    if val = @hash['effective length of database'] then
      @eff_space = val.tr(',', '').to_i
    end
    @parse_params = true
  end
  private :parse_params

  # Returns name of the matrix.
  def matrix; parse_params; @matrix; end
  # Returns the match score of the matrix.
  def sc_match; parse_params; @sc_match; end
  # Returns the mismatch score of the matrix.
  def sc_mismatch; parse_params; @sc_mismatch; end

  # Returns gap open penalty value.
  def gap_open; parse_params; @gap_open; end
  # Returns gap extend penalty value.
  def gap_extend; parse_params; @gap_extend; end

  # Returns effective length of the database.
  def eff_space; parse_params; @eff_space; end

  # Returns e-value threshold specified when BLAST was executed.
  def expect; parse_params; @expect; end

  # Returns number of hits.
  def num_hits; parse_params; @num_hits; end

  # Parses database statistics lines of the first "Database:" paragraph.
  # The lines are read from the bottom up until the posted date, the
  # number of letters and the number of sequences have all been seen;
  # the remaining lines from "Database:" onward form the database name.
  # The work is done once; fields absent from the text simply stay
  # unset (their accessors return nil) instead of triggering a re-parse
  # on every call.
  def parse_dbstat
    return if defined?(@parse_dbstat)
    lines = @f0dbstat[0].to_s.split(/^/)
    leftover = []
    remaining = 3
    while remaining > 0 and line = lines.pop
      case line
      when /^\s+Posted date\:\s*(.*)$/
        unless defined?(@posted_date)
          @posted_date = $1.strip
          remaining -= 1; leftover.clear
        end
      when /^\s+Number of letters in database\:\s*(.*)$/
        unless defined?(@db_len)
          @db_len = $1.tr(',', '').to_i
          remaining -= 1; leftover.clear
        end
      when /^\s+Number of sequences in database\:\s*(.*)$/
        unless defined?(@db_num)
          @db_num = $1.tr(',', '').to_i
          remaining -= 1; leftover.clear
        end
      else
        leftover.unshift(line)
      end
    end #while
    lines.concat(leftover)
    while line = lines.shift
      if /^\s+Database\:\s*(.*)$/ =~ line
        lines.unshift($1)
        lines.each { |part| part.strip! }
        @database = lines.join(' ')
        break #while
      end
    end
    @parse_dbstat = true
  end #def
  private :parse_dbstat

  # Returns name (title or filename) of the database.
  def database; parse_dbstat; @database; end

  # Returns posted date of the database.
  def posted_date; parse_dbstat; @posted_date; end

  # Returns number of letters in database.
  def db_len; parse_dbstat; @db_len; end

  # Returns number of sequences in database.
  def db_num; parse_dbstat; @db_num; end
end #class F0dbstat
468
+
469
+ # Provides a singleton object of which any methods always return nil.
470
+ # Internal use only. Users must not use the class.
471
class AlwaysNil #:nodoc:
  include Singleton

  # Every message without a real method lands here and yields nil.
  def method_missing(*arg); end
end #class AlwaysNil
477
+
478
+ # Bio::Blast::Default::Report::Iteration stores information about
479
+ # an iteration.
480
+ # It may contain some Bio::Blast::Default::Report::Hit objects.
481
+ # Note that a PSI-BLAST (blastpgp command) result usually contains
482
+ # multiple iterations in it, and a normal BLAST (blastall command)
483
+ # result usually contains one iteration in it.
484
+ class Iteration
485
+ # Creates a new Iteration object.
486
+ # It is designed to be called only internally from
487
+ # the Bio::Blast::Default::Report class.
488
+ # Users shall not use the method directly.
489
+ def initialize(data)
490
+ @f0stat = []
491
+ @f0dbstat = AlwaysNil.instance
492
+ @f0hitlist = []
493
+ @hits = []
494
+ @num = 1
495
+ r = data.shift
496
+ @f0message = [ r ]
497
+ r.gsub!(/^Results from round (\d+).*\z/) { |x|
498
+ @num = $1.to_i
499
+ @f0message << x
500
+ ''
501
+ }
502
+ r = data.shift
503
+ while /^Number of occurrences of pattern in the database is +(\d+)/ =~ r
504
+ # PHI-BLAST
505
+ @pattern_in_database = $1.to_i
506
+ @f0message << r
507
+ r = data.shift
508
+ end
509
+ if /^Results from round (\d+)/ =~ r then
510
+ @num = $1.to_i
511
+ @f0message << r
512
+ r = data.shift
513
+ end
514
+ if r and !(/\*{5} No hits found \*{5}/ =~ r) then
515
+ @f0hitlist << r
516
+ begin
517
+ @f0hitlist << data.shift
518
+ end until r = data[0] and /^\>/ =~ r
519
+ if r and /^CONVERGED\!/ =~ r then
520
+ r.sub!(/(.*\n)*^CONVERGED\!.*\n/) { |x| @f0hitlist << x; '' }
521
+ end
522
+ if defined?(@pattern_in_database) and r = data.first then
523
+ #PHI-BLAST
524
+ while /^\>/ =~ r
525
+ @hits << Hit.new(data)
526
+ r = data.first
527
+ break unless r
528
+ if /^Significant alignments for pattern/ =~ r
529
+ data.shift
530
+ r = data.first
531
+ end
532
+ end
533
+ else
534
+ #not PHI-BLAST
535
+ while r = data[0] and /^\>/ =~ r
536
+ @hits << Hit.new(data)
537
+ end
538
+ end
539
+ end
540
+ if /^CONVERGED\!\s*$/ =~ @f0hitlist[-1].to_s then
541
+ @message = 'CONVERGED!'
542
+ @flag_converged = true
543
+ end
544
+ end
545
+
546
+ # (PSI-BLAST) Iteration round number.
547
+ attr_reader :num
548
+ # (PSI-BLAST) Messages of the iteration.
549
+ attr_reader :message
550
+ # (PHI-BLAST) Number of occurrences of pattern in the database.
551
+ attr_reader :pattern_in_database
552
+
553
+ # Returns the hits of the iteration.
554
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
555
+ def hits
556
+ parse_hitlist
557
+ @hits
558
+ end
559
+
560
+ # Iterates over each hit of the iteration.
561
+ # Yields a Bio::Blast::Default::Report::Hit object.
562
+ def each
563
+ hits.each do |x|
564
+ yield x
565
+ end
566
+ end
567
+
568
+ # (PSI-BLAST) Returns true if the iteration is converged.
569
+ # Otherwise, returns false.
570
+ def converged?
571
+ @flag_converged
572
+ end
573
+
574
+ # (PHI-BLAST) Returns pattern string.
575
+ # Returns nil if it is not a PHI-BLAST result.
576
+ def pattern
577
+ #PHI-BLAST
578
+ if !defined?(@pattern) and defined?(@pattern_in_database) then
579
+ @pattern = nil
580
+ @pattern_positions = []
581
+ @f0message.each do |r|
582
+ sc = StringScanner.new(r)
583
+ if sc.skip_until(/^ *pattern +(.+)$/) then
584
+ @pattern = sc[1] unless @pattern
585
+ sc.skip_until(/^ at position +(\d+)/)
586
+ @pattern_positions << sc[1].to_i
587
+ end
588
+ end
589
+ end
590
+ @pattern
591
+ end
592
+
593
+ # (PHI-BLAST) Returns pattern positions.
594
+ # Returns nil if it is not a PHI-BLAST result.
595
+ def pattern_positions
596
+ #PHI-BLAST
597
+ pattern
598
+ @pattern_positions
599
+ end
600
+
601
+ # (PSI-BLAST)
602
+ # Returns hits which have been found again in the iteration.
603
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
604
+ def hits_found_again
605
+ parse_hitlist
606
+ @hits_found_again
607
+ end
608
+
609
+ # (PSI-BLAST)
610
+ # Returns hits which have been newly found in the iteration.
611
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
612
+ def hits_newly_found
613
+ parse_hitlist
614
+ @hits_newly_found
615
+ end
616
+
617
+ # (PHI-BLAST) Returns hits for pattern. ????
618
+ def hits_for_pattern
619
+ parse_hitlist
620
+ @hits_for_pattern
621
+ end
622
+
623
+ # Parses list of hits.
624
+ def parse_hitlist
625
+ unless defined?(@parse_hitlist)
626
+ @hits_found_again = []
627
+ @hits_newly_found = []
628
+ @hits_unknown_state = []
629
+ i = 0
630
+ a = @hits_newly_found
631
+ flag = true
632
+ @f0hitlist.each do |x|
633
+ sc = StringScanner.new(x)
634
+ if flag then
635
+ if sc.skip_until(/^Sequences used in model and found again\:\s*$/)
636
+ a = @hits_found_again
637
+ end
638
+ flag = nil
639
+ next
640
+ end
641
+ next if sc.skip(/^CONVERGED\!$/)
642
+ if sc.skip(/^Sequences not found previously or not previously below threshold\:\s*$/) then
643
+ a = @hits_newly_found
644
+ next
645
+ elsif sc.skip(/^Sequences.+\:\s*$/) then
646
+ #possibly a bug or unknown format?
647
+ a = @hits_unknown_state
648
+ next
649
+ elsif sc.skip(/^Significant (matches|alignments) for pattern/) then
650
+ # PHI-BLAST
651
+ # do nothing when 'alignments'
652
+ if sc[1] == 'matches' then
653
+ unless defined?(@hits_for_pattern)
654
+ @hits_for_pattern = []
655
+ end
656
+ a = []
657
+ @hits_for_pattern << a
658
+ end
659
+ next
660
+ end
661
+ b = x.split(/^/)
662
+ b.collect! { |y| y.empty? ? nil : y }
663
+ b.compact!
664
+ if i + b.size > @hits.size then
665
+ ((@hits.size - i)...(b.size)).each do |j|
666
+ y = b[j]; y.strip!
667
+ y.reverse!
668
+ z = y.split(/\s+/, 3)
669
+ z.each { |y| y.reverse! }
670
+ h = Hit.new([ z.pop.to_s.sub(/\.+\z/, '') ])
671
+ bs = z.pop.to_s
672
+ ev = z.pop.to_s
673
+ #ev = '1' + ev if ev[0] == ?e
674
+ h.instance_eval { @bit_score = bs; @evalue = ev }
675
+ @hits << h
676
+ end
677
+ end
678
+ a.concat(@hits[i, b.size])
679
+ i += b.size
680
+ end #each
681
+ @hits_found_again.each do |x|
682
+ x.instance_eval { @again = true }
683
+ end
684
+ @parse_hitlist = true
685
+ end #unless
686
+ end
687
+ private :parse_hitlist
688
+
689
+ # Parses statistics for the iteration.
690
+ def parse_stat
691
+ unless defined?(@parse_stat)
692
+ @f0stat.each do |x|
693
+ gapped = nil
694
+ sc = StringScanner.new(x)
695
+ sc.skip(/\s*/)
696
+ if sc.skip(/Gapped\s*/) then
697
+ gapped = true
698
+ end
699
+ s0 = []
700
+ h = {}
701
+ while r = sc.scan(/\w+/)
702
+ #p r
703
+ s0 << r
704
+ sc.skip(/ */)
705
+ end
706
+ sc.skip(/\s*/)
707
+ while r = sc.scan(/[e\.\-\d]+/)
708
+ #p r
709
+ h[s0.shift] = r
710
+ sc.skip(/ */)
711
+ end
712
+ if gapped then
713
+ @gapped_lambda = h['Lambda']
714
+ @gapped_kappa = h['K']
715
+ @gapped_entropy = h['H']
716
+ else
717
+ @lambda = h['Lambda']
718
+ @kappa = h['K']
719
+ @entropy = h['H']
720
+ end
721
+ end #each
722
+ @parse_stat = true
723
+ end #unless
724
+ end #def
725
+ private :parse_stat
726
+
727
+ # Defines attributes which call +parse_stat+ before accessing.
728
+ def self.method_after_parse_stat(*names)
729
+ names.each do |x|
730
+ module_eval("def #{x}; parse_stat; @#{x}; end")
731
+ end
732
+ end
733
+ private_class_method :method_after_parse_stat
734
+
735
+ # Lambda taken from the statistics block not marked "Gapped" (string as scanned; nil if absent)
735
+ attr_reader :lambda if false #dummy
736
+ method_after_parse_stat :lambda
737
+ # K (kappa) taken from the statistics block not marked "Gapped" (string as scanned; nil if absent)
738
+ attr_reader :kappa if false #dummy
739
+ method_after_parse_stat :kappa
740
+ # H (entropy) taken from the statistics block not marked "Gapped" (string as scanned; nil if absent)
741
+ attr_reader :entropy if false #dummy
742
+ method_after_parse_stat :entropy
743
+
744
+ # Lambda taken from the statistics block marked "Gapped" (string as scanned; nil if absent)
745
+ attr_reader :gapped_lambda if false #dummy
746
+ method_after_parse_stat :gapped_lambda
747
+ # K (kappa) taken from the statistics block marked "Gapped" (string as scanned; nil if absent)
748
+ attr_reader :gapped_kappa if false #dummy
749
+ method_after_parse_stat :gapped_kappa
750
+ # H (entropy) taken from the statistics block marked "Gapped" (string as scanned; nil if absent)
751
+ attr_reader :gapped_entropy if false #dummy
752
+ method_after_parse_stat :gapped_entropy
754
+
755
+ # Defines attributes which delegate to @f0dbstat objects.
756
def self.delegate_to_f0dbstat(*names)
  # For every given name, generates a reader method that forwards the
  # call of the same name to the object stored in @f0dbstat.
  names.each { |attr_name| module_eval("def #{attr_name}; @f0dbstat.#{attr_name}; end") }
end
761
+ private_class_method :delegate_to_f0dbstat
762
+
763
+ # name (title or filename) of the database (delegated to @f0dbstat)
764
+ attr_reader :database if false #dummy
765
+ delegate_to_f0dbstat :database
766
+ # posted date of the database (delegated to @f0dbstat)
767
+ attr_reader :posted_date if false #dummy
768
+ delegate_to_f0dbstat :posted_date
769
+
770
+ # presumably the number of sequences in the database -- NOTE(review): the comment here said "letters" and looked swapped with db_len; confirm against the @f0dbstat class
771
+ attr_reader :db_num if false #dummy
772
+ delegate_to_f0dbstat :db_num
773
+ # presumably the number of letters in the database -- NOTE(review): the comment here said "sequences" and looked swapped with db_num; confirm against the @f0dbstat class
774
+ attr_reader :db_len if false #dummy
775
+ delegate_to_f0dbstat :db_len
776
+ # effective search space -- NOTE(review): previously described as "effective length of the database"; confirm against the @f0dbstat class
777
+ attr_reader :eff_space if false #dummy
778
+ delegate_to_f0dbstat :eff_space
779
+
780
+ # e-value threshold specified when BLAST was executed (delegated to @f0dbstat)
781
+ attr_reader :expect if false #dummy
782
+ delegate_to_f0dbstat :expect
783
+
784
+ end #class Iteration
785
+
786
+ # Bio::Blast::Default::Report::Hit contains information about a hit.
787
+ # It may contain some Bio::Blast::Default::Report::HSP objects.
788
class Hit
  # Creates a new Hit object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Iteration class.
  # Users should not call the method directly.
  #
  # +data+ is an array of text chunks; the first chunk (the hit name
  # block) is removed from it, and every following chunk that starts
  # with indented "Score" is consumed by a new HSP object.
  def initialize(data)
    @f0hitname = data.shift
    @hsps = []
    while (chunk = data[0]) && /^\s+Score/ =~ chunk
      @hsps << HSP.new(data)
    end
    @again = false
  end

  # Hsp(high-scoring segment pair)s of the hit.
  # Returns an array of Bio::Blast::Default::Report::HSP objects.
  attr_reader :hsps

  # Iterates over each hsp(high-scoring segment pair) of the hit.
  # Yields a Bio::Blast::Default::Report::HSP object.
  def each
    @hsps.each { |hsp| yield hsp }
  end

  # (PSI-BLAST)
  # Returns true if the hit is found again in the iteration.
  # Otherwise, returns false or nil.
  def found_again?
    @again
  end

  # Returns first hsp's score, or nil if there are no hsps.
  def score
    first_hsp = @hsps.first
    first_hsp ? first_hsp.score : nil
  end

  # Returns first hsp's bit score.
  # (shown in hit list of BLAST result)
  #
  # @bit_score may already have been assigned from outside (the hit
  # list parser sets it through instance_eval); only when it has never
  # been assigned is the first hsp consulted.
  def bit_score
    unless defined?(@bit_score)
      first_hsp = @hsps.first
      @bit_score = first_hsp.bit_score if first_hsp
    end
    @bit_score
  end

  # Returns first hsp's e-value.
  # (shown in hit list of BLAST result)
  #
  # Same lookup rule as bit_score: an externally assigned @evalue wins.
  def evalue
    unless defined?(@evalue)
      first_hsp = @hsps.first
      @evalue = first_hsp.evalue if first_hsp
    end
    @evalue
  end

  # Parses name of the hit (runs only once).
  #
  # The definition is every line before the trailing "Length = N" line,
  # joined with single spaces.  @len is N as an Integer, or nil when no
  # "Length =" line terminates the text.
  def parse_hitname
    return if defined?(@parse_hitname)

    sc = StringScanner.new(@f0hitname)
    sc.skip(/\s*/)
    sc.skip(/\>/)
    lines = []
    length_found = nil
    loop do
      lines << sc.scan(/.*/)
      sc.skip(/\s*/)
      break unless sc.rest?
      length_found = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
      break if length_found
    end
    # The length may be printed with thousands separators ("1,234");
    # drop them first, otherwise to_i would stop at the first comma.
    @len = (length_found ? sc[1].delete(',').to_i : nil)
    @definition = lines.join(" ")
    @parse_hitname = true
  end
  private :parse_hitname

  # Returns length of the hit.
  def len; parse_hitname; @len; end

  # Returns definition of the hit.
  def definition; parse_hitname; @definition; end

  #--
  # Aliases to keep compatibility with Bio::Fasta::Report::Hit.
  #alias target_id accession
  alias target_def definition
  alias target_len len
  #++

  # Sends given method to the first hsp or returns nil if
  # there are no hsps.
  def hsp_first(m)
    (h = hsps.first) ? h.send(m) : nil
  end
  private :hsp_first

  #--
  # Shortcut methods for the best Hsp
  # (Compatibility method with FASTA)
  #++

  # Same as hsps.first.identity.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def identity; hsp_first :identity; end

  # Same as hsps.first.align_len.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def overlap; hsp_first :align_len; end

  # Same as hsps.first.qseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_seq; hsp_first :qseq; end

  # Same as hsps.first.hseq.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_seq; hsp_first :hseq; end

  # Same as hsps.first.midline.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def midline; hsp_first :midline; end

  # Same as hsps.first.query_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_start; hsp_first :query_from; end

  # Same as hsps.first.query_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def query_end; hsp_first :query_to; end

  # Same as hsps.first.hit_from.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_start; hsp_first :hit_from; end

  # Same as hsps.first.hit_to.
  # Returns nil if there are no hsp in the hit.
  # (Compatibility method with FASTA)
  def target_end; hsp_first :hit_to; end

  # Returns an array which contains
  # [ query_start, query_end, target_start, target_end ].
  # (Compatibility method with FASTA)
  def lap_at
    [ query_start, query_end, target_start, target_end ]
  end
end #class Hit
941
+
942
+ # Bio::Blast::Default::Report::HSP holds information about the hsp
943
+ # (high-scoring segment pair).
944
class HSP
  # Creates new HSP object.
  # It is designed to be called only internally from the
  # Bio::Blast::Default::Report::Hit class.
  # Users should not call the method directly.
  #
  # +data+ is an array of text chunks; the first chunk (the score
  # block) is removed from it, followed by every chunk that starts
  # with "Query:" or "Sbjct:" (the alignment blocks).
  def initialize(data)
    @f0score = data.shift
    @f0alignment = []
    while (chunk = data[0]) && /^(Query|Sbjct)\:/ =~ chunk
      @f0alignment << data.shift
    end
  end

  # Parses scores, identities, positives, gaps, and so on
  # (runs only once).  Numeric scores, e-values and percentages are
  # kept as the strings scanned from the report; counts, frames and
  # the aligned length are converted to Integer.
  #
  # Raises ScanError when the score block contains text that matches
  # none of the known items, or when the denominators of
  # Identities/Positives/Gaps disagree.
  def parse_score
    return if defined?(@parse_score)

    sc = StringScanner.new(@f0score)
    while sc.rest?
      sc.skip(/\s*/)
      # The optional "(n)" after Expect may have more than one digit.
      if sc.skip(/Expect(?:\(\d+\))? *\= *([e\-\.\d]+)/)
        # Stored verbatim, even when written without mantissa ("e-100").
        @evalue = sc[1]
      elsif sc.skip(/Score *\= *([e\-\.\d]+) *bits *\( *([e\-\.\d]+) *\)/)
        @bit_score = sc[1]
        @score = sc[2]
      elsif sc.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/)
        alen = sc[3].to_i
        @align_len = alen unless defined?(@align_len)
        raise ScanError if alen != @align_len
        case sc[1]
        when 'Identities'
          @identity = sc[2].to_i
          @percent_identity = sc[4]
        when 'Positives'
          @positive = sc[2].to_i
          @percent_positive = sc[4]
        when 'Gaps'
          @gaps = sc[2].to_i
          @percent_gaps = sc[4]
        else
          raise ScanError
        end
      elsif sc.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/)
        @query_strand = sc[1]
        @hit_strand = sc[2]
        if sc[1] == sc[2]
          @query_frame = 1
          @hit_frame = 1
        elsif sc[1] == 'Plus' # Plus/Minus
          # complement sequence against xml(-m 7)
          # In xml(-m 8), -1=>Plus, 1=>Minus ???
          #@query_frame = -1
          #@hit_frame = 1
          @query_frame = 1
          @hit_frame = -1
        else # Minus/Plus
          @query_frame = -1
          @hit_frame = 1
        end
      elsif sc.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/)
        @query_frame = sc[1].to_i
        @hit_frame = sc[3].to_i if sc[2]
      elsif sc.skip(/Score *\= *([e\-\.\d]+) +\(([e\-\.\d]+) *bits *\)/)
        #WU-BLAST
        @score = sc[1]
        @bit_score = sc[2]
      elsif sc.skip(/P *\= * ([e\-\.\d]+)/)
        #WU-BLAST
        @p_sum_n = nil
        @pvalue = sc[1]
      elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\-\.\d]+)/)
        #WU-BLAST
        @p_sum_n = sc[1].to_i
        @pvalue = sc[2]
      else
        raise ScanError
      end
      sc.skip(/\s*\,?\s*/)
    end
    @parse_score = true
  end
  private :parse_score

  # Defines attributes which call parse_score before accessing.
  def self.method_after_parse_score(*names)
    names.each do |x|
      module_eval("def #{x}; parse_score; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_score

  # bit score
  attr_reader :bit_score if false #dummy
  method_after_parse_score :bit_score
  # score
  attr_reader :score if false #dummy
  method_after_parse_score :score

  # e-value
  attr_reader :evalue if false #dummy
  method_after_parse_score :evalue

  # frame of the query
  attr_reader :query_frame if false #dummy
  method_after_parse_score :query_frame
  # frame of the hit
  attr_reader :hit_frame if false #dummy
  method_after_parse_score :hit_frame

  # Identity (number of identical nucleotides or amino acids)
  attr_reader :identity if false #dummy
  method_after_parse_score :identity
  # percent of identical nucleotides or amino acids
  attr_reader :percent_identity if false #dummy
  method_after_parse_score :percent_identity

  # Positives (number of positive hit amino acids or nucleotides)
  attr_reader :positive if false #dummy
  method_after_parse_score :positive
  # percent of positive hit amino acids or nucleotides
  attr_reader :percent_positive if false #dummy
  method_after_parse_score :percent_positive

  # Gaps (number of gaps)
  attr_reader :gaps if false #dummy
  method_after_parse_score :gaps
  # percent of gaps
  attr_reader :percent_gaps if false #dummy
  method_after_parse_score :percent_gaps

  # aligned length
  attr_reader :align_len if false #dummy
  method_after_parse_score :align_len

  # strand of the query ("Plus" or "Minus" or nil)
  attr_reader :query_strand if false #dummy
  method_after_parse_score :query_strand

  # strand of the hit ("Plus" or "Minus" or nil)
  attr_reader :hit_strand if false #dummy
  method_after_parse_score :hit_strand

  # Parses alignments (runs only once).
  #
  # Walks through the Query / middle / Sbjct line triplets, collecting
  # the sequence fragments and remembering the first start position and
  # the last end position of each sequence.
  #
  # Raises ScanError when the lines do not appear in the expected order
  # or a line is not recognized.
  def parse_alignment
    return if defined?(@parse_alignment)

    qpos1 = nil
    qpos2 = nil
    spos1 = nil
    spos2 = nil
    qseq = []
    sseq = []
    mseq = []
    pos_st = nil    # column where the sequence text starts
    len_seq = 0     # width of the sequence text on the current line
    nextline = :q   # kind of line expected next (:q, :m or :s)
    @f0alignment.each do |x|
      sc = StringScanner.new(x)
      while sc.rest?
        if r = sc.skip(/(Query|Sbjct)\: *(\d+) */)
          pos_st = r
          qs = sc[1]
          pos1 = sc[2]
          len_seq = sc.skip(/[^ ]*/)
          seq = sc[0]
          sc.skip(/ *(\d+) *\n/)
          pos2 = sc[1]
          if qs == 'Query'
            raise ScanError unless nextline == :q
            qpos1 = pos1.to_i unless qpos1
            qpos2 = pos2.to_i
            qseq << seq
            nextline = :m
          elsif qs == 'Sbjct'
            if nextline == :m
              # The middle line was missing; substitute blanks.
              mseq << (' ' * len_seq)
            end
            spos1 = pos1.to_i unless spos1
            spos2 = pos2.to_i
            sseq << seq
            nextline = :q
          else
            raise ScanError
          end
        elsif r = sc.scan(/ {6}.+/)
          raise ScanError unless nextline == :m
          # Cut out the columns that sit under the query sequence.
          mseq << r[pos_st, len_seq]
          sc.skip(/\n/)
          nextline = :s
        elsif sc.skip(/pattern +\d+.+/)
          # PHI-BLAST
          # do nothing
          sc.skip(/\n/)
        else
          raise ScanError
        end
      end #while
    end #each
    @qseq = qseq.join('')
    @hseq = sseq.join('')
    @midline = mseq.join('')
    @query_from = qpos1
    @query_to = qpos2
    @hit_from = spos1
    @hit_to = spos2
    @parse_alignment = true
  end #def
  private :parse_alignment

  # Defines attributes which call parse_alignment before accessing.
  def self.method_after_parse_alignment(*names)
    names.each do |x|
      module_eval("def #{x}; parse_alignment; @#{x}; end")
    end
  end
  private_class_method :method_after_parse_alignment

  # query sequence (with gaps) of the alignment of the hsp
  attr_reader :qseq if false #dummy
  method_after_parse_alignment :qseq
  # hit sequence (with gaps) of the alignment of the hsp
  attr_reader :hseq if false #dummy
  method_after_parse_alignment :hseq

  # middle line of the alignment of the hsp
  attr_reader :midline if false #dummy
  method_after_parse_alignment :midline

  # start position of the query (the first position is 1)
  attr_reader :query_from if false #dummy
  method_after_parse_alignment :query_from

  # end position of the query (including its position)
  attr_reader :query_to if false #dummy
  method_after_parse_alignment :query_to

  # start position of the hit (the first position is 1)
  attr_reader :hit_from if false #dummy
  method_after_parse_alignment :hit_from

  # end position of the hit (including its position)
  attr_reader :hit_to if false #dummy
  method_after_parse_alignment :hit_to

end #class HSP
1195
+
1196
+ end #class Report
1197
+
1198
+ # NCBI BLAST default (-m 0 option) output parser for TBLAST.
1199
+ # All methods are equal to Bio::Blast::Default::Report.
1200
+ # Only DELIMITER (and RS) is different.
1201
class Report_TBlast < Report
  # Delimiter of each entry for TBLAST. Bio::FlatFile uses it.
  # RS and DELIMITER refer to the same string object.
  RS = "\nTBLAST"
  DELIMITER = RS
end #class Report_TBlast
1205
+
1206
+ end #module Default
1207
+ end #class Blast
1208
+ end #module Bio
1209
+
1210
+ ######################################################################
1211
+
1212
# Demo / self-test: when this file is run directly, reads BLAST default
# (-m 0) output from ARGF and dumps every attribute of each report, its
# iterations, hits and hsps.  Commented-out lines name attributes that
# this script deliberately does not print.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Default::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Default::Report\n"
      puts
      print " rep.program #=> "; p rep.program
      print " rep.version #=> "; p rep.version
      print " rep.reference #=> "; p rep.reference
      print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      print " rep.version_number #=> "; p rep.version_number
      print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      print "# === Message (last iteration's)\n"
      puts
      print " rep.message #=> "; p rep.message
      #puts
      print " rep.converged? #=> "; p rep.converged?
      puts

      print "# === Iterations\n"
      puts
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Default::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size
        print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size
        if itr.hits_for_pattern
          itr.hits_for_pattern.each_with_index do |hp, hpi|
            print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size
          end
        end
        print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          print "# --- Bio::Blast::Default::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::Hsp"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
1401
+
1402
+ ######################################################################