bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,95 @@
1
+ #
2
+ # bio/appl/blast/format8.rb - BLAST tab-delimited output (-m 8) parser
3
+ #
4
+ # Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: format8.rb,v 1.5 2005/09/08 01:22:08 k Exp $
21
+ #
22
+
23
module Bio
  class Blast
    class Report

      private

      # Parses BLAST tab-delimited output (-m 8) held in the String +data+.
      #
      # One Iteration is appended to @iterations.  Every data row becomes one
      # Hsp; consecutive rows sharing the same query id and target id are
      # grouped into one Hit.  Hits are numbered from 1 within the iteration
      # and Hsps from 1 within each Hit.  @query_id and @query_def are set to
      # the first whitespace-delimited token of the first data row (nil when
      # there is no data row).
      #
      # Blank lines and lines starting with '#' are ignored.
      def tab_parse(data)
        iteration = Iteration.new
        @iterations.push(iteration)
        @query_id = @query_def = nil

        query_prev = nil
        target_prev = nil
        hit = nil
        hit_num = 1
        hsp_num = 1
        # String#each_line exists on every Ruby version, whereas the
        # String#each used previously was removed in Ruby 1.9.
        data.each_line do |line|
          line = line.chomp
          # Skip rows that carry no alignment (blank or comment lines).
          next if line.strip.empty? or line[0, 1] == '#'
          @query_id = @query_def = line[/\S+/] if @query_id.nil?

          ary = line.split("\t")
          query_id, target_id, hsp = tab_parse_hsp(ary)
          # A new Hit starts whenever the query or the target changes, so
          # rows of different queries against the same target are not merged.
          if query_prev != query_id or target_prev != target_id
            hit = Hit.new
            hit.num = hit_num
            hit_num += 1
            hit.query_id = hit.query_def = query_id
            hit.accession = hit.definition = target_id
            iteration.hits.push(hit)
            hsp_num = 1
          end
          hsp.num = hsp_num
          hsp_num += 1
          hit.hsps.push(hsp)
          query_prev = query_id
          target_prev = target_id
        end
      end

      # Converts one tab-split row (+ary+, an Array of Strings) into an Hsp.
      #
      # Columns are read in this order: query id, target id, percent
      # identity, alignment length, mismatch count, gaps, query from,
      # query to, hit from, hit to, e-value, bit score.
      #
      # Returns the three values query_id, target_id and the new Hsp.
      # Missing trailing columns become 0 / 0.0.
      def tab_parse_hsp(ary)
        query_id, target_id,
          percent_identity,
          align_len,
          mismatch_count,
          gaps,
          query_from,
          query_to,
          hit_from,
          hit_to,
          evalue,
          bit_score = *ary

        hsp = Hsp.new
        hsp.align_len = align_len.to_i
        hsp.gaps = gaps.to_i
        hsp.query_from = query_from.to_i
        hsp.query_to = query_to.to_i
        hsp.hit_from = hit_from.to_i
        hsp.hit_to = hit_to.to_i
        # to_s keeps a short row from raising on nil, matching the other
        # columns whose nil.to_i / nil.to_f already yield zero.
        hsp.evalue = evalue.to_s.strip.to_f
        hsp.bit_score = bit_score.to_f

        hsp.percent_identity = percent_identity.to_f
        hsp.mismatch_count = mismatch_count.to_i

        return query_id, target_id, hsp
      end

    end
  end
end
89
+
90
+
91
+ =begin
92
+
93
+ This file is automatically loaded by bio/appl/blast/report.rb
94
+
95
+ =end
@@ -0,0 +1,652 @@
1
+ #
2
+ # bio/appl/blast/report.rb - BLAST Report class
3
+ #
4
+ # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: report.rb,v 1.9 2005/09/26 13:00:04 k Exp $
21
+ #
22
+
23
+ require 'bio/appl/blast'
24
+ require 'bio/appl/blast/xmlparser'
25
+ require 'bio/appl/blast/rexml'
26
+ require 'bio/appl/blast/format8'
27
+
28
+ module Bio
29
+ class Blast
30
+
31
+ class Report
32
+
33
+ # for Bio::FlatFile support (only for XML data)
34
+ DELIMITER = RS = "</BlastOutput>\n"
35
+
36
+ def self.xmlparser(data)
37
+ self.new(data, :xmlparser)
38
+ end
39
+ def self.rexml(data)
40
+ self.new(data, :rexml)
41
+ end
42
+ def self.tab(data)
43
+ self.new(data, :tab)
44
+ end
45
+
46
+ def auto_parse(data)
47
+ if /<?xml/.match(data[/.*/])
48
+ if defined?(XMLParser)
49
+ xmlparser_parse(data)
50
+ else
51
+ rexml_parse(data)
52
+ end
53
+ else
54
+ tab_parse(data)
55
+ end
56
+ end
57
+ private :auto_parse
58
+
59
+ def initialize(data, parser = nil)
60
+ @iterations = []
61
+ @parameters = {}
62
+ case parser
63
+ when :xmlparser # format 7
64
+ xmlparser_parse(data)
65
+ when :rexml # format 7
66
+ rexml_parse(data)
67
+ when :tab # format 8
68
+ tab_parse(data)
69
+ else
70
+ auto_parse(data)
71
+ end
72
+ end
73
+ attr_reader :iterations, :parameters,
74
+ :program, :version, :reference, :db, :query_id, :query_def, :query_len
75
+
76
+ # shortcut for @parameters
77
+ def matrix; @parameters['matrix']; end
78
+ def expect; @parameters['expect'].to_i; end
79
+ def inclusion; @parameters['include'].to_i; end
80
+ def sc_match; @parameters['sc-match'].to_i; end
81
+ def sc_mismatch; @parameters['sc-mismatch'].to_i; end
82
+ def gap_open; @parameters['gap-open'].to_i; end
83
+ def gap_extend; @parameters['gap-extend'].to_i; end
84
+ def filter; @parameters['filter']; end
85
+ def pattern; @parameters['pattern']; end
86
+ def entrez_query; @parameters['entrez-query']; end
87
+
88
+ # <for blastpgp>
89
+ def each_iteration
90
+ @iterations.each do |x|
91
+ yield x
92
+ end
93
+ end
94
+
95
+ # <for blastall> shortcut for the last iteration's hits
96
+ def each_hit
97
+ @iterations.last.each do |x|
98
+ yield x
99
+ end
100
+ end
101
+ alias each each_hit
102
+
103
+ # shortcut for the last iteration's hits
104
+ def hits
105
+ @iterations.last.hits
106
+ end
107
+
108
+ # shortcut for the last iteration's statistics
109
+ def statistics
110
+ @iterations.last.statistics
111
+ end
112
+ def db_num; statistics['db-num']; end
113
+ def db_len; statistics['db-len']; end
114
+ def hsp_len; statistics['hsp-len']; end
115
+ def eff_space; statistics['eff-space']; end
116
+ def kappa; statistics['kappa']; end
117
+ def lambda; statistics['lambda']; end
118
+ def entropy; statistics['entropy']; end
119
+
120
+ # shortcut for the last iteration's message (for checking 'CONVERGED')
121
+ def message
122
+ @iterations.last.message
123
+ end
124
+
125
+
126
+ # Bio::Blast::Report::Iteration
127
+ class Iteration
128
+ def initialize
129
+ @message = nil
130
+ @statistics = {}
131
+ @num = 1
132
+ @hits = []
133
+ end
134
+ attr_reader :hits, :statistics
135
+ attr_accessor :num, :message
136
+
137
+ def each
138
+ @hits.each do |x|
139
+ yield x
140
+ end
141
+ end
142
+ end
143
+
144
+
145
+ # Bio::Blast::Report::Hit
146
+ class Hit
147
+ def initialize
148
+ @hsps = []
149
+ end
150
+ attr_reader :hsps
151
+ attr_accessor :query_id, :query_def, :query_len,
152
+ :num, :hit_id, :len, :definition, :accession
153
+
154
+ def each
155
+ @hsps.each do |x|
156
+ yield x
157
+ end
158
+ end
159
+
160
+ # Compatible with Bio::Fasta::Report::Hit
161
+
162
+ alias target_id accession
163
+ alias target_def definition
164
+ alias target_len len
165
+
166
+ # Shortcut methods for the best Hsp
167
+
168
+ def evalue; @hsps.first.evalue; end
169
+ def bit_score; @hsps.first.bit_score; end
170
+ def identity; @hsps.first.identity; end
171
+ def percent_identity; @hsps.first.percent_identity; end
172
+ def overlap; @hsps.first.align_len; end
173
+
174
+ def query_seq; @hsps.first.qseq; end
175
+ def target_seq; @hsps.first.hseq; end
176
+ def midline; @hsps.first.midline; end
177
+
178
+ def query_start; @hsps.first.query_from; end
179
+ def query_end; @hsps.first.query_to; end
180
+ def target_start; @hsps.first.hit_from; end
181
+ def target_end; @hsps.first.hit_to; end
182
+ def lap_at
183
+ [ query_start, query_end, target_start, target_end ]
184
+ end
185
+ end
186
+
187
+
188
+ # Bio::Blast::Report::Hsp
189
+ class Hsp
190
+ def initialize
191
+ @hsp = {}
192
+ end
193
+ attr_reader :hsp
194
+ attr_accessor :num, :bit_score, :score, :evalue,
195
+ :query_from, :query_to, :hit_from, :hit_to,
196
+ :pattern_from, :pattern_to, :query_frame, :hit_frame,
197
+ :identity, :positive, :gaps, :align_len, :density,
198
+ :qseq, :hseq, :midline,
199
+ :percent_identity, :mismatch_count # only for '-m 8'
200
+ end
201
+
202
+ end
203
+ end
204
+ end
205
+
206
+
207
+ if __FILE__ == $0
208
+
209
+ =begin
210
+
211
+ begin # p is suitable than pp for the following test script
212
+ require 'pp'
213
+ alias p pp
214
+ rescue
215
+ end
216
+
217
+ # for multiple xml reports (iterates on each Blast::Report)
218
+ Bio::Blast.reports(ARGF) do |rep|
219
+ rep.iterations.each do |itr|
220
+ itr.hits.each do |hit|
221
+ hit.hsps.each do |hsp|
222
+ end
223
+ end
224
+ end
225
+ end
226
+
227
+ # for multiple xml reports (returns Array of Blast::Report)
228
+ reps = Bio::Blast.reports(ARGF.read)
229
+
230
+ # for a single report (xml or tab) format auto detect, parser auto selected
231
+ rep = Bio::Blast::Report.new(ARGF.read)
232
+
233
+ # to use xmlparser explicitly for a report
234
+ rep = Bio::Blast::Report.xmlparser(ARGF.read)
235
+
236
+ # to use resml explicitly for a report
237
+ rep = Bio::Blast::Report.rexml(ARGF.read)
238
+
239
+ # to use a tab delimited report
240
+ rep = Bio::Blast::Report.tab(ARGF.read)
241
+
242
+ =end
243
+
244
+ Bio::Blast.reports(ARGF) do |rep| # for multiple xml reports
245
+
246
+ print "# === Bio::Tools::Blast::Report\n"
247
+ puts
248
+ print " rep.program #=> "; p rep.program
249
+ print " rep.version #=> "; p rep.version
250
+ print " rep.reference #=> "; p rep.reference
251
+ print " rep.db #=> "; p rep.db
252
+ print " rep.query_id #=> "; p rep.query_id
253
+ print " rep.query_def #=> "; p rep.query_def
254
+ print " rep.query_len #=> "; p rep.query_len
255
+ puts
256
+
257
+ print "# === Parameters\n"
258
+ puts
259
+ print " rep.parameters #=> "; p rep.parameters
260
+ puts
261
+ print " rep.matrix #=> "; p rep.matrix
262
+ print " rep.expect #=> "; p rep.expect
263
+ print " rep.inclusion #=> "; p rep.inclusion
264
+ print " rep.sc_match #=> "; p rep.sc_match
265
+ print " rep.sc_mismatch #=> "; p rep.sc_mismatch
266
+ print " rep.gap_open #=> "; p rep.gap_open
267
+ print " rep.gap_extend #=> "; p rep.gap_extend
268
+ print " rep.filter #=> "; p rep.filter
269
+ print " rep.pattern #=> "; p rep.pattern
270
+ print " rep.entrez_query #=> "; p rep.entrez_query
271
+ puts
272
+
273
+ print "# === Statistics (last iteration's)\n"
274
+ puts
275
+ print " rep.statistics #=> "; p rep.statistics
276
+ puts
277
+ print " rep.db_num #=> "; p rep.db_num
278
+ print " rep.db_len #=> "; p rep.db_len
279
+ print " rep.hsp_len #=> "; p rep.hsp_len
280
+ print " rep.eff_space #=> "; p rep.eff_space
281
+ print " rep.kappa #=> "; p rep.kappa
282
+ print " rep.lambda #=> "; p rep.lambda
283
+ print " rep.entropy #=> "; p rep.entropy
284
+ puts
285
+
286
+ print "# === Message (last iteration's)\n"
287
+ puts
288
+ print " rep.message #=> "; p rep.message
289
+ puts
290
+
291
+ print "# === Iterations\n"
292
+ puts
293
+ print " rep.itrerations.each do |itr|\n"
294
+ puts
295
+
296
+ rep.iterations.each do |itr|
297
+
298
+ print "# --- Bio::Blast::Report::Iteration\n"
299
+ puts
300
+
301
+ print " itr.num #=> "; p itr.num
302
+ print " itr.statistics #=> "; p itr.statistics
303
+ print " itr.message #=> "; p itr.message
304
+ print " itr.hits.size #=> "; p itr.hits.size
305
+ puts
306
+
307
+ print " itr.hits.each do |hit|\n"
308
+ puts
309
+
310
+ itr.hits.each do |hit|
311
+
312
+ print "# --- Bio::Blast::Report::Hit\n"
313
+ puts
314
+
315
+ print " hit.num #=> "; p hit.num
316
+ print " hit.hit_id #=> "; p hit.hit_id
317
+ print " hit.len #=> "; p hit.len
318
+ print " hit.definition #=> "; p hit.definition
319
+ print " hit.accession #=> "; p hit.accession
320
+
321
+ print " --- compatible/shortcut ---\n"
322
+ print " hit.query_id #=> "; p hit.query_id
323
+ print " hit.query_def #=> "; p hit.query_def
324
+ print " hit.query_len #=> "; p hit.query_len
325
+ print " hit.target_id #=> "; p hit.target_id
326
+ print " hit.target_def #=> "; p hit.target_def
327
+ print " hit.target_len #=> "; p hit.target_len
328
+
329
+ print " hit.evalue #=> "; p hit.evalue
330
+ print " hit.bit_score #=> "; p hit.bit_score
331
+ print " hit.identity #=> "; p hit.identity
332
+ print " hit.overlap #=> "; p hit.overlap
333
+
334
+ print " hit.query_seq #=> "; p hit.query_seq
335
+ print " hit.midline #=> "; p hit.midline
336
+ print " hit.target_seq #=> "; p hit.target_seq
337
+
338
+ print " hit.query_start #=> "; p hit.query_start
339
+ print " hit.query_end #=> "; p hit.query_end
340
+ print " hit.target_start #=> "; p hit.target_start
341
+ print " hit.target_end #=> "; p hit.target_end
342
+ print " hit.lap_at #=> "; p hit.lap_at
343
+ print " --- compatible/shortcut ---\n"
344
+
345
+ print " hit.hsps.size #=> "; p hit.hsps.size
346
+ puts
347
+
348
+ print " hit.hsps.each do |hsp|\n"
349
+ puts
350
+
351
+ hit.hsps.each do |hsp|
352
+
353
+ print "# --- Bio::Blast::Report::Hsp\n"
354
+ puts
355
+ print " hsp.num #=> "; p hsp.num
356
+ print " hsp.bit_score #=> "; p hsp.bit_score
357
+ print " hsp.score #=> "; p hsp.score
358
+ print " hsp.evalue #=> "; p hsp.evalue
359
+ print " hsp.identity #=> "; p hsp.identity
360
+ print " hsp.gaps #=> "; p hsp.gaps
361
+ print " hsp.positive #=> "; p hsp.positive
362
+ print " hsp.align_len #=> "; p hsp.align_len
363
+ print " hsp.density #=> "; p hsp.density
364
+
365
+ print " hsp.query_frame #=> "; p hsp.query_frame
366
+ print " hsp.query_from #=> "; p hsp.query_from
367
+ print " hsp.query_to #=> "; p hsp.query_to
368
+
369
+ print " hsp.hit_frame #=> "; p hsp.hit_frame
370
+ print " hsp.hit_from #=> "; p hsp.hit_from
371
+ print " hsp.hit_to #=> "; p hsp.hit_to
372
+
373
+ print " hsp.pattern_from#=> "; p hsp.pattern_from
374
+ print " hsp.pattern_to #=> "; p hsp.pattern_to
375
+
376
+ print " hsp.qseq #=> "; p hsp.qseq
377
+ print " hsp.midline #=> "; p hsp.midline
378
+ print " hsp.hseq #=> "; p hsp.hseq
379
+ puts
380
+ print " hsp.percent_identity #=> "; p hsp.percent_identity
381
+ print " hsp.mismatch_count #=> "; p hsp.mismatch_count
382
+ puts
383
+
384
+ end
385
+ end
386
+ end
387
+ end # for multiple xml reports
388
+
389
+ end
390
+
391
+
392
+ =begin
393
+
394
+ = Bio::Blast::Report
395
+
396
+ Parsed results of the blast execution for Tab-delimited and XML output
397
+ format. Tab-delimited reports consist of
398
+
399
+ Query id,
400
+ Subject id,
401
+ percent of identity,
402
+ alignment length,
403
+ number of mismatches (not including gaps),
404
+ number of gap openings,
405
+ start of alignment in query,
406
+ end of alignment in query,
407
+ start of alignment in subject,
408
+ end of alignment in subject,
409
+ expected value,
410
+ bit score.
411
+
412
+ according to the MEGABLAST document (README.mbl). As for XML output,
413
+ see the following DTDs.
414
+
415
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
416
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod
417
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod
418
+
419
+
420
+ --- Bio::Blast::Report.new(data)
421
+
422
+ Passing a BLAST output from 'blastall -m 7' or '-m 8' as a String.
423
+ Formats are auto detected.
424
+
425
+ --- Bio::Blast::Report.xmlparser(xml)
426
+
427
+ Specify to use XMLParser to parse XML (-m 7) output.
428
+
429
+ --- Bio::Blast::Report.rexml(xml)
430
+
431
+ Specify to use REXML to parse XML (-m 7) output.
432
+
433
+ --- Bio::Blast::Report.tab(data)
434
+
435
+ Specify to use tab delimited output parser.
436
+
437
+ --- Bio::Blast::Report#program
438
+ --- Bio::Blast::Report#version
439
+ --- Bio::Blast::Report#reference
440
+ --- Bio::Blast::Report#db
441
+ --- Bio::Blast::Report#query_id
442
+ --- Bio::Blast::Report#query_def
443
+ --- Bio::Blast::Report#query_len
444
+
445
+ Shortcut for BlastOutput values.
446
+
447
+ --- Bio::Blast::Report#parameters
448
+
449
+ Returns a Hash containing execution parameters. Valid keys are:
450
+ 'matrix', 'expect', 'include', 'sc-match', 'sc-mismatch',
451
+ 'gap-open', 'gap-extend', 'filter', 'pattern', 'entrez-query'
452
+
453
+ --- Bio::Blast::Report#matrix
454
+ * Matrix used (-M)
455
+ --- Bio::Blast::Report#expect
456
+ * Expectation threshold (-e)
457
+ --- Bio::Blast::Report#inclusion
458
+ * Inclusion threshold (-h)
459
+ --- Bio::Blast::Report#sc_match
460
+ * Match score for NT (-r)
461
+ --- Bio::Blast::Report#sc_mismatch
462
+ * Mismatch score for NT (-q)
463
+ --- Bio::Blast::Report#gap_open
464
+ * Gap opening cost (-G)
465
+ --- Bio::Blast::Report#gap_extend
466
+ * Gap extension cost (-E)
467
+ --- Bio::Blast::Report#filter
468
+ * Filtering options (-F)
469
+ --- Bio::Blast::Report#pattern
470
+ * PHI-BLAST pattern
471
+ --- Bio::Blast::Report#entrez_query
472
+ * Limit of request to Entrez
473
+
474
+ These are shortcuts for parameters.
475
+
476
+
477
+ --- Bio::Blast::Report#iterations
478
+
479
+ Returns an Array of Bio::Blast::Report::Iteration objects.
480
+
481
+ --- Bio::Blast::Report#each_iteration
482
+
483
+ Iterates on each Bio::Blast::Report::Iteration object.
484
+
485
+ --- Bio::Blast::Report#each_hit
486
+ --- Bio::Blast::Report#each
487
+
488
+ Iterates on each Bio::Blast::Report::Hit object of the
489
+ last Iteration.
490
+
491
+ --- Bio::Blast::Report#statistics
492
+
493
+ Returns a Hash containing execution statistics of the last iteration.
494
+ Valid keys are:
495
+ 'db-num', 'db-len', 'hsp-len', 'eff-space', 'kappa',
496
+ 'lambda', 'entropy'
497
+
498
+ --- Bio::Blast::Report#db_num
499
+ * Number of sequences in BLAST db
500
+ --- Bio::Blast::Report#db_len
501
+ * Length of BLAST db
502
+ --- Bio::Blast::Report#hsp_len
503
+ * Effective HSP length
504
+ --- Bio::Blast::Report#eff_space
505
+ * Effective search space
506
+ --- Bio::Blast::Report#kappa
507
+ * Karlin-Altschul parameter K
508
+ --- Bio::Blast::Report#lambda
509
+ * Karlin-Altschul parameter Lambda
510
+ --- Bio::Blast::Report#entropy
511
+ * Karlin-Altschul parameter H
512
+
513
+ These are shortcuts for statistics.
514
+
515
+
516
+ --- Bio::Blast::Report#message
517
+
518
+ Returns a String (or nil) containing execution message of the last
519
+ iteration (typically "CONVERGED").
520
+
521
+ --- Bio::Blast::Report#hits
522
+
523
+ Returns an Array of Bio::Blast::Report::Hit objects of the last iteration.
524
+
525
+
526
+ == Bio::Blast::Report::Iteration
527
+
528
+ --- Bio::Blast::Report::Iteration#num
529
+
530
+ Returns the number of iteration counts.
531
+
532
+ --- Bio::Blast::Report::Iteration#hits
533
+
534
+ Returns an Array of Bio::Blast::Report::Hit objects.
535
+
536
+ --- Bio::Blast::Report::Iteration#each
537
+
538
+ Iterates on each Bio::Blast::Report::Hit object.
539
+
540
+ --- Bio::Blast::Report::Iteration#statistics
541
+
542
+ Returns a Hash containing execution statistics.
543
+ Valid keys are:
544
+ 'db-len', 'db-num', 'eff-space', 'entropy', 'hsp-len',
545
+ 'kappa', 'lambda'
546
+
547
+ --- Bio::Blast::Report::Iteration#message
548
+
549
+ Returns a String (or nil) containing execution message (typically
550
+ "CONVERGED").
551
+
552
+
553
+ == Bio::Blast::Report::Hit
554
+
555
+ --- Bio::Blast::Report::Hit#each
556
+
557
+ Iterates on each Hsp object.
558
+
559
+ --- Bio::Blast::Report::Hit#hsps
560
+
561
+ Returns an Array of Bio::Blast::Report::Hsp objects.
562
+
563
+ --- Bio::Blast::Report::Hit#num
564
+ * hit number
565
+ --- Bio::Blast::Report::Hit#hit_id
566
+ * SeqId of subject
567
+ --- Bio::Blast::Report::Hit#len
568
+ * length of subject
569
+ --- Bio::Blast::Report::Hit#definition
570
+ * definition line of subject
571
+ --- Bio::Blast::Report::Hit#accession
572
+ * accession
573
+
574
+ Accessors for the Hit values.
575
+
576
+ --- Bio::Blast::Report::Hit#query_id
577
+ --- Bio::Blast::Report::Hit#query_def
578
+ --- Bio::Blast::Report::Hit#query_len
579
+ --- Bio::Blast::Report::Hit#target_id
580
+ --- Bio::Blast::Report::Hit#target_def
581
+ --- Bio::Blast::Report::Hit#target_len
582
+
583
+ Compatible methods with Bio::Fasta::Report::Hit class.
584
+
585
+ --- Bio::Blast::Report::Hit#evalue
586
+ --- Bio::Blast::Report::Hit#bit_score
587
+ --- Bio::Blast::Report::Hit#identity
588
+ --- Bio::Blast::Report::Hit#overlap
589
+
590
+ --- Bio::Blast::Report::Hit#query_seq
591
+ --- Bio::Blast::Report::Hit#midline
592
+ --- Bio::Blast::Report::Hit#target_seq
593
+
594
+ --- Bio::Blast::Report::Hit#query_start
595
+ --- Bio::Blast::Report::Hit#query_end
596
+ --- Bio::Blast::Report::Hit#target_start
597
+ --- Bio::Blast::Report::Hit#target_end
598
+ --- Bio::Blast::Report::Hit#lap_at
599
+
600
+ Shortcut methods for the best Hsp, some are also compatible with
601
+ Bio::Fasta::Report::Hit class.
602
+
603
+
604
+ == Bio::Blast::Report::Hsp
605
+
606
+ --- Bio::Blast::Report::Hsp#num
607
+ * HSP number
608
+ --- Bio::Blast::Report::Hsp#bit_score
609
+ * score (in bits) of HSP
610
+ --- Bio::Blast::Report::Hsp#score
611
+ * score of HSP
612
+ --- Bio::Blast::Report::Hsp#evalue
613
+ * e-value of HSP
614
+ --- Bio::Blast::Report::Hsp#query_from
615
+ * start of HSP in query
616
+ --- Bio::Blast::Report::Hsp#query_to
617
+ * end of HSP in query
618
+ --- Bio::Blast::Report::Hsp#hit_from
619
+ * start of HSP in subject
620
+ --- Bio::Blast::Report::Hsp#hit_to
621
+ * end of HSP in subject
622
+ --- Bio::Blast::Report::Hsp#pattern_from
623
+ * start of PHI-BLAST pattern
624
+ --- Bio::Blast::Report::Hsp#pattern_to
625
+ * end of PHI-BLAST pattern
626
+ --- Bio::Blast::Report::Hsp#query_frame
627
+ * translation frame of query
628
+ --- Bio::Blast::Report::Hsp#hit_frame
629
+ * translation frame of subject
630
+ --- Bio::Blast::Report::Hsp#identity
631
+ * number of identities in HSP
632
+ --- Bio::Blast::Report::Hsp#positive
633
+ * number of positives in HSP
634
+ --- Bio::Blast::Report::Hsp#gaps
635
+ * number of gaps in HSP
636
+ --- Bio::Blast::Report::Hsp#align_len
637
+ * length of the alignment used
638
+ --- Bio::Blast::Report::Hsp#density
639
+ * score density
640
+ --- Bio::Blast::Report::Hsp#qseq
641
+ * alignment string for the query (with gaps)
642
+ --- Bio::Blast::Report::Hsp#hseq
643
+ * alignment string for subject (with gaps)
644
+ --- Bio::Blast::Report::Hsp#midline
645
+ * formatting middle line
646
+
647
+ --- Bio::Blast::Report::Hsp#percent_identity
648
+ --- Bio::Blast::Report::Hsp#mismatch_count
649
+
650
+ Available only for '-m 8' format outputs.
651
+
652
+ =end