bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,95 @@
1
+ #
2
+ # bio/appl/blast/format8.rb - BLAST tab-delimited output (-m 8) parser
3
+ #
4
+ # Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: format8.rb,v 1.5 2005/09/08 01:22:08 k Exp $
21
+ #
22
+
23
+ module Bio
24
+ class Blast
25
+ class Report
26
+
27
+ private
28
+
29
+ def tab_parse(data)
30
+ iteration = Iteration.new
31
+ @iterations.push(iteration)
32
+ @query_id = @query_def = data[/\S+/]
33
+
34
+ target_prev = ''
35
+ hit_num = 1
36
+ hsp_num = 1
37
+ hit = ''
38
+ data.each do |line|
39
+ ary = line.chomp.split("\t")
40
+ query_id, target_id, hsp = tab_parse_hsp(ary)
41
+ if target_prev != target_id
42
+ hit = Hit.new
43
+ hit.num = hit_num
44
+ hit_num += 1
45
+ hit.query_id = hit.query_def = query_id
46
+ hit.accession = hit.definition = target_id
47
+ iteration.hits.push(hit)
48
+ hsp_num = 1
49
+ end
50
+ hsp.num = hsp_num
51
+ hsp_num += 1
52
+ hit.hsps.push(hsp)
53
+ target_prev = target_id
54
+ end
55
+ end
56
+
57
+ def tab_parse_hsp(ary)
58
+ query_id, target_id,
59
+ percent_identity,
60
+ align_len,
61
+ mismatch_count,
62
+ gaps,
63
+ query_from,
64
+ query_to,
65
+ hit_from,
66
+ hit_to,
67
+ evalue,
68
+ bit_score = *ary
69
+
70
+ hsp = Hsp.new
71
+ hsp.align_len = align_len.to_i
72
+ hsp.gaps = gaps.to_i
73
+ hsp.query_from = query_from.to_i
74
+ hsp.query_to = query_to.to_i
75
+ hsp.hit_from = hit_from.to_i
76
+ hsp.hit_to = hit_to.to_i
77
+ hsp.evalue = evalue.strip.to_f
78
+ hsp.bit_score = bit_score.to_f
79
+
80
+ hsp.percent_identity = percent_identity.to_f
81
+ hsp.mismatch_count = mismatch_count.to_i
82
+
83
+ return query_id, target_id, hsp
84
+ end
85
+
86
+ end
87
+ end
88
+ end
89
+
90
+
91
+ =begin
92
+
93
+ This file is automatically loaded by bio/appl/blast/report.rb
94
+
95
+ =end
@@ -0,0 +1,652 @@
1
+ #
2
+ # bio/appl/blast/report.rb - BLAST Report class
3
+ #
4
+ # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: report.rb,v 1.9 2005/09/26 13:00:04 k Exp $
21
+ #
22
+
23
+ require 'bio/appl/blast'
24
+ require 'bio/appl/blast/xmlparser'
25
+ require 'bio/appl/blast/rexml'
26
+ require 'bio/appl/blast/format8'
27
+
28
+ module Bio
29
+ class Blast
30
+
31
+ class Report
32
+
33
+ # for Bio::FlatFile support (only for XML data)
34
+ DELIMITER = RS = "</BlastOutput>\n"
35
+
36
+ def self.xmlparser(data)
37
+ self.new(data, :xmlparser)
38
+ end
39
+ def self.rexml(data)
40
+ self.new(data, :rexml)
41
+ end
42
+ def self.tab(data)
43
+ self.new(data, :tab)
44
+ end
45
+
46
+ def auto_parse(data)
47
+ if /<?xml/.match(data[/.*/])
48
+ if defined?(XMLParser)
49
+ xmlparser_parse(data)
50
+ else
51
+ rexml_parse(data)
52
+ end
53
+ else
54
+ tab_parse(data)
55
+ end
56
+ end
57
+ private :auto_parse
58
+
59
+ def initialize(data, parser = nil)
60
+ @iterations = []
61
+ @parameters = {}
62
+ case parser
63
+ when :xmlparser # format 7
64
+ xmlparser_parse(data)
65
+ when :rexml # format 7
66
+ rexml_parse(data)
67
+ when :tab # format 8
68
+ tab_parse(data)
69
+ else
70
+ auto_parse(data)
71
+ end
72
+ end
73
+ attr_reader :iterations, :parameters,
74
+ :program, :version, :reference, :db, :query_id, :query_def, :query_len
75
+
76
+ # shortcut for @parameters
77
+ def matrix; @parameters['matrix']; end
78
+ def expect; @parameters['expect'].to_i; end
79
+ def inclusion; @parameters['include'].to_i; end
80
+ def sc_match; @parameters['sc-match'].to_i; end
81
+ def sc_mismatch; @parameters['sc-mismatch'].to_i; end
82
+ def gap_open; @parameters['gap-open'].to_i; end
83
+ def gap_extend; @parameters['gap-extend'].to_i; end
84
+ def filter; @parameters['filter']; end
85
+ def pattern; @parameters['pattern']; end
86
+ def entrez_query; @parameters['entrez-query']; end
87
+
88
+ # <for blastpgp>
89
+ def each_iteration
90
+ @iterations.each do |x|
91
+ yield x
92
+ end
93
+ end
94
+
95
+ # <for blastall> shortcut for the last iteration's hits
96
+ def each_hit
97
+ @iterations.last.each do |x|
98
+ yield x
99
+ end
100
+ end
101
+ alias each each_hit
102
+
103
+ # shortcut for the last iteration's hits
104
+ def hits
105
+ @iterations.last.hits
106
+ end
107
+
108
+ # shortcut for the last iteration's statistics
109
+ def statistics
110
+ @iterations.last.statistics
111
+ end
112
+ def db_num; statistics['db-num']; end
113
+ def db_len; statistics['db-len']; end
114
+ def hsp_len; statistics['hsp-len']; end
115
+ def eff_space; statistics['eff-space']; end
116
+ def kappa; statistics['kappa']; end
117
+ def lambda; statistics['lambda']; end
118
+ def entropy; statistics['entropy']; end
119
+
120
+ # shortcut for the last iteration's message (for checking 'CONVERGED')
121
+ def message
122
+ @iterations.last.message
123
+ end
124
+
125
+
126
+ # Bio::Blast::Report::Iteration
127
+ class Iteration
128
+ def initialize
129
+ @message = nil
130
+ @statistics = {}
131
+ @num = 1
132
+ @hits = []
133
+ end
134
+ attr_reader :hits, :statistics
135
+ attr_accessor :num, :message
136
+
137
+ def each
138
+ @hits.each do |x|
139
+ yield x
140
+ end
141
+ end
142
+ end
143
+
144
+
145
+ # Bio::Blast::Report::Hit
146
+ class Hit
147
+ def initialize
148
+ @hsps = []
149
+ end
150
+ attr_reader :hsps
151
+ attr_accessor :query_id, :query_def, :query_len,
152
+ :num, :hit_id, :len, :definition, :accession
153
+
154
+ def each
155
+ @hsps.each do |x|
156
+ yield x
157
+ end
158
+ end
159
+
160
+ # Compatible with Bio::Fasta::Report::Hit
161
+
162
+ alias target_id accession
163
+ alias target_def definition
164
+ alias target_len len
165
+
166
+ # Shortcut methods for the best Hsp
167
+
168
+ def evalue; @hsps.first.evalue; end
169
+ def bit_score; @hsps.first.bit_score; end
170
+ def identity; @hsps.first.identity; end
171
+ def percent_identity; @hsps.first.percent_identity; end
172
+ def overlap; @hsps.first.align_len; end
173
+
174
+ def query_seq; @hsps.first.qseq; end
175
+ def target_seq; @hsps.first.hseq; end
176
+ def midline; @hsps.first.midline; end
177
+
178
+ def query_start; @hsps.first.query_from; end
179
+ def query_end; @hsps.first.query_to; end
180
+ def target_start; @hsps.first.hit_from; end
181
+ def target_end; @hsps.first.hit_to; end
182
+ def lap_at
183
+ [ query_start, query_end, target_start, target_end ]
184
+ end
185
+ end
186
+
187
+
188
+ # Bio::Blast::Report::Hsp
189
+ class Hsp
190
+ def initialize
191
+ @hsp = {}
192
+ end
193
+ attr_reader :hsp
194
+ attr_accessor :num, :bit_score, :score, :evalue,
195
+ :query_from, :query_to, :hit_from, :hit_to,
196
+ :pattern_from, :pattern_to, :query_frame, :hit_frame,
197
+ :identity, :positive, :gaps, :align_len, :density,
198
+ :qseq, :hseq, :midline,
199
+ :percent_identity, :mismatch_count # only for '-m 8'
200
+ end
201
+
202
+ end
203
+ end
204
+ end
205
+
206
+
207
+ if __FILE__ == $0
208
+
209
+ =begin
210
+
211
+ begin # p is suitable than pp for the following test script
212
+ require 'pp'
213
+ alias p pp
214
+ rescue
215
+ end
216
+
217
+ # for multiple xml reports (iterates on each Blast::Report)
218
+ Bio::Blast.reports(ARGF) do |rep|
219
+ rep.iterations.each do |itr|
220
+ itr.hits.each do |hit|
221
+ hit.hsps.each do |hsp|
222
+ end
223
+ end
224
+ end
225
+ end
226
+
227
+ # for multiple xml reports (returns Array of Blast::Report)
228
+ reps = Bio::Blast.reports(ARGF.read)
229
+
230
+ # for a single report (xml or tab) format auto detect, parser auto selected
231
+ rep = Bio::Blast::Report.new(ARGF.read)
232
+
233
+ # to use xmlparser explicitly for a report
234
+ rep = Bio::Blast::Report.xmlparser(ARGF.read)
235
+
236
+ # to use rexml explicitly for a report
237
+ rep = Bio::Blast::Report.rexml(ARGF.read)
238
+
239
+ # to use a tab delimited report
240
+ rep = Bio::Blast::Report.tab(ARGF.read)
241
+
242
+ =end
243
+
244
+ Bio::Blast.reports(ARGF) do |rep| # for multiple xml reports
245
+
246
+ print "# === Bio::Tools::Blast::Report\n"
247
+ puts
248
+ print " rep.program #=> "; p rep.program
249
+ print " rep.version #=> "; p rep.version
250
+ print " rep.reference #=> "; p rep.reference
251
+ print " rep.db #=> "; p rep.db
252
+ print " rep.query_id #=> "; p rep.query_id
253
+ print " rep.query_def #=> "; p rep.query_def
254
+ print " rep.query_len #=> "; p rep.query_len
255
+ puts
256
+
257
+ print "# === Parameters\n"
258
+ puts
259
+ print " rep.parameters #=> "; p rep.parameters
260
+ puts
261
+ print " rep.matrix #=> "; p rep.matrix
262
+ print " rep.expect #=> "; p rep.expect
263
+ print " rep.inclusion #=> "; p rep.inclusion
264
+ print " rep.sc_match #=> "; p rep.sc_match
265
+ print " rep.sc_mismatch #=> "; p rep.sc_mismatch
266
+ print " rep.gap_open #=> "; p rep.gap_open
267
+ print " rep.gap_extend #=> "; p rep.gap_extend
268
+ print " rep.filter #=> "; p rep.filter
269
+ print " rep.pattern #=> "; p rep.pattern
270
+ print " rep.entrez_query #=> "; p rep.entrez_query
271
+ puts
272
+
273
+ print "# === Statistics (last iteration's)\n"
274
+ puts
275
+ print " rep.statistics #=> "; p rep.statistics
276
+ puts
277
+ print " rep.db_num #=> "; p rep.db_num
278
+ print " rep.db_len #=> "; p rep.db_len
279
+ print " rep.hsp_len #=> "; p rep.hsp_len
280
+ print " rep.eff_space #=> "; p rep.eff_space
281
+ print " rep.kappa #=> "; p rep.kappa
282
+ print " rep.lambda #=> "; p rep.lambda
283
+ print " rep.entropy #=> "; p rep.entropy
284
+ puts
285
+
286
+ print "# === Message (last iteration's)\n"
287
+ puts
288
+ print " rep.message #=> "; p rep.message
289
+ puts
290
+
291
+ print "# === Iterations\n"
292
+ puts
293
+ print " rep.itrerations.each do |itr|\n"
294
+ puts
295
+
296
+ rep.iterations.each do |itr|
297
+
298
+ print "# --- Bio::Blast::Report::Iteration\n"
299
+ puts
300
+
301
+ print " itr.num #=> "; p itr.num
302
+ print " itr.statistics #=> "; p itr.statistics
303
+ print " itr.message #=> "; p itr.message
304
+ print " itr.hits.size #=> "; p itr.hits.size
305
+ puts
306
+
307
+ print " itr.hits.each do |hit|\n"
308
+ puts
309
+
310
+ itr.hits.each do |hit|
311
+
312
+ print "# --- Bio::Blast::Report::Hit\n"
313
+ puts
314
+
315
+ print " hit.num #=> "; p hit.num
316
+ print " hit.hit_id #=> "; p hit.hit_id
317
+ print " hit.len #=> "; p hit.len
318
+ print " hit.definition #=> "; p hit.definition
319
+ print " hit.accession #=> "; p hit.accession
320
+
321
+ print " --- compatible/shortcut ---\n"
322
+ print " hit.query_id #=> "; p hit.query_id
323
+ print " hit.query_def #=> "; p hit.query_def
324
+ print " hit.query_len #=> "; p hit.query_len
325
+ print " hit.target_id #=> "; p hit.target_id
326
+ print " hit.target_def #=> "; p hit.target_def
327
+ print " hit.target_len #=> "; p hit.target_len
328
+
329
+ print " hit.evalue #=> "; p hit.evalue
330
+ print " hit.bit_score #=> "; p hit.bit_score
331
+ print " hit.identity #=> "; p hit.identity
332
+ print " hit.overlap #=> "; p hit.overlap
333
+
334
+ print " hit.query_seq #=> "; p hit.query_seq
335
+ print " hit.midline #=> "; p hit.midline
336
+ print " hit.target_seq #=> "; p hit.target_seq
337
+
338
+ print " hit.query_start #=> "; p hit.query_start
339
+ print " hit.query_end #=> "; p hit.query_end
340
+ print " hit.target_start #=> "; p hit.target_start
341
+ print " hit.target_end #=> "; p hit.target_end
342
+ print " hit.lap_at #=> "; p hit.lap_at
343
+ print " --- compatible/shortcut ---\n"
344
+
345
+ print " hit.hsps.size #=> "; p hit.hsps.size
346
+ puts
347
+
348
+ print " hit.hsps.each do |hsp|\n"
349
+ puts
350
+
351
+ hit.hsps.each do |hsp|
352
+
353
+ print "# --- Bio::Blast::Report::Hsp\n"
354
+ puts
355
+ print " hsp.num #=> "; p hsp.num
356
+ print " hsp.bit_score #=> "; p hsp.bit_score
357
+ print " hsp.score #=> "; p hsp.score
358
+ print " hsp.evalue #=> "; p hsp.evalue
359
+ print " hsp.identity #=> "; p hsp.identity
360
+ print " hsp.gaps #=> "; p hsp.gaps
361
+ print " hsp.positive #=> "; p hsp.positive
362
+ print " hsp.align_len #=> "; p hsp.align_len
363
+ print " hsp.density #=> "; p hsp.density
364
+
365
+ print " hsp.query_frame #=> "; p hsp.query_frame
366
+ print " hsp.query_from #=> "; p hsp.query_from
367
+ print " hsp.query_to #=> "; p hsp.query_to
368
+
369
+ print " hsp.hit_frame #=> "; p hsp.hit_frame
370
+ print " hsp.hit_from #=> "; p hsp.hit_from
371
+ print " hsp.hit_to #=> "; p hsp.hit_to
372
+
373
+ print " hsp.pattern_from#=> "; p hsp.pattern_from
374
+ print " hsp.pattern_to #=> "; p hsp.pattern_to
375
+
376
+ print " hsp.qseq #=> "; p hsp.qseq
377
+ print " hsp.midline #=> "; p hsp.midline
378
+ print " hsp.hseq #=> "; p hsp.hseq
379
+ puts
380
+ print " hsp.percent_identity #=> "; p hsp.percent_identity
381
+ print " hsp.mismatch_count #=> "; p hsp.mismatch_count
382
+ puts
383
+
384
+ end
385
+ end
386
+ end
387
+ end # for multiple xml reports
388
+
389
+ end
390
+
391
+
392
+ =begin
393
+
394
+ = Bio::Blast::Report
395
+
396
+ Parsed results of the blast execution for Tab-delimited and XML output
397
+ format. Tab-delimited reports consist of
398
+
399
+ Query id,
400
+ Subject id,
401
+ percent of identity,
402
+ alignment length,
403
+ number of mismatches (not including gaps),
404
+ number of gap openings,
405
+ start of alignment in query,
406
+ end of alignment in query,
407
+ start of alignment in subject,
408
+ end of alignment in subject,
409
+ expected value,
410
+ bit score.
411
+
412
+ according to the MEGABLAST document (README.mbl). As for XML output,
413
+ see the following DTDs.
414
+
415
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
416
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod
417
+ * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod
418
+
419
+
420
+ --- Bio::Blast::Report.new(data)
421
+
422
+ Pass the output of 'blastall -m 7' or '-m 8' as a String.
423
+ The format is auto-detected.
424
+
425
+ --- Bio::Blast::Report.xmlparser(xml)
426
+
427
+ Specify to use XMLParser to parse XML (-m 7) output.
428
+
429
+ --- Bio::Blast::Report.rexml(xml)
430
+
431
+ Specify to use REXML to parse XML (-m 7) output.
432
+
433
+ --- Bio::Blast::Report.tab(data)
434
+
435
+ Specify to use tab delimited output parser.
436
+
437
+ --- Bio::Blast::Report#program
438
+ --- Bio::Blast::Report#version
439
+ --- Bio::Blast::Report#reference
440
+ --- Bio::Blast::Report#db
441
+ --- Bio::Blast::Report#query_id
442
+ --- Bio::Blast::Report#query_def
443
+ --- Bio::Blast::Report#query_len
444
+
445
+ Shortcut for BlastOutput values.
446
+
447
+ --- Bio::Blast::Report#parameters
448
+
449
+ Returns a Hash containing execution parameters. Valid keys are:
450
+ 'matrix', 'expect', 'include', 'sc-match', 'sc-mismatch',
451
+ 'gap-open', 'gap-extend', 'filter', 'pattern', 'entrez-query'
452
+
453
+ --- Bio::Blast::Report#matrix
454
+ * Matrix used (-M)
455
+ --- Bio::Blast::Report#expect
456
+ * Expectation threshold (-e)
457
+ --- Bio::Blast::Report#inclusion
458
+ * Inclusion threshold (-h)
459
+ --- Bio::Blast::Report#sc_match
460
+ * Match score for NT (-r)
461
+ --- Bio::Blast::Report#sc_mismatch
462
+ * Mismatch score for NT (-q)
463
+ --- Bio::Blast::Report#gap_open
464
+ * Gap opening cost (-G)
465
+ --- Bio::Blast::Report#gap_extend
466
+ * Gap extension cost (-E)
467
+ --- Bio::Blast::Report#filter
468
+ * Filtering options (-F)
469
+ --- Bio::Blast::Report#pattern
470
+ * PHI-BLAST pattern
471
+ --- Bio::Blast::Report#entrez_query
472
+ * Limit of request to Entrez
473
+
474
+ These are shortcuts for parameters.
475
+
476
+
477
+ --- Bio::Blast::Report#iterations
478
+
479
+ Returns an Array of Bio::Blast::Report::Iteration objects.
480
+
481
+ --- Bio::Blast::Report#each_iteration
482
+
483
+ Iterates on each Bio::Blast::Report::Iteration object.
484
+
485
+ --- Bio::Blast::Report#each_hit
486
+ --- Bio::Blast::Report#each
487
+
488
+ Iterates on each Bio::Blast::Report::Hit object of the
489
+ last Iteration.
490
+
491
+ --- Bio::Blast::Report#statistics
492
+
493
+ Returns a Hash containing execution statistics of the last iteration.
494
+ Valid keys are:
495
+ 'db-num', 'db-len', 'hsp-len', 'eff-space', 'kappa',
496
+ 'lambda', 'entropy'
497
+
498
+ --- Bio::Blast::Report#db_num
499
+ * Number of sequences in BLAST db
500
+ --- Bio::Blast::Report#db_len
501
+ * Length of BLAST db
502
+ --- Bio::Blast::Report#hsp_len
503
+ * Effective HSP length
504
+ --- Bio::Blast::Report#eff_space
505
+ * Effective search space
506
+ --- Bio::Blast::Report#kappa
507
+ * Karlin-Altschul parameter K
508
+ --- Bio::Blast::Report#lambda
509
+ * Karlin-Altschul parameter Lambda
510
+ --- Bio::Blast::Report#entropy
511
+ * Karlin-Altschul parameter H
512
+
513
+ These are shortcuts for statistics.
514
+
515
+
516
+ --- Bio::Blast::Report#message
517
+
518
+ Returns a String (or nil) containing execution message of the last
519
+ iteration (typically "CONVERGED").
520
+
521
+ --- Bio::Blast::Report#hits
522
+
523
+ Returns an Array of Bio::Blast::Report::Hit objects of the last iteration.
524
+
525
+
526
+ == Bio::Blast::Report::Iteration
527
+
528
+ --- Bio::Blast::Report::Iteration#num
529
+
530
+ Returns the iteration number (its ordinal position among the iterations).
531
+
532
+ --- Bio::Blast::Report::Iteration#hits
533
+
534
+ Returns an Array of Bio::Blast::Report::Hit objects.
535
+
536
+ --- Bio::Blast::Report::Iteration#each
537
+
538
+ Iterates on each Bio::Blast::Report::Hit object.
539
+
540
+ --- Bio::Blast::Report::Iteration#statistics
541
+
542
+ Returns a Hash containing execution statistics.
543
+ Valid keys are:
544
+ 'db-len', 'db-num', 'eff-space', 'entropy', 'hsp-len',
545
+ 'kappa', 'lambda'
546
+
547
+ --- Bio::Blast::Report::Iteration#message
548
+
549
+ Returns a String (or nil) containing execution message (typically
550
+ "CONVERGED").
551
+
552
+
553
+ == Bio::Blast::Report::Hit
554
+
555
+ --- Bio::Blast::Report::Hit#each
556
+
557
+ Iterates on each Hsp object.
558
+
559
+ --- Bio::Blast::Report::Hit#hsps
560
+
561
+ Returns an Array of Bio::Blast::Report::Hsp objects.
562
+
563
+ --- Bio::Blast::Report::Hit#num
564
+ * hit number
565
+ --- Bio::Blast::Report::Hit#hit_id
566
+ * SeqId of subject
567
+ --- Bio::Blast::Report::Hit#len
568
+ * length of subject
569
+ --- Bio::Blast::Report::Hit#definition
570
+ * definition line of subject
571
+ --- Bio::Blast::Report::Hit#accession
572
+ * accession
573
+
574
+ Accessors for the Hit values.
575
+
576
+ --- Bio::Blast::Report::Hit#query_id
577
+ --- Bio::Blast::Report::Hit#query_def
578
+ --- Bio::Blast::Report::Hit#query_len
579
+ --- Bio::Blast::Report::Hit#target_id
580
+ --- Bio::Blast::Report::Hit#target_def
581
+ --- Bio::Blast::Report::Hit#target_len
582
+
583
+ Compatible methods with Bio::Fasta::Report::Hit class.
584
+
585
+ --- Bio::Blast::Report::Hit#evalue
586
+ --- Bio::Blast::Report::Hit#bit_score
587
+ --- Bio::Blast::Report::Hit#identity
588
+ --- Bio::Blast::Report::Hit#overlap
589
+
590
+ --- Bio::Blast::Report::Hit#query_seq
591
+ --- Bio::Blast::Report::Hit#midline
592
+ --- Bio::Blast::Report::Hit#target_seq
593
+
594
+ --- Bio::Blast::Report::Hit#query_start
595
+ --- Bio::Blast::Report::Hit#query_end
596
+ --- Bio::Blast::Report::Hit#target_start
597
+ --- Bio::Blast::Report::Hit#target_end
598
+ --- Bio::Blast::Report::Hit#lap_at
599
+
600
+ Shortcut methods for the best Hsp, some are also compatible with
601
+ Bio::Fasta::Report::Hit class.
602
+
603
+
604
+ == Bio::Blast::Report::Hsp
605
+
606
+ --- Bio::Blast::Report::Hsp#num
607
+ * HSP number
608
+ --- Bio::Blast::Report::Hsp#bit_score
609
+ * score (in bits) of HSP
610
+ --- Bio::Blast::Report::Hsp#score
611
+ * score of HSP
612
+ --- Bio::Blast::Report::Hsp#evalue
613
+ * e-value of HSP
614
+ --- Bio::Blast::Report::Hsp#query_from
615
+ * start of HSP in query
616
+ --- Bio::Blast::Report::Hsp#query_to
617
+ * end of HSP in query
618
+ --- Bio::Blast::Report::Hsp#hit_from
619
+ * start of HSP in subject
620
+ --- Bio::Blast::Report::Hsp#hit_to
621
+ * end of HSP in subject
622
+ --- Bio::Blast::Report::Hsp#pattern_from
623
+ * start of PHI-BLAST pattern
624
+ --- Bio::Blast::Report::Hsp#pattern_to
625
+ * end of PHI-BLAST pattern
626
+ --- Bio::Blast::Report::Hsp#query_frame
627
+ * translation frame of query
628
+ --- Bio::Blast::Report::Hsp#hit_frame
629
+ * translation frame of subject
630
+ --- Bio::Blast::Report::Hsp#identity
631
+ * number of identities in HSP
632
+ --- Bio::Blast::Report::Hsp#positive
633
+ * number of positives in HSP
634
+ --- Bio::Blast::Report::Hsp#gaps
635
+ * number of gaps in HSP
636
+ --- Bio::Blast::Report::Hsp#align_len
637
+ * length of the alignment used
638
+ --- Bio::Blast::Report::Hsp#density
639
+ * score density
640
+ --- Bio::Blast::Report::Hsp#qseq
641
+ * alignment string for the query (with gaps)
642
+ --- Bio::Blast::Report::Hsp#hseq
643
+ * alignment string for subject (with gaps)
644
+ --- Bio::Blast::Report::Hsp#midline
645
+ * formatting middle line
646
+
647
+ --- Bio::Blast::Report::Hsp#percent_identity
648
+ --- Bio::Blast::Report::Hsp#mismatch_count
649
+
650
+ Available only for '-m 8' format outputs.
651
+
652
+ =end