bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,151 @@
1
+ #
2
+ # bio/appl/blast/rexml.rb - BLAST XML output (-m 7) parser by REXML
3
+ #
4
+ # Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: rexml.rb,v 1.10 2005/09/08 01:22:08 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'rexml/document'
25
+ rescue LoadError
26
+ end
27
+
28
+ module Bio
29
+ class Blast
30
+ class Report
31
+
32
+ private
33
+
34
# Parses BLAST XML (-m 7) text with REXML: fills the report-level fields
# via rexml_parse_program, then appends one parsed iteration to
# @iterations for every <Iteration> element found below the root.
def rexml_parse(xml)
  document = REXML::Document.new(xml)
  rexml_parse_program(document)
  document.elements.each("*//Iteration") do |iteration_node|
    @iterations << rexml_parse_iteration(iteration_node)
  end
end
41
+
42
# Reads the direct children of the document root.
#
# * <BlastOutput_param>: every text-carrying child of its <Parameters>
#   element is stored in @parameters under the tag name with the
#   "Parameters_" prefix removed. Values made only of digits become
#   Integer; everything else is kept as a String.
# * any other child with non-blank text is collected by tag name and then
#   copied into @program, @version, @reference, @db, @query_id,
#   @query_def and @query_len (the latter converted with to_i).
#
# All child elements are visited, not only those that own a text node, so
# the <Parameters> section is also found when the XML contains no
# whitespace between tags. A missing <Parameters> child is skipped.
def rexml_parse_program(dom)
  fields = {}
  dom.root.each_element do |e|
    if e.name == 'BlastOutput_param'
      params = e.elements["Parameters"]
      next unless params
      params.each_element_with_text do |p|
        key = p.name.sub(/Parameters_/, '')
        @parameters[key] = (p.text =~ /\D/) ? p.text : p.text.to_i
      end
    else
      text = e.text
      # container elements have no text (nil) or only layout whitespace
      fields[e.name] = text if text && !text.strip.empty?
    end
  end
  @program   = fields['BlastOutput_program']
  @version   = fields['BlastOutput_version']
  @reference = fields['BlastOutput_reference']
  @db        = fields['BlastOutput_db']
  @query_id  = fields['BlastOutput_query-ID']
  @query_def = fields['BlastOutput_query-def']
  @query_len = fields['BlastOutput_query-len'].to_i
end
65
+
66
# Builds an Iteration from one <Iteration> element: iteration number,
# message, the parsed hits, and the <Statistics> values (stored under the
# tag name without the "Statistics_" prefix; digit-only text becomes
# Integer, any other text is converted with to_f).
def rexml_parse_iteration(e)
  result = Iteration.new
  e.each_element do |child|
    case child.name
    when 'Iteration_iter-num'
      result.num = child.text.to_i
    when 'Iteration_message'
      result.message = child.text
    when 'Iteration_hits'
      child.each_element("Hit") do |hit_node|
        result.hits << rexml_parse_hit(hit_node)
      end
    when 'Iteration_stat'
      child.elements["Statistics"].each_element_with_text do |stat|
        key = stat.name.sub(/Statistics_/, '')
        raw = stat.text
        result.statistics[key] = (raw =~ /\D/) ? raw.to_f : raw.to_i
      end
    end
  end
  result
end
88
+
89
# Builds a Hit from one <Hit> element. The query id/definition/length are
# copied from the report-level instance variables, every <Hsp> below
# <Hit_hsps> is parsed and appended to hit.hsps, and the remaining child
# elements supply num, hit_id, len, definition and accession.
def rexml_parse_hit(e)
  fields = {}
  hit = Hit.new
  hit.query_id, hit.query_def, hit.query_len = @query_id, @query_def, @query_len
  e.each_element do |child|
    if child.name == 'Hit_hsps'
      child.each_element("Hsp") do |hsp_node|
        hit.hsps << rexml_parse_hsp(hsp_node)
      end
    else
      fields[child.name] = child.text
    end
  end
  hit.num        = fields['Hit_num'].to_i
  hit.hit_id     = fields['Hit_id']
  hit.len        = fields['Hit_len'].to_i
  hit.definition = fields['Hit_def']
  hit.accession  = fields['Hit_accession']
  hit
end
112
+
113
# Builds an Hsp from one <Hsp> element. Each attribute is read from the
# child tag "Hsp_" + attribute name with "_" written as "-"
# (e.g. bit_score <- Hsp_bit-score). Children without text are ignored,
# so a missing numeric field ends up as 0 / 0.0 and a missing sequence
# field as nil.
def rexml_parse_hsp(e)
  values = {}
  e.each_element_with_text { |field| values[field.name] = field.text }
  lookup = lambda { |attr| values["Hsp_#{attr.to_s.tr('_', '-')}"] }

  hsp = Hsp.new
  [:num, :score, :query_from, :query_to, :hit_from, :hit_to,
   :pattern_from, :pattern_to, :query_frame, :hit_frame,
   :identity, :positive, :gaps, :align_len, :density].each do |attr|
    hsp.__send__("#{attr}=", lookup.call(attr).to_i)
  end
  [:bit_score, :evalue].each do |attr|
    hsp.__send__("#{attr}=", lookup.call(attr).to_f)
  end
  [:qseq, :hseq, :midline].each do |attr|
    hsp.__send__("#{attr}=", lookup.call(attr))
  end
  hsp
end
141
+
142
+ end
143
+ end
144
+ end
145
+
146
+
147
+ =begin
148
+
149
+ This file is automatically loaded by bio/appl/blast/report.rb
150
+
151
+ =end
@@ -0,0 +1,553 @@
1
+ #
2
+ # = bio/appl/blast/wublast.rb - WU-BLAST default output parser
3
+ #
4
+ # Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: wublast.rb,v 1.5 2005/12/18 15:58:39 k Exp $
24
+ #
25
+ # WU-BLAST default output parser.
26
+ #
27
+ # The parser is still incomplete and may contain many bugs,
28
+ # because I did not have a WU-BLAST license.
29
+ # It was tested with web-based WU-BLAST results and an
30
+ # obsolete version downloaded from http://blast.wustl.edu/ .
31
+ #
32
+ # = References
33
+ # * http://blast.wustl.edu/
34
+ # * http://www.ebi.ac.uk/blast2/
35
+ #
36
+
37
+ require 'bio/appl/blast/format0'
38
+
39
+ module Bio
40
+ class Blast
41
+ module WU #:nodoc:
42
+
43
+ # Bio::Blast::WU::Report parses WU-BLAST default output
44
+ # and stores information in the data.
45
+ # It may contain a Bio::Blast::WU::Report::Iteration object.
46
+ # Because it inherits Bio::Blast::Default::Report,
47
+ # please also refer to Bio::Blast::Default::Report.
48
+ class Report < Default::Report
49
+
50
# Returns the Hash built by parse_parameters from the lines that precede
# "Statistics:" in the report footer ("name=value" lines map name to the
# value string, bare option lines map to true).
def parameters
  parse_parameters # fills @parameters on first use
  @parameters
end
55
+
56
# Returns the Array of raw footer chunks that parse_parameters set aside
# because they contain a line starting with " Query".
def parameter_matrix
  parse_parameters # fills @parameter_matrix on first use
  @parameter_matrix
end
61
+
62
# Returns e-value threshold specified when BLAST was executed
# (the value stored under "E" in the parsed parameters).
def expect
  parse_parameters
  @parameters['E']
end
64
+
65
# Returns warning messages: the ones collected from the report footer
# (@f0warnings) followed by the warnings of every iteration.
# The combined list is built once and cached in @warnings.
def warnings
  unless defined?(@warnings)
    # Build the list in a copy so that @f0warnings itself is not extended
    # in place with the per-iteration warnings; to_a also covers the case
    # where no footer warnings were recorded at all (nil).
    collected = @f0warnings.to_a.dup
    iterations.each { |itr| collected.concat(itr.warnings) }
    @warnings = collected
  end
  @warnings
end
73
+
74
# Returns the "Notice:" header text as a single line: runs of whitespace
# are collapsed to one space and the ends are trimmed. The result is
# cached; an absent notice yields "".
def notice
  @notice = @f0notice.to_s.gsub(/\s+/, ' ').strip unless defined?(@notice)
  @notice
end
81
+
82
+ private
83
# Splits headers.
# Consumes entries from the front of +data+: the program header, then any
# run of "Reference: ", "Copyright " and "Notice: " entries, then the
# query entry, an optional entry that does not start with "Database: "
# (kept as @f0translate_info), and finally the database entry.
def format0_split_headers(data)
  @f0header = data.shift
  while (entry = data.first)
    case entry
    when /^Reference\: / then @f0reference = data.shift
    when /^Copyright /   then @f0copyright = data.shift
    when /^Notice\: /    then @f0notice = data.shift
    else
      # "Query= " or anything unrecognised ends the optional header part
      break
    end
  end
  @f0query = data.shift
  following = data.first
  @f0translate_info = data.shift if following && /^Database\: / !~ following
  @f0database = data.shift
end
106
+
107
# Splits search data.
# The whole of +data+ is handed to a single Iteration, which is returned
# wrapped in a one-element Array.
def format0_split_search(data)
  only_iteration = Iteration.new(data)
  [only_iteration]
end
111
+
112
# Splits statistics parameters.
# Empties +data+ into three lists: an optional leading "WARNING: " entry
# goes to @f0warnings, entries before the first one starting with
# "Statistics:" go to @f0wu_params, and that entry plus everything after
# it goes to @f0wu_stats. An F0dbstat is then built from @f0wu_stats and
# also stored inside the first iteration, if there is one.
def format0_split_stat_params(data)
  @f0warnings = []
  head = data.first
  @f0warnings << data.shift if head && head =~ /^WARNING\: /

  @f0wu_params = []
  @f0wu_stats = []
  while (entry = data.shift)
    if entry =~ /^Statistics\:/
      @f0wu_stats << entry
      break
    end
    @f0wu_params << entry
  end
  while (entry = data.shift)
    @f0wu_stats << entry
  end

  @f0dbstat = F0dbstat.new(@f0wu_stats)
  first_iteration = @iterations[0]
  first_iteration.instance_variable_set(:@f0dbstat, @f0dbstat) if first_iteration
end
132
+
133
# Splits parameters.
# Walks the chunks in @f0wu_params once and fills
# * @parameter_matrix with every chunk containing a line that starts
#   with " Query", and
# * @parameters with the lines of all other chunks: "name = value"
#   becomes name => value (both as Strings), the "Parameters:" heading is
#   ignored, and any other non-blank line becomes line => true.
#
# Names and values are stripped: the greedy captures would otherwise keep
# trailing blanks (or a "\r" from CRLF input), and e.g. "E = 10" would be
# stored under the key "E " where #expect cannot find it.
def parse_parameters
  return if defined?(@parse_parameters)
  @parameters = {}
  @parameter_matrix = []
  @f0wu_params.each do |chunk|
    if /^ Query/ =~ chunk
      @parameter_matrix << chunk
      next
    end
    chunk.split(/^/).each do |line|
      if /\A\s*(.+)\s*\=\s*(.*)\s*/ =~ line
        @parameters[$1.strip] = $2.strip
      elsif /\AParameters\:/ =~ line
        # section heading, carries no value
      elsif /\A\s*(.+)\s*$/ =~ line
        @parameters[$1.strip] = true
      end
    end
  end
  @parse_parameters = true
end
156
+
157
+ # Stores database statistics.
158
+ # Internal use only. Users must not use the class.
159
+ class F0dbstat < Default::Report::F0dbstat #:nodoc:
160
# Keeps the raw statistics entries in @f0stat and starts with an empty
# @hash, which parse_dbstat fills later.
def initialize(ary)
  @hash = {}
  @f0stat = ary
end
164
+
165
+ #--
166
+ #undef :f0params
167
+ #undef :matrix, :gap_open, :gap_extend,
168
+ # :eff_space, :expect, :sc_match, :sc_mismatch,
169
+ # :num_hits
170
+ #++
171
+
172
# Parses database statistics.
# Runs once: lets parse_colon_separated_params fill @hash from @f0stat,
# then copies the database name and posted date, and converts the letter
# and sequence counts to Integer after removing thousands separators.
def parse_dbstat
  return if defined?(@parse_dbstat)
  parse_colon_separated_params(@hash, @f0stat)
  @database = @hash['Database']
  @posted_date = @hash['Posted']
  letters = @hash['# of letters in database']
  @db_len = letters.delete(',').to_i if letters
  sequences = @hash['# of sequences in database']
  @db_num = sequences.delete(',').to_i if sequences
  @parse_dbstat = true
end
private :parse_dbstat
188
+
189
+ end #class F0dbstat
190
+
191
+ #--
192
+ #class Frame
193
+ #end #class FrameParams
194
+ #++
195
+
196
+ # Iteration class for WU-BLAST report.
197
+ # Though WU-BLAST does not iterate like PSI-BLAST,
198
+ # Bio::Blast::WU::Report::Iteration aims to keep compatibility
199
+ # with Bio::Blast::Default::Report::* classes.
200
+ # It may contain some Bio::Blast::WU::Report::Hit objects.
201
+ # Because it inherits Bio::Blast::Default::Report::Iteration,
202
+ # please also refer to Bio::Blast::Default::Report::Iteration.
203
+ class Iteration < Default::Report::Iteration
204
# Creates a new Iteration object.
# It is designed to be called only internally from
# the Bio::Blast::WU::Report class.
# Users shall not use the method directly.
#
# Consumes entries from the front of +data+: the hit-list header, the
# hit-list body (skipped when it is the "*** NONE ***" marker), any
# following "WARNING: " entries, and then one Hit per entry that starts
# with ">".
def initialize(data)
  @num = 1
  @hits = []
  @f0stat = []
  @f0hitlist = []
  @f0message = []
  @f0warnings = []
  @f0dbstat = Default::Report::AlwaysNil.instance

  header = data.shift
  return unless header
  @f0hitlist << header

  listing = data.shift
  return unless listing
  return if /\*{3} +NONE +\*{3}/ =~ listing
  @f0hitlist << listing

  while (entry = data.first) && /^WARNING\: / =~ entry
    @f0warnings << data.shift
  end
  # Hit.new removes the entries it uses from data, which advances the loop
  while (entry = data.first) && /^\>/ =~ entry
    @hits << Hit.new(data)
  end
end
229
+
230
# Returns the Array of "WARNING: " entries that were collected for this
# iteration when it was created.
def warnings
  @f0warnings
end
234
+
235
+ private
236
# Parses hit list.
# Runs once. The first @f0hitlist entry is the column header; when it
# mentions both "Reading" and "Frame" every line carries one extra
# column. Each remaining line is split from the right-hand side into
# (definition, [reading frame,] score, p-value, N), so blanks inside the
# definition do not matter. The n-th line is matched with the n-th
# element of @hits (a Hit is created from the definition, minus trailing
# dots, when none exists yet) and its @score, @pvalue (Strings) and
# @n_number (Integer) are set.
def parse_hitlist
  return if defined?(@parse_hitlist)
  header = @f0hitlist.shift.to_s
  tblast = (/Reading/ =~ header && /Frame/ =~ header) ? true : nil
  column_count = tblast ? 5 : 4
  index = 0
  @f0hitlist.each do |chunk|
    chunk.split(/^/).reject { |l| l.empty? }.each do |line|
      # reverse, split with a limit, reverse back: splits from the right
      columns = line.strip.reverse.split(/\s+/, column_count)
      columns = columns.map { |c| c.reverse }
      definition = columns.pop
      hit = (@hits[index] ||= Hit.new([definition.to_s.sub(/\.+\z/, '')]))
      columns.pop if tblast # ignore Reading Frame
      score_text = columns.pop.to_s
      pvalue_text = columns.pop.to_s
      n_value = columns.pop.to_i
      hit.instance_variable_set(:@score, score_text)
      hit.instance_variable_set(:@pvalue, pvalue_text)
      hit.instance_variable_set(:@n_number, n_value)
      index += 1
    end
  end
  @parse_hitlist = true
end
279
+ end #class Iteration
280
+
281
+ # Bio::Blast::WU::Report::Hit contains information about a hit.
282
+ # It may contain some Bio::Blast::WU::Report::HSP objects.
283
+ #
284
+ # Because it inherits Bio::Blast::Default::Report::Hit,
285
+ # please also refer to Bio::Blast::Default::Report::Hit.
286
+ class Hit < Default::Report::Hit
287
# Creates a new Hit object.
# It is designed to be called only internally from the
# Bio::Blast::WU::Report::Iteration class.
# Users should not call the method directly.
#
# Takes the hit name from the front of +data+, then builds one HSP per
# following entry that starts with whitespace and "Score"; a
# "Plus Strand HSPs:" / "Minus Strand HSPs:" entry in between is dropped.
def initialize(data)
  @f0hitname = data.shift
  @hsps = []
  while (entry = data.first)
    if entry =~ /^\s*(?:Plus|Minus) +Strand +HSPs\:/
      data.shift
      entry = data.first # may be nil when the strand heading came last
    end
    break unless /^\s+Score/ =~ entry
    @hsps << HSP.new(data)
  end
  @again = false
end
307
+
308
# Returns the value stored in @score. Within this file it is assigned by
# Iteration#parse_hitlist (as a String taken from the hit list).
def score
  @score
end
312
+ # p-value
313
+ attr_reader :pvalue
314
+ # n-number (???)
315
+ attr_reader :n_number
316
+ end #class Hit
317
+
318
+ # Bio::Blast::WU::Report::HSP holds information about the hsp
319
+ # (high-scoring segment pair).
320
+ #
321
+ # Because it inherits Bio::Blast::Default::Report::HSP,
322
+ # please also refer to Bio::Blast::Default::Report::HSP.
323
class HSP < Default::Report::HSP
  # The "attr_reader ... if false" lines never execute; presumably they
  # exist only so that documentation tools list the readers (TODO confirm).
  # The real readers are declared through method_after_parse_score.

  # p-value
  attr_reader :pvalue if false # dummy
  method_after_parse_score :pvalue

  # p_sum_n (???)
  attr_reader :p_sum_n if false # dummy
  method_after_parse_score :p_sum_n
end #class HSP
331
+
332
+ end #class Report
333
+
334
+ # WU-BLAST default output parser for TBLAST.
335
+ # All methods are equal to Bio::Blast::WU::Report.
336
+ # Only DELIMITER (and RS) is different.
337
class Report_TBlast < Report
  # Delimiter of each entry for TBLAST. Bio::FlatFile uses it.
  RS = "\nTBLAST"
  DELIMITER = RS
end #class Report_TBlast
341
+
342
+ end #module WU
343
+ end #class Blast
344
+ end #module Bio
345
+
346
+ ######################################################################
347
+
348
# Demo / manual check: parses the WU-BLAST reports given on the command
# line (or stdin) and prints the value of each supported accessor.
# Accessors of Bio::Blast::Default::Report that the original author had
# commented out for WU-BLAST are not printed; they are listed in the notes
# below instead of being kept as commented-out code.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::WU::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::WU::Report\n"
      puts
      print " rep.program #=> "; p rep.program
      print " rep.version #=> "; p rep.version
      print " rep.reference #=> "; p rep.reference
      print " rep.notice #=> "; p rep.notice
      print " rep.db #=> "; p rep.db
      # not printed: rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      print " rep.version_number #=> "; p rep.version_number
      print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.expect #=> "; p rep.expect
      # not printed: rep.matrix, rep.inclusion, rep.sc_match,
      # rep.sc_mismatch, rep.gap_open, rep.gap_extend, rep.filter,
      # rep.pattern, rep.entrez_query, rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      # not printed: rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      # not printed: rep.hsp_len, rep.eff_space, rep.kappa, rep.lambda,
      # rep.entropy
      puts
      # not printed: rep.num_hits, rep.gapped_kappa, rep.gapped_lambda,
      # rep.gapped_entropy, rep.posted_date
      puts

      # not printed: the "Message (last iteration's)" section
      # (rep.message, rep.converged?)

      print "# === Warning messages\n"
      print " rep.warnings #=> "; p rep.warnings

      print "# === Iterations\n"
      puts
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::WU::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        # not printed: itr.statistics
        puts
        print " itr.warnings #=> "; p itr.warnings
        print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        # not printed: itr.hits_newly_found.size, itr.hits_found_again.size
        if itr.hits_for_pattern then
          itr.hits_for_pattern.each_with_index do |hp, hpi|
            print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size
          end
        end
        print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          print "# --- Bio::Blast::WU::Report::Hit"
          print " ([#{i}])\n"
          puts

          # not printed: hit.num, hit.hit_id, hit.accession
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          print " hit.found_again? #=> "; p hit.found_again?
          print " hit.score #=> "; p hit.score
          print " hit.pvalue #=> "; p hit.pvalue
          print " hit.n_number #=> "; p hit.n_number

          print " --- compatible/shortcut ---\n"
          # not printed: hit.query_id, hit.query_def, hit.query_len,
          # hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          # not printed: hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::WU::Report::HSP"
              print " ([#{j}])\n"
              puts
              # not printed: hsp.num, hsp.density
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              puts
              print " hsp.pvalue #=> "; p hsp.pvalue
              print " hsp.p_sum_n #=> "; p hsp.p_sum_n
              puts

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              # not printed: hsp.pattern_from, hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              # not printed: hsp.mismatch_count
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
551
+
552
+ ######################################################################
553
+