bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,129 @@
1
+ #
2
+ # bio/appl/hmmer.rb - HMMER wrapper
3
+ #
4
+ # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: hmmer.rb,v 1.4 2005/09/26 13:00:04 k Exp $
21
+ #
22
+
23
+ require 'bio/command'
24
+ require 'shellwords'
25
+
26
+ module Bio
27
+
28
# Factory object that runs a HMMER command-line program on the local
# machine and turns its output into a Bio::HMMER::Report.
#
# The command line is assembled as: program, options..., hmmfile, seqfile.
class HMMER

  autoload :Report, 'bio/appl/hmmer/report'

  # NOTE(review): make_command_line and call_command_local used below are
  # presumably supplied by this module -- confirm in bio/command.rb.
  include Bio::Command::Tools

  attr_accessor :program, :hmmfile, :seqfile, :options
  attr_reader :output

  # program:: name of the executable to run
  # hmmfile:: HMM file passed to the program
  # seqfile:: sequence file passed to the program
  # opt::     extra command-line options; an Array, or (older style)
  #           a single String that is split with Shellwords
  def initialize(program, hmmfile, seqfile, opt = [])
    @program, @hmmfile, @seqfile = program, hmmfile, seqfile
    @output = ''
    @options =
      begin
        opt.to_ary
      rescue NameError # NoMethodError
        # backward compatibility: options given as one String
        Shellwords.shellwords(opt)
      end
  end

  # Returns the options as a single command-line string
  # (backward compatibility).
  def option
    make_command_line(@options)
  end

  # Sets the options from a single string (backward compatibility).
  def option=(str)
    @options = Shellwords.shellwords(str)
  end

  # Runs the program, keeps the raw output in #output and returns the
  # parsed report.
  def query
    cmd = [ @program, *@options ] + [ @hmmfile, @seqfile ]
    @output = call_command_local(cmd, nil)
    parse_result(@output)
  end


  private

  # Wraps raw program output in a Report object.
  def parse_result(data)
    Report.new(data)
  end

end
80
+ end
81
+
82
+
83
+
84
# Executed only when this file is run directly:
#
#   ruby hmmer.rb <program> <hmmfile> <seqfile>
#
# Runs the given HMMER program and dumps the resulting report object.
if __FILE__ == $0

  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional: fall back to the plain Kernel#p.
    # LoadError must be named explicitly -- it is a ScriptError, so a
    # bare "rescue" (StandardError only) would let it propagate.
  end

  program = ARGV.shift # hmmsearch, hmmpfam
  hmmfile = ARGV.shift
  seqfile = ARGV.shift

  factory = Bio::HMMER.new(program, hmmfile, seqfile)
  p factory.query

end
100
+
101
+
102
+ =begin
103
+
104
+ = Bio::HMMER
105
+
106
+ --- Bio::HMMER.new(program, hmmfile, seqfile, opt = [])
107
+ --- Bio::HMMER#program
108
+ --- Bio::HMMER#hmmfile
109
+ --- Bio::HMMER#seqfile
110
+ --- Bio::HMMER#options
111
+
112
+ Accessors for the factory.
113
+
114
+ --- Bio::HMMER#option
115
+ --- Bio::HMMER#option=(str)
116
+
117
+ Get/set options by string.
118
+
119
+ --- Bio::HMMER#query
120
+
121
+ Executes the hmmer search and returns Report object (Bio::HMMER::Report).
122
+
123
+ --- Bio::HMMER#output
124
+
125
+ Shows the raw output from hmmer search.
126
+
127
+ =end
128
+
129
+
@@ -0,0 +1,556 @@
1
+ #
2
+ # bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
3
+ #
4
+ # Copyright (C) 2002 Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>
5
+ # Copyright (C) 2005 Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: report.rb,v 1.9 2005/10/31 09:12:03 k Exp $
22
+ #
23
+
24
+ require 'bio/appl/hmmer'
25
+
26
+ module Bio
27
+ class HMMER
28
+
29
# Splits +input+ on the record separator "\n//\n" and builds one Report
# per record.
#
# input:: a String (or any object whose each_line accepts a separator,
#         e.g. an IO) holding one or more concatenated reports
#
# With a block, every Report is yielded and an empty Array is returned;
# without a block, the Array of Report objects is returned.
def self.reports(input)
  ary = []
  # each_line(sep) instead of each(sep): String#each no longer exists
  # since Ruby 1.9, while each_line is available on 1.8 as well.
  input.each_line("\n//\n") do |data|
    if block_given?
      yield Report.new(data)
    else
      ary << Report.new(data)
    end
  end
  return ary
end
40
+
41
+
42
+ # Bio::HMMER::Report
43
+ class Report
44
+
45
+ # for Bio::FlatFile support
46
+ DELIMITER = RS = "\n//\n"
47
+
48
attr_reader :program, :parameter, :query_info, :hits, :hsps,
            :histogram, :statistical_detail, :total_seq_searched,
            :whole_seq_top_hits, :domain_top_hits

# Parses the text of a single report.
#
# The input is cut into up to six sections -- header, query information,
# hits, HSPs, alignments and search statistics. The header and the
# statistics are not necessarily present.
def initialize(data)
  sections, is_hmmsearch = get_subdata(data)

  # header is optional; fall back to empty hashes
  header = sections["header"]
  if header
    @program, @parameter = parse_header_data(header)
  else
    @program, @parameter = [{}, {}]
  end

  @query_info = parse_query_info(sections["query"])
  @hits       = parse_hit_data(sections["hit"])
  @hsps       = parse_hsp_data(sections["hsp"], is_hmmsearch)

  unless @hsps == []
    # one chunk per alignment; the i-th chunk is attached to the i-th Hsp
    alignments = sections["alignment"].split(/^\S+.*?\n/).slice(1..-1)
    alignments.each_with_index do |aln, idx|
      @hsps[idx].set_alignment(aln)
    end
  end

  # hand every Hsp to the Hit carrying the same accession, if there is one
  by_accession = {}
  @hits.each { |hit| by_accession[hit.accession] = hit }
  @hsps.each do |hsp|
    next unless by_accession.has_key?(hsp.accession)
    by_accession[hsp.accession].append_hsp(hsp)
  end

  # the statistics section is only parsed for hmmsearch output
  if is_hmmsearch
    stats = parse_stat_data(sections["statistics"])
    @histogram, @statistical_detail, @total_seq_searched,
      @whole_seq_top_hits, @domain_top_hits = stats
  end
end
98
+
99
+
100
# Yields each element of @hits in order.
def each
  @hits.each { |hit| yield hit }
end
105
+
106
+
107
+ # Bio::HMMER::Report::Hit
108
# One row of the score table: accession, description, score, E-value
# and a trailing integer column (num), plus the Hsp objects attached
# to it afterwards through #append_hsp.
class Hit
  attr_reader :hsps, :accession, :description, :score, :evalue, :num

  # data:: one line of the score table. When the line does not have the
  #        expected five-column shape, only @hsps is initialised.
  def initialize(data)
    @hsps = Array.new
    row = /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/
    if row =~ data
      fields = Regexp.last_match
      @accession   = fields[1]
      @description = fields[2]
      @score       = fields[3].to_f
      @evalue      = fields[4].to_f
      @num         = fields[5].to_i
    end
  end

  # Yields each attached Hsp.
  def each
    @hsps.each { |hsp| yield hsp }
  end

  alias_method :target_id,  :accession
  alias_method :hit_id,     :accession
  alias_method :entry_id,   :accession
  alias_method :definition, :description
  alias_method :bit_score,  :score

  # Description prefixed with "<...>": the domain of the only Hsp when
  # exactly one is attached, otherwise num.
  def target_def
    label = (@hsps.size == 1) ? @hsps[0].domain : @num.to_s
    "<#{label}> #{@description}"
  end

  # Attaches an Hsp to this hit.
  def append_hsp(hsp)
    @hsps << hsp
  end

end
143
+
144
+
145
+ # Bio::HMMER::Report::Hsp
146
# One row of the "Parsed for domains" table, plus the alignment text
# attached later through #set_alignment.
class Hsp
  attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
              :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
              :flatseq, :query_frame, :target_frame, :csline, :rfline

  # data::         one whitespace-separated table row
  # is_hmmsearch:: selects which side counts as query and which as
  #                target in the query_*/target_* readers
  def initialize(data, is_hmmsearch)
    @is_hmmsearch = is_hmmsearch

    cols = data.split(' ')
    @accession = cols[0]
    @domain    = cols[1]
    @seq_f     = cols[2].to_i
    @seq_t     = cols[3].to_i
    @seq_ft    = cols[4]
    @hmm_f     = cols[5].to_i
    @hmm_t     = cols[6].to_i
    @hmm_ft    = cols[7]
    @score     = cols[8].to_f
    @evalue    = cols[9].to_f

    @hmmseq  = ''
    @flatseq = ''
    @midline = ''
    @query_frame  = 1
    @target_frame = 1
    # CS and RF lines are rarely used.
    @csline = nil
    @rfline = nil
  end

  # Accumulates the alignment rows of this Hsp from the raw text.
  #
  # The text is split into "alignment blocks". A block normally has
  # three rows (hmmseq, midline, flatseq), optionally preceded by a CS
  # and/or an RF row. The aligned part of each row starts at column 19
  # and is as wide as the first token of the hmmseq row. After all
  # blocks are joined, three characters are dropped from both ends of
  # every accumulated string.
  def set_alignment(aln)
    aln.split(/ (?:\d+|-)\s*\n\n/).each do |block|
      rows = block.split(/\n/)

      cs_row = nil
      cs_row = rows.shift if rows[0] =~ /^ {16}CS/
      rf_row = nil
      rf_row = rows.shift if rows[0] =~ /^ {16}RF/

      width = rows[0][/\S+/].length

      @csline = @csline.to_s + cs_row[19, width] if cs_row
      @rfline = @rfline.to_s + rf_row[19, width] if rf_row
      @hmmseq  += rows[0][19, width]
      @midline += rows[1][19, width]
      @flatseq += rows[2][19, width]
    end

    @csline  = @csline[3...-3] if @csline
    @rfline  = @rfline[3...-3] if @rfline
    @hmmseq  = @hmmseq[3...-3]
    @midline = @midline[3...-3]
    @flatseq = @flatseq[3...-3]
  end

  # hmmsearch: the HMM is the query; otherwise the sequence is.
  def query_seq
    if @is_hmmsearch then @hmmseq else @flatseq end
  end

  def target_seq
    if @is_hmmsearch then @flatseq else @hmmseq end
  end

  def target_from
    if @is_hmmsearch then @seq_f else @hmm_f end
  end

  def target_to
    if @is_hmmsearch then @seq_t else @hmm_t end
  end

  def query_from
    if @is_hmmsearch then @hmm_f else @seq_f end
  end

  def query_to
    if @is_hmmsearch then @hmm_t else @seq_t end
  end

  alias_method :bit_score, :score
  alias_method :target_id, :accession

end
205
+
206
+
207
+ # Bio::HMMER::Report#get_subdata
208
# Cuts the raw report text into named sections.
#
# Returns [sections, is_hmmsearch]. sections is a Hash with the String
# keys "header" (only set when a header line is found), "query", "hit",
# "hsp", "alignment" and "statistics"; a value is nil when the section
# is not found. is_hmmsearch is true only when the header names
# hmmsearch; without a header the report is assumed to be hmmpfam.
#
# Raises RuntimeError ("multiple reports found") for non-hmmsearch
# input that has text after the closing "//" line.
def get_subdata(data)
  header_re = '\Ahmm(search|pfam) - search'
  query_re  = '^Query (HMM|sequence): .*\nAccession: '
  hit_re    = '^Scores for (complete sequences|sequence family)'
  hsp_re    = '^Parsed for domains:'
  aln_re    = '^Alignments of top-scoring domains:\n'
  stat_re   = '^\nHistogram of all scores:'

  sections = {}

  # header (optional) decides between hmmsearch and hmmpfam
  header_match = /#{header_re}/.match(data)
  if header_match
    is_hmmsearch = (header_match[1] == "search")
    sections["header"] = data[/(\A.+?)(?=#{query_re})/m]
  else
    is_hmmsearch = false
  end

  # each section runs up to the start of the following one
  sections["query"] = data[/(#{query_re}.+?)(?=#{hit_re})/m]
  sections["hit"]   = data[/(#{hit_re}.+?)(?=#{hsp_re})/m]
  sections["hsp"]   = data[/(#{hsp_re}.+?)(?=#{aln_re})/m]

  # alignments end at the statistics (hmmsearch) or at "//" (hmmpfam)
  if is_hmmsearch
    aln_match = /#{aln_re}(.+?)#{stat_re}/m.match(data)
    sections["alignment"] = aln_match && aln_match[1]
  else
    aln_match = /#{aln_re}(.+?)\/\/\n/m.match(data)
    sections["alignment"] = aln_match && aln_match[1]
    raise "multiple reports found" unless aln_match.post_match.empty?
  end

  # handle -A option of HMMER: drop the trailing cut-off notice
  cutoff_re = '\t\[output cut off at A = \d+ top alignments\]\n\z'
  sections["alignment"].sub!(/#{cutoff_re}/, '')

  sections["statistics"] = data[/(#{stat_re}.+)\z/m]

  [sections, is_hmmsearch]
end
private :get_subdata
250
+
251
+ # Bio::HMMER::Report#parse_header_data
252
# Parses the header section of a report.
#
# The header consists of two parts, each terminated by a line ending in
# " - - -": the program banner followed by the parameter list.
#
# Returns [program, parameter]:
# program::   Hash with the keys 'name', 'version', 'copyright' and
#             'license', taken from the first four banner lines
# parameter:: Hash built from the "key: value" lines of the second part
#
# Raises NoMethodError when +data+ does not have the two-part shape.
def parse_header_data(data)
  data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
  program_data = $1
  parameter_data = $2

  program = {}
  program['name'], program['version'], program['copyright'],
    program['license'] = program_data.split(/\n/)

  parameter = {}
  # each_line instead of each: String#each was removed in Ruby 1.9
  parameter_data.each_line do |x|
    if /^(.+?):\s+(.*?)\s*$/ =~ x
      parameter[$1] = $2
    end
  end

  [program, parameter]
end
private :parse_header_data
271
+
272
+ # Bio::HMMER::Report#parse_query_info
273
# Parses the query section into a Hash.
#
# Every "key: value" line becomes an entry; a line without that shape
# but containing a bracketed text is stored under 'comments'.
def parse_query_info(data)
  hash = {}
  # each_line instead of each: String#each was removed in Ruby 1.9
  data.each_line do |x|
    if /^(.+?):\s+(.*?)\s*$/ =~ x
      hash[$1] = $2
    elsif /\s+\[(.+)\]/ =~ x
      hash['comments'] = $1
    end
  end
  hash
end
private :parse_query_info
285
+
286
+ # Bio::HMMER::Report#parse_hit_data
287
# Parses the hit table section into an Array of Hit objects.
#
# Everything up to and including the first line ending in "---" (the
# table header) and the final character of the section are dropped;
# each remaining line becomes one Hit. Returns an empty Array when the
# table only holds the "[no hits above thresholds]" notice.
#
# The work is done on a copy, so +data+ itself is left unmodified.
def parse_hit_data(data)
  # sub/chop rather than sub!/chop!: sub! returns nil when nothing is
  # replaced, which made the chained chop! raise NoMethodError
  body = data.sub(/.+?---\n/m, '').chop
  hits = []
  return hits if body == "\t[no hits above thresholds]\n"
  # each_line instead of each: String#each was removed in Ruby 1.9
  body.each_line do |l|
    hits.push(Hit.new(l))
  end
  hits
end
private :parse_hit_data
297
+
298
+ # Bio::HMMER::Report#parse_hsp_data
299
# Parses the domain table section into an Array of Hsp objects.
#
# Everything up to and including the first line ending in "---" (the
# table header) and the final character of the section are dropped;
# each remaining line becomes one Hsp, created with +is_hmmsearch+.
# Returns an empty Array when the table only holds the
# "[no hits above thresholds]" notice.
#
# The work is done on a copy, so +data+ itself is left unmodified.
def parse_hsp_data(data, is_hmmsearch)
  # sub/chop rather than sub!/chop!: sub! returns nil when nothing is
  # replaced, which made the chained chop! raise NoMethodError
  body = data.sub(/.+?---\n/m, '').chop
  hsps = []
  return hsps if body == "\t[no hits above thresholds]\n"
  # each_line instead of each: String#each was removed in Ruby 1.9
  body.each_line do |l|
    hsps.push(Hsp.new(l, is_hmmsearch))
  end
  return hsps
end
private :parse_hsp_data
309
+
310
+ # Bio::HMMER::Report#parse_stat_data
311
# Parses the statistics section, which is consumed front to back as a
# sequence of paragraphs separated by blank lines.
#
# Returns a five-element Array:
# histogram::          text between the "Histogram of all scores:" line
#                      and the following "%" line (nil if absent)
# statistical_detail:: Hash of "name = value" lines, values as Float
# total_seq_searched:: Integer after the colon of the next paragraph
# whole_seq_top_hits:: Hash of "name: value" lines of the next
#                      paragraph; all-digit values become Integer
# domain_top_hits::    same, built from whatever text remains
#
# The work is done on a copy, so +data+ itself is left unmodified.
def parse_stat_data(data)
  rest = data.sub(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
  histogram = $1

  # String#each was removed in Ruby 1.9, hence each_line below.
  # A missing paragraph is treated as empty text.
  statistical_detail = {}
  rest = rest.sub(/(.+?)\n\n/m, '')
  $1.to_s.each_line do |l|
    statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
  end

  total_seq_searched = nil
  rest = rest.sub(/(.+?)\n\n/m, '')
  $1.to_s.each_line do |l|
    total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
  end

  whole_seq_top_hits = {}
  rest = rest.sub(/(.+?)\n\n/m, '')
  $1.to_s.each_line do |l|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
      whole_seq_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
      whole_seq_top_hits[$1] = $2
    end
  end

  domain_top_hits = {}
  rest.each_line do |l|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
      domain_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
      domain_top_hits[$1] = $2
    end
  end

  [histogram, statistical_detail, total_seq_searched,
   whole_seq_top_hits, domain_top_hits]
end
private :parse_stat_data
350
+
351
+ end
352
+
353
+ end
354
+ end
355
+
356
+
357
# Executed only when this file is run directly: parses one report read
# from ARGF and prints every parsed field.
if __FILE__ == $0

  # Example for multiple reports in a single output file (hmmpfam):
  #
  #   Bio::HMMER.reports(ARGF.read) do |report|
  #     report.hits.each do |hit|
  #       hit.hsps.each do |hsp|
  #       end
  #     end
  #   end

  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  rep = Bio::HMMER::Report.new(ARGF.read)
  p rep

  indent = 18

  # prints the right-justified label, then the inspected value
  show = lambda do |label, value|
    print label.rjust(indent)
    p value
  end

  puts "### hmmer result"
  show.call("name : ", rep.program['name'])
  show.call("version : ", rep.program['version'])
  show.call("copyright : ", rep.program['copyright'])
  show.call("license : ", rep.program['license'])

  show.call("HMM file : ", rep.parameter['HMM file'])
  show.call("Sequence file : ", rep.parameter['Sequence file'])

  show.call("Query sequence : ", rep.query_info['Query sequence'])
  show.call("Accession : ", rep.query_info['Accession'])
  show.call("Description : ", rep.query_info['Description'])

  rep.each do |hit|
    puts "## each hit"
    show.call("accession : ", [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ])
    show.call("description : ", [ hit.description, hit.definition ])
    show.call("target_def : ", hit.target_def)
    show.call("score : ", [ hit.score, hit.bit_score ])
    show.call("evalue : ", hit.evalue)
    show.call("num : ", hit.num)

    hit.each do |hsp|
      puts "## each hsp"
      show.call("accession : ", [ hsp.accession, hsp.target_id ])
      show.call("domain : ", hsp.domain)
      show.call("seq_f : ", hsp.seq_f)
      show.call("seq_t : ", hsp.seq_t)
      show.call("seq_ft : ", hsp.seq_ft)
      show.call("hmm_f : ", hsp.hmm_f)
      show.call("hmm_t : ", hsp.hmm_t)
      show.call("hmm_ft : ", hsp.hmm_ft)
      show.call("score : ", [ hsp.score, hsp.bit_score ])
      show.call("evalue : ", hsp.evalue)
      show.call("midline : ", hsp.midline)
      show.call("hmmseq : ", hsp.hmmseq)
      show.call("flatseq : ", hsp.flatseq)
      show.call("query_frame : ", hsp.query_frame)
      show.call("target_frame : ", hsp.target_frame)

      show.call("query_seq : ", hsp.query_seq)     # hmmseq, flatseq
      show.call("target_seq : ", hsp.target_seq)   # flatseq, hmmseq
      show.call("target_from : ", hsp.target_from) # seq_f, hmm_f
      show.call("target_to : ", hsp.target_to)     # seq_t, hmm_t
      show.call("query_from : ", hsp.query_from)   # hmm_f, seq_f
      show.call("query_to : ", hsp.query_to)       # hmm_t, seq_t
    end
  end

end
470
+
471
+
472
+ =begin
473
+
474
+ = Bio::HMMER::Report
475
+
476
+ --- Bio::HMMER::Report.new(data)
477
+ --- Bio::HMMER::Report#each
478
+
479
+ Iterates on each Bio::HMMER::Report::Hit object.
480
+
481
+ --- Bio::HMMER::Report#hits
482
+
483
+ Returns an Array of Bio::HMMER::Report::Hit objects.
484
+
485
+
486
+ == Bio::HMMER::Report::Hit
487
+
488
+ --- Bio::HMMER::Report::Hit#each
489
+
490
+ Iterates on each Hsp object.
491
+
492
+ --- Bio::HMMER::Report::Hit#hsps
493
+
494
+ Returns an Array of Bio::HMMER::Report::Hsp objects.
495
+
496
+ --- Bio::HMMER::Report::Hit#target_id
497
+ --- Bio::HMMER::Report::Hit#hit_id
498
+ --- Bio::HMMER::Report::Hit#entry_id
499
+ --- Bio::HMMER::Report::Hit#definition
500
+ --- Bio::HMMER::Report::Hit#description
501
+ --- Bio::HMMER::Report::Hit#num
502
+
503
+ number of domains
504
+
505
+ --- Bio::HMMER::Report::Hit#target_def
506
+
507
+ <domain number> + @description
508
+
509
+ --- Bio::HMMER::Report::Hit#evalue
510
+ --- Bio::HMMER::Report::Hit#bit_score
511
+ --- Bio::HMMER::Report::Hit#score
512
+
513
+ Matching scores (total of all HSPs).
514
+
515
+
516
+ == Bio::HMMER::Report::Hsp
517
+
518
+ --- Bio::HMMER::Report#hsps
519
+
520
+ Returns an Array of Bio::HMMER::Report::Hsp objects.
521
+ Under special circumstances, some HSPs do not have
522
+ parent Hit objects. If you want to access such HSPs,
523
+ use this method.
524
+
525
+ --- Bio::HMMER::Report::Hsp#target_id
526
+ --- Bio::HMMER::Report::Hsp#accession
527
+ --- Bio::HMMER::Report::Hsp#domain
528
+ --- Bio::HMMER::Report::Hsp#seq_f
529
+ --- Bio::HMMER::Report::Hsp#seq_t
530
+ --- Bio::HMMER::Report::Hsp#seq_ft
531
+ --- Bio::HMMER::Report::Hsp#hmm_f
532
+ --- Bio::HMMER::Report::Hsp#hmm_t
533
+ --- Bio::HMMER::Report::Hsp#hmm_ft
534
+
535
+ --- Bio::HMMER::Report::Hsp#bit_score
536
+ --- Bio::HMMER::Report::Hsp#score
537
+ --- Bio::HMMER::Report::Hsp#evalue
538
+
539
+ --- Bio::HMMER::Report::Hsp#midline
540
+ --- Bio::HMMER::Report::Hsp#hmmseq
541
+ --- Bio::HMMER::Report::Hsp#flatseq
542
+ --- Bio::HMMER::Report::Hsp#query_frame
543
+ --- Bio::HMMER::Report::Hsp#target_frame
544
+
545
+ --- Bio::HMMER::Report::Hsp#query_seq
546
+ --- Bio::HMMER::Report::Hsp#query_from
547
+ --- Bio::HMMER::Report::Hsp#query_to
548
+ --- Bio::HMMER::Report::Hsp#target_seq
549
+ --- Bio::HMMER::Report::Hsp#target_from
550
+ --- Bio::HMMER::Report::Hsp#target_to
551
+
552
+ --- Bio::HMMER::Report::Hsp#csline
553
+ --- Bio::HMMER::Report::Hsp#rfline
554
+
555
+ =end
556
+