bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,129 @@
1
+ #
2
+ # bio/appl/hmmer.rb - HMMER wrapper
3
+ #
4
+ # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: hmmer.rb,v 1.4 2005/09/26 13:00:04 k Exp $
21
+ #
22
+
23
+ require 'bio/command'
24
+ require 'shellwords'
25
+
26
module Bio

  # Factory object that runs a HMMER program on an HMM file and a sequence
  # file and turns the raw output into a Bio::HMMER::Report.
  #
  # make_command_line and call_command_local are not defined here; they are
  # presumably supplied by Bio::Command::Tools (confirm in bio/command.rb).
  class HMMER

    autoload :Report, 'bio/appl/hmmer/report'

    include Bio::Command::Tools

    # program, hmmfile, seqfile and options can be read and changed freely.
    attr_accessor :program, :hmmfile, :seqfile, :options

    # Raw text returned by the most recent #query ('' before any query).
    attr_reader :output

    # program:: name of the executable to run
    # hmmfile:: HMM file handed to the program
    # seqfile:: sequence file handed to the program
    # opt::     extra command line options, as an Array; anything that does
    #           not respond to to_ary is split with Shellwords instead
    def initialize(program, hmmfile, seqfile, opt = [])
      @program, @hmmfile, @seqfile = program, hmmfile, seqfile
      @output = ''
      @options =
        begin
          opt.to_ary
        rescue NameError   # NoMethodError is a subclass of NameError
          # old-style call: options were given as a single string
          Shellwords.shellwords(opt)
        end
    end

    # Old-style reader: the options passed through make_command_line.
    def option
      make_command_line(@options)
    end

    # Old-style writer: splits +str+ into words and stores them as options.
    def option=(str)
      @options = Shellwords.shellwords(str)
    end

    # Runs "<program> <options...> <hmmfile> <seqfile>", keeps the raw output
    # in @output and returns the parsed report.
    def query
      command = [ @program, *@options ].concat([ @hmmfile, @seqfile ])
      @output = call_command_local(command, nil)
      parse_result(@output)
    end


    private

    # Wraps raw program output in a Report object.
    def parse_result(data)
      Report.new(data)
    end

  end
end
81
+
82
+
83
+
84
# Command line self-test: ruby hmmer.rb <program> <hmmfile> <seqfile>
if __FILE__ == $0

  # Pretty-print when 'pp' is available, otherwise keep the plain Kernel#p.
  # A failed require raises LoadError, which is a ScriptError and is NOT
  # caught by a bare `rescue` (that only handles StandardError), so the
  # exception class has to be named explicitly.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  program = ARGV.shift    # hmmsearch, hmmpfam
  hmmfile = ARGV.shift
  seqfile = ARGV.shift

  factory = Bio::HMMER.new(program, hmmfile, seqfile)
  p factory.query

end
100
+
101
+
102
+ =begin
103
+
104
+ = Bio::HMMER
105
+
106
+ --- Bio::HMMER.new(program, hmmfile, seqfile, opt = [])
107
+ --- Bio::HMMER#program
108
+ --- Bio::HMMER#hmmfile
109
+ --- Bio::HMMER#seqfile
110
+ --- Bio::HMMER#options
111
+
112
+ Accessors for the factory.
113
+
114
+ --- Bio::HMMER#option
115
+ --- Bio::HMMER#option=(str)
116
+
117
+ Get/set options by string.
118
+
119
+ --- Bio::HMMER#query
120
+
121
+ Executes the hmmer search and returns Report object (Bio::HMMER::Report).
122
+
123
+ --- Bio::HMMER#output
124
+
125
+ Shows the raw output from hmmer search.
126
+
127
+ =end
128
+
129
+
@@ -0,0 +1,556 @@
1
+ #
2
+ # bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
3
+ #
4
+ # Copyright (C) 2002 Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>
5
+ # Copyright (C) 2005 Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: report.rb,v 1.9 2005/10/31 09:12:03 k Exp $
22
+ #
23
+
24
+ require 'bio/appl/hmmer'
25
+
26
+ module Bio
27
+ class HMMER
28
+
29
# Splits +input+ into individual reports on the "\n//\n" record separator
# and builds a Report from each chunk.
#
# input:: a String or IO-like object holding one or more reports
#
# With a block, each Report is yielded and the returned Array stays empty;
# without a block, the Array of all Report objects is returned.
#
# each_line is used instead of each because String#each was removed in
# Ruby 1.9, while each_line(separator) exists on both String and IO.
def self.reports(input)
  ary = []
  input.each_line("\n//\n") do |data|
    report = Report.new(data)
    if block_given?
      yield report
    else
      ary << report
    end
  end
  ary
end
40
+
41
+
42
+ # Bio::HMMER::Report
43
+ class Report
44
+
45
+ # for Bio::FlatFile support
46
+ DELIMITER = RS = "\n//\n"
47
+
48
# Parses the text of a single hmmsearch/hmmpfam report.
#
# The text is cut into up to six sections (header, query information,
# hits, HSPs, alignments, search statistics); the header and the
# statistics are optional.
def initialize(data)
  sections, hmmsearch = get_subdata(data)

  # program/parameter stay empty hashes when the report has no header
  header = sections["header"]
  @program, @parameter = header ? parse_header_data(header) : [{}, {}]

  @query_info = parse_query_info(sections["query"])
  @hits = parse_hit_data(sections["hit"])
  @hsps = parse_hsp_data(sections["hsp"], hmmsearch)

  unless @hsps == []
    # one alignment per HSP, in the same order as the HSP table; the first
    # element of the split (text before the first alignment) is dropped
    alignments = sections["alignment"].split(/^\S+.*?\n/).slice(1..-1)
    alignments.each_with_index do |text, idx|
      @hsps[idx].set_alignment(text)
    end
  end

  # attach every Hsp to the Hit with the same accession, if there is one
  by_accession = {}
  @hits.each { |hit| by_accession[hit.accession] = hit }
  @hsps.each do |hsp|
    next unless by_accession.has_key?(hsp.accession)
    by_accession[hsp.accession].append_hsp(hsp)
  end

  # search statistics are parsed only for hmmsearch reports
  if hmmsearch
    @histogram, @statistical_detail, @total_seq_searched,
      @whole_seq_top_hits, @domain_top_hits =
      parse_stat_data(sections["statistics"])
  end
end
attr_reader :program, :parameter, :query_info, :hits, :hsps,
  :histogram, :statistical_detail, :total_seq_searched,
  :whole_seq_top_hits, :domain_top_hits
98
+
99
+
100
# Yields every Hit of this report in order.
def each
  @hits.each { |hit| yield hit }
end
105
+
106
+
107
+ # Bio::HMMER::Report::Hit
108
# One row of the per-sequence (or per-model) score table, together with
# the Hsp objects that were attached to it.
class Hit
  attr_reader :hsps, :accession, :description, :score, :evalue, :num

  # data:: one line of the score table; the fields are accession,
  #        description, score, E-value and number of domains.
  # When the line does not match, only @hsps is initialised.
  def initialize(data)
    @hsps = Array.new
    return unless /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data
    @accession   = $1
    @description = $2
    @score       = $3.to_f
    @evalue      = $4.to_f
    @num         = $5.to_i
  end

  # Yields every attached Hsp in order.
  def each
    @hsps.each { |hsp| yield hsp }
  end

  alias target_id  accession
  alias hit_id     accession
  alias entry_id   accession
  alias definition description
  alias bit_score  score

  # "<domain> description" when exactly one Hsp is attached, otherwise
  # "<number of domains> description".
  def target_def
    label = (@hsps.size == 1) ? @hsps[0].domain : @num.to_s
    "<#{label}> #{@description}"
  end

  # Attaches +hsp+ to this hit.
  def append_hsp(hsp)
    @hsps << hsp
  end

end
143
+
144
+
145
+ # Bio::HMMER::Report::Hsp
146
# One row of the "Parsed for domains" table plus its alignment text.
class Hsp
  attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
    :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
    :flatseq, :query_frame, :target_frame, :csline, :rfline

  # data::         one whitespace-separated table row with the fields
  #                accession, domain, seq-f, seq-t, seq flag, hmm-f, hmm-t,
  #                hmm flag, score and E-value
  # is_hmmsearch:: true when the report came from hmmsearch; it decides
  #                which side the query_*/target_* readers report
  def initialize(data, is_hmmsearch)
    @is_hmmsearch = is_hmmsearch

    fields = data.split(' ')
    @accession = fields[0]
    @domain    = fields[1]
    @seq_f     = fields[2].to_i
    @seq_t     = fields[3].to_i
    @seq_ft    = fields[4]
    @hmm_f     = fields[5].to_i
    @hmm_t     = fields[6].to_i
    @hmm_ft    = fields[7]
    @score     = fields[8].to_f
    @evalue    = fields[9].to_f

    @hmmseq  = ''
    @flatseq = ''
    @midline = ''
    @query_frame  = 1
    @target_frame = 1
    # CS and RF lines are rarely present, so they start out as nil.
    @csline = nil
    @rfline = nil
  end

  # Accumulates the alignment text +aln+ of this domain.
  #
  # The text is cut into blocks at the blank line that follows each
  # sequence row. A block normally holds three rows (HMM, midline,
  # sequence), optionally preceded by a CS row and/or an RF row. From each
  # row the slice starting at column 19, as wide as the first
  # non-blank token of the HMM row, is appended to the matching string.
  # Finally three characters are stripped from both ends of every
  # accumulated string.
  def set_alignment(aln)
    aln.split(/ (?:\d+|-)\s*\n\n/).each do |block|
      rows = block.split(/\n/)
      cs_row = (rows[0] =~ /^ {16}CS/) ? rows.shift : nil
      rf_row = (rows[0] =~ /^ {16}RF/) ? rows.shift : nil
      width = rows[0][/\S+/].length
      @csline = @csline.to_s + cs_row[19, width] if cs_row
      @rfline = @rfline.to_s + rf_row[19, width] if rf_row
      @hmmseq  = @hmmseq  + rows[0][19, width]
      @midline = @midline + rows[1][19, width]
      @flatseq = @flatseq + rows[2][19, width]
    end
    inner = 3...-3
    @csline  = @csline[inner] if @csline
    @rfline  = @rfline[inner] if @rfline
    @hmmseq  = @hmmseq[inner]
    @midline = @midline[inner]
    @flatseq = @flatseq[inner]
  end

  # For hmmsearch the HMM is the query and the sequence is the target;
  # otherwise the roles are swapped.
  def query_seq
    if @is_hmmsearch then @hmmseq else @flatseq end
  end

  def target_seq
    if @is_hmmsearch then @flatseq else @hmmseq end
  end

  def target_from
    if @is_hmmsearch then @seq_f else @hmm_f end
  end

  def target_to
    if @is_hmmsearch then @seq_t else @hmm_t end
  end

  def query_from
    if @is_hmmsearch then @hmm_f else @seq_f end
  end

  def query_to
    if @is_hmmsearch then @hmm_t else @seq_t end
  end

  alias bit_score score
  alias target_id accession

end
205
+
206
+
207
+ # Bio::HMMER::Report#get_subdata
208
# Cuts the raw report text into its sections.
#
# Returns [sections, is_hmmsearch]. +sections+ is a Hash with the keys
# "header" (only when the text starts with a program banner), "query",
# "hit", "hsp", "alignment" and "statistics"; a value is nil when the
# corresponding text is not found. +is_hmmsearch+ is true only when the
# banner names hmmsearch.
#
# Raises RuntimeError ("multiple reports found") for a non-hmmsearch
# report that has text after the terminating "//" line.
def get_subdata(data)
  # The section markers are regexp fragments that get interpolated below.
  header_prefix = '\Ahmm(search|pfam) - search'
  query_prefix  = '^Query (HMM|sequence): .*\nAccession: '
  hit_prefix    = '^Scores for (complete sequences|sequence family)'
  hsp_prefix    = '^Parsed for domains:'
  aln_prefix    = '^Alignments of top-scoring domains:\n'
  stat_prefix   = '^\nHistogram of all scores:'

  sections = {}

  # Without a banner the report is assumed to come from hmmpfam.
  hmmsearch = false
  if data =~ /#{header_prefix}/
    hmmsearch = ($1 == "search")
    sections["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
  end

  # Each of these sections runs from its own marker up to, but not
  # including, the marker of the following section.
  [ ["query", query_prefix, hit_prefix],
    ["hit",   hit_prefix,   hsp_prefix],
    ["hsp",   hsp_prefix,   aln_prefix] ].each do |key, from, upto|
    sections[key] = data[/(#{from}.+?)(?=#{upto})/m]
  end

  # Alignments end at the statistics for hmmsearch, at "//" otherwise.
  if hmmsearch
    data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
    sections["alignment"] = $1
  else
    data =~ /#{aln_prefix}(.+?)\/\/\n/m
    sections["alignment"] = $1
    raise "multiple reports found" if $'.length > 0
  end

  # Drop the notice that is printed when the -A option of HMMER limited
  # the number of alignments.
  cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
  sections["alignment"].sub!(/#{cutoff_line}/, '')

  sections["statistics"] = data[/(#{stat_prefix}.+)\z/m]

  [sections, hmmsearch]
end
private :get_subdata
250
+
251
+ # Bio::HMMER::Report#parse_header_data
252
# Parses the header section of a report.
#
# data:: the header text: a program banner block and a parameter block,
#        each terminated by a line ending in " - - -", followed by one
#        blank line
#
# Returns [program, parameter]. +program+ maps 'name', 'version',
# 'copyright' and 'license' to the first four banner lines; +parameter+
# maps the text before each "key: value" colon to its value.
def parse_header_data(data)
  data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
  program_data   = $1
  parameter_data = $2

  program = {}
  program['name'], program['version'], program['copyright'],
    program['license'] = program_data.split(/\n/)

  parameter = {}
  # String#each was removed in Ruby 1.9; each_line iterates the same way.
  parameter_data.each_line do |line|
    parameter[$1] = $2 if /^(.+?):\s+(.*?)\s*$/ =~ line
  end

  [program, parameter]
end
private :parse_header_data
271
+
272
+ # Bio::HMMER::Report#parse_query_info
273
# Parses the query section of a report into a Hash.
#
# Every "key: value" line is stored under the text before the colon. Any
# other line that contains a bracketed text is stored, without the
# brackets, under 'comments' (a later one replaces an earlier one).
def parse_query_info(data)
  hash = {}
  # String#each was removed in Ruby 1.9; each_line iterates the same way.
  data.each_line do |line|
    if /^(.+?):\s+(.*?)\s*$/ =~ line
      hash[$1] = $2
    elsif /\s+\[(.+)\]/ =~ line
      hash['comments'] = $1
    end
  end
  hash
end
private :parse_query_info
285
+
286
+ # Bio::HMMER::Report#parse_hit_data
287
# Parses the score table section into an Array of Hit objects.
#
# data:: the section text; everything up to and including the first line
#        that ends in "---" is the table heading and is discarded, as is
#        the final character of the section
#
# Returns [] when the table only holds the "no hits above thresholds"
# notice. The caller's string is left untouched (the original used sub!
# and chop!, which modified the argument and failed with NoMethodError on
# nil when the heading was missing).
def parse_hit_data(data)
  body = data.sub(/.+?---\n/m, '').chop
  hits = []
  return hits if body == "\t[no hits above thresholds]\n"
  # String#each was removed in Ruby 1.9; each_line iterates the same way.
  body.each_line do |line|
    hits.push(Hit.new(line))
  end
  hits
end
private :parse_hit_data
297
+
298
+ # Bio::HMMER::Report#parse_hsp_data
299
# Parses the "Parsed for domains" section into an Array of Hsp objects.
#
# data::         the section text; everything up to and including the
#                first line that ends in "---" is the table heading and is
#                discarded, as is the final character of the section
# is_hmmsearch:: handed on unchanged to every Hsp
#
# Returns [] when the table only holds the "no hits above thresholds"
# notice. The caller's string is left untouched (the original used sub!
# and chop!, which modified the argument and failed with NoMethodError on
# nil when the heading was missing).
def parse_hsp_data(data, is_hmmsearch)
  body = data.sub(/.+?---\n/m, '').chop
  hsps = []
  return hsps if body == "\t[no hits above thresholds]\n"
  # String#each was removed in Ruby 1.9; each_line iterates the same way.
  body.each_line do |line|
    hsps.push(Hsp.new(line, is_hmmsearch))
  end
  hsps
end
private :parse_hsp_data
309
+
310
+ # Bio::HMMER::Report#parse_stat_data
311
# Parses the statistics section that follows the alignments.
#
# data:: the section text, starting with the blank line in front of
#        "Histogram of all scores:"
#
# Returns [histogram, statistical_detail, total_seq_searched,
# whole_seq_top_hits, domain_top_hits]:
# histogram::          the histogram text as one String
# statistical_detail:: Hash of "name = value" lines, values as Float
# total_seq_searched:: Integer taken from the "...: N" line of the third
#                      paragraph (nil when no such line is found)
# whole_seq_top_hits:: Hash of "name: value" lines of the fourth paragraph;
#                      all-digit values become Integer, others stay String
# domain_top_hits::    same, for the remaining text
#
# The section is consumed paragraph by paragraph (paragraphs are separated
# by a blank line). Unlike the original, which used sub! on the argument,
# the caller's string is left untouched.
def parse_stat_data(data)
  paragraph = /(.+?)\n\n/m

  rest = data.sub(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
  histogram = $1

  rest = rest.sub(paragraph, '')
  detail_text = $1
  rest = rest.sub(paragraph, '')
  total_text = $1
  rest = rest.sub(paragraph, '')
  whole_text = $1

  # String#each was removed in Ruby 1.9; each_line iterates the same way.
  statistical_detail = {}
  detail_text.each_line do |line|
    statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ line
  end

  total_seq_searched = nil
  total_text.each_line do |line|
    total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ line
  end

  whole_seq_top_hits = {}
  whole_text.each_line do |line|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ line
      whole_seq_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ line
      whole_seq_top_hits[$1] = $2
    end
  end

  domain_top_hits = {}
  rest.each_line do |line|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ line
      domain_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ line
      domain_top_hits[$1] = $2
    end
  end

  [histogram, statistical_detail, total_seq_searched,
    whole_seq_top_hits, domain_top_hits]
end
private :parse_stat_data
350
+
351
+ end
352
+
353
+ end
354
+ end
355
+
356
+
357
# Command line self-test: reads one report from ARGF and dumps every
# parsed field.
if __FILE__ == $0

=begin

  #
  # for multiple reports in a single output file (hmmpfam)
  #
  Bio::HMMER.reports(ARGF.read) do |report|
    report.hits.each do |hit|
      hit.hsps.each do |hsp|
      end
    end
  end

=end

  # Pretty-print when 'pp' is available, otherwise keep the plain Kernel#p.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  rep = Bio::HMMER::Report.new(ARGF.read)
  p rep

  indent = 18

  # Prints the right-justified label followed by the inspected value.
  show = lambda do |label, value|
    print label.rjust(indent)
    p value
  end

  puts "### hmmer result"
  [
    ["name : ",           rep.program['name']],
    ["version : ",        rep.program['version']],
    ["copyright : ",      rep.program['copyright']],
    ["license : ",        rep.program['license']],
    ["HMM file : ",       rep.parameter['HMM file']],
    ["Sequence file : ",  rep.parameter['Sequence file']],
    ["Query sequence : ", rep.query_info['Query sequence']],
    ["Accession : ",      rep.query_info['Accession']],
    ["Description : ",    rep.query_info['Description']],
  ].each { |label, value| show.call(label, value) }

  rep.each do |hit|
    puts "## each hit"
    [
      ["accession : ",   [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]],
      ["description : ", [ hit.description, hit.definition ]],
      ["target_def : ",  hit.target_def],
      ["score : ",       [ hit.score, hit.bit_score ]],
      ["evalue : ",      hit.evalue],
      ["num : ",         hit.num],
    ].each { |label, value| show.call(label, value) }

    hit.each do |hsp|
      puts "## each hsp"
      [
        ["accession : ",    [ hsp.accession, hsp.target_id ]],
        ["domain : ",       hsp.domain],
        ["seq_f : ",        hsp.seq_f],
        ["seq_t : ",        hsp.seq_t],
        ["seq_ft : ",       hsp.seq_ft],
        ["hmm_f : ",        hsp.hmm_f],
        ["hmm_t : ",        hsp.hmm_t],
        ["hmm_ft : ",       hsp.hmm_ft],
        ["score : ",        [ hsp.score, hsp.bit_score ]],
        ["evalue : ",       hsp.evalue],
        ["midline : ",      hsp.midline],
        ["hmmseq : ",       hsp.hmmseq],
        ["flatseq : ",      hsp.flatseq],
        ["query_frame : ",  hsp.query_frame],
        ["target_frame : ", hsp.target_frame],
        ["query_seq : ",    hsp.query_seq],     # hmmseq, flatseq
        ["target_seq : ",   hsp.target_seq],    # flatseq, hmmseq
        ["target_from : ",  hsp.target_from],   # seq_f, hmm_f
        ["target_to : ",    hsp.target_to],     # seq_t, hmm_t
        ["query_from : ",   hsp.query_from],    # hmm_f, seq_f
        ["query_to : ",     hsp.query_to],      # hmm_t, seq_t
      ].each { |label, value| show.call(label, value) }
    end
  end

end
470
+
471
+
472
+ =begin
473
+
474
+ = Bio::HMMER::Report
475
+
476
+ --- Bio::HMMER::Report.new(data)
477
+ --- Bio::HMMER::Report#each
478
+
479
+ Iterates on each Bio::HMMER::Report::Hit object.
480
+
481
+ --- Bio::HMMER::Report#hits
482
+
483
+ Returns an Array of Bio::HMMER::Report::Hit objects.
484
+
485
+
486
+ == Bio::HMMER::Report::Hit
487
+
488
+ --- Bio::HMMER::Report::Hit#each
489
+
490
+ Iterates on each Hsp object.
491
+
492
+ --- Bio::HMMER::Report::Hit#hsps
493
+
494
+ Returns an Array of Bio::HMMER::Report::Hsp objects.
495
+
496
+ --- Bio::HMMER::Report::Hit#target_id
497
+ --- Bio::HMMER::Report::Hit#hit_id
498
+ --- Bio::HMMER::Report::Hit#entry_id
499
+ --- Bio::HMMER::Report::Hit#definition
500
+ --- Bio::HMMER::Report::Hit#description
501
+ --- Bio::HMMER::Report::Hit#num
502
+
503
+ number of domains
504
+
505
+ --- Bio::HMMER::Report::Hit#target_def
506
+
507
+ <domain number> + @description
508
+
509
+ --- Bio::HMMER::Report::Hit#evalue
510
+ --- Bio::HMMER::Report::Hit#bit_score
511
+ --- Bio::HMMER::Report::Hit#score
512
+
513
+ Matching scores (total of all HSPs).
514
+
515
+
516
+ == Bio::HMMER::Report::Hsp
517
+
518
+ --- Bio::HMMER::Report#hsps
519
+
520
+ Returns an Array of Bio::HMMER::Report::Hsp objects.
521
+ Under special circumstances, some HSPs do not have
522
+ parent Hit objects. If you want to access such HSPs,
523
+ use this method.
524
+
525
+ --- Bio::HMMER::Report::Hsp#target_id
526
+ --- Bio::HMMER::Report::Hsp#accession
527
+ --- Bio::HMMER::Report::Hsp#domain
528
+ --- Bio::HMMER::Report::Hsp#seq_f
529
+ --- Bio::HMMER::Report::Hsp#seq_t
530
+ --- Bio::HMMER::Report::Hsp#seq_ft
531
+ --- Bio::HMMER::Report::Hsp#hmm_f
532
+ --- Bio::HMMER::Report::Hsp#hmm_t
533
+ --- Bio::HMMER::Report::Hsp#hmm_ft
534
+
535
+ --- Bio::HMMER::Report::Hsp#bit_score
536
+ --- Bio::HMMER::Report::Hsp#score
537
+ --- Bio::HMMER::Report::Hsp#evalue
538
+
539
+ --- Bio::HMMER::Report::Hsp#midline
540
+ --- Bio::HMMER::Report::Hsp#hmmseq
541
+ --- Bio::HMMER::Report::Hsp#flatseq
542
+ --- Bio::HMMER::Report::Hsp#query_frame
543
+ --- Bio::HMMER::Report::Hsp#target_frame
544
+
545
+ --- Bio::HMMER::Report::Hsp#query_seq
546
+ --- Bio::HMMER::Report::Hsp#query_from
547
+ --- Bio::HMMER::Report::Hsp#query_to
548
+ --- Bio::HMMER::Report::Hsp#target_seq
549
+ --- Bio::HMMER::Report::Hsp#target_from
550
+ --- Bio::HMMER::Report::Hsp#target_to
551
+
552
+ --- Bio::HMMER::Report::Hsp#csline
553
+ --- Bio::HMMER::Report::Hsp#rfline
554
+
555
+ =end
556
+