bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/appl/emboss.rb
CHANGED
|
@@ -1,68 +1,61 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/appl/emboss.rb - EMBOSS wrapper
|
|
2
|
+
# = bio/appl/emboss.rb - EMBOSS wrapper
|
|
3
3
|
#
|
|
4
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2002, 2005
|
|
5
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
# License:: Ruby's
|
|
5
7
|
#
|
|
6
|
-
#
|
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
-
# License as published by the Free Software Foundation; either
|
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
|
8
|
+
# $Id: emboss.rb,v 1.4 2006/02/27 09:14:30 k Exp $
|
|
10
9
|
#
|
|
11
|
-
#
|
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
-
# Lesser General Public License for more details.
|
|
10
|
+
# == References
|
|
15
11
|
#
|
|
16
|
-
#
|
|
17
|
-
# License along with this library; if not, write to the Free Software
|
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
-
#
|
|
20
|
-
# $Id: emboss.rb,v 1.2 2005/09/08 01:22:08 k Exp $
|
|
12
|
+
# * http://www.emboss.org
|
|
21
13
|
#
|
|
22
14
|
|
|
23
15
|
module Bio
|
|
24
16
|
|
|
25
|
-
|
|
17
|
+
autoload :Command, 'bio/command'
|
|
26
18
|
|
|
27
|
-
|
|
28
|
-
@cmd_line = cmd_line + ' -stdout'
|
|
29
|
-
end
|
|
19
|
+
class EMBOSS
|
|
30
20
|
|
|
31
|
-
|
|
32
|
-
begin
|
|
33
|
-
@io = IO.popen(@cmd_line, "w+")
|
|
34
|
-
@result = @io.read
|
|
35
|
-
return @result
|
|
36
|
-
ensure
|
|
37
|
-
@io.close
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
attr_reader :io, :result
|
|
21
|
+
extend Bio::Command::Tools
|
|
41
22
|
|
|
23
|
+
def self.seqret(arg)
|
|
24
|
+
str = self.retrieve('seqret', arg)
|
|
42
25
|
end
|
|
43
26
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
= Bio::EMBOSS
|
|
49
|
-
|
|
50
|
-
EMBOSS wrapper.
|
|
27
|
+
def self.entret(arg)
|
|
28
|
+
str = self.retrieve('entret', arg)
|
|
29
|
+
end
|
|
51
30
|
|
|
52
|
-
|
|
53
|
-
|
|
31
|
+
def initialize(cmd_line)
|
|
32
|
+
@cmd_line = cmd_line + ' -stdout -auto'
|
|
33
|
+
end
|
|
54
34
|
|
|
55
|
-
|
|
56
|
-
|
|
35
|
+
def exec
|
|
36
|
+
begin
|
|
37
|
+
@io = IO.popen(@cmd_line, "w+")
|
|
38
|
+
@result = @io.read
|
|
39
|
+
return @result
|
|
40
|
+
ensure
|
|
41
|
+
@io.close
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
attr_reader :io, :result
|
|
57
45
|
|
|
58
|
-
|
|
46
|
+
private
|
|
59
47
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
48
|
+
def self.retrieve(cmd, arg)
|
|
49
|
+
cmd = [ cmd, arg, '-auto', '-stdout' ]
|
|
50
|
+
str = ''
|
|
51
|
+
call_command_local(cmd) do |inn, out|
|
|
52
|
+
inn.close_write
|
|
53
|
+
str = out.read
|
|
54
|
+
end
|
|
55
|
+
return str
|
|
56
|
+
end
|
|
63
57
|
|
|
64
|
-
|
|
58
|
+
end # EMBOSS
|
|
65
59
|
|
|
66
|
-
|
|
60
|
+
end # Bio
|
|
67
61
|
|
|
68
|
-
=end
|
data/lib/bio/appl/hmmer.rb
CHANGED
|
@@ -1,7 +1,32 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/appl/hmmer.rb - HMMER wrapper
|
|
2
|
+
# = bio/appl/hmmer.rb - HMMER wrapper
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2002
|
|
4
|
+
# Copyright:: Copyright (C) 2002
|
|
5
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: hmmer.rb,v 1.5 2006/02/02 17:08:36 nakao Exp $
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
# A wrapper for the HMMER programs (hmmsearch or hmmpfam).
|
|
13
|
+
#
|
|
14
|
+
# == Examples
|
|
15
|
+
#
|
|
16
|
+
# require 'bio'
|
|
17
|
+
# program = 'hmmsearch' # or 'hmmpfam'
|
|
18
|
+
# hmmfile = 'test.hmm'
|
|
19
|
+
# seqfile = 'test.faa'
|
|
20
|
+
#
|
|
21
|
+
# factory = Bio::HMMER.new(program, hmmfile, seqfile)
|
|
22
|
+
# p factory.query
|
|
23
|
+
#
|
|
24
|
+
# == References
|
|
25
|
+
#
|
|
26
|
+
# * HMMER
|
|
27
|
+
# http://hmmer.wustl.edu/
|
|
28
|
+
#
|
|
29
|
+
#--
|
|
5
30
|
#
|
|
6
31
|
# This library is free software; you can redistribute it and/or
|
|
7
32
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -17,7 +42,7 @@
|
|
|
17
42
|
# License along with this library; if not, write to the Free Software
|
|
18
43
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
44
|
#
|
|
20
|
-
|
|
45
|
+
#++
|
|
21
46
|
#
|
|
22
47
|
|
|
23
48
|
require 'bio/command'
|
|
@@ -25,105 +50,114 @@ require 'shellwords'
|
|
|
25
50
|
|
|
26
51
|
module Bio
|
|
27
52
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
53
|
+
# A wrapper for HMMER programs (hmmsearch or hmmpfam).
|
|
54
|
+
#
|
|
55
|
+
# === Examples
|
|
56
|
+
#
|
|
57
|
+
# require 'bio'
|
|
58
|
+
# program = 'hmmsearch' # or 'hmmpfam'
|
|
59
|
+
# hmmfile = 'test.hmm'
|
|
60
|
+
# seqfile = 'test.faa'
|
|
61
|
+
#
|
|
62
|
+
# factory = Bio::HMMER.new(program, hmmfile, seqfile)
|
|
63
|
+
# report = factory.query
|
|
64
|
+
# report.class #=> Bio::HMMER::Report
|
|
65
|
+
#
|
|
66
|
+
# === References
|
|
67
|
+
#
|
|
68
|
+
# * HMMER
|
|
69
|
+
# http://hmmer.wustl.edu/
|
|
70
|
+
#
|
|
71
|
+
class HMMER
|
|
72
|
+
|
|
73
|
+
autoload :Report, 'bio/appl/hmmer/report'
|
|
74
|
+
|
|
75
|
+
include Bio::Command::Tools
|
|
76
|
+
|
|
77
|
+
# Program name (hmmsearch or hmmpfam).
|
|
78
|
+
attr_accessor :program
|
|
79
|
+
|
|
80
|
+
# Name of hmmfile.
|
|
81
|
+
attr_accessor :hmmfile
|
|
82
|
+
|
|
83
|
+
# Name of seqfile.
|
|
84
|
+
attr_accessor :seqfile
|
|
85
|
+
|
|
86
|
+
# Command line options.
|
|
87
|
+
attr_accessor :options
|
|
88
|
+
|
|
89
|
+
# Shows the raw output from the hmmer search.
|
|
90
|
+
attr_reader :output
|
|
91
|
+
|
|
92
|
+
# Sets a program name, a profile hmm file name, a query sequence file name
|
|
93
|
+
# and options in string.
|
|
94
|
+
#
|
|
95
|
+
# Program names: hmmsearch, hmmpfam
|
|
96
|
+
#
|
|
97
|
+
def initialize(program, hmmfile, seqfile, options = [])
|
|
98
|
+
@program = program
|
|
99
|
+
@hmmfile = hmmfile
|
|
100
|
+
@seqfile = seqfile
|
|
101
|
+
@output = ''
|
|
102
|
+
|
|
103
|
+
begin
|
|
104
|
+
@options = opt.to_ary
|
|
105
|
+
rescue NameError #NoMethodError
|
|
106
|
+
# backward compatibility
|
|
107
|
+
@options = Shellwords.shellwords(options)
|
|
108
|
+
end
|
|
109
|
+
end
|
|
31
110
|
|
|
32
|
-
include Bio::Command::Tools
|
|
33
111
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
112
|
+
# Gets options by String.
|
|
113
|
+
# backward compatibility.
|
|
114
|
+
def option
|
|
115
|
+
make_command_line(@options)
|
|
116
|
+
end
|
|
39
117
|
|
|
40
|
-
begin
|
|
41
|
-
@options = opt.to_ary
|
|
42
|
-
rescue NameError #NoMethodError
|
|
43
|
-
# backward compatibility
|
|
44
|
-
@options = Shellwords.shellwords(opt)
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
attr_accessor :program, :hmmfile, :seqfile, :options
|
|
48
|
-
attr_reader :output
|
|
49
118
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
119
|
+
# Sets options by String.
|
|
120
|
+
# backward compatibility.
|
|
121
|
+
def option=(str)
|
|
122
|
+
@options = Shellwords.shellwords(str)
|
|
123
|
+
end
|
|
54
124
|
|
|
55
|
-
def option=(str)
|
|
56
|
-
# backward compatibility
|
|
57
|
-
@options = Shellwords.shellwords(str)
|
|
58
|
-
end
|
|
59
125
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
126
|
+
# Executes the hmmer search and returns the report
|
|
127
|
+
# (Bio::HMMER::Report object).
|
|
128
|
+
def query
|
|
129
|
+
cmd = [ @program, *@options ]
|
|
130
|
+
cmd.concat([ @hmmfile, @seqfile ])
|
|
63
131
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
132
|
+
report = nil
|
|
133
|
+
|
|
134
|
+
@output = call_command_local(cmd, nil)
|
|
135
|
+
report = parse_result(@output)
|
|
68
136
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
137
|
+
return report
|
|
138
|
+
end
|
|
72
139
|
|
|
73
|
-
|
|
140
|
+
private
|
|
141
|
+
|
|
142
|
+
def parse_result(data)
|
|
143
|
+
Report.new(data)
|
|
144
|
+
end
|
|
74
145
|
|
|
75
|
-
|
|
76
|
-
Report.new(data)
|
|
77
|
-
end
|
|
146
|
+
end # class HMMER
|
|
78
147
|
|
|
79
|
-
|
|
80
|
-
end
|
|
148
|
+
end # module Bio
|
|
81
149
|
|
|
82
150
|
|
|
83
151
|
|
|
84
152
|
if __FILE__ == $0
|
|
85
153
|
|
|
86
|
-
|
|
87
|
-
require 'pp'
|
|
88
|
-
alias p pp
|
|
89
|
-
rescue
|
|
90
|
-
end
|
|
154
|
+
require 'pp'
|
|
91
155
|
|
|
92
|
-
program = ARGV.shift
|
|
156
|
+
program = ARGV.shift # hmmsearch, hmmpfam
|
|
93
157
|
hmmfile = ARGV.shift
|
|
94
158
|
seqfile = ARGV.shift
|
|
95
159
|
|
|
96
160
|
factory = Bio::HMMER.new(program, hmmfile, seqfile)
|
|
97
|
-
|
|
161
|
+
pp factory.query
|
|
98
162
|
|
|
99
163
|
end
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
=begin
|
|
103
|
-
|
|
104
|
-
= Bio::HMMER
|
|
105
|
-
|
|
106
|
-
--- Bio::HMMER.new(program, hmmfile, seqfile, option = '')
|
|
107
|
-
--- Bio::HMMER#program
|
|
108
|
-
--- Bio::HMMER#hmmfile
|
|
109
|
-
--- Bio::HMMER#seqfile
|
|
110
|
-
--- Bio::HMMER#options
|
|
111
|
-
|
|
112
|
-
Accessors for the factory.
|
|
113
|
-
|
|
114
|
-
--- Bio::HMMER#option
|
|
115
|
-
--- Bio::HMMER#option=(str)
|
|
116
|
-
|
|
117
|
-
Get/set options by string.
|
|
118
|
-
|
|
119
|
-
--- Bio::HMMER#query
|
|
120
|
-
|
|
121
|
-
Executes the hmmer search and returns Report object (Bio::HMMER::Report).
|
|
122
|
-
|
|
123
|
-
--- Bio::HMMER#output
|
|
124
|
-
|
|
125
|
-
Shows the raw output from hmmer search.
|
|
126
|
-
|
|
127
|
-
=end
|
|
128
|
-
|
|
129
|
-
|
|
@@ -1,8 +1,44 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parserer
|
|
2
|
+
# = bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2002
|
|
5
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2002
|
|
5
|
+
# Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>,
|
|
6
|
+
# Copyright:: Copyright (C) 2005
|
|
7
|
+
# Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
|
|
8
|
+
# License:: LGPL
|
|
9
|
+
#
|
|
10
|
+
# $Id: report.rb,v 1.10 2006/02/02 17:08:36 nakao Exp $
|
|
11
|
+
#
|
|
12
|
+
# == Description
|
|
13
|
+
#
|
|
14
|
+
# Parser class for hmmsearch and hmmpfam in the HMMER package.
|
|
15
|
+
#
|
|
16
|
+
# == Examples
|
|
17
|
+
#
|
|
18
|
+
# #for multiple reports in a single output file (example.hmmpfam)
|
|
19
|
+
# Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
|
|
20
|
+
# report.program['name']
|
|
21
|
+
# report.parameter['HMM file']
|
|
22
|
+
# report.query_info['Query sequence']
|
|
23
|
+
# report.hits.each do |hit|
|
|
24
|
+
# hit.accession
|
|
25
|
+
# hit.description
|
|
26
|
+
# hit.score
|
|
27
|
+
# hit.evalue
|
|
28
|
+
# hit.hsps.each do |hsp|
|
|
29
|
+
# hsp.accession
|
|
30
|
+
# hsp.domain
|
|
31
|
+
# hsp.evalue
|
|
32
|
+
# hsp.midline
|
|
33
|
+
# end
|
|
34
|
+
# end
|
|
35
|
+
#
|
|
36
|
+
# == References
|
|
37
|
+
#
|
|
38
|
+
# * HMMER
|
|
39
|
+
# http://hmmer.wustl.edu/
|
|
40
|
+
#
|
|
41
|
+
#--
|
|
6
42
|
#
|
|
7
43
|
# This library is free software; you can redistribute it and/or
|
|
8
44
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -18,340 +54,534 @@
|
|
|
18
54
|
# License along with this library; if not, write to the Free Software
|
|
19
55
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
56
|
#
|
|
21
|
-
|
|
57
|
+
#++
|
|
22
58
|
#
|
|
23
59
|
|
|
24
60
|
require 'bio/appl/hmmer'
|
|
25
61
|
|
|
26
62
|
module Bio
|
|
27
|
-
class HMMER
|
|
28
63
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
64
|
+
|
|
65
|
+
class HMMER
|
|
66
|
+
|
|
67
|
+
# A reader interface that splits multi-report text into individual reports
|
|
68
|
+
# (Bio::HMMER::Report).
|
|
69
|
+
#
|
|
70
|
+
# === Examples
|
|
71
|
+
#
|
|
72
|
+
# # Iterator
|
|
73
|
+
# Bio::HMMER.reports(reports_text) do |report|
|
|
74
|
+
# report
|
|
75
|
+
# end
|
|
76
|
+
#
|
|
77
|
+
# # Array
|
|
78
|
+
# reports = Bio::HMMER.reports(reports_text)
|
|
79
|
+
#
|
|
80
|
+
def self.reports(multiple_report_text)
|
|
81
|
+
ary = []
|
|
82
|
+
multiple_report_text.each("\n//\n") do |report|
|
|
83
|
+
if block_given?
|
|
84
|
+
yield Report.new(report)
|
|
85
|
+
else
|
|
86
|
+
ary << Report.new(report)
|
|
37
87
|
end
|
|
38
|
-
return ary
|
|
39
88
|
end
|
|
89
|
+
return ary
|
|
90
|
+
end
|
|
40
91
|
|
|
41
92
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
93
|
+
# A parser class for a search report by hmmsearch or hmmpfam program in the
|
|
94
|
+
# HMMER package.
|
|
95
|
+
#
|
|
96
|
+
# === Examples
|
|
97
|
+
#
|
|
98
|
+
# Examples
|
|
99
|
+
# #for multiple reports in a single output file (example.hmmpfam)
|
|
100
|
+
# Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
|
|
101
|
+
# report.program['name']
|
|
102
|
+
# report.parameter['HMM file']
|
|
103
|
+
# report.query_info['Query sequence']
|
|
104
|
+
# report.hits.each do |hit|
|
|
105
|
+
# hit.accession
|
|
106
|
+
# hit.description
|
|
107
|
+
# hit.score
|
|
108
|
+
# hit.evalue
|
|
109
|
+
# hit.hsps.each do |hsp|
|
|
110
|
+
# hsp.accession
|
|
111
|
+
# hsp.domain
|
|
112
|
+
# hsp.evalue
|
|
113
|
+
# hsp.midline
|
|
114
|
+
# end
|
|
115
|
+
# end
|
|
116
|
+
#
|
|
117
|
+
# === References
|
|
118
|
+
#
|
|
119
|
+
# * HMMER
|
|
120
|
+
# http://hmmer.wustl.edu/
|
|
121
|
+
#
|
|
122
|
+
class Report
|
|
123
|
+
|
|
124
|
+
# Delimiter of each entry for Bio::FlatFile support.
|
|
125
|
+
DELIMITER = RS = "\n//\n"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# A Hash containing information about the program used.
|
|
129
|
+
# Valid keys are 'name', 'version', 'copyright' and 'license'.
|
|
130
|
+
attr_reader :program
|
|
131
|
+
|
|
132
|
+
# A Hash containing the parameters used.
|
|
133
|
+
# Valid keys are 'HMM file' and 'Sequence file'.
|
|
134
|
+
attr_reader :parameter
|
|
135
|
+
|
|
136
|
+
# A Hash containing the query information.
|
|
137
|
+
# Valid keys are 'Query sequence', 'Accession' and 'Description'.
|
|
138
|
+
attr_reader :query_info
|
|
139
|
+
|
|
140
|
+
#
|
|
141
|
+
attr_reader :hits
|
|
142
|
+
|
|
143
|
+
# Returns an Array of Bio::HMMER::Report::Hsp objects.
|
|
144
|
+
# Under special circumstances, some HSPs do not have
|
|
145
|
+
# parent Hit objects. If you want to access such HSPs,
|
|
146
|
+
# use this method.
|
|
147
|
+
attr_reader :hsps
|
|
148
|
+
|
|
149
|
+
# statistics by hmmsearch.
|
|
150
|
+
attr_reader :histogram
|
|
151
|
+
|
|
152
|
+
# statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'.
|
|
153
|
+
attr_reader :statistical_detail
|
|
154
|
+
|
|
155
|
+
# statistics by hmmsearch.
|
|
156
|
+
attr_reader :total_seq_searched
|
|
157
|
+
|
|
158
|
+
# statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
|
|
159
|
+
attr_reader :whole_seq_top_hits
|
|
160
|
+
|
|
161
|
+
# statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
|
|
162
|
+
attr_reader :domain_top_hits
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Parses a HMMER search report (by hmmpfam or hmmsearch program) and
|
|
166
|
+
# returns a Bio::HMMER::Report object.
|
|
167
|
+
#
|
|
168
|
+
# === Examples
|
|
169
|
+
#
|
|
170
|
+
# hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out"))
|
|
171
|
+
#
|
|
172
|
+
# hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out"))
|
|
173
|
+
#
|
|
174
|
+
def initialize(data)
|
|
175
|
+
|
|
176
|
+
# The input data is divided into six data fields, i.e. header,
|
|
177
|
+
# query information, hits, HSPs, alignments and search statistics.
|
|
178
|
+
# However, header and statistics data don't necessarily exist.
|
|
179
|
+
subdata, is_hmmsearch = get_subdata(data)
|
|
180
|
+
|
|
181
|
+
# if header exists, parse it
|
|
182
|
+
if subdata["header"]
|
|
183
|
+
@program, @parameter = parse_header_data(subdata["header"])
|
|
184
|
+
else
|
|
185
|
+
@program, @parameter = [{}, {}]
|
|
186
|
+
end
|
|
47
187
|
|
|
48
|
-
|
|
188
|
+
@query_info = parse_query_info(subdata["query"])
|
|
189
|
+
@hits = parse_hit_data(subdata["hit"])
|
|
190
|
+
@hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch)
|
|
49
191
|
|
|
50
|
-
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
subdata, is_hmmsearch = get_subdata(data)
|
|
192
|
+
if @hsps != []
|
|
193
|
+
# split alignment subdata into an array of alignments
|
|
194
|
+
aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1)
|
|
54
195
|
|
|
55
|
-
#
|
|
56
|
-
|
|
57
|
-
@
|
|
58
|
-
else
|
|
59
|
-
@program, @parameter = [{}, {}]
|
|
196
|
+
# append alignment information to corresponding Hsp
|
|
197
|
+
aln_ary.each_with_index do |aln, i|
|
|
198
|
+
@hsps[i].set_alignment(aln)
|
|
60
199
|
end
|
|
200
|
+
end
|
|
61
201
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# append alignment information to corresponding Hsp
|
|
71
|
-
aln_ary.each_with_index do |aln, i|
|
|
72
|
-
@hsps[i].set_alignment(aln)
|
|
73
|
-
end
|
|
202
|
+
# assign each Hsp object to its parent Hit
|
|
203
|
+
hits_hash = {}
|
|
204
|
+
@hits.each do |hit|
|
|
205
|
+
hits_hash[hit.accession] = hit
|
|
206
|
+
end
|
|
207
|
+
@hsps.each do |hsp|
|
|
208
|
+
if hits_hash.has_key?(hsp.accession)
|
|
209
|
+
hits_hash[hsp.accession].append_hsp(hsp)
|
|
74
210
|
end
|
|
211
|
+
end
|
|
75
212
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
@
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
end
|
|
85
|
-
end
|
|
213
|
+
# parse statistics (for hmmsearch)
|
|
214
|
+
if is_hmmsearch
|
|
215
|
+
@histogram, @statistical_detail, @total_seq_searched, \
|
|
216
|
+
@whole_seq_top_hits, @domain_top_hits = \
|
|
217
|
+
parse_stat_data(subdata["statistics"])
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
end
|
|
86
221
|
|
|
87
|
-
# parse statistics (for hmmsearch)
|
|
88
|
-
if is_hmmsearch
|
|
89
|
-
@histogram, @statistical_detail, @total_seq_searched, \
|
|
90
|
-
@whole_seq_top_hits, @domain_top_hits = \
|
|
91
|
-
parse_stat_data(subdata["statistics"])
|
|
92
|
-
end
|
|
93
222
|
|
|
223
|
+
# Iterates over each hit (Bio::HMMER::Report::Hit).
|
|
224
|
+
def each
|
|
225
|
+
@hits.each do |hit|
|
|
226
|
+
yield hit
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
alias :each_hit :each
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Bio::HMMER::Report#get_subdata
|
|
233
|
+
def get_subdata(data)
|
|
234
|
+
subdata = {}
|
|
235
|
+
header_prefix = '\Ahmm(search|pfam) - search'
|
|
236
|
+
query_prefix = '^Query (HMM|sequence): .*\nAccession: '
|
|
237
|
+
hit_prefix = '^Scores for (complete sequences|sequence family)'
|
|
238
|
+
hsp_prefix = '^Parsed for domains:'
|
|
239
|
+
aln_prefix = '^Alignments of top-scoring domains:\n'
|
|
240
|
+
stat_prefix = '^\nHistogram of all scores:'
|
|
241
|
+
|
|
242
|
+
# if header exists, get it
|
|
243
|
+
if data =~ /#{header_prefix}/
|
|
244
|
+
is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
|
|
245
|
+
subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
|
|
246
|
+
else
|
|
247
|
+
is_hmmsearch = false # if no header, assumed to be hmmpfam
|
|
94
248
|
end
|
|
95
|
-
attr_reader :program, :parameter, :query_info, :hits, :hsps,
|
|
96
|
-
:histogram, :statistical_detail, :total_seq_searched,
|
|
97
|
-
:whole_seq_top_hits, :domain_top_hits
|
|
98
249
|
|
|
250
|
+
# get query, Hit and Hsp data
|
|
251
|
+
subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
|
|
252
|
+
subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
|
|
253
|
+
subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
|
|
254
|
+
|
|
255
|
+
# get alignment data
|
|
256
|
+
if is_hmmsearch
|
|
257
|
+
data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
|
|
258
|
+
subdata["alignment"] = $1
|
|
259
|
+
else
|
|
260
|
+
data =~ /#{aln_prefix}(.+?)\/\/\n/m
|
|
261
|
+
subdata["alignment"] = $1
|
|
262
|
+
raise "multiple reports found" if $'.length > 0
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# handle -A option of HMMER
|
|
266
|
+
cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
|
|
267
|
+
subdata["alignment"].sub!(/#{cutoff_line}/, '')
|
|
268
|
+
|
|
269
|
+
# get statistics data
|
|
270
|
+
subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
|
|
99
271
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
272
|
+
[subdata, is_hmmsearch]
|
|
273
|
+
end
|
|
274
|
+
private :get_subdata
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
# Bio::HMMER::Report#parse_header_data
|
|
278
|
+
def parse_header_data(data)
|
|
279
|
+
data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
|
|
280
|
+
program_data = $1
|
|
281
|
+
parameter_data = $2
|
|
282
|
+
|
|
283
|
+
program = {}
|
|
284
|
+
program['name'], program['version'], program['copyright'], \
|
|
285
|
+
program['license'] = program_data.split(/\n/)
|
|
286
|
+
|
|
287
|
+
parameter = {}
|
|
288
|
+
parameter_data.each do |x|
|
|
289
|
+
if /^(.+?):\s+(.*?)\s*$/ =~ x
|
|
290
|
+
parameter[$1] = $2
|
|
103
291
|
end
|
|
104
292
|
end
|
|
105
293
|
|
|
294
|
+
[program, parameter]
|
|
295
|
+
end
|
|
296
|
+
private :parse_header_data
|
|
106
297
|
|
|
107
|
-
# Bio::HMMER::Report::Hit
|
|
108
|
-
class Hit
|
|
109
|
-
def initialize(data)
|
|
110
|
-
@hsps = Array.new
|
|
111
|
-
if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data
|
|
112
|
-
@accession, @description, @score, @evalue, @num =
|
|
113
|
-
[$1, $2, $3.to_f, $4.to_f, $5.to_i]
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
attr_reader :hsps, :accession, :description, :score, :evalue, :num
|
|
117
298
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
299
|
+
# Bio::HMMER::Report#parse_query_info
|
|
300
|
+
def parse_query_info(data)
|
|
301
|
+
hash = {}
|
|
302
|
+
data.each do |x|
|
|
303
|
+
if /^(.+?):\s+(.*?)\s*$/ =~ x
|
|
304
|
+
hash[$1] = $2
|
|
305
|
+
elsif /\s+\[(.+)\]/ =~ x
|
|
306
|
+
hash['comments'] = $1
|
|
122
307
|
end
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
308
|
+
end
|
|
309
|
+
hash
|
|
310
|
+
end
|
|
311
|
+
private :parse_query_info
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
# Bio::HMMER::Report#parse_hit_data
|
|
315
|
+
def parse_hit_data(data)
|
|
316
|
+
data.sub!(/.+?---\n/m, '').chop!
|
|
317
|
+
hits = []
|
|
318
|
+
return hits if data == "\t[no hits above thresholds]\n"
|
|
319
|
+
data.each do |l|
|
|
320
|
+
hits.push(Hit.new(l))
|
|
321
|
+
end
|
|
322
|
+
hits
|
|
323
|
+
end
|
|
324
|
+
private :parse_hit_data
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# Bio::HMMER::Report#parse_hsp_data
|
|
328
|
+
def parse_hsp_data(data, is_hmmsearch)
|
|
329
|
+
data.sub!(/.+?---\n/m, '').chop!
|
|
330
|
+
hsps=[]
|
|
331
|
+
return hsps if data == "\t[no hits above thresholds]\n"
|
|
332
|
+
data.each do |l|
|
|
333
|
+
hsps.push(Hsp.new(l, is_hmmsearch))
|
|
334
|
+
end
|
|
335
|
+
return hsps
|
|
336
|
+
end
|
|
337
|
+
private :parse_hsp_data
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# Bio::HMMER::Report#parse_stat_data
|
|
341
|
+
def parse_stat_data(data)
|
|
342
|
+
data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
|
|
343
|
+
histogram = $1.strip
|
|
344
|
+
|
|
345
|
+
statistical_detail = {}
|
|
346
|
+
data.sub!(/(.+?)\n\n/m, '')
|
|
347
|
+
$1.each do |l|
|
|
348
|
+
statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
total_seq_searched = nil
|
|
352
|
+
data.sub!(/(.+?)\n\n/m, '')
|
|
353
|
+
$1.each do |l|
|
|
354
|
+
total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
whole_seq_top_hits = {}
|
|
358
|
+
data.sub!(/(.+?)\n\n/m, '')
|
|
359
|
+
$1.each do |l|
|
|
360
|
+
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
361
|
+
whole_seq_top_hits[$1] = $2.to_i
|
|
362
|
+
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|
|
363
|
+
whole_seq_top_hits[$1] = $2
|
|
136
364
|
end
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
domain_top_hits = {}
|
|
368
|
+
data.each do |l|
|
|
369
|
+
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
370
|
+
domain_top_hits[$1] = $2.to_i
|
|
371
|
+
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|
|
372
|
+
domain_top_hits[$1] = $2
|
|
140
373
|
end
|
|
141
|
-
|
|
142
374
|
end
|
|
143
375
|
|
|
376
|
+
[histogram, statistical_detail, total_seq_searched, \
|
|
377
|
+
whole_seq_top_hits, domain_top_hits]
|
|
378
|
+
end
|
|
379
|
+
private :parse_stat_data
|
|
144
380
|
|
|
145
|
-
# Bio::HMMER::Report::Hsp
|
|
146
|
-
class Hsp
|
|
147
|
-
def initialize(data, is_hmmsearch)
|
|
148
|
-
@is_hmmsearch = is_hmmsearch
|
|
149
|
-
|
|
150
|
-
@accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,
|
|
151
|
-
score, evalue = data.split(' ')
|
|
152
|
-
@seq_f = seq_f.to_i
|
|
153
|
-
@seq_t = seq_t.to_i
|
|
154
|
-
@hmm_f = hmm_f.to_i
|
|
155
|
-
@hmm_t = hmm_t.to_i
|
|
156
|
-
@score = score.to_f
|
|
157
|
-
@evalue = evalue.to_f
|
|
158
|
-
@hmmseq = ''
|
|
159
|
-
@flatseq = ''
|
|
160
|
-
@midline = ''
|
|
161
|
-
@query_frame = 1
|
|
162
|
-
@target_frame = 1
|
|
163
|
-
# CS and RF lines are rarely used.
|
|
164
|
-
@csline = nil
|
|
165
|
-
@rfline = nil
|
|
166
|
-
end
|
|
167
|
-
attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
|
|
168
|
-
:hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
|
|
169
|
-
:flatseq, :query_frame, :target_frame, :csline, :rfline
|
|
170
|
-
|
|
171
|
-
def set_alignment(aln)
|
|
172
|
-
# First, split the input alignment into an array of
|
|
173
|
-
# "alignment blocks." One block usually has three lines,
|
|
174
|
-
# i.e. hmmseq, midline and flatseq.
|
|
175
|
-
# However, although infrequent, it can contain CS or RF lines.
|
|
176
|
-
aln.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
|
|
177
|
-
lines = blk.split(/\n/)
|
|
178
|
-
cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
|
|
179
|
-
rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
|
|
180
|
-
aln_width = lines[0][/\S+/].length
|
|
181
|
-
@csline = @csline.to_s + cstmp[19, aln_width] if cstmp
|
|
182
|
-
@rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
|
|
183
|
-
@hmmseq += lines[0][19, aln_width]
|
|
184
|
-
@midline += lines[1][19, aln_width]
|
|
185
|
-
@flatseq += lines[2][19, aln_width]
|
|
186
|
-
end
|
|
187
|
-
@csline = @csline[3...-3] if @csline
|
|
188
|
-
@rfline = @rfline[3...-3] if @rfline
|
|
189
|
-
@hmmseq = @hmmseq[3...-3]
|
|
190
|
-
@midline = @midline[3...-3]
|
|
191
|
-
@flatseq = @flatseq[3...-3]
|
|
192
|
-
end
|
|
193
381
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def query_to; @is_hmmsearch ? @hmm_t : @seq_t; end
|
|
382
|
+
# Container class for HMMER search hits.
|
|
383
|
+
class Hit
|
|
384
|
+
|
|
385
|
+
# An Array of Bio::HMMER::Report::Hsp objects.
|
|
386
|
+
attr_reader :hsps
|
|
200
387
|
|
|
201
|
-
|
|
202
|
-
|
|
388
|
+
#
|
|
389
|
+
attr_reader :accession
|
|
390
|
+
alias target_id accession
|
|
391
|
+
alias hit_id accession
|
|
392
|
+
alias entry_id accession
|
|
393
|
+
|
|
394
|
+
#
|
|
395
|
+
attr_reader :description
|
|
396
|
+
alias definition description
|
|
397
|
+
|
|
398
|
+
# Matching scores (total of all HSPs).
|
|
399
|
+
attr_reader :score
|
|
400
|
+
alias bit_score score
|
|
401
|
+
|
|
402
|
+
# E-value
|
|
403
|
+
attr_reader :evalue
|
|
203
404
|
|
|
405
|
+
# Number of domains
|
|
406
|
+
attr_reader :num
|
|
407
|
+
|
|
408
|
+
# Sets hit data.
|
|
409
|
+
def initialize(hit_data)
|
|
410
|
+
@hsps = Array.new
|
|
411
|
+
if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ hit_data
|
|
412
|
+
@accession, @description, @score, @evalue, @num = \
|
|
413
|
+
[$1, $2, $3.to_f, $4.to_f, $5.to_i]
|
|
414
|
+
end
|
|
204
415
|
end
|
|
205
416
|
|
|
206
417
|
|
|
207
|
-
# Bio::HMMER::Report
|
|
208
|
-
def
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
query_prefix = '^Query (HMM|sequence): .*\nAccession: '
|
|
212
|
-
hit_prefix = '^Scores for (complete sequences|sequence family)'
|
|
213
|
-
hsp_prefix = '^Parsed for domains:'
|
|
214
|
-
aln_prefix = '^Alignments of top-scoring domains:\n'
|
|
215
|
-
stat_prefix = '^\nHistogram of all scores:'
|
|
216
|
-
|
|
217
|
-
# if header exists, get it
|
|
218
|
-
if data =~ /#{header_prefix}/
|
|
219
|
-
is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
|
|
220
|
-
subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
|
|
221
|
-
else
|
|
222
|
-
is_hmmsearch = false # if no header, assumed to be hmmpfam
|
|
418
|
+
# Iterates on each Hsp object (Bio::HMMER::Report::Hsp).
|
|
419
|
+
def each
|
|
420
|
+
@hsps.each do |hsp|
|
|
421
|
+
yield hsp
|
|
223
422
|
end
|
|
423
|
+
end
|
|
424
|
+
alias :each_hsp :each
|
|
224
425
|
|
|
225
|
-
# get query, Hit and Hsp data
|
|
226
|
-
subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
|
|
227
|
-
subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
|
|
228
|
-
subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
|
|
229
426
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
427
|
+
# Shows the hit description.
|
|
428
|
+
def target_def
|
|
429
|
+
if @hsps.size == 1
|
|
430
|
+
"<#{@hsps[0].domain}> #{@description}"
|
|
234
431
|
else
|
|
235
|
-
|
|
236
|
-
subdata["alignment"] = $1
|
|
237
|
-
raise "multiple reports found" if $'.length > 0
|
|
432
|
+
"<#{@num.to_s}> #{@description}"
|
|
238
433
|
end
|
|
434
|
+
end
|
|
239
435
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
436
|
+
# Appends a Bio::HMMER::Report::Hsp object.
|
|
437
|
+
def append_hsp(hsp)
|
|
438
|
+
@hsps << hsp
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
end # class Hit
|
|
243
442
|
|
|
244
|
-
# get statistics data
|
|
245
|
-
subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
|
|
246
443
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
private :get_subdata
|
|
444
|
+
# Container class for HMMER search hsps.
|
|
445
|
+
class Hsp
|
|
250
446
|
|
|
251
|
-
#
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
447
|
+
#
|
|
448
|
+
attr_reader :accession
|
|
449
|
+
alias target_id accession
|
|
450
|
+
|
|
451
|
+
#
|
|
452
|
+
attr_reader :domain
|
|
453
|
+
|
|
454
|
+
#
|
|
455
|
+
attr_reader :seq_f
|
|
456
|
+
|
|
457
|
+
#
|
|
458
|
+
attr_reader :seq_t
|
|
459
|
+
|
|
460
|
+
#
|
|
461
|
+
attr_reader :seq_ft
|
|
462
|
+
|
|
463
|
+
#
|
|
464
|
+
attr_reader :hmm_f
|
|
465
|
+
|
|
466
|
+
#
|
|
467
|
+
attr_reader :hmm_t
|
|
468
|
+
|
|
469
|
+
#
|
|
470
|
+
attr_reader :hmm_ft
|
|
471
|
+
|
|
472
|
+
# Score
|
|
473
|
+
attr_reader :score
|
|
474
|
+
alias bit_score score
|
|
475
|
+
|
|
476
|
+
# E-value
|
|
477
|
+
attr_reader :evalue
|
|
478
|
+
|
|
479
|
+
# Alignment midline
|
|
480
|
+
attr_reader :midline
|
|
481
|
+
|
|
482
|
+
#
|
|
483
|
+
attr_reader :hmmseq
|
|
484
|
+
|
|
485
|
+
#
|
|
486
|
+
attr_reader :flatseq
|
|
487
|
+
|
|
488
|
+
#
|
|
489
|
+
attr_reader :query_frame
|
|
490
|
+
|
|
491
|
+
#
|
|
492
|
+
attr_reader :target_frame
|
|
267
493
|
|
|
268
|
-
|
|
494
|
+
# CS Line
|
|
495
|
+
attr_reader :csline
|
|
496
|
+
|
|
497
|
+
# RF Line
|
|
498
|
+
attr_reader :rfline
|
|
499
|
+
|
|
500
|
+
# Sets hsps.
|
|
501
|
+
def initialize(hsp_data, is_hmmsearch)
|
|
502
|
+
@is_hmmsearch = is_hmmsearch
|
|
503
|
+
|
|
504
|
+
@accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,\
|
|
505
|
+
score, evalue = hsp_data.split(' ')
|
|
506
|
+
@seq_f = seq_f.to_i
|
|
507
|
+
@seq_t = seq_t.to_i
|
|
508
|
+
@hmm_f = hmm_f.to_i
|
|
509
|
+
@hmm_t = hmm_t.to_i
|
|
510
|
+
@score = score.to_f
|
|
511
|
+
@evalue = evalue.to_f
|
|
512
|
+
@hmmseq = ''
|
|
513
|
+
@flatseq = ''
|
|
514
|
+
@midline = ''
|
|
515
|
+
@query_frame = 1
|
|
516
|
+
@target_frame = 1
|
|
517
|
+
# CS and RF lines are rarely used.
|
|
518
|
+
@csline = nil
|
|
519
|
+
@rfline = nil
|
|
269
520
|
end
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
521
|
+
|
|
522
|
+
#
|
|
523
|
+
def set_alignment(alignment)
|
|
524
|
+
# First, split the input alignment into an array of
|
|
525
|
+
# "alignment blocks." One block usually has three lines,
|
|
526
|
+
# i.e. hmmseq, midline and flatseq.
|
|
527
|
+
# However, although infrequent, it can contain CS or RF lines.
|
|
528
|
+
alignment.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
|
|
529
|
+
lines = blk.split(/\n/)
|
|
530
|
+
cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
|
|
531
|
+
rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
|
|
532
|
+
aln_width = lines[0][/\S+/].length
|
|
533
|
+
@csline = @csline.to_s + cstmp[19, aln_width] if cstmp
|
|
534
|
+
@rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
|
|
535
|
+
@hmmseq += lines[0][19, aln_width]
|
|
536
|
+
@midline += lines[1][19, aln_width]
|
|
537
|
+
@flatseq += lines[2][19, aln_width]
|
|
281
538
|
end
|
|
282
|
-
|
|
539
|
+
@csline = @csline[3...-3] if @csline
|
|
540
|
+
@rfline = @rfline[3...-3] if @rfline
|
|
541
|
+
@hmmseq = @hmmseq[3...-3]
|
|
542
|
+
@midline = @midline[3...-3]
|
|
543
|
+
@flatseq = @flatseq[3...-3]
|
|
283
544
|
end
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
#
|
|
287
|
-
def
|
|
288
|
-
|
|
289
|
-
hits = []
|
|
290
|
-
return hits if data == "\t[no hits above thresholds]\n"
|
|
291
|
-
data.each do |l|
|
|
292
|
-
hits.push(Hit.new(l))
|
|
293
|
-
end
|
|
294
|
-
hits
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
#
|
|
548
|
+
def query_seq
|
|
549
|
+
@is_hmmsearch ? @hmmseq : @flatseq
|
|
295
550
|
end
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
end
|
|
306
|
-
return hsps
|
|
551
|
+
|
|
552
|
+
#
|
|
553
|
+
def target_seq
|
|
554
|
+
@is_hmmsearch ? @flatseq : @hmmseq
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
#
|
|
558
|
+
def target_from
|
|
559
|
+
@is_hmmsearch ? @seq_f : @hmm_f
|
|
307
560
|
end
|
|
308
|
-
private :parse_hsp_data
|
|
309
561
|
|
|
310
|
-
#
|
|
311
|
-
def
|
|
312
|
-
|
|
313
|
-
|
|
562
|
+
#
|
|
563
|
+
def target_to
|
|
564
|
+
@is_hmmsearch ? @seq_t : @hmm_t
|
|
565
|
+
end
|
|
314
566
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
end
|
|
320
|
-
|
|
321
|
-
total_seq_searched = nil
|
|
322
|
-
data.sub!(/(.+?)\n\n/m, '')
|
|
323
|
-
$1.each do |l|
|
|
324
|
-
total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
|
|
325
|
-
end
|
|
326
|
-
|
|
327
|
-
whole_seq_top_hits = {}
|
|
328
|
-
data.sub!(/(.+?)\n\n/m, '')
|
|
329
|
-
$1.each do |l|
|
|
330
|
-
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
331
|
-
whole_seq_top_hits[$1] = $2.to_i
|
|
332
|
-
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|
|
333
|
-
whole_seq_top_hits[$1] = $2
|
|
334
|
-
end
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
domain_top_hits = {}
|
|
338
|
-
data.each do |l|
|
|
339
|
-
if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
|
|
340
|
-
domain_top_hits[$1] = $2.to_i
|
|
341
|
-
elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
|
|
342
|
-
domain_top_hits[$1] = $2
|
|
343
|
-
end
|
|
344
|
-
end
|
|
567
|
+
#
|
|
568
|
+
def query_from
|
|
569
|
+
@is_hmmsearch ? @hmm_f : @seq_f
|
|
570
|
+
end
|
|
345
571
|
|
|
346
|
-
|
|
347
|
-
|
|
572
|
+
#
|
|
573
|
+
def query_to
|
|
574
|
+
@is_hmmsearch ? @hmm_t : @seq_t
|
|
348
575
|
end
|
|
349
|
-
|
|
576
|
+
|
|
350
577
|
|
|
351
|
-
end
|
|
578
|
+
end # class Hsp
|
|
352
579
|
|
|
353
|
-
end
|
|
354
|
-
|
|
580
|
+
end # class Report
|
|
581
|
+
|
|
582
|
+
end # class HMMER
|
|
583
|
+
|
|
584
|
+
end # module Bio
|
|
355
585
|
|
|
356
586
|
|
|
357
587
|
if __FILE__ == $0
|
|
@@ -463,94 +693,9 @@ if __FILE__ == $0
|
|
|
463
693
|
p hsp.query_from # hmm_f, seq_f
|
|
464
694
|
print "query_to : ".rjust(indent)
|
|
465
695
|
p hsp.query_to # hmm_t, seq_t
|
|
466
|
-
end
|
|
696
|
+
end
|
|
467
697
|
end
|
|
468
698
|
|
|
469
|
-
end
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
=begin
|
|
473
|
-
|
|
474
|
-
= Bio::HMMER::Report
|
|
475
|
-
|
|
476
|
-
--- Bio::HMMER::Report.new(data)
|
|
477
|
-
--- Bio::HMMER::Report#each
|
|
478
|
-
|
|
479
|
-
Iterates on each Bio::HMMER::Report::Hit object.
|
|
480
|
-
|
|
481
|
-
--- Bio::HMMER::Report#hits
|
|
482
|
-
|
|
483
|
-
Returns an Array of Bio::HMMER::Report::Hit objects.
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
== Bio::HMMER::Report::Hit
|
|
487
|
-
|
|
488
|
-
--- Bio::HMMER::Report::Hit#each
|
|
489
|
-
|
|
490
|
-
Iterates on each Hsp object.
|
|
699
|
+
end
|
|
491
700
|
|
|
492
|
-
--- Bio::HMMER::Report::Hit#hsps
|
|
493
|
-
|
|
494
|
-
Returns an Array of Bio::HMMER::Report::Hsp objects.
|
|
495
|
-
|
|
496
|
-
--- Bio::HMMER::Report::Hit#target_id
|
|
497
|
-
--- Bio::HMMER::Report::Hit#hit_id
|
|
498
|
-
--- Bio::HMMER::Report::Hit#entry_id
|
|
499
|
-
--- Bio::HMMER::Report::Hit#definition
|
|
500
|
-
--- Bio::HMMER::Report::Hit#description
|
|
501
|
-
--- Bio::HMMER::Report::Hit#num
|
|
502
|
-
|
|
503
|
-
nunmer of domains
|
|
504
|
-
|
|
505
|
-
--- Bio::HMMER::Report::Hit#target_def
|
|
506
|
-
|
|
507
|
-
<domain number> + @description
|
|
508
|
-
|
|
509
|
-
--- Bio::HMMER::Report::Hit#evalue
|
|
510
|
-
--- Bio::HMMER::Report::Hit#bit_score
|
|
511
|
-
--- Bio::HMMER::Report::Hit#score
|
|
512
|
-
|
|
513
|
-
Matching scores (total of all HSPs).
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
== Bio::HMMER::Report::Hsp
|
|
517
|
-
|
|
518
|
-
--- Bio::HMMER::Report#hsps
|
|
519
|
-
|
|
520
|
-
Returns an Array of Bio::HMMER::Report::Hsp objects.
|
|
521
|
-
Under special circumstances, some HSPs do not have
|
|
522
|
-
parent Hit objects. If you want to access such HSPs,
|
|
523
|
-
use this method.
|
|
524
|
-
|
|
525
|
-
--- Bio::HMMER::Report::Hsp#target_id
|
|
526
|
-
--- Bio::HMMER::Report::Hsp#accession
|
|
527
|
-
--- Bio::HMMER::Report::Hsp#domain
|
|
528
|
-
--- Bio::HMMER::Report::Hsp#seq_f
|
|
529
|
-
--- Bio::HMMER::Report::Hsp#seq_t
|
|
530
|
-
--- Bio::HMMER::Report::Hsp#seq_ft
|
|
531
|
-
--- Bio::HMMER::Report::Hsp#hmm_f
|
|
532
|
-
--- Bio::HMMER::Report::Hsp#hmm_t
|
|
533
|
-
--- Bio::HMMER::Report::Hsp#hmm_ft
|
|
534
|
-
|
|
535
|
-
--- Bio::HMMER::Report::Hsp#bit_score
|
|
536
|
-
--- Bio::HMMER::Report::Hsp#score
|
|
537
|
-
--- Bio::HMMER::Report::Hsp#evalue
|
|
538
|
-
|
|
539
|
-
--- Bio::HMMER::Report::Hsp#midline
|
|
540
|
-
--- Bio::HMMER::Report::Hsp#hmmseq
|
|
541
|
-
--- Bio::HMMER::Report::Hsp#flatseq
|
|
542
|
-
--- Bio::HMMER::Report::Hsp#query_frame
|
|
543
|
-
--- Bio::HMMER::Report::Hsp#target_frame
|
|
544
|
-
|
|
545
|
-
--- Bio::HMMER::Report::Hsp#query_seq
|
|
546
|
-
--- Bio::HMMER::Report::Hsp#query_from
|
|
547
|
-
--- Bio::HMMER::Report::Hsp#query_to
|
|
548
|
-
--- Bio::HMMER::Report::Hsp#target_seq
|
|
549
|
-
--- Bio::HMMER::Report::Hsp#target_from
|
|
550
|
-
--- Bio::HMMER::Report::Hsp#target_to
|
|
551
|
-
|
|
552
|
-
--- Bio::HMMER::Report::Hsp#csline
|
|
553
|
-
--- Bio::HMMER::Report::Hsp#rfline
|
|
554
|
-
|
|
555
|
-
=end
|
|
556
701
|
|