bio 0.7.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -1,68 +1,61 @@
1
1
  #
2
- # bio/appl/emboss.rb - EMBOSS wrapper
2
+ # = bio/appl/emboss.rb - EMBOSS wrapper
3
3
  #
4
- # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2002, 2005
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: Ruby's
5
7
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
8
+ # $Id: emboss.rb,v 1.4 2006/02/27 09:14:30 k Exp $
10
9
  #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
10
+ # == References
15
11
  #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: emboss.rb,v 1.2 2005/09/08 01:22:08 k Exp $
12
+ # * http://www.emboss.org
21
13
  #
22
14
 
23
15
  module Bio
24
16
 
25
- class EMBOSS
17
+ autoload :Command, 'bio/command'
26
18
 
27
- def initialize(cmd_line)
28
- @cmd_line = cmd_line + ' -stdout'
29
- end
19
+ class EMBOSS
30
20
 
31
- def exec
32
- begin
33
- @io = IO.popen(@cmd_line, "w+")
34
- @result = @io.read
35
- return @result
36
- ensure
37
- @io.close
38
- end
39
- end
40
- attr_reader :io, :result
21
+ extend Bio::Command::Tools
41
22
 
23
+ def self.seqret(arg)
24
+ str = self.retrieve('seqret', arg)
42
25
  end
43
26
 
44
- end
45
-
46
- =begin
47
-
48
- = Bio::EMBOSS
49
-
50
- EMBOSS wrapper.
27
+ def self.entret(arg)
28
+ str = self.retrieve('entret', arg)
29
+ end
51
30
 
52
- #!/usr/bin/env ruby
53
- require 'bio'
31
+ def initialize(cmd_line)
32
+ @cmd_line = cmd_line + ' -stdout -auto'
33
+ end
54
34
 
55
- emboss = Bio::EMBOSS.new("getorf -sequence ~/xlrhodop -outseq stdout")
56
- puts emboss.exec
35
+ def exec
36
+ begin
37
+ @io = IO.popen(@cmd_line, "w+")
38
+ @result = @io.read
39
+ return @result
40
+ ensure
41
+ @io.close
42
+ end
43
+ end
44
+ attr_reader :io, :result
57
45
 
58
- --- Bio::EMBOSS.new(command_line)
46
+ private
59
47
 
60
- --- Bio::EMBOSS#exec
61
- --- Bio::EMBOSS#io
62
- --- Bio::EMBOSS#result
48
+ def self.retrieve(cmd, arg)
49
+ cmd = [ cmd, arg, '-auto', '-stdout' ]
50
+ str = ''
51
+ call_command_local(cmd) do |inn, out|
52
+ inn.close_write
53
+ str = out.read
54
+ end
55
+ return str
56
+ end
63
57
 
64
- === SEE ALSO
58
+ end # EMBOSS
65
59
 
66
- * http://www.emboss.org
60
+ end # Bio
67
61
 
68
- =end
@@ -1,7 +1,32 @@
1
1
  #
2
- # bio/appl/hmmer.rb - HMMER wrapper
2
+ # = bio/appl/hmmer.rb - HMMER wrapper
3
3
  #
4
- # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2002
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: hmmer.rb,v 1.5 2006/02/02 17:08:36 nakao Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # A wrapper for the HMMER programs (hmmsearch or hmmpfam).
13
+ #
14
+ # == Examples
15
+ #
16
+ # require 'bio'
17
+ # program = 'hmmsearch' # or 'hmmpfam'
18
+ # hmmfile = 'test.hmm'
19
+ # seqfile = 'test.faa'
20
+ #
21
+ # factory = Bio::HMMER.new(program, hmmfile, seqfile)
22
+ # p factory.query
23
+ #
24
+ # == References
25
+ #
26
+ # * HMMER
27
+ # http://hmmer.wustl.edu/
28
+ #
29
+ #--
5
30
  #
6
31
  # This library is free software; you can redistribute it and/or
7
32
  # modify it under the terms of the GNU Lesser General Public
@@ -17,7 +42,7 @@
17
42
  # License along with this library; if not, write to the Free Software
18
43
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
44
  #
20
- # $Id: hmmer.rb,v 1.4 2005/09/26 13:00:04 k Exp $
45
+ #++
21
46
  #
22
47
 
23
48
  require 'bio/command'
@@ -25,105 +50,114 @@ require 'shellwords'
25
50
 
26
51
  module Bio
27
52
 
28
- class HMMER
29
-
30
- autoload :Report, 'bio/appl/hmmer/report'
53
+ # A wrapper for HMMER programs (hmmsearch or hmmpfam).
54
+ #
55
+ # === Examples
56
+ #
57
+ # require 'bio'
58
+ # program = 'hmmsearch' # or 'hmmpfam'
59
+ # hmmfile = 'test.hmm'
60
+ # seqfile = 'test.faa'
61
+ #
62
+ # factory = Bio::HMMER.new(program, hmmfile, seqfile)
63
+ # report = factory.query
64
+ # report.class #=> Bio::HMMER::Report
65
+ #
66
+ # === References
67
+ #
68
+ # * HMMER
69
+ # http://hmmer.wustl.edu/
70
+ #
71
+ class HMMER
72
+
73
+ autoload :Report, 'bio/appl/hmmer/report'
74
+
75
+ include Bio::Command::Tools
76
+
77
+ # Program name (hmmsearch or hmmpfam).
78
+ attr_accessor :program
79
+
80
+ # Name of hmmfile.
81
+ attr_accessor :hmmfile
82
+
83
+ # Name of seqfile.
84
+ attr_accessor :seqfile
85
+
86
+ # Command line options.
87
+ attr_accessor :options
88
+
89
+ # Shows the raw output from the hmmer search.
90
+ attr_reader :output
91
+
92
+ # Sets a program name, a profile hmm file name, a query sequence file name
93
+ # and options in string.
94
+ #
95
+ # Program names: hmmsearch, hmmpfam
96
+ #
97
+ def initialize(program, hmmfile, seqfile, options = [])
98
+ @program = program
99
+ @hmmfile = hmmfile
100
+ @seqfile = seqfile
101
+ @output = ''
102
+
103
+ begin
104
+ @options = opt.to_ary
105
+ rescue NameError #NoMethodError
106
+ # backward compatibility
107
+ @options = Shellwords.shellwords(options)
108
+ end
109
+ end
31
110
 
32
- include Bio::Command::Tools
33
111
 
34
- def initialize(program, hmmfile, seqfile, opt = [])
35
- @program = program
36
- @hmmfile = hmmfile
37
- @seqfile = seqfile
38
- @output = ''
112
+ # Gets options by String.
113
+ # backward compatibility.
114
+ def option
115
+ make_command_line(@options)
116
+ end
39
117
 
40
- begin
41
- @options = opt.to_ary
42
- rescue NameError #NoMethodError
43
- # backward compatibility
44
- @options = Shellwords.shellwords(opt)
45
- end
46
- end
47
- attr_accessor :program, :hmmfile, :seqfile, :options
48
- attr_reader :output
49
118
 
50
- def option
51
- # backward compatibility
52
- make_command_line(@options)
53
- end
119
+ # Sets options by String.
120
+ # backward compatibility.
121
+ def option=(str)
122
+ @options = Shellwords.shellwords(str)
123
+ end
54
124
 
55
- def option=(str)
56
- # backward compatibility
57
- @options = Shellwords.shellwords(str)
58
- end
59
125
 
60
- def query
61
- cmd = [ @program, *@options ]
62
- cmd.concat([ @hmmfile, @seqfile ])
126
+ # Executes the hmmer search and returns the report
127
+ # (Bio::HMMER::Report object).
128
+ def query
129
+ cmd = [ @program, *@options ]
130
+ cmd.concat([ @hmmfile, @seqfile ])
63
131
 
64
- report = nil
65
-
66
- @output = call_command_local(cmd, nil)
67
- report = parse_result(@output)
132
+ report = nil
133
+
134
+ @output = call_command_local(cmd, nil)
135
+ report = parse_result(@output)
68
136
 
69
- return report
70
- end
71
-
137
+ return report
138
+ end
72
139
 
73
- private
140
+ private
141
+
142
+ def parse_result(data)
143
+ Report.new(data)
144
+ end
74
145
 
75
- def parse_result(data)
76
- Report.new(data)
77
- end
146
+ end # class HMMER
78
147
 
79
- end
80
- end
148
+ end # module Bio
81
149
 
82
150
 
83
151
 
84
152
  if __FILE__ == $0
85
153
 
86
- begin
87
- require 'pp'
88
- alias p pp
89
- rescue
90
- end
154
+ require 'pp'
91
155
 
92
- program = ARGV.shift # hmmsearch, hmmpfam
156
+ program = ARGV.shift # hmmsearch, hmmpfam
93
157
  hmmfile = ARGV.shift
94
158
  seqfile = ARGV.shift
95
159
 
96
160
  factory = Bio::HMMER.new(program, hmmfile, seqfile)
97
- p factory.query
161
+ pp factory.query
98
162
 
99
163
  end
100
-
101
-
102
- =begin
103
-
104
- = Bio::HMMER
105
-
106
- --- Bio::HMMER.new(program, hmmfile, seqfile, option = '')
107
- --- Bio::HMMER#program
108
- --- Bio::HMMER#hmmfile
109
- --- Bio::HMMER#seqfile
110
- --- Bio::HMMER#options
111
-
112
- Accessors for the factory.
113
-
114
- --- Bio::HMMER#option
115
- --- Bio::HMMER#option=(str)
116
-
117
- Get/set options by string.
118
-
119
- --- Bio::HMMER#query
120
-
121
- Executes the hmmer search and returns Report object (Bio::HMMER::Report).
122
-
123
- --- Bio::HMMER#output
124
-
125
- Shows the raw output from hmmer search.
126
-
127
- =end
128
-
129
-
@@ -1,8 +1,44 @@
1
1
  #
2
- # bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parserer
2
+ # = bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
3
3
  #
4
- # Copyright (C) 2002 Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>
5
- # Copyright (C) 2005 Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
4
+ # Copyright:: Copyright (C) 2002
5
+ # Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>,
6
+ # Copyright:: Copyright (C) 2005
7
+ # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
8
+ # License:: LGPL
9
+ #
10
+ # $Id: report.rb,v 1.10 2006/02/02 17:08:36 nakao Exp $
11
+ #
12
+ # == Description
13
+ #
14
+ # Parser class for hmmsearch and hmmpfam in the HMMER package.
15
+ #
16
+ # == Examples
17
+ #
18
+ # #for multiple reports in a single output file (example.hmmpfam)
19
+ # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
20
+ # report.program['name']
21
+ # report.parameter['HMM file']
22
+ # report.query_info['Query sequence']
23
+ # report.hits.each do |hit|
24
+ # hit.accession
25
+ # hit.description
26
+ # hit.score
27
+ # hit.evalue
28
+ # hit.hsps.each do |hsp|
29
+ # hsp.accession
30
+ # hsp.domain
31
+ # hsp.evalue
32
+ # hsp.midline
33
+ # end
34
+ # end
35
+ #
36
+ # == References
37
+ #
38
+ # * HMMER
39
+ # http://hmmer.wustl.edu/
40
+ #
41
+ #--
6
42
  #
7
43
  # This library is free software; you can redistribute it and/or
8
44
  # modify it under the terms of the GNU Lesser General Public
@@ -18,340 +54,534 @@
18
54
  # License along with this library; if not, write to the Free Software
19
55
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
56
  #
21
- # $Id: report.rb,v 1.9 2005/10/31 09:12:03 k Exp $
57
+ #++
22
58
  #
23
59
 
24
60
  require 'bio/appl/hmmer'
25
61
 
26
62
  module Bio
27
- class HMMER
28
63
 
29
- def self.reports(input)
30
- ary = []
31
- input.each("\n//\n") do |data|
32
- if block_given?
33
- yield Report.new(data)
34
- else
35
- ary << Report.new(data)
36
- end
64
+
65
+ class HMMER
66
+
67
+ # A reader interface that splits text containing multiple reports into
68
+ # individual reports (Bio::HMMER::Report).
69
+ #
70
+ # === Examples
71
+ #
72
+ # # Iterator
73
+ # Bio::HMMER.reports(reports_text) do |report|
74
+ # report
75
+ # end
76
+ #
77
+ # # Array
78
+ # reports = Bio::HMMER.reports(reports_text)
79
+ #
80
+ def self.reports(multiple_report_text)
81
+ ary = []
82
+ multiple_report_text.each("\n//\n") do |report|
83
+ if block_given?
84
+ yield Report.new(report)
85
+ else
86
+ ary << Report.new(report)
37
87
  end
38
- return ary
39
88
  end
89
+ return ary
90
+ end
40
91
 
41
92
 
42
- # Bio::HMMER::Report
43
- class Report
44
-
45
- # for Bio::FlatFile support
46
- DELIMITER = RS = "\n//\n"
93
+ # A parser class for a search report by hmmsearch or hmmpfam program in the
94
+ # HMMER package.
95
+ #
96
+ # === Examples
97
+ #
98
+ # Examples
99
+ # #for multiple reports in a single output file (example.hmmpfam)
100
+ # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
101
+ # report.program['name']
102
+ # report.parameter['HMM file']
103
+ # report.query_info['Query sequence']
104
+ # report.hits.each do |hit|
105
+ # hit.accession
106
+ # hit.description
107
+ # hit.score
108
+ # hit.evalue
109
+ # hit.hsps.each do |hsp|
110
+ # hsp.accession
111
+ # hsp.domain
112
+ # hsp.evalue
113
+ # hsp.midline
114
+ # end
115
+ # end
116
+ #
117
+ # === References
118
+ #
119
+ # * HMMER
120
+ # http://hmmer.wustl.edu/
121
+ #
122
+ class Report
123
+
124
+ # Delimiter of each entry for Bio::FlatFile support.
125
+ DELIMITER = RS = "\n//\n"
126
+
127
+
128
+ # A Hash contains program information used.
129
+ # Valid keys are 'name', 'version', 'copyright' and 'license'.
130
+ attr_reader :program
131
+
132
+ # A hash contains parameters used.
133
+ # Valid keys are 'HMM file' and 'Sequence file'.
134
+ attr_reader :parameter
135
+
136
+ # A hash contains the query information.
137
+ # Valid keys are 'Query sequence' (or 'Query HMM'), 'Accession' and 'Description'.
138
+ attr_reader :query_info
139
+
140
+ # Returns an Array of Bio::HMMER::Report::Hit objects.
141
+ attr_reader :hits
142
+
143
+ # Returns an Array of Bio::HMMER::Report::Hsp objects.
144
+ # Under special circumstances, some HSPs do not have
145
+ # parent Hit objects. If you want to access such HSPs,
146
+ # use this method.
147
+ attr_reader :hsps
148
+
149
+ # statistics by hmmsearch.
150
+ attr_reader :histogram
151
+
152
+ # statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'.
153
+ attr_reader :statistical_detail
154
+
155
+ # statistics by hmmsearch.
156
+ attr_reader :total_seq_searched
157
+
158
+ # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
159
+ attr_reader :whole_seq_top_hits
160
+
161
+ # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
162
+ attr_reader :domain_top_hits
163
+
164
+
165
+ # Parses a HMMER search report (by hmmpfam or hmmsearch program) and
166
+ # returns a Bio::HMMER::Report object.
167
+ #
168
+ # === Examples
169
+ #
170
+ # hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out"))
171
+ #
172
+ # hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out"))
173
+ #
174
+ def initialize(data)
175
+
176
+ # The input data is divided into six data fields, i.e. header,
177
+ # query information, hits, HSPs, alignments and search statistics.
178
+ # However, header and statistics data don't necessarily exist.
179
+ subdata, is_hmmsearch = get_subdata(data)
180
+
181
+ # if header exists, parse it
182
+ if subdata["header"]
183
+ @program, @parameter = parse_header_data(subdata["header"])
184
+ else
185
+ @program, @parameter = [{}, {}]
186
+ end
47
187
 
48
- def initialize(data)
188
+ @query_info = parse_query_info(subdata["query"])
189
+ @hits = parse_hit_data(subdata["hit"])
190
+ @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch)
49
191
 
50
- # The input data is divided into six data fields, i.e. header,
51
- # query infomation, hits, HSPs, alignments and search statistics.
52
- # However, header and statistics data don't necessarily exist.
53
- subdata, is_hmmsearch = get_subdata(data)
192
+ if @hsps != []
193
+ # split alignment subdata into an array of alignments
194
+ aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1)
54
195
 
55
- # if header exists, parse it
56
- if subdata["header"]
57
- @program, @parameter = parse_header_data(subdata["header"])
58
- else
59
- @program, @parameter = [{}, {}]
196
+ # append alignment information to corresponding Hsp
197
+ aln_ary.each_with_index do |aln, i|
198
+ @hsps[i].set_alignment(aln)
60
199
  end
200
+ end
61
201
 
62
- @query_info = parse_query_info(subdata["query"])
63
- @hits = parse_hit_data(subdata["hit"])
64
- @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch)
65
-
66
- if @hsps != []
67
- # split alignment subdata into an array of alignments
68
- aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1)
69
-
70
- # append alignment information to corresponding Hsp
71
- aln_ary.each_with_index do |aln, i|
72
- @hsps[i].set_alignment(aln)
73
- end
202
+ # assign each Hsp object to its parent Hit
203
+ hits_hash = {}
204
+ @hits.each do |hit|
205
+ hits_hash[hit.accession] = hit
206
+ end
207
+ @hsps.each do |hsp|
208
+ if hits_hash.has_key?(hsp.accession)
209
+ hits_hash[hsp.accession].append_hsp(hsp)
74
210
  end
211
+ end
75
212
 
76
- # assign each Hsp object to its parent Hit
77
- hits_hash = {}
78
- @hits.each do |hit|
79
- hits_hash[hit.accession] = hit
80
- end
81
- @hsps.each do |hsp|
82
- if hits_hash.has_key?(hsp.accession)
83
- hits_hash[hsp.accession].append_hsp(hsp)
84
- end
85
- end
213
+ # parse statistics (for hmmsearch)
214
+ if is_hmmsearch
215
+ @histogram, @statistical_detail, @total_seq_searched, \
216
+ @whole_seq_top_hits, @domain_top_hits = \
217
+ parse_stat_data(subdata["statistics"])
218
+ end
219
+
220
+ end
86
221
 
87
- # parse statistics (for hmmsearch)
88
- if is_hmmsearch
89
- @histogram, @statistical_detail, @total_seq_searched, \
90
- @whole_seq_top_hits, @domain_top_hits = \
91
- parse_stat_data(subdata["statistics"])
92
- end
93
222
 
223
+ # Iterates each hit (Bio::HMMER::Report::Hit).
224
+ def each
225
+ @hits.each do |hit|
226
+ yield hit
227
+ end
228
+ end
229
+ alias :each_hit :each
230
+
231
+
232
+ # Bio::HMMER::Report#get_subdata
233
+ def get_subdata(data)
234
+ subdata = {}
235
+ header_prefix = '\Ahmm(search|pfam) - search'
236
+ query_prefix = '^Query (HMM|sequence): .*\nAccession: '
237
+ hit_prefix = '^Scores for (complete sequences|sequence family)'
238
+ hsp_prefix = '^Parsed for domains:'
239
+ aln_prefix = '^Alignments of top-scoring domains:\n'
240
+ stat_prefix = '^\nHistogram of all scores:'
241
+
242
+ # if header exists, get it
243
+ if data =~ /#{header_prefix}/
244
+ is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
245
+ subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
246
+ else
247
+ is_hmmsearch = false # if no header, assumed to be hmmpfam
94
248
  end
95
- attr_reader :program, :parameter, :query_info, :hits, :hsps,
96
- :histogram, :statistical_detail, :total_seq_searched,
97
- :whole_seq_top_hits, :domain_top_hits
98
249
 
250
+ # get query, Hit and Hsp data
251
+ subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
252
+ subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
253
+ subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
254
+
255
+ # get alignment data
256
+ if is_hmmsearch
257
+ data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
258
+ subdata["alignment"] = $1
259
+ else
260
+ data =~ /#{aln_prefix}(.+?)\/\/\n/m
261
+ subdata["alignment"] = $1
262
+ raise "multiple reports found" if $'.length > 0
263
+ end
264
+
265
+ # handle -A option of HMMER
266
+ cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
267
+ subdata["alignment"].sub!(/#{cutoff_line}/, '')
268
+
269
+ # get statistics data
270
+ subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
99
271
 
100
- def each
101
- @hits.each do |x|
102
- yield x
272
+ [subdata, is_hmmsearch]
273
+ end
274
+ private :get_subdata
275
+
276
+
277
+ # Bio::HMMER::Report#parse_header_data
278
+ def parse_header_data(data)
279
+ data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
280
+ program_data = $1
281
+ parameter_data = $2
282
+
283
+ program = {}
284
+ program['name'], program['version'], program['copyright'], \
285
+ program['license'] = program_data.split(/\n/)
286
+
287
+ parameter = {}
288
+ parameter_data.each do |x|
289
+ if /^(.+?):\s+(.*?)\s*$/ =~ x
290
+ parameter[$1] = $2
103
291
  end
104
292
  end
105
293
 
294
+ [program, parameter]
295
+ end
296
+ private :parse_header_data
106
297
 
107
- # Bio::HMMER::Report::Hit
108
- class Hit
109
- def initialize(data)
110
- @hsps = Array.new
111
- if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data
112
- @accession, @description, @score, @evalue, @num =
113
- [$1, $2, $3.to_f, $4.to_f, $5.to_i]
114
- end
115
- end
116
- attr_reader :hsps, :accession, :description, :score, :evalue, :num
117
298
 
118
- def each
119
- @hsps.each do |x|
120
- yield x
121
- end
299
+ # Bio::HMMER::Report#parse_query_info
300
+ def parse_query_info(data)
301
+ hash = {}
302
+ data.each do |x|
303
+ if /^(.+?):\s+(.*?)\s*$/ =~ x
304
+ hash[$1] = $2
305
+ elsif /\s+\[(.+)\]/ =~ x
306
+ hash['comments'] = $1
122
307
  end
123
-
124
- alias target_id accession
125
- alias hit_id accession
126
- alias entry_id accession
127
- alias definition description
128
- alias bit_score score
129
-
130
- def target_def
131
- if @hsps.size == 1
132
- "<#{@hsps[0].domain}> #{@description}"
133
- else
134
- "<#{@num.to_s}> #{@description}"
135
- end
308
+ end
309
+ hash
310
+ end
311
+ private :parse_query_info
312
+
313
+
314
+ # Bio::HMMER::Report#parse_hit_data
315
+ def parse_hit_data(data)
316
+ data.sub!(/.+?---\n/m, '').chop!
317
+ hits = []
318
+ return hits if data == "\t[no hits above thresholds]\n"
319
+ data.each do |l|
320
+ hits.push(Hit.new(l))
321
+ end
322
+ hits
323
+ end
324
+ private :parse_hit_data
325
+
326
+
327
+ # Bio::HMMER::Report#parse_hsp_data
328
+ def parse_hsp_data(data, is_hmmsearch)
329
+ data.sub!(/.+?---\n/m, '').chop!
330
+ hsps=[]
331
+ return hsps if data == "\t[no hits above thresholds]\n"
332
+ data.each do |l|
333
+ hsps.push(Hsp.new(l, is_hmmsearch))
334
+ end
335
+ return hsps
336
+ end
337
+ private :parse_hsp_data
338
+
339
+
340
+ # Bio::HMMER::Report#parse_stat_data
341
+ def parse_stat_data(data)
342
+ data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
343
+ histogram = $1.strip
344
+
345
+ statistical_detail = {}
346
+ data.sub!(/(.+?)\n\n/m, '')
347
+ $1.each do |l|
348
+ statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
349
+ end
350
+
351
+ total_seq_searched = nil
352
+ data.sub!(/(.+?)\n\n/m, '')
353
+ $1.each do |l|
354
+ total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
355
+ end
356
+
357
+ whole_seq_top_hits = {}
358
+ data.sub!(/(.+?)\n\n/m, '')
359
+ $1.each do |l|
360
+ if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
361
+ whole_seq_top_hits[$1] = $2.to_i
362
+ elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
363
+ whole_seq_top_hits[$1] = $2
136
364
  end
137
-
138
- def append_hsp(hsp)
139
- @hsps << hsp
365
+ end
366
+
367
+ domain_top_hits = {}
368
+ data.each do |l|
369
+ if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
370
+ domain_top_hits[$1] = $2.to_i
371
+ elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
372
+ domain_top_hits[$1] = $2
140
373
  end
141
-
142
374
  end
143
375
 
376
+ [histogram, statistical_detail, total_seq_searched, \
377
+ whole_seq_top_hits, domain_top_hits]
378
+ end
379
+ private :parse_stat_data
144
380
 
145
- # Bio::HMMER::Report::Hsp
146
- class Hsp
147
- def initialize(data, is_hmmsearch)
148
- @is_hmmsearch = is_hmmsearch
149
-
150
- @accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,
151
- score, evalue = data.split(' ')
152
- @seq_f = seq_f.to_i
153
- @seq_t = seq_t.to_i
154
- @hmm_f = hmm_f.to_i
155
- @hmm_t = hmm_t.to_i
156
- @score = score.to_f
157
- @evalue = evalue.to_f
158
- @hmmseq = ''
159
- @flatseq = ''
160
- @midline = ''
161
- @query_frame = 1
162
- @target_frame = 1
163
- # CS and RF lines are rarely used.
164
- @csline = nil
165
- @rfline = nil
166
- end
167
- attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
168
- :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
169
- :flatseq, :query_frame, :target_frame, :csline, :rfline
170
-
171
- def set_alignment(aln)
172
- # First, split the input alignment into an array of
173
- # "alignment blocks." One block usually has three lines,
174
- # i.e. hmmseq, midline and flatseq.
175
- # However, although infrequent, it can contain CS or RF lines.
176
- aln.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
177
- lines = blk.split(/\n/)
178
- cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
179
- rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
180
- aln_width = lines[0][/\S+/].length
181
- @csline = @csline.to_s + cstmp[19, aln_width] if cstmp
182
- @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
183
- @hmmseq += lines[0][19, aln_width]
184
- @midline += lines[1][19, aln_width]
185
- @flatseq += lines[2][19, aln_width]
186
- end
187
- @csline = @csline[3...-3] if @csline
188
- @rfline = @rfline[3...-3] if @rfline
189
- @hmmseq = @hmmseq[3...-3]
190
- @midline = @midline[3...-3]
191
- @flatseq = @flatseq[3...-3]
192
- end
193
381
 
194
- def query_seq; @is_hmmsearch ? @hmmseq : @flatseq; end
195
- def target_seq; @is_hmmsearch ? @flatseq : @hmmseq; end
196
- def target_from; @is_hmmsearch ? @seq_f : @hmm_f; end
197
- def target_to; @is_hmmsearch ? @seq_t : @hmm_t; end
198
- def query_from; @is_hmmsearch ? @hmm_f : @seq_f; end
199
- def query_to; @is_hmmsearch ? @hmm_t : @seq_t; end
382
+ # Container class for HMMER search hits.
383
+ class Hit
384
+
385
+ # An Array of Bio::HMMER::Report::Hsp objects.
386
+ attr_reader :hsps
200
387
 
201
- alias bit_score score
202
- alias target_id accession
388
+ #
389
+ attr_reader :accession
390
+ alias target_id accession
391
+ alias hit_id accession
392
+ alias entry_id accession
393
+
394
+ #
395
+ attr_reader :description
396
+ alias definition description
397
+
398
+ # Matching scores (total of all HSPs).
399
+ attr_reader :score
400
+ alias bit_score score
401
+
402
+ # E-value
403
+ attr_reader :evalue
203
404
 
405
+ # Number of domains
406
+ attr_reader :num
407
+
408
+ # Sets hit data.
409
+ def initialize(hit_data)
410
+ @hsps = Array.new
411
+ if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ hit_data
412
+ @accession, @description, @score, @evalue, @num = \
413
+ [$1, $2, $3.to_f, $4.to_f, $5.to_i]
414
+ end
204
415
  end
205
416
 
206
417
 
207
- # Bio::HMMER::Report#get_subdata
208
- def get_subdata(data)
209
- subdata = {}
210
- header_prefix = '\Ahmm(search|pfam) - search'
211
- query_prefix = '^Query (HMM|sequence): .*\nAccession: '
212
- hit_prefix = '^Scores for (complete sequences|sequence family)'
213
- hsp_prefix = '^Parsed for domains:'
214
- aln_prefix = '^Alignments of top-scoring domains:\n'
215
- stat_prefix = '^\nHistogram of all scores:'
216
-
217
- # if header exists, get it
218
- if data =~ /#{header_prefix}/
219
- is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
220
- subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
221
- else
222
- is_hmmsearch = false # if no header, assumed to be hmmpfam
418
+ # Iterates on each Hsp object (Bio::HMMER::Report::Hsp).
419
+ def each
420
+ @hsps.each do |hsp|
421
+ yield hsp
223
422
  end
423
+ end
424
+ alias :each_hsp :each
224
425
 
225
- # get query, Hit and Hsp data
226
- subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
227
- subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
228
- subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
229
426
 
230
- # get alignment data
231
- if is_hmmsearch
232
- data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
233
- subdata["alignment"] = $1
427
+ # Shows the hit description.
428
+ def target_def
429
+ if @hsps.size == 1
430
+ "<#{@hsps[0].domain}> #{@description}"
234
431
  else
235
- data =~ /#{aln_prefix}(.+?)\/\/\n/m
236
- subdata["alignment"] = $1
237
- raise "multiple reports found" if $'.length > 0
432
+ "<#{@num.to_s}> #{@description}"
238
433
  end
434
+ end
239
435
 
240
- # handle -A option of HMMER
241
- cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
242
- subdata["alignment"].sub!(/#{cutoff_line}/, '')
436
+ # Appends a Bio::HMMER::Report::Hsp object.
437
+ def append_hsp(hsp)
438
+ @hsps << hsp
439
+ end
440
+
441
+ end # class Hit
243
442
 
244
- # get statistics data
245
- subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
246
443
 
247
- [subdata, is_hmmsearch]
248
- end
249
- private :get_subdata
444
+ # Container class for HMMER search hsps.
445
+ class Hsp
250
446
 
251
- # Bio::HMMER::Report#parse_header_data
252
- def parse_header_data(data)
253
- data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
254
- program_data = $1
255
- parameter_data = $2
256
-
257
- program = {}
258
- program['name'], program['version'], program['copyright'], \
259
- program['license'] = program_data.split(/\n/)
260
-
261
- parameter = {}
262
- parameter_data.each do |x|
263
- if /^(.+?):\s+(.*?)\s*$/ =~ x
264
- parameter[$1] = $2
265
- end
266
- end
447
+ #
448
+ attr_reader :accession
449
+ alias target_id accession
450
+
451
+ #
452
+ attr_reader :domain
453
+
454
+ #
455
+ attr_reader :seq_f
456
+
457
+ #
458
+ attr_reader :seq_t
459
+
460
+ #
461
+ attr_reader :seq_ft
462
+
463
+ #
464
+ attr_reader :hmm_f
465
+
466
+ #
467
+ attr_reader :hmm_t
468
+
469
+ #
470
+ attr_reader :hmm_ft
471
+
472
+ # Score
473
+ attr_reader :score
474
+ alias bit_score score
475
+
476
+ # E-value
477
+ attr_reader :evalue
478
+
479
+ # Alignment midline
480
+ attr_reader :midline
481
+
482
+ #
483
+ attr_reader :hmmseq
484
+
485
+ #
486
+ attr_reader :flatseq
487
+
488
+ #
489
+ attr_reader :query_frame
490
+
491
+ #
492
+ attr_reader :target_frame
267
493
 
268
- [program, parameter]
494
+ # CS Line
495
+ attr_reader :csline
496
+
497
+ # RF Line
498
+ attr_reader :rfline
499
+
500
+ # Sets hsps.
501
+ def initialize(hsp_data, is_hmmsearch)
502
+ @is_hmmsearch = is_hmmsearch
503
+
504
+ @accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,\
505
+ score, evalue = hsp_data.split(' ')
506
+ @seq_f = seq_f.to_i
507
+ @seq_t = seq_t.to_i
508
+ @hmm_f = hmm_f.to_i
509
+ @hmm_t = hmm_t.to_i
510
+ @score = score.to_f
511
+ @evalue = evalue.to_f
512
+ @hmmseq = ''
513
+ @flatseq = ''
514
+ @midline = ''
515
+ @query_frame = 1
516
+ @target_frame = 1
517
+ # CS and RF lines are rarely used.
518
+ @csline = nil
519
+ @rfline = nil
269
520
  end
270
- private :parse_header_data
271
-
272
- # Bio::HMMER::Report#parse_query_info
273
- def parse_query_info(data)
274
- hash = {}
275
- data.each do |x|
276
- if /^(.+?):\s+(.*?)\s*$/ =~ x
277
- hash[$1] = $2
278
- elsif /\s+\[(.+)\]/ =~ x
279
- hash['comments'] = $1
280
- end
521
+
522
+ #
523
+ def set_alignment(alignment)
524
+ # First, split the input alignment into an array of
525
+ # "alignment blocks." One block usually has three lines,
526
+ # i.e. hmmseq, midline and flatseq.
527
+ # However, although infrequent, it can contain CS or RF lines.
528
+ alignment.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
529
+ lines = blk.split(/\n/)
530
+ cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
531
+ rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
532
+ aln_width = lines[0][/\S+/].length
533
+ @csline = @csline.to_s + cstmp[19, aln_width] if cstmp
534
+ @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
535
+ @hmmseq += lines[0][19, aln_width]
536
+ @midline += lines[1][19, aln_width]
537
+ @flatseq += lines[2][19, aln_width]
281
538
  end
282
- hash
539
+ @csline = @csline[3...-3] if @csline
540
+ @rfline = @rfline[3...-3] if @rfline
541
+ @hmmseq = @hmmseq[3...-3]
542
+ @midline = @midline[3...-3]
543
+ @flatseq = @flatseq[3...-3]
283
544
  end
284
- private :parse_query_info
285
-
286
- # Bio::HMMER::Report#parse_hit_data
287
- def parse_hit_data(data)
288
- data.sub!(/.+?---\n/m, '').chop!
289
- hits = []
290
- return hits if data == "\t[no hits above thresholds]\n"
291
- data.each do |l|
292
- hits.push(Hit.new(l))
293
- end
294
- hits
545
+
546
+
547
+ #
548
+ def query_seq
549
+ @is_hmmsearch ? @hmmseq : @flatseq
295
550
  end
296
- private :parse_hit_data
297
-
298
- # Bio::HMMER::Report#parse_hsp_data
299
- def parse_hsp_data(data, is_hmmsearch)
300
- data.sub!(/.+?---\n/m, '').chop!
301
- hsps=[]
302
- return hsps if data == "\t[no hits above thresholds]\n"
303
- data.each do |l|
304
- hsps.push(Hsp.new(l, is_hmmsearch))
305
- end
306
- return hsps
551
+
552
+ #
553
+ def target_seq
554
+ @is_hmmsearch ? @flatseq : @hmmseq
555
+ end
556
+
557
+ #
558
+ def target_from
559
+ @is_hmmsearch ? @seq_f : @hmm_f
307
560
  end
308
- private :parse_hsp_data
309
561
 
310
- # Bio::HMMER::Report#parse_stat_data
311
- def parse_stat_data(data)
312
- data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
313
- histogram = $1
562
+ #
563
+ def target_to
564
+ @is_hmmsearch ? @seq_t : @hmm_t
565
+ end
314
566
 
315
- statistical_detail = {}
316
- data.sub!(/(.+?)\n\n/m, '')
317
- $1.each do |l|
318
- statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
319
- end
320
-
321
- total_seq_searched = nil
322
- data.sub!(/(.+?)\n\n/m, '')
323
- $1.each do |l|
324
- total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
325
- end
326
-
327
- whole_seq_top_hits = {}
328
- data.sub!(/(.+?)\n\n/m, '')
329
- $1.each do |l|
330
- if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
331
- whole_seq_top_hits[$1] = $2.to_i
332
- elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
333
- whole_seq_top_hits[$1] = $2
334
- end
335
- end
336
-
337
- domain_top_hits = {}
338
- data.each do |l|
339
- if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
340
- domain_top_hits[$1] = $2.to_i
341
- elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
342
- domain_top_hits[$1] = $2
343
- end
344
- end
567
+ #
568
+ def query_from
569
+ @is_hmmsearch ? @hmm_f : @seq_f
570
+ end
345
571
 
346
- [histogram, statistical_detail, total_seq_searched, \
347
- whole_seq_top_hits, domain_top_hits]
572
+ #
573
+ def query_to
574
+ @is_hmmsearch ? @hmm_t : @seq_t
348
575
  end
349
- private :parse_stat_data
576
+
350
577
 
351
- end
578
+ end # class Hsp
352
579
 
353
- end
354
- end
580
+ end # class Report
581
+
582
+ end # class HMMER
583
+
584
+ end # module Bio
355
585
 
356
586
 
357
587
  if __FILE__ == $0
@@ -463,94 +693,9 @@ if __FILE__ == $0
463
693
  p hsp.query_from # hmm_f, seq_f
464
694
  print "query_to : ".rjust(indent)
465
695
  p hsp.query_to # hmm_t, seq_t
466
- end
696
+ end
467
697
  end
468
698
 
469
- end
470
-
471
-
472
- =begin
473
-
474
- = Bio::HMMER::Report
475
-
476
- --- Bio::HMMER::Report.new(data)
477
- --- Bio::HMMER::Report#each
478
-
479
- Iterates on each Bio::HMMER::Report::Hit object.
480
-
481
- --- Bio::HMMER::Report#hits
482
-
483
- Returns an Array of Bio::HMMER::Report::Hit objects.
484
-
485
-
486
- == Bio::HMMER::Report::Hit
487
-
488
- --- Bio::HMMER::Report::Hit#each
489
-
490
- Iterates on each Hsp object.
699
+ end
491
700
 
492
- --- Bio::HMMER::Report::Hit#hsps
493
-
494
- Returns an Array of Bio::HMMER::Report::Hsp objects.
495
-
496
- --- Bio::HMMER::Report::Hit#target_id
497
- --- Bio::HMMER::Report::Hit#hit_id
498
- --- Bio::HMMER::Report::Hit#entry_id
499
- --- Bio::HMMER::Report::Hit#definition
500
- --- Bio::HMMER::Report::Hit#description
501
- --- Bio::HMMER::Report::Hit#num
502
-
503
- nunmer of domains
504
-
505
- --- Bio::HMMER::Report::Hit#target_def
506
-
507
- <domain number> + @description
508
-
509
- --- Bio::HMMER::Report::Hit#evalue
510
- --- Bio::HMMER::Report::Hit#bit_score
511
- --- Bio::HMMER::Report::Hit#score
512
-
513
- Matching scores (total of all HSPs).
514
-
515
-
516
- == Bio::HMMER::Report::Hsp
517
-
518
- --- Bio::HMMER::Report#hsps
519
-
520
- Returns an Array of Bio::HMMER::Report::Hsp objects.
521
- Under special circumstances, some HSPs do not have
522
- parent Hit objects. If you want to access such HSPs,
523
- use this method.
524
-
525
- --- Bio::HMMER::Report::Hsp#target_id
526
- --- Bio::HMMER::Report::Hsp#accession
527
- --- Bio::HMMER::Report::Hsp#domain
528
- --- Bio::HMMER::Report::Hsp#seq_f
529
- --- Bio::HMMER::Report::Hsp#seq_t
530
- --- Bio::HMMER::Report::Hsp#seq_ft
531
- --- Bio::HMMER::Report::Hsp#hmm_f
532
- --- Bio::HMMER::Report::Hsp#hmm_t
533
- --- Bio::HMMER::Report::Hsp#hmm_ft
534
-
535
- --- Bio::HMMER::Report::Hsp#bit_score
536
- --- Bio::HMMER::Report::Hsp#score
537
- --- Bio::HMMER::Report::Hsp#evalue
538
-
539
- --- Bio::HMMER::Report::Hsp#midline
540
- --- Bio::HMMER::Report::Hsp#hmmseq
541
- --- Bio::HMMER::Report::Hsp#flatseq
542
- --- Bio::HMMER::Report::Hsp#query_frame
543
- --- Bio::HMMER::Report::Hsp#target_frame
544
-
545
- --- Bio::HMMER::Report::Hsp#query_seq
546
- --- Bio::HMMER::Report::Hsp#query_from
547
- --- Bio::HMMER::Report::Hsp#query_to
548
- --- Bio::HMMER::Report::Hsp#target_seq
549
- --- Bio::HMMER::Report::Hsp#target_from
550
- --- Bio::HMMER::Report::Hsp#target_to
551
-
552
- --- Bio::HMMER::Report::Hsp#csline
553
- --- Bio::HMMER::Report::Hsp#rfline
554
-
555
- =end
556
701