bio 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -1,68 +1,61 @@
1
1
  #
2
- # bio/appl/emboss.rb - EMBOSS wrapper
2
+ # = bio/appl/emboss.rb - EMBOSS wrapper
3
3
  #
4
- # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2002, 2005
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: Ruby's
5
7
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
8
+ # $Id: emboss.rb,v 1.4 2006/02/27 09:14:30 k Exp $
10
9
  #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
10
+ # == References
15
11
  #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: emboss.rb,v 1.2 2005/09/08 01:22:08 k Exp $
12
+ # * http://www.emboss.org
21
13
  #
22
14
 
23
15
  module Bio
24
16
 
25
- class EMBOSS
17
+ autoload :Command, 'bio/command'
26
18
 
27
- def initialize(cmd_line)
28
- @cmd_line = cmd_line + ' -stdout'
29
- end
19
+ class EMBOSS
30
20
 
31
- def exec
32
- begin
33
- @io = IO.popen(@cmd_line, "w+")
34
- @result = @io.read
35
- return @result
36
- ensure
37
- @io.close
38
- end
39
- end
40
- attr_reader :io, :result
21
+ extend Bio::Command::Tools
41
22
 
23
+ def self.seqret(arg)
24
+ str = self.retrieve('seqret', arg)
42
25
  end
43
26
 
44
- end
45
-
46
- =begin
47
-
48
- = Bio::EMBOSS
49
-
50
- EMBOSS wrapper.
27
+ def self.entret(arg)
28
+ str = self.retrieve('entret', arg)
29
+ end
51
30
 
52
- #!/usr/bin/env ruby
53
- require 'bio'
31
+ def initialize(cmd_line)
32
+ @cmd_line = cmd_line + ' -stdout -auto'
33
+ end
54
34
 
55
- emboss = Bio::EMBOSS.new("getorf -sequence ~/xlrhodop -outseq stdout")
56
- puts emboss.exec
35
+ def exec
36
+ begin
37
+ @io = IO.popen(@cmd_line, "w+")
38
+ @result = @io.read
39
+ return @result
40
+ ensure
41
+ @io.close
42
+ end
43
+ end
44
+ attr_reader :io, :result
57
45
 
58
- --- Bio::EMBOSS.new(command_line)
46
+ private
59
47
 
60
- --- Bio::EMBOSS#exec
61
- --- Bio::EMBOSS#io
62
- --- Bio::EMBOSS#result
48
+ def self.retrieve(cmd, arg)
49
+ cmd = [ cmd, arg, '-auto', '-stdout' ]
50
+ str = ''
51
+ call_command_local(cmd) do |inn, out|
52
+ inn.close_write
53
+ str = out.read
54
+ end
55
+ return str
56
+ end
63
57
 
64
- === SEE ALSO
58
+ end # EMBOSS
65
59
 
66
- * http://www.emboss.org
60
+ end # Bio
67
61
 
68
- =end
@@ -1,7 +1,32 @@
1
1
  #
2
- # bio/appl/hmmer.rb - HMMER wrapper
2
+ # = bio/appl/hmmer.rb - HMMER wrapper
3
3
  #
4
- # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2002
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: hmmer.rb,v 1.5 2006/02/02 17:08:36 nakao Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # A wrapper for the HMMER programs (hmmsearch or hmmpfam).
13
+ #
14
+ # == Examples
15
+ #
16
+ # require 'bio'
17
+ # program = 'hmmsearch' # or 'hmmpfam'
18
+ # hmmfile = 'test.hmm'
19
+ # seqfile = 'test.faa'
20
+ #
21
+ # factory = Bio::HMMER.new(program, hmmfile, seqfile)
22
+ # p factory.query
23
+ #
24
+ # == References
25
+ #
26
+ # * HMMER
27
+ # http://hmmer.wustl.edu/
28
+ #
29
+ #--
5
30
  #
6
31
  # This library is free software; you can redistribute it and/or
7
32
  # modify it under the terms of the GNU Lesser General Public
@@ -17,7 +42,7 @@
17
42
  # License along with this library; if not, write to the Free Software
18
43
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
44
  #
20
- # $Id: hmmer.rb,v 1.4 2005/09/26 13:00:04 k Exp $
45
+ #++
21
46
  #
22
47
 
23
48
  require 'bio/command'
@@ -25,105 +50,114 @@ require 'shellwords'
25
50
 
26
51
  module Bio
27
52
 
28
- class HMMER
29
-
30
- autoload :Report, 'bio/appl/hmmer/report'
53
+ # A wrapper for HMMER programs (hmmsearch or hmmpfam).
54
+ #
55
+ # === Examples
56
+ #
57
+ # require 'bio'
58
+ # program = 'hmmsearch' # or 'hmmpfam'
59
+ # hmmfile = 'test.hmm'
60
+ # seqfile = 'test.faa'
61
+ #
62
+ # factory = Bio::HMMER.new(program, hmmfile, seqfile)
63
+ # report = factory.query
64
+ # report.class #=> Bio::HMMER::Report
65
+ #
66
+ # === References
67
+ #
68
+ # * HMMER
69
+ # http://hmmer.wustl.edu/
70
+ #
71
+ class HMMER
72
+
73
+ autoload :Report, 'bio/appl/hmmer/report'
74
+
75
+ include Bio::Command::Tools
76
+
77
+ # Program name (hmmsearch or hmmpfam).
78
+ attr_accessor :program
79
+
80
+ # Name of hmmfile.
81
+ attr_accessor :hmmfile
82
+
83
+ # Name of seqfile.
84
+ attr_accessor :seqfile
85
+
86
+ # Command line options.
87
+ attr_accessor :options
88
+
89
+ # Shows the raw output from the hmmer search.
90
+ attr_reader :output
91
+
92
+ # Sets a program name, a profile hmm file name, a query sequence file name
93
+ # and options in string.
94
+ #
95
+ # Program names: hmmsearch, hmmpfam
96
+ #
97
+ def initialize(program, hmmfile, seqfile, options = [])
98
+ @program = program
99
+ @hmmfile = hmmfile
100
+ @seqfile = seqfile
101
+ @output = ''
102
+
103
+ begin
104
+ @options = opt.to_ary
105
+ rescue NameError #NoMethodError
106
+ # backward compatibility
107
+ @options = Shellwords.shellwords(options)
108
+ end
109
+ end
31
110
 
32
- include Bio::Command::Tools
33
111
 
34
- def initialize(program, hmmfile, seqfile, opt = [])
35
- @program = program
36
- @hmmfile = hmmfile
37
- @seqfile = seqfile
38
- @output = ''
112
+ # Gets options by String.
113
+ # backward compatibility.
114
+ def option
115
+ make_command_line(@options)
116
+ end
39
117
 
40
- begin
41
- @options = opt.to_ary
42
- rescue NameError #NoMethodError
43
- # backward compatibility
44
- @options = Shellwords.shellwords(opt)
45
- end
46
- end
47
- attr_accessor :program, :hmmfile, :seqfile, :options
48
- attr_reader :output
49
118
 
50
- def option
51
- # backward compatibility
52
- make_command_line(@options)
53
- end
119
+ # Sets options by String.
120
+ # backward compatibility.
121
+ def option=(str)
122
+ @options = Shellwords.shellwords(str)
123
+ end
54
124
 
55
- def option=(str)
56
- # backward compatibility
57
- @options = Shellwords.shellwords(str)
58
- end
59
125
 
60
- def query
61
- cmd = [ @program, *@options ]
62
- cmd.concat([ @hmmfile, @seqfile ])
126
+ # Executes the hmmer search and returns the report
127
+ # (Bio::HMMER::Report object).
128
+ def query
129
+ cmd = [ @program, *@options ]
130
+ cmd.concat([ @hmmfile, @seqfile ])
63
131
 
64
- report = nil
65
-
66
- @output = call_command_local(cmd, nil)
67
- report = parse_result(@output)
132
+ report = nil
133
+
134
+ @output = call_command_local(cmd, nil)
135
+ report = parse_result(@output)
68
136
 
69
- return report
70
- end
71
-
137
+ return report
138
+ end
72
139
 
73
- private
140
+ private
141
+
142
+ def parse_result(data)
143
+ Report.new(data)
144
+ end
74
145
 
75
- def parse_result(data)
76
- Report.new(data)
77
- end
146
+ end # class HMMER
78
147
 
79
- end
80
- end
148
+ end # module Bio
81
149
 
82
150
 
83
151
 
84
152
  if __FILE__ == $0
85
153
 
86
- begin
87
- require 'pp'
88
- alias p pp
89
- rescue
90
- end
154
+ require 'pp'
91
155
 
92
- program = ARGV.shift # hmmsearch, hmmpfam
156
+ program = ARGV.shift # hmmsearch, hmmpfam
93
157
  hmmfile = ARGV.shift
94
158
  seqfile = ARGV.shift
95
159
 
96
160
  factory = Bio::HMMER.new(program, hmmfile, seqfile)
97
- p factory.query
161
+ pp factory.query
98
162
 
99
163
  end
100
-
101
-
102
- =begin
103
-
104
- = Bio::HMMER
105
-
106
- --- Bio::HMMER.new(program, hmmfile, seqfile, option = '')
107
- --- Bio::HMMER#program
108
- --- Bio::HMMER#hmmfile
109
- --- Bio::HMMER#seqfile
110
- --- Bio::HMMER#options
111
-
112
- Accessors for the factory.
113
-
114
- --- Bio::HMMER#option
115
- --- Bio::HMMER#option=(str)
116
-
117
- Get/set options by string.
118
-
119
- --- Bio::HMMER#query
120
-
121
- Executes the hmmer search and returns Report object (Bio::HMMER::Report).
122
-
123
- --- Bio::HMMER#output
124
-
125
- Shows the raw output from hmmer search.
126
-
127
- =end
128
-
129
-
@@ -1,8 +1,44 @@
1
1
  #
2
- # bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parserer
2
+ # = bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
3
3
  #
4
- # Copyright (C) 2002 Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>
5
- # Copyright (C) 2005 Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
4
+ # Copyright:: Copyright (C) 2002
5
+ # Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>,
6
+ # Copyright:: Copyright (C) 2005
7
+ # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
8
+ # License:: LGPL
9
+ #
10
+ # $Id: report.rb,v 1.10 2006/02/02 17:08:36 nakao Exp $
11
+ #
12
+ # == Description
13
+ #
14
+ # Parser class for hmmsearch and hmmpfam in the HMMER package.
15
+ #
16
+ # == Examples
17
+ #
18
+ # #for multiple reports in a single output file (example.hmmpfam)
19
+ # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
20
+ # report.program['name']
21
+ # report.parameter['HMM file']
22
+ # report.query_info['Query sequence']
23
+ # report.hits.each do |hit|
24
+ # hit.accession
25
+ # hit.description
26
+ # hit.score
27
+ # hit.evalue
28
+ # hit.hsps.each do |hsp|
29
+ # hsp.accession
30
+ # hsp.domain
31
+ # hsp.evalue
32
+ # hsp.midline
33
+ # end
34
+ # end
35
+ #
36
+ # == References
37
+ #
38
+ # * HMMER
39
+ # http://hmmer.wustl.edu/
40
+ #
41
+ #--
6
42
  #
7
43
  # This library is free software; you can redistribute it and/or
8
44
  # modify it under the terms of the GNU Lesser General Public
@@ -18,340 +54,534 @@
18
54
  # License along with this library; if not, write to the Free Software
19
55
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
56
  #
21
- # $Id: report.rb,v 1.9 2005/10/31 09:12:03 k Exp $
57
+ #++
22
58
  #
23
59
 
24
60
  require 'bio/appl/hmmer'
25
61
 
26
62
  module Bio
27
- class HMMER
28
63
 
29
- def self.reports(input)
30
- ary = []
31
- input.each("\n//\n") do |data|
32
- if block_given?
33
- yield Report.new(data)
34
- else
35
- ary << Report.new(data)
36
- end
64
+
65
+ class HMMER
66
+
67
+ # A reader interface that splits multi-report text into individual reports
68
+ # (Bio::HMMER::Report).
69
+ #
70
+ # === Examples
71
+ #
72
+ # # Iterator
73
+ # Bio::HMMER.reports(reports_text) do |report|
74
+ # report
75
+ # end
76
+ #
77
+ # # Array
78
+ # reports = Bio::HMMER.reports(reports_text)
79
+ #
80
+ def self.reports(multiple_report_text)
81
+ ary = []
82
+ multiple_report_text.each("\n//\n") do |report|
83
+ if block_given?
84
+ yield Report.new(report)
85
+ else
86
+ ary << Report.new(report)
37
87
  end
38
- return ary
39
88
  end
89
+ return ary
90
+ end
40
91
 
41
92
 
42
- # Bio::HMMER::Report
43
- class Report
44
-
45
- # for Bio::FlatFile support
46
- DELIMITER = RS = "\n//\n"
93
+ # A parser class for a search report by hmmsearch or hmmpfam program in the
94
+ # HMMER package.
95
+ #
96
+ # === Examples
97
+ #
98
+ # Examples
99
+ # #for multiple reports in a single output file (example.hmmpfam)
100
+ # Bio::HMMER.reports(File.read("example.hmmpfam")) do |report|
101
+ # report.program['name']
102
+ # report.parameter['HMM file']
103
+ # report.query_info['Query sequence']
104
+ # report.hits.each do |hit|
105
+ # hit.accession
106
+ # hit.description
107
+ # hit.score
108
+ # hit.evalue
109
+ # hit.hsps.each do |hsp|
110
+ # hsp.accession
111
+ # hsp.domain
112
+ # hsp.evalue
113
+ # hsp.midline
114
+ # end
115
+ # end
116
+ #
117
+ # === References
118
+ #
119
+ # * HMMER
120
+ # http://hmmer.wustl.edu/
121
+ #
122
+ class Report
123
+
124
+ # Delimiter of each entry for Bio::FlatFile support.
125
+ DELIMITER = RS = "\n//\n"
126
+
127
+
128
+ # A Hash contains program information used.
129
+ # Valid keys are 'name', 'version', 'copyright' and 'license'.
130
+ attr_reader :program
131
+
132
+ # A hash contains parameters used.
133
+ # Valid keys are 'HMM file' and 'Sequence file'.
134
+ attr_reader :parameter
135
+
136
+ # A hash contains the query information.
137
+ # Valid keys are 'Query sequence' (or 'Query HMM'), 'Accession' and 'Description'.
138
+ attr_reader :query_info
139
+
140
+ #
141
+ attr_reader :hits
142
+
143
+ # Returns an Array of Bio::HMMER::Report::Hsp objects.
144
+ # Under special circumstances, some HSPs do not have
145
+ # parent Hit objects. If you want to access such HSPs,
146
+ # use this method.
147
+ attr_reader :hsps
148
+
149
+ # statistics by hmmsearch.
150
+ attr_reader :histogram
151
+
152
+ # statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'.
153
+ attr_reader :statistical_detail
154
+
155
+ # statistics by hmmsearch.
156
+ attr_reader :total_seq_searched
157
+
158
+ # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
159
+ attr_reader :whole_seq_top_hits
160
+
161
+ # statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
162
+ attr_reader :domain_top_hits
163
+
164
+
165
+ # Parses a HMMER search report (by hmmpfam or hmmsearch program) and
166
+ # returns a Bio::HMMER::Report object.
167
+ #
168
+ # === Examples
169
+ #
170
+ # hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out"))
171
+ #
172
+ # hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out"))
173
+ #
174
+ def initialize(data)
175
+
176
+ # The input data is divided into six data fields, i.e. header,
177
+ # query information, hits, HSPs, alignments and search statistics.
178
+ # However, header and statistics data don't necessarily exist.
179
+ subdata, is_hmmsearch = get_subdata(data)
180
+
181
+ # if header exists, parse it
182
+ if subdata["header"]
183
+ @program, @parameter = parse_header_data(subdata["header"])
184
+ else
185
+ @program, @parameter = [{}, {}]
186
+ end
47
187
 
48
- def initialize(data)
188
+ @query_info = parse_query_info(subdata["query"])
189
+ @hits = parse_hit_data(subdata["hit"])
190
+ @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch)
49
191
 
50
- # The input data is divided into six data fields, i.e. header,
51
- # query infomation, hits, HSPs, alignments and search statistics.
52
- # However, header and statistics data don't necessarily exist.
53
- subdata, is_hmmsearch = get_subdata(data)
192
+ if @hsps != []
193
+ # split alignment subdata into an array of alignments
194
+ aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1)
54
195
 
55
- # if header exists, parse it
56
- if subdata["header"]
57
- @program, @parameter = parse_header_data(subdata["header"])
58
- else
59
- @program, @parameter = [{}, {}]
196
+ # append alignment information to corresponding Hsp
197
+ aln_ary.each_with_index do |aln, i|
198
+ @hsps[i].set_alignment(aln)
60
199
  end
200
+ end
61
201
 
62
- @query_info = parse_query_info(subdata["query"])
63
- @hits = parse_hit_data(subdata["hit"])
64
- @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch)
65
-
66
- if @hsps != []
67
- # split alignment subdata into an array of alignments
68
- aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1)
69
-
70
- # append alignment information to corresponding Hsp
71
- aln_ary.each_with_index do |aln, i|
72
- @hsps[i].set_alignment(aln)
73
- end
202
+ # assign each Hsp object to its parent Hit
203
+ hits_hash = {}
204
+ @hits.each do |hit|
205
+ hits_hash[hit.accession] = hit
206
+ end
207
+ @hsps.each do |hsp|
208
+ if hits_hash.has_key?(hsp.accession)
209
+ hits_hash[hsp.accession].append_hsp(hsp)
74
210
  end
211
+ end
75
212
 
76
- # assign each Hsp object to its parent Hit
77
- hits_hash = {}
78
- @hits.each do |hit|
79
- hits_hash[hit.accession] = hit
80
- end
81
- @hsps.each do |hsp|
82
- if hits_hash.has_key?(hsp.accession)
83
- hits_hash[hsp.accession].append_hsp(hsp)
84
- end
85
- end
213
+ # parse statistics (for hmmsearch)
214
+ if is_hmmsearch
215
+ @histogram, @statistical_detail, @total_seq_searched, \
216
+ @whole_seq_top_hits, @domain_top_hits = \
217
+ parse_stat_data(subdata["statistics"])
218
+ end
219
+
220
+ end
86
221
 
87
- # parse statistics (for hmmsearch)
88
- if is_hmmsearch
89
- @histogram, @statistical_detail, @total_seq_searched, \
90
- @whole_seq_top_hits, @domain_top_hits = \
91
- parse_stat_data(subdata["statistics"])
92
- end
93
222
 
223
+ # Iterates each hit (Bio::HMMER::Report::Hit).
224
+ def each
225
+ @hits.each do |hit|
226
+ yield hit
227
+ end
228
+ end
229
+ alias :each_hit :each
230
+
231
+
232
+ # Bio::HMMER::Report#get_subdata
233
+ def get_subdata(data)
234
+ subdata = {}
235
+ header_prefix = '\Ahmm(search|pfam) - search'
236
+ query_prefix = '^Query (HMM|sequence): .*\nAccession: '
237
+ hit_prefix = '^Scores for (complete sequences|sequence family)'
238
+ hsp_prefix = '^Parsed for domains:'
239
+ aln_prefix = '^Alignments of top-scoring domains:\n'
240
+ stat_prefix = '^\nHistogram of all scores:'
241
+
242
+ # if header exists, get it
243
+ if data =~ /#{header_prefix}/
244
+ is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
245
+ subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
246
+ else
247
+ is_hmmsearch = false # if no header, assumed to be hmmpfam
94
248
  end
95
- attr_reader :program, :parameter, :query_info, :hits, :hsps,
96
- :histogram, :statistical_detail, :total_seq_searched,
97
- :whole_seq_top_hits, :domain_top_hits
98
249
 
250
+ # get query, Hit and Hsp data
251
+ subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
252
+ subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
253
+ subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
254
+
255
+ # get alignment data
256
+ if is_hmmsearch
257
+ data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
258
+ subdata["alignment"] = $1
259
+ else
260
+ data =~ /#{aln_prefix}(.+?)\/\/\n/m
261
+ subdata["alignment"] = $1
262
+ raise "multiple reports found" if $'.length > 0
263
+ end
264
+
265
+ # handle -A option of HMMER
266
+ cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
267
+ subdata["alignment"].sub!(/#{cutoff_line}/, '')
268
+
269
+ # get statistics data
270
+ subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
99
271
 
100
- def each
101
- @hits.each do |x|
102
- yield x
272
+ [subdata, is_hmmsearch]
273
+ end
274
+ private :get_subdata
275
+
276
+
277
+ # Bio::HMMER::Report#parse_header_data
278
+ def parse_header_data(data)
279
+ data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
280
+ program_data = $1
281
+ parameter_data = $2
282
+
283
+ program = {}
284
+ program['name'], program['version'], program['copyright'], \
285
+ program['license'] = program_data.split(/\n/)
286
+
287
+ parameter = {}
288
+ parameter_data.each do |x|
289
+ if /^(.+?):\s+(.*?)\s*$/ =~ x
290
+ parameter[$1] = $2
103
291
  end
104
292
  end
105
293
 
294
+ [program, parameter]
295
+ end
296
+ private :parse_header_data
106
297
 
107
- # Bio::HMMER::Report::Hit
108
- class Hit
109
- def initialize(data)
110
- @hsps = Array.new
111
- if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data
112
- @accession, @description, @score, @evalue, @num =
113
- [$1, $2, $3.to_f, $4.to_f, $5.to_i]
114
- end
115
- end
116
- attr_reader :hsps, :accession, :description, :score, :evalue, :num
117
298
 
118
- def each
119
- @hsps.each do |x|
120
- yield x
121
- end
299
+ # Bio::HMMER::Report#parse_query_info
300
+ def parse_query_info(data)
301
+ hash = {}
302
+ data.each do |x|
303
+ if /^(.+?):\s+(.*?)\s*$/ =~ x
304
+ hash[$1] = $2
305
+ elsif /\s+\[(.+)\]/ =~ x
306
+ hash['comments'] = $1
122
307
  end
123
-
124
- alias target_id accession
125
- alias hit_id accession
126
- alias entry_id accession
127
- alias definition description
128
- alias bit_score score
129
-
130
- def target_def
131
- if @hsps.size == 1
132
- "<#{@hsps[0].domain}> #{@description}"
133
- else
134
- "<#{@num.to_s}> #{@description}"
135
- end
308
+ end
309
+ hash
310
+ end
311
+ private :parse_query_info
312
+
313
+
314
+ # Bio::HMMER::Report#parse_hit_data
315
+ def parse_hit_data(data)
316
+ data.sub!(/.+?---\n/m, '').chop!
317
+ hits = []
318
+ return hits if data == "\t[no hits above thresholds]\n"
319
+ data.each do |l|
320
+ hits.push(Hit.new(l))
321
+ end
322
+ hits
323
+ end
324
+ private :parse_hit_data
325
+
326
+
327
+ # Bio::HMMER::Report#parse_hsp_data
328
+ def parse_hsp_data(data, is_hmmsearch)
329
+ data.sub!(/.+?---\n/m, '').chop!
330
+ hsps=[]
331
+ return hsps if data == "\t[no hits above thresholds]\n"
332
+ data.each do |l|
333
+ hsps.push(Hsp.new(l, is_hmmsearch))
334
+ end
335
+ return hsps
336
+ end
337
+ private :parse_hsp_data
338
+
339
+
340
+ # Bio::HMMER::Report#parse_stat_data
341
+ def parse_stat_data(data)
342
+ data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
343
+ histogram = $1.strip
344
+
345
+ statistical_detail = {}
346
+ data.sub!(/(.+?)\n\n/m, '')
347
+ $1.each do |l|
348
+ statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
349
+ end
350
+
351
+ total_seq_searched = nil
352
+ data.sub!(/(.+?)\n\n/m, '')
353
+ $1.each do |l|
354
+ total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
355
+ end
356
+
357
+ whole_seq_top_hits = {}
358
+ data.sub!(/(.+?)\n\n/m, '')
359
+ $1.each do |l|
360
+ if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
361
+ whole_seq_top_hits[$1] = $2.to_i
362
+ elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
363
+ whole_seq_top_hits[$1] = $2
136
364
  end
137
-
138
- def append_hsp(hsp)
139
- @hsps << hsp
365
+ end
366
+
367
+ domain_top_hits = {}
368
+ data.each do |l|
369
+ if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
370
+ domain_top_hits[$1] = $2.to_i
371
+ elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
372
+ domain_top_hits[$1] = $2
140
373
  end
141
-
142
374
  end
143
375
 
376
+ [histogram, statistical_detail, total_seq_searched, \
377
+ whole_seq_top_hits, domain_top_hits]
378
+ end
379
+ private :parse_stat_data
144
380
 
145
- # Bio::HMMER::Report::Hsp
146
- class Hsp
147
- def initialize(data, is_hmmsearch)
148
- @is_hmmsearch = is_hmmsearch
149
-
150
- @accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,
151
- score, evalue = data.split(' ')
152
- @seq_f = seq_f.to_i
153
- @seq_t = seq_t.to_i
154
- @hmm_f = hmm_f.to_i
155
- @hmm_t = hmm_t.to_i
156
- @score = score.to_f
157
- @evalue = evalue.to_f
158
- @hmmseq = ''
159
- @flatseq = ''
160
- @midline = ''
161
- @query_frame = 1
162
- @target_frame = 1
163
- # CS and RF lines are rarely used.
164
- @csline = nil
165
- @rfline = nil
166
- end
167
- attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
168
- :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
169
- :flatseq, :query_frame, :target_frame, :csline, :rfline
170
-
171
- def set_alignment(aln)
172
- # First, split the input alignment into an array of
173
- # "alignment blocks." One block usually has three lines,
174
- # i.e. hmmseq, midline and flatseq.
175
- # However, although infrequent, it can contain CS or RF lines.
176
- aln.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
177
- lines = blk.split(/\n/)
178
- cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
179
- rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
180
- aln_width = lines[0][/\S+/].length
181
- @csline = @csline.to_s + cstmp[19, aln_width] if cstmp
182
- @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
183
- @hmmseq += lines[0][19, aln_width]
184
- @midline += lines[1][19, aln_width]
185
- @flatseq += lines[2][19, aln_width]
186
- end
187
- @csline = @csline[3...-3] if @csline
188
- @rfline = @rfline[3...-3] if @rfline
189
- @hmmseq = @hmmseq[3...-3]
190
- @midline = @midline[3...-3]
191
- @flatseq = @flatseq[3...-3]
192
- end
193
381
 
194
- def query_seq; @is_hmmsearch ? @hmmseq : @flatseq; end
195
- def target_seq; @is_hmmsearch ? @flatseq : @hmmseq; end
196
- def target_from; @is_hmmsearch ? @seq_f : @hmm_f; end
197
- def target_to; @is_hmmsearch ? @seq_t : @hmm_t; end
198
- def query_from; @is_hmmsearch ? @hmm_f : @seq_f; end
199
- def query_to; @is_hmmsearch ? @hmm_t : @seq_t; end
382
+ # Container class for HMMER search hits.
383
+ class Hit
384
+
385
+ # An Array of Bio::HMMER::Report::Hsp objects.
386
+ attr_reader :hsps
200
387
 
201
- alias bit_score score
202
- alias target_id accession
388
+ #
389
+ attr_reader :accession
390
+ alias target_id accession
391
+ alias hit_id accession
392
+ alias entry_id accession
393
+
394
+ #
395
+ attr_reader :description
396
+ alias definition description
397
+
398
+ # Matching scores (total of all HSPs).
399
+ attr_reader :score
400
+ alias bit_score score
401
+
402
+ # E-value
403
+ attr_reader :evalue
203
404
 
405
+ # Number of domains
406
+ attr_reader :num
407
+
408
+ # Sets hit data.
409
+ def initialize(hit_data)
410
+ @hsps = Array.new
411
+ if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ hit_data
412
+ @accession, @description, @score, @evalue, @num = \
413
+ [$1, $2, $3.to_f, $4.to_f, $5.to_i]
414
+ end
204
415
  end
205
416
 
206
417
 
207
- # Bio::HMMER::Report#get_subdata
208
- def get_subdata(data)
209
- subdata = {}
210
- header_prefix = '\Ahmm(search|pfam) - search'
211
- query_prefix = '^Query (HMM|sequence): .*\nAccession: '
212
- hit_prefix = '^Scores for (complete sequences|sequence family)'
213
- hsp_prefix = '^Parsed for domains:'
214
- aln_prefix = '^Alignments of top-scoring domains:\n'
215
- stat_prefix = '^\nHistogram of all scores:'
216
-
217
- # if header exists, get it
218
- if data =~ /#{header_prefix}/
219
- is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam
220
- subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
221
- else
222
- is_hmmsearch = false # if no header, assumed to be hmmpfam
418
+ # Iterates on each Hsp object (Bio::HMMER::Report::Hsp).
419
+ def each
420
+ @hsps.each do |hsp|
421
+ yield hsp
223
422
  end
423
+ end
424
+ alias :each_hsp :each
224
425
 
225
- # get query, Hit and Hsp data
226
- subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
227
- subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
228
- subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]
229
426
 
230
- # get alignment data
231
- if is_hmmsearch
232
- data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m
233
- subdata["alignment"] = $1
427
+ # Shows the hit description.
428
+ def target_def
429
+ if @hsps.size == 1
430
+ "<#{@hsps[0].domain}> #{@description}"
234
431
  else
235
- data =~ /#{aln_prefix}(.+?)\/\/\n/m
236
- subdata["alignment"] = $1
237
- raise "multiple reports found" if $'.length > 0
432
+ "<#{@num.to_s}> #{@description}"
238
433
  end
434
+ end
239
435
 
240
- # handle -A option of HMMER
241
- cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
242
- subdata["alignment"].sub!(/#{cutoff_line}/, '')
436
+ # Appends a Bio::HMMER::Report::Hsp object.
437
+ def append_hsp(hsp)
438
+ @hsps << hsp
439
+ end
440
+
441
+ end # class Hit
243
442
 
244
- # get statistics data
245
- subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]
246
443
 
247
- [subdata, is_hmmsearch]
248
- end
249
- private :get_subdata
444
+ # Container class for HMMER search hsps.
445
+ class Hsp
250
446
 
251
- # Bio::HMMER::Report#parse_header_data
252
- def parse_header_data(data)
253
- data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
254
- program_data = $1
255
- parameter_data = $2
256
-
257
- program = {}
258
- program['name'], program['version'], program['copyright'], \
259
- program['license'] = program_data.split(/\n/)
260
-
261
- parameter = {}
262
- parameter_data.each do |x|
263
- if /^(.+?):\s+(.*?)\s*$/ =~ x
264
- parameter[$1] = $2
265
- end
266
- end
447
+ #
448
+ attr_reader :accession
449
+ alias target_id accession
450
+
451
+ #
452
+ attr_reader :domain
453
+
454
+ #
455
+ attr_reader :seq_f
456
+
457
+ #
458
+ attr_reader :seq_t
459
+
460
+ #
461
+ attr_reader :seq_ft
462
+
463
+ #
464
+ attr_reader :hmm_f
465
+
466
+ #
467
+ attr_reader :hmm_t
468
+
469
+ #
470
+ attr_reader :hmm_ft
471
+
472
+ # Score
473
+ attr_reader :score
474
+ alias bit_score score
475
+
476
+ # E-value
477
+ attr_reader :evalue
478
+
479
+ # Alignment midline
480
+ attr_reader :midline
481
+
482
+ #
483
+ attr_reader :hmmseq
484
+
485
+ #
486
+ attr_reader :flatseq
487
+
488
+ #
489
+ attr_reader :query_frame
490
+
491
+ #
492
+ attr_reader :target_frame
267
493
 
268
- [program, parameter]
494
+ # CS Line
495
+ attr_reader :csline
496
+
497
+ # RF Line
498
+ attr_reader :rfline
499
+
500
+ # Sets hsps.
501
+ def initialize(hsp_data, is_hmmsearch)
502
+ @is_hmmsearch = is_hmmsearch
503
+
504
+ @accession, @domain, seq_f, seq_t, @seq_ft, hmm_f, hmm_t, @hmm_ft,\
505
+ score, evalue = hsp_data.split(' ')
506
+ @seq_f = seq_f.to_i
507
+ @seq_t = seq_t.to_i
508
+ @hmm_f = hmm_f.to_i
509
+ @hmm_t = hmm_t.to_i
510
+ @score = score.to_f
511
+ @evalue = evalue.to_f
512
+ @hmmseq = ''
513
+ @flatseq = ''
514
+ @midline = ''
515
+ @query_frame = 1
516
+ @target_frame = 1
517
+ # CS and RF lines are rarely used.
518
+ @csline = nil
519
+ @rfline = nil
269
520
  end
270
- private :parse_header_data
271
-
272
- # Bio::HMMER::Report#parse_query_info
273
- def parse_query_info(data)
274
- hash = {}
275
- data.each do |x|
276
- if /^(.+?):\s+(.*?)\s*$/ =~ x
277
- hash[$1] = $2
278
- elsif /\s+\[(.+)\]/ =~ x
279
- hash['comments'] = $1
280
- end
521
+
522
+ #
523
+ def set_alignment(alignment)
524
+ # First, split the input alignment into an array of
525
+ # "alignment blocks." One block usually has three lines,
526
+ # i.e. hmmseq, midline and flatseq.
527
+ # However, although infrequent, it can contain CS or RF lines.
528
+ alignment.split(/ (?:\d+|-)\s*\n\n/).each do |blk|
529
+ lines = blk.split(/\n/)
530
+ cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil
531
+ rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil
532
+ aln_width = lines[0][/\S+/].length
533
+ @csline = @csline.to_s + cstmp[19, aln_width] if cstmp
534
+ @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp
535
+ @hmmseq += lines[0][19, aln_width]
536
+ @midline += lines[1][19, aln_width]
537
+ @flatseq += lines[2][19, aln_width]
281
538
  end
282
- hash
539
+ @csline = @csline[3...-3] if @csline
540
+ @rfline = @rfline[3...-3] if @rfline
541
+ @hmmseq = @hmmseq[3...-3]
542
+ @midline = @midline[3...-3]
543
+ @flatseq = @flatseq[3...-3]
283
544
  end
284
- private :parse_query_info
285
-
286
- # Bio::HMMER::Report#parse_hit_data
287
- def parse_hit_data(data)
288
- data.sub!(/.+?---\n/m, '').chop!
289
- hits = []
290
- return hits if data == "\t[no hits above thresholds]\n"
291
- data.each do |l|
292
- hits.push(Hit.new(l))
293
- end
294
- hits
545
+
546
+
547
+ #
548
+ def query_seq
549
+ @is_hmmsearch ? @hmmseq : @flatseq
295
550
  end
296
- private :parse_hit_data
297
-
298
- # Bio::HMMER::Report#parse_hsp_data
299
- def parse_hsp_data(data, is_hmmsearch)
300
- data.sub!(/.+?---\n/m, '').chop!
301
- hsps=[]
302
- return hsps if data == "\t[no hits above thresholds]\n"
303
- data.each do |l|
304
- hsps.push(Hsp.new(l, is_hmmsearch))
305
- end
306
- return hsps
551
+
552
+ #
553
+ def target_seq
554
+ @is_hmmsearch ? @flatseq : @hmmseq
555
+ end
556
+
557
+ #
558
+ def target_from
559
+ @is_hmmsearch ? @seq_f : @hmm_f
307
560
  end
308
- private :parse_hsp_data
309
561
 
310
- # Bio::HMMER::Report#parse_stat_data
311
- def parse_stat_data(data)
312
- data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
313
- histogram = $1
562
+ #
563
+ def target_to
564
+ @is_hmmsearch ? @seq_t : @hmm_t
565
+ end
314
566
 
315
- statistical_detail = {}
316
- data.sub!(/(.+?)\n\n/m, '')
317
- $1.each do |l|
318
- statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
319
- end
320
-
321
- total_seq_searched = nil
322
- data.sub!(/(.+?)\n\n/m, '')
323
- $1.each do |l|
324
- total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
325
- end
326
-
327
- whole_seq_top_hits = {}
328
- data.sub!(/(.+?)\n\n/m, '')
329
- $1.each do |l|
330
- if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
331
- whole_seq_top_hits[$1] = $2.to_i
332
- elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
333
- whole_seq_top_hits[$1] = $2
334
- end
335
- end
336
-
337
- domain_top_hits = {}
338
- data.each do |l|
339
- if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
340
- domain_top_hits[$1] = $2.to_i
341
- elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
342
- domain_top_hits[$1] = $2
343
- end
344
- end
567
+ #
568
+ def query_from
569
+ @is_hmmsearch ? @hmm_f : @seq_f
570
+ end
345
571
 
346
- [histogram, statistical_detail, total_seq_searched, \
347
- whole_seq_top_hits, domain_top_hits]
572
+ #
573
+ def query_to
574
+ @is_hmmsearch ? @hmm_t : @seq_t
348
575
  end
349
- private :parse_stat_data
576
+
350
577
 
351
- end
578
+ end # class Hsp
352
579
 
353
- end
354
- end
580
+ end # class Report
581
+
582
+ end # class HMMER
583
+
584
+ end # module Bio
355
585
 
356
586
 
357
587
  if __FILE__ == $0
@@ -463,94 +693,9 @@ if __FILE__ == $0
463
693
  p hsp.query_from # hmm_f, seq_f
464
694
  print "query_to : ".rjust(indent)
465
695
  p hsp.query_to # hmm_t, seq_t
466
- end
696
+ end
467
697
  end
468
698
 
469
- end
470
-
471
-
472
- =begin
473
-
474
- = Bio::HMMER::Report
475
-
476
- --- Bio::HMMER::Report.new(data)
477
- --- Bio::HMMER::Report#each
478
-
479
- Iterates on each Bio::HMMER::Report::Hit object.
480
-
481
- --- Bio::HMMER::Report#hits
482
-
483
- Returns an Array of Bio::HMMER::Report::Hit objects.
484
-
485
-
486
- == Bio::HMMER::Report::Hit
487
-
488
- --- Bio::HMMER::Report::Hit#each
489
-
490
- Iterates on each Hsp object.
699
+ end
491
700
 
492
- --- Bio::HMMER::Report::Hit#hsps
493
-
494
- Returns an Array of Bio::HMMER::Report::Hsp objects.
495
-
496
- --- Bio::HMMER::Report::Hit#target_id
497
- --- Bio::HMMER::Report::Hit#hit_id
498
- --- Bio::HMMER::Report::Hit#entry_id
499
- --- Bio::HMMER::Report::Hit#definition
500
- --- Bio::HMMER::Report::Hit#description
501
- --- Bio::HMMER::Report::Hit#num
502
-
503
- nunmer of domains
504
-
505
- --- Bio::HMMER::Report::Hit#target_def
506
-
507
- <domain number> + @description
508
-
509
- --- Bio::HMMER::Report::Hit#evalue
510
- --- Bio::HMMER::Report::Hit#bit_score
511
- --- Bio::HMMER::Report::Hit#score
512
-
513
- Matching scores (total of all HSPs).
514
-
515
-
516
- == Bio::HMMER::Report::Hsp
517
-
518
- --- Bio::HMMER::Report#hsps
519
-
520
- Returns an Array of Bio::HMMER::Report::Hsp objects.
521
- Under special circumstances, some HSPs do not have
522
- parent Hit objects. If you want to access such HSPs,
523
- use this method.
524
-
525
- --- Bio::HMMER::Report::Hsp#target_id
526
- --- Bio::HMMER::Report::Hsp#accession
527
- --- Bio::HMMER::Report::Hsp#domain
528
- --- Bio::HMMER::Report::Hsp#seq_f
529
- --- Bio::HMMER::Report::Hsp#seq_t
530
- --- Bio::HMMER::Report::Hsp#seq_ft
531
- --- Bio::HMMER::Report::Hsp#hmm_f
532
- --- Bio::HMMER::Report::Hsp#hmm_t
533
- --- Bio::HMMER::Report::Hsp#hmm_ft
534
-
535
- --- Bio::HMMER::Report::Hsp#bit_score
536
- --- Bio::HMMER::Report::Hsp#score
537
- --- Bio::HMMER::Report::Hsp#evalue
538
-
539
- --- Bio::HMMER::Report::Hsp#midline
540
- --- Bio::HMMER::Report::Hsp#hmmseq
541
- --- Bio::HMMER::Report::Hsp#flatseq
542
- --- Bio::HMMER::Report::Hsp#query_frame
543
- --- Bio::HMMER::Report::Hsp#target_frame
544
-
545
- --- Bio::HMMER::Report::Hsp#query_seq
546
- --- Bio::HMMER::Report::Hsp#query_from
547
- --- Bio::HMMER::Report::Hsp#query_to
548
- --- Bio::HMMER::Report::Hsp#target_seq
549
- --- Bio::HMMER::Report::Hsp#target_from
550
- --- Bio::HMMER::Report::Hsp#target_to
551
-
552
- --- Bio::HMMER::Report::Hsp#csline
553
- --- Bio::HMMER::Report::Hsp#rfline
554
-
555
- =end
556
701