bio-hmmer3_report 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +99 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/bio-hmmer3_report.rb +2 -0
- data/lib/bio/appl/hmmer/hmmer3/report.rb +193 -0
- data/test/data/HMMER/hmmscan_domtblout.out +9 -0
- data/test/data/HMMER/hmmscan_tblout.out +4 -0
- data/test/data/HMMER/hmmsearch_domtblout.out +5 -0
- data/test/data/HMMER/hmmsearch_domtblout_empty.out +3 -0
- data/test/data/HMMER/hmmsearch_tblout.out +4 -0
- data/test/helper.rb +18 -0
- data/test/unit/bio/appl/hmmer/test_hmmer3report.rb +240 -0
- metadata +133 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
- rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Fetch gems over TLS; plain-HTTP sources can be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  # rdoc is declared once only: the original listed the identical
  # requirement twice in this group.
  gem "rdoc", "~> 3.12"
  gem "jeweler", "~> 1.8.3"
  gem "bundler", ">= 1.0.21"
  gem "bio", ">= 1.4.2"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Christian Zmasek and Ben J Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# bio-hmmer3_report
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/wwood/bioruby-hmmer3_report.png)](http://travis-ci.org/wwood/bioruby-hmmer3_report)
|
4
|
+
|
5
|
+
Parser class for hmmsearch and hmmscan in the HMMER 3 package.
|
6
|
+
|
7
|
+
## Examples
|
8
|
+
|
9
|
+
# Description
|
10
|
+
|
11
|
+
|
12
|
+
# Input from string:
|
13
|
+
data = String.new
|
14
|
+
data << '# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord'
|
15
|
+
data << "\n" << '# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target'
|
16
|
+
data << "\n" << '#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------'
|
17
|
+
data << "\n" << 'Bcl-2 PF00452.13 101 sp|P10415|BCL2_HUMAN - 239 3.7e-30 103.7 0.1 1 1 7.9e-34 4.9e-30 103.3 0.0 1 101 97 195 97 195 0.99 Apoptosis regulator proteins, Bcl-2 family'
|
18
|
+
data << "\n" << 'BH4 PF02180.11 27 sp|P10415|BCL2_HUMAN - 239 3.9e-15 54.6 0.1 1 1 1.3e-18 8.2e-15 53.6 0.1 2 26 8 32 7 33 0.94 Bcl-2 homology region 4'
|
19
|
+
data << "\n"
|
20
|
+
|
21
|
+
report = Bio::HMMER::HMMER3::Report.new(data)
|
22
|
+
report.hits.each do |hit|
|
23
|
+
puts hit.target_name
|
24
|
+
puts hit.full_sequence_e_value
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
# Input from file:
|
29
|
+
report = Bio::HMMER::HMMER3::Report.new(File.open('test/data/HMMER/hmmsearch_domtblout.out'))
|
30
|
+
report.hits.each do |hit|
|
31
|
+
puts hit.target_name
|
32
|
+
puts hit.target_accession
|
33
|
+
puts hit.query_name
|
34
|
+
puts hit.query_accession
|
35
|
+
puts hit.query_length
|
36
|
+
puts hit.full_sequence_e_value
|
37
|
+
puts hit.full_sequence_score
|
38
|
+
puts hit.domain_number
|
39
|
+
puts hit.domain_sum
|
40
|
+
puts hit.domain_c_e_value
|
41
|
+
puts hit.domain_i_e_value
|
42
|
+
puts hit.domain_score
|
43
|
+
puts hit.domain_bias
|
44
|
+
puts hit.hmm_coord_from
|
45
|
+
puts hit.hmm_coord_to
|
46
|
+
puts hit.ali_coord_from
|
47
|
+
puts hit.ali_coord_to
|
48
|
+
puts hit.env_coord_from
|
49
|
+
puts hit.env_coord_to
|
50
|
+
puts hit.acc
|
51
|
+
puts hit.target_description
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
## References
|
56
|
+
|
57
|
+
* HMMER http://hmmer.janelia.org/
|
58
|
+
|
59
|
+
Note: this software is under active development!
|
60
|
+
|
61
|
+
## Installation
|
62
|
+
|
63
|
+
```sh
|
64
|
+
gem install bio-hmmer3_report
|
65
|
+
```
|
66
|
+
|
67
|
+
## Usage
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
require 'bio-hmmer3_report'
|
71
|
+
```
|
72
|
+
|
73
|
+
The API doc is online. For more code examples see the test files in
|
74
|
+
the source tree.
|
75
|
+
|
76
|
+
## Project home page
|
77
|
+
|
78
|
+
For information on the source tree, documentation, examples, issues and
|
79
|
+
how to contribute, see
|
80
|
+
|
81
|
+
http://github.com/wwood/bioruby-hmmer3_report
|
82
|
+
|
83
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
84
|
+
|
85
|
+
## Cite
|
86
|
+
|
87
|
+
If you use this software, please cite one of
|
88
|
+
|
89
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
90
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
91
|
+
|
92
|
+
## Biogems.info
|
93
|
+
|
94
|
+
This Biogem is published at [#bio-hmmer3_report](http://biogems.info/index.html)
|
95
|
+
|
96
|
+
## Copyright
|
97
|
+
|
98
|
+
Copyright (c) 2012 Christian Zmasek and Ben J Woodcroft. See LICENSE.txt for further details.
|
99
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8

# Build script: gem packaging (jeweler), unit tests and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  # Activate the gems from the Gemfile's default and development groups.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-hmmer3_report"
  gem.homepage = "http://github.com/wwood/bioruby-hmmer3_report"
  gem.license = "MIT"
  gem.summary = %Q{Enables parsing of HMMER version 3 reports}
  gem.description = %Q{Enables parsing of HMMER version 3 reports}
  gem.email = "gmail.com after donttrustben"
  gem.authors = ["Christian Zmasek","Ben J Woodcroft"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Running plain `rake` runs the unit tests.
task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up
  # inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-hmmer3_report #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,193 @@
|
|
1
|
+
#
|
2
|
+
# = bio/appl/hmmer/hmmer3/report.rb - hmmscan/hmmsearch parser
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2011
|
5
|
+
# Christian Zmasek <cmzmasek@yahoo.com>, Ben Woodcroft <https://github.com/wwood>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'stringio'
|
12
|
+
|
13
|
+
module Bio
  class HMMER
    # == Description
    #
    # Parser class for hmmsearch and hmmscan in the HMMER 3 package. See README of this biogem for more information.
    class HMMER3

      # Parses the tabular ("tblout" / "domtblout") output of hmmsearch and
      # hmmscan into a list of hit objects.
      class Report

        # Reads +hmmer_output+ line by line and collects one hit per data row.
        #
        # hmmer_output:: a String holding the report text, or any object that
        #                responds to +each_line+ (File, IO, StringIO, ...).
        # format::       optional initial format, +:tblout+ or +:domtblout+.
        #                A recognised header line in the input replaces it;
        #                when it is nil and no header is seen, the format is
        #                guessed from the first data row.
        #
        # Raises RuntimeError when +hmmer_output+ cannot be iterated by line,
        # and ArgumentError when a data row cannot be parsed.
        def initialize(hmmer_output, format = nil)
          @hits = []
          @line_number = 0
          @format = format

          # String, IO and StringIO all provide each_line, so a single
          # duck-typed check replaces the former String/IO class tests.
          unless hmmer_output.respond_to?(:each_line)
            raise "Unexpected hmmer_output class: expected String or IO, found #{hmmer_output.class}"
          end

          hmmer_output.each_line { |line| parse_line(line) }
        end

        # Array of PerSequenceHit or PerDomainHit objects, in input order.
        attr_reader :hits
        # :tblout, :domtblout, or the value passed to +new+ if no data or
        # header line was seen.
        attr_reader :format

        private

        # Processes one input line: header comments select the format,
        # other comments and blank lines are skipped, anything else becomes
        # a hit.
        def parse_line(line)
          @line_number += 1
          if line =~ /^#.+this\s+domain/
            @format = :domtblout
          elsif line =~ /^#.+best\s+1\s+domain/
            @format = :tblout
          elsif line =~ /\S/ && line !~ /^#/
            @format = guess_format(line) if @format.nil?
            @hits << build_hit(line)
          end
        end

        # Picks a format for header-less input from the shape of a data row.
        def guess_format(line)
          looks_like_per_domain_result?(line) ? :domtblout : :tblout
        end

        # Instantiates the hit class that corresponds to the current format.
        def build_hit(line)
          case @format
          when :domtblout
            PerDomainHit.new(line, @line_number)
          when :tblout
            PerSequenceHit.new(line, @line_number)
          else
            raise ArgumentError, "attempt to parse hmmscan/hmmsearch output style other than \"domtblout\" or \"tblout\""
          end
        end

        # True-ish when +line+ has the column layout of a domtblout row.
        def looks_like_per_domain_result?(line)
          line =~ PerDomainHit::LINE_PATTERN
        end

      end # class Report

      # Fields shared by both kinds of hit.
      class Hit
        def initialize
          # This is an abstract class. Prevents 'new' being called on this class
          # and force implementation of 'initialize' in inheriting classes.
          raise NotImplementedError
        end
        attr_reader :target_name
        attr_reader :target_accession
        attr_reader :target_description
        attr_reader :query_name
        attr_reader :query_accession
        attr_reader :full_sequence_e_value
        attr_reader :full_sequence_score
        attr_reader :full_sequence_bias

      end # class Hit


      # One row of a per-sequence ("tblout") table.
      class PerSequenceHit < Hit

        # tblout:
        #   tn tacc qn qacc fs_eval fs_scor fs_bias bst_e bst_scor bst_bias exp reg clu ov env dom rep inc desc
        #   1  2    3  4    5       6       7       8     9        10       11  12  13  14 15  16  17  18  19
        LINE_PATTERN = /^(\S*)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.*)/

        # Sets hit data.
        #
        # line::        one data row of a tblout table.
        # line_number:: position of the row in the input, used only in the
        #               error message.
        #
        # Raises ArgumentError when +line+ does not match LINE_PATTERN.
        def initialize(line, line_number)
          fields = LINE_PATTERN.match(line)
          unless fields
            raise ArgumentError, "line #{line_number} is in an unrecognized format [#{line}]"
          end

          @target_name = fields[1]
          @target_accession = fields[2]
          @query_name = fields[3]
          @query_accession = fields[4]
          @full_sequence_e_value = fields[5].to_f
          @full_sequence_score = fields[6].to_f
          @full_sequence_bias = fields[7].to_f
          @best_1_domain_e_value = fields[8].to_f
          @best_1_domain_score = fields[9].to_f
          @best_1_domain_bias = fields[10].to_f
          # "exp" is written as a decimal number (e.g. "1.0"), so it is kept
          # as a Float; to_i would silently drop the fractional part.
          @domain_number_est_exp = fields[11].to_f
          @domain_number_est_reg = fields[12].to_i
          @domain_number_est_clu = fields[13].to_i
          @domain_number_est_ov = fields[14].to_i
          @domain_number_est_env = fields[15].to_i
          @domain_number_est_dom = fields[16].to_i
          @domain_number_est_rep = fields[17].to_i
          @domain_number_est_inc = fields[18].to_i
          @target_description = fields[19]
        end # initialize

        attr_reader :best_1_domain_e_value
        attr_reader :best_1_domain_score
        attr_reader :best_1_domain_bias
        attr_reader :domain_number_est_exp
        attr_reader :domain_number_est_reg
        attr_reader :domain_number_est_clu
        attr_reader :domain_number_est_ov
        attr_reader :domain_number_est_env
        attr_reader :domain_number_est_dom
        attr_reader :domain_number_est_rep
        attr_reader :domain_number_est_inc

      end # class PerSequenceHit

      # One row of a per-domain ("domtblout") table.
      class PerDomainHit < Hit

        # domtblout:
        #   tn acc tlen query acc qlen Evalue score bias # of c-E i-E score bias hf ht af at ef et acc desc
        #   1  2   3    4     5   6    7      8     9    10 11 12 13  14    15   16 17 18 19 20 21 22  23
        LINE_PATTERN = /^(\S*)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s*(.*)/

        # Sets hit data.
        #
        # line::        one data row of a domtblout table.
        # line_number:: position of the row in the input, used only in the
        #               error message.
        #
        # Raises ArgumentError when +line+ does not match LINE_PATTERN.
        def initialize(line, line_number)
          fields = LINE_PATTERN.match(line)
          unless fields
            raise ArgumentError, "line #{line_number} is in an unrecognized format [#{line}]"
          end

          @target_name = fields[1]
          @target_accession = fields[2]
          @target_length = fields[3].to_i
          @query_name = fields[4]
          @query_accession = fields[5]
          @query_length = fields[6].to_i
          @full_sequence_e_value = fields[7].to_f
          @full_sequence_score = fields[8].to_f
          @full_sequence_bias = fields[9].to_f
          @domain_number = fields[10].to_i
          @domain_sum = fields[11].to_i
          @domain_c_e_value = fields[12].to_f
          @domain_i_e_value = fields[13].to_f
          @domain_score = fields[14].to_f
          @domain_bias = fields[15].to_f
          @hmm_coord_from = fields[16].to_i
          @hmm_coord_to = fields[17].to_i
          @ali_coord_from = fields[18].to_i
          @ali_coord_to = fields[19].to_i
          @env_coord_from = fields[20].to_i
          @env_coord_to = fields[21].to_i
          @acc = fields[22].to_f
          @target_description = fields[23]
        end # initialize

        attr_reader :target_length
        attr_reader :query_length
        attr_reader :domain_number
        attr_reader :domain_sum
        attr_reader :domain_c_e_value
        attr_reader :domain_i_e_value
        attr_reader :domain_score
        attr_reader :domain_bias
        attr_reader :hmm_coord_from
        attr_reader :hmm_coord_to
        attr_reader :ali_coord_from
        attr_reader :ali_coord_to
        attr_reader :env_coord_from
        attr_reader :env_coord_to
        attr_reader :acc

      end # class PerDomainHit
    end # class HMMER3
  end # class HMMER
end # module Bio
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord
|
2
|
+
# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target
|
3
|
+
#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
|
4
|
+
#comment PF08016.5 426 jgi|Nemve1|7|gw.28.1.1 - 1604 5.9e-19 67.4 70.5 1 8 0.00053 1.1 7.3 0.4 220 264 142 191 134 200 0.73 Polycystin cation channel
|
5
|
+
Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 1 4 1.5e-41 3e-38 130.8 11.1 3 171 140 307 139 346 0.81 Ion transport protein
|
6
|
+
Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 2 4 9.1e-45 1.8e-41 141.3 13.1 4 200 479 664 476 665 0.97 Ion transport protein
|
7
|
+
Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 3 4 5.2e-45 1e-41 142.1 14.0 1 201 900 1117 900 1117 0.96 Ion transport protein
|
8
|
+
Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 4 4 9.2e-51 1.8e-47 160.9 11.3 1 201 1217 1423 1217 1423 0.97 Ion transport protein
|
9
|
+
PKD_channel PF08016.5 426 jgi|Nemve1|7|gw.28.1.1 - 1604 5.9e-19 67.4 70.5 1 8 0.00053 1.1 7.3 0.4 220 264 142 191 134 200 0.73 Polycystin cation channel
|
@@ -0,0 +1,4 @@
|
|
1
|
+
# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
|
2
|
+
# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target
|
3
|
+
#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------
|
4
|
+
wnt PF00110.12 sp|P56705|WNT4_HUMAN - 9.7e-130 418.5 23.4 1.1e-129 418.3 16.2 1.0 1 0 0 1 1 1 1 wnt family
|
@@ -0,0 +1,5 @@
|
|
1
|
+
# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord
|
2
|
+
# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target
|
3
|
+
#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
|
4
|
+
wnt PF00110.12 310 sp|P56705|WNT4_HUMAN - 351 1.2e-125 418.5 23.4 1 1 2.2e-129 1.3e-125 418.3 16.2 2 310 43 351 42 351 0.97 wnt family
|
5
|
+
MEF2_binding PF09047.3 35 sp|P56705|WNT4_HUMAN - 351 0.17 11.2 1.0 1 1 6.4e-05 0.38 10.1 0.7 3 21 31 50 30 51 0.84 MEF2 binding
|
@@ -0,0 +1,3 @@
|
|
1
|
+
# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord
|
2
|
+
# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target
|
3
|
+
#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
|
@@ -0,0 +1,4 @@
|
|
1
|
+
# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
|
2
|
+
# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target
|
3
|
+
#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------
|
4
|
+
wnt PF00110.12 sp|P56705|WNT4_HUMAN - 9.7e-130 418.5 23.4 1.1e-129 418.3 16.2 1.0 1 2 3 4 5 6 7 wnt family
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-hmmer3_report'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,240 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2011
|
5
|
+
# Christian Zmasek <cmzmasek@yahoo.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
require 'helper'
|
11
|
+
|
12
|
+
module Bio
|
13
|
+
# Opens the HMMER fixture files used by the unit tests.
module Testreport

  # Fixture directory, as a relative path string.
  HMMER_TEST_DATA = Pathname.new(File.join('test','data','HMMER')).cleanpath.to_s

  class << self
    # Each method below returns an open File on the fixture of the same
    # name; the file is not closed here.

    def hmmsearch_domtblout_empty
      open_fixture('hmmsearch_domtblout_empty.out')
    end

    def hmmsearch_domtblout
      open_fixture('hmmsearch_domtblout.out')
    end

    def hmmsearch_tblout
      open_fixture('hmmsearch_tblout.out')
    end

    def hmmscan_domtblout
      open_fixture('hmmscan_domtblout.out')
    end

    def hmmscan_tblout
      open_fixture('hmmscan_tblout.out')
    end

    private

    # Opens +basename+ inside HMMER_TEST_DATA for reading.
    def open_fixture(basename)
      File.open(File.join(HMMER_TEST_DATA, basename))
    end
  end

end # Testreport
|
38
|
+
|
39
|
+
class Testreport_class_methods < Test::Unit::TestCase
|
40
|
+
|
41
|
+
|
42
|
+
def test_hmmsearch_domtblout_empty
|
43
|
+
filename = Testreport.hmmsearch_domtblout_empty
|
44
|
+
|
45
|
+
assert_instance_of(Bio::HMMER::HMMER3::Report,
|
46
|
+
report = Bio::HMMER::HMMER3::Report.new(filename))
|
47
|
+
|
48
|
+
assert_instance_of(Array,
|
49
|
+
report.hits)
|
50
|
+
|
51
|
+
assert_equal(0, report.hits.length)
|
52
|
+
end #
|
53
|
+
|
54
|
+
|
55
|
+
def test_hmmsearch_domtblout
|
56
|
+
filename = Testreport.hmmsearch_domtblout
|
57
|
+
|
58
|
+
assert_instance_of(Bio::HMMER::HMMER3::Report,
|
59
|
+
report = Bio::HMMER::HMMER3::Report.new(filename))
|
60
|
+
|
61
|
+
assert_instance_of(Array,
|
62
|
+
report.hits)
|
63
|
+
|
64
|
+
assert_instance_of(Bio::HMMER::HMMER3::PerDomainHit,
|
65
|
+
report.hits[0])
|
66
|
+
|
67
|
+
assert_kind_of(Bio::HMMER::HMMER3::Hit,
|
68
|
+
report.hits[0])
|
69
|
+
|
70
|
+
assert_equal(2,report.hits.length)
|
71
|
+
assert_equal("wnt", report.hits[0].target_name)
|
72
|
+
assert_equal("PF00110.12", report.hits[0].target_accession)
|
73
|
+
assert_equal(310, report.hits[0].target_length)
|
74
|
+
assert_equal("sp|P56705|WNT4_HUMAN", report.hits[0].query_name)
|
75
|
+
assert_equal("-", report.hits[0].query_accession)
|
76
|
+
assert_equal(351, report.hits[0].query_length)
|
77
|
+
assert_equal(1.2e-125, report.hits[0].full_sequence_e_value)
|
78
|
+
assert_equal(418.5, report.hits[0].full_sequence_score)
|
79
|
+
assert_equal(23.4, report.hits[0].full_sequence_bias)
|
80
|
+
assert_equal(1, report.hits[0].domain_number)
|
81
|
+
assert_equal(1, report.hits[0].domain_sum)
|
82
|
+
assert_equal(2.2e-129, report.hits[0].domain_c_e_value)
|
83
|
+
assert_equal(1.3e-125, report.hits[0].domain_i_e_value)
|
84
|
+
assert_equal(418.3, report.hits[0].domain_score)
|
85
|
+
assert_equal(16.2, report.hits[0].domain_bias)
|
86
|
+
assert_equal(2, report.hits[0].hmm_coord_from)
|
87
|
+
assert_equal(310, report.hits[0].hmm_coord_to)
|
88
|
+
assert_equal(43, report.hits[0].ali_coord_from)
|
89
|
+
assert_equal(351, report.hits[0].ali_coord_to)
|
90
|
+
assert_equal(42, report.hits[0].env_coord_from)
|
91
|
+
assert_equal(351, report.hits[0].env_coord_to)
|
92
|
+
assert_equal(0.97, report.hits[0].acc)
|
93
|
+
assert_equal("wnt family", report.hits[0].target_description)
|
94
|
+
assert_equal("MEF2_binding", report.hits[1].target_name)
|
95
|
+
assert_equal(0.17, report.hits[1].full_sequence_e_value)
|
96
|
+
assert_equal(6.4e-05, report.hits[1].domain_c_e_value)
|
97
|
+
assert_equal("MEF2 binding", report.hits[1].target_description)
|
98
|
+
|
99
|
+
end # test_hmmsearch_domtblout
|
100
|
+
|
101
|
+
def test_hmmsearch_tblout
|
102
|
+
|
103
|
+
filename = Testreport.hmmsearch_tblout
|
104
|
+
|
105
|
+
assert_instance_of(Bio::HMMER::HMMER3::Report,
|
106
|
+
report = Bio::HMMER::HMMER3::Report.new(filename))
|
107
|
+
|
108
|
+
assert_instance_of(Array,
|
109
|
+
report.hits)
|
110
|
+
|
111
|
+
assert_instance_of(Bio::HMMER::HMMER3::PerSequenceHit,
|
112
|
+
report.hits[0])
|
113
|
+
|
114
|
+
assert_kind_of(Bio::HMMER::HMMER3::Hit,
|
115
|
+
report.hits[0])
|
116
|
+
|
117
|
+
assert_equal(1,report.hits.length)
|
118
|
+
assert_equal("wnt", report.hits[0].target_name)
|
119
|
+
assert_equal("PF00110.12", report.hits[0].target_accession)
|
120
|
+
assert_equal("sp|P56705|WNT4_HUMAN", report.hits[0].query_name)
|
121
|
+
assert_equal("-", report.hits[0].query_accession)
|
122
|
+
assert_equal(9.7e-130, report.hits[0].full_sequence_e_value)
|
123
|
+
assert_equal(418.5, report.hits[0].full_sequence_score)
|
124
|
+
assert_equal(23.4, report.hits[0].full_sequence_bias)
|
125
|
+
assert_equal(1.1e-129, report.hits[0].best_1_domain_e_value)
|
126
|
+
assert_equal(418.3, report.hits[0].best_1_domain_score)
|
127
|
+
assert_equal(16.2, report.hits[0].best_1_domain_bias)
|
128
|
+
assert_equal(1.0, report.hits[0].domain_number_est_exp)
|
129
|
+
assert_equal(1, report.hits[0].domain_number_est_reg)
|
130
|
+
assert_equal(2, report.hits[0].domain_number_est_clu)
|
131
|
+
assert_equal(3, report.hits[0].domain_number_est_ov)
|
132
|
+
assert_equal(4, report.hits[0].domain_number_est_env)
|
133
|
+
assert_equal(5, report.hits[0].domain_number_est_dom)
|
134
|
+
assert_equal(6, report.hits[0].domain_number_est_rep)
|
135
|
+
assert_equal(7, report.hits[0].domain_number_est_inc)
|
136
|
+
assert_equal("wnt family", report.hits[0].target_description)
|
137
|
+
|
138
|
+
end # test_hmmsearch_tblout
|
139
|
+
|
140
|
+
def test_hmmscan_domtblout
|
141
|
+
|
142
|
+
filename = Testreport.hmmscan_domtblout
|
143
|
+
|
144
|
+
assert_instance_of(Bio::HMMER::HMMER3::Report,
|
145
|
+
report = Bio::HMMER::HMMER3::Report.new(filename))
|
146
|
+
|
147
|
+
assert_instance_of(Array,
|
148
|
+
report.hits)
|
149
|
+
|
150
|
+
assert_instance_of(Bio::HMMER::HMMER3::PerDomainHit,
|
151
|
+
report.hits[0])
|
152
|
+
|
153
|
+
assert_kind_of(Bio::HMMER::HMMER3::Hit,
|
154
|
+
report.hits[0])
|
155
|
+
|
156
|
+
assert_equal(5,report.hits.length)
|
157
|
+
assert_equal("Ion_trans", report.hits[0].target_name)
|
158
|
+
assert_equal("PF00520.24", report.hits[0].target_accession)
|
159
|
+
assert_equal(201, report.hits[0].target_length)
|
160
|
+
assert_equal("jgi|Nemve1|7|gw.28.1.1", report.hits[0].query_name)
|
161
|
+
assert_equal("-", report.hits[0].query_accession)
|
162
|
+
assert_equal(1604, report.hits[0].query_length)
|
163
|
+
assert_equal(6.3e-169, report.hits[0].full_sequence_e_value)
|
164
|
+
assert_equal(557.4, report.hits[0].full_sequence_score)
|
165
|
+
assert_equal(95.3, report.hits[0].full_sequence_bias)
|
166
|
+
assert_equal(1, report.hits[0].domain_number)
|
167
|
+
assert_equal(4, report.hits[0].domain_sum)
|
168
|
+
assert_equal(1.5e-41, report.hits[0].domain_c_e_value)
|
169
|
+
assert_equal(3e-38, report.hits[0].domain_i_e_value)
|
170
|
+
assert_equal(130.8, report.hits[0].domain_score)
|
171
|
+
assert_equal(11.1, report.hits[0].domain_bias)
|
172
|
+
assert_equal(3, report.hits[0].hmm_coord_from)
|
173
|
+
assert_equal(171, report.hits[0].hmm_coord_to)
|
174
|
+
assert_equal(140, report.hits[0].ali_coord_from)
|
175
|
+
assert_equal(307, report.hits[0].ali_coord_to)
|
176
|
+
assert_equal(139, report.hits[0].env_coord_from)
|
177
|
+
assert_equal(346, report.hits[0].env_coord_to)
|
178
|
+
assert_equal(0.81, report.hits[0].acc)
|
179
|
+
assert_equal("Ion transport protein", report.hits[0].target_description)
|
180
|
+
end # test_hmmscan_domtblout
|
181
|
+
|
182
|
+
def test_hmmscan_tblout
|
183
|
+
filename = Testreport.hmmscan_tblout
|
184
|
+
|
185
|
+
assert_instance_of(Bio::HMMER::HMMER3::Report,
|
186
|
+
report = Bio::HMMER::HMMER3::Report.new(filename))
|
187
|
+
|
188
|
+
assert_instance_of(Array,
|
189
|
+
report.hits)
|
190
|
+
|
191
|
+
assert_instance_of(Bio::HMMER::HMMER3::PerSequenceHit,
|
192
|
+
report.hits[0])
|
193
|
+
|
194
|
+
assert_kind_of(Bio::HMMER::HMMER3::Hit,
|
195
|
+
report.hits[0])
|
196
|
+
|
197
|
+
assert_equal(1,report.hits.length)
|
198
|
+
assert_equal("wnt", report.hits[0].target_name)
|
199
|
+
assert_equal("PF00110.12", report.hits[0].target_accession)
|
200
|
+
assert_equal("sp|P56705|WNT4_HUMAN", report.hits[0].query_name)
|
201
|
+
assert_equal("-", report.hits[0].query_accession)
|
202
|
+
assert_equal(9.7e-130, report.hits[0].full_sequence_e_value)
|
203
|
+
assert_equal(418.5, report.hits[0].full_sequence_score)
|
204
|
+
assert_equal(23.4, report.hits[0].full_sequence_bias)
|
205
|
+
assert_equal(1.1e-129, report.hits[0].best_1_domain_e_value)
|
206
|
+
assert_equal(418.3, report.hits[0].best_1_domain_score)
|
207
|
+
assert_equal(16.2, report.hits[0].best_1_domain_bias)
|
208
|
+
assert_equal(1.0, report.hits[0].domain_number_est_exp)
|
209
|
+
assert_equal(1, report.hits[0].domain_number_est_reg)
|
210
|
+
assert_equal(0, report.hits[0].domain_number_est_clu)
|
211
|
+
assert_equal(0, report.hits[0].domain_number_est_ov)
|
212
|
+
assert_equal(1, report.hits[0].domain_number_est_env)
|
213
|
+
assert_equal(1, report.hits[0].domain_number_est_dom)
|
214
|
+
assert_equal(1, report.hits[0].domain_number_est_rep)
|
215
|
+
assert_equal(1, report.hits[0].domain_number_est_inc)
|
216
|
+
assert_equal("wnt family", report.hits[0].target_description)
|
217
|
+
end # test_hmmscan_tblout
|
218
|
+
|
219
|
+
def test_string_input
|
220
|
+
data = String.new
|
221
|
+
data << '# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord'
|
222
|
+
data << "\n" << '# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target'
|
223
|
+
data << "\n" << '#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------'
|
224
|
+
data << "\n" << 'Bcl-2 PF00452.13 101 sp|P10415|BCL2_HUMAN - 239 3.7e-30 103.7 0.1 1 1 7.9e-34 4.9e-30 103.3 0.0 1 101 97 195 97 195 0.99 Apoptosis regulator proteins, Bcl-2 family'
|
225
|
+
data << "\n" << 'BH4 PF02180.11 27 sp|P10415|BCL2_HUMAN - 239 3.9e-15 54.6 0.1 1 1 1.3e-18 8.2e-15 53.6 0.1 2 26 8 32 7 33 0.94 Bcl-2 homology region 4'
|
226
|
+
data << "\n"
|
227
|
+
|
228
|
+
report = Bio::HMMER::HMMER3::Report.new(data)
|
229
|
+
hits = report.hits
|
230
|
+
hits.each do |hit|
|
231
|
+
assert_kind_of Bio::HMMER::HMMER3::PerDomainHit, hit
|
232
|
+
end
|
233
|
+
assert_equal 2, hits.length
|
234
|
+
assert_equal 'Bcl-2', hits[0].target_name
|
235
|
+
assert_equal 'BH4', hits[1].target_name
|
236
|
+
end
|
237
|
+
|
238
|
+
end # Testreport _class_methods
|
239
|
+
|
240
|
+
end # Bio
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-hmmer3_report
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Christian Zmasek
|
9
|
+
- Ben J Woodcroft
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2012-05-18 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: shoulda
|
17
|
+
requirement: &76510780 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *76510780
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rdoc
|
28
|
+
requirement: &76510420 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.12'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *76510420
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: jeweler
|
39
|
+
requirement: &76510040 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 1.8.3
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *76510040
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: bundler
|
50
|
+
requirement: &76509650 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.0.21
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *76509650
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: bio
|
61
|
+
requirement: &76524960 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: 1.4.2
|
67
|
+
type: :development
|
68
|
+
prerelease: false
|
69
|
+
version_requirements: *76524960
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rdoc
|
72
|
+
requirement: &76524250 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '3.12'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *76524250
|
81
|
+
description: Enables parsing of HMMER version 3 reports
|
82
|
+
email: gmail.com after donttrustben
|
83
|
+
executables: []
|
84
|
+
extensions: []
|
85
|
+
extra_rdoc_files:
|
86
|
+
- LICENSE.txt
|
87
|
+
- README.md
|
88
|
+
files:
|
89
|
+
- .document
|
90
|
+
- .travis.yml
|
91
|
+
- Gemfile
|
92
|
+
- LICENSE.txt
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- VERSION
|
96
|
+
- lib/bio-hmmer3_report.rb
|
97
|
+
- lib/bio/appl/hmmer/hmmer3/report.rb
|
98
|
+
- test/data/HMMER/hmmscan_domtblout.out
|
99
|
+
- test/data/HMMER/hmmscan_tblout.out
|
100
|
+
- test/data/HMMER/hmmsearch_domtblout.out
|
101
|
+
- test/data/HMMER/hmmsearch_domtblout_empty.out
|
102
|
+
- test/data/HMMER/hmmsearch_tblout.out
|
103
|
+
- test/helper.rb
|
104
|
+
- test/unit/bio/appl/hmmer/test_hmmer3report.rb
|
105
|
+
homepage: http://github.com/wwood/bioruby-hmmer3_report
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
hash: -284128881
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
|
+
none: false
|
123
|
+
requirements:
|
124
|
+
- - ! '>='
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 1.8.17
|
130
|
+
signing_key:
|
131
|
+
specification_version: 3
|
132
|
+
summary: Enables parsing of HMMER version 3 reports
|
133
|
+
test_files: []
|