bio-hmmer3_report 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9279db9072373e76e70581cc5b0936a0cd229191
4
+ data.tar.gz: 2ea741942401e70f4fb1ba155a0f351c4d527d4c
5
+ SHA512:
6
+ metadata.gz: 730dea0d04c44f88c23a75c47fa50c692d18310b1b71df36806616eebdfbb5608fc9fc38319d67118c0a91cee6abc61904c951eb630f9e40282e659690cae4f9
7
+ data.tar.gz: 7a3a4effa90354ba7a1caba1167d95349b514164d95d142f5cb10e3b98b842eade5ab1a6a035414c4ca05ab23254247b2de29c56685bbd6613564d2ef3214716
data/Gemfile CHANGED
@@ -2,14 +2,16 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
+ gem 'bio-logger', '~> 1.0'
5
6
 
6
7
  # Add dependencies to develop your gem here.
7
8
  # Include everything needed to run rake, tests, features, etc.
8
9
  group :development do
9
- gem "shoulda", ">= 0"
10
- gem "rdoc", "~> 3.12"
11
- gem "jeweler", "~> 1.8.3"
12
- gem "bundler", ">= 1.0.21"
13
- gem "bio", ">= 1.4.2"
10
+ gem 'shoulda', '~> 3.5', '>= 3.5.0'
14
11
  gem "rdoc", "~> 3.12"
12
+ gem 'jeweler', '~> 1.8', '>= 1.8.3'
13
+ gem 'bundler', '~> 1.6', '>= 1.6.2'
14
+ gem 'bio', '~> 1.4', '>= 1.4.2'
15
+ gem 'minitest', '~> 4.7', '>= 4.7.5'
15
16
  end
17
+
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ Jeweler::Tasks.new do |gem|
18
18
  gem.homepage = "http://github.com/wwood/bioruby-hmmer3_report"
19
19
  gem.license = "MIT"
20
20
  gem.summary = %Q{Enables parsing of HMMER version 3 reports}
21
- gem.description = %Q{Enables parsing of HMMER version 3 reports}
21
+ gem.description = %Q{Enables programmatic parsing of HMMER version 3 reports}
22
22
  gem.email = "gmail.com after donttrustben"
23
23
  gem.authors = ["Christian Zmasek","Ben J Woodcroft"]
24
24
  # dependencies defined in Gemfile
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.1.0
@@ -1,2 +1,4 @@
1
- require 'bio/appl/hmmer/hmmer3/report'
1
+ require 'bio-logger'
2
+ Bio::Log::LoggerPlus.new('bio-hmmer3report')
2
3
 
4
+ require 'bio/appl/hmmer/hmmer3/default_report'
@@ -0,0 +1,226 @@
1
+
2
+ require 'bio/appl/hmmer/hmmer3/tabular_report'
3
+
4
+
5
+ module Bio
6
+ class HMMER
7
+ class HMMER3
8
+
9
# Parse the text of one or more HMMER3 reports.
#
# multiple_report_text:: the report text; anything responding to
#                        #each_line(separator), e.g. a String or an open File
# options::              :format => :tblout or :domtblout selects the tabular
#                        parser; anything else selects the default
#                        (human readable) hmmsearch output parser
#
# For tabular formats a single TabularReport is returned.
# Otherwise the text is split on the "\n//\n" record delimiter and one
# DefaultHMMSearchReport is built per record. Without a block the reports are
# returned as an array; with a block each report is yielded and the returned
# array is empty.
def self.reports(multiple_report_text, options={})
  if [:domtblout, :tblout].include?(options[:format])
    return TabularReport.new(multiple_report_text, options[:format])
  end

  ary = []
  multiple_report_text.each_line("\n//\n") do |report|
    # Blank lines after the final "//" come through as a whitespace-only
    # chunk; that is not a report, so do not build one from it.
    next if report.strip.empty?

    parsed = DefaultHMMSearchReport.new(report)
    if block_given?
      yield parsed
    else
      ary << parsed
    end
  end
  ary
end
24
+
25
+ # This class is for parsing HMMSearch outputs from the default output
26
+ class DefaultHMMSearchReport
27
+ # Delimiter of each entry for Bio::FlatFile support.
28
+ DELIMITER = RS = "\n//\n"
29
+
30
def initialize(data)
  # Logger looked up under the 'bio-hmmer3report' name; used for debug
  # output while parsing hits.
  @log = Bio::Log::LoggerPlus['bio-hmmer3report']

  # The raw report text divided into named sections (a hash, previously
  # called subdata). Individual sections are parsed lazily by the accessors.
  @report_chunks = get_subdata(data)
end
36
+
37
# Split the text of a single report into its sections so that each section
# can be parsed separately. Returns a hash with the String keys "query",
# "hit", "alignment" and "statistics" (each nil when that section is not
# found), plus "header" when the report starts with the hmmsearch banner.
def get_subdata(data)
  # The banner is written as a comment ("# hmmsearch :: search profile(s)
  # against a sequence database"), so the leading "# " has to be allowed
  # for; the bare form is still accepted.
  header_prefix = '\A(?:#\s*)?hmmsearch :: search'
  query_prefix = '^Query:' ## Query: 2-Hacid_dh [M=133]
  hit_prefix = '^Scores for complete sequences' ## Scores for complete sequences (score includes all domains):
  aln_prefix = '^Domain annotation for each sequence' ## Domain annotation for each sequence (and alignments):
  stat_prefix = '^\nInternal pipeline statistics summary:' ## Internal pipeline statistics summary:

  subdata = {}

  # The banner only occurs in the first report of a multi-report file, so
  # the "header" key is only set when it is present.
  if data =~ /#{header_prefix}/
    subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
  end

  # Each section runs from its own heading up to (not including) the
  # heading of the next one.
  subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
  subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{aln_prefix})/m]
  subdata["alignment"] = data[/(#{aln_prefix}.+?)(?=#{stat_prefix})/m]
  subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]

  subdata
end
private :get_subdata
61
+
62
# Parse the "Query:", "Accession:" and "Description:" lines of the query
# section, storing the values in @query, @query_accession and
# @query_description respectively.
#
# Raises a RuntimeError when the report has no query section, or when a line
# in it is not one of the three recognised "Key: value" headers.
def parse_query
  chunk = @report_chunks['query']
  raise "No query section found in hmmsearch report" if chunk.nil?

  chunk.each_line do |line|
    # Split on the first colon only, so colons in the value itself are kept
    # and a header with an empty value still yields a (blank) value.
    key, value = line.split(':', 2)
    raise "Unexpected form of query header found in hmmsearch query chunk #{line.inspect}" if value.nil?
    value = value.strip

    case key
    when 'Query'
      @query = value
    when 'Accession'
      @query_accession = value
    when 'Description'
      @query_description = value
    else
      raise "Unexpected form of query header found in hmmsearch query chunk #{line.inspect}"
    end
  end
end
private :parse_query
82
+
83
# Value of the "Query:" header line. The query section is parsed on demand
# whenever no value has been stored yet.
def query
  parse_query if @query.nil?
  @query
end
88
+
89
# Value of the "Accession:" header line. The query section is parsed on
# demand whenever no value has been stored yet.
def query_accession
  parse_query if @query_accession.nil?
  @query_accession
end
94
+
95
# Value of the "Description:" header line. The query section is parsed on
# demand whenever no value has been stored yet.
def query_description
  parse_query if @query_description.nil?
  @query_description
end
100
+
101
+ # TODO: parse statistical information
102
+
103
+ # TODO: parse sequence-wise hits
104
+ # (these can be derived from the domain_hits i.e. #hits, mind you, so ..).
105
+ # Ah, actually in rare cases when this happens that isn't true:
106
+ # " [No individual domains that satisfy reporting thresholds (although complete target did)]"
107
+
108
# Return an array of Hit objects parsed from the "Domain annotation for each
# sequence" section of this report, one per hit sequence that has at least
# one reported domain. Each Hit carries its domains as Hit::Hsp objects.
#
# Returns [] when the report has no alignment section or no hits. A hit
# flagged "[No individual domains that satisfy reporting thresholds ...]" is
# skipped; the other hits in the report are still returned.
def hits
  alignment = @report_chunks['alignment']
  return [] if alignment.nil? || alignment !~ /^>>/

  found = []
  # Every hit starts with ">>" at the beginning of a line. The first piece
  # of the split is the section heading, not a hit, so it is dropped.
  alignment.split(/^>>/).drop(1).each do |seq_annot|
    # Blank lines separate the stanzas of a hit. The first stanza looks like
    #   >> 637984252 Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase [...]
    #      #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
    #    ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
    #      1 !  120.2   0.5   7.6e-39   1.2e-35       1     133 []       3     313 ..       3     313 .. 0.98
    # and the following ones hold the alignment blocks:
    #   Alignments for each domain:
    #   == domain 1  score: 120.2 bits;  conditional E-value: 7.6e-39
    #        <consensus structure>  CS     (optional)
    #        <reference annotation> RF     (optional)
    #   2-Hacid_dh   1 vlileplree...  96
    #                  +++ e++ ++...
    #    637982115   3 IVVAEKIAKA... 101
    #                  6889999***... PP
    stanzas = seq_annot.split("\n\n")
    table_lines = stanzas[0].to_s.split("\n")

    hit = Hit.new
    hit.sequence_name = table_lines[0].to_s.gsub(/^ /, '')
    @log.debug "Now parsing table for #{hit.sequence_name}" if @log.debug?

    # No domain table follows for this sequence, so there is nothing to
    # build HSPs from. Skip just this hit rather than abandoning the report.
    next if table_lines[1] =~ /^\s*\[No individual domains that/

    # Rows after the two table heading lines describe one domain each.
    hsps = (table_lines[3..-1] || []).map { |row| parse_hsp_row(row) }

    current_hsp = nil
    stanzas.drop(1).each_with_index do |stanza_text, index|
      # Left over blank lines at the end of the section
      next if stanza_text.strip.empty?

      stanza = stanza_text.split("\n")
      # The first alignment stanza starts with "Alignments for each domain:"
      line_offset = index == 0 ? 1 : 0

      # A "== domain N" line starts the alignment of a new HSP; a stanza
      # without one continues the alignment of the current HSP.
      if (domain = stanza[line_offset].match(/^\s*== domain (\d+)/))
        current_hsp = hsps[domain[1].to_i - 1]
        line_offset += 1
      end
      if current_hsp.nil?
        raise "Unexpected form of alignment found in hmmsearch alignment chunk #{stanza_text.inspect}"
      end

      # Skip the optional CS and RF annotation lines
      line_offset += 1 if annotation_line?(stanza[line_offset], 'CS')
      line_offset += 1 if annotation_line?(stanza[line_offset], 'RF')

      # Model line is "<name> <from> <sequence> <to>"; the target sequence
      # line, in the same layout, sits two lines further down.
      current_hsp.hmmseq = (current_hsp.hmmseq || '') + stanza[line_offset].split[2]
      current_hsp.flatseq = (current_hsp.flatseq || '') + stanza[2 + line_offset].split[2]
    end

    hit.hsps = hsps
    found.push hit

    @log.debug "Parsed alignments for sequence #{hit.sequence_name}" if @log.debug?
  end

  found
end

# Build a Hit::Hsp from one row of the per-domain table. Splitting on
# whitespace without a pattern ignores the row's indentation, so the columns
# are: number, "!"/"?" flag, score, bias, c-Evalue, i-Evalue, hmmfrom,
# hmm to, boundary marks, alifrom, ali to, boundary marks, envfrom, env to,
# boundary marks, acc.
def parse_hsp_row(line)
  fields = line.split
  hsp = Hit::Hsp.new
  hsp.number = fields[0].to_i
  hsp.score = fields[2].to_f
  hsp.bias = fields[3].to_f
  hsp.c_evalue = fields[4].to_f
  hsp.i_evalue = fields[5].to_f
  hsp.hmmfrom = fields[6].to_i
  hsp.hmm_to = fields[7].to_i
  hsp.alifrom = fields[9].to_i
  hsp.ali_to = fields[10].to_i
  hsp.envfrom = fields[12].to_i
  hsp.env_to = fields[13].to_i
  hsp.acc = fields[15].to_f
  hsp
end
private :parse_hsp_row

# True when the alignment line is an annotation line ending in the given tag
# ('CS' or 'RF'), i.e. "<annotation> <tag>".
def annotation_line?(line, tag)
  tokens = line.to_s.split
  tokens.length >= 2 && tokens.last == tag
end
private :annotation_line?
203
+
204
+ # TODO: There is some overlapping code here between the tabular report Hit object and this object, probably should DRY it up a bit.
205
# A single hit sequence from the domain annotation section of a report,
# together with the domains (HSPs) reported for it.
class Hit
  # The text following ">>" on the hit's heading line
  attr_accessor :sequence_name

  # Array of Hit::Hsp objects, one per reported domain
  attr_accessor :hsps

  # A new hit starts out with no HSPs.
  def initialize
    @hsps = []
  end

  # One row of the per-domain table of a hit, plus the aligned sequences
  # collected from the alignment blocks that follow the table.
  class Hsp
    # Columns of the per-domain table, named after the table headings
    attr_accessor :number, :score, :bias, :c_evalue, :i_evalue, :hmmfrom, :hmm_to, :alifrom, :ali_to, :envfrom, :env_to, :acc

    # Aligned model sequence and aligned target sequence, concatenated over
    # all alignment blocks of the domain
    attr_accessor :hmmseq, :flatseq
  end # class Hsp
end # class Hit
220
+
221
+ end #class DefaultHMMSearchReport
222
+ end # class HMMER3
223
+
224
+ end # class HMMER
225
+
226
+ end # module Bio
@@ -16,7 +16,7 @@ module Bio
16
16
  #
17
17
  # Parser class for hmmsearch and hmmscan in the HMMER 3 package. See README of this biogem for more information.
18
18
  class HMMER3
19
- class Report
19
+ class TabularReport
20
20
  def initialize(hmmer_output, format = nil)
21
21
 
22
22
  @hits = Array.new
@@ -0,0 +1,39 @@
1
+ # hmmsearch :: search profile(s) against a sequence database
2
+ # HMMER 3.0 (March 2010); http://hmmer.org/
3
+ # Copyright (C) 2010 Howard Hughes Medical Institute.
4
+ # Freely distributed under the GNU General Public License (GPLv3).
5
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ # query HMM file: /srv/whitlam/bio/db/pfam/26/Pfam-A.hmm
7
+ # target sequence database: /srv/whitlam/bio/db/img/3.5/genomes/finished//637000007/637000007.genes.faa
8
+ # model-specific thresholding: TC cutoffs
9
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10
+
11
+ Query: 3Beta_HSD [M=280]
12
+ Accession: PF01073.14
13
+ Description: 3-beta hydroxysteroid dehydrogenase/isomerase family
14
+ Scores for complete sequences (score includes all domains):
15
+ --- full sequence --- --- best 1 domain --- -#dom-
16
+ E-value score bias E-value score bias exp N Sequence Description
17
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
18
+ 1.8e-06 25.1 0.0 0.0051 13.8 0.2 3.0 0 637862688 Adeh_1165 Long-chain-fatty-acid CoA ligase [Anaero
19
+
20
+
21
+ Domain annotation for each sequence (and alignments):
22
+ >> 637862688 Adeh_1165 Long-chain-fatty-acid CoA ligase [Anaeromyxobacter dehalogenans 2CP-C]
23
+ [No individual domains that satisfy reporting thresholds (although complete target did)]
24
+
25
+
26
+
27
+ Internal pipeline statistics summary:
28
+ -------------------------------------
29
+ Query model(s): 1 (280 nodes)
30
+ Target sequences: 4361 (1522812 residues)
31
+ Passed MSV filter: 103 (0.0236184); expected 87.2 (0.02)
32
+ Passed bias filter: 102 (0.0233891); expected 87.2 (0.02)
33
+ Passed Vit filter: 43 (0.00986012); expected 4.4 (0.001)
34
+ Passed Fwd filter: 28 (0.00642055); expected 0.0 (1e-05)
35
+ Initial search space (Z): 4361 [actual number of targets]
36
+ Domain search space (domZ): 22 [number of targets reported over threshold]
37
+ # CPU time: 0.29u 0.04s 00:00:00.32 Elapsed: 00:00:00.14
38
+ # Mc/sec: 3045.62
39
+ //
@@ -0,0 +1,100 @@
1
+ # hmmsearch :: search profile(s) against a sequence database
2
+ # HMMER 3.0 (March 2010); http://hmmer.org/
3
+ # Copyright (C) 2010 Howard Hughes Medical Institute.
4
+ # Freely distributed under the GNU General Public License (GPLv3).
5
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ # query HMM file: /srv/whitlam/bio/db/pfam/26/Pfam-A.hmm
7
+ # target sequence database: /srv/whitlam/bio/db/img/3.5/genomes/finished//637000001/637000001.genes.faa
8
+ # model-specific thresholding: TC cutoffs
9
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10
+
11
+ Query: 3HCDH [M=97]
12
+ Accession: PF00725.17
13
+ Description: 3-hydroxyacyl-CoA dehydrogenase, C-terminal domain
14
+ Scores for complete sequences (score includes all domains):
15
+ --- full sequence --- --- best 1 domain --- -#dom-
16
+ E-value score bias E-value score bias exp N Sequence Description
17
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
18
+ 1.7e-36 122.8 0.0 3.6e-36 121.8 0.0 1.6 1 637983221 Acid345_1212 3-hydroxybutyryl-CoA dehydrogenase [K
19
+ 2.4e-36 122.4 0.1 8.3e-36 120.7 0.0 1.9 1 637986361 Acid345_4328 3-hydroxybutyryl-CoA dehydrogenase [K
20
+ 5.7e-24 82.7 0.1 1.3e-14 52.7 0.0 3.4 2 637984486 Acid345_2469 3-hydroxyacyl-CoA dehydrogenase, NAD-
21
+ 5.3e-20 70.0 0.0 1.2e-19 68.8 0.0 1.6 1 637984778 Acid345_2761 3-hydroxybutyryl-CoA dehydrogenase [K
22
+
23
+
24
+ Domain annotation for each sequence (and alignments):
25
+ >> 637983221 Acid345_1212 3-hydroxybutyryl-CoA dehydrogenase [Korebacter versatilis Ellin345]
26
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
27
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
28
+ 1 ! 121.8 0.0 3e-39 3.6e-36 1 97 [] 185 281 .. 185 281 .. 1.00
29
+
30
+ Alignments for each domain:
31
+ == domain 1 score: 121.8 bits; conditional E-value: 3e-39
32
+ ---HHHHHHHHHHHHHHHHHTTSS-HHHHHHHHHH.-------HHHHHH---HHHHHHHHHHHCTCCCHGG-..HHHHHHHHTT-----------EC CS
33
+ 3HCDH 1 GFvvnRvlapllneairlveegvatpediDaamkkglGlpmGpfelsdlvgldvakkilevlaeelgerayapsplleklveagrlgrktgkgfyky 97
34
+ GF+vnR+l+p+l +air +eegv ++ diD+amk+g+G+pmGpf+l+d vgld++++i++v+ +e+ e+++a++pll+++v ag +grktgkgfy+y
35
+ 637983221 185 GFIVNRLLVPYLLDAIRAYEEGVGSIVDIDQAMKLGCGYPMGPFTLLDFVGLDTCYYITHVMFDEFREKRFAAPPLLKRMVLAGWYGRKTGKGFYDY 281
36
+ 9**********************************************************************************************99 PP
37
+
38
+ >> 637986361 Acid345_4328 3-hydroxybutyryl-CoA dehydrogenase [Korebacter versatilis Ellin345]
39
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
40
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
41
+ 1 ! 120.7 0.0 7e-39 8.3e-36 1 97 [] 186 282 .] 186 282 .] 1.00
42
+
43
+ Alignments for each domain:
44
+ == domain 1 score: 120.7 bits; conditional E-value: 7e-39
45
+ ---HHHHHHHHHHHHHHHHHTTSS-HHHHHHHHHH.-------HHHHHH---HHHHHHHHHHHCTCCCHGG-..HHHHHHHHTT-----------EC CS
46
+ 3HCDH 1 GFvvnRvlapllneairlveegvatpediDaamkkglGlpmGpfelsdlvgldvakkilevlaeelgerayapsplleklveagrlgrktgkgfyky 97
47
+ GFv nRvl+pllnea+ v egvatpe +D+++k+g+ +pmGp++l+d +gldv+ +i++vl+ +lg+++y+p+pll k+v+ag lgrk+g+gfyky
48
+ 637986361 186 GFVSNRVLMPLLNEAMYAVMEGVATPEAVDEVFKLGMAHPMGPLTLADFIGLDVCLDIMRVLQTGLGDPKYRPCPLLIKMVDAGWLGRKSGRGFYKY 282
49
+ 9***********************************************************************************************9 PP
50
+
51
+ >> 637984486 Acid345_2469 3-hydroxyacyl-CoA dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
52
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
53
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
54
+ 1 ! 52.7 0.0 1.1e-17 1.3e-14 2 96 .. 196 293 .. 196 294 .. 0.97
55
+ 2 ! 25.6 0.0 3.2e-09 3.8e-06 24 61 .. 378 415 .. 359 435 .. 0.78
56
+
57
+ Alignments for each domain:
58
+ == domain 1 score: 52.7 bits; conditional E-value: 1.1e-17
59
+ --HHHHHHHHHHHHHHHHHTTSS-HHHHHHHHHH.----.---HHHHHH---HHHHHHHHHHHCTCCCH..GG-..HHHHHHHHTT-----------E CS
60
+ 3HCDH 2 FvvnRvlapllneairlveegvatpediDaamkkglGlp.mGpfelsdlvgldvakkilevlaeelger..ayapsplleklveagrlgrktgkgfyk 96
61
+ F+ nR+ + ++ir+++e +++ed+Da++ ++G+p ++f+++dlvgld+ ++ +++ +++ + + +++ ++++e + lg kt+ gfyk
62
+ 637984486 196 FIGNRIGTFSVLNVIRVMQEMDLSIEDVDALTGSAVGWPkSATFRTIDLVGLDILGHVVGNMKQNVTDErsDLQIPDFYKQMLERKWLGDKTKGGFYK 293
63
+ 9**************************************999******************999999999899999**********************8 PP
64
+
65
+ == domain 2 score: 25.6 bits; conditional E-value: 3.2e-09
66
+ S-HHHHHHHHHH.-------HHHHHH---HHHHHHHHH CS
67
+ 3HCDH 24 atpediDaamkkglGlpmGpfelsdlvgldvakkilev 61
68
+ t+ +iDaam+ g+++ mGpfel d +g+++ +++
69
+ 637984486 378 DTIVEIDAAMRMGFNWEMGPFELWDAAGVEATVGRMKA 415
70
+ 5789*************************998877763 PP
71
+
72
+ >> 637984778 Acid345_2761 3-hydroxybutyryl-CoA dehydrogenase [Korebacter versatilis Ellin345]
73
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
74
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
75
+ 1 ! 68.8 0.0 1e-22 1.2e-19 3 85 .. 190 271 .. 188 275 .. 0.96
76
+
77
+ Alignments for each domain:
78
+ == domain 1 score: 68.8 bits; conditional E-value: 1e-22
79
+ -HHHHHHHHHHHHHHHHHTTSS-HHHHHHHHHH.-------HHHHHH---HHHHHHHHHHHCTCCCHGG-..HHHHHHHHTT- CS
80
+ 3HCDH 3 vvnRvlapllneairlveegvatpediDaamkkglGlpmGpfelsdlvgldvakkilevlaeelgerayapsplleklveagr 85
81
+ ++ R++a + nea++++ eg a++e+iD+a+++glGlpmGp++ ++ +gl+ +++e+l+++lge+ y+p+plle++v+a++
82
+ 637984778 190 ITARMQALISNEAFKMLGEGLASAEEIDRALQQGLGLPMGPIAEAEQYGLERRLRMMEYLHKTLGET-YRPAPLLEQYVKANK 271
83
+ 899***************************************************************9.899*********986 PP
84
+
85
+
86
+
87
+ Internal pipeline statistics summary:
88
+ -------------------------------------
89
+ Query model(s): 1 (97 nodes)
90
+ Target sequences: 4779 (1680477 residues)
91
+ Passed MSV filter: 142 (0.0297133); expected 95.6 (0.02)
92
+ Passed bias filter: 122 (0.0255284); expected 95.6 (0.02)
93
+ Passed Vit filter: 12 (0.00251099); expected 4.8 (0.001)
94
+ Passed Fwd filter: 4 (0.000836995); expected 0.0 (1e-05)
95
+ Initial search space (Z): 4779 [actual number of targets]
96
+ Domain search space (domZ): 4 [number of targets reported over threshold]
97
+ # CPU time: 0.09u 0.01s 00:00:00.10 Elapsed: 00:00:00.05
98
+ # Mc/sec: 3260.13
99
+ //
100
+
@@ -0,0 +1,58 @@
1
+ # hmmsearch :: search profile(s) against a sequence database
2
+ # HMMER 3.0 (March 2010); http://hmmer.org/
3
+ # Copyright (C) 2010 Howard Hughes Medical Institute.
4
+ # Freely distributed under the GNU General Public License (GPLv3).
5
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ # query HMM file: /srv/whitlam/bio/db/pfam/26/Pfam-A.hmm
7
+ # target sequence database: /srv/whitlam/bio/db/img/3.5/genomes/finished//637000001/637000001.genes.faa
8
+ # model-specific thresholding: TC cutoffs
9
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10
+
11
+ Query: 3HCDH [M=97]
12
+ Accession: PF00725.17
13
+ Description: 3-hydroxyacyl-CoA dehydrogenase, C-terminal domain
14
+ Scores for complete sequences (score includes all domains):
15
+ --- full sequence --- --- best 1 domain --- -#dom-
16
+ E-value score bias E-value score bias exp N Sequence Description
17
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
18
+ 5.7e-24 82.7 0.1 1.3e-14 52.7 0.0 3.4 2 637984486 Acid345_2469 3-hydroxyacyl-CoA dehydrogenase, NAD-
19
+
20
+
21
+ Domain annotation for each sequence (and alignments):
22
+ >> 637984486 Acid345_2469 3-hydroxyacyl-CoA dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
23
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
24
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
25
+ 1 ! 52.7 0.0 1.1e-17 1.3e-14 2 96 .. 196 293 .. 196 294 .. 0.97
26
+ 2 ! 25.6 0.0 3.2e-09 3.8e-06 24 61 .. 378 415 .. 359 435 .. 0.78
27
+
28
+ Alignments for each domain:
29
+ == domain 1 score: 52.7 bits; conditional E-value: 1.1e-17
30
+ --HHHHHHHHHHHHHHHHHTTSS-HHHHHHHHHH.----.---HHHHHH---HHHHHHHHHHHCTCCCH..GG-..HHHHHHHHTT-----------E CS
31
+ 3HCDH 2 FvvnRvlapllneairlveegvatpediDaamkkglGlp.mGpfelsdlvgldvakkilevlaeelger..ayapsplleklveagrlgrktgkgfyk 96
32
+ F+ nR+ + ++ir+++e +++ed+Da++ ++G+p ++f+++dlvgld+ ++ +++ +++ + + +++ ++++e + lg kt+ gfyk
33
+ 637984486 196 FIGNRIGTFSVLNVIRVMQEMDLSIEDVDALTGSAVGWPkSATFRTIDLVGLDILGHVVGNMKQNVTDErsDLQIPDFYKQMLERKWLGDKTKGGFYK 293
34
+ 9**************************************999******************999999999899999**********************8 PP
35
+
36
+ == domain 2 score: 25.6 bits; conditional E-value: 3.2e-09
37
+ S-HHHHHHHHHH.-------HHHHHH---HHHHHHHHH CS
38
+ 3HCDH 24 atpediDaamkkglGlpmGpfelsdlvgldvakkilev 61
39
+ t+ +iDaam+ g+++ mGpfel d +g+++ +++
40
+ 637984486 378 DTIVEIDAAMRMGFNWEMGPFELWDAAGVEATVGRMKA 415
41
+ 5789*************************998877763 PP
42
+
43
+
44
+
45
+ Internal pipeline statistics summary:
46
+ -------------------------------------
47
+ Query model(s): 1 (97 nodes)
48
+ Target sequences: 4779 (1680477 residues)
49
+ Passed MSV filter: 142 (0.0297133); expected 95.6 (0.02)
50
+ Passed bias filter: 122 (0.0255284); expected 95.6 (0.02)
51
+ Passed Vit filter: 12 (0.00251099); expected 4.8 (0.001)
52
+ Passed Fwd filter: 4 (0.000836995); expected 0.0 (1e-05)
53
+ Initial search space (Z): 4779 [actual number of targets]
54
+ Domain search space (domZ): 4 [number of targets reported over threshold]
55
+ # CPU time: 0.09u 0.01s 00:00:00.10 Elapsed: 00:00:00.05
56
+ # Mc/sec: 3260.13
57
+ //
58
+
@@ -0,0 +1,209 @@
1
+ # hmmsearch :: search profile(s) against a sequence database
2
+ # HMMER 3.0 (March 2010); http://hmmer.org/
3
+ # Copyright (C) 2010 Howard Hughes Medical Institute.
4
+ # Freely distributed under the GNU General Public License (GPLv3).
5
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ # query HMM file: /srv/whitlam/bio/db/pfam/26/Pfam-A.hmm
7
+ # target sequence database: /srv/whitlam/bio/db/img/3.5/genomes/finished//637000001/637000001.genes.faa
8
+ # model-specific thresholding: TC cutoffs
9
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10
+
11
+ Query: 1-cysPrx_C [M=40]
12
+ Accession: PF10417.4
13
+ Description: C-terminal domain of 1-Cys peroxiredoxin
14
+ Scores for complete sequences (score includes all domains):
15
+ --- full sequence --- --- best 1 domain --- -#dom-
16
+ E-value score bias E-value score bias exp N Sequence Description
17
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
18
+
19
+ [No hits detected that satisfy reporting thresholds]
20
+
21
+
22
+ Domain annotation for each sequence (and alignments):
23
+
24
+ [No targets detected that satisfy reporting thresholds]
25
+
26
+
27
+ Internal pipeline statistics summary:
28
+ -------------------------------------
29
+ Query model(s): 1 (40 nodes)
30
+ Target sequences: 4779 (1680477 residues)
31
+ Passed MSV filter: 169 (0.035363); expected 95.6 (0.02)
32
+ Passed bias filter: 142 (0.0297133); expected 95.6 (0.02)
33
+ Passed Vit filter: 10 (0.00209249); expected 4.8 (0.001)
34
+ Passed Fwd filter: 1 (0.000209249); expected 0.0 (1e-05)
35
+ Initial search space (Z): 4779 [actual number of targets]
36
+ Domain search space (domZ): 0 [number of targets reported over threshold]
37
+ # CPU time: 0.09u 0.01s 00:00:00.10 Elapsed: 00:00:00.06
38
+ # Mc/sec: 1120.32
39
+ //
40
+ Query: 120_Rick_ant [M=255]
41
+ Accession: PF12574.3
42
+ Description: 120 KDa Rickettsia surface antigen
43
+ Scores for complete sequences (score includes all domains):
44
+ --- full sequence --- --- best 1 domain --- -#dom-
45
+ E-value score bias E-value score bias exp N Sequence Description
46
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
47
+
48
+ [No hits detected that satisfy reporting thresholds]
49
+
50
+
51
+ Domain annotation for each sequence (and alignments):
52
+
53
+ [No targets detected that satisfy reporting thresholds]
54
+
55
+
56
+ Internal pipeline statistics summary:
57
+ -------------------------------------
58
+ Query model(s): 1 (255 nodes)
59
+ Target sequences: 4779 (1680477 residues)
60
+ Passed MSV filter: 109 (0.0228081); expected 95.6 (0.02)
61
+ Passed bias filter: 92 (0.0192509); expected 95.6 (0.02)
62
+ Passed Vit filter: 6 (0.00125549); expected 4.8 (0.001)
63
+ Passed Fwd filter: 0 (0); expected 0.0 (1e-05)
64
+ Initial search space (Z): 4779 [actual number of targets]
65
+ Domain search space (domZ): 0 [number of targets reported over threshold]
66
+ # CPU time: 0.12u 0.02s 00:00:00.13 Elapsed: 00:00:00.05
67
+ # Mc/sec: 8570.43
68
+ //
69
+ Query: 14-3-3 [M=236]
70
+ Accession: PF00244.15
71
+ Description: 14-3-3 protein
72
+ Scores for complete sequences (score includes all domains):
73
+ --- full sequence --- --- best 1 domain --- -#dom-
74
+ E-value score bias E-value score bias exp N Sequence Description
75
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
76
+
77
+ [No hits detected that satisfy reporting thresholds]
78
+
79
+
80
+ Domain annotation for each sequence (and alignments):
81
+
82
+ [No targets detected that satisfy reporting thresholds]
83
+
84
+
85
+ Internal pipeline statistics summary:
86
+ -------------------------------------
87
+ Query model(s): 1 (236 nodes)
88
+ Target sequences: 4779 (1680477 residues)
89
+ Passed MSV filter: 150 (0.0313873); expected 95.6 (0.02)
90
+ Passed bias filter: 96 (0.0200879); expected 95.6 (0.02)
91
+ Passed Vit filter: 4 (0.000836995); expected 4.8 (0.001)
92
+ Passed Fwd filter: 0 (0); expected 0.0 (1e-05)
93
+ Initial search space (Z): 4779 [actual number of targets]
94
+ Domain search space (domZ): 0 [number of targets reported over threshold]
95
+ # CPU time: 0.12u 0.03s 00:00:00.15 Elapsed: 00:00:00.07
96
+ # Mc/sec: 5665.61
97
+ //
98
+ Query: 2-Hacid_dh [M=133]
99
+ Accession: PF00389.25
100
+ Description: D-isomer specific 2-hydroxyacid dehydrogenase, catalytic domain
101
+ Scores for complete sequences (score includes all domains):
102
+ --- full sequence --- --- best 1 domain --- -#dom-
103
+ E-value score bias E-value score bias exp N Sequence Description
104
+ ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
105
+ 6.2e-36 121.1 2.3 1.2e-35 120.2 0.5 2.2 1 637982115 Acid345_0115 D-3-phosphoglycerate dehydrogenase [K
106
+ 3e-19 67.1 0.0 3.8e-19 66.8 0.0 1.3 1 637984252 Acid345_2236 D-isomer specific 2-hydroxyacid dehyd
107
+ 6.2e-07 27.3 0.0 9e-07 26.8 0.0 1.3 1 637983083 Acid345_1074 D-isomer specific 2-hydroxyacid dehyd
108
+
109
+
110
+ Domain annotation for each sequence (and alignments):
111
+ >> 637982115 Acid345_0115 D-3-phosphoglycerate dehydrogenase [Korebacter versatilis Ellin345]
112
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
113
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
114
+ 1 ! 120.2 0.5 7.6e-39 1.2e-35 1 133 [] 3 313 .. 3 313 .. 0.98
115
+
116
+ Alignments for each domain:
117
+ == domain 1 score: 120.2 bits; conditional E-value: 7.6e-39
118
+ EEECST.-CCHHHHHCC..TEEEEEEC.GSSHHHHHC....-SEEEE-TTS-BSHHHHCC-TT--EEEES----TTB-HHHHHH---EEE--TTTTHHH CS
119
+ xxxxxxxxxxxxxxxxx..xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF
120
+ 2-Hacid_dh 1 vlileplreeelellke..gvevevkd.ellteellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystes 96
121
+ +++ e++ +++++l+k+ +v++ d ++e+lle++k+adalivrs v+a++le++ +L+vi++agvGvDni l+aa+++GI+V+n+pg+++ +
122
+ 637982115 3 IVVAEKIAKAAIDLFKQdpTWNVVTPDqVAQKEQLLEQLKGADALIVRSAVFVDAAMLEHADQLRVIGRAGVGVDNIELEAATRKGIAVMNTPGANAIA 101
123
+ 6889999**********8544777777778888****************************************************************** PP
124
+
125
+ HHHHH.............................................................................................. CS
126
+ xxxxx.............................................................................................. RF
127
+ 2-Hacid_dh 97 vAEla.............................................................................................. 101
128
+ vAE++
129
+ 637982115 102 VAEHTiglmlalarfipratetmhagkwekkslqgtelrgktlgivglgriglevarraasfgmtlvahdpyvspaiahdakirladrdevlavadyit 200
130
+ *************************************************************************************************** PP
131
+
132
+ .................................................................................T-CHHHHHHHHHHHHHHH CS
133
+ .................................................................................xxxxxxxxxxxxxxxxxx RF
134
+ 2-Hacid_dh 102 .................................................................................fateeaqenmaeeaaenl 119
135
+ ++t eaq++++ ++a ++
136
+ 637982115 201 lhvgltpqtanminattlatmkkgvrivncargeliddaalaeavksghvggaaldvfteeplkaspyhgvpnviltphigGSTAEAQDAVGVQIAHQV 299
137
+ *************************************************************************************************** PP
138
+
139
+ HHHH--G--TCCE- CS
140
+ xxxxxxxxxxxxxx RF
141
+ 2-Hacid_dh 120 vaflkgespanavn 133
142
+ +++l+ + ++navn
143
+ 637982115 300 RDYLQRGVVQNAVN 313
144
+ *************8 PP
145
+
146
+ >> 637984252 Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
147
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
148
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
149
+ 1 ! 66.8 0.0 2.4e-22 3.8e-19 7 102 .. 14 146 .. 8 353 .. 0.77
150
+
151
+ Alignments for each domain:
152
+ == domain 1 score: 66.8 bits; conditional E-value: 2.4e-22
153
+ .-CCHHHHHCC.TEEEEEEC...GSSHHHHHC.....-SEEEE-TTS-BSHHHHCC-.TT--EEEES----TTB-HHHHHH---EEE--TTTTHHHHHH CS
154
+ xxxxxxxxxxx.xxxxxxxx...xxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF
155
+ 2-Hacid_dh 7 lreeelellke.gvevevkd...ellteellekakd.adalivrsntkvtaevlekl.pkLkviatagvGvDniDldaakerGIlVtnvpgystesvAE 99
156
+ + + +le+l++ g++vev+ ++++ ++ek++ +d+li++ ++k++aev+e+ +Lkv+a+++vG+Dni+ + a+++ + t++ +++te++AE
157
+ 637984252 14 IGKPALERLRAaGYDVEVYPqadPPPKSLIIEKVASgIDGLITTLRDKIDAEVFEAGkGNLKVVAQIAVGFDNINRADANKYKVPFTHTADVLTEATAE 112
158
+ 55667788888888*****999766667667776655********************9***************************************** PP
159
+
160
+ HH...............................T CS
161
+ xx...............................x RF
162
+ 2-Hacid_dh 100 la...............................f 102
163
+ +a f
164
+ 637984252 113 FAffimaaaarklwtaernvrdlkwgtwhpflpF 146
165
+ **66666555555555555555555555555550 PP
166
+
167
+ >> 637983083 Acid345_1074 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
168
+ # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
169
+ --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ----
170
+ 1 ! 26.8 0.0 5.7e-10 9e-07 30 132 .. 40 321 .. 18 322 .. 0.89
171
+
172
+ Alignments for each domain:
173
+ == domain 1 score: 26.8 bits; conditional E-value: 5.7e-10
174
+ HHHHC....-SEEEE-TTS-BSHHHHCC-TT--EEEES----TTB-HHHHHH---EEE--TTTTHHHHHHHH........................... CS
175
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx........................... RF
176
+ 2-Hacid_dh 30 eellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystesvAEla........................... 101
177
+ ++l ++++dad+ ++r + + ++++Lk+i a++ v + + +++ +I+Vtn+ ++ vAE+a
178
+ 637983083 40 ATLEQEISDADIALTRELK---PSQVHAAKQLKWIHSAAAAVHALMIPEIRQSNIIVTNATAVHGPVVAEHAlamilaiarridlavkaqtehiwkqee 135
179
+ 5677888888888888755...7778899********************************************************************** PP
180
+
181
+ ................................................................................................... CS
182
+ ................................................................................................... RF
183
+ 2-Hacid_dh 102 ................................................................................................... 101
184
+
185
+ 637983083 136 iwvtnppprdiagstllvvglgqigrplaqkakalgmhviavrehpergaetadevfaskdllkilpradfvmlcppvtpdtkeafgrdqlaamkpday 234
186
+ *99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 PP
187
+
188
+ ........................................................T-CHHHHHHHHHHHHHHHHHHH--G--TCCE CS
189
+ ........................................................xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF
190
+ 2-Hacid_dh 102 ........................................................fateeaqenmaeeaaenlvaflkgespanav 132
191
+ + + + e+ + enl++fl+g+++ v
192
+ 637983083 235 llnvgrgalidepaliealqqrriggaaldvtsveplpsdsplwaldncmitphtgGISPKLWERQYIFFTENLRRFLAGKPLLGLV 321
193
+ 999999999999999999999999999999999999999999999999999999998888888888888888888888888776655 PP
194
+
195
+
196
+
197
+ Internal pipeline statistics summary:
198
+ -------------------------------------
199
+ Query model(s): 1 (133 nodes)
200
+ Target sequences: 4779 (1680477 residues)
201
+ Passed MSV filter: 187 (0.0391295); expected 95.6 (0.02)
202
+ Passed bias filter: 167 (0.0349445); expected 95.6 (0.02)
203
+ Passed Vit filter: 22 (0.00460347); expected 4.8 (0.001)
204
+ Passed Fwd filter: 3 (0.000627746); expected 0.0 (1e-05)
205
+ Initial search space (Z): 4779 [actual number of targets]
206
+ Domain search space (domZ): 3 [number of targets reported over threshold]
207
+ # CPU time: 0.13u 0.00s 00:00:00.13 Elapsed: 00:00:00.07
208
+ # Mc/sec: 3192.91
209
+ //
@@ -8,11 +8,12 @@ rescue Bundler::BundlerError => e
8
8
  exit e.status_code
9
9
  end
10
10
  require 'test/unit'
11
- require 'shoulda'
11
+
12
12
 
13
13
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
14
  $LOAD_PATH.unshift(File.dirname(__FILE__))
15
15
  require 'bio-hmmer3_report'
16
16
 
17
17
  class Test::Unit::TestCase
18
+ HMMER_TEST_DATA = Pathname.new(File.join('test','data','HMMER')).cleanpath.to_s
18
19
  end
@@ -0,0 +1,84 @@
1
+ require 'helper'
2
+
3
+ module Bio
4
+ class TestDefaultReport < Test::Unit::TestCase
5
+ def test_splitting
6
+ reports = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'test637000001.hmmsearch.txt')))
7
+ assert_equal 4, reports.length
8
+ assert_kind_of Bio::HMMER::HMMER3::DefaultHMMSearchReport, reports[0]
9
+ end
10
+
11
+ def test_alignment_when_no_hits
12
+ reports = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'test637000001.hmmsearch.txt')))
13
+ assert_equal [], reports[0].hits
14
+ end
15
+
16
+ def test_alignment_when_three_hits
17
+ reports = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'test637000001.hmmsearch.txt')))
18
+ hits = reports[3].hits
19
+ assert_kind_of Array, hits
20
+
21
+ assert_equal 3, hits.length
22
+
23
+
24
+ h = hits[1]
25
+ assert_kind_of Bio::HMMER::HMMER3::DefaultHMMSearchReport::Hit, h
26
+ assert_equal 1, h.hsps.length
27
+ assert_equal '637984252 Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]',
28
+ h.sequence_name
29
+
30
+ d = h.hsps[0]
31
+ assert_kind_of Bio::HMMER::HMMER3::DefaultHMMSearchReport::Hit::Hsp, d
32
+ assert_equal 1, d.number
33
+ assert_equal 66.8, d.score
34
+ assert_equal 0.0, d.bias
35
+ assert_equal 2.4e-22, d.c_evalue
36
+ assert_equal 3.8e-19, d.i_evalue
37
+ assert_equal 7, d.hmmfrom
38
+ assert_equal 102, d.hmm_to
39
+ assert_equal 14, d.alifrom
40
+ assert_equal 146, d.ali_to
41
+ assert_equal 8, d.envfrom
42
+ assert_equal 353, d.env_to
43
+ assert_equal 0.77, d.acc
44
+
45
+ assert_equal 'lreeelellke.gvevevkd...ellteellekakd.adalivrsntkvtaevlekl.pkLkviatagvGvDniDldaakerGIlVtnvpgystesvAE'+
46
+ 'la...............................f', d.hmmseq
47
+ assert_equal 'IGKPALERLRAaGYDVEVYPqadPPPKSLIIEKVASgIDGLITTLRDKIDAEVFEAGkGNLKVVAQIAVGFDNINRADANKYKVPFTHTADVLTEATAE'+
48
+ 'FAffimaaaarklwtaernvrdlkwgtwhpflpF', d.flatseq
49
+
50
+ assert_equal 'IVVAEKIAKAAIDLFKQdpTWNVVTPDqVAQKEQLLEQLKGADALIVRSAVFVDAAMLEHADQLRVIGRAGVGVDNIELEAATRKGIAVMNTPGANAIA'+
51
+ 'VAEHTiglmlalarfipratetmhagkwekkslqgtelrgktlgivglgriglevarraasfgmtlvahdpyvspaiahdakirladrdevlavadyit'+
52
+ 'lhvgltpqtanminattlatmkkgvrivncargeliddaalaeavksghvggaaldvfteeplkaspyhgvpnviltphigGSTAEAQDAVGVQIAHQV'+
53
+ 'RDYLQRGVVQNAVN', hits[0].hsps[0].flatseq
54
+
55
+ assert_equal 'eellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystesvAEla...........................'+
56
+ '...................................................................................................'+
57
+ '........................................................fateeaqenmaeeaaenlvaflkgespanav', hits[2].hsps[0].hmmseq
58
+ end
59
+
60
+ def test_multi_domain_hit_simple
61
+ hits = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'hmmer3multidomainHitSimple.txt')))[0].hits
62
+ assert_equal 1, hits.length
63
+ assert_equal 2, hits[0].hsps.length
64
+
65
+ assert_equal 'FIGNRIGTFSVLNVIRVMQEMDLSIEDVDALTGSAVGWPkSATFRTIDLVGLDILGHVVGNMKQNVTDErsDLQIPDFYKQMLERKWLGDKTKGGFYK', hits[0].hsps[0].flatseq
66
+ assert_equal 'DTIVEIDAAMRMGFNWEMGPFELWDAAGVEATVGRMKA', hits[0].hsps[1].flatseq
67
+ end
68
+
69
+ def test_multi_domain_hit
70
+ hits = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'hmmer3multidomainHit.txt')))[0].hits
71
+ assert_equal 4, hits.length
72
+ assert_equal 1, hits[0].hsps.length
73
+ assert_equal 2, hits[2].hsps.length
74
+
75
+ assert_equal 'FIGNRIGTFSVLNVIRVMQEMDLSIEDVDALTGSAVGWPkSATFRTIDLVGLDILGHVVGNMKQNVTDErsDLQIPDFYKQMLERKWLGDKTKGGFYK', hits[2].hsps[0].flatseq
76
+ assert_equal 'DTIVEIDAAMRMGFNWEMGPFELWDAAGVEATVGRMKA', hits[2].hsps[1].flatseq
77
+ end
78
+
79
+ def test_whole_sequence_only_not_individual_domains
80
+ assert_equal [],
81
+ Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'hmmer3_onlyWholeSequenceNotDomains.txt')))[0].hits
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,24 @@
1
+ require 'helper'
2
+
3
+ module Bio
4
+ class TestDefaultReport < Test::Unit::TestCase
5
+ def test_no_format_specified
6
+ reports = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'test637000001.hmmsearch.txt')))
7
+ assert_kind_of Array, reports
8
+ assert_equal 4, reports.length
9
+ assert_kind_of Bio::HMMER::HMMER3::DefaultHMMSearchReport, reports[0]
10
+ end
11
+
12
+ def test_tblout_format_specified
13
+ report = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'hmmsearch_tblout.out')), :format => :tblout)
14
+ assert_kind_of Bio::HMMER::HMMER3::TabularReport, report
15
+ assert_equal :tblout, report.format
16
+ end
17
+
18
+ def test_domtblout_format_specified
19
+ report = Bio::HMMER::HMMER3.reports(File.open(File.join(HMMER_TEST_DATA, 'hmmsearch_domtblout.out')), :format => :domtblout)
20
+ assert_kind_of Bio::HMMER::HMMER3::TabularReport, report
21
+ assert_equal :domtblout, report.format
22
+ end
23
+ end
24
+ end
@@ -10,40 +10,12 @@
10
10
  require 'helper'
11
11
 
12
12
  module Bio
13
- module Testreport
14
-
15
- HMMER_TEST_DATA = Pathname.new(File.join('test','data','HMMER')).cleanpath.to_s
16
-
17
- def self.hmmsearch_domtblout_empty
18
- File.open(File.join HMMER_TEST_DATA, 'hmmsearch_domtblout_empty.out')
19
- end
20
-
21
- def self.hmmsearch_domtblout
22
- File.open(File.join HMMER_TEST_DATA, 'hmmsearch_domtblout.out')
23
- end
24
-
25
- def self.hmmsearch_tblout
26
- File.open(File.join HMMER_TEST_DATA, 'hmmsearch_tblout.out')
27
- end
28
-
29
- def self.hmmscan_domtblout
30
- File.open(File.join HMMER_TEST_DATA, 'hmmscan_domtblout.out')
31
- end
32
-
33
- def self.hmmscan_tblout
34
- File.open(File.join HMMER_TEST_DATA, 'hmmscan_tblout.out')
35
- end
36
-
37
- end # Testreport
38
-
39
13
  class Testreport_class_methods < Test::Unit::TestCase
40
-
41
-
42
14
  def test_hmmsearch_domtblout_empty
43
- filename = Testreport.hmmsearch_domtblout_empty
15
+ filename = File.open(File.join HMMER_TEST_DATA, 'hmmsearch_domtblout_empty.out')
44
16
 
45
- assert_instance_of(Bio::HMMER::HMMER3::Report,
46
- report = Bio::HMMER::HMMER3::Report.new(filename))
17
+ assert_instance_of(Bio::HMMER::HMMER3::TabularReport,
18
+ report = Bio::HMMER::HMMER3::TabularReport.new(filename))
47
19
 
48
20
  assert_instance_of(Array,
49
21
  report.hits)
@@ -53,10 +25,10 @@ module Bio
53
25
 
54
26
 
55
27
  def test_hmmsearch_domtblout
56
- filename = Testreport.hmmsearch_domtblout
28
+ filename = File.open(File.join HMMER_TEST_DATA, 'hmmsearch_domtblout.out')
57
29
 
58
- assert_instance_of(Bio::HMMER::HMMER3::Report,
59
- report = Bio::HMMER::HMMER3::Report.new(filename))
30
+ assert_instance_of(Bio::HMMER::HMMER3::TabularReport,
31
+ report = Bio::HMMER::HMMER3::TabularReport.new(filename))
60
32
 
61
33
  assert_instance_of(Array,
62
34
  report.hits)
@@ -100,10 +72,10 @@ module Bio
100
72
 
101
73
  def test_hmmsearch_tblout
102
74
 
103
- filename = Testreport.hmmsearch_tblout
75
+ filename = File.open(File.join HMMER_TEST_DATA, 'hmmsearch_tblout.out')
104
76
 
105
- assert_instance_of(Bio::HMMER::HMMER3::Report,
106
- report = Bio::HMMER::HMMER3::Report.new(filename))
77
+ assert_instance_of(Bio::HMMER::HMMER3::TabularReport,
78
+ report = Bio::HMMER::HMMER3::TabularReport.new(filename))
107
79
 
108
80
  assert_instance_of(Array,
109
81
  report.hits)
@@ -139,10 +111,10 @@ module Bio
139
111
 
140
112
  def test_hmmscan_domtblout
141
113
 
142
- filename = Testreport.hmmscan_domtblout
114
+ filename = File.open(File.join HMMER_TEST_DATA, 'hmmscan_domtblout.out')
143
115
 
144
- assert_instance_of(Bio::HMMER::HMMER3::Report,
145
- report = Bio::HMMER::HMMER3::Report.new(filename))
116
+ assert_instance_of(Bio::HMMER::HMMER3::TabularReport,
117
+ report = Bio::HMMER::HMMER3::TabularReport.new(filename))
146
118
 
147
119
  assert_instance_of(Array,
148
120
  report.hits)
@@ -180,10 +152,10 @@ module Bio
180
152
  end # test_hmmscan_domtblout
181
153
 
182
154
  def test_hmmscan_tblout
183
- filename = Testreport.hmmscan_tblout
155
+ filename = File.open(File.join HMMER_TEST_DATA, 'hmmscan_tblout.out')
184
156
 
185
- assert_instance_of(Bio::HMMER::HMMER3::Report,
186
- report = Bio::HMMER::HMMER3::Report.new(filename))
157
+ assert_instance_of(Bio::HMMER::HMMER3::TabularReport,
158
+ report = Bio::HMMER::HMMER3::TabularReport.new(filename))
187
159
 
188
160
  assert_instance_of(Array,
189
161
  report.hits)
@@ -225,7 +197,7 @@ module Bio
225
197
  data << "\n" << 'BH4 PF02180.11 27 sp|P10415|BCL2_HUMAN - 239 3.9e-15 54.6 0.1 1 1 1.3e-18 8.2e-15 53.6 0.1 2 26 8 32 7 33 0.94 Bcl-2 homology region 4'
226
198
  data << "\n"
227
199
 
228
- report = Bio::HMMER::HMMER3::Report.new(data)
200
+ report = Bio::HMMER::HMMER3::TabularReport.new(data)
229
201
  hits = report.hits
230
202
  hits.each do |hit|
231
203
  assert_kind_of Bio::HMMER::HMMER3::PerDomainHit, hit
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-hmmer3_report
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Christian Zmasek
@@ -10,75 +9,137 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2012-05-18 00:00:00.000000000 Z
12
+ date: 2015-08-06 00:00:00.000000000 Z
14
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio-logger
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.0'
15
28
  - !ruby/object:Gem::Dependency
16
29
  name: shoulda
17
- requirement: &76510780 !ruby/object:Gem::Requirement
18
- none: false
30
+ requirement: !ruby/object:Gem::Requirement
19
31
  requirements:
20
- - - ! '>='
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 3.5.0
35
+ - - "~>"
21
36
  - !ruby/object:Gem::Version
22
- version: '0'
37
+ version: '3.5'
23
38
  type: :development
24
39
  prerelease: false
25
- version_requirements: *76510780
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 3.5.0
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.5'
26
48
  - !ruby/object:Gem::Dependency
27
49
  name: rdoc
28
- requirement: &76510420 !ruby/object:Gem::Requirement
29
- none: false
50
+ requirement: !ruby/object:Gem::Requirement
30
51
  requirements:
31
- - - ~>
52
+ - - "~>"
32
53
  - !ruby/object:Gem::Version
33
54
  version: '3.12'
34
55
  type: :development
35
56
  prerelease: false
36
- version_requirements: *76510420
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.12'
37
62
  - !ruby/object:Gem::Dependency
38
63
  name: jeweler
39
- requirement: &76510040 !ruby/object:Gem::Requirement
40
- none: false
64
+ requirement: !ruby/object:Gem::Requirement
41
65
  requirements:
42
- - - ~>
66
+ - - ">="
43
67
  - !ruby/object:Gem::Version
44
68
  version: 1.8.3
69
+ - - "~>"
70
+ - !ruby/object:Gem::Version
71
+ version: '1.8'
45
72
  type: :development
46
73
  prerelease: false
47
- version_requirements: *76510040
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 1.8.3
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.8'
48
82
  - !ruby/object:Gem::Dependency
49
83
  name: bundler
50
- requirement: &76509650 !ruby/object:Gem::Requirement
51
- none: false
84
+ requirement: !ruby/object:Gem::Requirement
52
85
  requirements:
53
- - - ! '>='
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 1.6.2
89
+ - - "~>"
54
90
  - !ruby/object:Gem::Version
55
- version: 1.0.21
91
+ version: '1.6'
56
92
  type: :development
57
93
  prerelease: false
58
- version_requirements: *76509650
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: 1.6.2
99
+ - - "~>"
100
+ - !ruby/object:Gem::Version
101
+ version: '1.6'
59
102
  - !ruby/object:Gem::Dependency
60
103
  name: bio
61
- requirement: &76524960 !ruby/object:Gem::Requirement
62
- none: false
104
+ requirement: !ruby/object:Gem::Requirement
63
105
  requirements:
64
- - - ! '>='
106
+ - - ">="
65
107
  - !ruby/object:Gem::Version
66
108
  version: 1.4.2
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '1.4'
67
112
  type: :development
68
113
  prerelease: false
69
- version_requirements: *76524960
114
+ version_requirements: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: 1.4.2
119
+ - - "~>"
120
+ - !ruby/object:Gem::Version
121
+ version: '1.4'
70
122
  - !ruby/object:Gem::Dependency
71
- name: rdoc
72
- requirement: &76524250 !ruby/object:Gem::Requirement
73
- none: false
123
+ name: minitest
124
+ requirement: !ruby/object:Gem::Requirement
74
125
  requirements:
75
- - - ~>
126
+ - - ">="
76
127
  - !ruby/object:Gem::Version
77
- version: '3.12'
128
+ version: 4.7.5
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '4.7'
78
132
  type: :development
79
133
  prerelease: false
80
- version_requirements: *76524250
81
- description: Enables parsing of HMMER version 3 reports
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 4.7.5
139
+ - - "~>"
140
+ - !ruby/object:Gem::Version
141
+ version: '4.7'
142
+ description: Enables programmatic parsing of HMMER version 3 reports
82
143
  email: gmail.com after donttrustben
83
144
  executables: []
84
145
  extensions: []
@@ -86,48 +147,51 @@ extra_rdoc_files:
86
147
  - LICENSE.txt
87
148
  - README.md
88
149
  files:
89
- - .document
90
- - .travis.yml
150
+ - ".document"
151
+ - ".travis.yml"
91
152
  - Gemfile
92
153
  - LICENSE.txt
93
154
  - README.md
94
155
  - Rakefile
95
156
  - VERSION
96
157
  - lib/bio-hmmer3_report.rb
97
- - lib/bio/appl/hmmer/hmmer3/report.rb
158
+ - lib/bio/appl/hmmer/hmmer3/default_report.rb
159
+ - lib/bio/appl/hmmer/hmmer3/tabular_report.rb
160
+ - test/data/HMMER/hmmer3_onlyWholeSequenceNotDomains.txt
161
+ - test/data/HMMER/hmmer3multidomainHit.txt
162
+ - test/data/HMMER/hmmer3multidomainHitSimple.txt
98
163
  - test/data/HMMER/hmmscan_domtblout.out
99
164
  - test/data/HMMER/hmmscan_tblout.out
100
165
  - test/data/HMMER/hmmsearch_domtblout.out
101
166
  - test/data/HMMER/hmmsearch_domtblout_empty.out
102
167
  - test/data/HMMER/hmmsearch_tblout.out
168
+ - test/data/HMMER/test637000001.hmmsearch.txt
103
169
  - test/helper.rb
170
+ - test/unit/bio/appl/hmmer/test_hmmer3_default_report.rb
171
+ - test/unit/bio/appl/hmmer/test_hmmer3_report_formatting.rb
104
172
  - test/unit/bio/appl/hmmer/test_hmmer3report.rb
105
173
  homepage: http://github.com/wwood/bioruby-hmmer3_report
106
174
  licenses:
107
175
  - MIT
176
+ metadata: {}
108
177
  post_install_message:
109
178
  rdoc_options: []
110
179
  require_paths:
111
180
  - lib
112
181
  required_ruby_version: !ruby/object:Gem::Requirement
113
- none: false
114
182
  requirements:
115
- - - ! '>='
183
+ - - ">="
116
184
  - !ruby/object:Gem::Version
117
185
  version: '0'
118
- segments:
119
- - 0
120
- hash: -284128881
121
186
  required_rubygems_version: !ruby/object:Gem::Requirement
122
- none: false
123
187
  requirements:
124
- - - ! '>='
188
+ - - ">="
125
189
  - !ruby/object:Gem::Version
126
190
  version: '0'
127
191
  requirements: []
128
192
  rubyforge_project:
129
- rubygems_version: 1.8.17
193
+ rubygems_version: 2.2.2
130
194
  signing_key:
131
- specification_version: 3
195
+ specification_version: 4
132
196
  summary: Enables parsing of HMMER version 3 reports
133
197
  test_files: []