bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/appl/hmmer.rb - HMMER wrapper
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: hmmer.rb,v 1.4 2005/09/26 13:00:04 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/command'
|
|
24
|
+
require 'shellwords'
|
|
25
|
+
|
|
26
|
+
module Bio

  # Factory that builds a command line for a HMMER program, runs it locally
  # and turns the captured output into a Bio::HMMER::Report.
  class HMMER

    autoload :Report, 'bio/appl/hmmer/report'

    include Bio::Command::Tools

    # program:: executable placed first on the command line
    # hmmfile:: HMM file argument, placed after the options
    # seqfile:: sequence file argument, placed last
    # opt::     extra command-line words; either an Array-like object or,
    #           for backward compatibility, a single String that is split
    #           with Shellwords.shellwords
    def initialize(program, hmmfile, seqfile, opt = [])
      @program = program
      @hmmfile = hmmfile
      @seqfile = seqfile
      @output  = ''

      # Ask the argument whether it is array-like instead of rescuing the
      # NameError raised by calling to_ary on a String.
      @options =
        if opt.respond_to?(:to_ary)
          opt.to_ary
        else
          # backward compatibility
          Shellwords.shellwords(opt)
        end
    end
    attr_accessor :program, :hmmfile, :seqfile, :options
    attr_reader :output

    # Returns the options as a single command-line string
    # (backward compatibility with the String based interface).
    def option
      make_command_line(@options)
    end

    # Sets the options from a single String, split into words with
    # Shellwords.shellwords (backward compatibility).
    def option=(str)
      @options = Shellwords.shellwords(str)
    end

    # Runs "program options... hmmfile seqfile" through call_command_local,
    # keeps the raw output in @output and returns the parsed Report.
    def query
      cmd = [ @program, *@options ]
      cmd.concat([ @hmmfile, @seqfile ])

      @output = call_command_local(cmd, nil)
      parse_result(@output)
    end


    private

    # Wraps the raw program output in a Report object.
    def parse_result(data)
      Report.new(data)
    end

  end
end
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __FILE__ == $0

  # Use pretty-printing when the pp library can be loaded. A failed require
  # raises LoadError, which is a ScriptError and therefore is NOT caught by
  # a bare "rescue"; it has to be named explicitly.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is unavailable; keep using the plain Kernel#p
  end

  program = ARGV.shift # hmmsearch, hmmpfam
  hmmfile = ARGV.shift
  seqfile = ARGV.shift

  factory = Bio::HMMER.new(program, hmmfile, seqfile)
  p factory.query

end
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
=begin
|
|
103
|
+
|
|
104
|
+
= Bio::HMMER
|
|
105
|
+
|
|
106
|
+
--- Bio::HMMER.new(program, hmmfile, seqfile, opt = [])
|
|
107
|
+
--- Bio::HMMER#program
|
|
108
|
+
--- Bio::HMMER#hmmfile
|
|
109
|
+
--- Bio::HMMER#seqfile
|
|
110
|
+
--- Bio::HMMER#options
|
|
111
|
+
|
|
112
|
+
Accessors for the factory.
|
|
113
|
+
|
|
114
|
+
--- Bio::HMMER#option
|
|
115
|
+
--- Bio::HMMER#option=(str)
|
|
116
|
+
|
|
117
|
+
Get/set options by string.
|
|
118
|
+
|
|
119
|
+
--- Bio::HMMER#query
|
|
120
|
+
|
|
121
|
+
Executes the hmmer search and returns Report object (Bio::HMMER::Report).
|
|
122
|
+
|
|
123
|
+
--- Bio::HMMER#output
|
|
124
|
+
|
|
125
|
+
Shows the raw output from hmmer search.
|
|
126
|
+
|
|
127
|
+
=end
|
|
128
|
+
|
|
129
|
+
|
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>
|
|
5
|
+
# Copyright (C) 2005 Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: report.rb,v 1.9 2005/10/31 09:12:03 k Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'bio/appl/hmmer'
|
|
25
|
+
|
|
26
|
+
module Bio
|
|
27
|
+
class HMMER
|
|
28
|
+
|
|
29
|
+
# Splits +input+ into records terminated by "\n//\n" and builds a Report
# from each record.
#
# input:: any object that implements each_line(separator), e.g. a String
#         or an IO
#
# With a block, each Report is yielded and an empty Array is returned.
# Without a block, an Array holding every Report is returned.
def self.reports(input)
  ary = []
  # each_line accepts the record separator on both String and IO;
  # String#each is not available on Ruby 1.9 and later.
  input.each_line("\n//\n") do |data|
    if block_given?
      yield Report.new(data)
    else
      ary << Report.new(data)
    end
  end
  ary
end
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Bio::HMMER::Report
|
|
43
|
+
class Report
|
|
44
|
+
|
|
45
|
+
# for Bio::FlatFile support
|
|
46
|
+
DELIMITER = RS = "\n//\n"
|
|
47
|
+
|
|
48
|
+
# Parses one HMMER report.
#
# get_subdata cuts the text into up to six sections: header, query
# information, hits, HSPs, alignments and search statistics. The header
# and the statistics are optional.
def initialize(data)
  sections, from_hmmsearch = get_subdata(data)

  # Without a header both the program and the parameter tables stay empty.
  header = sections["header"]
  @program, @parameter = header ? parse_header_data(header) : [{}, {}]

  @query_info = parse_query_info(sections["query"])
  @hits       = parse_hit_data(sections["hit"])
  @hsps       = parse_hsp_data(sections["hsp"], from_hmmsearch)

  unless @hsps == []
    # Cut the alignment section at every line that starts in column 0;
    # the piece before the first such line is dropped.
    alignments = sections["alignment"].split(/^\S+.*?\n/)[1..-1]

    # The n-th alignment belongs to the n-th Hsp.
    alignments.each_with_index do |text, idx|
      @hsps[idx].set_alignment(text)
    end
  end

  # Attach every Hsp to the Hit that carries the same accession;
  # an Hsp without a matching Hit stays reachable only through @hsps.
  by_accession = {}
  @hits.each { |hit| by_accession[hit.accession] = hit }
  @hsps.each do |hsp|
    next unless by_accession.has_key?(hsp.accession)
    by_accession[hsp.accession].append_hsp(hsp)
  end

  # Statistics are parsed for hmmsearch reports only.
  if from_hmmsearch
    stats = parse_stat_data(sections["statistics"])
    @histogram, @statistical_detail, @total_seq_searched,
      @whole_seq_top_hits, @domain_top_hits = stats
  end

end
|
|
95
|
+
attr_reader :program, :parameter, :query_info, :hits, :hsps,
|
|
96
|
+
:histogram, :statistical_detail, :total_seq_searched,
|
|
97
|
+
:whole_seq_top_hits, :domain_top_hits
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# Yields every element of @hits, in order, to the given block.
def each
  @hits.each { |hit| yield hit }
end
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Bio::HMMER::Report::Hit
|
|
108
|
+
class Hit
  # data:: one text line made of an identifier, a free-text description,
  #        a score, an E-value and a count, separated by whitespace.
  #        When the line does not have that shape only @hsps is set.
  def initialize(data)
    @hsps = []
    fields = /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/.match(data)
    return unless fields

    @accession   = fields[1]
    @description = fields[2]
    @score       = fields[3].to_f
    @evalue      = fields[4].to_f
    @num         = fields[5].to_i
  end
  attr_reader :hsps, :accession, :description, :score, :evalue, :num

  # Yields each Hsp that has been appended to this hit.
  def each
    @hsps.each { |hsp| yield hsp }
  end

  alias_method :target_id,  :accession
  alias_method :hit_id,     :accession
  alias_method :entry_id,   :accession
  alias_method :definition, :description
  alias_method :bit_score,  :score

  # Returns "<tag> description". The tag is the domain of the only Hsp
  # when exactly one Hsp is attached, otherwise the parsed count (@num).
  def target_def
    tag = (@hsps.size == 1) ? @hsps[0].domain : @num.to_s
    "<#{tag}> #{@description}"
  end

  # Adds +hsp+ to the end of this hit's Hsp list.
  def append_hsp(hsp)
    @hsps.push(hsp)
  end

end
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Bio::HMMER::Report::Hsp
|
|
146
|
+
class Hsp
  # data::         one whitespace separated row with ten columns:
  #                accession, domain, seq-from, seq-to, seq flag,
  #                hmm-from, hmm-to, hmm flag, score and E-value
  # is_hmmsearch:: selects which side (HMM or sequence) is reported as
  #                query and which as target by the query_*/target_* readers
  def initialize(data, is_hmmsearch)
    @is_hmmsearch = is_hmmsearch

    cols = data.split(' ')
    @accession, @domain = cols[0], cols[1]
    @seq_ft = cols[4]
    @hmm_ft = cols[7]
    @seq_f  = cols[2].to_i
    @seq_t  = cols[3].to_i
    @hmm_f  = cols[5].to_i
    @hmm_t  = cols[6].to_i
    @score  = cols[8].to_f
    @evalue = cols[9].to_f

    @hmmseq  = ''
    @flatseq = ''
    @midline = ''
    @query_frame  = 1
    @target_frame = 1
    # CS and RF lines are rarely used.
    @csline = nil
    @rfline = nil
  end
  attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft,
    :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq,
    :flatseq, :query_frame, :target_frame, :csline, :rfline

  # Accumulates the alignment text +aln+ into @hmmseq, @midline and
  # @flatseq (and @csline / @rfline when such lines are present).
  #
  # The text is cut into blocks wherever a line ends in " <digits>" or
  # " -" and is followed by an empty line. A block normally has three
  # lines (hmmseq, midline, flatseq), optionally preceded by a CS and/or
  # an RF line. From each line the characters starting at column 19 are
  # taken, as many as the first non-blank run of the hmmseq line is long.
  # Finally three characters are trimmed from both ends of every
  # accumulated string.
  def set_alignment(aln)
    aln.split(/ (?:\d+|-)\s*\n\n/).each do |block|
      rows = block.split(/\n/)
      cs_row = rows.shift if rows[0] =~ /^ {16}CS/
      rf_row = rows.shift if rows[0] =~ /^ {16}RF/
      width = rows[0][/\S+/].length

      @csline = @csline.to_s + cs_row[19, width] if cs_row
      @rfline = @rfline.to_s + rf_row[19, width] if rf_row
      @hmmseq  = @hmmseq  + rows[0][19, width]
      @midline = @midline + rows[1][19, width]
      @flatseq = @flatseq + rows[2][19, width]
    end

    trim = lambda { |text| text[3...-3] }
    @csline  = trim.call(@csline) if @csline
    @rfline  = trim.call(@rfline) if @rfline
    @hmmseq  = trim.call(@hmmseq)
    @midline = trim.call(@midline)
    @flatseq = trim.call(@flatseq)
  end

  # For hmmsearch the HMM side is the query and the sequence side is the
  # target; otherwise the roles are swapped.

  def query_seq
    @is_hmmsearch ? @hmmseq : @flatseq
  end

  def target_seq
    @is_hmmsearch ? @flatseq : @hmmseq
  end

  def target_from
    @is_hmmsearch ? @seq_f : @hmm_f
  end

  def target_to
    @is_hmmsearch ? @seq_t : @hmm_t
  end

  def query_from
    @is_hmmsearch ? @hmm_f : @seq_f
  end

  def query_to
    @is_hmmsearch ? @hmm_t : @seq_t
  end

  alias_method :bit_score, :score
  alias_method :target_id, :accession

end
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# Bio::HMMER::Report#get_subdata
|
|
208
|
+
# Cuts a raw report into its sections.
#
# Returns [subdata, is_hmmsearch]. subdata is a Hash with the keys
# "header", "query", "hit", "hsp", "alignment" and "statistics"; "header"
# is only set when the text starts with a hmmsearch/hmmpfam banner, and
# "statistics" is nil when no histogram section is present.
# is_hmmsearch is true only when the banner names hmmsearch; a report
# without banner is assumed to come from hmmpfam.
#
# Raises RuntimeError when the alignment section cannot be located or,
# for hmmpfam, when text follows the terminating "//" line.
def get_subdata(data)
  subdata = {}
  header_prefix = '\Ahmm(search|pfam) - search'
  query_prefix  = '^Query (HMM|sequence): .*\nAccession: '
  hit_prefix    = '^Scores for (complete sequences|sequence family)'
  hsp_prefix    = '^Parsed for domains:'
  aln_prefix    = '^Alignments of top-scoring domains:\n'
  stat_prefix   = '^\nHistogram of all scores:'

  # if header exists, get it
  banner = /#{header_prefix}/.match(data)
  if banner
    is_hmmsearch = (banner[1] == "search")  # hmmsearch or hmmpfam
    subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m]
  else
    is_hmmsearch = false  # if no header, assumed to be hmmpfam
  end

  # get query, Hit and Hsp data
  subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m]
  subdata["hit"]   = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m]
  subdata["hsp"]   = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m]

  # get alignment data: it ends at the histogram for hmmsearch and at the
  # "//" line for hmmpfam
  if is_hmmsearch
    aln = /#{aln_prefix}(.+?)#{stat_prefix}/m.match(data)
  else
    aln = /#{aln_prefix}(.+?)\/\/\n/m.match(data)
  end
  # Fail with a clear message instead of a NoMethodError on nil further down.
  raise "alignment section not found" unless aln
  raise "multiple reports found" if !is_hmmsearch && aln.post_match.length > 0
  subdata["alignment"] = aln[1]

  # handle -A option of HMMER
  cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z'
  subdata["alignment"].sub!(/#{cutoff_line}/, '')

  # get statistics data
  subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m]

  [subdata, is_hmmsearch]
end
|
|
249
|
+
private :get_subdata
|
|
250
|
+
|
|
251
|
+
# Bio::HMMER::Report#parse_header_data
|
|
252
|
+
# Parses the header section of a report.
#
# data:: header text made of two parts, each closed by a line ending in
#        " - - -", followed by one empty line
#
# Returns [program, parameter]:
# program::   Hash with the keys 'name', 'version', 'copyright' and
#             'license', filled from the first four lines of the first part
# parameter:: Hash built from the "key: value" lines of the second part
def parse_header_data(data)
  data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m
  program_data   = $1
  parameter_data = $2

  program = {}
  %w(name version copyright license).zip(program_data.split(/\n/)) do |key, line|
    program[key] = line
  end

  parameter = {}
  # each_line replaces String#each, which does not exist on Ruby 1.9+.
  parameter_data.each_line do |x|
    if /^(.+?):\s+(.*?)\s*$/ =~ x
      parameter[$1] = $2
    end
  end

  [program, parameter]
end
|
|
270
|
+
private :parse_header_data
|
|
271
|
+
|
|
272
|
+
# Bio::HMMER::Report#parse_query_info
|
|
273
|
+
# Parses the query section of a report into a Hash.
#
# Every "key: value" line becomes hash[key] = value (surrounding
# whitespace of the value removed). A line without such a pair that
# contains "[text]" after leading whitespace is stored under 'comments'.
def parse_query_info(data)
  hash = {}
  # each_line replaces String#each, which does not exist on Ruby 1.9+.
  data.each_line do |x|
    if /^(.+?):\s+(.*?)\s*$/ =~ x
      hash[$1] = $2
    elsif /\s+\[(.+)\]/ =~ x
      hash['comments'] = $1
    end
  end
  hash
end
|
|
284
|
+
private :parse_query_info
|
|
285
|
+
|
|
286
|
+
# Bio::HMMER::Report#parse_hit_data
|
|
287
|
+
# Builds one Hit per line of the hit table.
#
# data:: the hit section; everything up to and including the first line
#        ending in "---" is discarded, as is the final character
#
# Returns an Array of Hit objects, empty when the table only holds the
# "[no hits above thresholds]" marker. The argument is not modified.
def parse_hit_data(data)
  # Non-destructive sub/chop: the bang variants changed the caller's
  # string, and sub! returns nil when nothing is replaced.
  body = data.sub(/.+?---\n/m, '').chop
  hits = []
  return hits if body == "\t[no hits above thresholds]\n"
  # each_line replaces String#each, which does not exist on Ruby 1.9+.
  body.each_line do |l|
    hits.push(Hit.new(l))
  end
  hits
end
|
|
296
|
+
private :parse_hit_data
|
|
297
|
+
|
|
298
|
+
# Bio::HMMER::Report#parse_hsp_data
|
|
299
|
+
# Builds one Hsp per line of the domain table.
#
# data::         the HSP section; everything up to and including the first
#                line ending in "---" is discarded, as is the final character
# is_hmmsearch:: passed through unchanged to Hsp.new
#
# Returns an Array of Hsp objects, empty when the table only holds the
# "[no hits above thresholds]" marker. The argument is not modified.
def parse_hsp_data(data, is_hmmsearch)
  # Non-destructive sub/chop: the bang variants changed the caller's
  # string, and sub! returns nil when nothing is replaced.
  body = data.sub(/.+?---\n/m, '').chop
  hsps = []
  return hsps if body == "\t[no hits above thresholds]\n"
  # each_line replaces String#each, which does not exist on Ruby 1.9+.
  body.each_line do |l|
    hsps.push(Hsp.new(l, is_hmmsearch))
  end
  hsps
end
|
|
308
|
+
private :parse_hsp_data
|
|
309
|
+
|
|
310
|
+
# Bio::HMMER::Report#parse_stat_data
|
|
311
|
+
# Parses the statistics section of a report.
#
# The section is consumed front to back: the histogram (up to the "%"
# line), then three paragraphs separated by empty lines, then the rest.
#
# Returns a five element Array:
# histogram::          text between "Histogram of all scores:" and "%"
# statistical_detail:: Hash of "name = value" lines, values as Float
# total_seq_searched:: Integer taken from the last "name: value" line of
#                      the following paragraph
# whole_seq_top_hits:: Hash of "name: value" lines of the next paragraph;
#                      all-digit values become Integer, others stay String
# domain_top_hits::    same, built from the remaining text
#
# The argument is not modified.
def parse_stat_data(data)
  # Work on a copy: sub! below consumes the text, and the caller's string
  # (which may be frozen) must stay untouched.
  rest = data.dup

  rest.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '')
  histogram = $1

  # each_line (instead of String#each, gone since Ruby 1.9) is used for
  # every paragraph below.
  statistical_detail = {}
  rest.sub!(/(.+?)\n\n/m, '')
  $1.each_line do |l|
    statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l
  end

  total_seq_searched = nil
  rest.sub!(/(.+?)\n\n/m, '')
  $1.each_line do |l|
    total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l
  end

  whole_seq_top_hits = {}
  rest.sub!(/(.+?)\n\n/m, '')
  $1.each_line do |l|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
      whole_seq_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
      whole_seq_top_hits[$1] = $2
    end
  end

  domain_top_hits = {}
  rest.each_line do |l|
    if /^\s*(.+?):\s*(\d+)\s*$/ =~ l
      domain_top_hits[$1] = $2.to_i
    elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l
      domain_top_hits[$1] = $2
    end
  end

  [histogram, statistical_detail, total_seq_searched,
   whole_seq_top_hits, domain_top_hits]
end
|
|
349
|
+
private :parse_stat_data
|
|
350
|
+
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
end
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
if __FILE__ == $0

  # For multiple reports in a single output file (hmmpfam):
  #
  #   Bio::HMMER.reports(ARGF.read) do |report|
  #     report.hits.each do |hit|
  #       hit.hsps.each do |hsp|
  #       end
  #     end
  #   end

  # Pretty-print when pp is available, otherwise keep Kernel#p.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  rep = Bio::HMMER::Report.new(ARGF.read)
  p rep

  indent = 18
  # Prints a label right-justified to +indent+ columns, without newline.
  label = lambda { |text| print text.rjust(indent) }

  puts "### hmmer result"
  %w(name version copyright license).each do |key|
    label.call("#{key} : ")
    p rep.program[key]
  end

  ['HMM file', 'Sequence file'].each do |key|
    label.call("#{key} : ")
    p rep.parameter[key]
  end

  ['Query sequence', 'Accession', 'Description'].each do |key|
    label.call("#{key} : ")
    p rep.query_info[key]
  end

  rep.each do |hit|
    puts "## each hit"
    label.call("accession : ")
    p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]
    label.call("description : ")
    p [ hit.description, hit.definition ]
    label.call("target_def : ")
    p hit.target_def
    label.call("score : ")
    p [ hit.score, hit.bit_score ]
    label.call("evalue : ")
    p hit.evalue
    label.call("num : ")
    p hit.num

    hit.each do |hsp|
      puts "## each hsp"
      label.call("accession : ")
      p [ hsp.accession, hsp.target_id ]

      %w(domain seq_f seq_t seq_ft hmm_f hmm_t hmm_ft).each do |name|
        label.call("#{name} : ")
        p hsp.__send__(name)
      end

      label.call("score : ")
      p [ hsp.score, hsp.bit_score ]

      # query_seq / target_seq and the *_from / *_to readers map onto the
      # hmm or sequence side depending on the program that made the report.
      %w(evalue midline hmmseq flatseq query_frame target_frame
         query_seq target_seq target_from target_to
         query_from query_to).each do |name|
        label.call("#{name} : ")
        p hsp.__send__(name)
      end
    end
  end

end
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
=begin
|
|
473
|
+
|
|
474
|
+
= Bio::HMMER::Report
|
|
475
|
+
|
|
476
|
+
--- Bio::HMMER::Report.new(data)
|
|
477
|
+
--- Bio::HMMER::Report#each
|
|
478
|
+
|
|
479
|
+
Iterates on each Bio::HMMER::Report::Hit object.
|
|
480
|
+
|
|
481
|
+
--- Bio::HMMER::Report#hits
|
|
482
|
+
|
|
483
|
+
Returns an Array of Bio::HMMER::Report::Hit objects.
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
== Bio::HMMER::Report::Hit
|
|
487
|
+
|
|
488
|
+
--- Bio::HMMER::Report::Hit#each
|
|
489
|
+
|
|
490
|
+
Iterates on each Hsp object.
|
|
491
|
+
|
|
492
|
+
--- Bio::HMMER::Report::Hit#hsps
|
|
493
|
+
|
|
494
|
+
Returns an Array of Bio::HMMER::Report::Hsp objects.
|
|
495
|
+
|
|
496
|
+
--- Bio::HMMER::Report::Hit#target_id
|
|
497
|
+
--- Bio::HMMER::Report::Hit#hit_id
|
|
498
|
+
--- Bio::HMMER::Report::Hit#entry_id
|
|
499
|
+
--- Bio::HMMER::Report::Hit#definition
|
|
500
|
+
--- Bio::HMMER::Report::Hit#description
|
|
501
|
+
--- Bio::HMMER::Report::Hit#num
|
|
502
|
+
|
|
503
|
+
number of domains
|
|
504
|
+
|
|
505
|
+
--- Bio::HMMER::Report::Hit#target_def
|
|
506
|
+
|
|
507
|
+
<domain number> + @description
|
|
508
|
+
|
|
509
|
+
--- Bio::HMMER::Report::Hit#evalue
|
|
510
|
+
--- Bio::HMMER::Report::Hit#bit_score
|
|
511
|
+
--- Bio::HMMER::Report::Hit#score
|
|
512
|
+
|
|
513
|
+
Matching scores (total of all HSPs).
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
== Bio::HMMER::Report::Hsp
|
|
517
|
+
|
|
518
|
+
--- Bio::HMMER::Report#hsps
|
|
519
|
+
|
|
520
|
+
Returns an Array of Bio::HMMER::Report::Hsp objects.
|
|
521
|
+
Under special circumstances, some HSPs do not have
|
|
522
|
+
parent Hit objects. If you want to access such HSPs,
|
|
523
|
+
use this method.
|
|
524
|
+
|
|
525
|
+
--- Bio::HMMER::Report::Hsp#target_id
|
|
526
|
+
--- Bio::HMMER::Report::Hsp#accession
|
|
527
|
+
--- Bio::HMMER::Report::Hsp#domain
|
|
528
|
+
--- Bio::HMMER::Report::Hsp#seq_f
|
|
529
|
+
--- Bio::HMMER::Report::Hsp#seq_t
|
|
530
|
+
--- Bio::HMMER::Report::Hsp#seq_ft
|
|
531
|
+
--- Bio::HMMER::Report::Hsp#hmm_f
|
|
532
|
+
--- Bio::HMMER::Report::Hsp#hmm_t
|
|
533
|
+
--- Bio::HMMER::Report::Hsp#hmm_ft
|
|
534
|
+
|
|
535
|
+
--- Bio::HMMER::Report::Hsp#bit_score
|
|
536
|
+
--- Bio::HMMER::Report::Hsp#score
|
|
537
|
+
--- Bio::HMMER::Report::Hsp#evalue
|
|
538
|
+
|
|
539
|
+
--- Bio::HMMER::Report::Hsp#midline
|
|
540
|
+
--- Bio::HMMER::Report::Hsp#hmmseq
|
|
541
|
+
--- Bio::HMMER::Report::Hsp#flatseq
|
|
542
|
+
--- Bio::HMMER::Report::Hsp#query_frame
|
|
543
|
+
--- Bio::HMMER::Report::Hsp#target_frame
|
|
544
|
+
|
|
545
|
+
--- Bio::HMMER::Report::Hsp#query_seq
|
|
546
|
+
--- Bio::HMMER::Report::Hsp#query_from
|
|
547
|
+
--- Bio::HMMER::Report::Hsp#query_to
|
|
548
|
+
--- Bio::HMMER::Report::Hsp#target_seq
|
|
549
|
+
--- Bio::HMMER::Report::Hsp#target_from
|
|
550
|
+
--- Bio::HMMER::Report::Hsp#target_to
|
|
551
|
+
|
|
552
|
+
--- Bio::HMMER::Report::Hsp#csline
|
|
553
|
+
--- Bio::HMMER::Report::Hsp#rfline
|
|
554
|
+
|
|
555
|
+
=end
|
|
556
|
+
|