bio 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
#
|
2
|
+
# bio/db/pdb/residue.rb - residue class for PDB
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# $Id: residue.rb,v 1.4 2005/12/18 17:34:47 ngoto Exp $
|
21
|
+
|
22
|
+
require 'bio/db/pdb'
|
23
|
+
|
24
|
+
module Bio
|
25
|
+
|
26
|
+
class PDB
|
27
|
+
|
28
|
+
#Residue class - id is a composite of resSeq and iCode
|
29
|
+
class Residue
  # A residue of a PDB structure: a named, ordered list of atom records.
  #
  # The id is required because resSeq alone doesn't uniquely identify a
  # residue.  It is built from resSeq followed by the stripped iCode and is
  # prefixed with 'LIGAND' when the residue is a HETATM.

  include Utils
  include AtomFinder
  include Enumerable
  include Comparable

  attr_reader :resName, :resSeq, :iCode, :id, :chain, :hetatm
  # NOTE(review): assigning hetatm through this writer does not rebuild id;
  # only resSeq= and iCode= do.
  attr_writer :resName, :chain, :hetatm

  # Creates a residue with no atoms.
  #
  # resName:: residue name
  # resSeq::  residue sequence number
  # iCode::   insertion code; may be nil
  # chain::   object stored as this residue's chain
  # hetatm::  true value when the residue is a HETATM
  #
  # The id stays nil when both resSeq and iCode are missing.
  def initialize(resName = nil, resSeq = nil, iCode = nil,
                 chain = nil, hetatm = false)
    @resName = resName
    @resSeq  = resSeq
    @iCode   = iCode
    @hetatm  = hetatm
    @chain   = chain
    @atoms   = []

    refresh_id
  end

  # Keyed access to atoms based on element e.g. ["CA"].
  # Returns the first atom whose element equals key, or nil.
  def [](key)
    @atoms.find { |atom| key == atom.element }
  end

  # Need to define these to make sure id is correctly updated.

  # Sets the residue sequence number (converted with to_i) and rebuilds id.
  def resSeq=(resSeq)
    @resSeq = resSeq.to_i
    refresh_id
  end

  # Sets the insertion code and rebuilds id.
  def iCode=(iCode)
    @iCode = iCode
    refresh_id
  end

  # Adds an atom to this residue and returns self.
  # Raises RuntimeError unless atom is a Bio::PDB::Record::ATOM.
  def addAtom(atom)
    raise "Expecting ATOM or HETATM" unless atom.is_a? Bio::PDB::Record::ATOM
    @atoms.push(atom)
    self
  end

  # Iterator over the atoms.
  def each
    @atoms.each { |atom| yield atom }
  end
  # Alias to override AtomFinder#each_atom
  alias each_atom each

  # Sorts based on resSeq, falling back to iCode when resSeq is equal.
  def <=>(other)
    if @resSeq != other.resSeq
      @resSeq <=> other.resSeq
    else
      @iCode <=> other.iCode
    end
  end

  # Stringifies each atom, one per line.
  def to_s
    @atoms.map { |atom| "#{atom}\n" }.join
  end

  private

  # Rebuilds @id from @resSeq, @iCode and @hetatm.  A nil iCode is treated
  # as an empty insertion code instead of raising NoMethodError on strip.
  def refresh_id
    if !@resSeq && !@iCode
      @id = nil
    else
      @id = "#{@resSeq}#{@iCode.to_s.strip}"
      @id = 'LIGAND' + @id if @hetatm
    end
  end

end
|
119
|
+
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
#
|
2
|
+
# bio/db/pdb/utils.rb - Utility modules for PDB
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
|
5
|
+
# Copyright (C) 2004 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
# $Id: utils.rb,v 1.2 2005/09/08 01:22:11 k Exp $
|
22
|
+
|
23
|
+
require 'matrix'
|
24
|
+
require 'bio/db/pdb'
|
25
|
+
|
26
|
+
module Bio; class PDB
|
27
|
+
|
28
|
+
module Utils
  # The methods in this mixin should be applicable to all PDB objects.

  # Returns the coordinates of the geometric centre (average co-ord)
  # of any AtomFinder (or each_atom) implementing object, as a Coordinate.
  # The averages are computed as floats so integer coordinates are not
  # truncated.  Raises ZeroDivisionError when each_atom yields no atoms.
  def geometricCentre()
    x = y = z = count = 0

    self.each_atom{ |atom|
      x += atom.x
      y += atom.y
      z += atom.z
      count += 1
    }

    raise ZeroDivisionError, "no atoms to average" if count == 0

    count = count.to_f
    Coordinate[x / count, y / count, z / count]
  end

  # Atomic masses used by centreOfGravity, keyed by the first character of
  # the atom's element string.
  # Working out what element it is from the atom name is tricky - this'll
  # work in most cases but not metals etc.  A proper element field is
  # included in some PDB files but not all.
  ElementMass = {
    'H' => 1,
    'C' => 12,
    'N' => 14,
    'O' => 16,
    'S' => 32,
    'P' => 31
  }

  # Returns the coords of the centre of gravity (mass-weighted average
  # co-ord) for any AtomFinder implementing object, as a Coordinate.
  # Raises TypeError when an atom's element has no entry in ElementMass and
  # ZeroDivisionError when each_atom yields no atoms.
  def centreOfGravity()
    x = y = z = total = 0

    self.each_atom{ |atom|
      element = atom.element[0,1]
      mass    = ElementMass[element]
      # Fail with a readable message instead of a nil-coercion error.
      if mass.nil?
        raise TypeError, "no atomic mass known for element #{element.inspect}"
      end
      total += mass
      x += atom.x * mass
      y += atom.y * mass
      z += atom.z * mass
    }

    raise ZeroDivisionError, "no atoms to average" if total == 0

    total = total.to_f
    Coordinate[x / total, y / total, z / total]
  end

  # Perhaps distance and dihedral would be better off as class methods?
  # (rather) than instance methods

  # Returns the distance between two coordinates.  Each argument is
  # converted with to_xyz first.
  def self.distance(coord1,coord2)
    coord1 = to_xyz(coord1)
    coord2 = to_xyz(coord2)
    (coord1 - coord2).r
  end

  # Returns the signed dihedral (torsion) angle, in radians, defined by
  # four coordinates.  Every argument must respond to x, y and z.
  def self.dihedral_angle(coord1,coord2,coord3,coord4)
    a1, b1, c1, d = calculatePlane(coord1,coord2,coord3)
    a2, b2, c2    = calculatePlane(coord2,coord3,coord4)

    # Cosine of the angle between the two plane normals.
    cosine = (a1*a2 + b1*b2 + c1*c2) /
             (Math.sqrt(a1**2 + b1**2 + c1**2) *
              Math.sqrt(a2**2 + b2**2 + c2**2))
    torsion = acos(cosine)

    # The side of the first plane on which coord4 lies gives the sign.
    if ((a1*coord4.x + b1*coord4.y + c1*coord4.z + d) < 0)
      -torsion
    else
      torsion
    end
  end

  # Implicit conversion into Vector or Bio::PDB::Coordinate.
  # A Vector is returned unchanged; an object responding to xyz is replaced
  # by obj.xyz; anything else is converted with Vector.elements(obj.to_a).
  # Errors raised inside obj.xyz are propagated rather than swallowed.
  def self.to_xyz(obj)
    return obj if obj.is_a?(Vector)
    return obj.xyz if obj.respond_to?(:xyz)
    Vector.elements(obj.to_a)
  end

  # Methods required for the dihedral angle calculations
  # perhaps these should go in some separate Math module

  # Converts radians to degrees.
  def self.rad2deg(r)
    (r/Math::PI)*180
  end

  # Arc cosine computed with atan2.  When rounding pushes x slightly outside
  # [-1, 1] the squared sine would be negative; it is clamped to zero so
  # Math.sqrt does not raise Math::DomainError.
  def self.acos(x)
    sin_sq = 1 - x**2
    sin_sq = 0 if sin_sq < 0
    Math.atan2(Math.sqrt(sin_sq), x)
  end

  # Returns [a, b, c, d], the coefficients of the plane
  # a*x + b*y + c*z + d = 0 through the three given coordinates.
  def self.calculatePlane(coord1,coord2,coord3)
    a = coord1.y * (coord2.z - coord3.z) +
        coord2.y * (coord3.z - coord1.z) +
        coord3.y * (coord1.z - coord2.z)
    b = coord1.z * (coord2.x - coord3.x) +
        coord2.z * (coord3.x - coord1.x) +
        coord3.z * (coord1.x - coord2.x)
    c = coord1.x * (coord2.y - coord3.y) +
        coord2.x * (coord3.y - coord1.y) +
        coord3.x * (coord1.y - coord2.y)
    d = -1 *
        (
         (coord1.x * (coord2.y * coord3.z - coord3.y * coord2.z)) +
         (coord2.x * (coord3.y * coord1.z - coord1.y * coord3.z)) +
         (coord3.x * (coord1.y * coord2.z - coord2.y * coord1.z))
        )

    [a,b,c,d]
  end

  # Every class in the hierarchy implements finder, this takes
  # a class which determines which type of object to find, the associated
  # block is then run in classic .find style.
  # Raises TypeError when findtype is not Atom, Residue, Chain or Model.
  def finder(findtype,&block)
    if findtype == Bio::PDB::Atom
      return self.find_atom(&block)
    elsif findtype == Bio::PDB::Residue
      return self.find_residue(&block)
    elsif findtype == Bio::PDB::Chain
      return self.find_chain(&block)
    elsif findtype == Bio::PDB::Model
      return self.find_model(&block)
    else
      raise TypeError, "You can't find a #{findtype}"
    end
  end
end #module Utils
|
169
|
+
|
170
|
+
#The *Finder modules implement a find_* method which returns
|
171
|
+
#an array of anything for which the block evals true
|
172
|
+
#(compare Enumerable#find_all).
|
173
|
+
#The each_* style methods act as classic iterators.
|
174
|
+
module ModelFinder
  # Returns an array of every model yielded by each_model for which the
  # given block returns a true value.
  def find_model
    selected = []
    self.each_model do |candidate|
      selected << candidate if yield(candidate)
    end
    selected
  end
end
|
183
|
+
|
184
|
+
#The hierarchical nature of the objects allows us to re-use the
|
185
|
+
#methods from the previous level - e.g. A PDB object can use the .models
|
186
|
+
#method defined in ModelFinder to iterate through the models to find the
|
187
|
+
#chains
|
188
|
+
module ChainFinder
  # Returns an array of every chain yielded by each_chain for which the
  # given block returns a true value.
  def find_chain
    selected = []
    self.each_chain do |candidate|
      selected << candidate if yield(candidate)
    end
    selected
  end

  # Yields every chain of every model, walking the models with each_model.
  def each_chain
    self.each_model do |model|
      model.each do |chain|
        yield chain
      end
    end
  end
end
|
202
|
+
|
203
|
+
module ResidueFinder
  # Returns an array of every residue yielded by each_residue for which the
  # given block returns a true value.
  def find_residue
    selected = []
    self.each_residue do |candidate|
      selected << candidate if yield(candidate)
    end
    selected
  end

  # Yields every residue of every chain, walking the chains with each_chain.
  def each_residue
    self.each_chain do |chain|
      chain.each do |residue|
        yield residue
      end
    end
  end
end
|
217
|
+
|
218
|
+
module AtomFinder
  # Returns an array of every atom yielded by each_atom for which the given
  # block returns a true value.
  def find_atom
    selected = []
    self.each_atom do |candidate|
      selected << candidate if yield(candidate)
    end
    selected
  end

  # Yields every atom of every residue, walking the residues with
  # each_residue.
  def each_atom
    self.each_residue do |residue|
      residue.each do |atom|
        yield atom
      end
    end
  end
end
|
232
|
+
|
233
|
+
end; end #module Bio; class PDB
|
234
|
+
|
@@ -0,0 +1,616 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/prosite.rb - PROSITE database class
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
5
|
+
# Licence:: LGPL
|
6
|
+
#
|
7
|
+
# $Id: prosite.rb,v 0.13 2005/12/18 18:24:08 k Exp $
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
#
|
12
|
+
# == Example
|
13
|
+
# == References
|
14
|
+
#--
|
15
|
+
#
|
16
|
+
# This library is free software; you can redistribute it and/or
|
17
|
+
# modify it under the terms of the GNU Lesser General Public
|
18
|
+
# License as published by the Free Software Foundation; either
|
19
|
+
# version 2 of the License, or (at your option) any later version.
|
20
|
+
#
|
21
|
+
# This library is distributed in the hope that it will be useful,
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
24
|
+
# Lesser General Public License for more details.
|
25
|
+
#
|
26
|
+
# You should have received a copy of the GNU Lesser General Public
|
27
|
+
# License along with this library; if not, write to the Free Software
|
28
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
29
|
+
#
|
30
|
+
#++
|
31
|
+
#
|
32
|
+
|
33
|
+
require 'bio/db'
|
34
|
+
|
35
|
+
module Bio
|
36
|
+
|
37
|
+
class PROSITE < EMBLDB
|
38
|
+
|
39
|
+
# Delimiter
|
40
|
+
DELIMITER = "\n//\n"
|
41
|
+
|
42
|
+
# Delimiter
|
43
|
+
RS = DELIMITER
|
44
|
+
|
45
|
+
# Bio::DB API
|
46
|
+
TAGSIZE = 5
|
47
|
+
|
48
|
+
|
49
|
+
def initialize(entry)
|
50
|
+
super(entry, TAGSIZE)
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# ID Identification (Begins each entry; 1 per entry)
|
55
|
+
#
|
56
|
+
# ID ENTRY_NAME; ENTRY_TYPE. (ENTRY_TYPE : PATTERN, MATRIX, RULE)
|
57
|
+
#
|
58
|
+
# Returns
|
59
|
+
# Returns the entry name from the ID line.  On first use the ID line is
# stripped of its trailing '.', split on '; ' and cached in @data as
# 'ID' (entry name) and 'TYPE' (entry type).
def name
  return @data['ID'] if @data['ID']

  entry_name, entry_type = fetch('ID').chomp('.').split('; ')
  @data['ID']   = entry_name
  @data['TYPE'] = entry_type
  @data['ID']
end
|
65
|
+
|
66
|
+
# Returns
|
67
|
+
# Returns the entry type cached under 'TYPE'.  When it is not cached yet,
# name is called first because it parses the ID line and fills 'TYPE'.
def division
  name unless @data['TYPE']
  @data['TYPE']
end
|
73
|
+
|
74
|
+
|
75
|
+
# AC Accession number (1 per entry)
|
76
|
+
#
|
77
|
+
# AC PSnnnnn;
|
78
|
+
#
|
79
|
+
# Returns
|
80
|
+
# Returns the accession number: the AC line with its trailing ';' removed.
# The value is cached in @data['AC'].
def ac
  @data['AC'] ||= fetch('AC').chomp(';')
end
|
86
|
+
|
87
|
+
alias entry_id ac
|
88
|
+
|
89
|
+
|
90
|
+
# DT Date (1 per entry)
|
91
|
+
#
|
92
|
+
# DT MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
|
93
|
+
#
|
94
|
+
# Returns the DT (date) line of the entry, as obtained via field_fetch('DT').
|
95
|
+
def dt
|
96
|
+
field_fetch('DT')
|
97
|
+
end
|
98
|
+
|
99
|
+
alias date dt
|
100
|
+
|
101
|
+
|
102
|
+
# DE Short description (1 per entry)
|
103
|
+
#
|
104
|
+
# DE Description.
|
105
|
+
#
|
106
|
+
# Returns the DE (short description) line, as obtained via field_fetch('DE').
|
107
|
+
def de
|
108
|
+
field_fetch('DE')
|
109
|
+
end
|
110
|
+
|
111
|
+
alias definition de
|
112
|
+
|
113
|
+
|
114
|
+
# PA Pattern (>=0 per entry)
|
115
|
+
#
|
116
|
+
# see - pa2re method
|
117
|
+
#
|
118
|
+
# Returns
|
119
|
+
# Returns the PA (pattern) field with all whitespace removed, or nil when
# no pattern is available.  The cached string in @data['PA'] is modified
# in place.
def pa
  field_fetch('PA')
  @data['PA'] ||= fetch('PA')

  pattern = @data['PA']
  pattern.gsub!(/\s+/, '') if pattern
  pattern
end
|
125
|
+
|
126
|
+
alias pattern pa
|
127
|
+
|
128
|
+
|
129
|
+
# MA Matrix/profile (>=0 per entry)
|
130
|
+
#
|
131
|
+
# see - ma2re method
|
132
|
+
#
|
133
|
+
# Returns the MA (matrix/profile) lines, as obtained via field_fetch('MA').
|
134
|
+
def ma
|
135
|
+
field_fetch('MA')
|
136
|
+
end
|
137
|
+
|
138
|
+
alias profile ma
|
139
|
+
|
140
|
+
|
141
|
+
# RU Rule (>=0 per entry)
|
142
|
+
#
|
143
|
+
# RU Rule_Description.
|
144
|
+
#
|
145
|
+
# The rule is described in ordinary English and is free-format.
|
146
|
+
#
|
147
|
+
# Returns the RU (rule) lines of the entry, as obtained via field_fetch('RU').
|
148
|
+
def ru
|
149
|
+
field_fetch('RU')
|
150
|
+
end
|
151
|
+
|
152
|
+
alias rule ru
|
153
|
+
|
154
|
+
|
155
|
+
# NR Numerical results (>=0 per entry)
|
156
|
+
#
|
157
|
+
# - SWISS-PROT scan statistics of true and false positives/negatives
|
158
|
+
#
|
159
|
+
# /RELEASE SWISS-PROT release number and total number of sequence
|
160
|
+
# entries in that release.
|
161
|
+
# /TOTAL Total number of hits in SWISS-PROT.
|
162
|
+
# /POSITIVE Number of hits on proteins that are known to belong to the
|
163
|
+
# set in consideration.
|
164
|
+
# /UNKNOWN Number of hits on proteins that could possibly belong to
|
165
|
+
# the set in consideration.
|
166
|
+
# /FALSE_POS Number of false hits (on unrelated proteins).
|
167
|
+
# /FALSE_NEG Number of known missed hits.
|
168
|
+
# /PARTIAL Number of partial sequences which belong to the set in
|
169
|
+
# consideration, but which are not hit by the pattern or
|
170
|
+
# profile because they are partial (fragment) sequences.
|
171
|
+
#
|
172
|
+
# Returns
|
173
|
+
# Returns a Hash of the NR (numerical results) qualifiers, cached in
# @data['NR'].  Every '/KEY=value;' pair becomes one entry:
#
# * 'hits(seqs)'      -> [hits, seqs] as Integers
# * 'release,entries' -> [release as String, entries as Integer]
# * anything else     -> value.to_i
def nr
  return @data['NR'] if @data['NR']

  stats = {}   # temporary hash
  fetch('NR').scan(%r{/(\S+)=([^;]+);}) do |key, raw|
    stats[key] =
      case raw
      when /^(\d+)\((\d+)\)$/
        [$1.to_i, $2.to_i]   # number of hits, number of sequences
      when /([\d\.]+),(\d+)/
        [$1, $2.to_i]        # SWISS-PROT release, number of sequences
      else
        raw.to_i
      end
  end
  @data['NR'] = stats
  @data['NR']
end
|
194
|
+
|
195
|
+
alias statistics nr
|
196
|
+
|
197
|
+
# Returns the /RELEASE statistic as parsed by nr: [SWISS-PROT release number, number of sequence entries].
|
198
|
+
def release
|
199
|
+
statistics['RELEASE']
|
200
|
+
end
|
201
|
+
|
202
|
+
# Returns the SWISS-PROT release number (first element of release).
|
203
|
+
def swissprot_release_number
|
204
|
+
release.first
|
205
|
+
end
|
206
|
+
|
207
|
+
# Returns the number of sequence entries in that SWISS-PROT release (last element of release).
|
208
|
+
def swissprot_release_sequences
|
209
|
+
release.last
|
210
|
+
end
|
211
|
+
|
212
|
+
# Returns the /TOTAL statistic as parsed by nr: [number of hits, number of sequences].
|
213
|
+
def total
|
214
|
+
statistics['TOTAL']
|
215
|
+
end
|
216
|
+
|
217
|
+
# Returns the total number of hits (first element of total).
|
218
|
+
def total_hits
|
219
|
+
total.first
|
220
|
+
end
|
221
|
+
|
222
|
+
# Returns the total number of sequences hit (last element of total).
|
223
|
+
def total_sequences
|
224
|
+
total.last
|
225
|
+
end
|
226
|
+
|
227
|
+
# Returns the /POSITIVE statistic as parsed by nr: [number of hits, number of sequences].
|
228
|
+
def positive
|
229
|
+
statistics['POSITIVE']
|
230
|
+
end
|
231
|
+
|
232
|
+
# Returns the number of true positive hits (first element of positive).
|
233
|
+
def positive_hits
|
234
|
+
positive.first
|
235
|
+
end
|
236
|
+
|
237
|
+
# Returns the number of true positive sequences (last element of positive).
|
238
|
+
def positive_sequences
|
239
|
+
positive.last
|
240
|
+
end
|
241
|
+
|
242
|
+
# Returns the /UNKNOWN statistic as parsed by nr: [number of hits, number of sequences].
|
243
|
+
def unknown
|
244
|
+
statistics['UNKNOWN']
|
245
|
+
end
|
246
|
+
|
247
|
+
# Returns the number of unknown hits (first element of unknown).
|
248
|
+
def unknown_hits
|
249
|
+
unknown.first
|
250
|
+
end
|
251
|
+
|
252
|
+
# Returns the number of unknown sequences (last element of unknown).
|
253
|
+
def unknown_sequences
|
254
|
+
unknown.last
|
255
|
+
end
|
256
|
+
|
257
|
+
# Returns the /FALSE_POS statistic as parsed by nr: [number of hits, number of sequences].
|
258
|
+
def false_pos
|
259
|
+
statistics['FALSE_POS']
|
260
|
+
end
|
261
|
+
|
262
|
+
# Returns the number of false positive hits (first element of false_pos).
|
263
|
+
def false_positive_hits
|
264
|
+
false_pos.first
|
265
|
+
end
|
266
|
+
|
267
|
+
# Returns the number of false positive sequences (last element of false_pos).
|
268
|
+
def false_positive_sequences
|
269
|
+
false_pos.last
|
270
|
+
end
|
271
|
+
|
272
|
+
# Returns the /FALSE_NEG statistic as parsed by nr (number of known missed hits).
|
273
|
+
def false_neg
|
274
|
+
statistics['FALSE_NEG']
|
275
|
+
end
|
276
|
+
alias false_negative_hits false_neg
|
277
|
+
|
278
|
+
# Returns the /PARTIAL statistic as parsed by nr (number of partial sequences not hit).
|
279
|
+
def partial
|
280
|
+
statistics['PARTIAL']
|
281
|
+
end
|
282
|
+
|
283
|
+
|
284
|
+
# CC Comments (>=0 per entry)
|
285
|
+
#
|
286
|
+
# CC /QUALIFIER=data; /QUALIFIER=data; .......
|
287
|
+
#
|
288
|
+
# /TAXO-RANGE Taxonomic range.
|
289
|
+
# /MAX-REPEAT Maximum known number of repetitions of the pattern in a
|
290
|
+
# single protein.
|
291
|
+
# /SITE Indication of an `interesting' site in the pattern.
|
292
|
+
# /SKIP-FLAG Indication of an entry that can be, in some cases, ignored
|
293
|
+
# by a program (because it is too unspecific).
|
294
|
+
#
|
295
|
+
# Returns
|
296
|
+
# Returns a Hash of the CC (comment) qualifiers, cached in @data['CC'].
# Every '/QUALIFIER=data;' pair becomes one String => String entry.
def cc
  return @data['CC'] if @data['CC']

  qualifiers = {}   # temporary hash
  fetch('CC').scan(%r{/(\S+)=([^;]+);}) do |qualifier, value|
    qualifiers[qualifier] = value
  end
  @data['CC'] = qualifiers
  @data['CC']
end
|
306
|
+
|
307
|
+
alias comment cc
|
308
|
+
|
309
|
+
# Returns
|
310
|
+
# Returns the /TAXO-RANGE qualifier of the comment.
#
# Without expand the raw code string (or nil when absent) is returned.
# With a true expand, an Array of taxon names is returned, one per
# recognised code character (A, B, E, P, V); other characters are skipped.
def taxon_range(expand = nil)
  codes = comment['TAXO-RANGE']
  return codes unless codes && expand

  taxa = {
    'A' => 'archaebacteria',
    'B' => 'bacteriophages',
    'E' => 'eukaryotes',
    'P' => 'prokaryotes',
    'V' => 'eukaryotic viruses'
  }
  codes.scan(/./).map { |code| taxa[code] }.compact
end
|
327
|
+
|
328
|
+
# Returns the /MAX-REPEAT qualifier of the comment as an Integer (0 when it is absent).
|
329
|
+
def max_repeat
|
330
|
+
comment['MAX-REPEAT'].to_i
|
331
|
+
end
|
332
|
+
|
333
|
+
# Returns
|
334
|
+
# Returns the /SITE qualifier of the comment as [position, description],
# split on ','.  The position is converted with to_i, so an absent
# qualifier gives [0, nil].
def site
  raw = comment['SITE']
  position, description = raw ? raw.split(',') : nil
  [position.to_i, description]
end
|
340
|
+
|
341
|
+
# Returns
|
342
|
+
# Returns true when the /SKIP-FLAG qualifier of the comment is 'TRUE',
# nil otherwise.
def skip_flag
  true if comment['SKIP-FLAG'] == 'TRUE'
end
|
347
|
+
|
348
|
+
|
349
|
+
# DR Cross-references to SWISS-PROT (>=0 per entry)
|
350
|
+
#
|
351
|
+
# DR AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
|
352
|
+
#
|
353
|
+
# - `AC_NB' is the SWISS-PROT primary accession number of the entry to
|
354
|
+
# which reference is being made.
|
355
|
+
# - `ENTRY_NAME' is the SWISS-PROT entry name.
|
356
|
+
# - `C' is a one character flag that can be one of the following:
|
357
|
+
#
|
358
|
+
# T For a true positive.
|
359
|
+
# N For a false negative; a sequence which belongs to the set under
|
360
|
+
# consideration, but which has not been picked up by the pattern or
|
361
|
+
# profile.
|
362
|
+
# P For a `potential' hit; a sequence that belongs to the set under
|
363
|
+
# consideration, but which was not picked up because the region(s) that
|
364
|
+
# are used as a 'fingerprint' (pattern or profile) is not yet available
|
365
|
+
# in the data bank (partial sequence).
|
366
|
+
# ? For an unknown; a sequence which possibly could belong to the set under
|
367
|
+
# consideration.
|
368
|
+
# F For a false positive; a sequence which does not belong to the set in
|
369
|
+
# consideration.
|
370
|
+
#
|
371
|
+
# Returns
|
372
|
+
# Returns a Hash of the DR (SWISS-PROT cross-reference) records, cached in
# @data['DR']:
#
#   accession number => [entry name, one character flag]
#
# An empty Hash is returned when the entry has no DR field.  The field is
# fetched only once per parse.
def dr
  unless @data['DR']
    xrefs  = {}   # temporary hash
    record = fetch('DR')
    if record
      record.scan(/(\w+)\s*, (\w+)\s*, (.);/) do |accession, entry_name, flag|
        xrefs[accession] = [entry_name, flag]
      end
    end
    @data['DR'] = xrefs
  end
  @data['DR']
end
|
384
|
+
|
385
|
+
alias sp_xref dr
|
386
|
+
|
387
|
+
# Returns
|
388
|
+
# Returns an Array of the SWISS-PROT cross-references whose flag equals
# flag: entry names when by_name is true, accession numbers otherwise.
def list_xref(flag, by_name = nil)
  hits = []
  sp_xref.each do |accession, (entry_name, entry_flag)|
    next unless entry_flag == flag
    hits << (by_name ? entry_name : accession)
  end
  hits
end
|
402
|
+
|
403
|
+
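# Returns the cross-references flagged 'T' (true positives): entry names when by_name is set, accession numbers otherwise.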
|
404
|
+
def list_truepositive(by_name = nil)
|
405
|
+
list_xref('T', by_name)
|
406
|
+
end
|
407
|
+
|
408
|
+
# Returns
|
409
|
+
# Returns the cross-references flagged 'N' (false negatives): entry names
# when by_name is true, accession numbers otherwise.
def list_falsenegative(by_name = nil)
  # 'N' is the false negative flag of the DR line; 'F' marks false positives.
  list_xref('N', by_name)
end
|
412
|
+
|
413
|
+
# Returns
|
414
|
+
# Returns the cross-references flagged 'F' (false positives): entry names
# when by_name is true, accession numbers otherwise.
def list_falsepositive(by_name = nil)
  # 'F' is the false positive flag of the DR line; 'P' marks potential hits.
  list_xref('F', by_name)
end
|
417
|
+
|
418
|
+
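# Returns the cross-references flagged 'P' (potential hits): entry names when by_name is set, accession numbers otherwise.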
|
419
|
+
def list_potentialhit(by_name = nil)
|
420
|
+
list_xref('P', by_name)
|
421
|
+
end
|
422
|
+
|
423
|
+
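# Returns the cross-references flagged '?' (unknown): entry names when by_name is set, accession numbers otherwise.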
|
424
|
+
def list_unknown(by_name = nil)
|
425
|
+
list_xref('?', by_name)
|
426
|
+
end
|
427
|
+
|
428
|
+
|
429
|
+
# 3D Cross-references to PDB (>=0 per entry)
|
430
|
+
#
|
431
|
+
# 3D name; [name2;...]
|
432
|
+
#
|
433
|
+
# Returns
|
434
|
+
# Returns an Array of the names listed on the 3D (PDB cross-reference)
# line, split on ';' plus any following spaces.  Cached in @data['3D'].
def pdb_xref
  @data['3D'] ||= fetch('3D').split(/; */)
end
|
440
|
+
|
441
|
+
|
442
|
+
# DO Pointer to the documentation file (1 per entry)
|
443
|
+
#
|
444
|
+
# DO PDOCnnnnn;
|
445
|
+
#
|
446
|
+
# Returns
|
447
|
+
# Returns the documentation pointer: the DO line with its trailing ';'
# removed.  The value is cached in @data['DO'] so the field is fetched
# only once, like the other accessors.
def pdoc_xref
  @data['DO'] ||= fetch('DO').chomp(';')
end
|
450
|
+
|
451
|
+
|
452
|
+
### prosite pattern to regular expression
|
453
|
+
#
|
454
|
+
# prosite/prosuser.txt:
|
455
|
+
#
|
456
|
+
# The PA (PAttern) lines contain the definition of a PROSITE pattern. The
|
457
|
+
# patterns are described using the following conventions:
|
458
|
+
#
|
459
|
+
# 0) The standard IUPAC one-letter codes for the amino acids are used.
|
460
|
+
# 0) Ambiguities are indicated by listing the acceptable amino acids for a
|
461
|
+
# given position, between square parentheses `[ ]'. For example: [ALT]
|
462
|
+
# stands for Ala or Leu or Thr.
|
463
|
+
# 1) A period ends the pattern.
|
464
|
+
# 2) When a pattern is restricted to either the N- or C-terminal of a
|
465
|
+
# sequence, that pattern either starts with a `<' symbol or respectively
|
466
|
+
# ends with a `>' symbol.
|
467
|
+
# 3) Ambiguities are also indicated by listing between a pair of curly
|
468
|
+
# brackets `{ }' the amino acids that are not accepted at a given
|
469
|
+
# position. For example: {AM} stands for any amino acid except Ala and
|
470
|
+
# Met.
|
471
|
+
# 4) Repetition of an element of the pattern can be indicated by following
|
472
|
+
# that element with a numerical value or a numerical range between
|
473
|
+
# parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
|
474
|
+
# x-x or x-x-x or x-x-x-x.
|
475
|
+
# 5) The symbol `x' is used for a position where any amino acid is accepted.
|
476
|
+
# 6) Each element in a pattern is separated from its neighbor by a `-'.
|
477
|
+
#
|
478
|
+
# Examples:
|
479
|
+
#
|
480
|
+
# PA [AC]-x-V-x(4)-{ED}.
|
481
|
+
#
|
482
|
+
# This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
|
483
|
+
# but Glu or Asp}
|
484
|
+
#
|
485
|
+
# PA <A-x-[ST](2)-x(0,1)-V.
|
486
|
+
#
|
487
|
+
# This pattern, which must be in the N-terminal of the sequence (`<'), is
|
488
|
+
# translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
|
489
|
+
#
|
490
|
+
# Converts a PROSITE PA pattern string into a Regexp.
#
# The conversion is done on a copy, so the caller's string is left
# untouched. The previous implementation edited +pattern+ in place,
# which rewrote the string held by the caller and raised on frozen
# strings.
#
# pattern :: String in PROSITE pattern syntax, e.g. "[AC]-x-V-x(4)-{ED}."
#
# Returns a Regexp equivalent to the pattern.
def self.pa2re(pattern)
  regexp = pattern.gsub(/\s/, '')            # copy without white spaces
  regexp.sub!(/\.$/, '')                     # (1) remove trailing '.'
  regexp.sub!(/^</, '^')                     # (2) restricted to the N-terminal : `<'
  regexp.sub!(/>$/, '$')                     # (2) restricted to the C-terminal : `>'
  regexp.gsub!(/\{(\w+)\}/, '[^\1]')         # (3) not accepted at a given position : '{}'
  regexp.gsub!(/\(([\d,]+)\)/, '{\1}')       # (4) repetition of an element : (n), (n,m)
  regexp.tr!('x', '.')                       # (5) any amino acid is accepted : 'x'
  regexp.delete!('-')                        # (6) each element is separated by a '-'
  Regexp.new(regexp)
end
|
505
|
+
|
506
|
+
# Instance-level convenience wrapper: forwards +pattern+ to the
# class-level pa2re of the receiver's class and returns its result.
def pa2re(pattern)
  klass = self.class
  klass.pa2re(pattern)
end
|
509
|
+
|
510
|
+
|
511
|
+
### prosite profile to regular expression
|
512
|
+
#
|
513
|
+
# prosite/profile.txt:
|
514
|
+
#
|
515
|
+
# Returns
|
516
|
+
# Placeholder for a profile (MA matrix) to regular expression conversion.
#
# matrix :: currently unused
#
# Always raises NotImplementedError.
def ma2re(matrix)
  raise(NotImplementedError)
end
|
519
|
+
|
520
|
+
end
|
521
|
+
|
522
|
+
end
|
523
|
+
|
524
|
+
|
525
|
+
# Self-test: when this file is run directly, parse a PROSITE entry read
# from ARGF and print the result of every accessor.
if __FILE__ == $0

  # Use pp for output when it can be loaded; otherwise keep the plain p.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  ps = Bio::PROSITE.new(ARGF.read)

  # Accessors that take no arguments.
  accessors = %w(
    name division ac entry_id dt date de definition
    pa pattern ma profile ru rule nr statistics release
    swissprot_release_number swissprot_release_sequences
    total total_hits total_sequences
    positive positive_hits positive_sequences
    unknown unknown_hits unknown_sequences
    false_pos false_positive_hits false_positive_sequences
    false_neg false_negative_hits partial
    cc comment max_repeat site skip_flag
    dr sp_xref pdb_xref pdoc_xref
  )

  accessors.each do |accessor|
    puts ">>> #{accessor}"
    p ps.send(accessor)
  end

  puts ">>> taxon_range"
  p ps.taxon_range
  puts ">>> taxon_range(expand)"
  p ps.taxon_range(true)

  # Each list_* method is shown twice: by accession, then by entry name.
  listers = %w(
    list_truepositive list_falsenegative list_falsepositive
    list_potentialhit list_unknown
  )

  listers.each do |lister|
    puts ">>> #{lister}"
    p ps.send(lister)
    puts ">>> #{lister}(by_name)"
    p ps.send(lister, true)
  end

end
|