bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/pdb/residue.rb - residue class for PDB
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: residue.rb,v 1.4 2005/12/18 17:34:47 ngoto Exp $
|
|
21
|
+
|
|
22
|
+
require 'bio/db/pdb'
|
|
23
|
+
|
|
24
|
+
module Bio
|
|
25
|
+
|
|
26
|
+
class PDB
|
|
27
|
+
|
|
28
|
+
#Residue class - id is a composite of resSeq and iCode
|
|
29
|
+
class Residue

  include Utils
  include AtomFinder
  include Enumerable
  include Comparable

  attr_reader :resName, :resSeq, :iCode, :id, :chain, :hetatm
  attr_writer :resName, :chain, :hetatm

  # Creates a residue holding no atoms.
  #
  # resName:: residue name
  # resSeq::  residue sequence number
  # iCode::   insertion code (may be nil)
  # chain::   object stored as the owning chain
  # hetatm::  true when the residue is made of HETATM records
  #
  # resSeq alone does not uniquely identify a residue, so an id is built
  # from resSeq and iCode and prefixed with 'LIGAND' for HETATM residues.
  # The id stays nil when neither resSeq nor iCode is given.
  def initialize(resName = nil, resSeq = nil, iCode = nil,
                 chain = nil, hetatm = false)
    @resName = resName
    @resSeq  = resSeq
    @iCode   = iCode
    @hetatm  = hetatm
    @chain   = chain
    @atoms   = []

    @id = (@resSeq || @iCode) ? build_id : nil
  end

  # Keyed access to atoms: returns the first atom whose +element+ equals
  # +key+, or nil when there is none.
  # NOTE(review): the historical example ["CA"] looks like an atom *name*,
  # but the comparison is made against atom.element -- confirm intent.
  def [](key)
    @atoms.find { |atom| key == atom.element }
  end

  # Sets the residue sequence number (converted with to_i) and rebuilds id.
  def resSeq=(resSeq)
    @resSeq = resSeq.to_i
    @id = build_id
  end

  # Sets the insertion code and rebuilds id.
  def iCode=(iCode)
    @iCode = iCode
    @id = build_id
  end

  # Adds an atom to this residue and returns self.
  # Raises RuntimeError unless +atom+ is a Bio::PDB::Record::ATOM.
  def addAtom(atom)
    raise "Expecting ATOM or HETATM" unless atom.is_a? Bio::PDB::Record::ATOM
    @atoms.push(atom)
    self
  end

  # Iterates over the atoms, yielding each one in insertion order.
  def each
    @atoms.each { |atom| yield atom }
  end
  # Alias to override AtomFinder#each_atom
  alias each_atom each

  # Orders residues by resSeq, falling back to iCode when resSeq is equal.
  def <=>(other)
    if @resSeq != other.resSeq
      @resSeq <=> other.resSeq
    else
      @iCode <=> other.iCode
    end
  end

  # Returns the concatenation of every atom's to_s, one atom per line.
  def to_s
    string = ""
    @atoms.each { |atom| string << atom.to_s << "\n" }
    string
  end

  private

  # Builds the id string from the current resSeq, iCode and hetatm flag.
  # iCode goes through to_s so that a nil insertion code is treated as
  # empty instead of raising NoMethodError on nil.strip.
  def build_id
    id = "#{@resSeq}#{@iCode.to_s.strip}"
    @hetatm ? 'LIGAND' + id : id
  end

end
|
|
119
|
+
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
end
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/pdb/utils.rb - Utility modules for PDB
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
|
|
5
|
+
# Copyright (C) 2004 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: utils.rb,v 1.2 2005/09/08 01:22:11 k Exp $
|
|
22
|
+
|
|
23
|
+
require 'matrix'
|
|
24
|
+
require 'bio/db/pdb'
|
|
25
|
+
|
|
26
|
+
module Bio; class PDB
|
|
27
|
+
|
|
28
|
+
module Utils
  # The methods in this mixin should be applicable to all PDB objects.

  # Returns the geometric centre (unweighted mean of the x, y and z
  # coordinates) of every atom yielded by each_atom, as a Coordinate.
  #
  # Raises ZeroDivisionError when each_atom yields no atoms.
  def geometricCentre()
    x = y = z = 0.0   # floats, so integer coordinates are not truncated
    count = 0

    self.each_atom{ |atom|
      x += atom.x
      y += atom.y
      z += atom.z
      count += 1
    }

    raise ZeroDivisionError, 'divided by 0 (no atoms)' if count == 0

    Coordinate[x / count, y / count, z / count]
  end

  # Masses used by centreOfGravity, keyed by the first letter of the
  # atom's element string.
  # Working out what element it is from the atom name is tricky - this
  # will work in most cases but not for metals etc. A proper element
  # field is included in some PDB files but not all.
  ElementMass = {
    'H' => 1,
    'C' => 12,
    'N' => 14,
    'O' => 16,
    'S' => 32,
    'P' => 31
  }

  # Returns the mass-weighted centre of every atom yielded by each_atom,
  # as a Coordinate. The mass is looked up in ElementMass using the first
  # character of atom.element.
  #
  # Raises ZeroDivisionError when each_atom yields no atoms, and TypeError
  # (from adding nil) when an element is missing from ElementMass.
  def centreOfGravity()
    x = y = z = 0.0
    total = 0

    self.each_atom{ |atom|
      element = atom.element[0,1]
      mass    = ElementMass[element]
      total += mass
      x += atom.x * mass
      y += atom.y * mass
      z += atom.z * mass
    }

    raise ZeroDivisionError, 'divided by 0 (no atoms)' if total == 0

    Coordinate[x / total, y / total, z / total]
  end

  # Perhaps distance and dihedral would be better off as class methods?
  # (rather) than instance methods

  # Returns the Euclidean distance between two points. Each argument may
  # be a Vector, an object responding to xyz, or anything with to_a.
  def self.distance(coord1,coord2)
    coord1 = to_xyz(coord1)
    coord2 = to_xyz(coord2)
    (coord1 - coord2).r
  end

  # Returns the dihedral (torsion) angle in radians defined by four points
  # responding to x, y and z. The magnitude is the angle between the plane
  # through points 1-3 and the plane through points 2-4; the sign is
  # negative when coord4 lies on the negative side of the first plane.
  def self.dihedral_angle(coord1,coord2,coord3,coord4)
    (a1,b1,c1,d) = calculatePlane(coord1,coord2,coord3)
    (a2,b2,c2)   = calculatePlane(coord2,coord3,coord4)

    dot   = a1*a2 + b1*b2 + c1*c2
    norms = Math.sqrt(a1**2 + b1**2 + c1**2) *
            Math.sqrt(a2**2 + b2**2 + c2**2)
    torsion = acos(dot / norms)

    if ((a1*coord4.x + b1*coord4.y + c1*coord4.z + d) < 0)
      -torsion
    else
      torsion
    end
  end

  # Implicit conversion into Vector or Bio::PDB::Coordinate.
  # A Vector is returned untouched; otherwise obj.xyz is used when the
  # object responds to it, else a Vector is built from obj.to_a.
  def self.to_xyz(obj)
    return obj if obj.is_a?(Vector)
    # respond_to? instead of rescuing NameError, so that genuine errors
    # raised inside #xyz are not silently swallowed.
    obj.respond_to?(:xyz) ? obj.xyz : Vector.elements(obj.to_a)
  end

  # Methods required for the dihedral angle calculations
  # perhaps these should go in some separate Math module

  # Converts an angle from radians to degrees.
  def self.rad2deg(r)
    (r/Math::PI)*180
  end

  # Arc cosine computed through atan2, result in 0..PI.
  # A cosine that overshoots [-1, 1] by less than 1e-9 (floating-point
  # rounding, e.g. for exactly parallel planes) is snapped to +/-1 so that
  # Math.sqrt is not handed a tiny negative number. Values further outside
  # the domain still fail in Math.sqrt as before.
  def self.acos(x)
    overshoot = x.abs - 1
    if overshoot > 0 && overshoot < 1.0e-9
      x = (x > 0) ? 1.0 : -1.0
    end
    Math.atan2(Math.sqrt(1 - x**2),x)
  end

  # Returns [a, b, c, d], the coefficients of the plane
  # a*x + b*y + c*z + d = 0 through the three given points
  # (each responding to x, y and z).
  def self.calculatePlane(coord1,coord2,coord3)
    a = coord1.y * (coord2.z - coord3.z) +
        coord2.y * (coord3.z - coord1.z) +
        coord3.y * (coord1.z - coord2.z)
    b = coord1.z * (coord2.x - coord3.x) +
        coord2.z * (coord3.x - coord1.x) +
        coord3.z * (coord1.x - coord2.x)
    c = coord1.x * (coord2.y - coord3.y) +
        coord2.x * (coord3.y - coord1.y) +
        coord3.x * (coord1.y - coord2.y)
    d = -1 *
        (
         (coord1.x * (coord2.y * coord3.z - coord3.y * coord2.z)) +
         (coord2.x * (coord3.y * coord1.z - coord1.y * coord3.z)) +
         (coord3.x * (coord1.y * coord2.z - coord2.y * coord1.z))
         )

    return [a,b,c,d]
  end

  # Every class in the hierarchy implements finder. It takes a class
  # which determines which type of object to find; the associated block
  # is then run in classic .find style by the matching find_* method.
  #
  # Raises TypeError when findtype is not Atom, Residue, Chain or Model.
  def finder(findtype,&block)
    if findtype == Bio::PDB::Atom
      return self.find_atom(&block)
    elsif findtype == Bio::PDB::Residue
      return self.find_residue(&block)
    elsif findtype == Bio::PDB::Chain
      return self.find_chain(&block)
    elsif findtype == Bio::PDB::Model
      return self.find_model(&block)
    else
      raise TypeError, "You can't find a #{findtype}"
    end
  end
end #module Utils
|
|
169
|
+
|
|
170
|
+
#The *Finder modules implement a find_* method which returns
|
|
171
|
+
#an array of anything for which the block evals true
|
|
172
|
+
#(similar to the Enumerable#find_all method).
|
|
173
|
+
#The each_* style methods act as classic iterators.
|
|
174
|
+
module ModelFinder
  # Returns an array of every model yielded by each_model for which the
  # given block returns a true value.
  def find_model
    matches = []
    each_model do |candidate|
      matches << candidate if yield(candidate)
    end
    matches
  end
end
|
|
183
|
+
|
|
184
|
+
#The hierarchical nature of the objects allows us to re-use the
|
|
185
|
+
#methods from the previous level - e.g. A PDB object can use the .models
|
|
186
|
+
#method defined in ModelFinder to iterate through the models to find the
|
|
187
|
+
#chains
|
|
188
|
+
module ChainFinder
  # Returns an array of every chain yielded by each_chain for which the
  # given block returns a true value.
  def find_chain
    matches = []
    each_chain do |candidate|
      matches << candidate if yield(candidate)
    end
    matches
  end

  # Yields every chain of every model, in the order each_model and then
  # model.each produce them.
  def each_chain
    each_model do |model|
      model.each { |chain| yield chain }
    end
  end
end
|
|
202
|
+
|
|
203
|
+
module ResidueFinder
  # Returns an array of every residue yielded by each_residue for which
  # the given block returns a true value.
  def find_residue
    matches = []
    each_residue do |candidate|
      matches << candidate if yield(candidate)
    end
    matches
  end

  # Yields every residue of every chain, in the order each_chain and then
  # chain.each produce them.
  def each_residue
    each_chain do |chain|
      chain.each { |residue| yield residue }
    end
  end
end
|
|
217
|
+
|
|
218
|
+
module AtomFinder
  # Returns an array of every atom yielded by each_atom for which the
  # given block returns a true value.
  def find_atom
    matches = []
    each_atom do |candidate|
      matches << candidate if yield(candidate)
    end
    matches
  end

  # Yields every atom of every residue, in the order each_residue and
  # then residue.each produce them.
  def each_atom
    each_residue do |residue|
      residue.each { |atom| yield atom }
    end
  end
end
|
|
232
|
+
|
|
233
|
+
end; end #module Bio; class PDB
|
|
234
|
+
|
|
@@ -0,0 +1,616 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/prosite.rb - PROSITE database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
# Licence:: LGPL
|
|
6
|
+
#
|
|
7
|
+
# $Id: prosite.rb,v 0.13 2005/12/18 18:24:08 k Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
#
|
|
12
|
+
# == Example
|
|
13
|
+
# == References
|
|
14
|
+
#--
|
|
15
|
+
#
|
|
16
|
+
# This library is free software; you can redistribute it and/or
|
|
17
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
18
|
+
# License as published by the Free Software Foundation; either
|
|
19
|
+
# version 2 of the License, or (at your option) any later version.
|
|
20
|
+
#
|
|
21
|
+
# This library is distributed in the hope that it will be useful,
|
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
24
|
+
# Lesser General Public License for more details.
|
|
25
|
+
#
|
|
26
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
27
|
+
# License along with this library; if not, write to the Free Software
|
|
28
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
29
|
+
#
|
|
30
|
+
#++
|
|
31
|
+
#
|
|
32
|
+
|
|
33
|
+
require 'bio/db'
|
|
34
|
+
|
|
35
|
+
module Bio
|
|
36
|
+
|
|
37
|
+
class PROSITE < EMBLDB
|
|
38
|
+
|
|
39
|
+
# Delimiter
|
|
40
|
+
DELIMITER = "\n//\n"
|
|
41
|
+
|
|
42
|
+
# Delimiter
|
|
43
|
+
RS = DELIMITER
|
|
44
|
+
|
|
45
|
+
# Bio::DB API
|
|
46
|
+
TAGSIZE = 5
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Creates a PROSITE entry object by handing +entry+ and TAGSIZE to the
# superclass constructor.
def initialize(entry)
  super(entry, TAGSIZE)
end
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ID Identification (Begins each entry; 1 per entry)
|
|
55
|
+
#
|
|
56
|
+
# ID ENTRY_NAME; ENTRY_TYPE. (ENTRY_TYPE : PATTERN, MATRIX, RULE)
|
|
57
|
+
#
|
|
58
|
+
# Returns
|
|
59
|
+
# Returns the ENTRY_NAME part of the ID line.
# On first use the ID line is stripped of its trailing '.', split on '; '
# and cached as @data['ID'] (name) and @data['TYPE'] (entry type).
def name
  return @data['ID'] if @data['ID']

  entry_name, entry_type = fetch('ID').chomp('.').split('; ')
  @data['ID']   = entry_name
  @data['TYPE'] = entry_type
  @data['ID']
end
|
|
65
|
+
|
|
66
|
+
# Returns
|
|
67
|
+
# Returns the ENTRY_TYPE part of the ID line (cached in @data['TYPE']).
# Calls #name to parse the ID line when the type is not cached yet.
def division
  name unless @data['TYPE']
  @data['TYPE']
end
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# AC Accession number (1 per entry)
|
|
76
|
+
#
|
|
77
|
+
# AC PSnnnnn;
|
|
78
|
+
#
|
|
79
|
+
# Returns
|
|
80
|
+
# Returns the content of the AC line with its trailing ';' removed.
# The value is cached in @data['AC'].
def ac
  @data['AC'] ||= fetch('AC').chomp(';')
end
|
|
86
|
+
|
|
87
|
+
alias entry_id ac
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# DT Date (1 per entry)
|
|
91
|
+
#
|
|
92
|
+
# DT MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
|
|
93
|
+
#
|
|
94
|
+
# Returns
|
|
95
|
+
# Returns whatever field_fetch yields for the DT (date) line.
def dt
  field_fetch('DT')
end
|
|
98
|
+
|
|
99
|
+
alias date dt
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# DE Short description (1 per entry)
|
|
103
|
+
#
|
|
104
|
+
# DE Description.
|
|
105
|
+
#
|
|
106
|
+
# Returns
|
|
107
|
+
# Returns whatever field_fetch yields for the DE (short description) line.
def de
  field_fetch('DE')
end
|
|
110
|
+
|
|
111
|
+
alias definition de
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# PA Pattern (>=0 per entry)
|
|
115
|
+
#
|
|
116
|
+
# see - pa2re method
|
|
117
|
+
#
|
|
118
|
+
# Returns
|
|
119
|
+
# Returns the PA (pattern) lines with all whitespace removed.
# field_fetch is invoked first; if @data['PA'] is still unset afterwards
# it is filled from fetch('PA'). Whitespace is stripped in place, so the
# cached string itself is modified.
def pa
  field_fetch('PA')
  @data['PA'] ||= fetch('PA')
  pattern_text = @data['PA']
  pattern_text.gsub!(/\s+/, '') if pattern_text
  @data['PA']
end
|
|
125
|
+
|
|
126
|
+
alias pattern pa
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# MA Matrix/profile (>=0 per entry)
|
|
130
|
+
#
|
|
131
|
+
# see - ma2re method
|
|
132
|
+
#
|
|
133
|
+
# Returns
|
|
134
|
+
# Returns whatever field_fetch yields for the MA (matrix/profile) lines.
def ma
  field_fetch('MA')
end
|
|
137
|
+
|
|
138
|
+
alias profile ma
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# RU Rule (>=0 per entry)
|
|
142
|
+
#
|
|
143
|
+
# RU Rule_Description.
|
|
144
|
+
#
|
|
145
|
+
# The rule is described in ordinary English and is free-format.
|
|
146
|
+
#
|
|
147
|
+
# Returns
|
|
148
|
+
# Returns whatever field_fetch yields for the RU (rule) lines.
def ru
  field_fetch('RU')
end
|
|
151
|
+
|
|
152
|
+
alias rule ru
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# NR Numerical results (>=0 per entry)
|
|
156
|
+
#
|
|
157
|
+
# - SWISS-PROT scan statistics of true and false positives/negatives
|
|
158
|
+
#
|
|
159
|
+
# /RELEASE SWISS-PROT release number and total number of sequence
|
|
160
|
+
# entries in that release.
|
|
161
|
+
# /TOTAL Total number of hits in SWISS-PROT.
|
|
162
|
+
# /POSITIVE Number of hits on proteins that are known to belong to the
|
|
163
|
+
# set in consideration.
|
|
164
|
+
# /UNKNOWN Number of hits on proteins that could possibly belong to
|
|
165
|
+
# the set in consideration.
|
|
166
|
+
# /FALSE_POS Number of false hits (on unrelated proteins).
|
|
167
|
+
# /FALSE_NEG Number of known missed hits.
|
|
168
|
+
# /PARTIAL Number of partial sequences which belong to the set in
|
|
169
|
+
# consideration, but which are not hit by the pattern or
|
|
170
|
+
# profile because they are partial (fragment) sequences.
|
|
171
|
+
#
|
|
172
|
+
# Returns
|
|
173
|
+
# Returns the NR (numerical results) line parsed into a Hash keyed by
# qualifier name (without the leading '/'); cached in @data['NR'].
#
# Values are converted as follows:
# * "hits(seqs)"      -> [hits, seqs] as Integers
# * "release,count"   -> [release String, count Integer]
# * anything else     -> value.to_i
def nr
  return @data['NR'] if @data['NR']

  stats = {}
  fetch('NR').scan(%r{/(\S+)=([^;]+);}) do |qualifier, raw|
    stats[qualifier] =
      if (m = /^(\d+)\((\d+)\)$/.match(raw))
        [m[1].to_i, m[2].to_i]     # number of hits, number of sequences
      elsif (m = /([\d\.]+),(\d+)/.match(raw))
        [m[1], m[2].to_i]          # SWISS-PROT release, number of sequences
      else
        raw.to_i
      end
  end
  @data['NR'] = stats
end
|
|
194
|
+
|
|
195
|
+
alias statistics nr
|
|
196
|
+
|
|
197
|
+
# Returns
|
|
198
|
+
# Returns the 'RELEASE' entry of #statistics: for a "release,count" value
# this is [release String, sequence count Integer].
def release
  statistics['RELEASE']
end
|
|
201
|
+
|
|
202
|
+
# Returns
|
|
203
|
+
# Returns the first element of #release (the SWISS-PROT release number).
def swissprot_release_number
  release.first
end
|
|
206
|
+
|
|
207
|
+
# Returns
|
|
208
|
+
# Returns the last element of #release (the number of SWISS-PROT
# sequences in that release).
def swissprot_release_sequences
  release.last
end
|
|
211
|
+
|
|
212
|
+
# Returns
|
|
213
|
+
# Returns the 'TOTAL' entry of #statistics: [hits, sequences] for a
# value written as "hits(sequences)".
def total
  statistics['TOTAL']
end
|
|
216
|
+
|
|
217
|
+
# Returns
|
|
218
|
+
# Returns the first element of #total (the number of hits).
def total_hits
  total.first
end
|
|
221
|
+
|
|
222
|
+
# Returns
|
|
223
|
+
# Returns the last element of #total (the number of sequences).
def total_sequences
  total.last
end
|
|
226
|
+
|
|
227
|
+
# Returns
|
|
228
|
+
# Returns the 'POSITIVE' entry of #statistics: [hits, sequences] for a
# value written as "hits(sequences)".
def positive
  statistics['POSITIVE']
end
|
|
231
|
+
|
|
232
|
+
# Returns
|
|
233
|
+
# Returns the first element of #positive (the number of hits).
def positive_hits
  positive.first
end
|
|
236
|
+
|
|
237
|
+
# Returns
|
|
238
|
+
# Returns the last element of #positive (the number of sequences).
def positive_sequences
  positive.last
end
|
|
241
|
+
|
|
242
|
+
# Returns
|
|
243
|
+
# Returns the 'UNKNOWN' entry of #statistics: [hits, sequences] for a
# value written as "hits(sequences)".
def unknown
  statistics['UNKNOWN']
end
|
|
246
|
+
|
|
247
|
+
# Returns
|
|
248
|
+
# Returns the first element of #unknown (the number of hits).
def unknown_hits
  unknown.first
end
|
|
251
|
+
|
|
252
|
+
# Returns
|
|
253
|
+
# Returns the last element of #unknown (the number of sequences).
def unknown_sequences
  unknown.last
end
|
|
256
|
+
|
|
257
|
+
# Returns
|
|
258
|
+
# Returns the 'FALSE_POS' entry of #statistics: [hits, sequences] for a
# value written as "hits(sequences)".
def false_pos
  statistics['FALSE_POS']
end
|
|
261
|
+
|
|
262
|
+
# Returns
|
|
263
|
+
# Returns the first element of #false_pos (the number of false hits).
def false_positive_hits
  false_pos.first
end
|
|
266
|
+
|
|
267
|
+
# Returns
|
|
268
|
+
# Returns the last element of #false_pos (the number of sequences).
def false_positive_sequences
  false_pos.last
end
|
|
271
|
+
|
|
272
|
+
# Returns
|
|
273
|
+
# Returns the 'FALSE_NEG' entry of #statistics (number of known missed
# hits; a plain Integer when the value has no "hits(sequences)" form).
def false_neg
  statistics['FALSE_NEG']
end
|
|
276
|
+
alias false_negative_hits false_neg
|
|
277
|
+
|
|
278
|
+
# Returns
|
|
279
|
+
# Returns the 'PARTIAL' entry of #statistics (number of partial sequences
# not hit because they are fragments).
def partial
  statistics['PARTIAL']
end
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# CC Comments (>=0 per entry)
|
|
285
|
+
#
|
|
286
|
+
# CC /QUALIFIER=data; /QUALIFIER=data; .......
|
|
287
|
+
#
|
|
288
|
+
# /TAXO-RANGE Taxonomic range.
|
|
289
|
+
# /MAX-REPEAT Maximum known number of repetitions of the pattern in a
|
|
290
|
+
# single protein.
|
|
291
|
+
# /SITE Indication of an `interesting' site in the pattern.
|
|
292
|
+
# /SKIP-FLAG Indication of an entry that can be, in some cases, ignored
|
|
293
|
+
# by a program (because it is too unspecific).
|
|
294
|
+
#
|
|
295
|
+
# Returns
|
|
296
|
+
# Returns the CC (comment) lines parsed into a Hash that maps each
# qualifier name (without the leading '/') to its raw String value.
# The hash is cached in @data['CC']; a qualifier that occurs more than
# once keeps only its last value.
def cc
  return @data['CC'] if @data['CC']

  qualifiers = {}
  fetch('CC').scan(%r{/(\S+)=([^;]+);}) do |key, value|
    qualifiers[key] = value
  end
  @data['CC'] = qualifiers
end
|
|
306
|
+
|
|
307
|
+
alias comment cc
|
|
308
|
+
|
|
309
|
+
# Returns
|
|
310
|
+
# Returns the /TAXO-RANGE comment qualifier.
# With +expand+ false or nil the raw String is returned (nil if absent).
# With +expand+ true an Array of taxon names is returned, one per
# recognised letter (A, B, E, P, V); other characters are skipped.
def taxon_range(expand = nil)
  range = comment['TAXO-RANGE']
  return range unless range && expand

  taxa = {
    'A' => 'archaebacteria',
    'B' => 'bacteriophages',
    'E' => 'eukaryotes',
    'P' => 'prokaryotes',
    'V' => 'eukaryotic viruses'
  }
  range.scan(/./).map { |code| taxa[code] }.compact
end
|
|
327
|
+
|
|
328
|
+
# Returns
|
|
329
|
+
# Returns the /MAX-REPEAT comment qualifier converted with to_i
# (0 when the qualifier is absent, since nil.to_i is 0).
def max_repeat
  comment['MAX-REPEAT'].to_i
end
|
|
332
|
+
|
|
333
|
+
# Returns
|
|
334
|
+
# Returns [position, description] taken from the /SITE comment qualifier,
# which is split on ','. The position is converted with to_i.
# When the qualifier is absent the result is [0, nil].
def site
  position = description = nil
  raw = comment['SITE']
  position, description = raw.split(',') if raw
  [position.to_i, description]
end
|
|
340
|
+
|
|
341
|
+
# Returns
|
|
342
|
+
# Returns true when the /SKIP-FLAG comment qualifier equals 'TRUE',
# and nil otherwise.
def skip_flag
  true if comment['SKIP-FLAG'] == 'TRUE'
end
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
# DR Cross-references to SWISS-PROT (>=0 per entry)
|
|
350
|
+
#
|
|
351
|
+
# DR AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
|
|
352
|
+
#
|
|
353
|
+
# - `AC_NB' is the SWISS-PROT primary accession number of the entry to
|
|
354
|
+
# which reference is being made.
|
|
355
|
+
# - `ENTRY_NAME' is the SWISS-PROT entry name.
|
|
356
|
+
# - `C' is a one character flag that can be one of the following:
|
|
357
|
+
#
|
|
358
|
+
# T For a true positive.
|
|
359
|
+
# N For a false negative; a sequence which belongs to the set under
|
|
360
|
+
# consideration, but which has not been picked up by the pattern or
|
|
361
|
+
# profile.
|
|
362
|
+
# P For a `potential' hit; a sequence that belongs to the set under
|
|
363
|
+
# consideration, but which was not picked up because the region(s) that
|
|
364
|
+
# are used as a 'fingerprint' (pattern or profile) is not yet available
|
|
365
|
+
# in the data bank (partial sequence).
|
|
366
|
+
# ? For an unknown; a sequence which possibly could belong to the set under
|
|
367
|
+
# consideration.
|
|
368
|
+
# F For a false positive; a sequence which does not belong to the set in
|
|
369
|
+
# consideration.
|
|
370
|
+
#
|
|
371
|
+
# Returns
|
|
372
|
+
# Returns the DR (SWISS-PROT cross-reference) lines parsed into a Hash
# that maps each accession number to [entry_name, flag]. The hash is
# cached in @data['DR'] and is empty when fetch('DR') returns nil/false.
def dr
  return @data['DR'] if @data['DR']

  xrefs = {}
  if fetch('DR')
    fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/) do |accession, entry_name, flag|
      xrefs[accession] = [entry_name, flag]  # SWISS-PROT : accession, entry, flag
    end
  end
  @data['DR'] = xrefs
end
|
|
384
|
+
|
|
385
|
+
alias sp_xref dr
|
|
386
|
+
|
|
387
|
+
# Returns
|
|
388
|
+
# Lists the cross-references from sp_xref whose flag equals +flag+.
#
# flag    - the one-character flag to match against each entry's
#           second element.
# by_name - when truthy, collect each matching entry's first element
#           (the entry name); otherwise collect the hash key
#           (the accession).
#
# Returns an Array, in sp_xref iteration order.
def list_xref(flag, by_name = nil)
  matching = sp_xref.select { |_accession, entry| entry[1] == flag }
  matching.map { |accession, entry| by_name ? entry[0] : accession }
end
|
|
402
|
+
|
|
403
|
+
# Returns
|
|
404
|
+
# Lists the cross-references flagged 'T' (true positive).
#
# by_name - passed straight through to list_xref.
#
# Returns whatever list_xref returns for the 'T' flag.
def list_truepositive(by_name = nil)
  true_positive_flag = 'T'
  list_xref(true_positive_flag, by_name)
end
|
|
407
|
+
|
|
408
|
+
# Returns
|
|
409
|
+
# Lists the cross-references flagged 'N' (false negative).
#
# The DR line format uses 'N' for a false negative; 'F' is the
# false-positive flag, so it must not be used here.
#
# by_name - passed straight through to list_xref.
#
# Returns whatever list_xref returns for the 'N' flag.
def list_falsenegative(by_name = nil)
  list_xref('N', by_name)
end
|
|
412
|
+
|
|
413
|
+
# Returns
|
|
414
|
+
# Lists the cross-references flagged 'F' (false positive).
#
# The DR line format uses 'F' for a false positive; 'P' marks a
# potential hit and is served by list_potentialhit.
#
# by_name - passed straight through to list_xref.
#
# Returns whatever list_xref returns for the 'F' flag.
def list_falsepositive(by_name = nil)
  list_xref('F', by_name)
end
|
|
417
|
+
|
|
418
|
+
# Returns
|
|
419
|
+
# Lists the cross-references flagged 'P' (potential hit).
#
# by_name - passed straight through to list_xref.
#
# Returns whatever list_xref returns for the 'P' flag.
def list_potentialhit(by_name = nil)
  potential_hit_flag = 'P'
  list_xref(potential_hit_flag, by_name)
end
|
|
422
|
+
|
|
423
|
+
# Returns
|
|
424
|
+
# Lists the cross-references flagged '?' (unknown).
#
# by_name - passed straight through to list_xref.
#
# Returns whatever list_xref returns for the '?' flag.
def list_unknown(by_name = nil)
  unknown_flag = '?'
  list_xref(unknown_flag, by_name)
end
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# 3D Cross-references to PDB (>=0 per entry)
|
|
430
|
+
#
|
|
431
|
+
# 3D name; [name2;...]
|
|
432
|
+
#
|
|
433
|
+
# Returns
|
|
434
|
+
# Splits the 3D field on ';' (plus any following spaces) and caches
# the resulting list in @data['3D'].
#
# Returns the Array stored in @data['3D'].
def pdb_xref
  @data['3D'] ||= fetch('3D').split(/; */)
end
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
# DO Pointer to the documentation file (1 per entry)
|
|
443
|
+
#
|
|
444
|
+
# DO PDOCnnnnn;
|
|
445
|
+
#
|
|
446
|
+
# Returns
|
|
447
|
+
# Reads the DO field, removes one trailing ';' if present, and stores
# the result in @data['DO']. The field is re-read on every call.
#
# Returns the stored String.
def pdoc_xref
  doc_id = fetch('DO').chomp(';')
  @data['DO'] = doc_id
end
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
### prosite pattern to regular expression
|
|
453
|
+
#
|
|
454
|
+
# prosite/prosuser.txt:
|
|
455
|
+
#
|
|
456
|
+
# The PA (PAttern) lines contain the definition of a PROSITE pattern. The
|
|
457
|
+
# patterns are described using the following conventions:
|
|
458
|
+
#
|
|
459
|
+
# 0) The standard IUPAC one-letter codes for the amino acids are used.
|
|
460
|
+
# 0) Ambiguities are indicated by listing the acceptable amino acids for a
|
|
461
|
+
# given position, between square parentheses `[ ]'. For example: [ALT]
|
|
462
|
+
# stands for Ala or Leu or Thr.
|
|
463
|
+
# 1) A period ends the pattern.
|
|
464
|
+
# 2) When a pattern is restricted to either the N- or C-terminal of a
|
|
465
|
+
# sequence, that pattern either starts with a `<' symbol or respectively
|
|
466
|
+
# ends with a `>' symbol.
|
|
467
|
+
# 3) Ambiguities are also indicated by listing between a pair of curly
|
|
468
|
+
# brackets `{ }' the amino acids that are not accepted at a given
|
|
469
|
+
# position. For example: {AM} stands for any amino acid except Ala and
|
|
470
|
+
# Met.
|
|
471
|
+
# 4) Repetition of an element of the pattern can be indicated by following
|
|
472
|
+
# that element with a numerical value or a numerical range between
|
|
473
|
+
# parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
|
|
474
|
+
# x-x or x-x-x or x-x-x-x.
|
|
475
|
+
# 5) The symbol `x' is used for a position where any amino acid is accepted.
|
|
476
|
+
# 6) Each element in a pattern is separated from its neighbor by a `-'.
|
|
477
|
+
#
|
|
478
|
+
# Examples:
|
|
479
|
+
#
|
|
480
|
+
# PA [AC]-x-V-x(4)-{ED}.
|
|
481
|
+
#
|
|
482
|
+
# This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
|
|
483
|
+
# but Glu or Asp}
|
|
484
|
+
#
|
|
485
|
+
# PA <A-x-[ST](2)-x(0,1)-V.
|
|
486
|
+
#
|
|
487
|
+
# This pattern, which must be in the N-terminal of the sequence (`<'), is
|
|
488
|
+
# translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
|
|
489
|
+
#
|
|
490
|
+
# Converts a PROSITE PA pattern string into a Regexp.
#
# The argument is left untouched: the regexp source is built from new
# strings, so frozen strings are accepted and the caller's pattern is
# not rewritten as a side effect.
#
# pattern - a PROSITE pattern String such as "[AC]-x-V-x(4)-{ED}."
#
# Returns a Regexp built from the translated pattern.
def self.pa2re(pattern)
  source = pattern.gsub(/\s/, '')                # remove white spaces
  source = source.sub(/\.$/, '')                 # (1) remove trailing '.'
  source = source.sub(/^</, '^')                 # (2) restricted to the N-terminal : `<'
  source = source.sub(/>$/, '$')                 # (2) restricted to the C-terminal : `>'
  source = source.gsub(/\{(\w+)\}/, '[^\1]')     # (3) not accepted at a given position : '{}'
  source = source.gsub(/\(([\d,]+)\)/, '{\1}')   # (4) repetition of an element : (n), (n,m)
  source = source.tr('x', '.')                   # (5) any amino acid is accepted : 'x'
  source = source.delete('-')                    # (6) each element is separated by a '-'
  Regexp.new(source)
end
|
|
505
|
+
|
|
506
|
+
# Instance-level convenience wrapper: hands +pattern+ to the
# class-level pa2re of this object's class and returns its result.
def pa2re(pattern)
  owner = self.class
  owner.pa2re(pattern)
end
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
### prosite profile to regular expression
|
|
512
|
+
#
|
|
513
|
+
# prosite/profile.txt:
|
|
514
|
+
#
|
|
515
|
+
# Returns
|
|
516
|
+
# Placeholder for converting a PROSITE profile (MA lines) into a
# regular expression. The conversion is not implemented: the argument
# is ignored and NotImplementedError is always raised.
def ma2re(matrix)
  raise(NotImplementedError)
end
|
|
519
|
+
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
end
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
if __FILE__ == $0

  # Self-test driver: parses one PROSITE entry read from ARGF and dumps
  # the result of each accessor, one ">>> name" header per value.

  # Use pretty-printed output when the pp library can be loaded;
  # otherwise fall back to the plain Kernel#p.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  ps = Bio::PROSITE.new(ARGF.read)

  # Accessors called without arguments, in display order.
  accessors = %w(
    name
    division
    ac
    entry_id
    dt
    date
    de
    definition
    pa
    pattern
    ma
    profile
    ru
    rule
    nr
    statistics
    release
    swissprot_release_number
    swissprot_release_sequences
    total
    total_hits
    total_sequences
    positive
    positive_hits
    positive_sequences
    unknown
    unknown_hits
    unknown_sequences
    false_pos
    false_positive_hits
    false_positive_sequences
    false_neg
    false_negative_hits
    partial
    cc
    comment
    max_repeat
    site
    skip_flag
    dr
    sp_xref
    pdb_xref
    pdoc_xref
  )

  accessors.each do |accessor|
    puts ">>> #{accessor}"
    p ps.send(accessor)
  end

  puts ">>> taxon_range"
  p ps.taxon_range
  puts ">>> taxon_range(expand)"
  p ps.taxon_range(true)

  # Each list_* method is shown twice: by accession, then by entry name.
  listers = %w(
    list_truepositive
    list_falsenegative
    list_falsepositive
    list_potentialhit
    list_unknown
  )

  listers.each do |lister|
    puts ">>> #{lister}"
    p ps.send(lister)
    puts ">>> #{lister}(by_name)"
    p ps.send(lister, true)
  end

end
|