bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/brite.rb - KEGG/BRITE database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: brite.rb,v 0.6 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
|
|
27
|
+
class KEGG
|
|
28
|
+
|
|
29
|
+
# Entry class for the KEGG/BRITE database.
#
# Only construction is implemented; no field accessors exist yet.  The tag
# list at the bottom of the class names the fields that have no accessor.
class BRITE < KEGGDB

  # Both constants refer to the same string: a "///" line of its own
  # (presumably the separator between entries in the flat file).
  RS        = "\n///\n"
  DELIMITER = RS

  # Second argument handed to KEGGDB#initialize
  # (presumably the width of the tag column -- confirm in bio/db).
  TAGSIZE   = 12

  # entry:: text of a single BRITE entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # Tags without an accessor so far:
  #   ENTRY
  #   DEFINITION
  #   RELATION
  #   FACTORS
  #   INTERACTION
  #   SOURCE
  #   REFERENCE

end
|
|
47
|
+
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
end
|
|
51
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/cell.rb - KEGG/CELL database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001 KAWASHIMA Shuichi <s@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: cell.rb,v 1.7 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require "bio/db"
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
|
|
27
|
+
class KEGG
|
|
28
|
+
|
|
29
|
+
# Entry class for the KEGG/CELL database.
#
# Every accessor delegates to KEGGDB#field_fetch with the tag of the field
# it exposes; three of them additionally split the value on commas.
class CELL < KEGGDB

  # Both constants refer to the same string: a "///" line of its own
  # (presumably the separator between entries in the flat file).
  RS        = "\n///\n"
  DELIMITER = RS

  # Second argument handed to KEGGDB#initialize
  # (presumably the width of the tag column -- confirm in bio/db).
  TAGSIZE   = 12

  # entry:: text of a single CELL entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY field.
  def entry_id
    field_fetch("ENTRY")
  end

  # DEFINITION field.
  def definition
    field_fetch("DEFINITION")
  end

  # ORGANISM field (also available as #organism).
  def org
    field_fetch("ORGANISM")
  end
  alias_method :organism, :org

  # MOTHER field.
  def mother
    field_fetch("MOTHER")
  end

  # DAUGHTER field with all blanks removed, split on commas.
  def daughter
    raw = field_fetch("DAUGHTER")
    packed = raw.gsub(/ /, '')
    packed.split(/,/)
  end

  # SISTER field.
  def sister
    field_fetch("SISTER")
  end

  # CELL_FATE field with all blanks removed, split on commas.
  def fate
    raw = field_fetch("CELL_FATE")
    packed = raw.gsub(/ /, '')
    packed.split(/,/)
  end

  # CONTACT field with all blanks removed, split on commas.
  def contact
    raw = field_fetch("CONTACT")
    packed = raw.gsub(/ /, '')
    packed.split(/,/)
  end

  # EXPRESSION field.
  def expression
    field_fetch("EXPRESSION")
  end

  # FIGURE field.
  def fig
    field_fetch("FIGURE")
  end

  # REFERENCE field.
  def ref
    field_fetch("REFERENCE")
  end

end
|
|
84
|
+
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
end
|
|
88
|
+
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/compound.rb - KEGG COMPOUND database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002, 2004 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: compound.rb,v 0.11 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
|
|
27
|
+
class KEGG
|
|
28
|
+
|
|
29
|
+
# Entry class for the KEGG COMPOUND database.
#
# Accessors that post-process a field cache their result in @data under the
# tag name, so the parsing is done once per entry.
class COMPOUND < KEGGDB

  # Both constants refer to the same string: a "///" line of its own
  # (presumably the separator between entries in the flat file).
  RS        = "\n///\n"
  DELIMITER = RS

  # Second argument handed to KEGGDB#initialize
  # (presumably the width of the tag column -- confirm in bio/db).
  TAGSIZE   = 12

  # entry:: text of a single COMPOUND entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY -- first whitespace-delimited token of the field (cached).
  def entry_id
    @data['ENTRY'] ||= fetch('ENTRY').split(/\s+/).first
  end

  # NAME -- result of lines_fetch for the field.
  def names
    lines_fetch('NAME')
  end

  # First element of #names.
  def name
    all_names = names
    all_names[0]
  end

  # FORMULA
  def formula
    field_fetch('FORMULA')
  end

  # MASS -- converted with to_f.
  def mass
    raw = field_fetch('MASS')
    raw.to_f
  end

  # REACTION -- whitespace-delimited tokens of the field (cached).
  def reactions
    @data['REACTION'] ||= fetch('REACTION').split(/\s+/)
  end

  # RPAIR -- whitespace-delimited tokens of the field (cached).
  def rpairs
    @data['RPAIR'] ||= fetch('RPAIR').split(/\s+/)
  end

  # PATHWAY -- result of lines_fetch for the field.
  def pathways
    lines_fetch('PATHWAY')
  end

  # ENZYME -- list of enzyme tokens (cached).
  #
  # When the field contains an opening parenthesis (old version of the
  # format) each item is a "token (token)" pair; otherwise each item is a
  # single whitespace-delimited token.
  def enzymes
    @data['ENZYME'] ||= begin
      field = fetch('ENZYME')
      item = /\(/.match(field) ? /\S+ \(\S+\)/ : /\S+/
      field.scan(item)
    end
  end

  # DBLINKS -- result of lines_fetch for the field.
  def dblinks
    lines_fetch('DBLINKS')
  end

  # ATOM, BOND -- the two fields as returned by get, concatenated.
  def kcf
    atom = get('ATOM')
    bond = get('BOND')
    "#{atom}#{bond}"
  end

end
|
|
109
|
+
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Demo driver: when this file is run directly, read one COMPOUND entry from
# the files named on the command line (or from standard input) and dump the
# value of every accessor.
if __FILE__ == $0
  cpd = Bio::KEGG::COMPOUND.new(ARGF.read)
  accessors = [
    :entry_id, :names, :name, :formula, :mass, :reactions,
    :rpairs, :pathways, :enzymes, :dblinks, :kcf
  ]
  accessors.each do |accessor|
    p cpd.send(accessor)
  end
end
|
|
130
|
+
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/enzyme.rb - KEGG/ENZYME database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: enzyme.rb,v 0.8 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
|
|
27
|
+
class KEGG
|
|
28
|
+
|
|
29
|
+
# Entry class for the KEGG/ENZYME database.
#
# Single-valued fields are read with field_fetch, list-like fields with
# lines_fetch; only STRUCTURES is post-processed (and cached in @data).
class ENZYME < KEGGDB

  # Both constants refer to the same string: a "///" line of its own
  # (presumably the separator between entries in the flat file).
  RS        = "\n///\n"
  DELIMITER = RS

  # Second argument handed to KEGGDB#initialize
  # (presumably the width of the tag column -- confirm in bio/db).
  TAGSIZE   = 12

  # entry:: text of a single ENZYME entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY
  def entry_id
    field_fetch('ENTRY')
  end

  # NAME -- result of lines_fetch for the field.
  def names
    lines_fetch('NAME')
  end

  # First element of #names.
  def name
    all_names = names
    all_names[0]
  end

  # CLASS
  def classes
    lines_fetch('CLASS')
  end

  # SYSNAME
  def sysname
    field_fetch('SYSNAME')
  end

  # REACTION ';'
  def reaction
    field_fetch('REACTION')
  end

  # SUBSTRATE
  def substrates
    lines_fetch('SUBSTRATE')
  end

  # PRODUCT
  def products
    lines_fetch('PRODUCT')
  end

  # COFACTOR
  def cofactors
    lines_fetch('COFACTOR')
  end

  # COMMENT
  def comment
    field_fetch('COMMENT')
  end

  # PATHWAY
  def pathways
    lines_fetch('PATHWAY')
  end

  # GENES
  def genes
    lines_fetch('GENES')
  end

  # DISEASE
  def diseases
    lines_fetch('DISEASE')
  end

  # MOTIF
  def motifs
    lines_fetch('MOTIF')
  end

  # STRUCTURES -- whitespace-delimited tokens of the field once any leading
  # run of "PDB: " prefixes has been removed (cached).
  def structures
    @data['STRUCTURES'] ||= begin
      raw = fetch('STRUCTURES')
      raw.sub(/(PDB: )*/,'').split(/\s+/)
    end
  end

  # DBLINKS
  def dblinks
    lines_fetch('DBLINKS')
  end

end
|
|
121
|
+
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
end
|
|
125
|
+
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/kegg/expression.rb - KEGG EXPRESSION database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001-2003, 2005
|
|
5
|
+
# Shuichi Kawashima <shuichi@hgc.jp>,
|
|
6
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
7
|
+
# License:: LGPL
|
|
8
|
+
#
|
|
9
|
+
# $Id: expression.rb,v 1.9 2005/11/05 08:27:26 k Exp $
|
|
10
|
+
#
|
|
11
|
+
#--
|
|
12
|
+
#
|
|
13
|
+
# This library is free software; you can redistribute it and/or
|
|
14
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
15
|
+
# License as published by the Free Software Foundation; either
|
|
16
|
+
# version 2 of the License, or (at your option) any later version.
|
|
17
|
+
#
|
|
18
|
+
# This library is distributed in the hope that it will be useful,
|
|
19
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21
|
+
# Lesser General Public License for more details.
|
|
22
|
+
#
|
|
23
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
24
|
+
# License along with this library; if not, write to the Free Software
|
|
25
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
|
+
#
|
|
27
|
+
#++
|
|
28
|
+
#
|
|
29
|
+
|
|
30
|
+
require "bio/db"
|
|
31
|
+
|
|
32
|
+
module Bio
|
|
33
|
+
class KEGG
|
|
34
|
+
|
|
35
|
+
# Holds one KEGG EXPRESSION data set and derives simple statistics from it.
#
# The input is tab-delimited text.  Lines starting with "#" are comments.
# On every other line the first column is the ORF identifier and the 4th to
# 7th columns are stored, as floats, in #orf2val.  The methods below use
# those four values as
#
#   v[0] - v[1]  ->  "control" intensity
#   v[2] - v[3]  ->  "target" intensity
#
# (presumably signal minus background for each channel -- confirm against
# the KEGG EXPRESSION format description).
class EXPRESSION

  # entry:: whole text of an expression data file
  #
  # Comment lines and blank / whitespace-only lines are skipped; the latter
  # used to raise NoMethodError because they carry no value columns.
  def initialize(entry)
    @orf2val   = Hash.new('')
    @orf2rgb   = Hash.new('')
    @orf2ratio = Hash.new('')
    @max_intensity = 10000
    entry.split("\n").each do |line|
      next if /^#/ =~ line
      next if /\A\s*\z/ =~ line
      ary = line.split("\t")
      orf = ary.shift
      # after removing the ORF id, the values sit at indexes 2..5
      @orf2val[orf] = ary[2, 4].collect {|x| x.to_f}
    end
  end

  # ORF id => array of the four stored values
  attr_reader :orf2val
  # ORF id => "rrggbb" string, filled in by #val2rgb
  attr_reader :orf2rgb
  # ORF id => log2(target/control), filled in by #logy_minus_logx
  attr_reader :orf2ratio
  # Intensity that maps to full colour saturation in #val2rgb
  attr_reader :max_intensity

  # Mean of the control intensities (NaN when no ORF was read).
  def control_avg
    mean_of(control_intensities)
  end

  # Mean of the target intensities (NaN when no ORF was read).
  def target_avg
    mean_of(target_intensities)
  end

  # Population variance (divides by N) of the control intensities.
  def control_var
    variance_of(control_intensities)
  end

  # Population variance (divides by N) of the target intensities.
  def target_var
    variance_of(target_intensities)
  end

  # Standard deviation of the control intensities.
  def control_sd
    Math.sqrt(control_var)
  end

  # Standard deviation of the target intensities.
  def target_sd
    Math.sqrt(target_var)
  end

  # ORFs with the highest log ratios, as [orf, ratio] pairs sorted by
  # descending ratio.
  #
  # num::       number of pairs returned when no threshold is given
  # threshold:: when given, num is ignored and exactly the pairs whose
  #             ratio is greater than threshold are returned
  def up_regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort {|a, b| b[1] <=> a[1]}
    pick_ranked(ranked, num, threshold) {|ratio| ratio > threshold}
  end

  # ORFs with the lowest log ratios, as [orf, ratio] pairs sorted by
  # ascending ratio.
  #
  # num::       number of pairs returned when no threshold is given
  # threshold:: when given, num is ignored and exactly the pairs whose
  #             ratio is less than threshold are returned
  def down_regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort {|a, b| a[1] <=> b[1]}
    pick_ranked(ranked, num, threshold) {|ratio| ratio < threshold}
  end

  # ORFs with the largest absolute log ratios, as [orf, ratio] pairs sorted
  # by descending absolute ratio.
  #
  # num::       number of pairs returned when no threshold is given
  # threshold:: when given, num is ignored and exactly the pairs whose
  #             absolute ratio is greater than threshold are returned
  def regulated(num=20, threshold=nil)
    logy_minus_logx
    ranked = @orf2ratio.to_a.sort {|a, b| b[1].abs <=> a[1].abs}
    pick_ranked(ranked, num, threshold) {|ratio| ratio.abs > threshold}
  end

  # Fills #orf2ratio with log2(target) - log2(control) for every ORF.
  #
  # NOTE(review): a zero or negative intensity is passed to Math.log10
  # unchanged; depending on the Ruby version that yields -Infinity/NaN or
  # raises -- callers may want to filter such ORFs beforehand.
  def logy_minus_logx
    to_log2 = 1.0/Math.log10(2)   # loop invariant: converts log10 to log2
    @orf2val.each do |k, v|
      @orf2ratio[k] = to_log2*(Math.log10(v[2]-v[3]) - Math.log10(v[0]-v[1]))
    end
  end

  # Fills #orf2rgb with an "rrggbb" string per ORF: the target intensity
  # drives the red component, the control intensity the green component,
  # and blue is always "00".
  #
  # Each component is limited to 00..ff.  Negative intensities used to be
  # formatted as strings like "..fe", which is not a valid colour.
  def val2rgb
    # Integer division, kept from the original: 10000/255 gives 39.
    col_unit = @max_intensity/255
    @orf2val.each do |k, v|
      g = hex_level(v[0] - v[1], col_unit)
      r = hex_level(v[2] - v[3], col_unit)
      @orf2rgb[k] = r + g + "00"
    end
  end

  private

  # Control intensity (v[0] - v[1]) of every ORF.
  def control_intensities
    @orf2val.values.collect {|v| v[0] - v[1]}
  end

  # Target intensity (v[2] - v[3]) of every ORF.
  def target_intensities
    @orf2val.values.collect {|v| v[2] - v[3]}
  end

  # Arithmetic mean of an array of numbers.
  def mean_of(values)
    sum = 0.0
    values.each {|x| sum += x}
    sum/values.size
  end

  # Population variance of an array of numbers.
  def variance_of(values)
    avg = mean_of(values)
    sum = 0.0
    values.each {|x| sum += (x - avg)*(x - avg)}
    sum/values.size
  end

  # Cuts a sorted list of [orf, ratio] pairs down to the requested part.
  #
  # Without a threshold the first num pairs are returned (none for num
  # <= 0).  With a threshold the block is called with each ratio and the
  # pairs it accepts are kept; because the list is sorted by the same
  # criterion these form its leading run.  Unlike the former index loop
  # this never reads past the end of the list and never includes the first
  # pair that fails the test.
  def pick_ranked(ranked, num, threshold)
    if threshold.nil?
      ranked[0, num] || []
    else
      ranked.select {|pair| yield(pair[1])}
    end
  end

  # Two-digit hex colour component for an intensity, limited to 00..ff.
  def hex_level(intensity, col_unit)
    level = (intensity/col_unit).to_i
    level = 0   if level < 0
    level = 255 if level > 255
    format("%02x", level)
  end

end # class EXPRESSION
|
|
171
|
+
|
|
172
|
+
end # class KEGG
|
|
173
|
+
end # module Bio
|