bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
data/sample/gt2fasta.rb
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# gt2fasta.rb - convert GenBank translations into FASTA format (pep)
|
|
4
|
+
#
|
|
5
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This program is free software; you can redistribute it and/or modify
|
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
|
9
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
10
|
+
# (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
# GNU General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# $Id: gt2fasta.rb,v 0.3 2002/04/15 03:06:17 k Exp $
|
|
18
|
+
#
|
|
19
|
+
|
|
20
|
+
require 'bio/io/flatfile'
|
|
21
|
+
require 'bio/feature'
|
|
22
|
+
require 'bio/db/genbank'
|
|
23
|
+
|
|
24
|
+
include Bio
|
|
25
|
+
|
|
26
|
+
# Read GenBank entries from the files named on the command line (or stdin)
# and print every feature that carries a /translation qualifier as FASTA.
flatfile = FlatFile.new(GenBank, ARGF)

while (gb = flatfile.next_entry)
  # Running count of translated features within this entry; used to make
  # each FASTA identifier unique ("gp:<entry_id>_<n>").
  orf = 0

  gb.features.each do |feature|
    qualifiers = feature.assoc
    aaseq = qualifiers['translation']
    next unless aaseq

    orf += 1

    # Description: whichever of these qualifiers are present, in this order.
    gene = %w[gene product note function].map { |key| qualifiers[key] }.compact.join(', ')

    definition = "gp:#{gb.entry_id}_#{orf} #{gene} [#{gb.organism}]"
    print aaseq.to_fasta(definition, 70)
  end
end
|
|
47
|
+
|
data/sample/pmfetch.rb
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# pmfetch.rb - generate BibTeX format reference list by PubMed ID list
|
|
4
|
+
#
|
|
5
|
+
# Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This program is free software; you can redistribute it and/or modify
|
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
|
9
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
10
|
+
# (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
# GNU General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU General Public License
|
|
18
|
+
# along with this program; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: pmfetch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'bio'
|
|
25
|
+
|
|
26
|
+
# Output format. BibTeX unless the first argument matches /-f/, in which
# case that argument is dropped and the one after it names the format.
form = 'bibtex'
if ARGV[0] =~ /-f/
  ARGV.shift
  form = ARGV.shift
end

# Every remaining argument is passed to Bio::PubMed.query as an id.
ARGV.each do |id|
  entry = Bio::PubMed.query(id)
  if form == 'medline'
    # Print the fetched entry as-is.
    puts entry
  else
    # The format name is sent as a method name to the reference object.
    puts Bio::MEDLINE.new(entry).reference.send(form)
  end
end
|
|
42
|
+
|
data/sample/pmsearch.rb
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# pmsearch.rb - generate BibTeX format reference list by PubMed keyword search
|
|
4
|
+
#
|
|
5
|
+
# Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This program is free software; you can redistribute it and/or modify
|
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
|
9
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
10
|
+
# (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
# GNU General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU General Public License
|
|
18
|
+
# along with this program; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: pmsearch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'bio'
|
|
25
|
+
|
|
26
|
+
# Output format. BibTeX unless the first argument matches /-f/, in which
# case that argument is dropped and the one after it names the format.
form = 'bibtex'
if ARGV[0] =~ /-f/
  ARGV.shift
  form = ARGV.shift
end

# The remaining arguments, joined by spaces, form the search keywords.
keywords = ARGV.join(' ')
entries = Bio::PubMed.search(keywords)

entries.each do |entry|
  if form == 'medline'
    # Print the search result entry as-is.
    puts entry
  else
    # The format name is sent as a method name to the reference object.
    puts Bio::MEDLINE.new(entry).reference.send(form)
  end
end
|
|
42
|
+
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# psortplot_html.rb - A KEGG API demo script. Generates an HTML file of
|
|
4
|
+
# genes marked by PSORT II predictions onto a
|
|
5
|
+
# KEGG/PATHWAY map.
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
#
|
|
9
|
+
# % ruby psortplot_html.rb
|
|
10
|
+
# % cat sce00010_psort2.html
|
|
11
|
+
# % ruby psortplot_html.rb path:eco00010
|
|
12
|
+
# % cat eco00010_psort2.html
|
|
13
|
+
#
|
|
14
|
+
# Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
|
|
15
|
+
#
|
|
16
|
+
# This program is free software; you can redistribute it and/or modify
|
|
17
|
+
# it under the terms of the GNU General Public License as published by
|
|
18
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
19
|
+
# (at your option) any later version.
|
|
20
|
+
#
|
|
21
|
+
# This program is distributed in the hope that it will be useful,
|
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24
|
+
# GNU General Public License for more details.
|
|
25
|
+
#
|
|
26
|
+
# $Id: psortplot_html.rb,v 1.1 2005/10/12 02:10:11 nakao Exp $
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
require 'bio'
|
|
30
|
+
|
|
31
|
+
# Helpers that build HTML anchor tags pointing at the KEGG DBGET web pages.
class KEGG
  DBGET_BASEURI = 'http://kegg.com/dbget-bin'
  WWW_BGET_BASEURI = DBGET_BASEURI + '/www_bget'
  WWW_PATHWAY_BASEURI = DBGET_BASEURI + '/get_pathway'

  # Returns an HTML link to the get_pathway page for a pathway id.
  #
  # path0 := path:sce00010
  #
  # The part after the colon is split into its leading three word
  # characters (sent as org_name) and the remainder (sent as mapno).
  # An id given without the "path:" prefix is accepted as well.
  def self.link_pathway(path0)
    db, path = path0.split(':')
    # Without a colon the whole id lands in the first slot; use it then.
    path ||= db.to_s

    # String#[] with a regexp returns the matched text (nil when there is
    # no match). The previous scan(...).to_s only produced "sce" where
    # Array#to_s joins its elements (Ruby 1.8); later versions return the
    # inspect form '[["sce"]]', which broke both org_name and mapno.
    org_name = path[/\A\w{3}/].to_s
    mapno = path.sub(org_name, '')

    str = "<a href='#{WWW_PATHWAY_BASEURI}?org_name=#{org_name}&mapno=#{mapno}'>#{path0}</a>"
    return str
  end

  # Returns an HTML link to the www_bget enzyme page for an EC number.
  #
  # ec_num := ec:1.2.3.4
  #
  # The "ec:" prefix is removed for the query but kept in the link text.
  def self.link_ec(ec_num)
    ec = ec_num.sub(/^ec:/, '')
    str = "<a href='#{WWW_BGET_BASEURI}?enzyme+#{ec}'>#{ec_num}</a>"
    return str
  end

  # Returns an HTML link to the www_bget page for a gene.
  #
  # gene := eco:b0002
  #
  # The id is split on ':' into the organism and gene name used in the query.
  def self.link_genes(gene)
    org_name, gene_name = gene.split(':')
    str = "<a href='#{WWW_BGET_BASEURI}?#{org_name}+#{gene_name}'>#{gene}</a>"
    return str
  end
end
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Colours used to paint PSORT II localization predictions.
class PSORT
  # Localization code => HTML colour string. Listed as alternating
  # key/value arguments; the trailing comment names each code.
  COLOR_Palette = Hash[
    'csk', "#FF0000", # 'cytoskeletal'
    'cyt', "#FF8000", # 'cytoplasmic'
    'nuc', "#FFFF00", # 'nuclear'
    'mit', "#80FF00", # 'mitochondrial'
    'ves', "#00FF00", # 'vesicles of secretory system'
    'end', "#00FF80", # 'endoplasmic reticulum'
    'gol', "#00FFFF", # 'Golgi'
    'vac', "#0080FF", # 'vacuolar'
    'pla', "#0000FF", # 'plasma membrane'
    'pox', "#8000FF", # 'peroxisomal'
    'exc', "#FF00FF", # 'extracellular, including cell wall'
    '---', "#FF0080"  # 'other'
  ]
end
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
keggapi    = Bio::KEGG::API.new
psort2serv = Bio::PSORT::PSORT2.imsut

# Obtains a list of genes on specified pathway
# (first command-line argument, or "path:sce00010" when none is given)
pathway = ARGV.shift || "path:sce00010"
genes   = keggapi.get_genes_by_pathway(pathway)

scl = {}  # protein subcellular localizations, keyed by gene
ec  = {}  # EC numbers, keyed by gene

serial = 0
# Width of the largest serial number, so the progress column lines up.
width = genes.size.to_s.size

# Progress is printed piecewise per gene, so flush after every write and
# restore the previous setting afterwards.
sync_default = $stdout.sync
$stdout.sync = true

genes.each do |gene|
  serial += 1
  print "#{serial.to_s.rjust(width)}\t#{gene}\t"

  # Obtains amino acid sequence from KEGG GENES entry
  aaseq = keggapi.get_aaseqs([gene])

  # Predicts protein subcellular localization
  prediction = psort2serv.exec(aaseq)
  scl[gene] = prediction.pred
  print "#{scl[gene]}\t"

  # Obtains the EC number from KEGG GENES entry
  ec[gene] = keggapi.get_enzymes_by_gene(gene)
  puts ec[gene].inspect
end

$stdout.sync = sync_default
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# One foreground and one background colour per gene, in the same order as
# +genes+: a fixed foreground, and the palette colour of the gene's
# predicted localization as background.
fg_list = genes.map { "#FF0000" }
bg_list = genes.map { |gene| PSORT::COLOR_Palette[scl[gene]] }

# coloring KEGG pathway according to gene's localization
url = keggapi.color_pathway_by_objects(pathway, genes, fg_list, bg_list)
puts "#{url} downloaded."

# remove "path:" prefix from pathway_id
path_code = pathway.sub(/^path:/, '')

# save the result image
image_file = "#{path_code}_psort2.gif"
keggapi.save_image(url, image_file)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# create html with a color palette
#
# Opening part of the page: head, heading, and the start of an outer
# two-cell table whose first cell holds the gene table (header row only
# here; the rows and the rest of the page are appended afterwards).
#
# The heading previously contained a stray <li> outside any list and an
# empty <a href=""> that was never closed around the anchor already
# returned by KEGG.link_pathway; both are removed so the markup balances.
html = <<END
<html>
<head>
<title>PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{pathway})</title>
<style>
table { border-collapse: collapse; }
td { border: 1px solid black; padding: 5px; }
td.outer { border: none; vertical-align: top; }
</style>
</head>
<body>
<h1><a href="http://psort.ims.u-tokyo.ac.jp/helpwww2.html">PSORT II</a> prediction protein subcellular localization map of <a href="http://kegg.com/kegg/pathway.html">KEGG/PATHWAY</a> (#{KEGG.link_pathway(pathway)})</h1>

<table>
<tr>
<td class=outer>
<table>
<tr>
<th></th>
<th>EC</th>
<th>Gene</th>
<th>Localization</th>
</tr>
END
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# generate gene table with localization
#
# One row per (EC number, gene) pair, grouped by EC number in sorted order.
names = Bio::PSORT::PSORT2::SclNames
# How many rows each gene has produced so far; shown in the first column.
multi_genes = Hash.new(0)

ec.values.flatten.sort.uniq.each do |ec_num|
  ec.each do |gene, ec_list|
    # Only genes annotated with this EC number get a row here.
    next unless ec_list.include?(ec_num)

    loc   = scl[gene]
    color = PSORT::COLOR_Palette[loc]
    name  = names[loc]
    multi_genes[gene] += 1

    row = [
      "<tr>",
      "<td>#{multi_genes[gene]}</td>",
      "<td>#{KEGG.link_ec(ec_num)}</td>",
      "<td>#{KEGG.link_genes(gene)}</td>",
      "<td bgcolor=\"#{color}\">#{name}</td>",
      "</tr>"
    ]
    html += row.join("\n") + "\n"
  end
end
|
|
183
|
+
|
|
184
|
+
# Close the gene table and its cell, then open the second outer cell with
# the header row of the colour legend table.
html += "</table>\n" +
        "</td>\n" +
        "<td class=outer>\n" +
        "<table>\n" +
        "<tr>\n" +
        "<th>Code</th>\n" +
        "<th>Color</th>\n" +
        "</tr>\n"

# generate color code table also
# (one row per palette entry, ordered by localization code)
PSORT::COLOR_Palette.keys.sort.each do |code|
  color = PSORT::COLOR_Palette[code]
  html += "<tr>\n" +
          "<td>#{code}</td>\n" +
          "<td bgcolor=\"#{color}\">#{names[code]}</td>\n" +
          "</tr>\n"
end

# Close the legend and the outer table, then embed the saved pathway image.
html += "</table>\n" +
        "</td>\n" +
        "</tr>\n" +
        "</table>\n" +
        "<br>\n" +
        "<img src=\"#{image_file}\">\n" +
        "</body>\n" +
        "</html>\n"
|
|
215
|
+
|
|
216
|
+
# save generated HTML file
# (named after the pathway code, in the current directory)
html_file = "#{path_code}_psort2.html"
File.open(html_file, "w+") { |file| file.puts html }

puts "Open #{html_file}"
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# ssearch2tab.rb - convert SSEARCH output into tab delimited data for MySQL
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
#
|
|
7
|
+
# % ssearch2tab.rb SSEARCH-output-file[s] > ssearch_results.tab
|
|
8
|
+
# % mysql < ssearch_results.sql (use sample at the end of this file)
|
|
9
|
+
#
|
|
10
|
+
# Format accepted:
|
|
11
|
+
#
|
|
12
|
+
# % ssearch3[3][_t] -Q -H -m 6 query.f target.f > SSEARCH-output-file
|
|
13
|
+
#
|
|
14
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
15
|
+
#
|
|
16
|
+
# This program is free software; you can redistribute it and/or modify
|
|
17
|
+
# it under the terms of the GNU General Public License as published by
|
|
18
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
19
|
+
# (at your option) any later version.
|
|
20
|
+
#
|
|
21
|
+
# This program is distributed in the hope that it will be useful,
|
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24
|
+
# GNU General Public License for more details.
|
|
25
|
+
#
|
|
26
|
+
# $Id: ssearch2tab.rb,v 0.1 2001/06/21 08:25:58 katayama Exp $
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
# Scan SSEARCH output read through gets (files named on the command line,
# or stdin) and print one tab-delimited row per hit: input file name, query
# length, target id, target length, then nine score fields.
q_len = nil
while line = gets

  # query
  if line =~ /^\S+: (\d+) aa$/
    q_len = $1
  end

  # each hit
  if line =~ /^>>([^>]\S+).*\((\d+) aa\)$/
    target = $1
    t_len  = $2

    # The two lines following a hit header carry the scores. gets returns
    # nil at end of input, so a truncated report yields empty fields
    # instead of raising NoMethodError.
    # d = dummy variable
    d, d, d, swopt, d, zscore, d, bits, d, evalue =
      gets.to_s.split(/\s+/)
    d, d, sw, ident, d, ugident, d, d, overlap, d, d, lap =
      gets.to_s.split(/\s+/)

    # query-hit pair
    print "#{$FILENAME}\t#{q_len}\t#{target}\t#{t_len}"

    # pick up values
    ary = [
      swopt,
      zscore,
      bits,
      evalue,
      sw,
      ident,
      ugident,
      overlap,
      lap
    ]

    # print values
    ary.each do |value|
      # Keep only digits and . : e - characters. A column missing from the
      # score line is nil here and is written as an empty field.
      print "\t#{value.to_s.tr('^0-9.:e\-', '')}"
    end

    print "\n"

  end
end
|
|
73
|
+
|
|
74
|
+
=begin MySQL ssearch_results.sql sample
|
|
75
|
+
|
|
76
|
+
CREATE DATABASE IF NOT EXISTS db_name;
|
|
77
|
+
CREATE TABLE IF NOT EXISTS db_name.table_name (
|
|
78
|
+
query varchar(25) not NULL,
|
|
79
|
+
q_len integer unsigned default 0,
|
|
80
|
+
target varchar(25) not NULL,
|
|
81
|
+
t_len integer unsigned default 0,
|
|
82
|
+
swopt integer unsigned default 0,
|
|
83
|
+
zscore float default 0.0,
|
|
84
|
+
bits float default 0.0,
|
|
85
|
+
evalue float default 0.0,
|
|
86
|
+
sw integer unsigned default 0,
|
|
87
|
+
ident float default 0.0,
|
|
88
|
+
ugident float default 0.0,
|
|
89
|
+
overlap integer unsigned default 0,
|
|
90
|
+
lap_at varchar(25) default NULL
|
|
91
|
+
);
|
|
92
|
+
LOAD DATA LOCAL INFILE 'ssearch_results.tab' INTO TABLE db_name.table_name;
|
|
93
|
+
|
|
94
|
+
=end
|
|
95
|
+
|
|
96
|
+
|