bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/shell/plugin/codon.rb - plugin for the codon table
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: codon.rb,v 1.12 2005/12/19 02:34:24 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
module Bio::Shell
|
|
30
|
+
|
|
31
|
+
# Builds the text of a codon table for the BioRuby shell, either as plain
# text or decorated with terminal color escape sequences.  When a codon
# usage object is supplied, each cell shows the amino acid letter and the
# usage figure instead of the amino acid name.
class ColoredCodonTable

  # One-letter amino acid codes grouped by the property used to choose a
  # color and to build the legend.
  # NOTE(review): 'S' is listed twice under :polar, so it is printed twice
  # in the monochrome legend -- confirm whether that is intended.
  @@properties = {
    :basic    => %w( H K R ),
    :polar    => %w( S T Y Q N S ),
    :acidic   => %w( D E ),
    :nonpolar => %w( F L I M V P A C W G ),
    :stop     => %w( * ),
  }

  # number:: key handed to Bio::CodonTable[] to select the table
  # cuhash:: optional object indexed by codon; when given, its values are
  #          printed in each cell (right-justified to 6 characters)
  def initialize(number, cuhash = nil)
    @aacode = Bio::AminoAcid.names
    @table  = Bio::CodonTable[number]
    @number = number
    @cuhash = cuhash
    setup_colors
    if Bio::Shell.config[:color]
      generate_colored_text
    else
      generate_mono_text
    end
  end
  attr_reader :table

  # Maps each role (plain text, amino acid letter, start/stop codon and the
  # four property groups) to an escape sequence taken from Bio::Shell.esc_seq.
  def setup_colors
    esc_seq = Bio::Shell.esc_seq

    @colors = {
      :text     => esc_seq[:none],
      :aa       => esc_seq[:green],
      :start    => esc_seq[:red],
      :stop     => esc_seq[:red],
      :basic    => esc_seq[:cyan],
      :polar    => esc_seq[:blue],
      :acidic   => esc_seq[:magenta],
      :nonpolar => esc_seq[:yellow],
    }
  end

  # Fills one instance variable per codon (@ttt, @ttc, ...) with the plain
  # text of its cell and prepares the monochrome legend strings.
  def generate_mono_text
    @table.each do |codon, aa|
      if aa == '*'
        code = "STOP"
        aa = '' unless @cuhash
      else
        code = @aacode[aa]
      end
      if @cuhash
        percent = @cuhash[codon].to_s.rjust(6)
        set_cell(codon, "#{aa}#{percent}")
      else
        set_cell(codon, " #{code} #{aa} ")
      end
    end

    @hydrophilic = [
      @@properties[:basic].join(" "), "(basic),",
      @@properties[:polar].join(" "), "(polar),",
      @@properties[:acidic].join(" "), "(acidic)",
    ].join(" ")
    @hydrophobic = @@properties[:nonpolar].join(" ") + " (nonpolar)"
  end

  # Same as generate_mono_text, but every cell and legend entry is wrapped
  # in the escape sequences chosen by setup_colors.
  def generate_colored_text
    @table.each do |codon, aa|
      # First property group whose list contains this amino acid (nil if none).
      property, = @@properties.detect {|key, list| list.include?(aa)}

      if aa == '*'
        color_code = "#{@colors[:stop]}STOP"
        if @cuhash
          color_aa = "#{@colors[:stop]}#{aa}"
        else
          color_aa = ''
        end
      else
        color_code = "#{@colors[property]}#{@aacode[aa]}"
        if @table.start_codon?(codon)
          if @cuhash
            color_aa = "#{@colors[:aa]}#{aa}"
          else
            color_aa = "#{@colors[:start]}#{aa}"
          end
        else
          if @cuhash
            color_aa = "#{@colors[property]}#{aa}"
          else
            color_aa = "#{@colors[:aa]}#{aa}"
          end
        end
      end

      if @cuhash
        percent = @cuhash[codon].to_s.rjust(6)
        set_cell(codon, "#{color_aa}#{@colors[:text]}#{percent}")
      else
        set_cell(codon, " #{color_code} #{color_aa}#{@colors[:text]} ")
      end
    end

    @hydrophilic = [
      "#{@colors[:basic]}basic#{@colors[:text]},",
      "#{@colors[:polar]}polar#{@colors[:text]},",
      "#{@colors[:acidic]}acidic#{@colors[:text]}"
    ].join(" ")
    @hydrophobic = "#{@colors[:nonpolar]}nonpolar"
  end

  # Returns the rendered table as a String.  The legend header is included
  # only when no codon usage object was given.  The leading "<spaces>#" of
  # every template line is replaced by the plain-text escape sequence in
  # color mode and removed otherwise.
  #
  # NOTE(review): the column padding inside the templates below is kept
  # exactly as it appears in this listing; it looks whitespace-collapsed,
  # so verify the spacing against the upstream file before relying on the
  # alignment of the printed table.
  def output
    header = <<-END
      #
      # = Codon table #{@number} : #{@table.definition}
      #
      # hydrophilic: #{@hydrophilic}
      # hydrophobic: #{@hydrophobic}
    END
    table = <<-END
      #
      # *---------------------------------------------*
      # | | 2nd | |
      # | 1st |-------------------------------| 3rd |
      # | | U | C | A | G | |
      # |-------+-------+-------+-------+-------+-----|
      # | U U |#{@ttt}|#{@tct}|#{@tat}|#{@tgt}| u |
      # | U U |#{@ttc}|#{@tcc}|#{@tac}|#{@tgc}| c |
      # | U U |#{@tta}|#{@tca}|#{@taa}|#{@tga}| a |
      # | UUU |#{@ttg}|#{@tcg}|#{@tag}|#{@tgg}| g |
      # |-------+-------+-------+-------+-------+-----|
      # | CCCC |#{@ctt}|#{@cct}|#{@cat}|#{@cgt}| u |
      # | C |#{@ctc}|#{@ccc}|#{@cac}|#{@cgc}| c |
      # | C |#{@cta}|#{@cca}|#{@caa}|#{@cga}| a |
      # | CCCC |#{@ctg}|#{@ccg}|#{@cag}|#{@cgg}| g |
      # |-------+-------+-------+-------+-------+-----|
      # | A |#{@att}|#{@act}|#{@aat}|#{@agt}| u |
      # | A A |#{@atc}|#{@acc}|#{@aac}|#{@agc}| c |
      # | AAAAA |#{@ata}|#{@aca}|#{@aaa}|#{@aga}| a |
      # | A A |#{@atg}|#{@acg}|#{@aag}|#{@agg}| g |
      # |-------+-------+-------+-------+-------+-----|
      # | GGGG |#{@gtt}|#{@gct}|#{@gat}|#{@ggt}| u |
      # | G |#{@gtc}|#{@gcc}|#{@gac}|#{@ggc}| c |
      # | G GGG |#{@gta}|#{@gca}|#{@gaa}|#{@gga}| a |
      # | GG G |#{@gtg}|#{@gcg}|#{@gag}|#{@ggg}| g |
      # *---------------------------------------------*
      #
    END
    if @cuhash
      text = table
    else
      text = header + table
    end
    if Bio::Shell.config[:color]
      text.gsub(/^\s+#/, @colors[:text])
    else
      text.gsub(/^\s+#/, '')
    end
  end

  private

  # Stores the rendered cell text in the instance variable named after the
  # codon (e.g. "ttt" -> @ttt), which the templates in #output interpolate.
  # Assigning directly avoids building Ruby source with eval, so a quote or
  # backslash inside the text cannot break or alter the assignment.
  def set_cell(codon, text)
    instance_variable_set("@#{codon}", text)
  end

end
|
|
188
|
+
|
|
189
|
+
private
|
|
190
|
+
|
|
191
|
+
# Builds a ColoredCodonTable for table +num+.
# With +codon_usage+ the ColoredCodonTable itself is returned and nothing
# is printed; without it the rendered table is printed and the underlying
# codon table object is returned.
def codontable(num = 1, codon_usage = nil)
  colored = ColoredCodonTable.new(num, codon_usage)
  return colored if codon_usage

  puts colored.output
  colored.table
end
|
|
200
|
+
|
|
201
|
+
# Prints every entry of Bio::CodonTable::DEFINITIONS in sorted order as
# "<key><TAB><definition>" and returns the DEFINITIONS object.
def codontables
  tables = Bio::CodonTable::DEFINITIONS
  tables.sort.each { |i, definition| puts "#{i}\t#{definition}" }
  tables
end
|
|
208
|
+
|
|
209
|
+
# Prints one line per single-character key of Bio::AminoAcid.names
# (key, its value, and the value looked up again in the same table) and
# returns the names table.
def aminoacids
  names = Bio::AminoAcid.names
  names.sort.each do |aa, code|
    # Only single-character keys are listed.
    next unless aa.length == 1
    puts "#{aa}\t#{code}\t#{names[code]}"
  end
  names
end
|
|
218
|
+
|
|
219
|
+
# Prints, for a fixed list of base symbols, the symbol followed by the
# entries stored under its lower-case and upper-case form in
# Bio::NucleicAcid.names, then returns that names table.
def nucleicacids
  names = Bio::NucleicAcid.names
  symbols = %w(a t g c u r y w s k m b v h d n)
  symbols.each { |base| puts "#{base}\t#{names[base]}\t#{names[base.upcase]}" }
  names
end
|
|
226
|
+
|
|
227
|
+
end
|
|
228
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/shell/plugin/entry.rb - extract entry and sequence
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: entry.rb,v 1.4 2005/12/07 05:12:07 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
module Bio::Shell
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Obtain a Bio::Sequence::NA (DNA) or a Bio::Sequence::AA (Amino Acid)
|
|
34
|
+
# sequence from
|
|
35
|
+
# * String -- "atgcatgc" or "MQKKP"
|
|
36
|
+
# * IO -- io = IO.popen("gzip -dc db.gz") (first entry only)
|
|
37
|
+
# * "filename" -- "gbvrl.gbk" (first entry only)
|
|
38
|
+
# * "db:entry" -- "embl:BUM" (entry is retrieved by the ent method)
|
|
39
|
+
# Returns a sequence built from +arg+:
# * a Bio::Sequence is returned unchanged
# * an object responding to +gets+, or the name of an existing file, is
#   read with flatauto
# * any other value containing ":" is fetched with ent and parsed with
#   flatparse
# * anything else is treated as the sequence text itself
# An empty String is returned when no sequence could be obtained.
def seq(arg)
  seq = ""
  # +entry+ and +tmp+ stay nil for the branches that do not assign them.
  if arg.kind_of?(Bio::Sequence)
    seq = arg
  elsif arg.respond_to?(:gets) or File.exist?(arg)
    # File.exist? replaces File.exists?, which newer Ruby versions removed.
    entry = flatauto(arg)
  elsif arg[/:/]
    entry = flatparse(ent(arg))
  else
    tmp = arg
  end

  # Pick whichever sequence accessor the parsed entry offers.
  if entry.respond_to?(:seq)
    tmp = entry.seq
  elsif entry.respond_to?(:naseq)
    seq = entry.naseq
  elsif entry.respond_to?(:aaseq)
    seq = entry.aaseq
  end

  # Non-empty strings are passed to Bio::Sequence.auto for conversion.
  if tmp and tmp.is_a?(String) and not tmp.empty?
    seq = Bio::Sequence.auto(tmp)
  end
  return seq
end
|
|
65
|
+
|
|
66
|
+
# Obtain a database entry from
|
|
67
|
+
# * IO -- IO object (first entry only)
|
|
68
|
+
# * "filename" -- local file (first entry only)
|
|
69
|
+
# * "db:entry" -- local bioflat, OBDA, KEGG API
|
|
70
|
+
# Returns a database entry for +arg+.  Sources are tried in this order:
# an object responding to +gets+ or an existing file name (flatfile), a
# local flat-file directory found for the "db" part (flatsearch), a
# database listed by obdadbs (obdaentry), and finally bget with the
# original argument.
def ent(arg)
  entry = ""
  # "db:entry_id" -> ["db", "entry_id"]
  db, entry_id = arg.to_s.strip.split(/:/)
  # File.exist? replaces File.exists?, which newer Ruby versions removed.
  if arg.respond_to?(:gets) or File.exist?(arg)
    entry = flatfile(arg)
  elsif Bio::Shell.find_flat_dir(db)
    entry = flatsearch(db, entry_id)
  elsif obdadbs.include?(db)
    entry = obdaentry(db, entry_id)
  else
    entry = bget(arg)
  end
  return entry
end
|
|
84
|
+
|
|
85
|
+
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/shell/plugin/flatfile.rb - plugin for flatfile database
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: flatfile.rb,v 1.11 2005/11/30 01:57:18 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
module Bio::Shell
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Reads raw entry text from +filename+ through Bio::FlatFile.auto.
# With a block, the raw text of every entry is yielded in turn.
# Without a block, the raw text of the first entry is returned.
def flatfile(filename)
  unless block_given?
    raw = ''
    Bio::FlatFile.auto(filename) do |ff|
      # Advance to the first entry so that entry_raw refers to it.
      ff.next_entry
      raw = ff.entry_raw
    end
    return raw
  end

  Bio::FlatFile.auto(filename) do |ff|
    ff.each { |_entry| yield ff.entry_raw }
  end
end
|
|
49
|
+
|
|
50
|
+
# Reads entries from +filename+ through Bio::FlatFile.auto.
# With a block, every entry object is yielded in turn.
# Without a block, the result of the first next_entry call is returned.
def flatauto(filename)
  unless block_given?
    first = ''
    Bio::FlatFile.auto(filename) do |ff|
      first = ff.next_entry
    end
    return first
  end

  Bio::FlatFile.auto(filename) do |ff|
    ff.each { |parsed| yield parsed }
  end
end
|
|
65
|
+
|
|
66
|
+
# Asks Bio::FlatFile.autodetect for the class matching +entry+ and returns
# a new instance of it built from +entry+; returns nil when nothing is
# detected.
def flatparse(entry)
  klass = Bio::FlatFile.autodetect(entry)
  klass ? klass.new(entry) : nil
end
|
|
71
|
+
|
|
72
|
+
# Writes every entry of each file in +flatfiles+ to +fastafile+ in FASTA
# format (header "<entry_id> <definition>", 50 passed as the second
# argument of to_fasta), reporting progress on standard output.
def flatfasta(fastafile, *flatfiles)
  puts "Saving fasta file (#{fastafile}) ... "
  File.open(fastafile, "w") do |out|
    flatfiles.each do |flatfile|
      puts " converting -- #{flatfile}"
      Bio::FlatFile.auto(flatfile) do |ff|
        ff.each do |entry|
          header = "#{entry.entry_id} #{entry.definition}"
          out.puts entry.seq.to_fasta(header, 50)
        end
      end
    end
  end
  puts "done"
end
|
|
87
|
+
|
|
88
|
+
# Creates a BioFlat index for +flatfiles+ in the directory returned by
# Bio::Shell.create_flat_dir(dbname).  A failure while indexing is
# reported with warn instead of being raised.
def flatindex(dbname, *flatfiles)
  dir = Bio::Shell.create_flat_dir(dbname)
  begin
    print "Creating BioFlat index (#{dir}) ... "
    # bdb, format and options are all passed as nil.
    bdb = nil
    format = nil
    options = nil
    Bio::FlatFileIndex.makeindex(bdb, dir, format, options, *flatfiles)
    puts "done"
  rescue
    warn "Error: Failed to create index (#{dir}) : #{$!}"
  end
end
|
|
99
|
+
|
|
100
|
+
# Looks up +keyword+ in the flat-file index found for +dbname+ and returns
# the concatenated text of all matching entries.  Returns nil (after a
# warning) when no index directory is found, and an empty String (after a
# warning) when there are no hits.
def flatsearch(dbname, keyword)
  dir = Bio::Shell.find_flat_dir(dbname)
  if not dir
    warn "Error: Failed to open database (#{dbname})"
    return
  end

  entry = ''
  Bio::FlatFileIndex.open(dir) do |db|
    results = db.include?(keyword)
    if results
      results.each { |entry_id| entry << db.search_primary(entry_id).to_s }
    else
      warn "Error: No hits found in #{dbname} (#{keyword})"
    end
  end
  entry
end
|
|
118
|
+
|
|
119
|
+
end
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/shell/plugin/keggapi.rb - plugin for KEGG API
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: keggapi.rb,v 1.8 2005/12/18 15:47:33 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
module Bio::Shell
|
|
30
|
+
|
|
31
|
+
# Helper methods for the KEGG API shell plugin.
module Private
  # Turns each element of +list+ into the string
  # "<entry_id>:<TAB><definition>" and returns the strings as an Array.
  def keggapi_definition2tab(list)
    rows = []
    list.each { |entry| rows.push("#{entry.entry_id}:\t#{entry.definition}") }
    rows
  end
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Returns the memoized Bio::KEGG::API object, creating it on first use.
def keggapi
  return @keggapi if defined?(@keggapi) && @keggapi
  @keggapi = Bio::KEGG::API.new
end
|
|
46
|
+
|
|
47
|
+
# DBGET
|
|
48
|
+
|
|
49
|
+
# Calls keggapi.binfo for +db+ (default "all"), prints the result and
# returns it.
def binfo(db = "all")
  info = keggapi.binfo(db)
  puts info
  info
end
|
|
54
|
+
|
|
55
|
+
# Returns the result of keggapi.bfind for +str+.
def bfind(str)
  keggapi.bfind(str)
end
|
|
59
|
+
|
|
60
|
+
# Calls keggapi.bget for +str+.  With a block the result is yielded (and
# the block's value returned); otherwise the result itself is returned.
def bget(str)
  fetched = keggapi.bget(str)
  return fetched unless block_given?
  yield fetched
end
|
|
68
|
+
|
|
69
|
+
# Returns the result of keggapi.btit for +str+.
def btit(str)
  keggapi.btit(str)
end
|
|
73
|
+
|
|
74
|
+
# Returns the result of keggapi.bconv for +str+.
def bconv(str)
  keggapi.bconv(str)
end
|
|
78
|
+
|
|
79
|
+
# DATABASES
|
|
80
|
+
|
|
81
|
+
# Prints "<entry_id>:<TAB><definition>" for every item returned by
# keggapi.list_databases and returns the list of entry ids.
def keggdbs
  databases = keggapi.list_databases
  puts Bio::Shell.keggapi_definition2tab(databases).join("\n")
  databases.map { |item| item.entry_id }
end
|
|
87
|
+
|
|
88
|
+
# Prints, in sorted order, "<entry_id>:<TAB><definition>" for every item
# returned by keggapi.list_organisms and returns the entry ids in the
# order the API returned them.
def keggorgs
  organisms = keggapi.list_organisms
  lines = Bio::Shell.keggapi_definition2tab(organisms).sort
  puts lines.join("\n")
  organisms.map { |item| item.entry_id }
end
|
|
94
|
+
|
|
95
|
+
# Prints "<entry_id>:<TAB><definition>" for every item returned by
# keggapi.list_pathways(org) (org defaults to "map") and returns the list
# of entry ids.
def keggpathways(org = "map")
  pathways = keggapi.list_pathways(org)
  puts Bio::Shell.keggapi_definition2tab(pathways).join("\n")
  pathways.map { |item| item.entry_id }
end
|
|
101
|
+
|
|
102
|
+
# Downloads the first file whose name ends in "genome" from
# /pub/kegg/genomes/<org> on ftp.genome.jp and returns its content as a
# String.  An empty String is returned when no such file is listed.
#
# The file is retrieved over the FTP session that is already open, rather
# than through Kernel#open on an "ftp://" URL, which depends on open-uri
# having been loaded elsewhere and is no longer supported for URLs by
# newer Ruby versions.
def kegggenomeseq(org)
  result = ""
  require 'net/ftp'
  Net::FTP.open("ftp.genome.jp", "anonymous") do |ftp|
    path = "/pub/kegg/genomes/#{org}"
    list = ftp.nlst(path)
    remote = list.grep(/.*genome$/).shift
    if remote
      # NOTE(review): assumes nlst returns names usable directly in RETR
      # (the original built "ftp://ftp.genome.jp/<name>" from the same
      # value) -- confirm against the server's listing format.
      ftp.retrbinary("RETR #{remote}", Net::FTP::DEFAULT_BLOCKSIZE) do |chunk|
        result << chunk
      end
    end
  end
  return result
end
|
|
117
|
+
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
=begin
|
|
121
|
+
|
|
122
|
+
== BioRuby extensions
|
|
123
|
+
|
|
124
|
+
--- get_all_best_best_neighbors_by_gene(genes_id)
|
|
125
|
+
--- get_all_best_neighbors_by_gene(genes_id)
|
|
126
|
+
--- get_all_reverse_best_neighbors_by_gene(genes_id)
|
|
127
|
+
--- get_all_paralogs_by_gene(genes_id)
|
|
128
|
+
--- get_all_genes_by_motifs(motif_id_list)
|
|
129
|
+
--- get_all_oc_members_by_gene(genes_id)
|
|
130
|
+
--- get_all_pc_members_by_gene(genes_id)
|
|
131
|
+
--- get_all_genes_by_organism(org)
|
|
132
|
+
--- get_all_linkdb_by_entry(entry_id, db)
|
|
133
|
+
--- save_image(url, filename = nil)
|
|
134
|
+
--- get_entries(ary = [])
|
|
135
|
+
--- get_aaseqs(ary = [])
|
|
136
|
+
--- get_naseqs(ary = [])
|
|
137
|
+
--- get_definitions(ary = [])
|
|
138
|
+
|
|
139
|
+
== Original KEGG API methods
|
|
140
|
+
|
|
141
|
+
--- get_linkdb_by_entry(entry_id, db, start, max_results)
|
|
142
|
+
--- get_best_best_neighbors_by_gene(genes_id, start, max_results)
|
|
143
|
+
--- get_best_neighbors_by_gene(genes_id, start, max_results)
|
|
144
|
+
--- get_reverse_best_neighbors_by_gene(genes_id, start, max_results)
|
|
145
|
+
--- get_paralogs_by_gene(genes_id, start, max_results)
|
|
146
|
+
--- get_motifs_by_gene(genes_id, db)
|
|
147
|
+
--- get_genes_by_motifs(motif_id_list, start, max_results)
|
|
148
|
+
--- get_ko_by_gene(genes_id)
|
|
149
|
+
--- get_ko_by_ko_class(ko_class_id)
|
|
150
|
+
--- get_genes_by_ko_class(ko_class_id, org, start, max_results)
|
|
151
|
+
--- get_genes_by_ko(ko_id, org)
|
|
152
|
+
--- get_oc_members_by_gene(genes_id, start, max_results)
|
|
153
|
+
--- get_pc_members_by_gene(genes_id, start, max_results)
|
|
154
|
+
--- mark_pathway_by_objects(pathway_id, object_id_list)
|
|
155
|
+
--- color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
|
|
156
|
+
--- get_html_of_marked_pathway_by_objects(pathway_id, object_id_list)
|
|
157
|
+
--- get_html_of_colored_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
|
|
158
|
+
--- get_genes_by_pathway(pathway_id)
|
|
159
|
+
--- get_enzymes_by_pathway(pathway_id)
|
|
160
|
+
--- get_compounds_by_pathway(pathway_id)
|
|
161
|
+
--- get_glycans_by_pathway(pathway_id)
|
|
162
|
+
--- get_reactions_by_pathway(pathway_id)
|
|
163
|
+
--- get_kos_by_pathway(pathway_id)
|
|
164
|
+
--- get_pathways_by_genes(genes_id_list)
|
|
165
|
+
--- get_pathways_by_enzymes(enzyme_id_list)
|
|
166
|
+
--- get_pathways_by_compounds(compound_id_list)
|
|
167
|
+
--- get_pathways_by_glycans(glycan_id_list)
|
|
168
|
+
--- get_pathways_by_reactions(reaction_id_list)
|
|
169
|
+
--- get_pathways_by_kos(ko_id_list, org)
|
|
170
|
+
--- get_linked_pathways(pathway_id)
|
|
171
|
+
--- get_genes_by_enzyme(enzyme_id, org)
|
|
172
|
+
--- get_enzymes_by_gene(genes_id)
|
|
173
|
+
--- get_enzymes_by_compound(compound_id)
|
|
174
|
+
--- get_enzymes_by_glycan(glycan_id)
|
|
175
|
+
--- get_enzymes_by_reaction(reaction_id)
|
|
176
|
+
--- get_compounds_by_enzyme(enzyme_id)
|
|
177
|
+
--- get_compounds_by_reaction(reaction_id)
|
|
178
|
+
--- get_glycans_by_enzyme(enzyme_id)
|
|
179
|
+
--- get_glycans_by_reaction(reaction_id)
|
|
180
|
+
--- get_reactions_by_enzyme(enzyme_id)
|
|
181
|
+
--- get_reactions_by_compound(compound_id)
|
|
182
|
+
--- get_reactions_by_glycan(glycan_id)
|
|
183
|
+
--- get_genes_by_organism(org, start, max_results)
|
|
184
|
+
--- get_number_of_genes_by_organism(org)
|
|
185
|
+
--- convert_mol_to_kcf(mol_text)
|
|
186
|
+
|
|
187
|
+
=end
|