bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
data/lib/bio/command.rb
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/command.rb - general methods for external command execution
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003-2005
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
|
6
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
7
|
+
# License:: LGPL
|
|
8
|
+
#
|
|
9
|
+
# $Id: command.rb,v 1.3 2005/11/04 17:36:00 k Exp $
|
|
10
|
+
#
|
|
11
|
+
#--
|
|
12
|
+
#
|
|
13
|
+
# This library is free software; you can redistribute it and/or
|
|
14
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
15
|
+
# License as published by the Free Software Foundation; either
|
|
16
|
+
# version 2 of the License, or (at your option) any later version.
|
|
17
|
+
#
|
|
18
|
+
# This library is distributed in the hope that it will be useful,
|
|
19
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21
|
+
# Lesser General Public License for more details.
|
|
22
|
+
#
|
|
23
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
24
|
+
# License along with this library; if not, write to the Free Software
|
|
25
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
|
+
#
|
|
27
|
+
#++
|
|
28
|
+
#
|
|
29
|
+
|
|
30
|
+
require 'open3'
|
|
31
|
+
|
|
32
|
+
module Bio
  module Command

    # = Bio::Command::Tools
    #
    # Bio::Command::Tools is a collection of useful methods for execution
    # of external commands or web applications.  Any wrapper class for
    # applications shall include this module.  Every method defined below
    # is private, with the single exception of #errorlog.
    module Tools

      # Any character matching this pattern is backslash-escaped by
      # escape_shell_unix.
      UNSAFE_CHARS_UNIX   = /[^A-Za-z0-9\_\-\.\:\,\/\@\x1b\x80-\xfe]/n

      # If a string contains a character matching this pattern,
      # escape_shell_windows wraps the whole string in double quotes.
      QUOTE_CHARS_WINDOWS = /[^A-Za-z0-9\_\-\.\:\,\/\@\\]/n

      # Strings containing a character matching this pattern are rejected
      # by both escaping methods.
      UNESCAPABLE_CHARS   = /[\x00-\x08\x10-\x1a\x1c-\x1f\x7f\xff]/n

      #module_function
      private

      # Escape special characters in command line string for cmd.exe on
      # Windows.
      #
      # The argument is converted with to_s.  When it contains a character
      # matching QUOTE_CHARS_WINDOWS, the result is the string enclosed in
      # double quotes with every embedded double quote doubled; otherwise a
      # copy of the string is returned.
      #
      # Raises RuntimeError when the string matches UNESCAPABLE_CHARS.
      def escape_shell_windows(str)
        str = str.to_s
        raise 'cannot escape control characters' if UNESCAPABLE_CHARS =~ str
        if QUOTE_CHARS_WINDOWS =~ str then
          '"' + str.gsub(/\"/, '""') + '"'
        else
          String.new(str)
        end
      end

      # Escape special characters in command line string for UNIX shells.
      #
      # The argument is converted with to_s and every character matching
      # UNSAFE_CHARS_UNIX is prefixed with a backslash.
      #
      # Raises RuntimeError when the string matches UNESCAPABLE_CHARS.
      def escape_shell_unix(str)
        str = str.to_s
        raise 'cannot escape control characters' if UNESCAPABLE_CHARS =~ str
        str.gsub(UNSAFE_CHARS_UNIX) { |x| "\\#{x}" }
      end

      # Escape special characters in command line string, choosing the
      # Windows or the UNIX rules according to RUBY_PLATFORM.
      def escape_shell(str)
        case RUBY_PLATFORM
        when /mswin32|bccwin32/
          escape_shell_windows(str)
        else
          escape_shell_unix(str)
        end
      end

      # Generate command line string with special characters escaped,
      # choosing the Windows or the UNIX rules according to RUBY_PLATFORM.
      # _ary_ is the list of command line words.
      def make_command_line(ary)
        case RUBY_PLATFORM
        when /mswin32|bccwin32/
          make_command_line_windows(ary)
        else
          make_command_line_unix(ary)
        end
      end

      # Generate command line string with special characters escaped
      # for cmd.exe on Windows.  Each element of _ary_ is escaped with
      # escape_shell_windows and the results are joined with a space.
      def make_command_line_windows(ary)
        ary.collect { |str| escape_shell_windows(str) }.join(" ")
      end

      # Generate command line string with special characters escaped
      # for UNIX shells.  Each element of _ary_ is escaped with
      # escape_shell_unix and the results are joined with a space.
      def make_command_line_unix(ary)
        ary.collect { |str| escape_shell_unix(str) }.join(" ")
      end

      # Executes the program.  Automatically selects IO.popen for Windows
      # environments and Open3.popen3 for the others.
      #
      # _cmd_ is the list of command line words and _query_, when given,
      # is written to the standard input of the program.
      #
      # If a block is given, it is yielded with the input and output IO
      # objects.  Note that on some platforms both are the same object,
      # so take care when calling close on them.
      def call_command_local(cmd, query = nil, &block)
        case RUBY_PLATFORM
        when /mswin32|bccwin32/
          call_command_local_popen(cmd, query, &block)
        else
          call_command_local_open3(cmd, query, &block)
        end
      end

      # Executes the program via IO.popen for OS which doesn't support fork.
      #
      # If a block is given, it is yielded with the IO object twice (as
      # input and as output), because IO.popen provides a single
      # bidirectional object.  Without a block, _query_ is written to the
      # program (when not nil), the write side is closed and everything
      # the program printed is returned.
      def call_command_local_popen(cmd, query = nil)
        str = make_command_line(cmd)
        IO.popen(str, "w+") do |io|
          io.sync = true
          if block_given? then
            yield io, io
          else
            io.print query if query
            io.close_write
            io.read
          end
        end
      end

      # Executes the program via Open3.popen3.
      #
      # If a block is given, it is yielded with the input and output IO
      # objects and its value is returned.  Without a block, _query_ is
      # written to the program (when not nil), its standard input is
      # closed and its whole standard output is returned.
      #
      # In both cases the standard error of the program is collected in a
      # separate thread and can be obtained with #errorlog after this
      # method returns.
      #
      # From the view point of security, this method is recommended
      # rather than call_command_local_popen.
      def call_command_local_open3(cmd, query = nil)
        cmd = cmd.collect { |x| x.to_s }
        Open3.popen3(*cmd) do |pin, pout, perr|
          perr.sync = true
          t = Thread.start { @errorlog = perr.read }
          begin
            if block_given? then
              yield pin, pout
            else
              pin.print query if query
              pin.close
              pout.read
            end
          ensure
            # Release the pipes to the child before waiting for the stderr
            # reader: a child still blocked on its stdin or stdout would
            # never close stderr and the join below would not return.
            pin.close unless pin.closed?
            pout.close unless pout.closed?
            # Wait for the reader so that @errorlog is set on return,
            # whether or not a block was given.
            t.join
          end
        end
      end

      # Shows the latest stderr of the program execution.
      # Note that this method may be thread unsafe.
      attr_reader :errorlog
      public :errorlog

    end # module Tools
  end # module Command
end # module Bio
|
|
166
|
+
|
data/lib/bio/data/aa.rb
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/data/aa.rb - Amino Acids
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: aa.rb,v 0.16 2005/11/15 13:33:11 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
module Bio

  class AminoAcid

    # Lookup tables and conversion methods for amino acid codes and names.
    # The module is both included in and extended by Bio::AminoAcid, so
    # every public method is available as an instance method and as a
    # class method.
    module Data

      # IUPAC code
      # * http://www.iupac.org/
      # * http://www.chem.qmw.ac.uk/iubmb/newsletter/1999/item3.html
      #
      # Maps one-letter codes to three-letter codes and three-letter codes
      # to full (lower case) names.

      NAMES = {

        'A' => 'Ala',
        'C' => 'Cys',
        'D' => 'Asp',
        'E' => 'Glu',
        'F' => 'Phe',
        'G' => 'Gly',
        'H' => 'His',
        'I' => 'Ile',
        'K' => 'Lys',
        'L' => 'Leu',
        'M' => 'Met',
        'N' => 'Asn',
        'P' => 'Pro',
        'Q' => 'Gln',
        'R' => 'Arg',
        'S' => 'Ser',
        'T' => 'Thr',
        'V' => 'Val',
        'W' => 'Trp',
        'Y' => 'Tyr',
        'B' => 'Asx',	# D/N
        'Z' => 'Glx',	# E/Q
        'U' => 'Sec',	# 'uga' (stop)
        '?' => 'Pyl',	# 'uag' (stop)

        'Ala' => 'alanine',
        'Cys' => 'cysteine',
        'Asp' => 'aspartic acid',
        'Glu' => 'glutamic acid',
        'Phe' => 'phenylalanine',
        'Gly' => 'glycine',
        'His' => 'histidine',
        'Ile' => 'isoleucine',
        'Lys' => 'lysine',
        'Leu' => 'leucine',
        'Met' => 'methionine',
        'Asn' => 'asparagine',
        'Pro' => 'proline',
        'Gln' => 'glutamine',
        'Arg' => 'arginine',
        'Ser' => 'serine',
        'Thr' => 'threonine',
        'Val' => 'valine',
        'Trp' => 'tryptophan',
        'Tyr' => 'tyrosine',
        'Asx' => 'asparagine/aspartic acid',
        'Glx' => 'glutamine/glutamic acid',
        'Sec' => 'selenocysteine',
        'Pyl' => 'pyrrolysine',

      }

      # AAindex FASG760101 - Molecular weight (Fasman, 1976)
      #   Fasman, G.D., ed.
      #   "Handbook of Biochemistry and Molecular Biology", 3rd ed.,
      #   Proteins - Volume 1, CRC Press, Cleveland (1976)
      #
      # Keys are upper case one-letter codes.

      WEIGHT = {

        'A' => 89.09,
        'C' => 121.15,	# 121.16 according to the Wikipedia
        'D' => 133.10,
        'E' => 147.13,
        'F' => 165.19,
        'G' => 75.07,
        'H' => 155.16,
        'I' => 131.17,
        'K' => 146.19,
        'L' => 131.17,
        'M' => 149.21,
        'N' => 132.12,
        'P' => 115.13,
        'Q' => 146.15,
        'R' => 174.20,
        'S' => 105.09,
        'T' => 119.12,
        'U' => 168.06,
        'V' => 117.15,
        'W' => 204.23,
        'Y' => 181.19,
      }

      # Returns molecular weights.
      #
      # * Without an argument (or with nil/false), returns the WEIGHT table.
      # * With an argument of length 0 or 1, returns the WEIGHT entry of
      #   that residue, or nil when there is none.  The lookup is case
      #   insensitive, as it is for longer sequences.
      # * With a longer argument, returns the sum of the weights of all
      #   residues minus NucleicAcid.weight[:water] for each of the
      #   (length - 1) bonds.  Raises RuntimeError when a residue has no
      #   WEIGHT entry.
      def weight(x = nil)
        return WEIGHT unless x

        if x.length > 1
          total = 0.0
          x.each_byte do |byte|
            aa = byte.chr.upcase
            unless WEIGHT[aa]
              raise "Error: invalid amino acid '#{aa}'"
            end
            total += WEIGHT[aa]
          end
          # NucleicAcid is resolved when this line runs, so the nucleic
          # acid data must have been loaded by then.
          total - NucleicAcid.weight[:water] * (x.length - 1)
        else
          # Normalize case here too, so that weight('a') agrees with
          # weight('A') the same way weight('aa') agrees with weight('AA').
          WEIGHT[x.to_s.upcase]
        end
      end

      # Returns the NAMES entry for _x_: the three-letter code for a
      # one-letter code, the full name for a three-letter code, else nil.
      def [](x)
        NAMES[x]
      end

      # backward compatibility
      #
      # Returns the whole NAMES table.
      def names
        NAMES
      end
      alias aa names

      # Returns the full name for a one-letter or a three-letter code,
      # or nil when _x_ is not a key of NAMES.
      def name(x)
        str = NAMES[x]
        if str and str.length == 3
          # x was a one-letter code; follow the second hop to the name
          NAMES[str]
        else
          str
        end
      end

      # Converts _x_ to a one-letter code.  The kind of input is decided
      # only by the length of x.to_s: length 1 is returned unchanged,
      # length 3 is treated as a three-letter code and anything else as a
      # full name.
      def to_1(x)
        case x.to_s.length
        when 1
          x
        when 3
          three2one(x)
        else
          name2one(x)
        end
      end
      alias one to_1

      # Converts _x_ to a three-letter code.  The kind of input is decided
      # only by the length of x.to_s: length 1 is treated as a one-letter
      # code, length 3 is returned unchanged and anything else is treated
      # as a full name.
      def to_3(x)
        case x.to_s.length
        when 1
          one2three(x)
        when 3
          x
        else
          name2three(x)
        end
      end
      alias three to_3

      # Returns the three-letter code for the one-letter code _x_.
      # Raises ArgumentError when _x_ is given and its length is not 1.
      def one2three(x)
        if x and x.length != 1
          raise ArgumentError
        else
          NAMES[x]
        end
      end

      # Returns the one-letter code for the three-letter code _x_.
      # Raises ArgumentError when _x_ is given and its length is not 3.
      def three2one(x)
        if x and x.length != 3
          raise ArgumentError
        else
          reverse[x]
        end
      end

      # Returns the full name for the one-letter code _x_.
      # Raises ArgumentError when _x_ is given and its length is not 1.
      def one2name(x)
        if x and x.length != 1
          raise ArgumentError
        else
          three2name(NAMES[x])
        end
      end

      # Returns the one-letter code for the full name _x_.  The name is
      # converted with to_s and matched in lower case.
      def name2one(x)
        str = reverse[x.to_s.downcase]
        if str and str.length == 3
          three2one(str)
        else
          str
        end
      end

      # Returns the full name for the three-letter code _x_.
      # Raises ArgumentError when _x_ is given and its length is not 3.
      def three2name(x)
        if x and x.length != 3
          raise ArgumentError
        else
          NAMES[x]
        end
      end

      # Returns the three-letter code for the full name _x_.  The name is
      # converted with to_s and matched in lower case, as in name2one.
      def name2three(x)
        reverse[x.to_s.downcase]
      end

      # Builds a Regexp from the sequence _seq_.  The sequence is upper
      # cased, every character outside BZACDEFGHIKLMNPQRSTVWYU becomes
      # ".", "B" becomes "[DN]" and "Z" becomes "[EQ]".
      def to_re(seq)
        str = seq.to_s.upcase
        str.gsub!(/[^BZACDEFGHIKLMNPQRSTVWYU]/, ".")
        str.gsub!("B", "[DN]")
        str.gsub!("Z", "[EQ]")
        Regexp.new(str)
      end


      private


      # Returns a new Hash mapping every value of NAMES back to its key
      # (three-letter code to one-letter code, full name to three-letter
      # code).  The table is rebuilt on each call.
      def reverse
        hash = Hash.new
        NAMES.each do |k, v|
          hash[v] = k
        end
        hash
      end

    end


    # as instance methods
    include Data

    # as class methods
    extend Data


    private


    # override when used as an instance method to improve performance
    #
    # The inverted table is built once per instance and cached.
    alias orig_reverse reverse
    def reverse
      @reverse ||= orig_reverse
    end

  end

end # module Bio
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
if __FILE__ == $0

  puts "### aa = Bio::AminoAcid.new"
  aa = Bio::AminoAcid.new

  # Demonstration table.  Each row holds the header line to print and a
  # lambda returning the list of values to inspect.  A lambda is called
  # only after its header has been printed, so the header of a failing
  # call is still shown before the error.
  samples = [
    ["# Bio::AminoAcid['A']",
      lambda { [Bio::AminoAcid['A']] }],
    ["# aa['A']",
      lambda { [aa['A']] }],

    ["# Bio::AminoAcid.name('A'), Bio::AminoAcid.name('Ala')",
      lambda { [Bio::AminoAcid.name('A'), Bio::AminoAcid.name('Ala')] }],
    ["# aa.name('A'), aa.name('Ala')",
      lambda { [aa.name('A'), aa.name('Ala')] }],

    ["# Bio::AminoAcid.to_1('alanine'), Bio::AminoAcid.one('alanine')",
      lambda { [Bio::AminoAcid.to_1('alanine'), Bio::AminoAcid.one('alanine')] }],
    ["# aa.to_1('alanine'), aa.one('alanine')",
      lambda { [aa.to_1('alanine'), aa.one('alanine')] }],
    ["# Bio::AminoAcid.to_1('Ala'), Bio::AminoAcid.one('Ala')",
      lambda { [Bio::AminoAcid.to_1('Ala'), Bio::AminoAcid.one('Ala')] }],
    ["# aa.to_1('Ala'), aa.one('Ala')",
      lambda { [aa.to_1('Ala'), aa.one('Ala')] }],
    ["# Bio::AminoAcid.to_1('A'), Bio::AminoAcid.one('A')",
      lambda { [Bio::AminoAcid.to_1('A'), Bio::AminoAcid.one('A')] }],
    ["# aa.to_1('A'), aa.one('A')",
      lambda { [aa.to_1('A'), aa.one('A')] }],

    ["# Bio::AminoAcid.to_3('alanine'), Bio::AminoAcid.three('alanine')",
      lambda { [Bio::AminoAcid.to_3('alanine'), Bio::AminoAcid.three('alanine')] }],
    ["# aa.to_3('alanine'), aa.three('alanine')",
      lambda { [aa.to_3('alanine'), aa.three('alanine')] }],
    ["# Bio::AminoAcid.to_3('Ala'), Bio::AminoAcid.three('Ala')",
      lambda { [Bio::AminoAcid.to_3('Ala'), Bio::AminoAcid.three('Ala')] }],
    ["# aa.to_3('Ala'), aa.three('Ala')",
      lambda { [aa.to_3('Ala'), aa.three('Ala')] }],
    ["# Bio::AminoAcid.to_3('A'), Bio::AminoAcid.three('A')",
      lambda { [Bio::AminoAcid.to_3('A'), Bio::AminoAcid.three('A')] }],
    ["# aa.to_3('A'), aa.three('A')",
      lambda { [aa.to_3('A'), aa.three('A')] }],

    ["# Bio::AminoAcid.one2three('A')",
      lambda { [Bio::AminoAcid.one2three('A')] }],
    ["# aa.one2three('A')",
      lambda { [aa.one2three('A')] }],

    ["# Bio::AminoAcid.three2one('Ala')",
      lambda { [Bio::AminoAcid.three2one('Ala')] }],
    ["# aa.three2one('Ala')",
      lambda { [aa.three2one('Ala')] }],

    ["# Bio::AminoAcid.one2name('A')",
      lambda { [Bio::AminoAcid.one2name('A')] }],
    ["# aa.one2name('A')",
      lambda { [aa.one2name('A')] }],

    ["# Bio::AminoAcid.name2one('alanine')",
      lambda { [Bio::AminoAcid.name2one('alanine')] }],
    ["# aa.name2one('alanine')",
      lambda { [aa.name2one('alanine')] }],

    ["# Bio::AminoAcid.three2name('Ala')",
      lambda { [Bio::AminoAcid.three2name('Ala')] }],
    ["# aa.three2name('Ala')",
      lambda { [aa.three2name('Ala')] }],

    ["# Bio::AminoAcid.name2three('alanine')",
      lambda { [Bio::AminoAcid.name2three('alanine')] }],
    ["# aa.name2three('alanine')",
      lambda { [aa.name2three('alanine')] }],

    ["# Bio::AminoAcid.to_re('BZACDEFGHIKLMNPQRSTVWYU')",
      lambda { [Bio::AminoAcid.to_re('BZACDEFGHIKLMNPQRSTVWYU')] }],
  ]

  samples.each do |header, values|
    puts header
    # p prints each of the values on a line of its own
    p(*values.call)
  end

end
|
|
354
|
+
|