bio 0.7.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
@@ -1,23 +1,10 @@
|
|
1
1
|
#
|
2
|
-
# bio/io/flatfile/indexer.rb - OBDA flatfile indexer
|
2
|
+
# = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
|
3
3
|
#
|
4
|
-
#
|
4
|
+
# Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
|
5
|
+
# License:: Ruby's
|
5
6
|
#
|
6
|
-
#
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
8
|
-
# License as published by the Free Software Foundation; either
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
10
|
-
#
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
-
# Lesser General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
17
|
-
# License along with this library; if not, write to the Free Software
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
-
#
|
20
|
-
# $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
|
7
|
+
# $Id: indexer.rb,v 1.23 2006/02/22 08:41:03 ngoto Exp $
|
21
8
|
#
|
22
9
|
|
23
10
|
require 'bio/io/flatfile/index'
|
@@ -80,6 +67,8 @@ module Bio
|
|
80
67
|
BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
|
81
68
|
when 'Bio::Blast::WU::Report_TBlast'
|
82
69
|
BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
|
70
|
+
when 'Bio::PDB::ChemicalComponent'
|
71
|
+
PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
|
83
72
|
else
|
84
73
|
raise 'unknown or unsupported format'
|
85
74
|
end #case dbclass.to_s
|
@@ -130,10 +119,10 @@ module Bio
|
|
130
119
|
attr_reader :fileid
|
131
120
|
|
132
121
|
def each
|
133
|
-
pos = @flatfile.pos
|
134
122
|
@flatfile.each do |x|
|
135
123
|
@entry = x
|
136
|
-
|
124
|
+
pos = @flatfile.entry_start_pos
|
125
|
+
len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
|
137
126
|
begin
|
138
127
|
yield pos, len
|
139
128
|
rescue RuntimeError, NameError => evar
|
@@ -150,7 +139,6 @@ module Bio
|
|
150
139
|
DEBUG.print "This entry shall be incorrectly indexed.\n"
|
151
140
|
end
|
152
141
|
end #rescue
|
153
|
-
pos = @flatfile.pos
|
154
142
|
end
|
155
143
|
end
|
156
144
|
|
@@ -204,15 +192,6 @@ module Bio
|
|
204
192
|
end
|
205
193
|
self.add_secondary_namespaces(*sec_names)
|
206
194
|
end
|
207
|
-
def open_flatfile(fileid, file)
|
208
|
-
super
|
209
|
-
@flatfile.pos = 0
|
210
|
-
begin
|
211
|
-
pos = @flatfile.pos
|
212
|
-
line = @flatfile.gets
|
213
|
-
end until (!line or line =~ /^LOCUS /)
|
214
|
-
@flatfile.pos = pos
|
215
|
-
end
|
216
195
|
end #class GenBankParser
|
217
196
|
|
218
197
|
class GenPeptParser < GenBankParser
|
@@ -437,6 +416,35 @@ module Bio
|
|
437
416
|
end
|
438
417
|
end #class BlastDefaultReportParser
|
439
418
|
|
419
|
+
class PDBChemicalComponentParser < TemplateParser
|
420
|
+
NAMESTYLE = NameSpaces.new(
|
421
|
+
NameSpace.new( 'UNIQUE', Proc.new { |x| x.entry_id } )
|
422
|
+
)
|
423
|
+
PRIMARY = 'UNIQUE'
|
424
|
+
def initialize(klass, pri_name = nil, sec_names = nil)
|
425
|
+
super()
|
426
|
+
self.format = 'raw'
|
427
|
+
self.dbclass = Bio::PDB::ChemicalComponent
|
428
|
+
self.set_primary_namespace((pri_name or PRIMARY))
|
429
|
+
unless sec_names then
|
430
|
+
sec_names = []
|
431
|
+
@namestyle.each_value do |x|
|
432
|
+
sec_names << x.name if x.name != self.primary.name
|
433
|
+
end
|
434
|
+
end
|
435
|
+
self.add_secondary_namespaces(*sec_names)
|
436
|
+
end
|
437
|
+
def open_flatfile(fileid, file)
|
438
|
+
super
|
439
|
+
@flatfile.pos = 0
|
440
|
+
begin
|
441
|
+
pos = @flatfile.pos
|
442
|
+
line = @flatfile.gets
|
443
|
+
end until (!line or line =~ /^RESIDUE /)
|
444
|
+
@flatfile.pos = pos
|
445
|
+
end
|
446
|
+
end #class PDBChemicalComponentParser
|
447
|
+
|
440
448
|
end #module Parser
|
441
449
|
|
442
450
|
def self.makeindexBDB(name, parser, options, *files)
|
data/lib/bio/reference.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
1
|
#
|
2
|
-
# bio/reference.rb -
|
2
|
+
# = bio/reference.rb - Journal reference classes
|
3
3
|
#
|
4
|
-
# Copyright (C) 2001
|
4
|
+
# Copyright:: Copyright (C) 2001
|
5
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
6
|
+
# License:: LGPL
|
7
|
+
#
|
8
|
+
# $Id: reference.rb,v 1.21 2006/02/08 15:06:26 nakao Exp $
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# Journal reference classes.
|
13
|
+
#
|
14
|
+
# == Examples
|
15
|
+
#
|
16
|
+
# == References
|
17
|
+
#
|
18
|
+
#
|
19
|
+
#
|
20
|
+
#--
|
5
21
|
#
|
6
22
|
# This library is free software; you can redistribute it and/or
|
7
23
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -17,13 +33,78 @@
|
|
17
33
|
# License along with this library; if not, write to the Free Software
|
18
34
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
35
|
#
|
20
|
-
|
36
|
+
#++
|
21
37
|
#
|
22
38
|
|
23
39
|
module Bio
|
24
40
|
|
41
|
+
# A class for journal reference information.
|
42
|
+
#
|
43
|
+
# === Examples
|
44
|
+
#
|
45
|
+
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
|
46
|
+
# 'title' => "Title of the study.",
|
47
|
+
# 'journal' => "Theor. J. Hoge",
|
48
|
+
# 'volume' => 12,
|
49
|
+
# 'issue' => 3,
|
50
|
+
# 'pages' => "123-145",
|
51
|
+
# 'year' => 2001,
|
52
|
+
# 'pubmed' => 12345678,
|
53
|
+
# 'medline' => 98765432,
|
54
|
+
# 'abstract' => "Hoge fuga. ...",
|
55
|
+
# 'url' => "http://example.com",
|
56
|
+
# 'mesh' => [],
|
57
|
+
# 'affiliations' => []}
|
58
|
+
# ref = Bio::Reference.new(hash)
|
59
|
+
#
|
60
|
+
# # Formats in the BibTeX style.
|
61
|
+
# ref.format("bibtex")
|
62
|
+
#
|
63
|
+
# # Short-cut for Bio::Reference#format("bibtex")
|
64
|
+
# ref.bibtex
|
65
|
+
#
|
25
66
|
class Reference
|
26
67
|
|
68
|
+
# Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
|
69
|
+
attr_reader :authors
|
70
|
+
|
71
|
+
# "Title of the study."
|
72
|
+
attr_reader :title
|
73
|
+
|
74
|
+
# "Theor. J. Hoge"
|
75
|
+
attr_reader :journal
|
76
|
+
|
77
|
+
# 12
|
78
|
+
attr_reader :volume
|
79
|
+
|
80
|
+
# 3
|
81
|
+
attr_reader :issue
|
82
|
+
|
83
|
+
# "123-145"
|
84
|
+
attr_reader :pages
|
85
|
+
|
86
|
+
# 2001
|
87
|
+
attr_reader :year
|
88
|
+
|
89
|
+
# 12345678
|
90
|
+
attr_reader :pubmed
|
91
|
+
|
92
|
+
# 98765432
|
93
|
+
attr_reader :medline
|
94
|
+
|
95
|
+
# Abstract text in String.
|
96
|
+
attr_reader :abstract
|
97
|
+
|
98
|
+
# A URL String.
|
99
|
+
attr_reader :url
|
100
|
+
|
101
|
+
# MeSH terms in an Array.
|
102
|
+
attr_reader :mesh
|
103
|
+
|
104
|
+
# Affiliations in an Array.
|
105
|
+
attr_reader :affiliations
|
106
|
+
|
107
|
+
#
|
27
108
|
def initialize(hash)
|
28
109
|
hash.default = ''
|
29
110
|
@authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
|
@@ -43,9 +124,23 @@ module Bio
|
|
43
124
|
@mesh = [] if @mesh.empty?
|
44
125
|
@affiliations = [] if @affiliations.empty?
|
45
126
|
end
|
46
|
-
attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
|
47
|
-
:pubmed, :medline, :abstract, :url, :mesh, :affiliations
|
48
127
|
|
128
|
+
# Formats the reference in a given style.
|
129
|
+
#
|
130
|
+
# Styles:
|
131
|
+
# 0. nil - general
|
132
|
+
# 1. endnote - Endnote
|
133
|
+
# 2. bibitem - Bibitem (option acceptable)
|
134
|
+
# 3. bibtex - BibTeX (option acceptable)
|
135
|
+
# 4. rd - rd (option acceptable)
|
136
|
+
# 5. nature - Nature (option acceptable)
|
137
|
+
# 6. science - Science
|
138
|
+
# 7. genome_biol - Genome Biology
|
139
|
+
# 8. genome_res - Genome Research
|
140
|
+
# 9. nar - Nucleic Acids Research
|
141
|
+
# 10. current - Current Biology
|
142
|
+
# 11. trends - Trends in *
|
143
|
+
# 12. cell - Cell Press
|
49
144
|
def format(style = nil, option = nil)
|
50
145
|
case style
|
51
146
|
when 'endnote'
|
@@ -77,19 +172,20 @@ module Bio
|
|
77
172
|
end
|
78
173
|
end
|
79
174
|
|
175
|
+
# Formats in the Endnote style.
|
80
176
|
def endnote
|
81
177
|
lines = []
|
82
178
|
lines << "%0 Journal Article"
|
83
179
|
@authors.each do |author|
|
84
180
|
lines << "%A #{author}"
|
85
181
|
end
|
86
|
-
lines << "%D #{@year}" unless @year.empty?
|
182
|
+
lines << "%D #{@year}" unless @year.to_s.empty?
|
87
183
|
lines << "%T #{@title}" unless @title.empty?
|
88
184
|
lines << "%J #{@journal}" unless @journal.empty?
|
89
|
-
lines << "%V #{@volume}" unless @volume.empty?
|
90
|
-
lines << "%N #{@issue}" unless @issue.empty?
|
185
|
+
lines << "%V #{@volume}" unless @volume.to_s.empty?
|
186
|
+
lines << "%N #{@issue}" unless @issue.to_s.empty?
|
91
187
|
lines << "%P #{@pages}" unless @pages.empty?
|
92
|
-
lines << "%M #{@pubmed}" unless @pubmed.empty?
|
188
|
+
lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
|
93
189
|
if @pubmed
|
94
190
|
cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
|
95
191
|
opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
|
@@ -104,6 +200,7 @@ module Bio
|
|
104
200
|
return lines.join("\n")
|
105
201
|
end
|
106
202
|
|
203
|
+
# Formats in the bibitem style.
|
107
204
|
def bibitem(item = nil)
|
108
205
|
item = "PMID:#{@pubmed}" unless item
|
109
206
|
pages = @pages.sub('-', '--')
|
@@ -115,6 +212,7 @@ module Bio
|
|
115
212
|
END
|
116
213
|
end
|
117
214
|
|
215
|
+
# Formats in the BibTeX style.
|
118
216
|
def bibtex(section = nil)
|
119
217
|
section = "article" unless section
|
120
218
|
authors = authors_join(' and ', ' and ')
|
@@ -132,11 +230,13 @@ module Bio
|
|
132
230
|
END
|
133
231
|
end
|
134
232
|
|
233
|
+
# Formats in a general style.
|
135
234
|
def general
|
136
235
|
authors = @authors.join(', ')
|
137
236
|
"#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
|
138
237
|
end
|
139
238
|
|
239
|
+
# Formats in the RD style.
|
140
240
|
def rd(str = nil)
|
141
241
|
@abstract ||= str
|
142
242
|
lines = []
|
@@ -147,6 +247,8 @@ module Bio
|
|
147
247
|
return lines.join("\n\n")
|
148
248
|
end
|
149
249
|
|
250
|
+
# Formats in the Nature Publishing Group style.
|
251
|
+
# * http://www.nature.com
|
150
252
|
def nature(short = false)
|
151
253
|
if short
|
152
254
|
if @authors.size > 4
|
@@ -163,6 +265,8 @@ module Bio
|
|
163
265
|
end
|
164
266
|
end
|
165
267
|
|
268
|
+
# Formats in the Science style.
|
269
|
+
# * http://www.sciencemag.org/
|
166
270
|
def science
|
167
271
|
if @authors.size > 4
|
168
272
|
authors = rev_name(@authors[0]) + " et al."
|
@@ -173,28 +277,40 @@ module Bio
|
|
173
277
|
"#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
|
174
278
|
end
|
175
279
|
|
280
|
+
# Formats in the Genome Biology style.
|
281
|
+
# * http://genomebiology.com/
|
176
282
|
def genome_biol
|
177
283
|
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
|
178
284
|
journal = strip_dots(@journal)
|
179
285
|
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
|
180
286
|
end
|
287
|
+
# Formats in the Current Biology style.
|
288
|
+
# * http://www.current-biology.com/
|
181
289
|
alias current genome_biol
|
182
290
|
|
291
|
+
# Formats in the Genome Research style.
|
292
|
+
# * http://genome.org/
|
183
293
|
def genome_res
|
184
294
|
authors = authors_join(' and ')
|
185
295
|
"#{authors} #{@year}.\n #{@title} #{@journal} #{@volume}: #{@pages}."
|
186
296
|
end
|
187
297
|
|
298
|
+
# Formats in the Nucleic Acids Research style.
|
299
|
+
# * http://nar.oxfordjournals.org/
|
188
300
|
def nar
|
189
301
|
authors = authors_join(' and ')
|
190
302
|
"#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
|
191
303
|
end
|
192
304
|
|
305
|
+
# Formats in the CELL Press style.
|
306
|
+
# * http://www.cell.com/
|
193
307
|
def cell
|
194
308
|
authors = authors_join(' and ')
|
195
309
|
"#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
|
196
310
|
end
|
197
|
-
|
311
|
+
|
312
|
+
# Formats in the TRENDS Journals.
|
313
|
+
# * http://www.trends.com/
|
198
314
|
def trends
|
199
315
|
if @authors.size > 2
|
200
316
|
authors = "#{@authors[0]} et al."
|
@@ -235,22 +351,37 @@ module Bio
|
|
235
351
|
|
236
352
|
end
|
237
353
|
|
238
|
-
|
354
|
+
# Set of Bio::Reference.
|
355
|
+
#
|
356
|
+
# === Examples
|
357
|
+
#
|
358
|
+
# refs = Bio::References.new
|
359
|
+
# refs.append(Bio::Reference.new(hash))
|
360
|
+
# refs.each do |reference|
|
361
|
+
# ...
|
362
|
+
# end
|
363
|
+
#
|
239
364
|
class References
|
240
365
|
|
366
|
+
# Array of Bio::Reference.
|
367
|
+
attr_accessor :references
|
368
|
+
|
369
|
+
#
|
241
370
|
def initialize(ary = [])
|
242
371
|
@references = ary
|
243
372
|
end
|
244
|
-
attr_accessor :references
|
245
373
|
|
246
|
-
|
247
|
-
|
374
|
+
|
375
|
+
# Append a Bio::Reference object.
|
376
|
+
def append(reference)
|
377
|
+
@references.push(reference) if reference.is_a? Reference
|
248
378
|
return self
|
249
379
|
end
|
250
380
|
|
381
|
+
# Iterates each Bio::Reference object.
|
251
382
|
def each
|
252
|
-
@references.each do |
|
253
|
-
yield
|
383
|
+
@references.each do |reference|
|
384
|
+
yield reference
|
254
385
|
end
|
255
386
|
end
|
256
387
|
|
@@ -258,51 +389,3 @@ module Bio
|
|
258
389
|
|
259
390
|
end
|
260
391
|
|
261
|
-
|
262
|
-
|
263
|
-
=begin
|
264
|
-
|
265
|
-
= Bio::Reference
|
266
|
-
|
267
|
-
--- Bio::Reference.new(hash)
|
268
|
-
|
269
|
-
--- Bio::Reference#authors -> Array
|
270
|
-
--- Bio::Reference#title -> String
|
271
|
-
--- Bio::Reference#journal -> String
|
272
|
-
--- Bio::Reference#volume -> Fixnum
|
273
|
-
--- Bio::Reference#issue -> Fixnum
|
274
|
-
--- Bio::Reference#pages -> String
|
275
|
-
--- Bio::Reference#year -> Fixnum
|
276
|
-
--- Bio::Reference#pubmed -> Fixnum
|
277
|
-
--- Bio::Reference#medline -> Fixnum
|
278
|
-
--- Bio::Reference#abstract -> String
|
279
|
-
--- Bio::Reference#url -> String
|
280
|
-
--- Bio::Reference#mesh -> Array
|
281
|
-
--- Bio::Reference#affiliations -> Array
|
282
|
-
|
283
|
-
--- Bio::Reference#format(style = nil, option = nil) -> String
|
284
|
-
|
285
|
-
--- Bio::Reference#endnote
|
286
|
-
--- Bio::Reference#bibitem(item = nil) -> String
|
287
|
-
--- Bio::Reference#bibtex(section = nil) -> String
|
288
|
-
--- Bio::Reference#rd(str = nil) -> String
|
289
|
-
--- Bio::Reference#nature(short = false) -> String
|
290
|
-
--- Bio::Reference#science -> String
|
291
|
-
--- Bio::Reference#genome_biol -> String
|
292
|
-
--- Bio::Reference#genome_res -> String
|
293
|
-
--- Bio::Reference#nar -> String
|
294
|
-
--- Bio::Reference#cell -> String
|
295
|
-
--- Bio::Reference#trends -> String
|
296
|
-
--- Bio::Reference#general -> String
|
297
|
-
|
298
|
-
= Bio::References
|
299
|
-
|
300
|
-
--- Bio::References.new(ary = [])
|
301
|
-
|
302
|
-
--- Bio::References#references -> Array
|
303
|
-
--- Bio::References#append(a) -> Bio::References
|
304
|
-
--- Bio::References#each -> Array
|
305
|
-
|
306
|
-
=end
|
307
|
-
|
308
|
-
|
data/lib/bio/sequence.rb
CHANGED
@@ -1,65 +1,75 @@
|
|
1
1
|
#
|
2
2
|
# = bio/sequence.rb - biological sequence class
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2000-
|
4
|
+
# Copyright:: Copyright (C) 2000-2006
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>,
|
6
|
-
# Yoshinori K. Okuji <okuji@
|
6
|
+
# Yoshinori K. Okuji <okuji@enbug.org>,
|
7
7
|
# Naohisa Goto <ng@bioruby.org>
|
8
|
-
# License::
|
8
|
+
# License:: Ruby's
|
9
9
|
#
|
10
|
-
# $Id: sequence.rb,v 0.
|
11
|
-
#
|
12
|
-
#--
|
13
|
-
# *TODO* remove this functionality?
|
14
|
-
# You can use Bio::Seq instead of Bio::Sequence for short.
|
15
|
-
#++
|
16
|
-
#
|
17
|
-
#--
|
18
|
-
#
|
19
|
-
# This library is free software; you can redistribute it and/or
|
20
|
-
# modify it under the terms of the GNU Lesser General Public
|
21
|
-
# License as published by the Free Software Foundation; either
|
22
|
-
# version 2 of the License, or (at your option) any later version.
|
23
|
-
#
|
24
|
-
# This library is distributed in the hope that it will be useful,
|
25
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
26
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
27
|
-
# Lesser General Public License for more details.
|
28
|
-
#
|
29
|
-
# You should have received a copy of the GNU Lesser General Public
|
30
|
-
# License along with this library; if not, write to the Free Software
|
31
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
32
|
-
#
|
33
|
-
#++
|
10
|
+
# $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
|
34
11
|
#
|
35
12
|
|
36
|
-
require 'bio/
|
37
|
-
require 'bio/data/aa'
|
38
|
-
require 'bio/data/codontable'
|
39
|
-
require 'bio/location'
|
13
|
+
require 'bio/sequence/compat'
|
40
14
|
|
41
15
|
module Bio
|
42
16
|
|
43
|
-
|
17
|
+
class Sequence
|
44
18
|
|
45
|
-
|
19
|
+
autoload :Common, 'bio/sequence/common'
|
20
|
+
autoload :NA, 'bio/sequence/na'
|
21
|
+
autoload :AA, 'bio/sequence/aa'
|
22
|
+
autoload :Generic, 'bio/sequence/generic'
|
23
|
+
autoload :Format, 'bio/sequence/format'
|
46
24
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
25
|
+
def initialize(str)
|
26
|
+
@seq = str
|
27
|
+
end
|
28
|
+
|
29
|
+
def method_missing(*arg)
|
30
|
+
@seq.send(*arg)
|
31
|
+
end
|
32
|
+
|
33
|
+
attr_accessor :entry_id, :definition, :features, :references, :comments,
|
34
|
+
:date, :keywords, :dblinks, :taxonomy, :moltype, :seq
|
35
|
+
|
36
|
+
def output(style)
|
37
|
+
extend Bio::Sequence::Format
|
38
|
+
case style
|
39
|
+
when :fasta
|
40
|
+
format_fasta
|
41
|
+
when :gff
|
42
|
+
format_gff
|
43
|
+
when :genbank
|
44
|
+
format_genbank
|
45
|
+
when :embl
|
46
|
+
format_embl
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def auto
|
51
|
+
@moltype = guess
|
52
|
+
if @moltype == NA
|
53
|
+
@seq = NA.new(@seq)
|
51
54
|
else
|
52
|
-
AA.new(
|
55
|
+
@seq = AA.new(@seq)
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
56
|
-
def
|
57
|
-
|
59
|
+
def self.auto(str)
|
60
|
+
seq = self.new(str)
|
61
|
+
seq.auto
|
62
|
+
return seq
|
63
|
+
end
|
64
|
+
|
65
|
+
def guess(threshold = 0.9, length = 10000, index = 0)
|
66
|
+
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
|
67
|
+
cmp = str.composition
|
58
68
|
|
59
69
|
bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
|
60
70
|
cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
|
61
71
|
|
62
|
-
total =
|
72
|
+
total = @seq.length - cmp['N'] - cmp['n']
|
63
73
|
|
64
74
|
if bases.to_f / total > threshold
|
65
75
|
return NA
|
@@ -72,389 +82,19 @@ class Sequence < String
|
|
72
82
|
self.new(str).guess(*args)
|
73
83
|
end
|
74
84
|
|
75
|
-
def
|
76
|
-
|
77
|
-
|
78
|
-
alias to_str to_s
|
79
|
-
|
80
|
-
# Force self to re-initialize for clean up (remove white spaces,
|
81
|
-
# case unification).
|
82
|
-
def seq
|
83
|
-
self.class.new(self)
|
84
|
-
end
|
85
|
-
|
86
|
-
# Similar to the 'seq' method, but changes the self object destructively.
|
87
|
-
def normalize!
|
88
|
-
initialize(self)
|
89
|
-
self
|
90
|
-
end
|
91
|
-
alias seq! normalize!
|
92
|
-
|
93
|
-
def <<(*arg)
|
94
|
-
super(self.class.new(*arg))
|
85
|
+
def na
|
86
|
+
@seq = NA.new(@seq)
|
87
|
+
@moltype = NA
|
95
88
|
end
|
96
|
-
alias concat <<
|
97
|
-
|
98
|
-
def +(*arg)
|
99
|
-
self.class.new(super(*arg))
|
100
|
-
end
|
101
|
-
|
102
|
-
# Returns the subsequence of the self string.
|
103
|
-
def subseq(s = 1, e = self.length)
|
104
|
-
return nil if s < 1 or e < 1
|
105
|
-
s -= 1
|
106
|
-
e -= 1
|
107
|
-
self[s..e]
|
108
|
-
end
|
109
|
-
|
110
|
-
# Output the FASTA format string of the sequence. The 1st argument is
|
111
|
-
# used as the comment string. If the 2nd option is given, the output
|
112
|
-
# sequence will be folded.
|
113
|
-
def to_fasta(header = '', width = nil)
|
114
|
-
">#{header}\n" +
|
115
|
-
if width
|
116
|
-
self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
|
117
|
-
else
|
118
|
-
self.to_s + "\n"
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
# This method iterates on sub string with specified length 'window_size'.
|
123
|
-
# By specifing 'step_size', codon sized shifting or spliting genome
|
124
|
-
# sequence with ovelapping each end can easily be yielded.
|
125
|
-
#
|
126
|
-
# The remainder sequence at the terminal end will be returned.
|
127
|
-
#
|
128
|
-
# Example:
|
129
|
-
# # prints average GC% on each 100bp
|
130
|
-
# seq.window_search(100) do |subseq|
|
131
|
-
# puts subseq.gc
|
132
|
-
# end
|
133
|
-
# # prints every translated peptide (length 5aa) in the same frame
|
134
|
-
# seq.window_search(15, 3) do |subseq|
|
135
|
-
# puts subseq.translate
|
136
|
-
# end
|
137
|
-
# # split genome sequence by 10000bp with 1000bp overlap in fasta format
|
138
|
-
# i = 1
|
139
|
-
# remainder = seq.window_search(10000, 9000) do |subseq|
|
140
|
-
# puts subseq.to_fasta("segment #{i}", 60)
|
141
|
-
# i += 1
|
142
|
-
# end
|
143
|
-
# puts remainder.to_fasta("segment #{i}", 60)
|
144
|
-
#
|
145
|
-
def window_search(window_size, step_size = 1)
|
146
|
-
i = 0
|
147
|
-
0.step(self.length - window_size, step_size) do |i|
|
148
|
-
yield self[i, window_size]
|
149
|
-
end
|
150
|
-
return self[i + window_size .. -1]
|
151
|
-
end
|
152
|
-
|
153
|
-
# This method receive a hash of residues/bases to the particular values,
|
154
|
-
# and sum up the value along with the self sequence. Especially useful
|
155
|
-
# to use with the window_search method and amino acid indices etc.
|
156
|
-
def total(hash)
|
157
|
-
hash.default = 0.0 unless hash.default
|
158
|
-
sum = 0.0
|
159
|
-
self.each_byte do |x|
|
160
|
-
begin
|
161
|
-
sum += hash[x.chr]
|
162
|
-
end
|
163
|
-
end
|
164
|
-
return sum
|
165
|
-
end
|
166
|
-
|
167
|
-
# Returns a hash of the occurrence counts for each residue or base.
|
168
|
-
def composition
|
169
|
-
count = Hash.new(0)
|
170
|
-
self.scan(/./) do |x|
|
171
|
-
count[x] += 1
|
172
|
-
end
|
173
|
-
return count
|
174
|
-
end
|
175
|
-
|
176
|
-
# Returns a randomized sequence keeping its composition by default.
|
177
|
-
# The argument is required when generating a random sequence from the empty
|
178
|
-
# sequence (used by the class methods NA.randomize, AA.randomize).
|
179
|
-
# If the block is given, yields for each random residue/base.
|
180
|
-
def randomize(hash = nil)
|
181
|
-
length = self.length
|
182
|
-
if hash
|
183
|
-
count = hash.clone
|
184
|
-
count.each_value {|x| length += x}
|
185
|
-
else
|
186
|
-
count = self.composition
|
187
|
-
end
|
188
|
-
|
189
|
-
seq = ''
|
190
|
-
tmp = {}
|
191
|
-
length.times do
|
192
|
-
count.each do |k, v|
|
193
|
-
tmp[k] = v * rand
|
194
|
-
end
|
195
|
-
max = tmp.max {|a, b| a[1] <=> b[1]}
|
196
|
-
count[max.first] -= 1
|
197
|
-
|
198
|
-
if block_given?
|
199
|
-
yield max.first
|
200
|
-
else
|
201
|
-
seq += max.first
|
202
|
-
end
|
203
|
-
end
|
204
|
-
return self.class.new(seq)
|
205
|
-
end
|
206
|
-
|
207
|
-
# Generate a new random sequence with the given frequency of bases
|
208
|
-
# or residues. The sequence length is determined by the sum of each
|
209
|
-
# base/residue occurences.
|
210
|
-
def self.randomize(*arg, &block)
|
211
|
-
self.new('').randomize(*arg, &block)
|
212
|
-
end
|
213
|
-
|
214
|
-
# Receive a GenBank style position string and convert it to the Locations
|
215
|
-
# objects to splice the sequence itself. See also: bio/location.rb
|
216
|
-
#
|
217
|
-
# This method depends on Locations class, see bio/location.rb
|
218
|
-
def splicing(position)
|
219
|
-
unless position.is_a?(Locations) then
|
220
|
-
position = Locations.new(position)
|
221
|
-
end
|
222
|
-
s = ''
|
223
|
-
position.each do |location|
|
224
|
-
if location.sequence
|
225
|
-
s << location.sequence
|
226
|
-
else
|
227
|
-
exon = self.subseq(location.from, location.to)
|
228
|
-
begin
|
229
|
-
exon.complement! if location.strand < 0
|
230
|
-
rescue NameError
|
231
|
-
end
|
232
|
-
s << exon
|
233
|
-
end
|
234
|
-
end
|
235
|
-
return self.class.new(s)
|
236
|
-
end
|
237
|
-
|
238
|
-
|
239
|
-
# Nucleic Acid sequence
|
240
|
-
|
241
|
-
class NA < Sequence

  # Generate a nucleic acid sequence object from a string.
  # After the superclass initializer has run, the receiver is lowercased
  # and all spaces, tabs, newlines and carriage returns are removed.
  def initialize(str)
    super
    self.downcase!
    self.tr!(" \t\n\r", '')
  end

  # Splices the sequence with the superclass implementation (which depends
  # on the Locations class, see bio/location.rb) and then makes the result
  # consistent: if the spliced sequence contains any 'u', every 't' is
  # turned into 'u'; otherwise every 'u' is turned into 't' (a no-op in
  # that case, since no 'u' is present).
  def splicing(position)
    mRNA = super
    if mRNA.rna?
      mRNA.tr!('t', 'u')
    else
      mRNA.tr!('u', 't')
    end
    mRNA
  end

  # Returns complement sequence without reversing ("atgc" -> "tacg")
  # as a new object; the receiver is left untouched.
  def forward_complement
    s = self.class.new(self)
    s.forward_complement!
    s
  end

  # Convert to complement sequence without reversing ("atgc" -> "tacg").
  # Ambiguity codes are complemented as well (r<->y, m<->k, d<->h, v<->b;
  # s, w and n map to themselves). When the sequence contains 'u', the
  # RNA alphabet (a<->u) is used instead of the DNA one (a<->t).
  # Always returns self, even when tr! changed nothing.
  def forward_complement!
    if self.rna?
      self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
    else
      self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
    end
    self
  end

  # Returns reverse complement sequence ("atgc" -> "gcat") as a new object;
  # the receiver is left untouched.
  def reverse_complement
    s = self.class.new(self)
    s.reverse_complement!
    s
  end

  # Convert to reverse complement sequence ("atgc" -> "gcat") in place.
  # Returns self (the return value of forward_complement!).
  def reverse_complement!
    self.reverse!
    self.forward_complement!
  end

  # Aliases for short
  alias complement reverse_complement
  alias complement! reverse_complement!


  # Translate into the amino acid sequence from the given frame and the
  # selected codon table. The table also can be a Bio::CodonTable object.
  # The 'unknown' character is used for invalid/unknown codon (can be
  # used for 'nnn' and/or gap translation in practice).
  #
  # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
  # (4, 5 or 6 is also accepted) for the reverse strand. Any other frame
  # value is treated like frame 1.
  #
  # Trailing bases that do not fill a whole codon are ignored. A sequence
  # shorter than the frame offset translates to an empty amino acid
  # sequence.
  def translate(frame = 1, table = 1, unknown = 'X')
    if table.is_a?(Bio::CodonTable)
      ct = table
    else
      ct = Bio::CodonTable[table]
    end
    # Codons are looked up on the DNA form ('u' replaced by 't').
    naseq = self.dna
    case frame
    when 1, 2, 3
      from = frame - 1
    when 4, 5, 6
      from = frame - 4
      naseq.complement!
    when -1, -2, -3
      from = -1 - frame
      naseq.complement!
    else
      from = 0
    end
    nalen = naseq.length - from
    nalen -= nalen % 3
    # When the offset lies beyond the end of the sequence nalen is negative
    # and String#[] would return nil; translate nothing in that case.
    coding = nalen > 0 ? naseq[from, nalen] : ''
    aaseq = coding.gsub(/.{3}/) {|codon| ct[codon] || unknown}
    return Bio::Sequence::AA.new(aaseq)
  end

  # Returns counts of each codon in the sequence as a Hash whose default
  # value is 0. Codons are whatever window_search(3, 3) yields --
  # presumably successive non-overlapping triplets; confirm against
  # window_search.
  def codon_usage
    hash = Hash.new(0)
    self.window_search(3, 3) do |codon|
      hash[codon] += 1
    end
    return hash
  end

  # Calculate the ratio of GC / ATGC bases in percent ('u' is counted
  # together with 'a' and 't'). Assumes composition returns per-base
  # integer counts defaulting to 0, in which case the result is truncated
  # by integer division. Returns 0 when the sequence contains none of
  # a, t, u, g, c, instead of dividing by zero.
  def gc_percent
    count = self.composition
    at = count['a'] + count['t'] + count['u']
    gc = count['g'] + count['c']
    total = at + gc
    return 0 if total == 0
    return 100 * gc / total
  end

  # Show abnormal bases other than 'atgcu', as a sorted array without
  # duplicates.
  def illegal_bases
    self.scan(/[^atgcu]/).sort.uniq
  end

  # Estimate the weight of this biological string molecule.
  # NucleicAcid is defined in bio/data/na.rb; a second argument of true is
  # passed when the sequence contains 'u' (presumably selecting RNA
  # weights -- confirm against NucleicAcid.weight).
  def molecular_weight
    if self.rna?
      NucleicAcid.weight(self, true)
    else
      NucleicAcid.weight(self)
    end
  end

  # Convert the universal code string into the regular expression.
  # For a sequence containing 'u', the DNA form is handed to
  # NucleicAcid.to_re together with a true flag.
  def to_re
    if self.rna?
      NucleicAcid.to_re(self.dna, true)
    else
      NucleicAcid.to_re(self)
    end
  end

  # Convert the self string into the list of the names of each base,
  # looked up in NucleicAcid.names by the upper-cased one-letter code.
  def names
    array = []
    self.each_byte do |x|
      array.push(NucleicAcid.names[x.chr.upcase])
    end
    return array
  end

  # Output a DNA string by substituting 'u' to 't'.
  def dna
    self.tr('u', 't')
  end

  # In-place version of dna. Like String#tr!, returns nil when nothing
  # was substituted.
  def dna!
    self.tr!('u', 't')
  end

  # Output a RNA string by substituting 't' to 'u'.
  def rna
    self.tr('t', 'u')
  end

  # In-place version of rna. Like String#tr!, returns nil when nothing
  # was substituted.
  def rna!
    self.tr!('t', 'u')
  end

  # Truthy (the index of the first 'u') when the sequence contains 'u',
  # nil otherwise.
  def rna?
    self.index('u')
  end
  protected :rna?

  def pikachu
    self.dna.tr("atgc", "pika") # joke, of course :-)
  end

end
|
407
|
-
|
408
|
-
|
409
|
-
# Amino Acid sequence
|
410
|
-
|
411
|
-
class AA < Sequence
|
412
|
-
|
413
|
-
# Generate an amino acid sequence object from a string.
|
414
|
-
# Runs the superclass initializer with +str+, then normalizes the receiver
# in place: letters are upper-cased and every space, tab, newline and
# carriage return is removed.
def initialize(str)
  super
  upcase!
  tr!(" \t\n\r", '')
end
|
419
|
-
|
420
|
-
# Estimate the weight of this protein.
|
421
|
-
# AminoAcid is defined in bio/data/aa.rb
|
422
|
-
# Returns whatever AminoAcid.weight computes for this sequence
# (AminoAcid lives in bio/data/aa.rb).
def molecular_weight
  weight = AminoAcid.weight(self)
  weight
end
|
425
|
-
|
426
|
-
# Delegates to AminoAcid.to_re with this sequence and returns its result
# (presumably a regular expression, as the method name suggests).
def to_re
  pattern = AminoAcid.to_re(self)
  pattern
end
|
429
|
-
|
430
|
-
# Generate the list of the names of each residue along with the
|
431
|
-
# sequence (3 letters code).
|
432
|
-
# Returns an array with one entry per byte of the sequence: the value
# AminoAcid.names holds for that one-letter code (nil is kept for codes
# the table does not know, assuming a plain Hash-like lookup).
def codes
  self.unpack('C*').map do |byte|
    AminoAcid.names[byte.chr]
  end
end
|
439
|
-
|
440
|
-
# Similar to codes but returns long names.
|
441
|
-
# Maps every entry returned by codes through AminoAcid.names once more,
# giving the long name for each residue.
def names
  residue_codes = self.codes
  residue_codes.map { |code| AminoAcid.names[code] }
end
|
446
89
|
|
90
|
+
# Re-wraps the stored @seq as an AA object and records AA as the molecule
# type in @moltype. Returns AA (the value of the last assignment).
def aa
  protein = AA.new(@seq)
  @seq = protein
  @moltype = AA
end
|
448
94
|
|
449
95
|
end # Sequence
|
450
96
|
|
451
97
|
|
452
|
-
# A Sequence subclass that adds readable and writable annotation
# attributes and nothing else.
class Seq < Sequence
  attr_accessor :entry_id, :definition, :features, :references, :comments
  attr_accessor :date, :keywords, :dblinks, :taxonomy, :moltype
end
|
456
|
-
|
457
|
-
|
458
98
|
end # Bio
|
459
99
|
|
460
100
|
|