bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
|
@@ -1,23 +1,10 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/io/flatfile/indexer.rb - OBDA flatfile indexer
|
|
2
|
+
# = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
|
|
3
3
|
#
|
|
4
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
|
|
5
|
+
# License:: Ruby's
|
|
5
6
|
#
|
|
6
|
-
#
|
|
7
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
-
# License as published by the Free Software Foundation; either
|
|
9
|
-
# version 2 of the License, or (at your option) any later version.
|
|
10
|
-
#
|
|
11
|
-
# This library is distributed in the hope that it will be useful,
|
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
-
# Lesser General Public License for more details.
|
|
15
|
-
#
|
|
16
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
-
# License along with this library; if not, write to the Free Software
|
|
18
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
-
#
|
|
20
|
-
# $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
|
|
7
|
+
# $Id: indexer.rb,v 1.23 2006/02/22 08:41:03 ngoto Exp $
|
|
21
8
|
#
|
|
22
9
|
|
|
23
10
|
require 'bio/io/flatfile/index'
|
|
@@ -80,6 +67,8 @@ module Bio
|
|
|
80
67
|
BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
|
|
81
68
|
when 'Bio::Blast::WU::Report_TBlast'
|
|
82
69
|
BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
|
|
70
|
+
when 'Bio::PDB::ChemicalComponent'
|
|
71
|
+
PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
|
|
83
72
|
else
|
|
84
73
|
raise 'unknown or unsupported format'
|
|
85
74
|
end #case dbclass.to_s
|
|
@@ -130,10 +119,10 @@ module Bio
|
|
|
130
119
|
attr_reader :fileid
|
|
131
120
|
|
|
132
121
|
def each
|
|
133
|
-
pos = @flatfile.pos
|
|
134
122
|
@flatfile.each do |x|
|
|
135
123
|
@entry = x
|
|
136
|
-
|
|
124
|
+
pos = @flatfile.entry_start_pos
|
|
125
|
+
len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
|
|
137
126
|
begin
|
|
138
127
|
yield pos, len
|
|
139
128
|
rescue RuntimeError, NameError => evar
|
|
@@ -150,7 +139,6 @@ module Bio
|
|
|
150
139
|
DEBUG.print "This entry shall be incorrectly indexed.\n"
|
|
151
140
|
end
|
|
152
141
|
end #rescue
|
|
153
|
-
pos = @flatfile.pos
|
|
154
142
|
end
|
|
155
143
|
end
|
|
156
144
|
|
|
@@ -204,15 +192,6 @@ module Bio
|
|
|
204
192
|
end
|
|
205
193
|
self.add_secondary_namespaces(*sec_names)
|
|
206
194
|
end
|
|
207
|
-
def open_flatfile(fileid, file)
|
|
208
|
-
super
|
|
209
|
-
@flatfile.pos = 0
|
|
210
|
-
begin
|
|
211
|
-
pos = @flatfile.pos
|
|
212
|
-
line = @flatfile.gets
|
|
213
|
-
end until (!line or line =~ /^LOCUS /)
|
|
214
|
-
@flatfile.pos = pos
|
|
215
|
-
end
|
|
216
195
|
end #class GenBankParser
|
|
217
196
|
|
|
218
197
|
class GenPeptParser < GenBankParser
|
|
@@ -437,6 +416,35 @@ module Bio
|
|
|
437
416
|
end
|
|
438
417
|
end #class BlastDefaultReportParser
|
|
439
418
|
|
|
419
|
+
class PDBChemicalComponentParser < TemplateParser
|
|
420
|
+
NAMESTYLE = NameSpaces.new(
|
|
421
|
+
NameSpace.new( 'UNIQUE', Proc.new { |x| x.entry_id } )
|
|
422
|
+
)
|
|
423
|
+
PRIMARY = 'UNIQUE'
|
|
424
|
+
def initialize(klass, pri_name = nil, sec_names = nil)
|
|
425
|
+
super()
|
|
426
|
+
self.format = 'raw'
|
|
427
|
+
self.dbclass = Bio::PDB::ChemicalComponent
|
|
428
|
+
self.set_primary_namespace((pri_name or PRIMARY))
|
|
429
|
+
unless sec_names then
|
|
430
|
+
sec_names = []
|
|
431
|
+
@namestyle.each_value do |x|
|
|
432
|
+
sec_names << x.name if x.name != self.primary.name
|
|
433
|
+
end
|
|
434
|
+
end
|
|
435
|
+
self.add_secondary_namespaces(*sec_names)
|
|
436
|
+
end
|
|
437
|
+
def open_flatfile(fileid, file)
|
|
438
|
+
super
|
|
439
|
+
@flatfile.pos = 0
|
|
440
|
+
begin
|
|
441
|
+
pos = @flatfile.pos
|
|
442
|
+
line = @flatfile.gets
|
|
443
|
+
end until (!line or line =~ /^RESIDUE /)
|
|
444
|
+
@flatfile.pos = pos
|
|
445
|
+
end
|
|
446
|
+
end #class PDBChemicalComponentParser
|
|
447
|
+
|
|
440
448
|
end #module Parser
|
|
441
449
|
|
|
442
450
|
def self.makeindexBDB(name, parser, options, *files)
|
data/lib/bio/reference.rb
CHANGED
|
@@ -1,7 +1,23 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/reference.rb -
|
|
2
|
+
# = bio/reference.rb - Journal reference classes
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2001
|
|
4
|
+
# Copyright:: Copyright (C) 2001
|
|
5
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: reference.rb,v 1.21 2006/02/08 15:06:26 nakao Exp $
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
# Journal reference classes.
|
|
13
|
+
#
|
|
14
|
+
# == Examples
|
|
15
|
+
#
|
|
16
|
+
# == References
|
|
17
|
+
#
|
|
18
|
+
#
|
|
19
|
+
#
|
|
20
|
+
#--
|
|
5
21
|
#
|
|
6
22
|
# This library is free software; you can redistribute it and/or
|
|
7
23
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -17,13 +33,78 @@
|
|
|
17
33
|
# License along with this library; if not, write to the Free Software
|
|
18
34
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
35
|
#
|
|
20
|
-
|
|
36
|
+
#++
|
|
21
37
|
#
|
|
22
38
|
|
|
23
39
|
module Bio
|
|
24
40
|
|
|
41
|
+
# A class for journal reference information.
|
|
42
|
+
#
|
|
43
|
+
# === Examples
|
|
44
|
+
#
|
|
45
|
+
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
|
|
46
|
+
# 'title' => "Title of the study.",
|
|
47
|
+
# 'journal' => "Theor. J. Hoge",
|
|
48
|
+
# 'volume' => 12,
|
|
49
|
+
# 'issue' => 3,
|
|
50
|
+
# 'pages' => "123-145",
|
|
51
|
+
# 'year' => 2001,
|
|
52
|
+
# 'pubmed' => 12345678,
|
|
53
|
+
# 'medline' => 98765432,
|
|
54
|
+
# 'abstract' => "Hoge fuga. ...",
|
|
55
|
+
# 'url' => "http://example.com",
|
|
56
|
+
# 'mesh' => [],
|
|
57
|
+
# 'affiliations' => []}
|
|
58
|
+
# ref = Bio::Reference.new(hash)
|
|
59
|
+
#
|
|
60
|
+
# # Formats in the BiBTeX style.
|
|
61
|
+
# ref.format("bibtex")
|
|
62
|
+
#
|
|
63
|
+
# # Short-cut for Bio::Reference#format("bibtex")
|
|
64
|
+
# ref.bibtex
|
|
65
|
+
#
|
|
25
66
|
class Reference
|
|
26
67
|
|
|
68
|
+
# Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
|
|
69
|
+
attr_reader :authors
|
|
70
|
+
|
|
71
|
+
# "Title of the study."
|
|
72
|
+
attr_reader :title
|
|
73
|
+
|
|
74
|
+
# "Theor. J. Hoge"
|
|
75
|
+
attr_reader :journal
|
|
76
|
+
|
|
77
|
+
# 12
|
|
78
|
+
attr_reader :volume
|
|
79
|
+
|
|
80
|
+
# 3
|
|
81
|
+
attr_reader :issue
|
|
82
|
+
|
|
83
|
+
# "123-145"
|
|
84
|
+
attr_reader :pages
|
|
85
|
+
|
|
86
|
+
# 2001
|
|
87
|
+
attr_reader :year
|
|
88
|
+
|
|
89
|
+
# 12345678
|
|
90
|
+
attr_reader :pubmed
|
|
91
|
+
|
|
92
|
+
# 98765432
|
|
93
|
+
attr_reader :medline
|
|
94
|
+
|
|
95
|
+
# Abstract text in String.
|
|
96
|
+
attr_reader :abstract
|
|
97
|
+
|
|
98
|
+
# A URL String.
|
|
99
|
+
attr_reader :url
|
|
100
|
+
|
|
101
|
+
# MeSH terms in an Array.
|
|
102
|
+
attr_reader :mesh
|
|
103
|
+
|
|
104
|
+
# Affiliations in an Array.
|
|
105
|
+
attr_reader :affiliations
|
|
106
|
+
|
|
107
|
+
#
|
|
27
108
|
def initialize(hash)
|
|
28
109
|
hash.default = ''
|
|
29
110
|
@authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
|
|
@@ -43,9 +124,23 @@ module Bio
|
|
|
43
124
|
@mesh = [] if @mesh.empty?
|
|
44
125
|
@affiliations = [] if @affiliations.empty?
|
|
45
126
|
end
|
|
46
|
-
attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
|
|
47
|
-
:pubmed, :medline, :abstract, :url, :mesh, :affiliations
|
|
48
127
|
|
|
128
|
+
# Formats the reference in a given style.
|
|
129
|
+
#
|
|
130
|
+
# Styles:
|
|
131
|
+
# 0. nil - general
|
|
132
|
+
# 1. endnote - Endnote
|
|
133
|
+
# 2. bibitem - Bibitem (option acceptable)
|
|
134
|
+
# 3. bibtex - BiBTeX (option acceptable)
|
|
135
|
+
# 4. rd - rd (option acceptable)
|
|
136
|
+
# 5. nature - Nature (option acceptable)
|
|
137
|
+
# 6. science - Science
|
|
138
|
+
# 7. genome_biol - Genome Biology
|
|
139
|
+
# 8. genome_res - Genome Research
|
|
140
|
+
# 9. nar - Nucleic Acids Research
|
|
141
|
+
# 10. current - Current Biology
|
|
142
|
+
# 11. trends - Trends in *
|
|
143
|
+
# 12. cell - Cell Press
|
|
49
144
|
def format(style = nil, option = nil)
|
|
50
145
|
case style
|
|
51
146
|
when 'endnote'
|
|
@@ -77,19 +172,20 @@ module Bio
|
|
|
77
172
|
end
|
|
78
173
|
end
|
|
79
174
|
|
|
175
|
+
# Formats in the EndNote style.
|
|
80
176
|
def endnote
|
|
81
177
|
lines = []
|
|
82
178
|
lines << "%0 Journal Article"
|
|
83
179
|
@authors.each do |author|
|
|
84
180
|
lines << "%A #{author}"
|
|
85
181
|
end
|
|
86
|
-
lines << "%D #{@year}" unless @year.empty?
|
|
182
|
+
lines << "%D #{@year}" unless @year.to_s.empty?
|
|
87
183
|
lines << "%T #{@title}" unless @title.empty?
|
|
88
184
|
lines << "%J #{@journal}" unless @journal.empty?
|
|
89
|
-
lines << "%V #{@volume}" unless @volume.empty?
|
|
90
|
-
lines << "%N #{@issue}" unless @issue.empty?
|
|
185
|
+
lines << "%V #{@volume}" unless @volume.to_s.empty?
|
|
186
|
+
lines << "%N #{@issue}" unless @issue.to_s.empty?
|
|
91
187
|
lines << "%P #{@pages}" unless @pages.empty?
|
|
92
|
-
lines << "%M #{@pubmed}" unless @pubmed.empty?
|
|
188
|
+
lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
|
|
93
189
|
if @pubmed
|
|
94
190
|
cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
|
|
95
191
|
opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
|
|
@@ -104,6 +200,7 @@ module Bio
|
|
|
104
200
|
return lines.join("\n")
|
|
105
201
|
end
|
|
106
202
|
|
|
203
|
+
# Formats in the bibitem style.
|
|
107
204
|
def bibitem(item = nil)
|
|
108
205
|
item = "PMID:#{@pubmed}" unless item
|
|
109
206
|
pages = @pages.sub('-', '--')
|
|
@@ -115,6 +212,7 @@ module Bio
|
|
|
115
212
|
END
|
|
116
213
|
end
|
|
117
214
|
|
|
215
|
+
# Formats in the BiBTeX style.
|
|
118
216
|
def bibtex(section = nil)
|
|
119
217
|
section = "article" unless section
|
|
120
218
|
authors = authors_join(' and ', ' and ')
|
|
@@ -132,11 +230,13 @@ module Bio
|
|
|
132
230
|
END
|
|
133
231
|
end
|
|
134
232
|
|
|
233
|
+
# Formats in a general style.
|
|
135
234
|
def general
|
|
136
235
|
authors = @authors.join(', ')
|
|
137
236
|
"#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
|
|
138
237
|
end
|
|
139
238
|
|
|
239
|
+
# Formats in the RD style.
|
|
140
240
|
def rd(str = nil)
|
|
141
241
|
@abstract ||= str
|
|
142
242
|
lines = []
|
|
@@ -147,6 +247,8 @@ module Bio
|
|
|
147
247
|
return lines.join("\n\n")
|
|
148
248
|
end
|
|
149
249
|
|
|
250
|
+
# Formats in the Nature Publishing Group style.
|
|
251
|
+
# * http://www.nature.com
|
|
150
252
|
def nature(short = false)
|
|
151
253
|
if short
|
|
152
254
|
if @authors.size > 4
|
|
@@ -163,6 +265,8 @@ module Bio
|
|
|
163
265
|
end
|
|
164
266
|
end
|
|
165
267
|
|
|
268
|
+
# Formats in the Science style.
|
|
269
|
+
# * http://www.sciencemag.org/
|
|
166
270
|
def science
|
|
167
271
|
if @authors.size > 4
|
|
168
272
|
authors = rev_name(@authors[0]) + " et al."
|
|
@@ -173,28 +277,40 @@ module Bio
|
|
|
173
277
|
"#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
|
|
174
278
|
end
|
|
175
279
|
|
|
280
|
+
# Formats in the Genome Biology style.
|
|
281
|
+
# * http://genomebiology.com/
|
|
176
282
|
def genome_biol
|
|
177
283
|
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
|
|
178
284
|
journal = strip_dots(@journal)
|
|
179
285
|
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
|
|
180
286
|
end
|
|
287
|
+
# Formats in the Current Biology style.
|
|
288
|
+
# * http://www.current-biology.com/
|
|
181
289
|
alias current genome_biol
|
|
182
290
|
|
|
291
|
+
# Formats in the Genome Research style.
|
|
292
|
+
# * http://genome.org/
|
|
183
293
|
def genome_res
|
|
184
294
|
authors = authors_join(' and ')
|
|
185
295
|
"#{authors} #{@year}.\n #{@title} #{@journal} #{@volume}: #{@pages}."
|
|
186
296
|
end
|
|
187
297
|
|
|
298
|
+
# Formats in the Nucleic Acids Research style.
|
|
299
|
+
# * http://nar.oxfordjournals.org/
|
|
188
300
|
def nar
|
|
189
301
|
authors = authors_join(' and ')
|
|
190
302
|
"#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
|
|
191
303
|
end
|
|
192
304
|
|
|
305
|
+
# Formats in the CELL Press style.
|
|
306
|
+
# * http://www.cell.com/
|
|
193
307
|
def cell
|
|
194
308
|
authors = authors_join(' and ')
|
|
195
309
|
"#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
|
|
196
310
|
end
|
|
197
|
-
|
|
311
|
+
|
|
312
|
+
# Formats in the TRENDS Journals.
|
|
313
|
+
# * http://www.trends.com/
|
|
198
314
|
def trends
|
|
199
315
|
if @authors.size > 2
|
|
200
316
|
authors = "#{@authors[0]} et al."
|
|
@@ -235,22 +351,37 @@ module Bio
|
|
|
235
351
|
|
|
236
352
|
end
|
|
237
353
|
|
|
238
|
-
|
|
354
|
+
# Set of Bio::Reference.
|
|
355
|
+
#
|
|
356
|
+
# === Examples
|
|
357
|
+
#
|
|
358
|
+
# refs = Bio::References.new
|
|
359
|
+
# refs.append(Bio::Reference.new(hash))
|
|
360
|
+
# refs.each do |reference|
|
|
361
|
+
# ...
|
|
362
|
+
# end
|
|
363
|
+
#
|
|
239
364
|
class References
|
|
240
365
|
|
|
366
|
+
# Array of Bio::Reference.
|
|
367
|
+
attr_accessor :references
|
|
368
|
+
|
|
369
|
+
#
|
|
241
370
|
def initialize(ary = [])
|
|
242
371
|
@references = ary
|
|
243
372
|
end
|
|
244
|
-
attr_accessor :references
|
|
245
373
|
|
|
246
|
-
|
|
247
|
-
|
|
374
|
+
|
|
375
|
+
# Append a Bio::Reference object.
|
|
376
|
+
def append(reference)
|
|
377
|
+
@references.push(reference) if reference.is_a? Reference
|
|
248
378
|
return self
|
|
249
379
|
end
|
|
250
380
|
|
|
381
|
+
# Iterates each Bio::Reference object.
|
|
251
382
|
def each
|
|
252
|
-
@references.each do |
|
|
253
|
-
yield
|
|
383
|
+
@references.each do |reference|
|
|
384
|
+
yield reference
|
|
254
385
|
end
|
|
255
386
|
end
|
|
256
387
|
|
|
@@ -258,51 +389,3 @@ module Bio
|
|
|
258
389
|
|
|
259
390
|
end
|
|
260
391
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
=begin
|
|
264
|
-
|
|
265
|
-
= Bio::Reference
|
|
266
|
-
|
|
267
|
-
--- Bio::Reference.new(hash)
|
|
268
|
-
|
|
269
|
-
--- Bio::Reference#authors -> Array
|
|
270
|
-
--- Bio::Reference#title -> String
|
|
271
|
-
--- Bio::Reference#journal -> String
|
|
272
|
-
--- Bio::Reference#volume -> Fixnum
|
|
273
|
-
--- Bio::Reference#issue -> Fixnum
|
|
274
|
-
--- Bio::Reference#pages -> String
|
|
275
|
-
--- Bio::Reference#year -> Fixnum
|
|
276
|
-
--- Bio::Reference#pubmed -> Fixnum
|
|
277
|
-
--- Bio::Reference#medline -> Fixnum
|
|
278
|
-
--- Bio::Reference#abstract -> String
|
|
279
|
-
--- Bio::Reference#url -> String
|
|
280
|
-
--- Bio::Reference#mesh -> Array
|
|
281
|
-
--- Bio::Reference#affiliations -> Array
|
|
282
|
-
|
|
283
|
-
--- Bio::Reference#format(style = nil, option = nil) -> String
|
|
284
|
-
|
|
285
|
-
--- Bio::Reference#endnote
|
|
286
|
-
--- Bio::Reference#bibitem(item = nil) -> String
|
|
287
|
-
--- Bio::Reference#bibtex(section = nil) -> String
|
|
288
|
-
--- Bio::Reference#rd(str = nil) -> String
|
|
289
|
-
--- Bio::Reference#nature(short = false) -> String
|
|
290
|
-
--- Bio::Reference#science -> String
|
|
291
|
-
--- Bio::Reference#genome_biol -> String
|
|
292
|
-
--- Bio::Reference#genome_res -> String
|
|
293
|
-
--- Bio::Reference#nar -> String
|
|
294
|
-
--- Bio::Reference#cell -> String
|
|
295
|
-
--- Bio::Reference#trends -> String
|
|
296
|
-
--- Bio::Reference#general -> String
|
|
297
|
-
|
|
298
|
-
= Bio::References
|
|
299
|
-
|
|
300
|
-
--- Bio::References.new(ary = [])
|
|
301
|
-
|
|
302
|
-
--- Bio::References#references -> Array
|
|
303
|
-
--- Bio::References#append(a) -> Bio::References
|
|
304
|
-
--- Bio::References#each -> Array
|
|
305
|
-
|
|
306
|
-
=end
|
|
307
|
-
|
|
308
|
-
|
data/lib/bio/sequence.rb
CHANGED
|
@@ -1,65 +1,75 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/sequence.rb - biological sequence class
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2000-
|
|
4
|
+
# Copyright:: Copyright (C) 2000-2006
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>,
|
|
6
|
-
# Yoshinori K. Okuji <okuji@
|
|
6
|
+
# Yoshinori K. Okuji <okuji@enbug.org>,
|
|
7
7
|
# Naohisa Goto <ng@bioruby.org>
|
|
8
|
-
# License::
|
|
8
|
+
# License:: Ruby's
|
|
9
9
|
#
|
|
10
|
-
# $Id: sequence.rb,v 0.
|
|
11
|
-
#
|
|
12
|
-
#--
|
|
13
|
-
# *TODO* remove this functionality?
|
|
14
|
-
# You can use Bio::Seq instead of Bio::Sequence for short.
|
|
15
|
-
#++
|
|
16
|
-
#
|
|
17
|
-
#--
|
|
18
|
-
#
|
|
19
|
-
# This library is free software; you can redistribute it and/or
|
|
20
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
21
|
-
# License as published by the Free Software Foundation; either
|
|
22
|
-
# version 2 of the License, or (at your option) any later version.
|
|
23
|
-
#
|
|
24
|
-
# This library is distributed in the hope that it will be useful,
|
|
25
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
26
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
27
|
-
# Lesser General Public License for more details.
|
|
28
|
-
#
|
|
29
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
30
|
-
# License along with this library; if not, write to the Free Software
|
|
31
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
32
|
-
#
|
|
33
|
-
#++
|
|
10
|
+
# $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
|
|
34
11
|
#
|
|
35
12
|
|
|
36
|
-
require 'bio/
|
|
37
|
-
require 'bio/data/aa'
|
|
38
|
-
require 'bio/data/codontable'
|
|
39
|
-
require 'bio/location'
|
|
13
|
+
require 'bio/sequence/compat'
|
|
40
14
|
|
|
41
15
|
module Bio
|
|
42
16
|
|
|
43
|
-
|
|
17
|
+
class Sequence
|
|
44
18
|
|
|
45
|
-
|
|
19
|
+
autoload :Common, 'bio/sequence/common'
|
|
20
|
+
autoload :NA, 'bio/sequence/na'
|
|
21
|
+
autoload :AA, 'bio/sequence/aa'
|
|
22
|
+
autoload :Generic, 'bio/sequence/generic'
|
|
23
|
+
autoload :Format, 'bio/sequence/format'
|
|
46
24
|
|
|
47
|
-
def
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
25
|
+
def initialize(str)
|
|
26
|
+
@seq = str
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def method_missing(*arg)
|
|
30
|
+
@seq.send(*arg)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
attr_accessor :entry_id, :definition, :features, :references, :comments,
|
|
34
|
+
:date, :keywords, :dblinks, :taxonomy, :moltype, :seq
|
|
35
|
+
|
|
36
|
+
def output(style)
|
|
37
|
+
extend Bio::Sequence::Format
|
|
38
|
+
case style
|
|
39
|
+
when :fasta
|
|
40
|
+
format_fasta
|
|
41
|
+
when :gff
|
|
42
|
+
format_gff
|
|
43
|
+
when :genbank
|
|
44
|
+
format_genbank
|
|
45
|
+
when :embl
|
|
46
|
+
format_embl
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def auto
|
|
51
|
+
@moltype = guess
|
|
52
|
+
if @moltype == NA
|
|
53
|
+
@seq = NA.new(@seq)
|
|
51
54
|
else
|
|
52
|
-
AA.new(
|
|
55
|
+
@seq = AA.new(@seq)
|
|
53
56
|
end
|
|
54
57
|
end
|
|
55
58
|
|
|
56
|
-
def
|
|
57
|
-
|
|
59
|
+
def self.auto(str)
|
|
60
|
+
seq = self.new(str)
|
|
61
|
+
seq.auto
|
|
62
|
+
return seq
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def guess(threshold = 0.9, length = 10000, index = 0)
|
|
66
|
+
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
|
|
67
|
+
cmp = str.composition
|
|
58
68
|
|
|
59
69
|
bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
|
|
60
70
|
cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
|
|
61
71
|
|
|
62
|
-
total =
|
|
72
|
+
total = @seq.length - cmp['N'] - cmp['n']
|
|
63
73
|
|
|
64
74
|
if bases.to_f / total > threshold
|
|
65
75
|
return NA
|
|
@@ -72,389 +82,19 @@ class Sequence < String
|
|
|
72
82
|
self.new(str).guess(*args)
|
|
73
83
|
end
|
|
74
84
|
|
|
75
|
-
def
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
alias to_str to_s
|
|
79
|
-
|
|
80
|
-
# Force self to re-initialize for clean up (remove white spaces,
|
|
81
|
-
# case unification).
|
|
82
|
-
def seq
|
|
83
|
-
self.class.new(self)
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
# Similar to the 'seq' method, but changes the self object destructively.
|
|
87
|
-
def normalize!
|
|
88
|
-
initialize(self)
|
|
89
|
-
self
|
|
90
|
-
end
|
|
91
|
-
alias seq! normalize!
|
|
92
|
-
|
|
93
|
-
def <<(*arg)
|
|
94
|
-
super(self.class.new(*arg))
|
|
85
|
+
def na
|
|
86
|
+
@seq = NA.new(@seq)
|
|
87
|
+
@moltype = NA
|
|
95
88
|
end
|
|
96
|
-
alias concat <<
|
|
97
|
-
|
|
98
|
-
def +(*arg)
|
|
99
|
-
self.class.new(super(*arg))
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
# Returns the subsequence of the self string.
|
|
103
|
-
def subseq(s = 1, e = self.length)
|
|
104
|
-
return nil if s < 1 or e < 1
|
|
105
|
-
s -= 1
|
|
106
|
-
e -= 1
|
|
107
|
-
self[s..e]
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
# Output the FASTA format string of the sequence. The 1st argument is
|
|
111
|
-
# used as the comment string. If the 2nd option is given, the output
|
|
112
|
-
# sequence will be folded.
|
|
113
|
-
def to_fasta(header = '', width = nil)
|
|
114
|
-
">#{header}\n" +
|
|
115
|
-
if width
|
|
116
|
-
self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
|
|
117
|
-
else
|
|
118
|
-
self.to_s + "\n"
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
# This method iterates on sub string with specified length 'window_size'.
|
|
123
|
-
# By specifing 'step_size', codon sized shifting or spliting genome
|
|
124
|
-
# sequence with ovelapping each end can easily be yielded.
|
|
125
|
-
#
|
|
126
|
-
# The remainder sequence at the terminal end will be returned.
|
|
127
|
-
#
|
|
128
|
-
# Example:
|
|
129
|
-
# # prints average GC% on each 100bp
|
|
130
|
-
# seq.window_search(100) do |subseq|
|
|
131
|
-
# puts subseq.gc
|
|
132
|
-
# end
|
|
133
|
-
# # prints every translated peptide (length 5aa) in the same frame
|
|
134
|
-
# seq.window_search(15, 3) do |subseq|
|
|
135
|
-
# puts subseq.translate
|
|
136
|
-
# end
|
|
137
|
-
# # split genome sequence by 10000bp with 1000bp overlap in fasta format
|
|
138
|
-
# i = 1
|
|
139
|
-
# remainder = seq.window_search(10000, 9000) do |subseq|
|
|
140
|
-
# puts subseq.to_fasta("segment #{i}", 60)
|
|
141
|
-
# i += 1
|
|
142
|
-
# end
|
|
143
|
-
# puts remainder.to_fasta("segment #{i}", 60)
|
|
144
|
-
#
|
|
145
|
-
def window_search(window_size, step_size = 1)
|
|
146
|
-
i = 0
|
|
147
|
-
0.step(self.length - window_size, step_size) do |i|
|
|
148
|
-
yield self[i, window_size]
|
|
149
|
-
end
|
|
150
|
-
return self[i + window_size .. -1]
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
# This method receive a hash of residues/bases to the particular values,
|
|
154
|
-
# and sum up the value along with the self sequence. Especially useful
|
|
155
|
-
# to use with the window_search method and amino acid indices etc.
|
|
156
|
-
def total(hash)
|
|
157
|
-
hash.default = 0.0 unless hash.default
|
|
158
|
-
sum = 0.0
|
|
159
|
-
self.each_byte do |x|
|
|
160
|
-
begin
|
|
161
|
-
sum += hash[x.chr]
|
|
162
|
-
end
|
|
163
|
-
end
|
|
164
|
-
return sum
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
# Returns a hash of the occurrence counts for each residue or base.
|
|
168
|
-
def composition
|
|
169
|
-
count = Hash.new(0)
|
|
170
|
-
self.scan(/./) do |x|
|
|
171
|
-
count[x] += 1
|
|
172
|
-
end
|
|
173
|
-
return count
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
# Returns a randomized sequence keeping its composition by default.
|
|
177
|
-
# The argument is required when generating a random sequence from the empty
|
|
178
|
-
# sequence (used by the class methods NA.randomize, AA.randomize).
|
|
179
|
-
# If the block is given, yields for each random residue/base.
|
|
180
|
-
def randomize(hash = nil)
|
|
181
|
-
length = self.length
|
|
182
|
-
if hash
|
|
183
|
-
count = hash.clone
|
|
184
|
-
count.each_value {|x| length += x}
|
|
185
|
-
else
|
|
186
|
-
count = self.composition
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
seq = ''
|
|
190
|
-
tmp = {}
|
|
191
|
-
length.times do
|
|
192
|
-
count.each do |k, v|
|
|
193
|
-
tmp[k] = v * rand
|
|
194
|
-
end
|
|
195
|
-
max = tmp.max {|a, b| a[1] <=> b[1]}
|
|
196
|
-
count[max.first] -= 1
|
|
197
|
-
|
|
198
|
-
if block_given?
|
|
199
|
-
yield max.first
|
|
200
|
-
else
|
|
201
|
-
seq += max.first
|
|
202
|
-
end
|
|
203
|
-
end
|
|
204
|
-
return self.class.new(seq)
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
# Generate a new random sequence with the given frequency of bases
|
|
208
|
-
# or residues. The sequence length is determined by the sum of each
|
|
209
|
-
# base/residue occurences.
|
|
210
|
-
def self.randomize(*arg, &block)
|
|
211
|
-
self.new('').randomize(*arg, &block)
|
|
212
|
-
end
|
|
213
|
-
|
|
214
|
-
# Receive a GenBank style position string and convert it to the Locations
|
|
215
|
-
# objects to splice the sequence itself. See also: bio/location.rb
|
|
216
|
-
#
|
|
217
|
-
# This method depends on Locations class, see bio/location.rb
|
|
218
|
-
def splicing(position)
|
|
219
|
-
unless position.is_a?(Locations) then
|
|
220
|
-
position = Locations.new(position)
|
|
221
|
-
end
|
|
222
|
-
s = ''
|
|
223
|
-
position.each do |location|
|
|
224
|
-
if location.sequence
|
|
225
|
-
s << location.sequence
|
|
226
|
-
else
|
|
227
|
-
exon = self.subseq(location.from, location.to)
|
|
228
|
-
begin
|
|
229
|
-
exon.complement! if location.strand < 0
|
|
230
|
-
rescue NameError
|
|
231
|
-
end
|
|
232
|
-
s << exon
|
|
233
|
-
end
|
|
234
|
-
end
|
|
235
|
-
return self.class.new(s)
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
# Nucleic Acid sequence
|
|
240
|
-
|
|
241
|
-
class NA < Sequence
|
|
242
|
-
|
|
243
|
-
# Generate a nucleic acid sequence object from a string.
|
|
244
|
-
def initialize(str)
|
|
245
|
-
super
|
|
246
|
-
self.downcase!
|
|
247
|
-
self.tr!(" \t\n\r",'')
|
|
248
|
-
end
|
|
249
|
-
|
|
250
|
-
# This method depends on Locations class, see bio/location.rb
|
|
251
|
-
def splicing(position)
|
|
252
|
-
mRNA = super
|
|
253
|
-
if mRNA.rna?
|
|
254
|
-
mRNA.tr!('t', 'u')
|
|
255
|
-
else
|
|
256
|
-
mRNA.tr!('u', 't')
|
|
257
|
-
end
|
|
258
|
-
mRNA
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
# Returns complement sequence without reversing ("atgc" -> "tacg")
|
|
262
|
-
def forward_complement
|
|
263
|
-
s = self.class.new(self)
|
|
264
|
-
s.forward_complement!
|
|
265
|
-
s
|
|
266
|
-
end
|
|
267
|
-
|
|
268
|
-
# Convert to complement sequence without reversing ("atgc" -> "tacg")
|
|
269
|
-
def forward_complement!
|
|
270
|
-
if self.rna?
|
|
271
|
-
self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
|
|
272
|
-
else
|
|
273
|
-
self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
|
|
274
|
-
end
|
|
275
|
-
self
|
|
276
|
-
end
|
|
277
|
-
|
|
278
|
-
# Returns reverse complement sequence ("atgc" -> "gcat")
|
|
279
|
-
def reverse_complement
|
|
280
|
-
s = self.class.new(self)
|
|
281
|
-
s.reverse_complement!
|
|
282
|
-
s
|
|
283
|
-
end
|
|
284
|
-
|
|
285
|
-
# Convert to reverse complement sequence ("atgc" -> "gcat")
|
|
286
|
-
def reverse_complement!
|
|
287
|
-
self.reverse!
|
|
288
|
-
self.forward_complement!
|
|
289
|
-
end
|
|
290
|
-
|
|
291
|
-
# Aliases for short
|
|
292
|
-
alias complement reverse_complement
|
|
293
|
-
alias complement! reverse_complement!
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
# Translate into the amino acid sequence from the given frame and the
|
|
297
|
-
# selected codon table. The table also can be a Bio::CodonTable object.
|
|
298
|
-
# The 'unknown' character is used for invalid/unknown codon (can be
|
|
299
|
-
# used for 'nnn' and/or gap translation in practice).
|
|
300
|
-
#
|
|
301
|
-
# Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
|
|
302
|
-
# (4, 5 or 6 is also accepted) for the reverse strand.
|
|
303
|
-
def translate(frame = 1, table = 1, unknown = 'X')
|
|
304
|
-
if table.is_a?(Bio::CodonTable)
|
|
305
|
-
ct = table
|
|
306
|
-
else
|
|
307
|
-
ct = Bio::CodonTable[table]
|
|
308
|
-
end
|
|
309
|
-
naseq = self.dna
|
|
310
|
-
case frame
|
|
311
|
-
when 1, 2, 3
|
|
312
|
-
from = frame - 1
|
|
313
|
-
when 4, 5, 6
|
|
314
|
-
from = frame - 4
|
|
315
|
-
naseq.complement!
|
|
316
|
-
when -1, -2, -3
|
|
317
|
-
from = -1 - frame
|
|
318
|
-
naseq.complement!
|
|
319
|
-
else
|
|
320
|
-
from = 0
|
|
321
|
-
end
|
|
322
|
-
nalen = naseq.length - from
|
|
323
|
-
nalen -= nalen % 3
|
|
324
|
-
aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
|
|
325
|
-
return Bio::Sequence::AA.new(aaseq)
|
|
326
|
-
end
|
|
327
|
-
|
|
328
|
-
# Returns counts of the each codon in the sequence by Hash.
|
|
329
|
-
def codon_usage
|
|
330
|
-
hash = Hash.new(0)
|
|
331
|
-
self.window_search(3, 3) do |codon|
|
|
332
|
-
hash[codon] += 1
|
|
333
|
-
end
|
|
334
|
-
return hash
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
# Calculate the ratio of GC / ATGC bases in percent.
|
|
338
|
-
def gc_percent
|
|
339
|
-
count = self.composition
|
|
340
|
-
at = count['a'] + count['t'] + count['u']
|
|
341
|
-
gc = count['g'] + count['c']
|
|
342
|
-
gc = 100 * gc / (at + gc)
|
|
343
|
-
return gc
|
|
344
|
-
end
|
|
345
|
-
|
|
346
|
-
# Show abnormal bases other than 'atgcu'.
|
|
347
|
-
def illegal_bases
|
|
348
|
-
self.scan(/[^atgcu]/).sort.uniq
|
|
349
|
-
end
|
|
350
|
-
|
|
351
|
-
# Estimate the weight of this biological string molecule.
|
|
352
|
-
# NucleicAcid is defined in bio/data/na.rb
|
|
353
|
-
def molecular_weight
|
|
354
|
-
if self.rna?
|
|
355
|
-
NucleicAcid.weight(self, true)
|
|
356
|
-
else
|
|
357
|
-
NucleicAcid.weight(self)
|
|
358
|
-
end
|
|
359
|
-
end
|
|
360
|
-
|
|
361
|
-
# Convert the universal code string into the regular expression.
|
|
362
|
-
def to_re
|
|
363
|
-
if self.rna?
|
|
364
|
-
NucleicAcid.to_re(self.dna, true)
|
|
365
|
-
else
|
|
366
|
-
NucleicAcid.to_re(self)
|
|
367
|
-
end
|
|
368
|
-
end
|
|
369
|
-
|
|
370
|
-
# Convert the self string into the list of the names of the each base.
|
|
371
|
-
def names
|
|
372
|
-
array = []
|
|
373
|
-
self.each_byte do |x|
|
|
374
|
-
array.push(NucleicAcid.names[x.chr.upcase])
|
|
375
|
-
end
|
|
376
|
-
return array
|
|
377
|
-
end
|
|
378
|
-
|
|
379
|
-
# Output a DNA string by substituting 'u' to 't'.
|
|
380
|
-
def dna
|
|
381
|
-
self.tr('u', 't')
|
|
382
|
-
end
|
|
383
|
-
|
|
384
|
-
def dna!
|
|
385
|
-
self.tr!('u', 't')
|
|
386
|
-
end
|
|
387
|
-
|
|
388
|
-
# Output a RNA string by substituting 't' to 'u'.
|
|
389
|
-
def rna
|
|
390
|
-
self.tr('t', 'u')
|
|
391
|
-
end
|
|
392
|
-
|
|
393
|
-
def rna!
|
|
394
|
-
self.tr!('t', 'u')
|
|
395
|
-
end
|
|
396
|
-
|
|
397
|
-
def rna?
|
|
398
|
-
self.index('u')
|
|
399
|
-
end
|
|
400
|
-
protected :rna?
|
|
401
|
-
|
|
402
|
-
def pikachu
|
|
403
|
-
self.dna.tr("atgc", "pika") # joke, of course :-)
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
end
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
# Amino Acid sequence
|
|
410
|
-
|
|
411
|
-
class AA < Sequence
|
|
412
|
-
|
|
413
|
-
# Generate a amino acid sequence object from a string.
|
|
414
|
-
def initialize(str)
|
|
415
|
-
super
|
|
416
|
-
self.upcase!
|
|
417
|
-
self.tr!(" \t\n\r",'')
|
|
418
|
-
end
|
|
419
|
-
|
|
420
|
-
# Estimate the weight of this protein.
|
|
421
|
-
# AminoAcid is defined in bio/data/aa.rb
|
|
422
|
-
def molecular_weight
|
|
423
|
-
AminoAcid.weight(self)
|
|
424
|
-
end
|
|
425
|
-
|
|
426
|
-
def to_re
|
|
427
|
-
AminoAcid.to_re(self)
|
|
428
|
-
end
|
|
429
|
-
|
|
430
|
-
# Generate the list of the names of the each residue along with the
|
|
431
|
-
# sequence (3 letters code).
|
|
432
|
-
def codes
|
|
433
|
-
array = []
|
|
434
|
-
self.each_byte do |x|
|
|
435
|
-
array.push(AminoAcid.names[x.chr])
|
|
436
|
-
end
|
|
437
|
-
return array
|
|
438
|
-
end
|
|
439
|
-
|
|
440
|
-
# Similar to codes but returns long names.
|
|
441
|
-
def names
|
|
442
|
-
self.codes.map do |x|
|
|
443
|
-
AminoAcid.names[x]
|
|
444
|
-
end
|
|
445
|
-
end
|
|
446
89
|
|
|
90
|
+
def aa
|
|
91
|
+
@seq = AA.new(@seq)
|
|
92
|
+
@moltype = AA
|
|
447
93
|
end
|
|
448
94
|
|
|
449
95
|
end # Sequence
|
|
450
96
|
|
|
451
97
|
|
|
452
|
-
class Seq < Sequence
|
|
453
|
-
attr_accessor :entry_id, :definition, :features, :references, :comments,
|
|
454
|
-
:date, :keywords, :dblinks, :taxonomy, :moltype
|
|
455
|
-
end
|
|
456
|
-
|
|
457
|
-
|
|
458
98
|
end # Bio
|
|
459
99
|
|
|
460
100
|
|