bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/reference.rb - journal reference class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: reference.rb,v 1.18 2005/12/18 16:58:58 nakao Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
module Bio
|
|
24
|
+
|
|
25
|
+
# Bibliographic data of one journal article, with formatters for several
# citation styles (EndNote, BibTeX, \bibitem, RD and a few journal layouts).
class Reference

  # Builds a reference from a hash keyed by the String field names
  # 'authors', 'title', 'journal', 'volume', 'issue', 'pages', 'year',
  # 'pubmed', 'medline', 'abstract', 'url', 'mesh' and 'affiliations'.
  #
  # Missing (or nil) scalar fields become '' and missing list fields
  # ('authors', 'mesh', 'affiliations') become [].  The hash is only read;
  # its default value is no longer overwritten.
  def initialize(hash)
    @authors      = field(hash, 'authors')   # [ "Hoge, J.P.", "Fuga, F.B." ]
    @title        = field(hash, 'title')     # "Title of the study."
    @journal      = field(hash, 'journal')   # "Theor. J. Hoge"
    @volume       = field(hash, 'volume')    # 12
    @issue        = field(hash, 'issue')     # 3
    @pages        = field(hash, 'pages')     # 123-145
    @year         = field(hash, 'year')      # 2001
    @pubmed       = field(hash, 'pubmed')    # 12345678
    @medline      = field(hash, 'medline')   # 98765432
    @abstract     = field(hash, 'abstract')
    @url          = field(hash, 'url')
    @mesh         = field(hash, 'mesh')
    @affiliations = field(hash, 'affiliations')
    @authors      = [] if @authors.empty?
    @mesh         = [] if @mesh.empty?
    @affiliations = [] if @affiliations.empty?
  end
  attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
    :pubmed, :medline, :abstract, :url, :mesh, :affiliations

  # Dispatches to one of the formatters below according to +style+.
  # 'endnote', 'bibitem', 'bibtex' and 'rd' are matched exactly; the journal
  # styles are matched case-insensitively.  Any other value (including nil)
  # selects #general.  +option+ is forwarded to the formatters that accept
  # an argument (bibitem, bibtex, rd, nature).
  def format(style = nil, option = nil)
    case style
    when 'endnote'           then endnote
    when 'bibitem'           then bibitem(option)
    when 'bibtex'            then bibtex(option)
    when 'rd'                then rd(option)
    when /^nature$/i         then nature(option)
    when /^science$/i        then science
    when /^genome\s*_*biol/i then genome_biol
    when /^genome\s*_*res/i  then genome_res
    when /^nar$/i            then nar
    when /^current/i         then current
    when /^trends/i          then trends
    when /^cell$/i           then cell
    else                          general
    end
  end

  # Returns the reference as EndNote tagged lines joined by "\n".
  # Tags for empty fields are omitted.  When a PubMed ID is present, @url is
  # replaced by the Entrez query URL for that ID before the %U line is
  # written; without a PubMed ID the stored URL is left as it is.
  def endnote
    lines = ["%0 Journal Article"]
    @authors.each { |author| lines << "%A #{author}" }
    lines << "%D #{@year}"    unless empty_field?(@year)
    lines << "%T #{@title}"   unless empty_field?(@title)
    lines << "%J #{@journal}" unless empty_field?(@journal)
    lines << "%V #{@volume}"  unless empty_field?(@volume)
    lines << "%N #{@issue}"   unless empty_field?(@issue)
    lines << "%P #{@pages}"   unless empty_field?(@pages)
    unless empty_field?(@pubmed)
      lines << "%M #{@pubmed}"
      cgi  = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
      opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
      @url = "#{cgi}?#{opts}=#{@pubmed}"
    end
    lines << "%U #{@url}"      unless empty_field?(@url)
    lines << "%X #{@abstract}" unless empty_field?(@abstract)
    @mesh.each { |term| lines << "%K #{term}" }
    lines << "%+ #{@affiliations.join(' ')}" unless @affiliations.empty?
    lines.join("\n")
  end

  # Returns a LaTeX \bibitem entry (four lines, no trailing newline).
  # +item+ is the citation key; it defaults to "PMID:<pubmed>".  The first
  # '-' in the page range is written as '--'.
  def bibitem(item = nil)
    item = "PMID:#{@pubmed}" unless item
    pages = @pages.to_s.sub('-', '--')
    [
      "\\bibitem{#{item}}",
      @authors.join(', '),
      "#{@title},",
      "{\\em #{@journal}}, #{@volume}(#{@issue}):#{pages}, #{@year}."
    ].map { |line| line.strip }.join("\n")
  end

  # Returns a BibTeX entry terminated by a newline.  +section+ is the entry
  # type and defaults to "article"; the key is "PMID:<pubmed>".  Authors are
  # joined with ' and ' and the first '-' of the pages becomes '--'.
  def bibtex(section = nil)
    section = "article" unless section
    authors = authors_join(' and ', ' and ')
    pages = @pages.to_s.sub('-', '--')
    [
      "@#{section}{PMID:#{@pubmed},",
      "  author  = {#{authors}},",
      "  title   = {#{@title}},",
      "  journal = {#{@journal}},",
      "  year    = {#{@year}},",
      "  volume  = {#{@volume}},",
      "  number  = {#{@issue}},",
      "  pages   = {#{pages}},",
      "}"
    ].join("\n") + "\n"
  end

  # Default style: Authors (year). "Title" Journal volume:pages.
  def general
    authors = @authors.join(', ')
    "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
  end

  # Returns an RD-formatted entry: a heading with the title, a bullet with
  # the authors, a bullet with the citation, then the abstract, separated by
  # blank lines.  +str+ is used as the abstract text when the reference has
  # no abstract of its own.
  def rd(str = nil)
    abstract = empty_field?(@abstract) ? str : @abstract
    lines = []
    lines << "== #{@title}"
    lines << "* #{authors_join(' and ')}"
    lines << "* #{@journal} #{@year} #{@volume}:#{@pages} [PMID:#{@pubmed}]"
    lines << abstract
    lines.join("\n\n")
  end

  # Nature style.  With a true +short+ the title is dropped and more than
  # four authors collapse to "First et al."; otherwise all authors are
  # listed (last one joined by ' & ') followed by the title.
  def nature(short = false)
    if short
      if @authors.size > 4
        authors = "#{@authors[0]} et al."
      elsif @authors.size == 1
        authors = "#{@authors[0]}"
      else
        authors = authors_join(' & ')
      end
      "#{authors} #{@journal} #{@volume}, #{@pages} (#{@year})."
    else
      authors = authors_join(' & ')
      "#{authors} #{@title} #{@journal} #{@volume}, #{@pages} (#{@year})."
    end
  end

  # Science style: initials precede the family name, more than four authors
  # collapse to "First et al.", and only the first page is printed.
  def science
    if @authors.size > 4
      authors = rev_name(@authors[0]) + " et al."
    else
      authors = @authors.map { |name| rev_name(name) }.join(', ')
    end
    page_from = @pages.to_s.split('-').first
    "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
  end

  # Genome Biology style: commas and dots are removed from the author names
  # and from the journal name.  Also available as #current.
  def genome_biol
    authors = @authors.map { |name| strip_dots(name) }.join(', ')
    journal = strip_dots(@journal)
    "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
  end
  alias current genome_biol

  # Genome Research style (two lines; the second starts with one space).
  def genome_res
    authors = authors_join(' and ')
    "#{authors} #{@year}.\n  #{@title} #{@journal} #{@volume}: #{@pages}."
  end

  # Nucleic Acids Research style.
  def nar
    authors = authors_join(' and ')
    "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
  end

  # Cell style.
  def cell
    authors = authors_join(' and ')
    "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{@pages}."
  end

  # Trends style: more than two authors collapse to "First et al.".
  # No trailing period is written.
  def trends
    if @authors.size > 2
      authors = "#{@authors[0]} et al."
    elsif @authors.size == 1
      authors = "#{@authors[0]}"
    else
      authors = authors_join(' and ')
    end
    "#{authors} (#{@year}) #{@title} #{@journal} #{@volume}, #{@pages}"
  end


  private

  # Reads +key+ from +hash+, mapping a missing key or nil value to ''.
  def field(hash, key)
    value = hash.fetch(key, '')
    value.nil? ? '' : value
  end

  # True when +value+ is nil or its string form is empty.  Going through
  # to_s lets numeric field values (e.g. an Integer year) be formatted too.
  def empty_field?(value)
    value.nil? || value.to_s.empty?
  end

  # Removes every ',' and '.' from +data+; returns nil for nil input.
  def strip_dots(data)
    data.tr(',.', '') if data
  end

  # Joins the author list with +sep+, placing +amp+ before the last author.
  # Returns the single name unchanged for one author and '' for none.
  def authors_join(amp, sep = ', ')
    case @authors.length
    when 0 then ""
    when 1 then @authors[0]
    else        @authors[0..-2].join(sep) + "#{amp}" + @authors[-1]
    end
  end

  # Turns "Family, Initials" into "Initials Family".  Whitespace after the
  # comma is optional.  Names without a comma are returned unchanged.
  def rev_name(name)
    if name =~ /,/
      name, initial = name.split(/,\s*/)
      name = "#{initial} #{name}"
    end
    name
  end

end
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# An ordered collection of Reference objects.
class References
  # each is defined below, so the standard iteration helpers (map, select,
  # to_a, ...) come for free.
  include Enumerable

  # +ary+ is used as the backing array as-is (it is not copied).
  def initialize(ary = [])
    @references = ary
  end
  attr_accessor :references

  # Adds +a+ to the collection when it is a Reference; anything else is
  # silently ignored.  Returns self so that calls can be chained.
  def append(a)
    @references.push(a) if a.is_a? Reference
    self
  end

  # Yields every stored reference in order and returns the backing array.
  # Without a block an enumerator over the references is returned instead
  # of raising LocalJumpError.
  def each
    return to_enum(:each) unless block_given?

    @references.each do |x|
      yield x
    end
  end

end
|
|
258
|
+
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
=begin
|
|
264
|
+
|
|
265
|
+
= Bio::Reference
|
|
266
|
+
|
|
267
|
+
--- Bio::Reference.new(hash)
|
|
268
|
+
|
|
269
|
+
--- Bio::Reference#authors -> Array
|
|
270
|
+
--- Bio::Reference#title -> String
|
|
271
|
+
--- Bio::Reference#journal -> String
|
|
272
|
+
--- Bio::Reference#volume -> Fixnum
|
|
273
|
+
--- Bio::Reference#issue -> Fixnum
|
|
274
|
+
--- Bio::Reference#pages -> String
|
|
275
|
+
--- Bio::Reference#year -> Fixnum
|
|
276
|
+
--- Bio::Reference#pubmed -> Fixnum
|
|
277
|
+
--- Bio::Reference#medline -> Fixnum
|
|
278
|
+
--- Bio::Reference#abstract -> String
|
|
279
|
+
--- Bio::Reference#url -> String
|
|
280
|
+
--- Bio::Reference#mesh -> Array
|
|
281
|
+
--- Bio::Reference#affiliations -> Array
|
|
282
|
+
|
|
283
|
+
--- Bio::Reference#format(style = nil, option = nil) -> String
|
|
284
|
+
|
|
285
|
+
--- Bio::Reference#endnote
|
|
286
|
+
--- Bio::Reference#bibitem(item = nil) -> String
|
|
287
|
+
--- Bio::Reference#bibtex(section = nil) -> String
|
|
288
|
+
--- Bio::Reference#rd(str = nil) -> String
|
|
289
|
+
--- Bio::Reference#nature(short = false) -> String
|
|
290
|
+
--- Bio::Reference#science -> String
|
|
291
|
+
--- Bio::Reference#genome_biol -> String
|
|
292
|
+
--- Bio::Reference#genome_res -> String
|
|
293
|
+
--- Bio::Reference#nar -> String
|
|
294
|
+
--- Bio::Reference#cell -> String
|
|
295
|
+
--- Bio::Reference#trends -> String
|
|
296
|
+
--- Bio::Reference#general -> String
|
|
297
|
+
|
|
298
|
+
= Bio::References
|
|
299
|
+
|
|
300
|
+
--- Bio::References.new(ary = [])
|
|
301
|
+
|
|
302
|
+
--- Bio::References#references -> Array
|
|
303
|
+
--- Bio::References#append(a) -> Bio::References
|
|
304
|
+
--- Bio::References#each -> Array
|
|
305
|
+
|
|
306
|
+
=end
|
|
307
|
+
|
|
308
|
+
|
data/lib/bio/sequence.rb
ADDED
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence.rb - biological sequence class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2000-2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>,
|
|
6
|
+
# Yoshinori K. Okuji <okuji@embug.org>,
|
|
7
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
8
|
+
# License:: LGPL
|
|
9
|
+
#
|
|
10
|
+
# $Id: sequence.rb,v 0.49 2005/11/27 15:46:01 k Exp $
|
|
11
|
+
#
|
|
12
|
+
#--
|
|
13
|
+
# *TODO* remove this functionality?
|
|
14
|
+
# You can use Bio::Seq instead of Bio::Sequence for short.
|
|
15
|
+
#++
|
|
16
|
+
#
|
|
17
|
+
#--
|
|
18
|
+
#
|
|
19
|
+
# This library is free software; you can redistribute it and/or
|
|
20
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
21
|
+
# License as published by the Free Software Foundation; either
|
|
22
|
+
# version 2 of the License, or (at your option) any later version.
|
|
23
|
+
#
|
|
24
|
+
# This library is distributed in the hope that it will be useful,
|
|
25
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
26
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
27
|
+
# Lesser General Public License for more details.
|
|
28
|
+
#
|
|
29
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
30
|
+
# License along with this library; if not, write to the Free Software
|
|
31
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
32
|
+
#
|
|
33
|
+
#++
|
|
34
|
+
#
|
|
35
|
+
|
|
36
|
+
require 'bio/data/na'
|
|
37
|
+
require 'bio/data/aa'
|
|
38
|
+
require 'bio/data/codontable'
|
|
39
|
+
require 'bio/location'
|
|
40
|
+
|
|
41
|
+
module Bio
|
|
42
|
+
|
|
43
|
+
# Nucleic/Amino Acid sequence
|
|
44
|
+
|
|
45
|
+
class Sequence < String
|
|
46
|
+
|
|
47
|
+
# Builds a sequence object for +str+: an NA instance when self.guess
# classifies the text as NA, otherwise an AA instance.
def self.auto(str)
  self.guess(str) == NA ? NA.new(str) : AA.new(str)
end
|
|
55
|
+
|
|
56
|
+
# Guesses the molecule type of the sequence text.
#
# Counts the characters A, T, G, C and U (either case) and divides by the
# sequence length with N/n excluded.  Returns the NA class when that
# fraction is greater than +threshold+, otherwise the AA class.  U is
# counted so that RNA text is recognised as nucleic acid as well.
#
# For an empty sequence (or one made only of N) the ratio is NaN, the
# comparison is false and AA is returned.
def guess(threshold = 0.9)
  bases = self.count('ATGCUatgcu')
  total = self.length - self.count('Nn')

  if bases.to_f / total > threshold
    NA
  else
    AA
  end
end
|
|
70
|
+
|
|
71
|
+
# Class-level shortcut: wraps +str+ in a new instance of the receiver and
# returns the result of its #guess, forwarding any extra arguments.
def self.guess(str, *args)
  candidate = self.new(str)
  candidate.guess(*args)
end
|
|
74
|
+
|
|
75
|
+
# Returns the sequence text as a new plain String object.
# to_str is an alias of this method.
def to_s
  plain = String.new(self)
  plain
end
alias to_str to_s
|
|
79
|
+
|
|
80
|
+
# Returns a new object of the receiver's own class built from the
# receiver's text, so that the class's initializer runs on it again
# (clean up: removal of white spaces, case unification).
def seq
  klass = self.class
  klass.new(self)
end
|
|
85
|
+
|
|
86
|
+
# Similar to the 'seq' method, but works destructively: the receiver's
# initializer is run again on the receiver itself, and the receiver is
# returned.  Also available as seq!.
def normalize!
  send(:initialize, self)
  self
end
alias seq! normalize!
|
|
92
|
+
|
|
93
|
+
# Appends to the receiver in place.  The arguments are first turned into
# a new object of the receiver's class, and that object is what gets
# passed on to the inherited <<.  Also available as concat.
def <<(*arg)
  addition = self.class.new(*arg)
  super(addition)
end
alias concat <<
|
|
97
|
+
|
|
98
|
+
# Concatenates with the inherited + and wraps the result in a new object
# of the receiver's class.
def +(*arg)
  joined = super(*arg)
  self.class.new(joined)
end
|
|
101
|
+
|
|
102
|
+
# Returns the subsequence of the self string.
# Positions are 1-based and inclusive: +s+ is the first position and +e+
# the last one (defaults: 1 and the sequence length).  Returns nil when
# either position is smaller than 1.
def subseq(s = 1, e = self.length)
  return nil if s < 1 || e < 1

  self[(s - 1)..(e - 1)]
end
|
|
109
|
+
|
|
110
|
+
# Output the FASTA format string of the sequence.  The 1st argument is
# used as the comment string (written after '>').  If the 2nd argument is
# given, the sequence is folded into lines of at most +width+ characters.
# The result always ends with a newline.
def to_fasta(header = '', width = nil)
  text = self.to_s
  body =
    if width
      text.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
    else
      text + "\n"
    end
  ">#{header}\n" + body
end
|
|
121
|
+
|
|
122
|
+
# Iterates over substrings of length 'window_size', advancing 'step_size'
# characters each time, and yields each of them.  By specifying
# 'step_size', codon-sized shifting or splitting a genome sequence with
# overlapping ends can easily be done.
#
# Returns the remainder of the sequence that follows the last yielded
# window.
#
# Example:
#   # prints average GC% on each 100bp
#   seq.window_search(100) do |subseq|
#     puts subseq.gc
#   end
#   # prints every translated peptide (length 5aa) in the same frame
#   seq.window_search(15, 3) do |subseq|
#     puts subseq.translate
#   end
#   # split genome sequence by 10000bp with 1000bp overlap in fasta format
#   i = 1
#   remainder = seq.window_search(10000, 9000) do |subseq|
#     puts subseq.to_fasta("segment #{i}", 60)
#     i += 1
#   end
#   puts remainder.to_fasta("segment #{i}", 60)
#
def window_search(window_size, step_size = 1)
  # Block parameters are block-local since Ruby 1.9, so the start of the
  # last window has to be recorded explicitly; relying on the block
  # parameter leaking into the method scope always left it at 0.
  last_start = 0
  0.step(self.length - window_size, step_size) do |start|
    yield self[start, window_size]
    last_start = start
  end
  self[last_start + window_size .. -1]
end
|
|
152
|
+
|
|
153
|
+
# This method receives a hash mapping residues/bases to particular values
# and sums up the values along the sequence itself.  Especially useful
# together with the window_search method and amino acid indices etc.
#
# Characters for which the hash lookup yields nil contribute 0.0.  The
# hash is only read: its default value is left untouched.
# Always returns a Float when the hash values are numeric.
def total(hash)
  sum = 0.0
  self.each_byte do |byte|
    value = hash[byte.chr]
    sum += value unless value.nil?
  end
  sum
end
|
|
166
|
+
|
|
167
|
+
# Returns a hash of the occurrence counts for each residue or base.
# Every character matched by /./ is counted (so newlines are not); looking
# up a character that does not occur returns 0.
def composition
  self.scan(/./).inject(Hash.new(0)) do |tally, residue|
    tally[residue] += 1
    tally
  end
end
|
|
175
|
+
|
|
176
|
+
# Returns a randomized sequence keeping its composition by default.
# The argument (a hash of residue/base => count) is required when
# generating a random sequence from the empty sequence (used by the class
# methods NA.randomize, AA.randomize); its counts are added to the number
# of characters to draw.  The given hash is cloned, not modified.
# If a block is given, each random residue/base is yielded instead of
# being collected, and the returned sequence is empty.
def randomize(hash = nil)
  draws = self.length
  if hash
    remaining = hash.clone
    remaining.each_value { |n| draws += n }
  else
    remaining = self.composition
  end

  shuffled = ''
  weights = {}
  draws.times do
    # weight every residue by its remaining count times a random factor
    # and take the heaviest one
    remaining.each { |residue, left| weights[residue] = left * rand }
    picked = weights.max { |a, b| a[1] <=> b[1] }.first
    remaining[picked] -= 1

    if block_given?
      yield picked
    else
      shuffled += picked
    end
  end
  self.class.new(shuffled)
end
|
|
206
|
+
|
|
207
|
+
# Generate a new random sequence with the given frequency of bases
# or residues.  The sequence length is determined by the sum of each
# base/residue occurrence.  Arguments and block are forwarded to the
# instance method randomize of an empty sequence.
def self.randomize(*arg, &block)
  blank = self.new('')
  blank.randomize(*arg, &block)
end
|
|
213
|
+
|
|
214
|
+
# Receives a GenBank style position string and converts it to a Locations
# object to splice the sequence itself.  See also: bio/location.rb
#
# This method depends on Locations class, see bio/location.rb
#
# For every location, either the location's own replacement sequence is
# appended or the subseq(from, to) of the receiver.  A piece on the minus
# strand is complemented in place when it supports complement!; pieces
# that do not (e.g. plain or amino acid text) are appended unchanged.
# Returns a new object of the receiver's class.
def splicing(position)
  position = Locations.new(position) unless position.is_a?(Locations)

  spliced = ''
  position.each do |location|
    if location.sequence
      spliced << location.sequence
    else
      exon = self.subseq(location.from, location.to)
      strand = location.strand
      # Ask for the capability instead of rescuing NameError, so that a
      # genuine error raised inside complement! is no longer swallowed.
      if strand && strand < 0 && exon.respond_to?(:complement!)
        exon.complement!
      end
      spliced << exon
    end
  end
  self.class.new(spliced)
end
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# Nucleic Acid sequence
|
|
240
|
+
|
|
241
|
+
class NA < Sequence
|
|
242
|
+
|
|
243
|
+
# Generate a nucleic acid sequence object from a string.
|
|
244
|
+
def initialize(str)
|
|
245
|
+
super
|
|
246
|
+
self.downcase!
|
|
247
|
+
self.tr!(" \t\n\r",'')
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# This method depends on Locations class, see bio/location.rb
|
|
251
|
+
def splicing(position)
|
|
252
|
+
mRNA = super
|
|
253
|
+
if mRNA.rna?
|
|
254
|
+
mRNA.tr!('t', 'u')
|
|
255
|
+
else
|
|
256
|
+
mRNA.tr!('u', 't')
|
|
257
|
+
end
|
|
258
|
+
mRNA
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
# Returns complement sequence without reversing ("atgc" -> "tacg")
|
|
262
|
+
def forward_complement
|
|
263
|
+
s = self.class.new(self)
|
|
264
|
+
s.forward_complement!
|
|
265
|
+
s
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
# Convert to complement sequence without reversing ("atgc" -> "tacg")
|
|
269
|
+
def forward_complement!
|
|
270
|
+
if self.rna?
|
|
271
|
+
self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
|
|
272
|
+
else
|
|
273
|
+
self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
|
|
274
|
+
end
|
|
275
|
+
self
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# Returns reverse complement sequence ("atgc" -> "gcat")
|
|
279
|
+
def reverse_complement
|
|
280
|
+
s = self.class.new(self)
|
|
281
|
+
s.reverse_complement!
|
|
282
|
+
s
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
# Convert to reverse complement sequence ("atgc" -> "gcat")
|
|
286
|
+
def reverse_complement!
|
|
287
|
+
self.reverse!
|
|
288
|
+
self.forward_complement!
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
# Aliases for short
|
|
292
|
+
alias complement reverse_complement
|
|
293
|
+
alias complement! reverse_complement!
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# Translate into the amino acid sequence from the given frame and the
|
|
297
|
+
# selected codon table. The table also can be a Bio::CodonTable object.
|
|
298
|
+
# The 'unknown' character is used for invalid/unknown codon (can be
|
|
299
|
+
# used for 'nnn' and/or gap translation in practice).
|
|
300
|
+
#
|
|
301
|
+
# Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
|
|
302
|
+
# (4, 5 or 6 is also accepted) for the reverse strand.
|
|
303
|
+
def translate(frame = 1, table = 1, unknown = 'X')
|
|
304
|
+
if table.is_a?(Bio::CodonTable)
|
|
305
|
+
ct = table
|
|
306
|
+
else
|
|
307
|
+
ct = Bio::CodonTable[table]
|
|
308
|
+
end
|
|
309
|
+
naseq = self.dna
|
|
310
|
+
case frame
|
|
311
|
+
when 1, 2, 3
|
|
312
|
+
from = frame - 1
|
|
313
|
+
when 4, 5, 6
|
|
314
|
+
from = frame - 4
|
|
315
|
+
naseq.complement!
|
|
316
|
+
when -1, -2, -3
|
|
317
|
+
from = -1 - frame
|
|
318
|
+
naseq.complement!
|
|
319
|
+
else
|
|
320
|
+
from = 0
|
|
321
|
+
end
|
|
322
|
+
nalen = naseq.length - from
|
|
323
|
+
nalen -= nalen % 3
|
|
324
|
+
aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
|
|
325
|
+
return Bio::Sequence::AA.new(aaseq)
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
# Returns counts of the each codon in the sequence by Hash.
|
|
329
|
+
def codon_usage
|
|
330
|
+
hash = Hash.new(0)
|
|
331
|
+
self.window_search(3, 3) do |codon|
|
|
332
|
+
hash[codon] += 1
|
|
333
|
+
end
|
|
334
|
+
return hash
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
# Calculate the ratio of GC / ATGC bases in percent.
|
|
338
|
+
def gc_percent
|
|
339
|
+
count = self.composition
|
|
340
|
+
at = count['a'] + count['t'] + count['u']
|
|
341
|
+
gc = count['g'] + count['c']
|
|
342
|
+
gc = 100 * gc / (at + gc)
|
|
343
|
+
return gc
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
# Show abnormal bases other than 'atgcu'.
|
|
347
|
+
def illegal_bases
|
|
348
|
+
self.scan(/[^atgcu]/).sort.uniq
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
# Estimate the weight of this biological string molecule.
|
|
352
|
+
# NucleicAcid is defined in bio/data/na.rb
|
|
353
|
+
def molecular_weight
|
|
354
|
+
if self.rna?
|
|
355
|
+
NucleicAcid.weight(self, true)
|
|
356
|
+
else
|
|
357
|
+
NucleicAcid.weight(self)
|
|
358
|
+
end
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
# Convert the universal code string into the regular expression.
|
|
362
|
+
def to_re
|
|
363
|
+
if self.rna?
|
|
364
|
+
NucleicAcid.to_re(self.dna, true)
|
|
365
|
+
else
|
|
366
|
+
NucleicAcid.to_re(self)
|
|
367
|
+
end
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
# Convert the self string into the list of the names of the each base.
|
|
371
|
+
# Looks up each base of the sequence in NucleicAcid.names.
#
# Every byte is turned into its upper-case one-character string and used as
# the lookup key.
#
# Returns an Array with one entry per base, in sequence order.
def names
  unpack('C*').map { |byte| NucleicAcid.names[byte.chr.upcase] }
end
|
|
378
|
+
|
|
379
|
+
# Output a DNA string by replacing each 'u' with 't'.
|
|
380
|
+
# Returns a copy of the sequence in which every 'u' is replaced by 't'.
# The receiver itself is left unchanged.
def dna
  tr('u', 't')
end
|
|
383
|
+
|
|
384
|
+
# Replaces every 'u' with 't' in place.
#
# Returns the modified receiver, or nil when nothing was replaced (the
# return value of String#tr!).
def dna!
  tr!('u', 't')
end
|
|
387
|
+
|
|
388
|
+
# Output an RNA string by replacing each 't' with 'u'.
|
|
389
|
+
# Returns a copy of the sequence in which every 't' is replaced by 'u'.
# The receiver itself is left unchanged.
def rna
  tr('t', 'u')
end
|
|
392
|
+
|
|
393
|
+
# Replaces every 't' with 'u' in place.
#
# Returns the modified receiver, or nil when nothing was replaced (the
# return value of String#tr!).
def rna!
  tr!('t', 'u')
end
|
|
396
|
+
|
|
397
|
+
# Tells whether the sequence contains a 'u'.
#
# Returns the index of the first 'u' (truthy) or nil when there is none.
def rna?
  index('u')
end
|
|
400
|
+
protected :rna?
|
|
401
|
+
|
|
402
|
+
# Joke method: converts the sequence with dna and then maps the letters
# a, t, g, c onto p, i, k, a respectively.
def pikachu
  as_dna = dna
  as_dna.tr("atgc", "pika") # joke, of course :-)
end
|
|
405
|
+
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
# Amino Acid sequence
|
|
410
|
+
|
|
411
|
+
class AA < Sequence

  # Generates an amino acid sequence object from a string.
  #
  # After the superclass initializer has run, the content is upper-cased and
  # all spaces, tabs, newlines and carriage returns are removed.
  def initialize(str)
    super
    upcase!
    delete!(" \t\n\r")
  end

  # Estimates the weight of this protein via AminoAcid.weight.
  # AminoAcid is defined in bio/data/aa.rb
  def molecular_weight
    AminoAcid.weight(self)
  end

  # Builds a regular expression for this sequence via AminoAcid.to_re.
  def to_re
    AminoAcid.to_re(self)
  end

  # Looks up every residue (one character per byte) in AminoAcid.names.
  #
  # Returns an Array with one entry per residue; per the original comment
  # these are the 3-letter codes.
  def codes
    unpack('C*').map { |byte| AminoAcid.names[byte.chr] }
  end

  # Similar to codes, but each entry returned by codes is looked up once
  # more in AminoAcid.names; per the original comment this yields the long
  # names.
  def names
    codes.map { |code| AminoAcid.names[code] }
  end

end
|
|
448
|
+
|
|
449
|
+
end # Sequence
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
# Sequence subclass that adds read/write annotation attributes.
class Seq < Sequence
  attr_accessor :entry_id, :definition, :features, :references, :comments
  attr_accessor :date, :keywords, :dblinks, :taxonomy, :moltype
end
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
end # Bio
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
# Ad-hoc self test: runs only when this file is executed directly and prints
# the result of each public method so the output can be inspected by eye.
if __FILE__ == $0

  puts "== Test Bio::Sequence::NA.new"
  p Bio::Sequence::NA.new('')
  p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
  p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')

  puts "\n== Test Bio::Sequence::AA.new"
  p Bio::Sequence::AA.new('')
  p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')

  puts "\n== Test Bio::Sequence#to_s"
  p na.to_s
  p aa.to_s

  puts "\n== Test Bio::Sequence#subseq(2,6)"
  p na
  p na.subseq(2,6)

  puts "\n== Test Bio::Sequence#[2,6]"
  p na
  p na[2,6]

  puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
  puts na.to_fasta('hoge', 8)

  puts "\n== Test Bio::Sequence#window_search(15)"
  p na
  na.window_search(15) {|x| p x}

  puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
  p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})

  puts "\n== Test Bio::Sequence#composition"
  p na
  p na.composition
  p rna
  p rna.composition

  puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
  p na
  p na.splicing("complement(join(1..5,16..20))")
  p rna
  p rna.splicing("complement(join(1..5,16..20))")

  puts "\n== Test Bio::Sequence::NA#complement"
  p na.complement
  p rna.complement
  p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
  p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement

  puts "\n== Test Bio::Sequence::NA#translate"
  p na
  p na.translate
  p rna
  p rna.translate

  # Call the method the section is labelled with; no NA#gc is visible here.
  puts "\n== Test Bio::Sequence::NA#gc_percent"
  p na.gc_percent
  p rna.gc_percent

  puts "\n== Test Bio::Sequence::NA#illegal_bases"
  p na.illegal_bases
  p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
  p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases

  puts "\n== Test Bio::Sequence::NA#molecular_weight"
  p na
  p na.molecular_weight
  p rna
  p rna.molecular_weight

  puts "\n== Test Bio::Sequence::NA#to_re"
  p Bio::Sequence::NA.new('atgcrymkdhvbswn')
  p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
  p Bio::Sequence::NA.new('augcrymkdhvbswn')
  p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re

  puts "\n== Test Bio::Sequence::NA#names"
  p na.names

  puts "\n== Test Bio::Sequence::NA#pikachu"
  p na.pikachu

  puts "\n== Test Bio::Sequence::NA#randomize"
  print "Orig : "; p na
  print "Rand : "; p na.randomize
  print "Rand : "; p na.randomize
  print "Rand : "; p na.randomize.randomize
  print "Block : "; na.randomize do |x| print x end; puts

  print "Orig : "; p rna
  print "Rand : "; p rna.randomize
  print "Rand : "; p rna.randomize
  print "Rand : "; p rna.randomize.randomize
  print "Block : "; rna.randomize do |x| print x end; puts

  puts "\n== Test Bio::Sequence::NA.randomize(counts)"
  print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
  print "Rand : "; p Bio::Sequence::NA.randomize(counts)
  print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
  print "Rand : "; p Bio::Sequence::NA.randomize(counts)
  print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts

  puts "\n== Test Bio::Sequence::AA#codes"
  p aa
  p aa.codes

  puts "\n== Test Bio::Sequence::AA#names"
  p aa
  p aa.names

  puts "\n== Test Bio::Sequence::AA#molecular_weight"
  p aa.subseq(1,20)
  p aa.subseq(1,20).molecular_weight

  puts "\n== Test Bio::Sequence::AA#randomize"
  aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
  s = Bio::Sequence::AA.new(aaseq)
  print "Orig : "; p s
  print "Rand : "; p s.randomize
  print "Rand : "; p s.randomize
  print "Rand : "; p s.randomize.randomize
  print "Block : "; s.randomize {|x| print x}; puts

  puts "\n== Test Bio::Sequence::AA.randomize(counts)"
  print "Count : "; p counts = s.composition
  print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
  print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts

end
|
|
592
|
+
|
|
593
|
+
|