bio 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
data/lib/bio/db/fasta.rb
ADDED
@@ -0,0 +1,869 @@
|
|
1
|
+
#
|
2
|
+
# bio/db/fasta.rb - FASTA format class
|
3
|
+
#
|
4
|
+
# Copyright (C) 2001 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
5
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
#
|
21
|
+
# $Id: fasta.rb,v 1.21 2005/09/26 13:00:06 k Exp $
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'bio/db'
|
25
|
+
require 'bio/sequence'
|
26
|
+
|
27
|
+
module Bio
|
28
|
+
|
29
|
+
# Holds a single FASTA formatted entry: the definition line (stored without
# its leading '>') and the raw text that follows it.
class FastaFormat < DB

  # Entry separator string.
  # NOTE(review): not referenced inside this file; presumably consumed by the
  # flat-file reader to split a stream into entries -- confirm against caller.
  DELIMITER = RS = "\n>"

  # str:: a String holding one FASTA entry.  Anything from the next line that
  #       starts with '>' onward is cut off and kept in #entry_overrun.
  def initialize(str)
    @definition = str[/.*/].sub(/^>/, '').strip   # 1st line
    @data = str.sub(/.*/, '')                     # rests
    @data.sub!(/^>.*/m, '')                       # remove trailing entries for sure
    # $& is the text removed by the sub! above, or nil when nothing matched
    @entry_overrun = $&
  end

  # definition:: definition line without the leading '>'
  # data::       raw text following the definition line
  attr_accessor :definition, :data

  # Text of any following entries that was cut off by #initialize (or nil).
  attr_reader :entry_overrun

  # Returns the entry re-assembled as a FASTA formatted String.
  # The result is also remembered in @entry.
  def entry
    @entry = ">#{@definition}\n#{@data.strip}\n"
  end
  alias to_s entry

  # Passes this entry, as FASTA text, to factory.query and returns the result.
  #
  # The text is built through #entry on every call.  Reading @entry directly
  # (as was done before) handed nil to the factory whenever #entry / #to_s had
  # not been called beforehand.
  def query(factory)
    factory.query(entry)
  end
  alias fasta query
  alias blast query

  # Returns the sequence as a Sequence object (built once, then cached).
  #
  # Spaces, tabs, CR, LF and digits are stripped from the data.  When the data
  # starts with a '#' line, every '#' line is treated as a comment: it is left
  # out of the sequence and collected for #comment.
  def seq
    unless defined?(@seq)
      unless /\A\s*^\#/ =~ @data then
        @seq = Sequence.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
      else
        a = @data.split(/(^\#.*$)/)
        i = 0        # number of sequence characters seen so far
        cmnt = {}    # position => comment text
        s = []
        a.each do |x|
          if /^# ?(.*)$/ =~ x then
            # consecutive comment lines at one position are joined by "\n"
            cmnt[i] ? cmnt[i] << "\n" << $1 : cmnt[i] = $1
          else
            x.tr!(" \t\r\n0-9", '') # lazy clean up
            i += x.length
            s << x
          end
        end
        @comment = cmnt
        @seq = Bio::Sequence.new(s.join(''))
      end
    end
    @seq
  end

  # Returns the comments found by #seq as a Hash that maps the number of
  # sequence characters preceding a comment to the comment text.
  # Returns nil when the data did not start with a '#' line.
  def comment
    seq
    @comment
  end

  # Length of the cleaned-up sequence.
  def length
    seq.length
  end

  # The sequence wrapped in a Sequence::NA object.
  def naseq
    Sequence::NA.new(seq)
  end

  # Length of #naseq.
  def nalen
    self.naseq.length
  end

  # The sequence wrapped in a Sequence::AA object.
  def aaseq
    Sequence::AA.new(seq)
  end

  # Length of #aaseq.
  def aalen
    self.aaseq.length
  end

  # Returns a FastaDefline built from the definition line (created once and
  # cached).  The accessors below all delegate to it.
  def identifiers
    unless defined?(@ids) then
      @ids = FastaDefline.new(@definition)
    end
    @ids
  end

  # Delegates to FastaDefline#entry_id.
  def entry_id
    identifiers.entry_id
  end

  # Delegates to FastaDefline#gi.
  def gi
    identifiers.gi
  end

  # Delegates to FastaDefline#accession.
  def accession
    identifiers.accession
  end

  # Delegates to FastaDefline#accessions.
  def accessions
    identifiers.accessions
  end

  # Delegates to FastaDefline#acc_version.
  def acc_version
    identifiers.acc_version
  end

  # Delegates to FastaDefline#locus.
  def locus
    identifiers.locus
  end

end #class FastaFormat
|
135
|
+
|
136
|
+
# FASTA-like entry whose body is a whitespace separated list of integers
# instead of sequence letters.
class FastaNumericFormat < FastaFormat

  # Returns the numbers of the entry as an Array of Integers.
  # The raw text is split on whitespace and converted with to_i the first
  # time this is called; the resulting list is cached afterwards.
  def data
    @list ||= @data.strip.split(/\s+/).map { |token| token.to_i }
  end

  # Number of values in the list.
  def length
    data.length
  end

  # Yields every value of the list in order.
  def each
    data.each { |value| yield value }
  end

  # Returns the n-th value of the list.
  def [](n)
    data[n]
  end

  # Sequence oriented methods of the parent class make no sense here.
  undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen

end #class FastaNumericFormat
|
162
|
+
|
163
|
+
# Parses a FASTA definition line and extracts the identifiers and the
# description text it contains.  Several deflines joined by "\x01" (Ctrl-A)
# are handled as separate deflines of one entry.
class FastaDefline

  # specs are described in:
  # ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
  # http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers

  # Database tag => labels of the '|'-separated fields that follow the tag.
  NSIDs = {
    # NCBI and WU-BLAST
    'gi'  => [ 'gi' ],                      # NCBI GI
    'gb'  => [ 'acc_version', 'locus' ],    # GenBank
    'emb' => [ 'acc_version', 'locus' ],    # EMBL
    'dbj' => [ 'acc_version', 'locus' ],    # DDBJ
    'sp'  => [ 'accession', 'entry_id' ],   # SWISS-PROT
    'pdb' => [ 'entry_id', 'chain' ],       # PDB
    'bbs' => [ 'number' ],                  # GenInfo Backbone Id
    'gnl' => [ 'database' , 'entry_id' ],   # General database identifier
    'ref' => [ 'acc_version' , 'locus' ],   # NCBI Reference Sequence
    'lcl' => [ 'entry_id' ],                # Local Sequence identifier

    # WU-BLAST and NCBI
    'pir' => [ 'accession', 'entry_id' ],   # PIR
    'prf' => [ 'accession', 'entry_id' ],   # Protein Research Foundation
    'pat' => [ 'country', 'number', 'serial' ], # Patents

    # WU-BLAST only
    'bbm' => [ 'number' ],                  # NCBI GenInfo Backbone database identifier
    'gim' => [ 'number' ],                  # NCBI GenInfo Import identifier
    'gp'  => [ 'acc_version', 'locus' ],    # GenPept
    'oth' => [ 'accession', 'name', 'release' ], # Other (user-definable) identifier
    'tpd' => [ 'accession', 'name' ],       # Third party annotation, DDBJ
    'tpe' => [ 'accession', 'name' ],       # Third party annotation, EMBL
    'tpg' => [ 'accession', 'name' ],       # Third party annotation, GenBank

    # Original
    'ri'  => [ 'entry_id', 'rearray_id', 'len' ], # RIKEN FANTOM DB
  }

  # str:: definition line(s); multiple deflines are separated by "\x01".
  def initialize(str)
    @deflines = []   # one parsed record per defline (see #add_defline)
    @info     = {}   # cache of values looked up / detected so far
    @list_ids = []   # every identifier found, in order of appearance

    @entry_id = nil

    lines = str.split("\x01")
    lines.each do |line|
      add_defline(line)
    end
  end #def initialize

  # Array of identifier arrays, e.g. [["gi", "671595"], ["emb", "X.1", nil]].
  attr_reader :list_ids

  # Identifier string of the first defline that was added.
  attr_reader :entry_id

  # Parses one defline and appends what was found to this object.
  #
  # Each parsed defline is stored as [separator, id-array..., description];
  # #to_s and #descriptions rely on that layout.
  def add_defline(str)
    case str
    when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
      # NSIDs
      # examples:
      # >gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P
      #
      # note: regexp (?:) means grouping without backreferences
      i = $1
      d = $2
      tks = i.split('|')
      tks << '' if i[-1,1] == '|'   # split drops a trailing empty field
      a = parse_NSIDs(tks)
      # Keep the raw identifier text when nothing could be parsed from it
      # (e.g. ">|foo"); a[0] would be nil in that case.
      i = a[0].join('|') unless a.empty?
      a.unshift('|')
      # tokens parse_NSIDs did not consume belong to the description
      d = tks.join('|') + ' ' + d unless tks.empty?
      a << d
      this_line = a
      match_EC(d)
      parse_square_brackets(d).each do |x|
        if !match_EC(x, false) and x =~ /\A[A-Z]/ then
          di = [ x ]
          @list_ids << di
          @info['organism'] = x unless @info['organism']
        end
      end

    when /^\>?\s*([a-zA-Z0-9]+\:[^\s]+)\s*(.*)$/
      # examples:
      # >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
      # >emb:CACDC28 [X80034] C.albicans CDC28 gene
      i = $1
      d = $2
      a = parse_ColonSepID(i)
      i = a.join(':')
      this_line = [ ':', a , d ]
      match_EC(d)
      parse_square_brackets(d).each do |x|
        if !match_EC(x, false) and x =~ /:/ then
          parse_ColonSepID(x)
        elsif x =~ /\A\s*([A-Z][A-Z0-9_\.]+)\s*\z/ then
          @list_ids << [ $1 ]
        end
      end

    when /^\>?\s*(\S+)(?:\s+(.+))?$/
      # examples:
      # >ABC12345 this is test
      i = $1
      d = $2.to_s
      @list_ids << [ i.chomp('.') ]
      this_line = [ '', [ i ], d ]
      match_EC(d)
    else
      i = str
      d = ''
      match_EC(i)
      this_line = [ '', [ i ], d ]
    end

    @deflines << this_line
    @entry_id = i unless @entry_id
  end

  # Looks for EC numbers written as "EC:n.n.n.n" (each n being digits and/or
  # '-') in str.  Returns [ 'EC', number ] for the last one found, or nil.
  # When write_flag is true every hit is also added to @list_ids, and
  # @info['ec'] is set unless it already holds a number without '-'.
  def match_EC(str, write_flag = true)
    di = nil
    # (?:...) groups without capturing.  The former "(:?...)" was a capturing
    # group with an optional ':' and accepted strings such as "EC:1.:2.:3.4".
    str.scan(/EC\:((?:[\-\d]+\.){3}(?:[\-\d]+))/i) do |x|
      di = [ 'EC', x[0] ]
      if write_flag then
        @info['ec'] = di[1] if (!@info['ec'] or @info['ec'].to_s =~ /\-/)
        @list_ids << di
      end
    end
    di
  end
  private :match_EC

  # Returns the contents of every [...] pair in str as an Array of Strings.
  def parse_square_brackets(str)
    r = []
    str.scan(/\[([^\]]*)\]/) do |x|
      r << x[0]
    end
    r
  end
  private :parse_square_brackets

  # Splits "db:id" at the first ':' into [db, id] ([str, nil] when there is
  # no ':'), records the pair in @list_ids and returns it.
  def parse_ColonSepID(str)
    di = str.split(':', 2)
    di << nil if di.size <= 1
    @list_ids << di
    di
  end
  private :parse_ColonSepID

  # Consumes '|'-separated tokens from the front of ary and groups them into
  # identifier arrays according to NSIDs.  Parsing stops at the first token
  # that is not a known tag; a non-empty one is kept as an uncontrolled
  # identifier.  The groups are appended to @list_ids and returned.
  def parse_NSIDs(ary)
    # this method destroys ary
    data = []
    while token = ary.shift
      if labels = self.class::NSIDs[token] then
        di = [ token ]
        labels.each do |x|
          token = ary.shift
          break unless token
          if self.class::NSIDs[token] then
            # next tag already starts here: give it back to the outer loop
            ary.unshift(token)
            break #each
          end
          if token.length > 0 then
            di << token
          else
            di << nil   # empty field, e.g. "pir||I51898"
          end
        end
        data << di
      else
        if token.length > 0 then
          # UCID (uncontrolled identifiers)
          di = [ token ]
          data << di
          @info['ucid'] = token unless @info['ucid']
        end
        break #while
      end
    end #while
    @list_ids.concat data
    data
  end #def parse_NSIDs
  private :parse_NSIDs

  # Rebuilds the defline text from the parsed data; deflines are joined by
  # "\x01".  The result may differ from the string originally given.
  def to_s
    @deflines.collect { |a|
      s = a[0]
      (a[1..-2].collect { |x| x.join(s) }.join(s) + ' ' + a[-1]).strip
    }.join("\x01")
  end

  # Description text of the first defline (nil when there is none).
  def description
    @deflines[0].to_a[-1]
  end

  # Description texts of all deflines.
  def descriptions
    @deflines.collect do |a|
      a[-1]
    end
  end

  # Returns ID-like strings: the values of all identifiers found, plus words
  # of the descriptions that look like identifiers.
  def id_strings
    r = []
    @list_ids.each do |a|
      if a.size >= 2 then
        r.concat a[1..-1].find_all { |x| x }
      else
        if a[0].to_s.size > 0 and a[0] =~ /\A[A-Za-z0-9\.\-\_]+\z/
          r << a[0]
        end
      end
    end
    r.concat( words(true, []).find_all { |x|
      x =~ /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/ or
        x =~ /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
    })
    r
  end

  # Words (compared in lower case) that #words drops by default.
  KillWords = [
    'an', 'the', 'this', 'that',
    'is', 'are', 'were', 'was', 'be', 'can', 'may', 'might',
    'as', 'at', 'by', 'for', 'in', 'of', 'on', 'to', 'with',
    'from', 'and', 'or', 'not',
    'dna', 'rna', 'mrna', 'cdna', 'orf',
    'aa', 'nt', 'pct', 'id', 'ec', 'sp', 'subsp',
    'similar', 'involved', 'identical', 'identity',
    'cds', 'clone', 'library', 'contig', 'contigs',
    'homolog', 'homologue', 'homologs', 'homologous',
    'protein', 'proteins', 'gene', 'genes',
    'product', 'products', 'sequence', 'sequences',
    'strain', 'strains', 'region', 'regions',
  ]
  KillWordsHash = {}
  KillWords.each { |x| KillWordsHash[x] = true }

  # Patterns of words that #words drops by default: short numbers / percent
  # values and identifier-like tokens.
  KillRegexpArray = [
    /\A\d{1,3}\%?\z/,
    /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/,
    /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
  ]

  # Returns the sorted, unique words of all descriptions.
  #
  # case_sensitive:: when false or nil the words are returned in lower case
  # kill_regexp::    Array of Regexps; matching words are dropped
  # kwhash::         Hash whose keys are lower-case words to drop
  #
  # Words of one character or less are always dropped.
  def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
            kwhash = self.class::KillWordsHash)
    a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
    a.collect! do |x|
      x.sub!(/\A[\$\*\-\+]+/, '')
      x.sub!(/[\$\*\-\=]+\z/, '')
      if x.size <= 1 then
        nil
      elsif kwhash[x.downcase] then
        nil
      else
        if kill_regexp.find { |expr| expr =~ x } then
          nil
        else
          x
        end
      end
    end
    a.compact!
    a.collect! { |x| x.downcase } unless case_sensitive
    a.sort!
    a.uniq!
    a
  end

  # Returns the value stored for the tag db ('gi', 'emb', 'ec', ...), or nil.
  #
  # For identifiers with several fields the first non-nil field labelled
  # acc_version, entry_id, locus, accession or number (in this order) is
  # returned, falling back to the first non-nil field.  A value that was
  # found is cached in @info.
  def get(db)
    db = db.to_s
    r = nil
    unless r = @info[db] then
      di = @list_ids.find { |x| x[0] == db.to_s }
      if di and di.size <= 2 then
        r = di[-1]
      elsif di then
        labels = self.class::NSIDs[db] || []
        [ 'acc_version', 'entry_id',
          'locus', 'accession', 'number'].each do |x|
          if i = labels.index(x) then
            r = di[i+1]
            break if r
          end
        end
        r = di[1..-1].find { |x| x } unless r
      end
      @info[db] = r if r
    end
    r
  end

  # Returns the field labelled tstr (e.g. 'locus') of the first identifier
  # whose tag defines such a field.  The field itself may be nil.
  def get_by_type(tstr)
    @list_ids.each do |x|
      if labels = self.class::NSIDs[x[0]] then
        if i = labels.index(tstr) then
          return x[i+1]
        end
      end
    end
    nil
  end

  # Returns all non-nil fields carrying any of the given labels.
  def get_all_by_type(*tstrarg)
    d = []
    @list_ids.each do |x|
      if labels = self.class::NSIDs[x[0]] then
        tstrarg.each do |y|
          if i = labels.index(y) then
            d << x[i+1] if x[i+1]
          end
        end
      end
    end
    d
  end

  # Locus name (cached), or nil.
  def locus
    unless defined?(@locus)
      @locus = get_by_type('locus')
    end
    @locus
  end

  # GI number as a String (cached), or nil.
  def gi
    unless defined?(@gi) then
      @gi = get_by_type('gi')
    end
    @gi
  end

  # Accession with version number (cached), or nil.
  def acc_version
    unless defined?(@acc_version) then
      @acc_version = get_by_type('acc_version')
    end
    @acc_version
  end

  # All accession numbers with any ".version" suffix removed (cached).
  def accessions
    unless defined?(@accessions) then
      @accessions = get_all_by_type('accession', 'acc_version')
      @accessions.collect! { |x| x.sub(/\..*\z/, '') }
    end
    @accessions
  end

  # Accession number without version (cached): taken from #acc_version when
  # available, otherwise the first of #accessions.
  def accession
    unless defined?(@accession) then
      if acc_version then
        @accession = acc_version.split('.')[0]
      else
        @accession = accessions[0]
      end
    end
    @accession
  end

  # Makes every tag usable as a method: obj.emb is the same as obj.get('emb').
  # Known NSIDs tags that are absent from the defline return nil.
  #
  # Any other name is passed to super, which raises NoMethodError.  Raising a
  # plain RuntimeError here (as was done before) broke Ruby's implicit
  # conversion probes such as to_ary (used by Array#flatten), because they
  # only rescue NoMethodError.
  def method_missing(name, *args)
    # raise ArgumentError,
    # "wrong # of arguments(#{args.size} for 1)" if args.size >= 2
    r = get(name, *args)
    return r if r || self.class::NSIDs[name.to_s]
    super
  end

  # Keeps respond_to? in step with #method_missing.
  # (Only consulted by Ruby versions that know respond_to_missing?.)
  def respond_to_missing?(name, include_private = false)
    key = name.to_s
    return true if self.class::NSIDs.key?(key)
    return true if get(key)
    super
  end

end #class FastaDefline
|
528
|
+
|
529
|
+
end #module Bio
|
530
|
+
|
531
|
+
# Demonstration, executed only when this file is run directly: parses one
# sequence entry and one numeric entry and prints what the accessors return.
if __FILE__ == $0

  # Two entries on purpose: FastaFormat.new keeps only the first one.
  f_str = <<END
>sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
>sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
KTGDPLEWRRLFKKISTICRDIILIPN
END

  f = Bio::FastaFormat.new(f_str)
  puts "### FastaFormat"
  puts "# entry"
  puts f.entry
  puts "# entry_id"
  p f.entry_id
  puts "# definition"
  p f.definition
  puts "# data"
  p f.data
  puts "# seq"
  p f.seq
  # Object#type is deprecated (and gone in later Ruby versions); #class
  # returns the same information.
  puts "# seq.class"
  p f.seq.class
  puts "# length"
  p f.length
  puts "# aaseq"
  p f.aaseq
  puts "# aaseq.class"
  p f.aaseq.class
  puts "# aaseq.composition"
  p f.aaseq.composition
  puts "# aalen"
  p f.aalen

  puts

  n_str = <<END
>CRA3575282.F
24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26
32 29 29 25
END

  n = Bio::FastaNumericFormat.new(n_str)
  puts "### FastaNumericFormat"
  puts "# entry"
  puts n.entry
  puts "# entry_id"
  p n.entry_id
  puts "# definition"
  p n.definition
  puts "# data"
  p n.data
  puts "# length"
  p n.length
  puts "# percent to ratio by yield"
  n.each do |x|
    p x/100.0
  end
  puts "# first three"
  p n[0]
  p n[1]
  p n[2]
  puts "# last one"
  p n[-1]

end
|
612
|
+
|
613
|
+
=begin
|
614
|
+
|
615
|
+
= Bio::FastaFormat
|
616
|
+
|
617
|
+
Treats a FASTA formatted entry, such as:
|
618
|
+
|
619
|
+
>id and/or some comments <== comment line
|
620
|
+
ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
|
621
|
+
ATGCATGCATGCATGCATGCATGCATGCATGCATGC
|
622
|
+
ATGCATGCATGC
|
623
|
+
|
624
|
+
The leading '>' can be omitted and any trailing '>' will be removed
|
625
|
+
automatically.
|
626
|
+
|
627
|
+
--- Bio::FastaFormat.new(entry)
|
628
|
+
|
629
|
+
Stores the comment and sequence information from one entry of the
|
630
|
+
FASTA format string. If the argument contains more than one
|
631
|
+
entry, only the first entry is used.
|
632
|
+
|
633
|
+
--- Bio::FastaFormat#entry
|
634
|
+
|
635
|
+
Returns the stored one entry as a FASTA format. (same as to_s)
|
636
|
+
|
637
|
+
--- Bio::FastaFormat#definition
|
638
|
+
|
639
|
+
Returns the comment line of the FASTA formatted data.
|
640
|
+
|
641
|
+
--- Bio::FastaFormat#seq
|
642
|
+
|
643
|
+
Returns a joined sequence line as a String.
|
644
|
+
|
645
|
+
--- Bio::FastaFormat#query(factory)
|
646
|
+
--- Bio::FastaFormat#fasta(factory)
|
647
|
+
--- Bio::FastaFormat#blast(factory)
|
648
|
+
|
649
|
+
Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
|
650
|
+
factory object.
|
651
|
+
|
652
|
+
#!/usr/bin/env ruby
|
653
|
+
|
654
|
+
require 'bio'
|
655
|
+
|
656
|
+
factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
|
657
|
+
flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
|
658
|
+
flatfile.each do |entry|
|
659
|
+
p entry.definition
|
660
|
+
result = entry.fasta(factory)
|
661
|
+
result.each do |hit|
|
662
|
+
print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
|
663
|
+
p hit.lap_at
|
664
|
+
end
|
665
|
+
end
|
666
|
+
|
667
|
+
--- Bio::FastaFormat#length
|
668
|
+
|
669
|
+
Returns sequence length.
|
670
|
+
|
671
|
+
--- Bio::FastaFormat#naseq
|
672
|
+
--- Bio::FastaFormat#nalen
|
673
|
+
--- Bio::FastaFormat#aaseq
|
674
|
+
--- Bio::FastaFormat#aalen
|
675
|
+
|
676
|
+
If you know whether the sequence is NA or AA, use these methods.
|
677
|
+
'naseq' and 'aaseq' methods return the Bio::Sequence::NA or
|
678
|
+
Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods
|
679
|
+
return the length of them.
|
680
|
+
|
681
|
+
--- Bio::FastaFormat#identifiers
|
682
|
+
|
683
|
+
Parsing FASTA Defline, and extract IDs.
|
684
|
+
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
685
|
+
or ":"-separated IDs.
|
686
|
+
It returns a Bio::FastaDefline instance.
|
687
|
+
|
688
|
+
--- Bio::FastaFormat#entry_id
|
689
|
+
|
690
|
+
Parsing FASTA Defline (using #identifiers method), and
|
691
|
+
shows a possibly unique identifier.
|
692
|
+
It returns a string.
|
693
|
+
|
694
|
+
--- Bio::FastaFormat#gi
|
695
|
+
--- Bio::FastaFormat#locus
|
696
|
+
--- Bio::FastaFormat#accession
|
697
|
+
--- Bio::FastaFormat#acc_version
|
698
|
+
|
699
|
+
Parsing FASTA Defline (using #identifiers method), and
|
700
|
+
shows GI/locus/accession/accession with version number.
|
701
|
+
If an entry has more than one such ID,
|
702
|
+
only the first ID is shown.
|
703
|
+
It returns a string or nil.
|
704
|
+
|
705
|
+
--- Bio::FastaFormat#accessions
|
706
|
+
|
707
|
+
Parsing FASTA Defline (using #identifiers method), and
|
708
|
+
shows accession numbers.
|
709
|
+
It returns an array of strings.
|
710
|
+
|
711
|
+
--- Bio::FastaFormat
|
712
|
+
|
713
|
+
= Bio::FastaNumericFormat
|
714
|
+
|
715
|
+
Treats a FASTA formatted numerical entry, such as:
|
716
|
+
|
717
|
+
>id and/or some comments <== comment line
|
718
|
+
24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
|
719
|
+
22 17 15 25 27 32 26 32 29 29 25
|
720
|
+
|
721
|
+
The leading '>' can be omitted and any trailing '>' will be removed
|
722
|
+
automatically.
|
723
|
+
|
724
|
+
--- Bio::FastaNumericFormat.new(entry)
|
725
|
+
|
726
|
+
Stores the comment and the list of the numerical data.
|
727
|
+
|
728
|
+
--- Bio::FastaNumericFormat#definition
|
729
|
+
|
730
|
+
The comment line of the FASTA formatted data.
|
731
|
+
|
732
|
+
--- Bio::FastaNumericFormat#data
|
733
|
+
|
734
|
+
Returns the list of the numerical data (typically the quality score
|
735
|
+
of its corresponding sequence) as an Array.
|
736
|
+
|
737
|
+
--- Bio::FastaNumericFormat#length
|
738
|
+
|
739
|
+
Returns the number of elements in the numerical data.
|
740
|
+
|
741
|
+
--- Bio::FastaNumericFormat#each
|
742
|
+
|
743
|
+
Yields on each elements of the numerical data.
|
744
|
+
|
745
|
+
--- Bio::FastaNumericFormat#[](n)
|
746
|
+
|
747
|
+
Returns the n-th element.
|
748
|
+
|
749
|
+
--- Bio::FastaNumericFormat#identifiers
|
750
|
+
--- Bio::FastaNumericFormat#entry_id
|
751
|
+
--- Bio::FastaNumericFormat#gi
|
752
|
+
--- Bio::FastaNumericFormat#locus
|
753
|
+
--- Bio::FastaNumericFormat#accession
|
754
|
+
--- Bio::FastaNumericFormat#acc_version
|
755
|
+
--- Bio::FastaNumericFormat#accessions
|
756
|
+
|
757
|
+
Same as Bio::FastaFormat.
|
758
|
+
|
759
|
+
|
760
|
+
= Bio::FastaDefline
|
761
|
+
|
762
|
+
Parses a FASTA defline and extracts IDs and other information.
|
763
|
+
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
764
|
+
or ":"-separated IDs.
|
765
|
+
|
766
|
+
--- see also:
|
767
|
+
ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
768
|
+
http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
769
|
+
|
770
|
+
--- Bio::FastaDefline.new(str)
|
771
|
+
|
772
|
+
Parses given string.
|
773
|
+
|
774
|
+
--- Bio::FastaDefline#entry_id
|
775
|
+
|
776
|
+
Shows a possibly unique identifier.
|
777
|
+
Returns a string.
|
778
|
+
|
779
|
+
--- Bio::FastaDefline#gi
|
780
|
+
--- Bio::FastaDefline#locus
|
781
|
+
--- Bio::FastaDefline#accession
|
782
|
+
--- Bio::FastaDefline#acc_version
|
783
|
+
|
784
|
+
Shows GI/locus/accession/accession with version number.
|
785
|
+
If the entry has more than one such ID,
|
786
|
+
only the first ID is shown.
|
787
|
+
Returns a string or nil.
|
788
|
+
|
789
|
+
--- Bio::FastaDefline#accessions
|
790
|
+
|
791
|
+
Shows accession numbers.
|
792
|
+
Returns an array of strings.
|
793
|
+
|
794
|
+
--- Bio::FastaDefline#add_defline(str)
|
795
|
+
|
796
|
+
Parses given string and adds parsed data.
|
797
|
+
|
798
|
+
--- Bio::FastaDefline#to_s
|
799
|
+
|
800
|
+
Shows original string.
|
801
|
+
Note that the result of this method may be different from
|
802
|
+
original string which is given in FastaDefline.new method.
|
803
|
+
|
804
|
+
--- Bio::FastaDefline#id_strings
|
805
|
+
|
806
|
+
Shows ID-like strings.
|
807
|
+
Returns an array of strings.
|
808
|
+
|
809
|
+
--- Bio::FastaDefline#list_ids
|
810
|
+
|
811
|
+
Shows array that contains IDs (or ID-like strings).
|
812
|
+
Returns an array of arrays of strings.
|
813
|
+
|
814
|
+
--- Bio::FastaDefline#description
|
815
|
+
--- Bio::FastaDefline#descriptions
|
816
|
+
|
817
|
+
--- Bio::FastaDefline#words(case_sensitive = nil,
|
818
|
+
kill_words_regexp_array, kill_words_hash)
|
819
|
+
|
820
|
+
--- Bio::FastaDefline#get(tag_of_id)
|
821
|
+
|
822
|
+
--- Bio::FastaDefline#get_by_type(type_of_id)
|
823
|
+
|
824
|
+
--- Bio::FastaDefline#get_all_by_type(type_of_id)
|
825
|
+
|
826
|
+
--- examples:
|
827
|
+
rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
828
|
+
rub.entry_id ==> 'gi|671595'
|
829
|
+
rub.get('emb') ==> 'CAA85678.1'
|
830
|
+
rub.emb ==> 'CAA85678.1'
|
831
|
+
rub.gi ==> '671595'
|
832
|
+
rub.accession ==> 'CAA85678'
|
833
|
+
rub.accessions ==> [ 'CAA85678' ]
|
834
|
+
rub.acc_version ==> 'CAA85678.1'
|
835
|
+
rub.locus ==> nil
|
836
|
+
rub.list_ids ==> [["gi", "671595"],
|
837
|
+
["emb", "CAA85678.1", nil],
|
838
|
+
["Perovskia abrotanoides"]]
|
839
|
+
|
840
|
+
ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
841
|
+
ckr.entry_id ==> "gi|2495000"
|
842
|
+
ckr.sp ==> "CCKR_CAVPO"
|
843
|
+
ckr.pir ==> "I51898"
|
844
|
+
ckr.gb ==> "AAB29504.1"
|
845
|
+
ckr.gi ==> "2495000"
|
846
|
+
ckr.accession ==> "AAB29504"
|
847
|
+
ckr.accessions ==> ["Q63931", "AAB29504"]
|
848
|
+
ckr.acc_version ==> "AAB29504.1"
|
849
|
+
ckr.locus ==> nil
|
850
|
+
ckr.description ==>
|
851
|
+
"CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
852
|
+
ckr.descriptions ==>
|
853
|
+
["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
854
|
+
"cholecystokinin A receptor - guinea pig",
|
855
|
+
"cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
856
|
+
ckr.words ==>
|
857
|
+
["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
858
|
+
"receptor", "type"]
|
859
|
+
ckr.id_strings ==>
|
860
|
+
["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
861
|
+
"544724", "AAB29504.1", "Cavia"]
|
862
|
+
ckr.list_ids ==>
|
863
|
+
[["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
864
|
+
["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
865
|
+
["gb", "AAB29504.1", nil], ["Cavia"]]
|
866
|
+
|
867
|
+
=end
|
868
|
+
|
869
|
+
|