bio 0.7.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/db/embl/uniprot.rb
CHANGED
@@ -1,7 +1,34 @@
|
|
1
1
|
#
|
2
|
-
# bio/db/embl/uniprot.rb - UniProt database class
|
2
|
+
# = bio/db/embl/uniprot.rb - UniProt database class
|
3
3
|
#
|
4
|
-
# Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
|
4
|
+
# Copyright:: Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
|
5
|
+
# License:: LGPL
|
6
|
+
#
|
7
|
+
# $Id: uniprot.rb,v 1.2 2006/01/28 06:40:39 nakao Exp $
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# Name space for UniProtKB/SwissProt specific methods.
|
12
|
+
#
|
13
|
+
# UniProtKB/SwissProt specific methods are defined in this class.
|
14
|
+
# Shared methods for UniProtKB/SwissProt and TrEMBL classes are
|
15
|
+
# defined in Bio::SPTR class.
|
16
|
+
#
|
17
|
+
# == Examples
|
18
|
+
#
|
19
|
+
# str = File.read("p53_human.swiss")
|
20
|
+
# obj = Bio::UniProt.new(str)
|
21
|
+
# obj.entry_id #=> "P53_HUMAN"
|
22
|
+
#
|
23
|
+
# == References
|
24
|
+
#
|
25
|
+
# * UniProt
|
26
|
+
# http://uniprot.org/
|
27
|
+
#
|
28
|
+
# * The UniProtKB/SwissProt/TrEMBL User Manual
|
29
|
+
# http://www.expasy.org/sprot/userman.html
|
30
|
+
|
31
|
+
#--
|
5
32
|
#
|
6
33
|
# This library is free software; you can redistribute it and/or
|
7
34
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -17,13 +44,15 @@
|
|
17
44
|
# License along with this library; if not, write to the Free Software
|
18
45
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
46
|
#
|
20
|
-
|
47
|
+
#++
|
21
48
|
#
|
22
49
|
|
23
50
|
require 'bio/db/embl/sptr'
|
24
51
|
|
25
52
|
module Bio
|
26
53
|
|
54
|
+
# Parser class for SwissProt database entry.
|
55
|
+
# See also Bio::SPTR class.
|
27
56
|
class UniProt < SPTR
|
28
57
|
# Nothing to do (UniProt format is abstracted in SPTR)
|
29
58
|
end
|
data/lib/bio/db/fasta.rb
CHANGED
@@ -1,24 +1,66 @@
|
|
1
1
|
#
|
2
|
-
# bio/db/fasta.rb - FASTA format class
|
2
|
+
# = bio/db/fasta.rb - FASTA format class
|
3
3
|
#
|
4
|
-
#
|
5
|
-
#
|
4
|
+
# Copyright:: Copyright (C) 2001, 2002
|
5
|
+
# GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>,
|
6
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
7
|
+
# License:: Ruby's
|
6
8
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
9
|
+
# $Id: fasta.rb,v 1.25 2006/02/22 08:44:46 ngoto Exp $
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
#
|
13
|
+
# FASTA format class.
|
11
14
|
#
|
12
|
-
#
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
15
|
+
# == Examples
|
16
16
|
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
17
|
+
# rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
18
|
+
# rub.entry_id ==> 'gi|671595'
|
19
|
+
# rub.get('emb') ==> 'CAA85678.1'
|
20
|
+
# rub.emb ==> 'CAA85678.1'
|
21
|
+
# rub.gi ==> '671595'
|
22
|
+
# rub.accession ==> 'CAA85678'
|
23
|
+
# rub.accessions ==> [ 'CAA85678' ]
|
24
|
+
# rub.acc_version ==> 'CAA85678.1'
|
25
|
+
# rub.locus ==> nil
|
26
|
+
# rub.list_ids ==> [["gi", "671595"],
|
27
|
+
# ["emb", "CAA85678.1", nil],
|
28
|
+
# ["Perovskia abrotanoides"]]
|
20
29
|
#
|
21
|
-
#
|
30
|
+
# ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
31
|
+
# ckr.entry_id ==> "gi|2495000"
|
32
|
+
# ckr.sp ==> "CCKR_CAVPO"
|
33
|
+
# ckr.pir ==> "I51898"
|
34
|
+
# ckr.gb ==> "AAB29504.1"
|
35
|
+
# ckr.gi ==> "2495000"
|
36
|
+
# ckr.accession ==> "AAB29504"
|
37
|
+
# ckr.accessions ==> ["Q63931", "AAB29504"]
|
38
|
+
# ckr.acc_version ==> "AAB29504.1"
|
39
|
+
# ckr.locus ==> nil
|
40
|
+
# ckr.description ==>
|
41
|
+
# "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
42
|
+
# ckr.descriptions ==>
|
43
|
+
# ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
44
|
+
# "cholecystokinin A receptor - guinea pig",
|
45
|
+
# "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
46
|
+
# ckr.words ==>
|
47
|
+
# ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
48
|
+
# "receptor", "type"]
|
49
|
+
# ckr.id_strings ==>
|
50
|
+
# ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
51
|
+
# "544724", "AAB29504.1", "Cavia"]
|
52
|
+
# ckr.list_ids ==>
|
53
|
+
# [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
54
|
+
# ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
55
|
+
# ["gb", "AAB29504.1", nil], ["Cavia"]]
|
56
|
+
#
|
57
|
+
# == References
|
58
|
+
#
|
59
|
+
# * FASTA format (WikiPedia)
|
60
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
61
|
+
#
|
62
|
+
# * Fasta format description (NCBI)
|
63
|
+
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
22
64
|
#
|
23
65
|
|
24
66
|
require 'bio/db'
|
@@ -26,34 +68,132 @@ require 'bio/sequence'
|
|
26
68
|
|
27
69
|
module Bio
|
28
70
|
|
71
|
+
|
72
|
+
# Treats a FASTA formatted entry, such as:
|
73
|
+
#
|
74
|
+
# >id and/or some comments <== comment line
|
75
|
+
# ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
|
76
|
+
# ATGCATGCATGCATGCATGCATGCATGCATGCATGC
|
77
|
+
# ATGCATGCATGC
|
78
|
+
#
|
79
|
+
# The precedent '>' can be omitted and the trailing '>' will be removed
|
80
|
+
# automatically.
|
81
|
+
#
|
82
|
+
# === Examples
|
83
|
+
#
|
84
|
+
# f_str = <<END
|
85
|
+
# >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
|
86
|
+
# MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
|
87
|
+
# VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
|
88
|
+
# GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
|
89
|
+
# KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
|
90
|
+
# IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
|
91
|
+
# QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
|
92
|
+
# >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
|
93
|
+
# MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
|
94
|
+
# TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
|
95
|
+
# GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
|
96
|
+
# DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
|
97
|
+
# DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
|
98
|
+
# EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
|
99
|
+
# AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
|
100
|
+
# QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
|
101
|
+
# CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
|
102
|
+
# FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
|
103
|
+
# KTGDPLEWRRLFKKISTICRDIILIPN
|
104
|
+
# END
|
105
|
+
#
|
106
|
+
# f = Bio::FastaFormat.new(f_str)
|
107
|
+
# puts "### FastaFormat"
|
108
|
+
# puts "# entry"
|
109
|
+
# puts f.entry
|
110
|
+
# puts "# entry_id"
|
111
|
+
# p f.entry_id
|
112
|
+
# puts "# definition"
|
113
|
+
# p f.definition
|
114
|
+
# puts "# data"
|
115
|
+
# p f.data
|
116
|
+
# puts "# seq"
|
117
|
+
# p f.seq
|
118
|
+
# puts "# seq.type"
|
119
|
+
# p f.seq.type
|
120
|
+
# puts "# length"
|
121
|
+
# p f.length
|
122
|
+
# puts "# aaseq"
|
123
|
+
# p f.aaseq
|
124
|
+
# puts "# aaseq.type"
|
125
|
+
# p f.aaseq.type
|
126
|
+
# puts "# aaseq.composition"
|
127
|
+
# p f.aaseq.composition
|
128
|
+
# puts "# aalen"
|
129
|
+
# p f.aalen
|
130
|
+
#
|
131
|
+
# === References
|
132
|
+
#
|
133
|
+
# * FASTA format (WikiPedia)
|
134
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
135
|
+
#
|
29
136
|
class FastaFormat < DB
|
30
137
|
|
138
|
+
# Entry delimiter in flatfile text.
|
31
139
|
DELIMITER = RS = "\n>"
|
32
140
|
|
141
|
+
# (Integer) excess read size included in DELIMITER.
|
142
|
+
DELIMITER_OVERRUN = 1 # '>'
|
143
|
+
|
144
|
+
# The comment line of the FASTA formatted data.
|
145
|
+
attr_accessor :definition
|
146
|
+
|
147
|
+
# The sequence lines in text.
|
148
|
+
attr_accessor :data
|
149
|
+
|
150
|
+
attr_reader :entry_overrun
|
151
|
+
|
152
|
+
# Stores the comment and sequence information from one entry of the
|
153
|
+
# FASTA format string. If the argument contains more than one
|
154
|
+
# entry, only the first entry is used.
|
33
155
|
def initialize(str)
|
34
156
|
@definition = str[/.*/].sub(/^>/, '').strip # 1st line
|
35
157
|
@data = str.sub(/.*/, '') # rests
|
36
158
|
@data.sub!(/^>.*/m, '') # remove trailing entries for sure
|
37
159
|
@entry_overrun = $&
|
38
160
|
end
|
39
|
-
attr_accessor :definition, :data
|
40
|
-
attr_reader :entry_overrun
|
41
161
|
|
162
|
+
# Returns the stored one entry as a FASTA format. (same as to_s)
|
42
163
|
def entry
|
43
164
|
@entry = ">#{@definition}\n#{@data.strip}\n"
|
44
165
|
end
|
45
166
|
alias to_s entry
|
46
167
|
|
168
|
+
|
169
|
+
# Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
|
170
|
+
# factory object.
|
171
|
+
#
|
172
|
+
# #!/usr/bin/env ruby
|
173
|
+
# require 'bio'
|
174
|
+
#
|
175
|
+
# factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
|
176
|
+
# flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
|
177
|
+
# flatfile.each do |entry|
|
178
|
+
# p entry.definition
|
179
|
+
# result = entry.fasta(factory)
|
180
|
+
# result.each do |hit|
|
181
|
+
# print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
|
182
|
+
# p hit.lap_at
|
183
|
+
# end
|
184
|
+
# end
|
185
|
+
#
|
47
186
|
def query(factory)
|
48
187
|
factory.query(@entry)
|
49
188
|
end
|
50
189
|
alias fasta query
|
51
190
|
alias blast query
|
52
191
|
|
192
|
+
# Returns a joined sequence line as a String.
|
53
193
|
def seq
|
54
194
|
unless defined?(@seq)
|
55
195
|
unless /\A\s*^\#/ =~ @data then
|
56
|
-
@seq = Sequence.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
|
196
|
+
@seq = Sequence::Generic.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
|
57
197
|
else
|
58
198
|
a = @data.split(/(^\#.*$)/)
|
59
199
|
i = 0
|
@@ -69,37 +209,61 @@ module Bio
|
|
69
209
|
end
|
70
210
|
end
|
71
211
|
@comment = cmnt
|
72
|
-
@seq = Bio::Sequence.new(s.join(''))
|
212
|
+
@seq = Bio::Sequence::Generic.new(s.join(''))
|
73
213
|
end
|
74
214
|
end
|
75
215
|
@seq
|
76
216
|
end
|
77
217
|
|
218
|
+
# Returns comments.
|
78
219
|
def comment
|
79
220
|
seq
|
80
221
|
@comment
|
81
222
|
end
|
82
223
|
|
224
|
+
# Returns sequence length.
|
83
225
|
def length
|
84
226
|
seq.length
|
85
227
|
end
|
86
228
|
|
229
|
+
# Returns the Bio::Sequence::NA.
|
87
230
|
def naseq
|
88
231
|
Sequence::NA.new(seq)
|
89
232
|
end
|
90
233
|
|
234
|
+
# Returns the length of Bio::Sequence::NA.
|
91
235
|
def nalen
|
92
236
|
self.naseq.length
|
93
237
|
end
|
94
238
|
|
239
|
+
# Returns the Bio::Sequence::AA.
|
95
240
|
def aaseq
|
96
241
|
Sequence::AA.new(seq)
|
97
242
|
end
|
98
243
|
|
244
|
+
# Returns the length of Bio::Sequence::AA.
|
99
245
|
def aalen
|
100
246
|
self.aaseq.length
|
101
247
|
end
|
102
248
|
|
249
|
+
# Returns sequence as a Bio::Sequence object.
|
250
|
+
#
|
251
|
+
# Note: If you modify the returned Bio::Sequence object,
|
252
|
+
# the sequence or definition in this FastaFormat object
|
253
|
+
# might also be changed (but not always be changed)
|
254
|
+
# because of efficiency.
|
255
|
+
#
|
256
|
+
def to_seq
|
257
|
+
seq
|
258
|
+
obj = Bio::Sequence.new(@seq)
|
259
|
+
obj.definition = self.definition
|
260
|
+
obj
|
261
|
+
end
|
262
|
+
|
263
|
+
# Parsing FASTA Defline, and extract IDs.
|
264
|
+
# IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
265
|
+
# or ":"-separated IDs.
|
266
|
+
# It returns a Bio::FastaDefline instance.
|
103
267
|
def identifiers
|
104
268
|
unless defined?(@ids) then
|
105
269
|
@ids = FastaDefline.new(@definition)
|
@@ -107,34 +271,69 @@ module Bio
|
|
107
271
|
@ids
|
108
272
|
end
|
109
273
|
|
274
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
275
|
+
# shows a possibly unique identifier.
|
276
|
+
# It returns a string.
|
110
277
|
def entry_id
|
111
278
|
identifiers.entry_id
|
112
279
|
end
|
113
280
|
|
281
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
282
|
+
# shows GI/locus/accession/accession with version number.
|
283
|
+
# If an entry has more than two of such IDs,
|
284
|
+
# only the first ID is shown.
|
285
|
+
# It returns a string or nil.
|
114
286
|
def gi
|
115
287
|
identifiers.gi
|
116
288
|
end
|
117
289
|
|
290
|
+
# Returns an accession number.
|
118
291
|
def accession
|
119
292
|
identifiers.accession
|
120
293
|
end
|
121
294
|
|
295
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
296
|
+
# shows accession numbers.
|
297
|
+
# It returns an array of strings.
|
122
298
|
def accessions
|
123
299
|
identifiers.accessions
|
124
300
|
end
|
125
301
|
|
302
|
+
# Returns accession number with version.
|
126
303
|
def acc_version
|
127
304
|
identifiers.acc_version
|
128
305
|
end
|
129
306
|
|
307
|
+
# Returns locus.
|
130
308
|
def locus
|
131
309
|
identifiers.locus
|
132
310
|
end
|
133
311
|
|
134
312
|
end #class FastaFormat
|
135
313
|
|
314
|
+
# Treats a FASTA formatted numerical entry, such as:
|
315
|
+
#
|
316
|
+
# >id and/or some comments <== comment line
|
317
|
+
# 24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
|
318
|
+
# 22 17 15 25 27 32 26 32 29 29 25
|
319
|
+
#
|
320
|
+
# The precedent '>' can be omitted and the trailing '>' will be removed
|
321
|
+
# automatically.
|
322
|
+
#
|
323
|
+
# --- Bio::FastaNumericFormat.new(entry)
|
324
|
+
#
|
325
|
+
# Stores the comment and the list of the numerical data.
|
326
|
+
#
|
327
|
+
# --- Bio::FastaNumericFormat#definition
|
328
|
+
#
|
329
|
+
# The comment line of the FASTA formatted data.
|
330
|
+
#
|
331
|
+
# * FASTA format (Wikipedia)
|
332
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
136
333
|
class FastaNumericFormat < FastaFormat
|
137
334
|
|
335
|
+
# Returns the list of the numerical data (typically the quality score
|
336
|
+
# of its corresponding sequence) as an Array.
|
138
337
|
def data
|
139
338
|
unless @list
|
140
339
|
@list = @data.strip.split(/\s+/).map {|x| x.to_i}
|
@@ -142,16 +341,19 @@ module Bio
|
|
142
341
|
@list
|
143
342
|
end
|
144
343
|
|
344
|
+
# Returns the number of elements in the numerical data.
|
145
345
|
def length
|
146
346
|
data.length
|
147
347
|
end
|
148
348
|
|
349
|
+
# Yields each element of the numerical data.
|
149
350
|
def each
|
150
351
|
data.each do |x|
|
151
352
|
yield x
|
152
353
|
end
|
153
354
|
end
|
154
355
|
|
356
|
+
# Returns the n-th element.
|
155
357
|
def [](n)
|
156
358
|
data[n]
|
157
359
|
end
|
@@ -160,11 +362,69 @@ module Bio
|
|
160
362
|
|
161
363
|
end #class FastaNumericFormat
|
162
364
|
|
163
|
-
class FastaDefline
|
164
365
|
|
165
|
-
|
166
|
-
|
167
|
-
|
366
|
+
# Parsing FASTA Defline, and extract IDs and other information.
|
367
|
+
# IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
368
|
+
# or ":"-separated IDs.
|
369
|
+
#
|
370
|
+
# specs are described in:
|
371
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
372
|
+
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
373
|
+
#
|
374
|
+
# === Examples
|
375
|
+
#
|
376
|
+
# rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
377
|
+
# rub.entry_id ==> 'gi|671595'
|
378
|
+
# rub.get('emb') ==> 'CAA85678.1'
|
379
|
+
# rub.emb ==> 'CAA85678.1'
|
380
|
+
# rub.gi ==> '671595'
|
381
|
+
# rub.accession ==> 'CAA85678'
|
382
|
+
# rub.accessions ==> [ 'CAA85678' ]
|
383
|
+
# rub.acc_version ==> 'CAA85678.1'
|
384
|
+
# rub.locus ==> nil
|
385
|
+
# rub.list_ids ==> [["gi", "671595"],
|
386
|
+
# ["emb", "CAA85678.1", nil],
|
387
|
+
# ["Perovskia abrotanoides"]]
|
388
|
+
#
|
389
|
+
# ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
390
|
+
# ckr.entry_id ==> "gi|2495000"
|
391
|
+
# ckr.sp ==> "CCKR_CAVPO"
|
392
|
+
# ckr.pir ==> "I51898"
|
393
|
+
# ckr.gb ==> "AAB29504.1"
|
394
|
+
# ckr.gi ==> "2495000"
|
395
|
+
# ckr.accession ==> "AAB29504"
|
396
|
+
# ckr.accessions ==> ["Q63931", "AAB29504"]
|
397
|
+
# ckr.acc_version ==> "AAB29504.1"
|
398
|
+
# ckr.locus ==> nil
|
399
|
+
# ckr.description ==>
|
400
|
+
# "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
401
|
+
# ckr.descriptions ==>
|
402
|
+
# ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
403
|
+
# "cholecystokinin A receptor - guinea pig",
|
404
|
+
# "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
405
|
+
# ckr.words ==>
|
406
|
+
# ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
407
|
+
# "receptor", "type"]
|
408
|
+
# ckr.id_strings ==>
|
409
|
+
# ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
410
|
+
# "544724", "AAB29504.1", "Cavia"]
|
411
|
+
# ckr.list_ids ==>
|
412
|
+
# [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
413
|
+
# ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
414
|
+
# ["gb", "AAB29504.1", nil], ["Cavia"]]
|
415
|
+
#
|
416
|
+
# === References
|
417
|
+
#
|
418
|
+
# * Fasta format description (NCBI)
|
419
|
+
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
420
|
+
#
|
421
|
+
# * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
|
422
|
+
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
423
|
+
#
|
424
|
+
# * README.formatdb
|
425
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
426
|
+
#
|
427
|
+
class FastaDefline
|
168
428
|
|
169
429
|
NSIDs = {
|
170
430
|
# NCBI and WU-BLAST
|
@@ -197,6 +457,15 @@ module Bio
|
|
197
457
|
'ri' => [ 'entry_id', 'rearray_id', 'len' ], # RIKEN FANTOM DB
|
198
458
|
}
|
199
459
|
|
460
|
+
# Shows array that contains IDs (or ID-like strings).
|
461
|
+
# Returns an array of arrays of strings.
|
462
|
+
attr_reader :list_ids
|
463
|
+
|
464
|
+
# Shows a possibly unique identifier.
|
465
|
+
# Returns a string.
|
466
|
+
attr_reader :entry_id
|
467
|
+
|
468
|
+
# Parses given string.
|
200
469
|
def initialize(str)
|
201
470
|
@deflines = []
|
202
471
|
@info = {}
|
@@ -210,9 +479,7 @@ module Bio
|
|
210
479
|
end
|
211
480
|
end #def initialize
|
212
481
|
|
213
|
-
|
214
|
-
attr_reader :entry_id
|
215
|
-
|
482
|
+
# Parses given string and adds parsed data.
|
216
483
|
def add_defline(str)
|
217
484
|
case str
|
218
485
|
when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
|
@@ -343,6 +610,10 @@ module Bio
|
|
343
610
|
end #def parse_NSIDs
|
344
611
|
private :parse_NSIDs
|
345
612
|
|
613
|
+
|
614
|
+
# Shows original string.
|
615
|
+
# Note that the result of this method may be different from
|
616
|
+
# original string which is given in FastaDefline.new method.
|
346
617
|
def to_s
|
347
618
|
@deflines.collect { |a|
|
348
619
|
s = a[0]
|
@@ -350,16 +621,20 @@ module Bio
|
|
350
621
|
}.join("\x01")
|
351
622
|
end
|
352
623
|
|
624
|
+
# Shows description.
|
353
625
|
def description
|
354
626
|
@deflines[0].to_a[-1]
|
355
627
|
end
|
356
628
|
|
629
|
+
# Returns descriptions.
|
357
630
|
def descriptions
|
358
631
|
@deflines.collect do |a|
|
359
632
|
a[-1]
|
360
633
|
end
|
361
634
|
end
|
362
635
|
|
636
|
+
# Shows ID-like strings.
|
637
|
+
# Returns an array of strings.
|
363
638
|
def id_strings
|
364
639
|
r = []
|
365
640
|
@list_ids.each do |a|
|
@@ -401,6 +676,7 @@ module Bio
|
|
401
676
|
/\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
|
402
677
|
]
|
403
678
|
|
679
|
+
# Shows words used in the defline. Returns an Array.
|
404
680
|
def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
|
405
681
|
kwhash = self.class::KillWordsHash)
|
406
682
|
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
|
@@ -426,8 +702,9 @@ module Bio
|
|
426
702
|
a
|
427
703
|
end
|
428
704
|
|
429
|
-
|
430
|
-
|
705
|
+
# Returns identifiers by a database name.
|
706
|
+
def get(dbname)
|
707
|
+
db = dbname.to_s
|
431
708
|
r = nil
|
432
709
|
unless r = @info[db] then
|
433
710
|
di = @list_ids.find { |x| x[0] == db.to_s }
|
@@ -449,10 +726,11 @@ module Bio
|
|
449
726
|
r
|
450
727
|
end
|
451
728
|
|
452
|
-
|
729
|
+
# Returns an identifier by given type.
|
730
|
+
def get_by_type(type_str)
|
453
731
|
@list_ids.each do |x|
|
454
732
|
if labels = self.class::NSIDs[x[0]] then
|
455
|
-
if i = labels.index(
|
733
|
+
if i = labels.index(type_str) then
|
456
734
|
return x[i+1]
|
457
735
|
end
|
458
736
|
end
|
@@ -460,11 +738,12 @@ module Bio
|
|
460
738
|
nil
|
461
739
|
end
|
462
740
|
|
463
|
-
|
741
|
+
# Returns identifiers by given type.
|
742
|
+
def get_all_by_type(*type_strarg)
|
464
743
|
d = []
|
465
744
|
@list_ids.each do |x|
|
466
745
|
if labels = self.class::NSIDs[x[0]] then
|
467
|
-
|
746
|
+
type_strarg.each do |y|
|
468
747
|
if i = labels.index(y) then
|
469
748
|
d << x[i+1] if x[i+1]
|
470
749
|
end
|
@@ -474,6 +753,10 @@ module Bio
|
|
474
753
|
d
|
475
754
|
end
|
476
755
|
|
756
|
+
# Shows locus.
|
757
|
+
# If the entry has more than two of such IDs,
|
758
|
+
# only the first ID is shown.
|
759
|
+
# Returns a string or nil.
|
477
760
|
def locus
|
478
761
|
unless defined?(@locus)
|
479
762
|
@locus = get_by_type('locus')
|
@@ -481,6 +764,10 @@ module Bio
|
|
481
764
|
@locus
|
482
765
|
end
|
483
766
|
|
767
|
+
# Shows GI.
|
768
|
+
# If the entry has more than two of such IDs,
|
769
|
+
# only the first ID is shown.
|
770
|
+
# Returns a string or nil.
|
484
771
|
def gi
|
485
772
|
unless defined?(@gi) then
|
486
773
|
@gi = get_by_type('gi')
|
@@ -488,6 +775,10 @@ module Bio
|
|
488
775
|
@gi
|
489
776
|
end
|
490
777
|
|
778
|
+
# Shows accession with version number.
|
779
|
+
# If the entry has more than two of such IDs,
|
780
|
+
# only the first ID is shown.
|
781
|
+
# Returns a string or nil.
|
491
782
|
def acc_version
|
492
783
|
unless defined?(@acc_version) then
|
493
784
|
@acc_version = get_by_type('acc_version')
|
@@ -495,6 +786,8 @@ module Bio
|
|
495
786
|
@acc_version
|
496
787
|
end
|
497
788
|
|
789
|
+
# Shows accession numbers.
|
790
|
+
# Returns an array of strings.
|
498
791
|
def accessions
|
499
792
|
unless defined?(@accessions) then
|
500
793
|
@accessions = get_all_by_type('accession', 'acc_version')
|
@@ -503,6 +796,7 @@ module Bio
|
|
503
796
|
@accessions
|
504
797
|
end
|
505
798
|
|
799
|
+
# Shows an accession number.
|
506
800
|
def accession
|
507
801
|
unless defined?(@accession) then
|
508
802
|
if acc_version then
|
@@ -523,6 +817,7 @@ module Bio
|
|
523
817
|
end
|
524
818
|
r
|
525
819
|
end
|
820
|
+
|
526
821
|
|
527
822
|
end #class FastaDefline
|
528
823
|
|
@@ -610,260 +905,3 @@ END
|
|
610
905
|
|
611
906
|
end
|
612
907
|
|
613
|
-
=begin
|
614
|
-
|
615
|
-
= Bio::FastaFormat
|
616
|
-
|
617
|
-
Treats a FASTA formatted entry, such as:
|
618
|
-
|
619
|
-
>id and/or some comments <== comment line
|
620
|
-
ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
|
621
|
-
ATGCATGCATGCATGCATGCATGCATGCATGCATGC
|
622
|
-
ATGCATGCATGC
|
623
|
-
|
624
|
-
The leading '>' can be omitted and the trailing '>' will be removed
|
625
|
-
automatically.
|
626
|
-
|
627
|
-
--- Bio::FastaFormat.new(entry)
|
628
|
-
|
629
|
-
Stores the comment and sequence information from one entry of the
|
630
|
-
FASTA format string. If the argument contains more than one
|
631
|
-
entry, only the first entry is used.
|
632
|
-
|
633
|
-
--- Bio::FastaFormat#entry
|
634
|
-
|
635
|
-
Returns the stored one entry as a FASTA format. (same as to_s)
|
636
|
-
|
637
|
-
--- Bio::FastaFormat#definition
|
638
|
-
|
639
|
-
Returns the comment line of the FASTA formatted data.
|
640
|
-
|
641
|
-
--- Bio::FastaFormat#seq
|
642
|
-
|
643
|
-
Returns a joined sequence line as a String.
|
644
|
-
|
645
|
-
--- Bio::FastaFormat#query(factory)
|
646
|
-
--- Bio::FastaFormat#fasta(factory)
|
647
|
-
--- Bio::FastaFormat#blast(factory)
|
648
|
-
|
649
|
-
Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
|
650
|
-
factory object.
|
651
|
-
|
652
|
-
#!/usr/bin/env ruby
|
653
|
-
|
654
|
-
require 'bio'
|
655
|
-
|
656
|
-
factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
|
657
|
-
flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
|
658
|
-
flatfile.each do |entry|
|
659
|
-
p entry.definition
|
660
|
-
result = entry.fasta(factory)
|
661
|
-
result.each do |hit|
|
662
|
-
print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
|
663
|
-
p hit.lap_at
|
664
|
-
end
|
665
|
-
end
|
666
|
-
|
667
|
-
--- Bio::FastaFormat#length
|
668
|
-
|
669
|
-
Returns sequence length.
|
670
|
-
|
671
|
-
--- Bio::FastaFormat#naseq
|
672
|
-
--- Bio::FastaFormat#nalen
|
673
|
-
--- Bio::FastaFormat#aaseq
|
674
|
-
--- Bio::FastaFormat#aalen
|
675
|
-
|
676
|
-
If you know whether the sequence is NA or AA, use these methods.
|
677
|
-
'naseq' and 'aaseq' methods return the Bio::Sequence::NA or
|
678
|
-
Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods
|
679
|
-
return the length of them.
|
680
|
-
|
681
|
-
--- Bio::FastaFormat#identifiers
|
682
|
-
|
683
|
-
Parses the FASTA defline and extracts IDs.
|
684
|
-
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
685
|
-
or ":"-separated IDs.
|
686
|
-
It returns a Bio::FastaDefline instance.
|
687
|
-
|
688
|
-
--- Bio::FastaFormat#entry_id
|
689
|
-
|
690
|
-
Parsing FASTA Defline (using #identifiers method), and
|
691
|
-
shows a possibly unique identifier.
|
692
|
-
It returns a string.
|
693
|
-
|
694
|
-
--- Bio::FastaFormat#gi
|
695
|
-
--- Bio::FastaFormat#locus
|
696
|
-
--- Bio::FastaFormat#accession
|
697
|
-
--- Bio::FastaFormat#acc_version
|
698
|
-
|
699
|
-
Parsing FASTA Defline (using #identifiers method), and
|
700
|
-
shows GI/locus/accession/accession with version number.
|
701
|
-
If an entry has more than one such ID,
|
702
|
-
only the first ID is shown.
|
703
|
-
It returns a string or nil.
|
704
|
-
|
705
|
-
--- Bio::FastaFormat#accessions
|
706
|
-
|
707
|
-
Parsing FASTA Defline (using #identifiers method), and
|
708
|
-
shows accession numbers.
|
709
|
-
It returns an array of strings.
|
710
|
-
|
711
|
-
--- Bio::FastaFormat
|
712
|
-
|
713
|
-
= Bio::FastaNumericFormat
|
714
|
-
|
715
|
-
Treats a FASTA formatted numerical entry, such as:
|
716
|
-
|
717
|
-
>id and/or some comments <== comment line
|
718
|
-
24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
|
719
|
-
22 17 15 25 27 32 26 32 29 29 25
|
720
|
-
|
721
|
-
The leading '>' can be omitted and the trailing '>' will be removed
|
722
|
-
automatically.
|
723
|
-
|
724
|
-
--- Bio::FastaNumericFormat.new(entry)
|
725
|
-
|
726
|
-
Stores the comment and the list of the numerical data.
|
727
|
-
|
728
|
-
--- Bio::FastaNumericFormat#definition
|
729
|
-
|
730
|
-
The comment line of the FASTA formatted data.
|
731
|
-
|
732
|
-
--- Bio::FastaNumericFormat#data
|
733
|
-
|
734
|
-
Returns the list of the numerical data (typically the quality score
|
735
|
-
of its corresponding sequence) as an Array.
|
736
|
-
|
737
|
-
--- Bio::FastaNumericFormat#length
|
738
|
-
|
739
|
-
Returns the number of elements in the numerical data.
|
740
|
-
|
741
|
-
--- Bio::FastaNumericFormat#each
|
742
|
-
|
743
|
-
Yields each element of the numerical data.
|
744
|
-
|
745
|
-
--- Bio::FastaNumericFormat#[](n)
|
746
|
-
|
747
|
-
Returns the n-th element.
|
748
|
-
|
749
|
-
--- Bio::FastaNumericFormat#identifiers
|
750
|
-
--- Bio::FastaNumericFormat#entry_id
|
751
|
-
--- Bio::FastaNumericFormat#gi
|
752
|
-
--- Bio::FastaNumericFormat#locus
|
753
|
-
--- Bio::FastaNumericFormat#accession
|
754
|
-
--- Bio::FastaNumericFormat#acc_version
|
755
|
-
--- Bio::FastaNumericFormat#accessions
|
756
|
-
|
757
|
-
Same as Bio::FastaFormat.
|
758
|
-
|
759
|
-
|
760
|
-
= Bio::FastaDefline
|
761
|
-
|
762
|
-
Parses a FASTA defline and extracts IDs and other information.
|
763
|
-
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
764
|
-
or ":"-separated IDs.
|
765
|
-
|
766
|
-
--- see also:
|
767
|
-
ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
768
|
-
http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
769
|
-
|
770
|
-
--- Bio::FastaDefline.new(str)
|
771
|
-
|
772
|
-
Parses given string.
|
773
|
-
|
774
|
-
--- Bio::FastaFormat#entry_id
|
775
|
-
|
776
|
-
Shows a possibly unique identifier.
|
777
|
-
Returns a string.
|
778
|
-
|
779
|
-
--- Bio::FastaDefline#gi
|
780
|
-
--- Bio::FastaDefline#locus
|
781
|
-
--- Bio::FastaDefline#accession
|
782
|
-
--- Bio::FastaDefline#acc_version
|
783
|
-
|
784
|
-
Shows GI/locus/accession/accession with version number.
|
785
|
-
If the entry has more than one such ID,
|
786
|
-
only the first ID is shown.
|
787
|
-
Returns a string or nil.
|
788
|
-
|
789
|
-
--- Bio::FastaFormat#accessions
|
790
|
-
|
791
|
-
Shows accession numbers.
|
792
|
-
Returns an array of strings.
|
793
|
-
|
794
|
-
--- Bio::FastaDefline#add_defline(str)
|
795
|
-
|
796
|
-
Parses given string and adds parsed data.
|
797
|
-
|
798
|
-
--- Bio::FastaDefline#to_s
|
799
|
-
|
800
|
-
Shows original string.
|
801
|
-
Note that the result of this method may be different from
|
802
|
-
original string which is given in FastaDefline.new method.
|
803
|
-
|
804
|
-
--- Bio::FastaDefline#id_strings
|
805
|
-
|
806
|
-
Shows ID-like strings.
|
807
|
-
Returns an array of strings.
|
808
|
-
|
809
|
-
--- Bio::FastaDefline#list_ids
|
810
|
-
|
811
|
-
Shows array that contains IDs (or ID-like strings).
|
812
|
-
Returns an array of arrays of strings.
|
813
|
-
|
814
|
-
--- Bio::FastaDefline#description
|
815
|
-
--- Bio::FastaDefline#descriptions
|
816
|
-
|
817
|
-
--- Bio::FastaDefline#words(case_sensitive = nil,
|
818
|
-
kill_words_regexp_array, kill_words_hash)
|
819
|
-
|
820
|
-
--- Bio::FastaDefline#get(tag_of_id)
|
821
|
-
|
822
|
-
--- Bio::FastaDefline#get_by_type(type_of_id)
|
823
|
-
|
824
|
-
--- Bio::FastaDefline#get_all_by_type(type_of_id)
|
825
|
-
|
826
|
-
--- examples:
|
827
|
-
rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
828
|
-
rub.entry_id ==> 'gi|671595'
|
829
|
-
rub.get('emb') ==> 'CAA85678.1'
|
830
|
-
rub.emb ==> 'CAA85678.1'
|
831
|
-
rub.gi ==> '671595'
|
832
|
-
rub.accession ==> 'CAA85678'
|
833
|
-
rub.accessions ==> [ 'CAA85678' ]
|
834
|
-
rub.acc_version ==> 'CAA85678.1'
|
835
|
-
rub.locus ==> nil
|
836
|
-
rub.list_ids ==> [["gi", "671595"],
|
837
|
-
["emb", "CAA85678.1", nil],
|
838
|
-
["Perovskia abrotanoides"]]
|
839
|
-
|
840
|
-
ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
841
|
-
ckr.entry_id ==> "gi|2495000"
|
842
|
-
ckr.sp ==> "CCKR_CAVPO"
|
843
|
-
ckr.pir ==> "I51898"
|
844
|
-
ckr.gb ==> "AAB29504.1"
|
845
|
-
ckr.gi ==> "2495000"
|
846
|
-
ckr.accession ==> "AAB29504"
|
847
|
-
ckr.accessions ==> ["Q63931", "AAB29504"]
|
848
|
-
ckr.acc_version ==> "AAB29504.1"
|
849
|
-
ckr.locus ==> nil
|
850
|
-
ckr.description ==>
|
851
|
-
"CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
852
|
-
ckr.descriptions ==>
|
853
|
-
["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
854
|
-
"cholecystokinin A receptor - guinea pig",
|
855
|
-
"cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
856
|
-
ckr.words ==>
|
857
|
-
["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
858
|
-
"receptor", "type"]
|
859
|
-
ckr.id_strings ==>
|
860
|
-
["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
861
|
-
"544724", "AAB29504.1", "Cavia"]
|
862
|
-
ckr.list_ids ==>
|
863
|
-
[["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
864
|
-
["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
865
|
-
["gb", "AAB29504.1", nil], ["Cavia"]]
|
866
|
-
|
867
|
-
=end
|
868
|
-
|
869
|
-
|